(*********************************************************************** @Pascal-file{ author = "Tom Schneider", version = "1.00", date = "10 Sep 1991", filename = "sortbibtex.p", address = "National Cancer Institute Laboratory of Mathematical Biology Frederick, Maryland 21701-1013 USA", checksum = "13231 403 1542 11359", email = "toms@ncifcrf.gov (Internet)", codetable = "ISO/ASCII", keywords = "BibTeX", supported = "yes", docstring = "This Pascal program sorts a BibTeX database file. The checksum field above contains a CRC-16 checksum as the first value, followed by the equivalent of the standard UNIX wc (word count) utility output of lines, words, and characters. This is produced by Robert Solovay's checksum utility." } ***********************************************************************) program sortbibtex(fin, fout, output); (* sortbibtex: sort a bibtex database *) label 1; (* end of program *) const (* begin module version *) version = 2.11; (* of sortbibtex.p 1990 May 2 origin 1990 May 2 *) (* end module version *) (* begin module describe.sortbibtex *) (* name sortbibtex: sort a bibtex database synopsis sortbibtex(fin: in, fout: fout, output: out) files fin: a bibtex database fout: bibtex database sorted by the key output: messages to the user, including errors in the structure of the database and duplicate entries. description Sort a BibTeX database by the citation keys. examples documentation see also rembla.p author Thomas Dana Schneider bugs Entries are defined by blank lines. Use rembla to make sure that there are no extra spaces on the ends of lines. technical notes *) (* end module describe.sortbibtex *) (* const *) mapmax = 200000; (* largest number of entries that can be handled by the program. It determines the size of the map array. *) linewidth = 80; (* maximum width of lines in the file (one should never exceed 80 characters. This is for safety of transportation of files on tape and over the net. It also avoids confusion with wrapped lines. *) type position = 0..mapmax; (* somewhere on the map. note: position 0 is not used, but it allows the quicksort to function properly *) lineptr = ^entryline; entryline = packed record (* a line of an entry *) string: array[1..linewidth] of char; (* a character string *) stringlength: integer; (* length of the string *) next: lineptr; (* pointer to the next line *) end; entryptr = ^entry; (* pointer to an entry *) entry = packed record (* entry of a bibtex database *) key: array[1..linewidth] of char; (* the citation key to sort on *) line: lineptr; (* the lines of the entry *) end; var fin, fout: text; (* files used by this program *) (* the entire set of entries read in *) map: packed array[1..mapmax] of entryptr; (* begin module halt *) procedure halt; (* stop the program. the procedure performs a goto to the end of the program. you must have a label: label 1; declared, and also the end of the program must have this label: 1: end. examples are in the module libraries. this is the only goto in the delila system. *) begin writeln(output,' program halt.'); goto 1 end; (* end module halt version = 'prgmod 4.05 89 Aug 28 tds'; *) procedure writekey(var f: text; e: entryptr); (* write the key of the entry e *) var k: integer; (* position in a key *) begin k := 1; while e^.key[k] <> ' ' do begin write(f,e^.key[k]); k := k + 1; end; end; function lessthan(alow, blow: position): boolean; (* is the entry at alow less than (before, alphabetically) the entry at blow? *) var a,b: char; (* characters in the two keys *) done: boolean; (* are we done yet? *) k: integer; (* position in a key *) begin done := false; k := 0; { writeln(output,'lessthan:'); write(output,'"'); writekey(output,map[alow]); write(output,'" vs "'); writekey(output,map[blow]); writeln(output,'"'); } while not done do begin k := succ(k); a := map[alow]^.key[k]; b := map[blow]^.key[k]; { writeln(output,'a=',a,' b=',b); } if (a = ' ') and (b = ' ') then begin { writeln(output,'identical entry keys: '); writekey(output,map[alow]); } done := true; lessthan := false end; if not done then if (a = ' ') or (b = ' ') then begin done := true; if a = ' ' then lessthan := true else lessthan := false; end else begin if ord(a) < ord(b) then begin { writekey(output,map[alow]); write(output,' < '); writekey(output,map[blow]); writeln(output); } lessthan := true; done := true; end; if ord(a) > ord(b) then begin { writekey(output,map[alow]); write(output,' > '); writekey(output,map[blow]); writeln(output); } lessthan := false; done := true; end end end; end; procedure swap(a, b: position); (* switch positions a and b *) var hold: entryptr; begin hold:=map[a]; map[a]:=map[b]; map[b]:=hold end; (* begin module quicksort *) procedure quicksort(left, right: position); (* quick sort a list between positions left and right, into ascending order. a position is simply a scalar of the form 0..max. the array to be sorted is dimensioned 1..max. (the difference in the ranges is important to the correct operation of the sort...) two external routines are used: function lessthan(a, b: position): boolean is a generalized test for value-at-a < value-at-b. procedure swap(a, b: position) switches the items at positions a and b. since these routines are external, the procedure is general. this procedure taken from the book 'algorithms + data structures = programs' by niklaus wirth, prentice-hall, inc., englewood cliffs, n.j.(1976), pp. 76-82 *) var lower, upper: position; (* the positions looked at currently *) center: position; (* the rough center of the region being sorted *) begin lower := left; center := (left + right) div 2; upper := right; repeat while lessthan(lower, center) do lower := succ(lower); while lessthan(center, upper) do upper := pred(upper); if lower <= upper then begin (* keep track of the center through the map: *) if lower = center then center:=upper else if upper = center then center:=lower; swap(lower, upper); lower := succ(lower); upper := pred(upper) end until lower > upper; if left < upper then quicksort(left, upper); if lower < right then quicksort(lower, right) end; (* end module quicksort version = 'prgmod 4.05 89 Aug 28 tds'; *) procedure readline(var f: text; var l: lineptr; linenumber: integer); (* read a bibtex line from f into l. The line number is linenumber *) begin new(l); with l^ do begin stringlength := 0; while (not eoln(f)) and (stringlength < linewidth) do begin stringlength := stringlength + 1; read(f,l^.string[stringlength]); if (stringlength = linewidth) and not eoln(f) then begin writeln(output,'line ',linenumber:1,' is longer than ', linewidth:1,' characters. Make it two lines.'); halt end; end; readln(f); next := nil; end; end; procedure readentry(var f: text; var linenumber: integer; var e: entryptr); (* read a bibtex entry from f into e, keep track of the current line number in linenumber *) var l: lineptr; (* pointer to a line of text *) p: integer; (* position on a line *) pkey: integer; (* position on a line that the key starts *) begin new(e); with e^ do begin (* read the entry in *) (* locate the start of the entry *) while not eof(f) and (f^<>'@') do begin readln(f); linenumber := linenumber + 1; end; if eof(f) then begin writeln(output,'Remove blank lines from the end of the file', ' using program rembla'); halt end; (* read the first line in *) readline(f,line,linenumber); linenumber := linenumber + 1; (* find the citation key on the line *) p := 1; while line^.string[p] <> '{' do begin if p = linewidth then begin writeln(output, 'line ',linenumber:1,' is missing the "{"'); halt end; p := p + 1; end; p := p + 1; pkey := p; (* read in the key *) while line^.string[p] <> ',' do begin if p = linewidth then begin writeln(output, 'line ',linenumber:1,' is missing the ","'); halt end; key[p-pkey+1] := line^.string[p]; p := p + 1; end; (* finish the key with a blank character *) key[p-pkey+1] := ' '; { write(output,'line ',linenumber:1,' "'); writekey(output,e); writeln(output,'"'); } (* read the rest of the entry *) l := line; while (not eof(f)) and (not eoln(f)) do begin linenumber := linenumber + 1; readline(f,l^.next,linenumber); if not eof(f) then l := l^.next; end; end; end; procedure writeentry(var f: text; e: entryptr); (* write the entry e to output *) var l: lineptr; (* pointer to a line of text *) p: integer; (* position on a line *) begin l := e^.line; while l <> nil do with l^ do begin for p := 1 to stringlength do write(f,string[p]); writeln(f); l := l^.next end end; (* begin module sortbibtex.themain *) procedure themain(var fin, fout: text); (* the main procedure of the program *) var linenumber: integer; (* line number in the file *) entries: integer; (* counter of the entries read in *) e: position; (* index to the entries *) begin writeln(output,'sortbibtex ',version:4:2); reset(fin); rewrite(fout); linenumber := 0; entries := 0; while not eof(fin) do begin entries := entries + 1; readentry(fin,linenumber,map[entries]); end; quicksort(1,entries); (* show the entire set of entries *) for e := 1 to entries do begin writeentry(fout,map[e]); if e <> entries then writeln(fout) (* space between entries *) end; for e := 2 to entries do begin if not(lessthan(e, e-1)) and not(lessthan(e-1, e)) then begin (* must be identical! *) write(output,'duplicate entry: '); writekey(output,map[e]); writeln(output); end end end; (* end module sortbibtex.themain *) begin themain(fin, fout); 1: end.