(***********************************************************************
 @Pascal-file{
    author              = "Tom Schneider",
    version             = "1.00",
    date                = "10 Sep 1991",
    filename            = "sortbibtex.p",
    address             = "National Cancer Institute
                           Laboratory of Mathematical Biology
                           Frederick, Maryland 21701-1013
                           USA",
    checksum            = "13231 403 1542 11359",
    email               = "toms@ncifcrf.gov (Internet)",
    codetable           = "ISO/ASCII",
    keywords            = "BibTeX",
    supported           = "yes",
    docstring           = "This Pascal program sorts a BibTeX database
                           file.

                           The checksum field above contains a CRC-16
                           checksum as the first value, followed by the
                           equivalent of the standard UNIX wc (word
                           count) utility output of lines, words, and
                           characters.  This is produced by Robert
                           Solovay's checksum utility."
 }
***********************************************************************)
program sortbibtex(fin, fout, output);
(* sortbibtex: sort a bibtex database *)

label 1; (* end of program *)

const
(* begin module version *)
version = 2.11; (* of sortbibtex.p 1990 May 2
origin 1990 May 2 *)
(* end module version *)

(* begin module describe.sortbibtex *)
(*
name
   sortbibtex: sort a bibtex database

synopsis
   sortbibtex(fin: in, fout: fout, output: out)

files
   fin: a bibtex database
   fout: bibtex database sorted by the key
   output: messages to the user, including errors in the
      structure of the database and duplicate entries.

description
   Sort a BibTeX database by the citation keys.

examples

documentation

see also
   rembla.p

author
   Thomas Dana Schneider

bugs
   Entries are defined by blank lines.  Use rembla to make sure that
   there are no extra spaces on the ends of lines.

technical notes

*)
(* end module describe.sortbibtex *)

(* const *)
   mapmax = 200000; (* largest number of entries that can be handled by
         the program.  It determines the size of the map array. *)
   linewidth = 80; (* maximum width of lines in the file (one should never
      exceed 80 characters.  This is for safety of transportation of
      files on tape and over the net.  It also avoids confusion with
      wrapped lines. *)

type
   position = 0..mapmax;  (* somewhere on the map.  note: position 0 is not
      used, but it allows the quicksort to function properly *)

   lineptr = ^entryline;
   entryline = packed record (* a line of an entry *)
      string: array[1..linewidth] of char; (* a character string *)
      stringlength: integer; (* length of the string *)
      next: lineptr; (* pointer to the next line *)
   end;

   entryptr = ^entry; (* pointer to an entry *)
   entry = packed record (* entry of a bibtex database *)
      key: array[1..linewidth] of char; (* the citation key to sort on *)
      line: lineptr; (* the lines of the entry *)
   end;

var
   fin, fout: text; (* files used by this program *)

   (* the entire set of entries read in *)
   map: packed array[1..mapmax] of entryptr;

(* begin module halt *)
procedure halt;
(* stop the program.  the procedure performs a goto to the end of the
   program.  you must have a label:
      label 1;
   declared, and also the end of the program must have this label:
      1: end.
   examples are in the module libraries.
   this is the only goto in the delila system. *)
begin
      writeln(output,' program halt.');
      goto 1
end;
(* end module halt version = 'prgmod 4.05  89 Aug 28 tds'; *)

procedure writekey(var f: text; e: entryptr);
(* write the key of the entry e *)
var
   k: integer; (* position in a key *)
begin
   k := 1;
   while e^.key[k] <> ' ' do begin
      write(f,e^.key[k]);
      k := k + 1;
   end;
end;

function lessthan(alow, blow: position): boolean;
(* is the entry at alow less than (before, alphabetically)
   the entry at blow? *)
var
   a,b: char; (* characters in the two keys *)
   done: boolean; (* are we done yet? *)
   k: integer; (* position in a key *)
begin
   done := false;
   k := 0;
{
writeln(output,'lessthan:');
write(output,'"'); writekey(output,map[alow]);
write(output,'" vs "'); writekey(output,map[blow]);
writeln(output,'"');
}
   while not done do begin
      k := succ(k);
      a := map[alow]^.key[k];
      b := map[blow]^.key[k];

{
writeln(output,'a=',a,' b=',b);
}

      if (a = ' ') and (b = ' ')
      then begin
{
writeln(output,'identical entry keys: ');
writekey(output,map[alow]);
}
         done := true;
         lessthan := false
      end;

      if not done
      then if (a = ' ') or (b = ' ')
           then begin
              done := true;
              if a = ' ' then lessthan := true
                         else lessthan := false;
           end

           else begin
              if ord(a) < ord(b)
              then begin
{
writekey(output,map[alow]);
write(output,' < ');
writekey(output,map[blow]);
writeln(output);
}
                 lessthan := true;
                 done := true;
              end;

              if ord(a) > ord(b)
              then begin
{
writekey(output,map[alow]);
write(output,' > ');
writekey(output,map[blow]);
writeln(output);
}
                 lessthan := false;
                 done := true;
              end

           end
   end;
end;

procedure swap(a, b: position);
(* switch positions a and b *)
var
      hold: entryptr;
begin
      hold:=map[a];
      map[a]:=map[b];
      map[b]:=hold
end;

(* begin module quicksort *)

procedure quicksort(left, right: position);
(* quick sort a list between positions left and right, into ascending order.
   a position is simply a scalar of the form 0..max.
   the array to be sorted is dimensioned 1..max.
(the difference in the ranges is important to the correct operation
of the sort...)
   two external routines are used:
      function lessthan(a, b: position): boolean is a generalized test for
          value-at-a < value-at-b.
      procedure swap(a, b: position) switches the items at positions a and b.
   since these routines are external, the procedure is general.

   this procedure taken from the book
   'algorithms + data structures = programs' by niklaus wirth,
   prentice-hall, inc., englewood cliffs, n.j.(1976), pp. 76-82 *)
var
      lower, upper: position; (* the positions looked at currently *)
      center: position; (* the rough center of the region being sorted *)
begin
      lower  := left;
      center := (left + right) div 2;
      upper  := right;

      repeat
         while lessthan(lower, center) do lower := succ(lower);
         while lessthan(center, upper) do upper := pred(upper);

         if lower <= upper then begin
            (* keep track of the center through the map: *)
                 if lower = center then center:=upper
            else if upper = center then center:=lower;
            swap(lower, upper);
            lower := succ(lower);
            upper := pred(upper)
         end
      until lower > upper;

      if  left < upper then quicksort(left,  upper);
      if lower < right then quicksort(lower, right)
end;

(* end module quicksort version = 'prgmod 4.05  89 Aug 28 tds'; *)

procedure readline(var f: text; var l: lineptr; linenumber: integer);
(* read a bibtex line from f into l.  The line number is linenumber *)
begin
   new(l);
   with l^ do begin
      stringlength := 0;
      while (not eoln(f)) and (stringlength < linewidth) do begin
         stringlength := stringlength + 1;
         read(f,l^.string[stringlength]);
         if (stringlength = linewidth) and not eoln(f) then begin
            writeln(output,'line ',linenumber:1,' is longer than ',
                    linewidth:1,' characters.  Make it two lines.');
            halt
         end;
      end;
      readln(f);
      next := nil;
   end;
end;

procedure readentry(var f: text; var linenumber: integer; var e: entryptr);
(* read a bibtex entry from f into e, keep track of the current line number
in linenumber *)
var
   l: lineptr; (* pointer to a line of text *)
   p: integer; (* position on a line *)
   pkey: integer; (* position on a line that the key starts *)
begin
   new(e);
   with e^ do begin
      (* read the entry in *)

      (* locate the start of the entry *)
      while not eof(f) and (f^<>'@') do begin
         readln(f);
         linenumber := linenumber + 1;
      end;

      if eof(f) then begin
         writeln(output,'Remove blank lines from the end of the file',
                        ' using program rembla');
         halt
      end;

      (* read the first line in *)
      readline(f,line,linenumber);
      linenumber := linenumber + 1;

      (* find the citation key on the line *)
      p := 1;
      while line^.string[p] <> '{' do begin
         if p = linewidth then begin
            writeln(output, 'line ',linenumber:1,' is missing the "{"');
            halt
         end;
         p := p + 1;
      end;
      p := p + 1;
      pkey := p;

      (* read in the key *)
      while line^.string[p] <> ',' do begin
         if p = linewidth then begin
            writeln(output, 'line ',linenumber:1,' is missing the ","');
            halt
         end;
         key[p-pkey+1] := line^.string[p];
         p := p + 1;
      end;

      (* finish the key with a blank character *)
      key[p-pkey+1] := ' ';

{
      write(output,'line ',linenumber:1,' "');
      writekey(output,e);
      writeln(output,'"');
}
      (* read the rest of the entry *)
      
      l := line;
      while (not eof(f)) and (not eoln(f)) do begin
         linenumber := linenumber + 1;
         readline(f,l^.next,linenumber);
         if not eof(f) then l := l^.next;
      end;
   end;
end;

procedure writeentry(var f: text; e: entryptr);
(* write the entry e to output *)
var
   l: lineptr; (* pointer to a line of text *)
   p: integer; (* position on a line *)
begin
   l := e^.line;
   while l <> nil do with l^ do begin
      for p := 1 to stringlength do write(f,string[p]);
      writeln(f);
      l := l^.next
   end
end;

(* begin module sortbibtex.themain *)
procedure themain(var fin, fout: text);
(* the main procedure of the program *)
var
   linenumber: integer; (* line number in the file *)
   entries: integer; (* counter of the entries read in *)
   e: position; (* index to the entries *)
begin
   writeln(output,'sortbibtex ',version:4:2);
   reset(fin);
   rewrite(fout);
   linenumber := 0;

   entries := 0;
   while not eof(fin) do begin
      entries := entries + 1;
      readentry(fin,linenumber,map[entries]);
   end;

   quicksort(1,entries);

   (* show the entire set of entries *)
   for e := 1 to entries do begin
      writeentry(fout,map[e]);
      if e <> entries then writeln(fout) (* space between entries *)
   end;

   for e := 2 to entries do begin
      if not(lessthan(e, e-1)) and
         not(lessthan(e-1, e))
       then begin (* must be identical! *)
         write(output,'duplicate entry: ');
         writekey(output,map[e]);
         writeln(output);
      end
   end
end;
(* end module sortbibtex.themain *)

begin
   themain(fin, fout);
1: end.
