.\" -*-nroff-*-
.\" ====================================================================
.\"  @Troff-man-file{
.\"     author          = "Nelson H. F. Beebe",
.\"     version         = "0.15",
.\"     date            = "17 January 2000",
.\"     time            = "12:54:02 MST",
.\"     filename        = "bibsort.man",
.\"     address         = "Center for Scientific Computing
.\"                        University of Utah
.\"                        Department of Mathematics, 105 JWB
.\"                        155 S 1400 E RM 233
.\"                        Salt Lake City, UT 84112-0090
.\"                        USA",
.\"     telephone       = "+1 801 581 5254",
.\"     FAX             = "+1 801 581 4148",
.\"     URL             = "http://www.math.utah.edu/~beebe",
.\"     checksum        = "30229 803 3368 24250",
.\"     email           = "beebe@math.utah.edu, beebe@acm.org,
.\"                        beebe@ieee.org (Internet)",
.\"     codetable       = "ISO/ASCII",
.\"     keywords        = "bibliography, sorting, BibTeX",
.\"     supported       = "yes",
.\"     docstring       = "This file contains the UNIX manual pages
.\"                        for the bibsort utility, a program for
.\"                        sorting BibTeX data base files by their
.\"                        BibTeX citation label names.
.\"
.\"                        The checksum field above contains a CRC-16
.\"                        checksum as the first value, followed by the
.\"                        equivalent of the standard UNIX wc (word
.\"                        count) utility output of lines, words, and
.\"                        characters.  This is produced by Robert
.\"                        Solovay's checksum utility.",
.\"  }
.\" ====================================================================
.if t .ds Bi B\s-2IB\s+2T\\h'-0.1667m'\\v'0.20v'E\\v'-0.20v'\\h'-0.125m'X
.if n .ds Bi BibTeX
.if t .ds Te T\\h'-0.1667m'\\v'0.20v'E\\v'-0.20v'\\h'-0.125m'X
.if n .ds Te TeX
.TH BIBSORT 1 "17 January 2000" "Version 0.15"
.\"======================================================================
.SH NAME
bibsort \- sort a BibTeX bibliography file
.\"======================================================================
.SH SYNOPSIS
.B bibsort
.RB [ \-? ]
.RB [ \-author ]
.if n .ti +\w'\fBbibsort\fP\ 'u
.if t .ti +\w'\fBbibsort\fP\ 'u
.RB [ \-byday
.RB " or " \-bylabel
.RB " or " \-bypages
.if n .ti +\w'\fBbibsort\fP\ 'u
.RB " or " \-byseriesvolume
.RB " or " \-byvolume
.RB " or " \-byyear ]
.if n .ti +\w'\fBbibsort\fP\ 'u
.if t .ti +\w'\fBbibsort\fP\ 'u
.RB [ \-copyright ]
.RB [ \-help ]
.RB [ \-reverse ]
.RB [ \-version ]
.if n .ti +\w'\fBbibsort\fP\ 'u
.if t .ti +\w'\fBbibsort\fP\ 'u
[ optional
.BR sort (1)
options ]
.if n .ti +\w'\fBbibsort\fP\ 'u
.if t .ti +\w'\fBbibsort\fP\ 'u
[
.RI < infile
or
.I BibTeXfile(s)
]
.RI > outfile
.\"======================================================================
.SH DESCRIPTION
.B bibsort
filters a \*(Bi\& bibliography, or bibliography
fragment, on its standard input, printing on
standard output a sorted bibliography.
.PP
Sorting is normally by \*(Bi\& citation label name, or by
.I @String
macro name, and letter case is always ignored in
the sorting.
.PP
.\"======================================================================
.SH OPTIONS
Command-line options may be abbreviated to a
unique leading prefix, and letter case is ignored,
so that
.BR \-option ,
.BR \-Option ,
.BR \-OPTION ,
.BR \-oPtIoN ,
etc. are all equivalent.
.PP
For the sort order options beginning
.BR \-by ,
the last one seen overrides all earlier ones.
.PP
All options are parsed before any input
bibliography files are read, no matter what their
order on the command line.
.PP
Except for the options described below,
command-line words beginning with a hyphen
are assumed to be options to be passed to
.BR sort (1).
.PP
The leading hyphen that distinguishes an option
from a filename may be doubled, for compatibility
with GNU and POSIX conventions.  Thus,
.B \-author
and
.B \-\-author
are equivalent.
.PP
All remaining command-line words are assumed to be
input files.  Should such a filename begin with a
hyphen, it must be disguised by a leading absolute
or relative directory path, e.g.,
.I /tmp/-foo.bib
or
.IR ./-foo.bib .
.PP
The
.BR sort (1)
.B \-f
option to ignore letter case differences is always
supplied.  The
.B \-u
option removes duplicate bibliography entries from
the input stream; however, such entries must match
exactly, including all white space.
.PP
Sort keys are constructed from several parts of
the \*(Bi\& entry.  If non-numeric values are
found where numbers are normally expected (that
is, for
\*(Bi\&
.IR day ,
.IR number ,
.IR pages ,
.IR volume ,
and
.I year
keys), they are replaced by large integers that
will sort higher than any reasonable integer value
likely to be present.  Nondigits after the first
character are ignored, so
.I 20S
will reduce to
.IR 20 :
such values are occasionally seen for
.IR volume ,
.IR number ,
and
.I pages
values.
.PP
However, uncertain
.I year
values of the form
.I 19xx
or
.I 20xx
are sorted at the end of their century.
.\"-----------------------------------------------
.TP \w'\-byseriesvolume'u+2n
.B \-?
Give a brief help message on
.IR stderr ,
then process all further options, but exit with a
successful status code (on UNIX, 0) before
processing any files.
.\"-----------------------------------------------
.TP
.B \-author
Give an author credit on
.IR stderr ,
then process all further options, but exit with a
successful status code (on UNIX, 0) before
processing any files.
.\"-----------------------------------------------
.TP
.B \-byday
This option is intended for use with
bibliographies of publications containing day,
month, and year data, such as technical reports,
newspapers, and magazines.
.IP
With
.B \-byday
sorting, a
.I day
keyword is recognized (it will be standard in
\*(Bi\& 1.0), but for backward compatibility,
.I month
entries of the form
.IP
.nf
"daynumber " # monthname
"daynumber~" # monthname
{daynumber } # monthname
{daynumber~} # monthname
monthname # "daynumber "
monthname # "daynumber~"
monthname # {daynumber }
monthname # {daynumber~}
.fi
.IP
are also recognized, and will yield both a day and
a month.  If a day number is not available, a very
large value is assumed, which will sort the entry
after others that have day values in the same year
and month.
.IP
The sort keys are:
.I <part>
.I <year>
.I <month>
.I <day>
.I <start-pages>
.I <end-pages>
.IR <citation-label> ,
in that order.
.IP
The
.I <part>
key represents one of the \*(Bi\& file parts
described in a later section.
.\"-----------------------------------------------
.TP
.B \-bylabel
Sort the input by \*(Bi\& citation label.  This is
the default, if no
.B \-byxxx
options are specified.
.IP
The sort keys are:
.I <part>
.I <citation-label>
.I <journal>
.I <year>
.I <volume>
.I <number>
.I <start-pages>
.IR <end-pages> .
.IP
The use of additional sort keys after the initial
two or three is intentional: that way, entries
that are otherwise `equal' will be consistently
ordered according to their publication times.
.\"-----------------------------------------------
.TP
.B \-bypages
This option is intended for use with
bibliographies of articles from those journals
where page numbers increase monotonically through
the volume, across all issue numbers.  Do not use
it for bibliographies of journals or magazines
where page numbers are reset at each issue.
.IP
.B \-bypages
is similar to
.BR \-byvolume ,
except that the issue number is ignored.
.IP
The reason for ignoring the issue number is that
some journal databases lack that information.  If
.BR \-byvolume
were used, then articles lacking issue numbers
would be sorted separately from those with issue
numbers, which makes it harder to check for
duplicates, or to compare entries with original
journal issues.
.IP
The sort keys are:
.I <part>
.I <journal>
.I <year>
.I <volume>
.I <start-pages>
.I <end-pages>
.IR <citation-label> .
.\"-----------------------------------------------
.TP
.B \-byseriesvolume
This option is intended for use with
bibliographies of series, such as
.IR "Lecture Notes in Mathematics" .
.IP
The sort keys are:
.I <part>
.I <volume>
.I <citation-label>
.I <journal>
.I <year>
.I <volume>
.I <number>
.I <start-pages>
.IR <end-pages> .
.\"-----------------------------------------------
.TP
.B \-byvolume
This option is intended for use with
bibliographies of single journals.
.IP
The journal name is included in the sort keys, so
that in a bibliography with multiple journals,
output entries for each journal are kept together.
.IP
With
.B \-byvolume
sorting, warnings are issued for any entry in
which any of the sort-key fields listed below is
missing, and a value of the missing field is
supplied that will sort higher than any printable
value.
.IP
Because
.B \-byvolume
sorting is first on journal name, it is essential
that there be only one form of each journal name;
the best way to ensure this is to always use
@String{...}  abbreviations for them.  Order
.B \-byvolume
is convenient for checking a bibliography against
the original journal, but less convenient for a
bibliography user.
.IP
The sort keys are:
.I <part>
.I <journal>
.I <year>
.I <volume>
.I <number>
.I <start-pages>
.I <end-pages>
.IR <citation-label> .
.\"-----------------------------------------------
.TP
.B \-byyear
If this option is given, then sorting is first by
year, then by citation label.  This is useful for
keeping a bibliography in approximate
chronological order, ordered by citation label
within each year.
.IP
The sort keys are:
.I <part>
.I <year>
.I <citation-label>
.I <journal>
.I <year>
.I <volume>
.I <number>
.I <start-pages>
.IR <end-pages> .
.\"-----------------------------------------------
.TP
.B \-copyright
Give a brief copyright message on
.IR stderr ,
then process all further options, but exit with a
successful status code (on UNIX, 0) before
processing any files.
.\"-----------------------------------------------
.TP
.B \-help
Give a brief help message on
.IR stderr ,
then process all further options, but exit with a
successful status code (on UNIX, 0) before
processing any files.
.\"-----------------------------------------------
.TP
.B \-reverse
Reverse the order of the sort.  This option does
.I not
affect the ordering of the \*(Bi\& file parts (see
below).  It applies only to the bibliographic
entries, and within those entries, only to the
citation label and `numeric' fields (volume,
number, pages, day, month, and year).
.IP
Thus,
.B "bibsort \-reverse \-byvolume"
for a bibliography with multiple journals will
sort entries for each journal in reverse
publication order, but the journal blocks will
still be in ascending order by journal name.
.\"-----------------------------------------------
.TP
.B \-version
Give a brief version number message on
.IR stderr ,
then process all further options, but exit with a
successful status code (on UNIX, 0) before
processing any files.
.\"======================================================================
.SH "BIBTEX FILE PARTS"
The input stream is conceptually divided into five
parts, any of which may be absent.
.RS
.TP \w'1.'u+2n
1.
Introductory material such as comments, file
headers, and edit logs that are ignored by
\*(Bi\&.  No line in this part begins with an
at-sign, ``@''.
.TP
2.
Preamble material delineated by ``@Preamble{'' and
a matching closing ``}'', intended to be processed
by \*(Te\&.  Normally, there is only one such
entry in a bibliography file, although \*(Bi\&,
and
.BR bibsort ,
permit more than one.
.TP
3.
Macro definitions (abbreviations) of the form
``@String{.\|.\|.}''.  Any single @String
specification may span multiple lines, and there
are usually several such definitions.
.TP
4.
Bibliography entries such as ``@Article{.\|.\|.}'',
``@Book{.\|.\|.}'', ``@InProceedings{.\|.\|.}'', and
so on, provided that their citation labels have
not already been encountered in a
.I crossref
assignment in a preceding entry.  For
.BR bibsort ,
any line that begins with an ``@'' followed by
letters and digits and an open brace is considered
to be such an entry.  Optional spaces and tabs may
surround the ``@'', and precede the first open
brace; these spaces and tabs will be deleted from
the output to help standardize the appearance.
.TP
5.
``@Proceedings{.\|.\|.}'' bibliography entries,
which are likely to be cross-referenced by
``@InProceedings{.\|.\|.}'' entries, and any other
bibliography entries for which a crossref
assignment was met before the entry itself.
.PP
An unfortunate implementation limitation of the
current \*(Bi\& requires cross-referenced entries
to appear
.I after
all other entries that cross-reference them,
although this limitation works to the advantage of
.BR bibsort ,
allowing single-pass processing.
.RE
.PP
The order of these parts is preserved in the
output stream.  Part 1 will be unchanged, but
parts 2\(en5 will be sorted within themselves.
.PP
The sort key of ``@Preamble'' entries is their
initial line, of ``@String'' entries, the
abbreviation name.  For all other \*(Bi\& entries,
the sort key is the citation label between the open
curly brace and the trailing comma, unless the
sort key is prefixed with additional fields as
requested by
.B \-byvolume
or
.B \-byyear
options.
.PP
.B bibsort
will correctly handle UNIX files with LF line
terminators, as well as IBM PC DOS files with CR
LF line terminators; the essential requirement is
that input lines be delineated by LF characters.
Thus, files from the Apple Macintosh, which uses
bare CR to terminate lines, would first have to be
converted to UNIX or PC DOS line format before
giving them to
.BR bibsort .
.\"======================================================================
.SH CAVEATS
\*(Bi\& has loose syntactical requirements that
the current simple implementation of
.B bibsort
does not support.  In particular, outer
parentheses may
.I not
be used in place of braces following ``@keyword''
patterns.  If you have such a file, you can use
.BR bibclean (1)
to prettyprint it into a form that
.B bibsort
can handle successfully.
.PP
The user must be aware that sorting a bibliography
is not without peril, for at least these reasons:
.RS
.TP \w'1.'u+2n
1.
\*(Bi\& has a
requirement that entry labels given in
.IR "crossref" " = " "label"
pairs in a bibliography entry
.I must
refer to entries defined
.IR later ,
rather than earlier, in the bibliography file.
This regrettable implementation limitation of the
current (pre-1.0) \*(Bi\& prevents arbitrary
ordering of entries when
.I crossref
values are present.
To partially solve this problem,
.B bibsort
will place ``@Proceedings'' entries last, since
they are frequently cross-referenced by
``@InProceedings'' entries.  However, it is also
possible for ``@Book'', ``@InBook'', and
``@InCollection'' entries to cross-reference
``@Book'' entries, and for ``@Article'' entries to
cross-reference other ``@Article'' entries.
Neither of these cases is dealt with by
.BR bibsort ,
except that ``@Book'' entries that contain a
``booktitle'' assignment, and entries that are
explicitly cross-referenced before their
definition, are sorted with ``@Proceedings''.
.TP
2.
If the \*(Bi\& file contains interspersed
commentary between ``@keyword{.\|.\|.}'' entries,
this material will be considered part of the
.I preceding
entry, and will be sorted with it.  Leading
commentary is more common, and will be moved
elsewhere in the file.
.IP
This is normally not a problem for the part 1
material before the ``@Preamble'', since it is kept
together at the beginning of the output stream.
.TP
3.
Some kinds of bibliography files should be kept in
a different order than alphabetically by citation
labels.  Good examples are a bibliography file with
the contents of a journal, or a personal
publication list, for both of which chronological
publication order is likely to be preferred.
.RE
.PP
While a much more sophisticated implementation of
.B bibsort
could deal with the first point, and the
.B \-byvolume
option provides a partial solution to the third
point, in general, a satisfactory solution
requires human intelligence and natural language
understanding that computers lack.
.PP
.B bibsort
uses octal ASCII control characters 001 through
007, 0177, and 0377, for temporary modifications
of the input stream.  If any of these are already
present in the input, they will be altered on
output.  This is unlikely to be a problem, because
those characters neither have a printable
representation, nor are conventionally used
to mark line or page boundaries in text files.
.\"======================================================================
.SH "PROGRAMMING NOTES"
Some text editors permit application of an
arbitrary filter command to a region of text.
For example, in GNU
.BR emacs (1),
the command
.IR "C-u M-x shell-command-on-region" ,
or equivalently,
.IR "C-u M-|" ,
can be used to run
.B bibsort
on a region of the buffer that is devoid of cross
references and other material that cannot be
safely sorted.
.PP
Some implementations of \*(Bi\& editing support in
GNU
.BR emacs (1)
have a
.I sort-bibtex-entries
command that is functionally similar to
.BR bibsort .
However, the file size that can be processed
by
.BR emacs (1)
is limited, while
.B bibsort
can be used on arbitrarily large files, since it
acts as a filter, processing a small amount of
data at a time.  The sort stage needs the entire
data stream, but fortunately, the UNIX
.BR sort (1)
command is clever enough to deal with very large
inputs.
.PP
The current implementation of
.B bibsort
follows the UNIX tradition of combining simple
already-available tools.  A six-stage pipeline of
.BR egrep (1),
.BR nawk (1),
.BR sort (1),
and
.BR tr (1)
accomplishes the job in one pass with about 900
lines of heavily-commented shell script, about 500
lines of which is a
.BR nawk (1)
program for insertion of sort keys.  The initial
prototype of
.B bibsort
was written and tested on several large
bibliographies in a couple of hours, and after
considerable use, was later extended with advanced
sorting capabilities and cross-reference
recognition in a couple of days of work.  By
contrast,
.BR bibtex (1)
is more than 11\0000 lines of code and
documentation, and
.BR bibclean (1)
is more than 15\0000 lines long; both took months
to develop, implement, and test.
.\"======================================================================
.SH BUGS
.B bibsort
may fail on some UNIX systems if their
.BR sort (1)
implementations cannot handle very long lines,
because for sorting purposes, each complete
bibliography entry is temporarily folded into a
single line.  You may be able to overcome this
problem by adding a
.BI \-z nnnnn
option to the
.BR sort (1)
command (passed via the command line to
.BR bibsort )
to increase the maximum line size to some larger
value of
.I nnnnn
bytes.  According to their documentation, some UNIX
.BR sort (1)
implementations require a space after
.BR \-z ,
others forbid it, and still others do not support it at all.
If a space is required, you must quote the pair,
to prevent the
.I nnnnn
value from being interpreted as a filename by
.BR bibsort .
.\"======================================================================
.SH "SEE ALSO"
.BR bibcheck (1),
.BR bibclean (1),
.BR bibdup (1),
.BR bibextract (1),
.BR bibjoin (1),
.BR biblabel (1),
.BR biblex (1),
.BR biborder (1),
.BR bibparse (1),
.BR bibsearch (1),
.BR bibsplit (1),
.BR bibtex (1),
.BR bibunlex (1),
.BR citesub (1),
.BR egrep (1),
.BR emacs (1),
.BR gawk (1),
.BR mawk (1),
.BR nawk (1),
.BR sort (1),
.BR tr (1).
.\"======================================================================
.SH AUTHOR
.nf
Nelson H. F. Beebe, Ph.D.
Center for Scientific Computing
University of Utah
Department of Mathematics, 322 INSCC
155 S 1400 E RM 233
Salt Lake City, UT 84112-0090
USA
Tel: +1 801 581 5254
FAX: +1 801 585 1640, +1 801 581 4148
Email: \fCbeebe@math.utah.edu\fP, \fCbeebe@acm.org\fP, \fCbeebe@ieee.org\fP (Internet)
WWW URL: \fChttp://www.math.utah.edu/~beebe\fP
.fi
.\"=====================================================================
.SH COPYRIGHT
.nf
\fC########################################################################
########################################################################
########################################################################
###                                                                  ###
###             bibsort: sort a BibTeX bibliography file             ###
###                                                                  ###
###              Copyright (C) 2000 Nelson H. F. Beebe               ###
###                                                                  ###
### This program is covered by the GNU General Public License (GPL), ###
### version 2 or later, available as the file COPYING in the program ###
### source distribution, and on the Internet at                      ###
###                                                                  ###
###               ftp://ftp.gnu.org/gnu/GPL                          ###
###                                                                  ###
###               http://www.gnu.org/copyleft/gpl.html               ###
###                                                                  ###
### This program is free software; you can redistribute it and/or    ###
### modify it under the terms of the GNU General Public License as   ###
### published by the Free Software Foundation; either version 2 of   ###
### the License, or (at your option) any later version.              ###
###                                                                  ###
### This program is distributed in the hope that it will be useful,  ###
### but WITHOUT ANY WARRANTY; without even the implied warranty of   ###
### MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the    ###
### GNU General Public License for more details.                     ###
###                                                                  ###
### You should have received a copy of the GNU General Public        ###
### License along with this program; if not, write to the Free       ###
### Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,   ###
### MA 02111-1307 USA                                                ###
########################################################################
########################################################################
########################################################################\fP
.fi
.\"======================================================================
.SH AVAILABILITY
Internet source distributions of
.B bibsort
are available at the World-Wide Web Uniform
Resource Locator addresses
.PP
.RS
.nf
\&\fCftp://ftp.math.utah.edu/pub/tex/bib/bibsort-x.yy.jar
ftp://ftp.math.utah.edu/pub/tex/bib/bibsort-x.yy.tar.gz
ftp://ftp.math.utah.edu/pub/tex/bib/bibsort-x.yy.zip
ftp://ftp.math.utah.edu/pub/tex/bib/bibsort-x.yy.zoo
.PP
http://www.math.utah.edu/pub/tex/bib/bibsort-x.yy.jar
http://www.math.utah.edu/pub/tex/bib/bibsort-x.yy.tar.gz
http://www.math.utah.edu/pub/tex/bib/bibsort-x.yy.zip
http://www.math.utah.edu/pub/tex/bib/bibsort-x.yy.zoo\fP
.fi
.RE
.PP
where
.I x.yy
is the current version (0.15 for the version whose
documentation you are now reading).
.PP
That site is mirrored to several other Internet
archives, so you may also be able to find it
elsewhere on the Internet; try searching for the
string
.I bibsort
at one or more of the popular Web search sites,
such as
.PP
.RS
.nf
\&\fChttp://altavista.digital.com/
http://search.microsoft.com/us/default.asp
http://www.dejanews.com/
http://www.dogpile.com/index.html
http://www.euroseek.net/page?ifl=uk
http://www.excite.com/
http://www.go2net.com/search.html
http://www.google.com/
http://www.hotbot.com/
http://www.infoseek.com/
http://www.inktomi.com/
http://www.lycos.com/
http://www.northernlight.com/
http://www.snap.com/
http://www.stpt.com/
http://www.yahoo.com/\fP
.fi
.RE
.\"==============================[The End]==============================
.\" This is for GNU Emacs file-specific customization:
.\" Local Variables:
.\" fill-column: 50
.\" End:
