#!/bin/sh
### ====================================================================
###  @UNIX-shell-file{
###     author          = "Nelson H. F. Beebe",
###     version         = "0.14",
###     date            = "08 October 1999",
###     time            = "18:54:20 MDT",
###     filename        = "bibsort.sh",
###     address         = "Center for Scientific Computing
###                        University of Utah
###                        Department of Mathematics, 322 INSCC
###                        155 S 1400 E RM 233
###                        Salt Lake City, UT 84112-0090
###                        USA",
###     telephone       = "+1 801 581 5254",
###     FAX             = "+1 801 581 4148",
###     checksum        = "10383 862 3619 30954",
###     email           = "beebe@math.utah.edu, beebe@acm.org,
###                        beebe@ieee.org (Internet)",
###     codetable       = "ISO/ASCII",
###     keywords        = "bibliography, BibTeX, sorting",
###     supported       = "yes",
###     docstring       = "This file contains the bibsort utility, a
###                        program for sorting BibTeX data base files by
###                        their BibTeX citation label names, or by
###                        another order determined by command-line
###                        switches, as described below.
###
###                        Usage:
###                              bibsort [-? | -author | -byday | \
###                                      -bypages | -byseriesvolume | \
###                                      -byvolume | -byyear | \
###                                      -copyright | -help | \
###                                      -version ] \
###                                      [optional sort(1) switches] \
###                                      bibfile(s) >outfile
###                        or
###                              bibsort [-? | -author | -byday | \
###                                      -bypages | -byseriesvolume | \
###                                      -byvolume | -byyear | \
###                                      -copyright | -help | \
###                                      -version ] \
###                                      [optional sort(1) switches] \
###                                      <infile >outfile
###
###                        The -?, -author, -copyright, -help, and
###                        -version options produce output on stdout.
###                        After all remaining options are processed,
###                        bibsort terminates with a successful exit
###                        code (on UNIX, 0), without processing any
###                        input files.
###
###                        Bibliography items are normally sorted in
###                        ascending order by citation label.  However,
###                        this sorting is modified if a -byday,
###                        -bypages, -byseriesvolume, -byvolume, or
###                        -byyear switch, or certain other sort(1)
###                        switches, are specified.
###
###                        -byday, -bypages, -byseriesvolume,
###                        -byvolume and -byyear may be abbreviated to
###                        unique prefixes -byd, -byp, -bys, -byv and
###                        -byy, if desired.  Switches may appear in
###                        any order; however, if more than one -byxxx
###                        switch is specified, only the last one is
###                        used.
###
###                        The -byday switch is intended for use with
###                        bibliographies of publications containing
###                        day, month, and year data, such as technical
###                        reports, newspapers, and magazines.  It
###                        causes entries to be sorted by year, month,
###                        day, and citation label, so that the entries
###                        appear in their original publication order.
###
###                        With -byday sorting, a day keyword is
###                        recognized (it will be standard in BibTeX
###                        1.0), but for backward compatibility, month
###                        entries of the form
###
###                        <quote>daynumber <quote> # monthname
###                        <quote>daynumber~<quote> # monthname
###                        monthname # <quote>daynumber <quote>
###                        monthname # <quote>daynumber~<quote>
###
###                        are also recognized.  If a day number is
###                        not available, a very large value is
###                        assumed, so as to sort the entry after
###                        others that have realistic day values.
###
###                        The -bypages switch is like -byvolume,
###                        except that the issue number is not
###                        included in the sort key.  Use it for
###                        journal bibliographies containing entries
###                        with missing issue number data to produce a
###                        better sort than -byvolume would give.
###
###                        The -byvolume switch is intended for use with
###                        bibliographies of single journals.  It causes
###                        entries to be sorted by journal, year, volume,
###                        number, pages, and citation label, so
###                        that the entries appear in their original
###                        publication order.  The journal name is
###                        included in the sort key, so that in a
###                        bibliography with multiple journals, output
###                        entries for each journal are kept together.
###
###                        With -byseriesvolume sorting, only the volume
###                        number and citation label are used in
###                        preparing the sort key; this is useful for a
###                        series bibliography, such as that for Lecture
###                        Notes in Computer Science.
###
###                        With -byvolume sorting, warnings are issued
###                        for any entry in which any of these fields
###                        are missing, and a value of the missing field
###                        is supplied that will sort higher than any
###                        printable value.
###
###                        Because -byvolume sorting is first on journal
###                        name, it is essential that there be only one
###                        form of each journal name; the best way to
###                        ensure this is to always use @String{...}
###                        abbreviations for them.  Order -byvolume is
###                        convenient for checking a bibliography
###                        against the original journal, but less
###                        convenient for a bibliography user.
###
###                        The -byyear switch causes entries to be
###                        sorted first by year, then by citation label.
###                        This is useful for keeping a bibliography in
###                        approximate chronological order, ordered by
###                        citation label within each year.
###
###                        Other command-line words beginning with a
###                        hyphen are assumed to be options to be passed
###                        to sort(1).
###
###                        All remaining command-line words are assumed
###                        to be input files.  Should such a filename
###                        begin with a hyphen, it must be disguised by
###                        a leading absolute or relative directory
###                        path, e.g. /tmp/-foo.bib or ./-foo.bib.
###
###                        The sort(1) -f switch (ignore letter case
###                        differences) is always supplied.  The -r
###                        switch reverses the order of the sort. The -u
###                        switch removes duplicate bibliography entries
###                        from the input stream; however, such entries
###                        must match exactly, including all white
###                        space.
###
###                        CAVEATS:
###
###                        Sorting of bibliographic entries cannot
###                        safely be done in general, because @String
###                        and @Preamble entries should come first, and
###                        in the current BibTeX, cross-referenced
###                        entries MUST come last.  This is an
###                        unfortunate, undesirable, and non-intuitive
###                        implementation limitation that I hope will be
###                        lifted in the final version of BibTeX.
###
###                        There is no simple way to detect which
###                        entries might be cross-referenced, unless
###                        cross-references to them precede them.
###                        According to btxdoc.tex, cross-references can
###                        occur
###
###                             * from @InProceedings or @Conference to
###                               @Proceedings,
###                             * from @Book, @InBook, and @InCollection
###                               to @Book, and
###                             * from @Article to @Article.
###
###                        Thus, even though we can move all
###                        @Proceedings entries to the end on the
###                        grounds that there are likely to be
###                        cross-references to them, we cannot
###                        reasonably do so for @Book and @Article
###                        entries.  A cross-referenced @Book entry
###                        needs a booktitle assignment, and that
###                        requirement is used to recognize that special
###                        case, and sort it properly.
###
###                        When an entry contains a crossref assignment,
###                        the cross-referenced citation label is saved
###                        in a list of such labels, so that subsequent
###                        entries with matching labels can be
###                        recognized as needing special handling to
###                        place them in a separate group at the end.
###
###                        We deal with these constraints by giving
###                        leading commentary, @Preamble entries, and
###                        @String entries temporary sort keys that
###                        place them before other bibliography entries,
###                        and @Proceedings entries, and entries that
###                        are cross-referenced before they are defined,
###                        temporary sort keys to place them last, so
###                        that the output order is
###
###                             (1) leading commentary,
###                             (2) @Preamble entries,
###                             (3) @String entries,
###                             (4) all other entries, except
###                                 @Proceedings entries, and explicitly
###                                 cross-referenced entries, and
###                             (5) @Proceedings entries and explicitly
###                                 cross-referenced entries.
###
###                        Since cross-references are most common
###                        between @InProceedings and @Proceedings, this
###                        heuristic will usually be correct.
###
###                        However, to be completely safe, you should
###                        only apply bibsort to a fragment of a .bib
###                        file that you know in advance can be sorted.
###
###                        Commentary BETWEEN entries will sort with the
###                        preceding entry, rather than the following
###                        one.  This is usually NOT what is desired, so
###                        the recommendation is simply to avoid
###                        commentary altogether outside of the initial
###                        commentary at the start of the file.
###
###                        WARNINGS:
###
###                        (1) This simple version does NOT recognize
###                        bib entries with outer parentheses instead of
###                        braces, or with line breaks between the @Name
###                        and following opening brace.  Use bibclean(1)
###                        to standardize and syntax check the
###                        bibliography entries first.
###
###                        (2) This program may fail on some UNIX sort
###                        implementations that cannot handle very long
###                        lines, because for sorting purposes, each
###                        complete bib entry is temporarily folded into
###                        a single line.  You may be able to overcome
###                        this problem by supplying a command-line
###                        -z nnnnn switch value to set the maximum line
###                        size to nnnnn bytes.  You must supply quotes
###                        around the -z nnnnn pair in order to prevent
###                        interpretation of nnnnn as a file name.
###
###                        (3) The UNIX sort command does not provide a
###                        stable sort: the order of records with equal
###                        sort keys is not guaranteed to be preserved.
###                        Since BibTeX raises an error for duplicate
###                        bibliography entries, this should not be a
###                        limitation.
###
###                        The checksum field above contains a CRC-16
###                        checksum as the first value, followed by the
###                        equivalent of the standard UNIX wc (word
###                        count) utility output of lines, words, and
###                        characters.  This is produced by Robert
###                        Solovay's checksum utility.",
###  }
########################################################################

# Assign default initial values.  At most one of the BY* flags ends up
# set to 1; all five are handed to the awk program to select the sort key.
BYDAY=0
BYPAGES=0
BYSERIESVOLUME=0
BYVOLUME=0
BYYEAR=0
FILES=			# blank-separated list of input files
GO=1			# cleared by the informational switches
OTHERSORTFLAGS=		# switches passed through to sort(1)
SORTFLAGS=		# sort(1) key specification for the chosen order

# Text printed by -copyright and -version.
VERSION_BANNER='bibsort version 0.14 [08-Oct-1999]'

# Field separator used inside the generated sort keys: C-k (octal 013),
# the same character as Volume_Key_Separator in the awk program.  It is
# produced with printf so that no raw control character has to survive
# in this file, and it must be glued to -t: a bare "-t" would make
# sort(1) take the following word (+0) as the separator.  C-k is not in
# the default IFS, so "-t<C-k>" stays one word when $SORTFLAGS is later
# expanded unquoted.
KEY_SEPARATOR=$(printf '\013')

# Forget any previously selected -byxxx order, so that the last such
# switch on the command line is the one that is used.
clear_sort_order()
{
	BYDAY=0
	BYPAGES=0
	BYSERIESVOLUME=0
	BYVOLUME=0
	BYYEAR=0
	SORTFLAGS=
}

# Loop over the command-line arguments, collecting bibsort switches,
# sort(1) switches, and file names.
#
# Arguments: the command-line words of bibsort
# Globals written: BYDAY BYPAGES BYSERIESVOLUME BYVOLUME BYYEAR FILES
#                  GO OTHERSORTFLAGS SORTFLAGS
# Outputs: the informational switches write their text to stdout
parse_args()
{
	while [ $# -gt 0 ]
	do
		case $1 in
		--a*|-a*)
			cat <<EOF
Author:
 	Nelson H. F. Beebe
 	Center for Scientific Computing
 	University of Utah
 	Department of Mathematics, 322 INSCC
 	155 S 1400 E RM 233
 	Salt Lake City, UT 84112-0090
 	USA
 	Email: beebe@math.utah.edu, beebe@acm.org, beebe@ieee.org (Internet)
 	WWW URL: http://www.math.utah.edu/~beebe
 	Telephone: +1 801 581 5254
 	FAX: +1 801 585 1640, +1 801 581 4148
EOF
			GO=0
			;;

		--byd*|-byd*)
			clear_sort_order
			BYDAY=1
			;;
		--byp*|-byp*)
			clear_sort_order
			BYPAGES=1
			# key = <group>
			#	<C-k><journal>
			#	<C-k><year>
			#	<C-k><volume>
			#	<C-k>0
			#	<C-k><pages-1>
			#	<C-k><pages-2>
			#	<C-k><citation-label>
			SORTFLAGS="-t$KEY_SEPARATOR +0 -1 +1 -2 +2n -3 +3n -4 +4n -5 +5n -6 +6n -7 +7 -8"
			;;
		--bys*|-bys*)
			clear_sort_order
			BYSERIESVOLUME=1
			# key = <group>
			#	<C-k><volume>
			#	<C-k><citation-label>
			SORTFLAGS="-t$KEY_SEPARATOR +0 -1 +1n -2 +2 -3"
			;;
		--byv*|-byv*)
			clear_sort_order
			BYVOLUME=1
			# key = <group>
			#	<C-k><journal>
			#	<C-k><year>
			#	<C-k><volume>
			#	<C-k><number>
			#	<C-k><pages-1>
			#	<C-k><pages-2>
			#	<C-k><citation-label>
			SORTFLAGS="-t$KEY_SEPARATOR +0 -1 +1 -2 +2n -3 +3n -4 +4n -5 +5n -6 +6n -7 +7 -8"
			;;
		--byy*|-byy*)
			clear_sort_order
			BYYEAR=1
			;;
		--c*|-c*)
			echo "$VERSION_BANNER"
			cat <<EOF
        *****************************************
        * THIS PROGRAM IS IN THE PUBLIC DOMAIN. *
        *****************************************
EOF
			GO=0
			;;

		--\?|-\?|--h*|-h*)
			cat <<EOF
Usage:
        bibsort [-?]  [-author]
                [-byday or -bypages or -byseriesvolume or -byvolume or -byyear]
                [-copyright] [-help] [-version]
                [ optional sort(1) options ]
                [ <infile or BibTeXfile(s) ] >outfile
EOF
			GO=0
			;;

		--v*|-v*)
			echo "$VERSION_BANNER"
			GO=0
			;;

		-*)		# all other switches are passed to sort
			OTHERSORTFLAGS="$OTHERSORTFLAGS $1"
			;;
		*)		# everything else is assumed to be a filename
			FILES="$FILES $1"
			;;
		esac
		shift		# discard this switch
	done
}

parse_args "$@"

# An informational switch was seen: its text has been printed, so stop
# here without reading any input.
if [ "$GO" -eq 0 ]
then
	exit 0
fi


# We store the awk program as a (large) string constant.  The string is
# delimited by single quotes, so no single quote may appear anywhere in
# the awk code or in its comments.  The BEGIN action does nothing but
# call initialize().
PROGRAM='BEGIN { initialize() }

/^[ \t]*@[ \t]*[Pp][Rr][Ee][Aa][Mm][Bb][Ll][Ee][ \t]*{/ {
    # @Preamble{...}: normalize the line, emit a sort-key line made from
    # the text that follows the opening brace, then copy the entry out.
    trim()
    squeeze()
    k = 1 + index($0,"{")
    print (Sort_Key Prefix_Preamble substr($0,k) Hidden_Newline)
    print_braced_item()
    next
}

/^[ \t]*@[ \t]*[sS][tT][rR][iI][nN][gG][ \t]*{/ {
    # @String{name = ...}: normalize the line, emit a sort-key line made
    # from the text between the opening brace and the first equals sign,
    # then copy the entry out.
    trim()
    squeeze()
    k = 1 + index($0,"{")
    m = index($0,"=")
    print (Sort_Key Prefix_String substr($0,k,m-k) Hidden_Newline)
    print_braced_item()
    next
}

/^[ \t]*@[ \t]*[Pp][Rr][Oo][Cc][Ee][Ee][Dd][Ii][Nn][Gg][Ss][ \t]*{/ {
    # @Proceedings entries go into the cross-referenced group, except
    # under -byseriesvolume, where they sort as normal entries.  The
    # citation key is the text between the opening brace and the first
    # comma.
    item = collect_braced_item()
    k = index(item,"{") + 1
    m = index(item,",")
    citation_key = substr(item,k,m-k)
    if (BYSERIESVOLUME)
	print_item(Prefix_Normal_Entry,citation_key,item)
    else
	print_item(Prefix_Cross_Referenced,citation_key,item)
    next
}

/^[ \t]*@[ \t]*[Bb][Oo][Oo][Kk][ \t]*{/ {
    # The whole entry is collected first, because only a lookahead for a
    # booktitle assignment tells whether this @Book is a target of
    # cross-references (sort like @Proceedings) or an ordinary entry
    # (sort like @Article).  Under -byseriesvolume every @Book is
    # treated as an ordinary entry.
    item = collect_braced_item()
    k = index(item,"{") + 1
    m = index(item,",")
    citation_key = substr(item,k,m-k)
    prefix = Prefix_Normal_Entry		# sort like @Article
    if (match(item,/[Bb][Oo][Oo][Kk][Tt][Ii][Tt][Ll][Ee] *=/) && !BYSERIESVOLUME)
	prefix = Prefix_Cross_Referenced	# sort like @Proceedings
    print_item(prefix,citation_key,item)
    next
}

# "@keyword{label,"
/^[ \t]*@[ \t]*[a-zA-Z0-9]*[ \t]*{/       {
    # Any other entry type: collect it and print it as a normal entry,
    # keyed by the text between the opening brace and the first comma.
    item = collect_braced_item()
    k = 1 + index(item,"{")
    m = index(item,",")
    citation_key = substr(item,k,m-k)
    print_item(Prefix_Normal_Entry,citation_key,item)
    next
}

{
    # Every line not consumed by an entry rule lands here: strip trailing
    # white space, remember the line, and copy it to the output.
    trim()
    last_line = $0
    print last_line
}

END {
    # Terminate the final item.  If the input did not end with a blank
    # line, add one (marked with Hidden_Newline) so that the last item
    # stays separated from whatever item the sort places after it.
    #
    # The test must be a regular expression match; comparing against the
    # string "^[ \t]*$" with != is a plain string comparison that is
    # always true.  $0 is tested rather than last_line because last_line
    # is only updated for lines outside entries, whereas in END gawk (and
    # POSIX awk) keep $0 from the last line read, including lines
    # consumed by getline while collecting an entry.
    if ($0 !~ /^[ \t]*$/)
	print Hidden_Newline

    # Close the stream with the record separator.  It is passed as data,
    # never as the printf format.
    printf("%s", Sort_Prefix)
}


function brace_count(s, depth,pos,rest)
{
    # Return the number of open braces in s minus the number of close
    # braces in s.
    #
    # NB: The index()/substr() scanning technique is new with bibsort
    # version 0.13; see the README file in the bibsort distribution for
    # a lengthy performance report.  The old algorithm is labeled bc-1
    # there, and this one, bc-2.  On the tests there, this one was up to
    # 25.6 times faster.

    depth = 0
    for (rest = s; (pos = index(rest,"{")) > 0; rest = substr(rest,pos+1))
	depth++
    for (rest = s; (pos = index(rest,"}")) > 0; rest = substr(rest,pos+1))
	depth--
    return (depth)
}


function collect_braced_item( count,item)
{
    # Gather a complete entry, beginning with the current $0, reading
    # further lines until the running brace count returns to zero or the
    # input ends.  As a guard against unbalanced braces, processing is
    # terminated via error() when a line with an at-sign in column 1
    # turns up before the braces balance.  The first line is followed by
    # a real newline and every later line by Visible_Newline.  This
    # function serves the entry types that need fancy sort preprocessing.

    squeeze()
    trim()
    item = $0 "\n"
    for (count = brace_count($0); count != 0; count += brace_count($0))
    {
	if (getline <= 0)
	    break
	if (substr($0,1,1) == "@") # should use match($0,/^[ \t]+@/),
				   # but this is faster, and usually correct
	    error("New entry encountered before balanced braces found")
	trim()
	item = item $0 Visible_Newline
    }
    return item
}


function day_key(item,citation_key, day,month,n,parts,year)
{
    # Return a -byday 8-digit key of the form YYYYMMDD.
    #
    # item is the complete text of the entry and citation_key its label
    # (used only in warnings issued by value()).  The year comes from the
    # year field, the month from the month field, and the day from the
    # day field or, failing that, from a day number embedded in the month
    # value.  Unusable years become 9999, and unusable months and days
    # become 99, so that such entries sort after those with real dates.

    # Keep the raw day value here: converting it with numeric_value()
    # first would turn a missing day into a large number, and the
    # fallback to the month value below could then never be reached.
    day   = value(item,citation_key,"day[ \t]*=[ \t]*")
    month = value(item,citation_key,"month[ \t]*=[ \t]*")
    year  = year_value(value(item,citation_key,"year[ \t]*=[ \t]*"))

    # Expect month values like this:
    #	jan
    #	January
    #	jan # " 10"
    #	jan # "~10"
    #	"10 " # jan
    #	"10~" # jan
    #	"10 January"
    #	"10~January"
    #	"January 10"
    #	"January~10"
    #	jan # { 10}
    #	jan # {~10}
    #	{10 } # jan
    #	{10~} # jan
    #	{10 January}
    #	{10~January}
    #	{January 10}
    #	{January~10}

    gsub(/[{}\t\#\"~]/," ",month) # remove delimiters, ties, concatenation
    gsub(/[ ]+/," ",month)	# squeeze multiple spaces to single ones
    n = split(month,parts," ")

    # The month name is whichever of the first two words is alphabetic;
    # only its first three letters, in lower case, are looked up.
    month = ""
    if (parts[1] ~ /^[a-zA-Z]+$/)
	month = substr(tolower(parts[1]),1,3)
    else if (parts[2] ~ /^[a-zA-Z]+$/)
	month = substr(tolower(parts[2]),1,3)

    # Force missing and unrecognized months to the largest 2-digit value.
    # The membership test also avoids creating empty table entries.
    month = (month in Month_Number) ? Month_Number[month] : 99

    if (day ~ /^[0-9]/)		# explicit day field
	day = numeric_value(day)
    else if (parts[1] ~ /^[0-9]+$/) # otherwise expect day in month value
	day = parts[1]
    else if (parts[2] ~ /^[0-9]+$/)
	day = parts[2]
    else
	day = 99		# force bad days to largest 2-digit value

    return ( year sprintf("%02d",month) sprintf("%02d",day) )
}


function error(message)
{
    # Report message through warning(), then terminate the awk program
    # with a failing exit code.
    warning(message)
    exit 1
}


function initialize( i,n,names)
{
    # Define the global constants used to build sort keys, emit the sort
    # key line for the leading commentary, and fill the table that maps
    # three-letter lower-case month abbreviations to month numbers.

    Warning_OK = 1

    # One-character prefixes that fix the relative order of the groups
    Prefix_Header = "\001"
    Prefix_Preamble = "\002"
    Prefix_String = "\003"
    Prefix_Normal_Entry = "\004"
    Prefix_Cross_Referenced = "\177"

    # Markers that are translated or deleted by the later pipeline stages
    Sort_Prefix = "\005"
    Hidden_Newline = "\006"
    Visible_Newline = "\007"

    Unknown_Value = "\377"		# such entries sort last
    Volume_Key_Separator = "\013"	# C-k character

    Sort_Key = Sort_Prefix "%%SORTKEY:"
    print Sort_Key Prefix_Header Hidden_Newline

    n = split("jan feb mar apr may jun jul aug sep oct nov dec",names," ")
    for (i = 1; i <= n; i++)
	Month_Number[names[i]] = i
}


function numeric_value(s)
{
    # Turn a string into a number for use in a sort key.  A string that
    # starts with a digit is converted by awk arithmetic, so trailing
    # nondigits are dropped and 20S reduces to 20; such values are
    # occasionally seen for volume, number, and pages.  Any other string
    # yields "infinity", 2^31 - 1 = 2147483647 (the largest 32-bit twos
    # complement integer), so that unknown values sort AFTER every
    # realistic numeric one.
    if (s ~ /^[0-9]/)
	return (0 + s)
    return (0 + 2147483647)
}


function pages_key(item,citation_key, n,parts,journal,year,volume,first_page,last_page)
{
    # Build the -bypages key: journal, year, volume, a constant 0 in the
    # slot that -byvolume uses for the issue number, first page, and last
    # page, each introduced by and the whole followed by
    # Volume_Key_Separator.  A single page value serves as both first and
    # last page.
    n = split(value(item,citation_key,"pages[ \t]*=[ \t]*"),parts,"--")
    journal = value(item,citation_key,"journal[ \t]*=[ \t]*")
    year = year_value(value(item,citation_key,"year[ \t]*=[ \t]*"))
    volume = numeric_value(value(item,citation_key,"volume[ \t]*=[ \t]*"))
    first_page = numeric_value(parts[1])
    last_page = numeric_value((n > 1) ? parts[2] : parts[1])

    return ( Volume_Key_Separator journal \
	Volume_Key_Separator year \
	Volume_Key_Separator volume \
	Volume_Key_Separator "0" \
	Volume_Key_Separator first_page \
	Volume_Key_Separator last_page \
	Volume_Key_Separator )
}


function print_braced_item(count)
{
    # Copy an entry to the output, beginning with the current $0 and
    # continuing until the running brace count returns to zero or the
    # input ends.  The first line is printed with a real newline, every
    # later line with Visible_Newline.  Used for @Preamble{...} and
    # @String{...}, which need no special processing.

    print $0
    for (count = brace_count($0); count != 0; count += brace_count($0))
    {
	if (getline <= 0)
	    break
	printf("%s%s",$0,Visible_Newline)
    }
}


function print_item(prefix,citation_key,item, complete_sort_key,extra_key,second_extra_key,v)
{
    # Print one collected entry, preceded by its sort-key line.
    #
    #   prefix        group prefix chosen by the calling rule
    #   citation_key  citation label of the entry
    #   item          complete text of the entry
    #
    # The sort key is Sort_Key, the group prefix, an order-specific extra
    # key selected by the BYxxx variables, the citation label, and (for
    # -byyear only) a trailing volume key.

    if (citation_key in Cross_Referenced_Item) # change prefix if this item
	prefix = Prefix_Cross_Referenced # was cross-referenced earlier

    second_extra_key = ""
    # The BYxxx variables are tested in a fixed order; the first one that
    # is nonzero determines the extra key.
    if (BYDAY)
	extra_key = day_key(item,citation_key)
    else if (BYPAGES)
	extra_key = pages_key(item,citation_key)
    else if (BYSERIESVOLUME)
	extra_key = series_volume_key(item,citation_key)
    else if (BYVOLUME)
	extra_key = volume_key(item,citation_key)
    else if (BYYEAR)
    {
	# Prefer a colon-delimited 4-character year found anywhere in the
	# entry text (the first such match, e.g. inside a citation label);
	# otherwise fall back to the year field, wrapped in colons.
	extra_key = (match(item,/:[12][0-9][0-9x][0-9x]:/)) ? \
	    substr(item,RSTART,RLENGTH) : \
	    (":" year_value(value(item,citation_key,"year[ \t]*=[ \t]*")) ":")
	# Now add a volume key too, so that identical labels
	# in a periodical bibliography (e.g. from a regular column)
	# sort in publication order.  Warnings are suppressed, because
	# we may not have journal/year/volume/number data.
	Warning_OK = 0
	second_extra_key = volume_key(item,citation_key)
	Warning_OK = 1
    }
    else
	extra_key = ""

    complete_sort_key = Sort_Key prefix extra_key citation_key second_extra_key
    gsub(Visible_Newline," ",complete_sort_key)	# change all visible newlines to spaces
    gsub(/ +/," ",complete_sort_key)		# and collapse multiple spaces

    # The key line ends with Hidden_Newline plus a real newline; the item
    # text is written without any addition of its own.
    print complete_sort_key Hidden_Newline
    printf("%s", item)

    # Check for use of crossref = "citation-key": such items must be
    # sorted last, like @Proceedings.  This will only succeed if the
    # input bibliography file follows the requirement of BibTeX 0.99
    # that cross-referenced items must follow items that
    # cross-reference them.
    v = value(item,citation_key,"crossref[ \t]*=[ \t]*")
    if (v != Unknown_Value)
	Cross_Referenced_Item[v] = 1
}


function series_volume_key(item,citation_key, v)
{
    # Build the -byseriesvolume key: the leading digits of the volume
    # value, enclosed in Volume_Key_Separator characters.  Everything
    # from the first nondigit onward is discarded, which reduces
    # "10--12", "10/12", "10(12)" to "10", etc.
    v = value(item,citation_key,"volume[ \t]*=[ \t]*")
    sub(/[^0-9].*$/,"",v)
    return ( Volume_Key_Separator v Volume_Key_Separator )
}


function squeeze( brace_at,blank_at)
{
    # Normalize the start of an entry line held in $0: drop white space
    # before and after the initial @, and drop white space between the
    # entry name and its opening brace.
    sub(/^[ \t]*@[ \t]*/,"@")
    brace_at = index($0,"{")
    blank_at = match($0,"[ \t]")
    if (brace_at > blank_at)	# then found intervening space
	sub(/[ \t]+{/,"{")	# NB: sub(), NOT gsub(), here
}


function trim()
{
    # Remove trailing blanks and tabs from the current input line.
    sub(/[ \t]+$/,"",$0)
}


function value(item,citation_key,keyword_pattern, n,s,v)
{
    # Return the value assigned to a keyword inside an entry.
    #
    #   item             complete text of the entry
    #   citation_key     citation label, used only in the warning text
    #   keyword_pattern  regular expression that matches the keyword, the
    #                    equals sign, and any white space after it
    #
    # The character right after the first match of keyword_pattern
    # selects how the value is extracted: a quoted string, a braced
    # string, a number, or an abbreviation.  Unknown_Value is returned
    # when none of these forms is found or when a quoted or braced value
    # is empty.
    #
    # NOTE(review): the match is not anchored, so a pattern can also match
    # inside a longer keyword or inside the text of another value.
    match(item,keyword_pattern)
    ### print "DEBUG: value() [" substr(item,RSTART,RLENGTH) "] [" item "]\n\n" >"/dev/tty"
    if (substr(item,RSTART+RLENGTH,1) == "\"") # have key = "value"
    {
	s = substr(item,RSTART+RLENGTH)
	match(s,/["][^"]*["]/)
	v = (RLENGTH > 2) ? substr(s,RSTART+1,RLENGTH-2) : Unknown_Value
    }
    else if (substr(item,RSTART+RLENGTH,1) == "{") # have key = {value}
    {
	# NOTE(review): the pattern stops at the first closing brace, so a
	# value that contains nested braces is returned truncated.
	s = substr(item,RSTART+RLENGTH)
	match(s,/{[^}]*}/)
	v = (RLENGTH > 2) ? substr(s,RSTART+1,RLENGTH-2) : Unknown_Value
    }
    else if (substr(item,RSTART+RLENGTH,1) ~ /[0-9]/) # have key = number,
    {
	# take everything up to, but not including, the next comma
	s = substr(item,RSTART+RLENGTH)
	match(s,/[^,]+,/)
	v = (RLENGTH > 1) ? substr(s,RSTART,RLENGTH-1) : Unknown_Value
    }
    else if (substr(item,RSTART+RLENGTH,1) ~ /[A-Za-z]/) # have key = abbrev,
    {
	# take everything up to, but not including, the next comma
	s = substr(item,RSTART+RLENGTH)
	match(s,/[^,]+,/)
	v = (RLENGTH > 1) ? substr(s,RSTART,RLENGTH-1) : Unknown_Value
    }
    else			# unexpected pattern
	v = Unknown_Value

    # For -byday sorting, we also need to return any immediately
    # following concatenated string in a month value.  We just grab
    # text up to the next newline or comma, which is not rigorous, but
    # adequate for our purposes.
    #
    # Here s, RSTART, and RLENGTH still describe the value match made in
    # the branch taken above, so the substr() call below yields the text
    # that follows the value.
    if (BYDAY && \
	(v != Unknown_Value) && \
	(substr(keyword_pattern,1,5) == "month"))
    {
	s = substr(s,RSTART+RLENGTH)
	if (match(s,/[ \t\n]*\#/) && (RSTART == 1))
	{
	    s = substr(s,RSTART+RLENGTH)
	    match(s,/[^\n,]+/)
	    v = v " # " substr(s,RSTART,RLENGTH)
	}
    }

    # A missing crossref is the normal case, so it is never reported.
    # Whether other warnings are printed is decided inside warning().
    if ((index(v,Unknown_Value) > 0) &&
	(index(keyword_pattern,"crossref") == 0))
    {				# warn about missing values
	match(keyword_pattern,/[a-zA-Z]+/)
	warning("Missing " substr(keyword_pattern,RSTART,RLENGTH) \
	    " value in " citation_key)
    }

    return (v)
}


function volume_key(item,citation_key, n,parts,journal,year,volume,number,first_page,last_page)
{
    # Build the -byvolume key: journal, year, volume, issue number, first
    # page, and last page, each introduced by and the whole followed by
    # Volume_Key_Separator.  A single page value serves as both first and
    # last page.
    n = split(value(item,citation_key,"pages[ \t]*=[ \t]*"),parts,"--")
    journal = value(item,citation_key,"journal[ \t]*=[ \t]*")
    year = year_value(value(item,citation_key,"year[ \t]*=[ \t]*"))
    volume = numeric_value(value(item,citation_key,"volume[ \t]*=[ \t]*"))
    number = numeric_value(value(item,citation_key,"number[ \t]*=[ \t]*"))
    first_page = numeric_value(parts[1])
    last_page = numeric_value((n > 1) ? parts[2] : parts[1])

    return ( Volume_Key_Separator journal \
	Volume_Key_Separator year \
	Volume_Key_Separator volume \
	Volume_Key_Separator number \
	Volume_Key_Separator first_page \
	Volume_Key_Separator last_page \
	Volume_Key_Separator )
}


function warning(message)
{
    # Write message to stderr, tagged with the current input file name
    # and line number, unless warnings are switched off via Warning_OK.
    if (!Warning_OK)
	return
    print FILENAME ":" FNR ":%%" message >"/dev/stderr"
}


function year_value(year)
{
    # Pass through a year of four characters that starts with 1 or 2
    # (the last two characters may be digits or the letter x); force
    # anything else to the largest 4-digit value, "9999".
    return ((year ~ /^[12][0-9][0-9x][0-9x]$/) ? year : "9999")
}
'

# The bibliography sorting is implemented as a filter pipeline:
#
# Stage 1 (gawk) finds bib file entries and prefixes them with a line
# containing a special customized recognizable sort key, where each such
# line begins with a Ctl-E, and the file ends with Ctl-E.  The sort key
# contains unprintable characters, so as to essentially eliminate any
# possibility of confusion with bibliography data.
#
# Stage 2 (tr) turns LF into Ctl-G and Ctl-E into LF.  This hides
# line boundaries, and makes each bibliography item a separate `line'.
#
# Stage 3 (sort) sorts `lines' (i.e. bib entries), ignoring
# letter case differences.
#
# Stage 4 (tr) turns LF into Ctl-E, and Ctl-G back into LF.  This
# restores the original line boundaries.
#
# Stage 5 (tr) deletes all Ctl-E and Ctl-F characters.
#
# Stage 6 (grep) removes the sort key lines.  The pattern contains no
# extended regular expression operators, so plain grep is used in place
# of the obsolescent egrep.
#
# The group ordering depends on the control-character prefixes in the
# sort keys (and on octal 377 for unknown values) being compared as
# plain bytes.  Locale-specific collation may ignore or reorder such
# characters, and bytes that are invalid in a multibyte locale can make
# the tools treat the data as binary, so every stage is run in the C
# locale.
LC_ALL=C
export LC_ALL

# $FILES, $SORTFLAGS and $OTHERSORTFLAGS are deliberately left unquoted:
# each holds a blank-separated list that must be split into words.
#
# Finally, here is the pipeline that does all of the work:

gawk	"$PROGRAM" \
	BYDAY=$BYDAY \
	BYPAGES=$BYPAGES \
	BYSERIESVOLUME=$BYSERIESVOLUME \
	BYVOLUME=$BYVOLUME \
	BYYEAR=$BYYEAR \
	$FILES | \
	    tr '\012\005' '\007\012' | \
		sort -f $SORTFLAGS $OTHERSORTFLAGS | \
		    tr '\007\012' '\012\005' | \
			 tr -d '\005\006' | \
			     grep -v  '^%%SORTKEY:'
################################[The End]###############################
