#!/bin/sh
### ====================================================================
###  @UNIX-shell-file{
###     author          = "Nelson H. F. Beebe",
###     version         = "0.02",
###     date            = "16 September 1996",
###     time            = "18:24:14 MDT",
###     filename        = "bibjoin.sh",
###     address         = "Center for Scientific Computing
###                        Department of Mathematics
###                        University of Utah
###                        Salt Lake City, UT 84112
###                        USA",
###     telephone       = "+1 801 581 5254",
###     FAX             = "+1 801 581 4148",
###     checksum        = "61917 973 3302 28793",
###     email           = "beebe@math.utah.edu (Internet)",
###     codetable       = "ISO/ASCII",
###     keywords        = "bibliography, ordering, BibTeX",
###     supported       = "yes",
###     docstring       = "This file contains the bibjoin utility, a
###                        program for joining adjacent duplicate, or
###                        similar, entries.  Such action may be
###                        necessary when bibliography entries are
###                        collected from many sources.
###
###                        bibjoin should be applied to a bibliography
###                        file only after entries have been suitably
###                        ordered so that candidates for joining
###                        appear consecutively.  This can be done
###                        mostly automatically if standardized
###                        citation labels are first generated, then
###                        the bibliography is sorted by citation
###                        labels, such as by bibsort(1).
###
###                        Only a human reader can reliably decide
###                        when two bibliography entries are truly the
###                        same.  bibjoin can help automate much of
###                        this work, but manual editing will almost
###                        certainly still be necessary.  If two
###                        entries are joined, these conditions must
###                        be satisfied:
###
###                        (1) identical citation labels;
###                        (2) identical year;
###                        (3) if a journal entry, identical volume and
###                            initial page numbers.
###
###                        When two `equal' value strings are found
###                        for the same key, one of them is deleted.
###                        Otherwise, both key/value pairs are output.
###                        Manual editing will then be required to
###                        choose between them.
###
###                        Value strings are considered equal if they
###                        match after all non-alphanumerics are
###                        removed, and letter case is ignored.  This
###                        choice helps to eliminate many match
###                        failures that arise from minor variations
###                        in punctuation, spacing, and
###                        capitalization.  bibjoin has no way of
###                        determining which of the two strings should
###                        be preserved, so it uniformly discards the
###                        shorter one (which presumably has less
###                        `information'): this means that it will be
###                        wrong about half the time.
###
###                        Syntax errors in the input stream will
###                        cause abrupt termination with a fatal error
###                        message and a non-zero exit code.  The
###                        output will be incomplete, so you should
###                        always examine the output file before
###                        assuming that you can replace the input
###                        file with the output file.
###
###                        Usage:
###                              bibjoin
###					[-check-missing]
###					[-version]
###					bibfile(s) >outfile
###                        or
###                              bibjoin
###					[-check-missing]
###					[-version]
###					<infile >outfile
###
###			   Switch names may be abbreviated to the
###			   minimal unique prefix.
###
###			   If -check-missing is specified, missing
###			   expected fields will be supplied, with the
###			   field name prefixed with OPT, and the value
###			   string set to a pair of question marks,
###			   e.g.  OPTvolume = "??".  The OPT prefix
###			   ensures that the key is ignored by BibTeX
###			   (and thus that the question marks will not
###			   appear in an output .bbl file), and
###			   together with the question marks,
###			   highlights the missing data.  In addition,
###			   the GNU Emacs bibtex-mode editing support
###			   has convenient functions for removing the
###			   OPT prefixes, and so does bibclean(1).
###
###			   If -version is specified, the bibjoin version
###			   will be displayed on stdout, and then the
###			   program will immediately exit.
###
###                        All remaining command-line words are assumed
###                        to be input files.  Should such a filename
###                        begin with a hyphen, it must be disguised by
###                        a leading absolute or relative directory
###                        path, e.g. /tmp/-foo.bib or ./-foo.bib.
###
###                        WARNINGS:
###
###                        (1) This simple version does NOT recognize
###                        bib entries with outer parentheses instead of
###                        braces, or with line breaks between the @Name
###                        and following opening brace.  Use bibclean(1)
###                        to standardize and syntax check the
###                        bibliography entries first.
###
###                        (2) Implementation limitations in nawk or
###                        gawk may result in premature termination
###                        because of maximum string lengths being
###                        exceeded.  This can happen with long
###                        abstract or summary strings.  This problem
###                        has been seen more frequently with some
###                        UNIX nawk implementations than with the
###                        Free Software Foundation GNU Project's
###                        gawk, so we prefer to use gawk, if
###                        available.
###
###                        The checksum field above contains a CRC-16
###                        checksum as the first value, followed by the
###                        equivalent of the standard UNIX wc (word
###                        count) utility output of lines, words, and
###                        characters.  This is produced by Robert
###                        Solovay's checksum utility.",
###  }
########################################################################

# Assign default initial values
CHECKMISSING=0
FILES=
VERSION=0

# Loop over the command-line arguments, collecting bibjoin switches
# and file names.  Any word starting with -c selects -check-missing and
# any word starting with -v selects -version; every other word that
# begins with a hyphen is rejected with a usage summary.
while [ $# -gt 0 ]
do
	case "$1" in
	-c*)
		CHECKMISSING=1
		;;
	-v*)
		VERSION=1
		;;
	-*)
		# stdout is normally redirected to the output bibliography,
		# so the usage summary is written to stderr where it is seen.
		{
			echo "Usage: $0"
			echo '		[-check-missing]'
			echo '		[-version]'
			echo '		BibTeXfiles or <infile'
			echo '		>outfile'
		} >&2
		exit 1
		;;
	*)			# everything else is assumed to be a filename
		# NB: names are accumulated in one blank-separated string and
		# expanded unquoted later, so file names that contain
		# whitespace or glob characters are not supported.
		FILES="$FILES $1"
		;;
	esac
	shift			# discard this switch or filename
done

# We store the awk program as a (large) string constant
PROGRAM='
BEGIN {
	# Startup: define constants, choose the diagnostic stream, and
	# coerce the switches passed with -v on the command line to numbers.
	BIBJOIN_VERSION = "bibjoin version 0.02 [16-Sep-1996]"

	# gawk, and some recent nawk installations, accept "/dev/stderr";
	# an older nawk would need "/dev/tty" here instead.  The former
	# assignment of "/dev/tty" was always overwritten immediately, so
	# only the effective value is kept.
	stderr = "/dev/stderr"

	CHECKMISSING += 0	# coerce string to number
	UNKNOWN_VALUE = "??"	# value string supplied for missing fields
	VERSION += 0		# coerce string to number
	if (VERSION)
	{
	    print BIBJOIN_VERSION
	    exit(0)
	}
}

/^[ \t]*@[ \t]*[Pp][Rr][Ee][Aa][Mm][Bb][Ll][Ee][ \t]*{/ {
	# @Preamble{...}: normalize the opening line, then copy the whole
	# braced item to the output unchanged.  (A former assignment to an
	# otherwise unused global k has been dropped.)
	trim()
	squeeze()
	print_braced_item()
	next
}

/^[ \t]*@[ \t]*[sS][tT][rR][iI][nN][gG][ \t]*{/ {
	# @String{...}: tidy the opening line (space around the at-sign and
	# before the brace, plus trailing blanks) and copy the braced item
	# through to the output.
	squeeze()
	trim()
	print_braced_item()
	next
}

# "@keyword{label,"
/^[ \t]*@[ \t]*[a-zA-Z0-9]*[ \t]*{/       {
	# Read the whole entry into key_value_pair[], then decide what to
	# do with the previously collected entry (if there is one): merge
	# it into the new one, or write it out.
	collect_braced_item()
	if ("__ENTRY__" in last_key_value_pair)
	{
	    if (same_entry())
		join_entries()
	    else
		print_entry(last_key_value_pair)
	}

	# The (possibly merged) current entry becomes the pending one.
	copy_array(last_key_value_pair,key_value_pair)
	next
}

{				# all other line types match this
	# Copy the line through with trailing blanks removed, but print
	# an empty line only when the preceding line was not empty.
	trim()
	if (($0 != "") || (last_line != ""))
	    print
	last_line = $0
	next
}

END {
    # Flush the pending entry, if any.  The guard matters because END is
    # also reached when no entry was ever collected (empty input, or
    # exit from BEGIN after -version); printing then would emit a stray
    # closing brace.
    if ("__ENTRY__" in last_key_value_pair)
	print_entry(last_key_value_pair)
}

function add_key_abbrev_pair(key,abbrev, pad)
{
    # Store a placeholder field "  OPTkey = abbrev," (unquoted value) in
    # key_value_pair[key].  Blanks are added after the equals sign so
    # that the value starts in column 18 when the key is short enough;
    # for longer keys no padding is produced.
    pad = substr("                 ",1,10 - length(key))
    key_value_pair[key] = "  OPT" key " =" pad abbrev ","
}

function add_key_value_pair(key,value, pad)
{
    # Store a placeholder field "  OPTkey = "value"," (quoted value) in
    # key_value_pair[key].  Blanks are added after the equals sign so
    # that the value starts in column 18 when the key is short enough;
    # for longer keys no padding is produced.
    pad = substr("                 ",1,10 - length(key))
    key_value_pair[key] = "  OPT" key " =" pad "\"" value "\","
}

function brace_count(s, opens,closes)
{
    # Return the number of open braces in s minus the number of close
    # braces.  gsub() is used only as a counter: each brace is replaced
    # by itself, and s is a by-value copy in any case.
    opens = gsub(/[{]/,"{",s)
    closes = gsub(/[}]/,"}",s)
    return (opens - closes)
}

function check_article( i,n,wanted)
{
    # Supply placeholders for the fields expected in an @Article entry.
    n = split("author title journal volume number pages month year",wanted," ")
    for (i = 1; i <= n; ++i)
	check_missing_key(wanted[i])
}

function check_author_editor()
{
    # An entry needs an author or an editor; when it has neither, a
    # placeholder author field is supplied.
    if (("author" in key_value_pair) || ("editor" in key_value_pair))
	return
    add_key_value_pair("author",UNKNOWN_VALUE)
}

function check_book( i,n,wanted)
{
    # Supply placeholders for the fields expected in a @Book entry.
    check_author_editor()
    n = split("title publisher address",wanted," ")
    for (i = 1; i <= n; ++i)
	check_missing_key(wanted[i])
    check_missing_ISBN()
    n = split("LCCN pages year",wanted," ")
    for (i = 1; i <= n; ++i)
	check_missing_key(wanted[i])
}

function check_booklet( i,n,wanted)
{
    # Supply placeholders for the fields expected in a @Booklet entry.
    n = split("author title howpublished address year",wanted," ")
    for (i = 1; i <= n; ++i)
	check_missing_key(wanted[i])
}

function check_inbook( standalone)
{
    # Supply placeholders for the fields expected in an @InBook entry.
    # The publication details are skipped when the entry has a crossref
    # field.
    standalone = !("crossref" in key_value_pair)

    check_author_editor()
    check_missing_key("title")
    check_missing_key("chapter")
    if (standalone)
    {
	check_missing_key("publisher")
	check_missing_key("address")
	check_missing_ISBN()
	check_missing_key("LCCN")
    }
    check_missing_key("pages")
    check_missing_key("year")
}

function check_incollection( standalone)
{
    # Supply placeholders for the fields expected in an @InCollection
    # entry.  The book details are skipped when the entry has a crossref
    # field.
    standalone = !("crossref" in key_value_pair)

    check_author_editor()
    check_missing_key("title")
    if (standalone)
    {
	check_missing_key("booktitle")
	check_missing_key("publisher")
	check_missing_key("address")
	check_missing_ISBN()
	check_missing_key("LCCN")
    }
    check_missing_key("pages")
    check_missing_key("year")
}

function check_inproceedings( standalone)
{
    # Supply placeholders for the fields expected in an @InProceedings
    # entry.  The proceedings details are skipped when the entry has a
    # crossref field.
    standalone = !("crossref" in key_value_pair)

    check_author_editor()
    check_missing_key("title")
    if (standalone)
    {
	check_missing_key("booktitle")
	check_missing_key("publisher")
	check_missing_key("address")
	check_missing_ISBN()
	check_missing_key("LCCN")
    }
    check_missing_key("pages")
    check_missing_key("year")
}

function check_manual( i,n,wanted)
{
    # Supply placeholders for the fields expected in a @Manual entry.
    check_author_editor()
    n = split("title organization address pages year",wanted," ")
    for (i = 1; i <= n; ++i)
	check_missing_key(wanted[i])
}

function check_mastersthesis( i,n,wanted)
{
    # Supply placeholders for the fields expected in a @MastersThesis
    # entry.
    n = split("author title school address type month year",wanted," ")
    for (i = 1; i <= n; ++i)
	check_missing_key(wanted[i])
}

function check_misc( i,n,wanted)
{
    # Supply placeholders for the fields expected in a @Misc entry.
    n = split("author title howpublished year",wanted," ")
    for (i = 1; i <= n; ++i)
	check_missing_key(wanted[i])
}

function check_missing()
{
    # Dispatch on the global entry_type (lowercase entry name) to the
    # matching field checker.  An entry type without a checker only
    # produces a warning.
    if (entry_type == "article")	{ check_article();	 return }
    if (entry_type == "book")		{ check_book();		 return }
    if (entry_type == "booklet")	{ check_booklet();	 return }
    if (entry_type == "inbook")		{ check_inbook();	 return }
    if (entry_type == "incollection")	{ check_incollection();	 return }
    if (entry_type == "inproceedings")	{ check_inproceedings(); return }
    if (entry_type == "manual")		{ check_manual();	 return }
    if (entry_type == "mastersthesis")	{ check_mastersthesis(); return }
    if (entry_type == "misc")		{ check_misc();		 return }
    if (entry_type == "periodical")	{ check_periodical();	 return }
    if (entry_type == "phdthesis")	{ check_phdthesis();	 return }
    if (entry_type == "proceedings")	{ check_proceedings();	 return }
    if (entry_type == "techreport")	{ check_techreport();	 return }
    if (entry_type == "unpublished")	{ check_unpublished();	 return }

    warning("unrecognized entry type [" entry_type "]")
}

function check_missing_ISBN()
{
    # Supply a placeholder ISBN field, but only for an entry that has
    # no ISBN and whose year value is numerically greater than 1971.
    if ("ISBN" in key_value_pair)
	return
    if (!("year" in key_value_pair))
	return
    if ((0 + get_value("year",key_value_pair["year"])) > 1971)
	add_key_value_pair("ISBN",UNKNOWN_VALUE)
}

function check_missing_key(key)
{
    # Supply a placeholder for field key unless the current entry
    # already has it.
    if (key in key_value_pair)
	return
    add_key_value_pair(key,UNKNOWN_VALUE)
}

function check_periodical( i,n,wanted)
{
    # Supply placeholders for the fields expected in a @Periodical entry.
    n = split("key address ISSN LCCN publisher title",wanted," ")
    for (i = 1; i <= n; ++i)
	check_missing_key(wanted[i])
}

function check_phdthesis( i,n,wanted)
{
    # Supply placeholders for the fields expected in a @PhdThesis entry.
    n = split("author title school address type month year",wanted," ")
    for (i = 1; i <= n; ++i)
	check_missing_key(wanted[i])
}

function check_proceedings( i,n,wanted)
{
    # Supply placeholders for the fields expected in a @Proceedings
    # entry.  A key field is wanted only when the entry has neither an
    # author nor an editor.
    if (!(("author" in key_value_pair) || ("editor" in key_value_pair)))
	check_missing_key("key")
    n = split("title publisher address",wanted," ")
    for (i = 1; i <= n; ++i)
	check_missing_key(wanted[i])
    check_missing_ISBN()
    n = split("LCCN pages year",wanted," ")
    for (i = 1; i <= n; ++i)
	check_missing_key(wanted[i])
}

function check_techreport( i,n,wanted)
{
    # Supply placeholders for the fields expected in a @TechReport entry.
    n = split("author title institution address type month year",wanted," ")
    for (i = 1; i <= n; ++i)
	check_missing_key(wanted[i])
}

function check_unpublished( i,n,wanted)
{
    # Supply placeholders for the fields expected in an @Unpublished
    # entry.
    n = split("author title note year",wanted," ")
    for (i = 1; i <= n; ++i)
	check_missing_key(wanted[i])
}

function clear_array(array, key)
{
    # Remove every element of array.  Splitting an empty string into
    # the array leaves it with no elements.
    split("",array)
}

function collect_braced_item( count,s)
{
    # Starting with the current contents of $0, collect lines until we
    # reach a zero brace count. To guard against infinite loops in the
    # event of unbalanced braces, we abruptly terminate processing if
    # an at-sign is detected in column 1.  This function is used for
    # those entry types that require key/value pair reordering.
    #
    # Globals set: start_fnr and start_line (for diagnostics),
    # entry_type (lowercase entry name), and key_value_pair[], which
    # receives the opening line under "__ENTRY__" plus one element per
    # field.  When CHECKMISSING is nonzero, missing fields are supplied.

    start_fnr = FNR
    start_line = $0		# kept verbatim for diagnostics

    # Normalize the opening line BEFORE extracting the entry type, so
    # that blanks or tabs around the at-sign or before the brace (which
    # the calling pattern permits) cannot leak into entry_type and make
    # a valid type look unrecognized.
    squeeze()
    trim()
    entry_type = substr($0,2)
    sub(/ *{.*$/,"",entry_type)
    entry_type = tolower(entry_type)

    clear_array(key_value_pair)
    count = brace_count($0)
    key_value_pair["__ENTRY__"] = $0
    while (count != 0)
    {
	if (getline <= 0)
	    break
	if (substr($0,1,1) == "@") # should use match($0,/^[ \t]+@/), but
				   # this is faster, and usually correct
	    error("New entry encountered before balanced braces found")
	trim()
	# NB: syntax of abbrev, entry, key, and field names taken from
	# biblex source code: see Nelson H. F. Beebe, "Bibliography
	# prettyprinting and syntax checking", TUGboat 14(3), 222,
	# October (1993) and TUGboat 14(4), 395--419, December (1993).
	# NB: in match() below, \047 is an apostrophe, which we cannot
	# use inside the sh apostrophe-delimited string containing
	# this program.
	if (match($0,/^[ \t]*[A-Za-z][---A-Za-z0-9:.+\/\047]*[ \t]*=/))
	    s = collect_key_value_pair()
	else
	    s = $0
	count += brace_count(s)
    }

    if (CHECKMISSING)
	check_missing()
}

function collect_key_value_pair( key,s)
{
    # Called with $0 holding a line of the form " key = ...".  The
    # complete assignment (several input lines when a quoted value
    # continues) is stored in key_value_pair[] and also returned.

    # Extract the field name.  A name already present in the entry is
    # reported, and -z is appended until the name is unique, which also
    # gives a distinct sort key.
    match($0,/[A-Za-z][---A-Za-z0-9:.+\/\047]*/)
    key = substr($0,RSTART,RLENGTH)
    if (key in key_value_pair)
    {
	warning("duplicate key [" key "]")
	do
	    key = key "-z"
	while (key in key_value_pair)
    }

    s = $0
    if (match($0,/^[ \t]*[A-Za-z][---A-Za-z0-9:.+\/\047]*[ \t]*=[ \t]*\"/))
    {				# then we have key = "...value..."
	# Gather continuation lines, relying on the simple heuristic
	# (guaranteeable by bibclean) that a quoted value string ends
	# with a quote, or quote comma, at the end of a line.
	while (!match($0,/\",?$/))
	{
	    if (getline <= 0)
		error("Unexpected end-of-file while collecting key/value pair")
	    if (substr($0,1,1) == "@")	# should use match($0,/^[ \t]+@/), but
					# this is faster, and usually correct
		error("New entry encountered before end of string")
	    trim()
	    s = s "\n" $0
	}
    }
    # Otherwise the line must be key = abbrev, assumed to fit on one line.

    key_value_pair[key] = s	# NB: omits final newline
    return (s)
}

function copy_array(to_array,from_array, k)
{
    # Make to_array an element-for-element copy of from_array; whatever
    # to_array held before is discarded first.
    clear_array(to_array)
    for (k in from_array)
    {
	to_array[k] = from_array[k]
    }
}

function error(msg)
{
    # Report msg as a fatal error through message(), then stop the
    # program with exit status 1.
    message("??FATAL ERROR:" msg)
    exit 1
}

function get_pair(kv_pair,key, s)
{
    # Remove element key from kv_pair and return its value followed by
    # a newline; return an empty string when there is no such element.
    if (!(key in kv_pair))
	return ("")
    s = kv_pair[key] "\n"
    delete kv_pair[key]
    return (s)
}

function get_value(key,key_value, s,lhs)
{
    # Given the text of an assignment for field key, return the bare
    # value: the contents of a quoted string without the surrounding
    # quotes and trailing comma, or an abbreviation without its
    # trailing comma.  An empty string is returned when key_value is
    # not an assignment to key.
    lhs = "^[ \t]*" key "[ \t]*=[ \t]*"
    s = ""
    if (match(key_value,lhs "\""))
    {				# have key = "value"
	s = substr(key_value,RSTART+RLENGTH)
	sub(/\",$/,"",s)
	sub(/\"$/,"",s)
    }
    else if (match(key_value,lhs "[a-zA-Z]"))
    {				# have key = abbrev
	# the match included the first letter of the abbreviation
	s = substr(key_value,RSTART+RLENGTH-1)
	sub(/,$/,"",s)
    }
    return (s)
}

function join_entries( key,older,newer)
{
    # Merge last_key_value_pair[] into key_value_pair[], removing each
    # element from last_key_value_pair[] as it is handled, so that on
    # exit last_key_value_pair[] is completely empty.

    for (key in last_key_value_pair)
    {
	older = last_key_value_pair[key]
	delete last_key_value_pair[key]

	if (!(key in key_value_pair))
	{			# field only in the earlier entry: keep it
	    key_value_pair[key] = older
	    continue
	}

	newer = key_value_pair[key]
	if (reduce_string(older) == reduce_string(newer))
	{			# same key, same reduced value: save longer
	    if (length(older) > length(newer))
		key_value_pair[key] = older
	}
	else if (((key == "author") || (key == "editor")) && \
	    same_personal_names(get_value(key,older),get_value(key,newer)))
	{			# names differ only in initials vs. full names
	    # global value combined_names is set by same_personal_names()
	    key_value_pair[key] = "  " key " =       \"" combined_names "\","
	}
	else			# same key, but different value: keep both
	    key_value_pair[new_key(key_value_pair,key)] = older
    }
}

function min(a,b)
{
    # Return the smaller of a and b (b when neither is smaller).
    if (a < b)
	return (a)
    return (b)
}

function message(msg)
{
    # Write a diagnostic to the stderr stream.  It is tagged with the
    # current file name and line number, and with the line number and
    # text of the item being processed (start_fnr and start_line).
    printf("%s:%s:%s\tIn:%s:%s\n", \
	FILENAME, FNR, msg, start_fnr, start_line) >stderr
}

function max(a,b)
{
    # Return the larger of a and b (b when neither is larger).
    if (a > b)
	return (a)
    return (b)
}

function new_key(array,key, k,keynew)
{
    # Return the first of key0001, key0002, ... that is not yet an
    # index of array.  The array itself is not modified.
    k = 0
    do
	keynew = sprintf("%s%04d",key,++k)
    while (keynew in array)
    return (keynew)
}

function order_entry(kv_pair, item,k)
{
    # Return the text of the entry held in kv_pair: the opening line,
    # the fields in sorted key order, and a closing brace line.  The
    # elements of kv_pair are consumed in the process.
    #
    # For the purposes of manual merging, it is best to have the keys
    # appear in strictly sorted order.  Duplicate keys with different
    # string values will then appear consecutively, because their
    # table indexes take the form "key", "key0001", "key0002", etc.
    # Once manual editing is complete, biborder(1) can be used to
    # standardize the key order.

    # The opening line is taken out first so that it is not sorted
    # among the fields.
    item = get_pair(kv_pair,"__ENTRY__")

    sort_keys(kv_pair)

    k = 1
    while (sorted_keys[k])	# stops at the first empty slot
    {
	item = item get_pair(kv_pair,sorted_keys[k])
	k++
    }

    return (item "}\n")
}

function print_braced_item(count)
{
    # Print the current line and then further input lines until the
    # running brace count returns to zero.  This function is used for
    # @Preamble{...} and @String{...}, which require no special
    # processing.  An at-sign in column 1 before the braces balance is
    # a fatal error; end of input simply stops the copy.

    start_fnr = FNR
    start_line = $0
    print $0
    for (count = brace_count($0); count != 0; count += brace_count($0))
    {
	if (getline <= 0)
	    break
	if (substr($0,1,1) == "@")
	    error("New entry encountered before balanced braces found")
	print $0
    }
    last_line = $0
}

function print_entry(kv_pair)
{
    # Write the entry held in kv_pair, and remember its text in
    # last_line, which the blank-line squeezing rule consults.
    printf("%s", (last_line = order_entry(kv_pair)))
}

function reduce_string(s, t)
{
    # Return s reduced for comparison purposes: everything except
    # letters and digits is removed, and then the letters are mapped
    # to lower case.  (s is a by-value copy, so it can be edited here.)
    gsub(/[^A-Za-z0-9]/,"",s)
    return (tolower(s))
}

function same_entry( last_first_page,this_first_page,last_last_page,this_last_page)
{
    # Return 1 if last_key_value_pair[] and key_value_pair[] are
    # candidates for joining, and otherwise 0.  The opening lines
    # (entry type and citation label) must match ignoring letter case,
    # and the years must match.  For @Article entries the volumes must
    # match too, and so must the first page numbers whenever both
    # entries supply one.
    #
    # Side effect: when the entries match and only one of them has a
    # known last page, its pages field is copied to the other entry.

    if (tolower(the_entry(last_key_value_pair)) != \
	tolower(the_entry(key_value_pair)))
	return (0)
    if (the_year(last_key_value_pair) != the_year(key_value_pair))
	return (0)
    if (tolower(the_entry(key_value_pair)) !~ /@article/)
	return (1)
    if (the_volume(last_key_value_pair) != the_volume(key_value_pair))
	return (0)

    last_first_page = the_first_page(last_key_value_pair)
    this_first_page = the_first_page(key_value_pair)
    last_last_page = the_last_page(last_key_value_pair)
    this_last_page = the_last_page(key_value_pair)

    if ((last_first_page == "") || (this_first_page == ""))
	return (1)		# nothing to compare

    # The first pages must agree BEFORE any page range is propagated;
    # otherwise "123" and "456--460" would be joined merely because
    # the last page of one of them is unknown.
    if (last_first_page != this_first_page)
	return (0)

    if ((last_last_page ~ /^[?][?]*$/) && (this_last_page !~ /^[?][?]*$/))
    {				# merge pages = "123--??" with "123--124"
	last_key_value_pair["pages"] = key_value_pair["pages"]
    }
    else if ((last_last_page !~ /^[?][?]*$/) && (this_last_page ~ /^[?][?]*$/))
    {				# merge pages = "123--124" with "123--??"
	key_value_pair["pages"] = last_key_value_pair["pages"]
    }
    return (1)
}

function same_personal_names(last_value,this_value, \
	k,last_persons,n_last,n_this,the_person,this_persons)
{
    # Given two author/editor values, such as
    #
    #	"W. H. Durdan and W. J. Bowhill and J. F. Brown"
    #	"William H. Durdan and W. Bowhill and J. Frederick Brown"
    #
    # return 1 if they match by word/initial, and otherwise 0.
    #
    # Also, if the return value is 1, set the global variable
    # combined_names to a string with the longest names (e.g. "William"
    # instead of "W.").  For the above example, it would be set to
    #
    #	"William H. Durdan and W. J. Bowhill and J. Frederick Brown"
    #
    # Because some bibliography sources drop initials, two personal
    # names are considered to match if one has extra initials AFTER the
    # first (e.g. "W. J. Bowhill" matches "W. Bowhill", but does not
    # match "J. Bowhill").
    #
    # For entries with more than three authors, the UnCover database
    # stores only the first, second, and last, so we also consider
    # values to match if one has exactly three names, the other has more
    # than three, and the first, second, and last in each match.  In
    # that case combined_names carries ALL names of the longer list.
    #
    # When 0 is returned, combined_names may hold a partial result and
    # must not be used.

    combined_names = ""
    n_last = split(last_value,last_persons,/ +and +/)
    n_this = split(this_value,this_persons,/ +and +/)
    if (n_last == n_this)	# easy case: same number of personal names
    {
	for (k = 1; k <= n_last; ++k)
	{
	    the_person = same_person(last_persons[k],this_persons[k])
	    if (the_person == "")
		return (0)	# names mismatch
	    combined_names = combined_names the_person ((k < n_last) ? " and " : "")
	}
	return (1)
    }
    else if ((min(n_last,n_this) == 3) && (max(n_last,n_this) > 3))
    {				# UnCover database special case
	if (n_last > 3)
	{			# interchange the two arguments by resplitting
	    n_last = split(this_value,last_persons,/ +and +/)
	    n_this = split(last_value,this_persons,/ +and +/)
	}
	# From here on last_persons[] is the three-name list and
	# this_persons[] is the longer one.
	for (k = 1; k <= 2; ++k)
	{
	    the_person = same_person(last_persons[k],this_persons[k])
	    if (the_person == "")
		return (0)	# names mismatch
	    combined_names = combined_names the_person " and "
	}
	the_person = same_person(last_persons[3],this_persons[n_this])
	if (the_person == "")
	    return (0)		# names mismatch
	# Names 3 .. n_this-1 of the longer list have no counterpart in
	# the short list; copy them all (starting at 3, not 4, so that
	# the third name is not lost).
	for (k = 3; k < n_this; ++k)
	    combined_names = combined_names this_persons[k] " and "
	combined_names = combined_names the_person

	return (1)
    }
    else			# different count of names: no match possible
	return (0)
}

function same_person(last_person,this_person, \
	k,short_words,n_long,n_short,long_words,given,family,the_word)
{
    # Compare two personal names word by word.  Return the merged name
    # (each word in its longer form) when they match, and an empty
    # string when they differ.  One name may have extra words between
    # its leading words and its final word, e.g. "P. D. Bach" and
    # "P. D. Q. Bach"; those extra words are kept in the result.

    n_short = split(last_person,short_words,/ +/)
    n_long = split(this_person,long_words,/ +/)
    if (n_short > n_long)
    {			# interchange the two arguments by resplitting
	n_short = split(this_person,short_words,/ +/)
	n_long = split(last_person,long_words,/ +/)
    }

    # All words before the last one of the shorter name must pair up.
    given = ""
    for (k = 1; k < n_short; ++k)
    {
	the_word = same_word(short_words[k],long_words[k])
	if (the_word == "")
	    return ("")		# names differ
	given = given the_word " "
    }

    # The final words are compared with each other.
    family = same_word(short_words[n_short],long_words[n_long])
    if (family == "")
	return ("")		# family names differ

    # Unpaired words of the longer name are carried over unchanged.
    for (k = n_short; k < n_long; ++k)
	given = given long_words[k] " "

    return (given family)
}

function same_word(last_word,this_word, shorter,longer)
{
    # Compare two words of a personal name.  Identical words match.
    # Otherwise the shorter word, after removal of a final period, must
    # be a single character equal to the first character of the longer
    # word (e.g. "J." and "James").  Return the longer word on a match,
    # and an empty string on a mismatch.

    if (last_word == this_word)
	return (last_word)
    if (length(last_word) == length(this_word))
	return ("")		# lengths match, but words differ

    if (length(last_word) < length(this_word))
    {
	shorter = last_word
	longer = this_word
    }
    else
    {
	shorter = this_word
	longer = last_word
    }

    sub(/[.]$/,"",shorter)
    if ((length(shorter) == 1) && (shorter == substr(longer,1,1)))
	return (longer)
    return ("")
}
function sort_keys(kv_pair, k,key,m,n)
{
    # Fill the global array sorted_keys[1..n] with the indexes of
    # kv_pair, ordered by their lowercase forms.  Earlier contents of
    # sorted_keys[] are discarded.

    clear_array(sorted_keys)

    n = 0
    for (key in kv_pair)
	sorted_keys[++n] = key

    # Simple exchange sort: slot k is compared against every later slot
    # and swapped whenever the later key belongs first.
    k = 1
    while (k < n)
    {
	m = k + 1
	while (m <= n)
	{
	    if (tolower(sorted_keys[k]) > tolower(sorted_keys[m]))
	    {
		key = sorted_keys[k]
		sorted_keys[k] = sorted_keys[m]
		sorted_keys[m] = key
	    }
	    ++m
	}
	++k
    }
}

function squeeze( first_brace,first_blank)
{
    # Normalize the opening line of an item in $0: remove space before
    # and after the initial at-sign, and space between the entry name
    # and the first open brace.

    sub(/^[ \t]*@[ \t]*/,"@")

    first_brace = index($0,"{")
    first_blank = match($0,"[ \t]")
    if (first_blank >= first_brace)
	return			# no blank ahead of the first brace

    sub(/[ \t]+{/,"{")		# NB: sub(), NOT gsub(), here
}

function the_first_page(array, parts)
{
    # Return the part of the pages value of the entry in array that
    # precedes the first "--" (the whole value when there is no "--").
    split(the_pages(array),parts,"--")
    return (parts[1])
}

function the_last_page(array, parts)
{
    # Return the part of the pages value of the entry in array that
    # follows the first "--", or "??" when that part is absent or empty.
    split(the_pages(array),parts,"--")
    if (parts[2] != "")
	return (parts[2])
    return ("??")		# default unknown ending page
}

function the_entry(array)
{
    # Return the opening line stored under "__ENTRY__" in array, or an
    # empty string when array holds no entry.
    if ("__ENTRY__" in array)
	return (array["__ENTRY__"])
    return ("")
}

function the_pages(array)
{
    # Return the bare value of the pages field of the entry in array,
    # or an empty string when the entry has no pages field.
    if ("pages" in array)
	return (get_value("pages",array["pages"]))
    return ("")
}

function the_volume(array)
{
    # Return the bare value of the volume field of the entry in array,
    # or an empty string when the entry has no volume field.
    if ("volume" in array)
	return (get_value("volume",array["volume"]))
    return ("")
}

function the_year(array)
{
    # Return the bare value of the year field of the entry in array,
    # or an empty string when the entry has no year field.
    if ("year" in array)
	return (get_value("year",array["year"]))
    return ("")
}

function trim()
{
    # Strip trailing blanks and tabs from the current input line.
    sub(/[ \t]+$/,"",$0)
}

function warning(msg)
{
    # Report msg as a non-fatal diagnostic (marked with %%) through
    # message(); processing continues afterwards.
    message(("%%" msg))
}
'
# Run the program under GNU gawk rather than nawk: Sun Solaris 2.x nawk
# often complains "input record too long".  The two switches are handed
# to the awk program as variables.  $FILES is deliberately left unquoted
# so that the blank-separated list built above expands into one argument
# per input file (and into no argument at all when reading stdin).
gawk -v "CHECKMISSING=$CHECKMISSING" -v "VERSION=$VERSION" \
	"$PROGRAM" $FILES
################################[The End]###############################
