/***********************************************************************
 @C-file{
    author              = "Nelson H. F. Beebe",
    version             = "1.05",
    date                = "01 August 1992",
    time                = "18:02:37 MDT",
    filename            = "bibclean.c",
    address             = "Center for Scientific Computing
                           Department of Mathematics
                           South Physics Building
                           University of Utah
                           Salt Lake City, UT 84112
                           USA",
    telephone           = "+1 801 581 5254",
    FAX                 = "+1 801 581 4148",
    checksum            = "05545 1395 4528 35137",
    email               = "beebe@alfred.math.utah.edu (Internet)",
    codetable           = "ISO/ASCII",
    keywords            = "prettyprint, bibliography",
    supported           = "yes",
    docstring           = {Prettyprint a BibTeX file on stdin to stdout,
                           and check the brace balance as well.

                           Text outside @item-type{...} BibTeX entries
                           is passed through verbatim, except that
                           trailing blanks are trimmed.

                           BibTeX items are formatted into a consistent
                           structure with one key = "value" pair per
                           line, and the initial @ and trailing right
                           brace in column 1.  Long values are split at a
                           blank and continued onto the next line with
                           leading indentation.  Tabs are expanded into
                           blank strings; their use is discouraged
                           because they inhibit portability, and can
                           suffer corruption in electronic mail.  Braced
                           strings are converted to quoted strings.

                           This format facilitates the later application
                           of simple filters to process the text for
                           extraction of items, and also is the one
                           expected by the GNU Emacs BibTeX support
                           functions.

                           Usage:
                               bibclean [-author] [-help] [-keep-initials]
                                   [-version] <oldbib >newbib

                           The checksum field above contains a CRC-16
                           checksum as the first value, followed by the
                           equivalent of the standard UNIX wc (word
                           count) utility output of lines, words, and
                           characters.  This is produced by Robert
                           Solovay's checksum utility.},
       }
***********************************************************************/

/***********************************************************************

The formatting should perhaps be user-customizable; that is left for
future work.

The major goal has been to convert entries to the standard form

@item-type{citation-key,
  key =           "value",
  key =           "value",
  ...
}

while applying heuristics to permit early error detection.  If
the input file is syntactically correct for BibTeX and LaTeX,
this is reasonably easy.  If the file has errors, error recovery
is attempted, but cannot be guaranteed to be successful; however,
the output file, and stderr, will contain an error message that
should localize the error to a single entry where a human can
find it more easily than a computer can.  To facilitate error
checking and recovery, the following conditions are used:

	@       starts a BibTeX entry only if it occurs at brace
		level 0 and is not preceded by non-blank text on
		the same line.
	"       is significant only at brace level 1.
	{}      are expected to occur at @-level 1 or higher
	}       at beginning of line ends a BibTeX entry

Backslashes preceding these 4 characters remove their special
significance.

These heuristics are needed to deal with legal value strings like

	{..."...}
	"...{..}..."

and will flag as errors strings like

	"...{..."
	"...}..."

The special treatment of @ and } at beginning of line attempts to
detect errors in entries before the rest of the file is swallowed
up in an attempt to complete an unclosed entry.

The output bibliography file should be processed by BibTeX and
then LaTeX without errors before discarding the original
bibliography file.

We do our own output and line buffering here so as to be able to
trim trailing blanks, and output data in rather large blocks for
efficiency (in filters of this type, I/O accounts for the bulk of
the processing, so large output buffers offer significant
performance gains).

Revision history (reverse time order):

[01-Aug-1992]	1.05
		Add -keep-initials switch support (thanks to Karl Berry
		<karl@cs.umb.edu>).  Internationalize telephone and FAX
		numbers.

[02-Jan-1992]	1.04
		Modify fix_title() to ignore macros.  Modify fix_author
		to ignore author lists with parentheses (e.g.
		author = "P. D. Q. Bach (113 MozartStrasse, Vienna, Austria)").

[31-Dec-1991]	1.03
		Add fix_title() to supply braces around unbraced
		upper-case acronyms in titles, and add private
		definition of MAX().

[15-Nov-1991]	1.02
		Handle @String(...) and @Preamble(...), converting
		outer parentheses to braces.  Insert spaces after
		author and editor initials, and normalize names to
		form "P. D. Q. Bach" instead of "Bach, P. D. Q.".

[10-Oct-1991]	1.01
		Increase MAX_TOKEN to match enlarged BibTeX, and add
		check against STD_MAX_TOKEN.
		Output ISBN and ISSN in upper case.
		Always surround = by blanks in key = "value".

[19-Dec-1990]	1.00 (version number unchanged)
		Install Sun386i bug fix.

[08-Oct-1990]	1.00
		Original version.
***********************************************************************/

/* Banner printed by version(). */
#define BIBCLEAN_VERSION        "bibclean Version 1.05 [01-Aug-1992]"

#include "os.h"
#include "xstdlib.h"            /* only for EXIT_FAILURE and EXIT_SUCCESS */
#include <stdio.h>
#include <ctype.h>
#include "xstring.h"

/* Discard any MAX() inherited from a header so that the private
definition below is the one in effect. */
#ifdef MAX
#undef MAX
#endif

/* NB: each argument is evaluated twice; do not pass expressions with
side effects. */
#define MAX(a,b) (((a) > (b)) ? (a) : (b))

#ifdef sun386
/* Sun386i run-time library bug in fputs(): only first line in s is written! */
#define fputs(s,fp) fwrite(s,1,strlen(s),fp)
#endif

/* See LaTeX User's Guide and Reference Manual, Section B.1.3, for the
rules of what characters can be used in a BibTeX word value.  Section
30 of BibTeX initializes id_class[] to match this, but curiously,
allows ASCII DELete (0x7f), as an identifier.  This irregularity has
been reported to Oren Patashnik on [06-Oct-1990].  We disallow it
here.  */

/* NB: c is evaluated twice; pass a simple variable. */
#define isidchar(c) (isgraph(c) && (strchr("\"#%'(),={}",c) == (char*)NULL))

/* Fallback exit codes in case the included headers do not supply them. */
#ifndef EXIT_FAILURE
#define EXIT_FAILURE 1
#endif

#ifndef EXIT_SUCCESS
#define EXIT_SUCCESS 0
#endif

/* Boolean values used for flags and for do_item()'s return value. */
#define NO  0

#define YES 1

#define KEY_INDENT      2       /* how far to indent "key = value," pairs */

#define MAX_COLUMN      72      /* length of longest entry line; */
				/* non-BibTeX entry text is output verbatim */

#define MAX_KEY_LENGTH  12      /* "howpublished" */

#define MAX_BUFFER      16384   /* output buffer size; this does NOT */
				/* limit lengths of input lines */

#define STD_MAX_TOKEN   1000    /* Standard BibTeX limit */

#define MAX_TOKEN       3000    /* internal buffer size; no BibTeX string
				value may be larger than this. */

#define NOOP                    /* dummy statement */

#define VALUE_INDENTATION       (KEY_INDENT + MAX_KEY_LENGTH + 3)
				/* where item values are output; allow space */
				/* for "<key indent><key name>< = >" */
/* All functions except main() and strnicmp() are static so that we avoid
short external name restrictions on older machines.  Every prototype is
written with ARGS(()) so that the parameter lists can be suppressed for
pre-ANSI compilers. */

static void     author ARGS((void));
static void     do_args ARGS((int argc, char* argv[]));
static void     do_at ARGS((void));
static void     do_BibTeX_entry ARGS((void));
static void     do_close_brace ARGS((void));
static void     do_comma ARGS((void));
static void     do_equals ARGS((void));
static void     do_entry_name ARGS((void));
static void     do_group ARGS((void));
static int      do_item ARGS((void));
static void     do_key ARGS((void));
static void     do_open_brace ARGS((void));
static void     do_other ARGS((void));
static void     do_tag_name ARGS((void));
static void     do_value ARGS((void));
static void     error ARGS((const char *msg));
static char*    fix_author ARGS((char *author));
static void     fix_month ARGS((void));
static void     fix_namelist ARGS((void));
static char*    fix_periods ARGS((char *author));
static char*    fix_title ARGS((char *title));
static int      get_char ARGS((void));
static int      get_next_non_blank ARGS((void));
static char     *get_simple_string ARGS((void));
static void     help ARGS((void));
int             main ARGS((int argc, char* argv[]));
static void     new_entry ARGS((void));
static void     out_c ARGS((int c));
static void     out_s ARGS((const char* s));
static void     put_back ARGS((int c));
static void     put_char ARGS((int c));
static void     resync ARGS((void));
int             strnicmp ARGS((const char *s1, const char *s2, size_t n));
static void     usage ARGS((void));
static void     version ARGS((void));
static void     warning ARGS((const char *msg));
static int      word_length ARGS((const char* s));
static void     wrap_line ARGS((void));

/* Parser and output state shared by all of the functions in this file. */

static int      at_level = 0;                   /* @ nesting level */
static int      at_line_number = 0;             /* output line number */
						/* recorded by new_entry() */
static int      brace_level = 0;                /* curly brace nesting level */
static int      close_char = EOF;               /* BibTeX entry closing; may */
						/* be right paren or brace */
static int      column = 0;                     /* last output column used */
static char     entry_name[MAX_TOKEN+1];        /* BibTeX entry name */
static int      eofile = NO;                    /* set to YES at end-of-file */
static int      error_count = 0;                /* used to decide exit code */
static int	keep_initials = 0;		/* nonzero (-keep-initials): */
						/* leave "A.U. Thor" as is */
static char     key[MAX_TOKEN+1];               /* BibTeX key name */
static int      input_line_number = 1;          /* used in error messages */
static int      non_white_chars = 0;            /* non-blank characters read */
						/* so far on the current input */
						/* line; used to test for */
						/* legal @ */
static int      output_line_number = 1;         /* used in error messages */
static char     *program_name;                  /* set to argv[0] */
static int      rflag = NO;                     /* YES if resynchronizing */
static char     tag[MAX_TOKEN+1];               /* BibTeX citation tag */
static char     value[MAX_TOKEN+1];             /* BibTeX string value */

/* Print the author's name, postal address, telephone and FAX numbers,
   and e-mail address on stderr (used for the -author switch).  The FAX
   number agrees with the one recorded in the file header. */
static void
author()
{
    (void)fprintf(stderr,"Author:\n");
    (void)fprintf(stderr,"\tNelson H. F. Beebe\n");
    (void)fprintf(stderr,"\tCenter for Scientific Computing\n");
    (void)fprintf(stderr,"\tDepartment of Mathematics\n");
    (void)fprintf(stderr,"\tSouth Physics Building\n");
    (void)fprintf(stderr,"\tUniversity of Utah\n");
    (void)fprintf(stderr,"\tSalt Lake City, UT 84112\n");
    (void)fprintf(stderr,"\tUSA\n");
    (void)fprintf(stderr,"\tTel: +1 801 581 5254\n");
    (void)fprintf(stderr,"\tFAX: +1 801 581 4148\n");
    (void)fprintf(stderr,"\tEmail: <beebe@math.utah.edu>\n");
}


/* Process the command-line switches.

   argc, argv   as received by main(); argv[0] is saved in program_name.

   The first character of each argument is taken to be the switch
   lead-in and is not examined.  The rest is compared, ignoring case,
   with each known switch name over the length of what the user typed,
   so any leading abbreviation (e.g. -v for -version) is accepted.

   An unrecognized or empty argument prints the usage message and exits
   with EXIT_FAILURE.  An empty argument must be rejected explicitly:
   it has no character after the lead-in, so looking at argv[k][1]
   would read past the end of the string. */
static void
do_args(argc,argv)
int argc;
char* argv[];
{
    int k;
    size_t n;                   /* number of characters to compare */
    const char *opt;            /* switch name following the lead-in */

    program_name = argv[0];

    for (k = 1; k < argc; ++k)
    {
        if (argv[k][0] == '\0')
        {
            usage();
            exit(EXIT_FAILURE);
        }

        opt = &argv[k][1];
        n = MAX((size_t)1,strlen(opt)); /* compare at least one character */

        if (strnicmp(opt,"author",n) == 0)
            author();
        else if (strnicmp(opt,"help",n) == 0)
            help();
        else if (strnicmp(opt,"?",n) == 0)
            help();
        else if (strnicmp(opt,"keep-initials",n) == 0)
            keep_initials = 1;
        else if (strnicmp(opt,"version",n) == 0)
            version();
        else
        {
            usage();
            exit(EXIT_FAILURE);
        }
    }
}


static void
do_at()                         /* parse @name{...} */
{
    int c;

    c = get_next_non_blank();
    if ((non_white_chars == 1) && (c == '@'))
    {
	at_level++;
	out_c(c);
	if (brace_level != 0)
	{
	    error("@ begins line, but brace level is not zero");
	    brace_level = 0;
	}
    }
    else if (c != EOF)
    {
	out_c(c);
	error("Expected @name{...}");
	resync();
    }
}


/*************************************************************
 Parse a BibTeX entry, one of:
   @entry-name{tag,key=value,key=value,...,}
   @string{name=value}
   @preamble{...}

 Each parsing step may detect an error and resynchronize (rflag
 becomes YES), after which the input is already positioned at
 the start of the next entry; parsing of the current entry is
 then abandoned at once.  That includes the step after the
 key = value loop: looking for a closing brace at that point
 would consume the "@" of the following entry and report a
 second, spurious, error.
*************************************************************/
static void
do_BibTeX_entry()
{
    new_entry();

    do_at();
    if (rflag || eofile) return;

    do_entry_name();
    if (rflag) return;

    if (strcmp(entry_name,"Preamble") == 0)
        do_group();
    else if (strcmp(entry_name,"String") == 0)
        do_group();
    else
    {
        do_open_brace();
        if (rflag) return;

        do_tag_name();
        if (rflag) return;

        do_comma();
        if (rflag) return;

        while (do_item() == YES)
            NOOP;
        if (rflag) return;      /* already resynchronized inside do_item() */

        do_close_brace();
    }
}


static void
do_close_brace()                /* parse level 1 closing brace or parenthesis */
{
    int c;

    c = get_next_non_blank();
    if (c == EOF)
	return;
    else if (c == close_char)
    {
	if (c == ')')
	    brace_level--;      /* get_char() could not do this for us */
	out_c('}');             /* standardize parenthesis to brace */
	if (brace_level != 0)
	{
	    error("Non-zero brace level after @name{...} processed");
	    resync();
	}
    }
    else                        /* raise error and try to resynchronize */
    {
	out_c(c);
	error("Expected closing brace or parenthesis");
	resync();
    }
}


/* Parse a comma, or an optional comma before a closing brace or
   parenthesis; an omitted legal comma is supplied explicitly.
   A newline is output after the comma so that key = value
   pairs appear on separate lines.

   When the closing character is met instead of a comma it is pushed
   back for do_close_brace() to consume.  put_back() restores
   brace_level for "}" but knows nothing about ")", so the decrement
   made here for ")" is undone by hand.  Otherwise do_close_brace()
   would decrement a second time and report a bogus non-zero brace
   level for entries such as @Article(tag, key = "value"). */
static void
do_comma()
{
    int c;

    c = get_next_non_blank();
    if (c == EOF)
        NOOP;
    else if (c == ',')
    {
        out_c(c);
        out_c('\n');
    }
    else if (c == close_char)
    {                   /* supply missing comma for last key = value pair*/
        if (c == ')')
            brace_level--;      /* get_char() could not do this for us */
        if (brace_level == 0)
        {
            put_back(c);
            if (c == ')')
                brace_level++;  /* put_back() could not do this for us */
            out_c(',');
            out_c('\n');
        }
        else
        {
            out_c(c);
            error("Non-zero brace level after @name{...} processed");
            resync();
        }
    }
    else                        /* raise error and try to resynchronize */
    {
        out_c(c);
        error("Expected comma");
        resync();
    }
}


/* Read the entry name that follows "@", store it in entry_name[] with
   the first letter in upper case and the rest in lower case, and
   write it out.

   Every character is fetched with get_next_non_blank(), so white
   space inside or after the name is skipped.  The first character
   that cannot be part of an identifier ends the name and is pushed
   back.  A name of MAX_TOKEN or more characters is an error (with
   resynchronization); one of STD_MAX_TOKEN or more draws a warning. */
static void
do_entry_name()
{
    int ch;
    int len = 0;

    for (;;)
    {
        ch = get_next_non_blank();
        if ((ch == EOF) || !isidchar(ch))
            break;

        if (len == 0)
        {
            if (!isalpha(ch))
                error("Non-alphabetic character begins an entry name");
            if (islower(ch))
                ch = toupper(ch);
        }
        else if (isupper(ch))
            ch = tolower(ch);

        if (len >= MAX_TOKEN)
        {
            entry_name[len] = '\0';
            out_s(entry_name);
            error("@entry_name too long");
            resync();
            return;
        }
        entry_name[len++] = ch;
    }

    entry_name[len] = '\0';
    if (ch != EOF)
        put_back(ch);
    out_s(entry_name);

    if (len >= STD_MAX_TOKEN)
        warning("String length exceeds standard BibTeX limit");
}


/* Process the "=" in key = value.

   The equals sign is always written with one blank on either side.
   Any other non-EOF character is echoed, reported, and followed by
   resynchronization.  In every case the output line is then padded
   with blanks out to column VALUE_INDENTATION. */
static void
do_equals()
{
    int ch = get_next_non_blank();

    if (ch != EOF)
    {
        if (ch == '=')
        {
            out_c(' ');
            out_c(ch);
            out_c(' ');
        }
        else
        {
            out_c(ch);
            error("Expected \"=\"");
            resync();
        }
    }

    for (; column < VALUE_INDENTATION; )
        out_c(' ');             /* leading indentation for the value */
}


/* Copy a braced (or parenthesized) group verbatim.

   After the opening delimiter has been handled by do_open_brace(),
   characters are copied until the brace level returns to zero or EOF
   is met.  For a parenthesized group, a ")" seen at brace level 1
   ends the group and is written as "}".  An "@" that is the first
   non-blank character on its line is reported as an error but is
   still copied. */
static void
do_group()
{
    int ch;

    do_open_brace();
    if (rflag)
        return;

    for (;;)
    {
        ch = get_char();
        if (ch == EOF)
            break;

        if ((brace_level == 1) && (close_char == ')') && (ch == close_char))
        {                       /* end of @keyword(...) */
            brace_level = 0;
            ch = '}';
        }

        if ((non_white_chars == 1) && (ch == '@'))
            error("@ begins line, but brace level is not zero");

        out_c(ch);

        if (brace_level == 0)
            break;
    }
}


/* Process one key = value pair, including the comma that follows it.

   Returns YES if a complete pair was processed and more may follow.
   Returns NO at end of file, when an empty key is read (no more
   pairs), or as soon as any step has had to resynchronize. */
static int
do_item()
{
    if (eofile)
        return (NO);

    do_key();
    if (!(rflag || eofile) && (key[0] != '\0'))
    {
        do_equals();
        if (!(rflag || eofile))
        {
            do_value();
            if (!(rflag || eofile))
            {
                do_comma();     /* supplies any missing optional comma */
                if (!(rflag || eofile))
                    return (YES);
            }
        }
    }
    return (NO);
}


/* Read a key name into key[], folded to lower case, and write it
   out preceded by KEY_INDENT blanks.

   Leading blanks are skipped; the first character that cannot be
   part of an identifier ends the key and is pushed back.  "isbn" and
   "issn" are written in upper case, and a leading "opt" becomes
   "OPT".  An empty key produces no output.  A key of MAX_TOKEN or
   more characters is an error (with resynchronization); one of
   STD_MAX_TOKEN or more draws a warning. */
static void
do_key()
{
    int ch;
    int len = 0;
    int pad;

    ch = get_next_non_blank();
    while ((ch != EOF) && isidchar(ch))
    {
        if (len >= MAX_TOKEN)
        {
            key[len] = '\0';
            out_s(key);
            error("Entry keyword too long");
            resync();
            return;
        }
        if ((len == 0) && !isalpha(ch))
            error("Non-alphabetic character begins a keyword");
        key[len++] = isupper(ch) ? tolower(ch) : ch;
        ch = get_char();
    }
    if (ch != EOF)
        put_back(ch);
    key[len] = '\0';

    if (strcmp(key,"isbn") == 0)        /* these look better in uppercase */
        (void)strcpy(key,"ISBN");
    else if (strcmp(key,"issn") == 0)
        (void)strcpy(key,"ISSN");
    else if (strncmp("opt",key,3) == 0) /* GNU Emacs bibtex.el expects OPT */
    {
        key[0] = 'O';           /* overwrite the prefix only */
        key[1] = 'P';
        key[2] = 'T';
    }

    if (len > 0)
    {
        for (pad = 0; pad < KEY_INDENT; ++pad)
            out_c(' ');         /* leading indentation */
        out_s(key);
    }

    if (len >= STD_MAX_TOKEN)
        warning("String length exceeds standard BibTeX limit");
}


/* Process the open brace or parenthesis that follows an entry name.

   The matching closer is recorded in close_char, and "{" is written
   in either case so that parentheses are standardized to braces.
   Any other non-EOF character is echoed, reported, and followed by
   resynchronization. */
static void
do_open_brace()
{
    int ch = get_next_non_blank();

    switch (ch)
    {
    case EOF:
        return;

    case '{':
        close_char = '}';
        out_c('{');
        break;

    case '(':
        close_char = ')';
        brace_level++;          /* get_char() only counts curly braces */
        out_c('{');
        break;

    default:                    /* raise error and try to resynchronize */
        out_c(ch);
        error("Expected open brace or parenthesis");
        resync();
        break;
    }
}


/* Copy non-BibTeX text to the output until end of file, or until an
   "@" is met that is the first non-blank character on its input
   line.  That "@" is pushed back for the entry parser. */
static void
do_other()
{
    int ch;

    for (;;)
    {
        ch = get_char();
        if (ch == EOF)
            break;

        if ((ch == '@') && (non_white_chars == 1))
        {                       /* new entry found */
            put_back(ch);
            break;
        }

        out_c(ch);
    }
}


/* Read the citation tag into tag[] and write it out.

   Leading blanks are skipped; the tag ends at the first comma or
   white-space character, which is pushed back.  A tag of MAX_TOKEN
   or more characters is an error (with resynchronization); one of
   STD_MAX_TOKEN or more draws a warning. */
static void
do_tag_name()
{
    int ch;
    int len = 0;

    ch = get_next_non_blank();
    while ((ch != EOF) && (ch != ',') && !isspace(ch))
    {
        if (len >= MAX_TOKEN)
        {
            tag[len] = '\0';
            out_s(tag);
            error("Citation tag too long");
            resync();
            return;
        }
        tag[len++] = ch;
        ch = get_char();
    }

    tag[len] = '\0';
    if (ch != EOF)
        put_back(ch);
    out_s(tag);

    if (len >= STD_MAX_TOKEN)
        warning("String length exceeds standard BibTeX limit");
}


/***********************************************************************
BibTeX value fields can take several forms, as illustrated by this
simple BNF grammar:

BibTeX-value-string:
	simple-string |
	simple-string # BibTeX-value-string

simple-string:
	"quoted string" |
	{braced-string} |
	digit-sequence  |
	alpha-sequence
***********************************************************************/

/* Process a BibTeX value string: one or more simple strings joined
   by "#".

   The pieces are gathered into the global value[], separated by
   " # ".  The result is then normalized according to the current key
   (month, author, editor, title) and written out.  A value that would
   reach MAX_TOKEN characters is an error (with resynchronization);
   one of STD_MAX_TOKEN or more draws a warning.

   value[] is emptied on entry and kept NUL-terminated after every
   append.  get_simple_string() returns "" when it fails, and without
   these two steps the text left over from an earlier, longer value
   would be written out in its place or after a trailing " # ". */
static void
do_value()
{
    int c;
    int k;
    char *s;
    int n;

    value[0] = '\0';            /* never reuse a previous value */

    s = get_simple_string();
    for (k = 0; *s; )
    {
        n = strlen(s);
        if ((k + n) >= MAX_TOKEN)
        {
            value[k] = '\0';
            out_s(value);
            out_s(s);
            error("Value too long");
            resync();
            return;
        }
        strcpy(&value[k],s);
        k += n;
        c = get_next_non_blank();
        if (c == '#')
        {
            if ((k + 3) >= MAX_TOKEN)
            {
                value[k] = '\0';
                out_s(value);
                out_s(" # ");
                error("Value too long");
                resync();
                return;
            }
            value[k++] = ' ';
            value[k++] = '#';
            value[k++] = ' ';
            value[k] = '\0';    /* stay terminated if next piece is empty */
            s = get_simple_string();
        }
        else                    /* end of string */
        {
            put_back(c);
            s = "";
        }
    }
    if (strcmp(key,"month") == 0)
        fix_month();
    else if (strcmp(key,"author") == 0)
        fix_namelist();
    else if (strcmp(key,"editor") == 0)
        fix_namelist();
    else if (strcmp(key,"title") == 0)
        strcpy(value,fix_title(value));
    out_s(value);
    if (k >= STD_MAX_TOKEN)
        warning("String length exceeds standard BibTeX limit");
}


/* Issue an error message.

   msg  text of the message; if NULL, "Unbalanced braces detected"
        is used instead.

   The error count is incremented, all buffered output is flushed,
   and the message, with the current input and output line numbers,
   is written to stderr and also (after a newline) to stdout.
   at_level is then zeroed. */
static void
error(msg)
const char *msg;
{
    const char *text =
        (msg == (const char*)NULL) ? "Unbalanced braces detected" : msg;

    error_count++;
    put_char(EOF);              /* flush all buffered output */

    (void)fprintf(stderr,
        "?? %s at line %d (input) %d (output)\n",
        text,input_line_number,output_line_number);
    (void)fprintf(stdout,
        "\n?? %s at line %d (input) %d (output)\n",
        text,input_line_number,output_line_number);

    at_level = 0;               /* suppress further messages */
                                /* until we have resynchronized */
}


/* Normalize one personal name.

   author  NUL-terminated name; it is not modified.

   "Smith, J.K." becomes "J.K. Smith", provided the comma is at
   brace level 0; only the first such comma is acted upon.  Names
   containing "(" are not reordered, for example
       P. D. Q. Bach (113 MozartStrasse, Vienna, Austria)
   In every case the result is passed through fix_periods(), whose
   return value is returned. */
static char*
fix_author(author)
char *author;
{
    size_t pos;                 /* index of the character examined */
    int depth = 0;              /* brace nesting depth */
    char *rest;                 /* text following the comma */
    static char s[MAX_TOKEN+1]; /* static to save stack space */

    if (strchr(author,'(') != (char*)NULL)
        return (fix_periods(author));

    for (pos = 0; author[pos] != '\0'; ++pos)
    {
        if (author[pos] == '{')
            depth++;
        else if (author[pos] == '}')
            depth--;
        else if ((author[pos] == ',') && (depth == 0))
        {
            rest = author + pos + 1;
            while (isspace(*rest))
                ++rest;
            (void)strcpy(s,rest);           /* s <- "J.K." */
            (void)strcat(s," ");            /* s <- "J.K. " */
            (void)strncat(s,author,pos);    /* s <- "J.K. Smith" */
            return (fix_periods(s));
        }
    }

    return (fix_periods(author));
}


/* Replace a quoted full English month name in value[] by the
   corresponding three-letter macro name (e.g. "January" -> jan), for
   better style-file customization.  The match is exact and
   case-sensitive; any other value is left untouched. */
static void
fix_month()
{
    static struct
    {
        const char *full;       /* quoted full name as it appears */
        const char *abbrev;     /* macro name that replaces it */
    }
    table[] =
    {
        { "\"January\"",   "jan" },
        { "\"February\"",  "feb" },
        { "\"March\"",     "mar" },
        { "\"April\"",     "apr" },
        { "\"May\"",       "may" },
        { "\"June\"",      "jun" },
        { "\"July\"",      "jul" },
        { "\"August\"",    "aug" },
        { "\"September\"", "sep" },
        { "\"October\"",   "oct" },
        { "\"November\"",  "nov" },
        { "\"December\"",  "dec" }
    };
    int entries = (int)(sizeof(table) / sizeof(table[0]));
    int m = 0;

    while (m < entries)
    {
        if (strcmp(value,table[m].full) == 0)
        {
            strcpy(value,table[m].abbrev);
            return;
        }
        ++m;
    }
}


/* Normalize the list of personal names held in the global value[],
   leaving the result in value[].

   "Smith, J.K. and Brown, P.M." is converted to
   "J. K. Smith and P. M. Brown": the names separated by " and " are
   handed one at a time to fix_author().  A value that is not a
   quoted string (it may be a macro) is left alone.

   Normalizing can lengthen a name, so the rebuilt list is checked
   against the size of the work area before every append.  If it
   would not fit, value[] is restored and left as it was. */
static void
fix_namelist()
{
    char *fixed;                /* one normalized name */
    size_t len;                 /* strlen(fixed) */
    int m;                      /* index of start of author in value[] */
    int n;                      /* length of value[], less 1 */
    static char namelist[MAX_TOKEN+1];  /* static to save stack space */
    size_t used;                /* characters stored in namelist[] */
    int v;                      /* loop index into value[] */

    n = (int)strlen(value) - 1; /* value = "\"...\"" */

    if ((value[0] != '"') || (value[n] != '"')) /* sanity check */
        return;                 /* not quoted string, may be macro */

    namelist[0] = '"';          /* supply initial quotation mark */
    namelist[1] = '\0';
    used = 1;

    value[n] = '\0';            /* clobber final quotation mark */
    for (v = 1, m = 1; v < n; ++v) /* start past initial quotation mark */
    {
        if (strncmp(" and ",&value[v],5) == 0)
        {
            value[v] = '\0';    /* isolate this name */
            fixed = fix_author(&value[m]);
            len = strlen(fixed);
            if ((used + len + 5) > (size_t)MAX_TOKEN)
            {                   /* no room: give up, undo our changes */
                value[v] = ' ';
                value[n] = '"';
                return;
            }
            /* fixed may point into value[], so copy it before the */
            /* blank is put back */
            (void)strcpy(&namelist[used],fixed);
            used += len;
            (void)strcpy(&namelist[used]," and ");
            used += 5;
            value[v] = ' ';
            v += 4;
            m = v + 1;
        }
    }

    fixed = fix_author(&value[m]);      /* handle last author */
    len = strlen(fixed);
    if ((used + len + 1) > (size_t)MAX_TOKEN)
    {
        value[n] = '"';
        return;
    }
    (void)strcpy(&namelist[used],fixed);
    used += len;
    namelist[used++] = '"';     /* supply final quotation mark */
    namelist[used] = '\0';

    (void)strcpy(value,namelist);
}


/* Insert a blank between adjacent initials in a personal name.

   author  NUL-terminated name; it is not modified.

   "J.K. Smith" becomes "J. K. Smith": a blank is supplied after each
   "." at brace level 0 that stands between two upper-case letters.

   Returns a pointer to a static buffer holding the converted name.
   The argument itself is returned, unconverted, when the
   -keep-initials switch is in effect, or when the converted name
   would not fit in the buffer (each initial adds one character). */
static char*
fix_periods(author)
char *author;
{
    int b_level;                /* brace level */
    int a;                      /* index in author[] */
    int n;                      /* index in name[] */
    static char name[MAX_TOKEN+1]; /* static to save stack space */

    if (keep_initials)
      return author;

    for (b_level = 0, a = 0, n = 0; /* NO-OP (exit below) */ ; ++a, ++n)
    {
        /* each pass stores at most two characters, name[n] and */
        /* name[n+1]; leave room for them and for the final NUL */
        if (n > (MAX_TOKEN - 2))
            return (author);

        name[n] = author[a];    /* copy character */
        if (author[a] == '\0')
            break;              /* here's the loop exit */
        switch (author[a])
        {
        case '{':
            b_level++;
            break;

        case '}':
            b_level--;
            break;

        case '.':
            /* ctype functions need unsigned char values */
            if ((b_level == 0) && (a > 0) &&
                isupper((unsigned char)author[a-1]) &&
                isupper((unsigned char)author[a+1]))
                name[++n] = ' '; /* supply space between initials */
            break;

        default:
            break;
        }
    }
    return (name);
}


static char*
fix_title(title)			/* protect upper-case acronyms */
char *title;
{
    int k;				/* index into s[] */
    int t;				/* index into title[] */
    int b_level;			/* brace level */
    static char s[MAX_TOKEN+1];		/* static to save stack space */

    if (title[0] != '\"')
	return (strcpy(s,title));	/* leave macros alone */

    for (k = 0, b_level = 0, t = 0; title[t]; )
    {
	switch (title[t])
	{
	case '{':
	    b_level++;
	    s[k++] = title[t++];
	    break;

	case '}':
	    b_level--;
	    s[k++] = title[t++];
	    break;

	default:
	    if ( (b_level == 0) && isupper(title[t]) && isupper(title[t+1]) )
	    {
		s[k++] = '{';
		while (isupper(title[t]))
		    s[k++] = title[t++];
		s[k++] = '}';
	    }
	    else
	        s[k++] = title[t++];
	    break;
	}
    }
    s[k] = '\0';
    if (k >= STD_MAX_TOKEN)
	warning("String length exceeds standard BibTeX limit");
    return (s);
}


/* Read one character from stdin; all input is read through this
   function.

   The global status values are kept up to date: eofile is set at end
   of file; a newline advances input_line_number and zeroes
   non_white_chars; any other non-blank character is counted in
   non_white_chars; and curly braces adjust brace_level.

   Returns the character read, or EOF. */
static int
get_char()
{
    int ch = getchar();

    switch (ch)
    {
    case EOF:
        eofile = YES;
        break;

    case '\n':
        input_line_number++;
        non_white_chars = 0;
        break;

    case '{':
        non_white_chars++;
        brace_level++;
        break;

    case '}':
        non_white_chars++;
        brace_level--;
        break;

    default:
        if (!isspace(ch))
            non_white_chars++;
        break;
    }

    return (ch);
}


/* Skip white space on the input and return the first character that
   is not white space, or EOF. */
static int
get_next_non_blank()
{
    int ch;

    do
        ch = get_char();
    while ((ch != EOF) && isspace(ch));

    return (ch);
}


/* Read one simple BibTeX string: a quoted string, a braced string, a
   digit sequence, or a word (macro name).

   Runs of white space inside the string are reduced to one blank.
   Digit sequences are returned as quoted strings.  Braced strings
   are converted to quoted strings; a quotation mark at brace level 1
   that is not preceded by a backslash is protected as {"}.  In the
   value of a "pages" key, a single hyphen between digits is doubled
   to make an en-dash.

   Returns a pointer to a static buffer holding the string, or "" if
   the input does not start a value string (after reporting the error
   and resynchronizing).  A string that does not fit in MAX_TOKEN
   characters, either as read or after the brace-to-quote conversion,
   is reported as an error and the part collected so far is returned.
   A string of STD_MAX_TOKEN or more characters draws a warning. */
static char *
get_simple_string()
{
    int b_level = 0;
    int c;
    int collecting = YES;
    int k;
    int n;
    int need;                   /* characters next stored in t[] */
    static char s[MAX_TOKEN+1]; /* static so we can return it to caller */
    static char t[MAX_TOKEN+1]; /* static so we can return it to caller */
    int type;

#define B_VALUE 0               /* braced entry */
#define I_VALUE 1               /* integer entry */
#define S_VALUE 2               /* string entry */
#define W_VALUE 3               /* word entry */

    k = 0;

    c = get_next_non_blank();

    if (c == '{')
        type = B_VALUE;
    else if (isdigit(c))
        s[k++] = '"', type = I_VALUE; /* convert I type to S type */
    else if (c == '"')
        type = S_VALUE;
    else if (isalpha(c))
        type = W_VALUE;
    else
    {
        if (c != EOF)
            out_c(c);
        error("Expected BibTeX value string");
        resync();
        return("");
    }

    for (; (c != EOF) && (collecting == YES); k++)
    {
        if (k >= MAX_TOKEN)
        {
            s[k] = '\0';
            error("BibTeX string too long");
            resync();
            return(s);
        }
        if (isspace(c))
            c = ' ';            /* change whitespace to real space */
        else if (c == '{')
            b_level++;
        else if (c == '}')
            b_level--;
        switch (type)
        {
        case B_VALUE:
            s[k] = c;
            if (b_level == 0)
                collecting = NO;
            break;
        case I_VALUE:
            if (isdigit(c))
                s[k] = c;
            else
                s[k] = '"', put_back(c), collecting = NO;
            break;
        case S_VALUE:
            if (isdigit(c))
            {
                /* the extra hyphen is added only while it still */
                /* leaves room for c and the final NUL */
                if ((strcmp(key,"pages") == 0) && (k > 1) &&
                    ((k + 1) < MAX_TOKEN) &&
                    isdigit(s[k-2]) && (s[k-1] == '-'))
                    s[k++] = '-'; /* change hyphen to en-dash */
            }
            s[k] = c;
            if ((c == '"') && (k > 0) && (b_level == 0) && (s[k-1] != '\\'))
                collecting = NO;
            break;
        case W_VALUE:
            if (isidchar(c))
                s[k] = c;
            else
                --k, put_back(c), collecting = NO;
        }
        if (collecting == YES)
            c = isspace(c) ? get_next_non_blank() : get_char();
    }
    s[k] = '\0';
    if (type == B_VALUE)        /* convert braced string to quoted string */
    {
        b_level = 0;
        for (k = 0, n = 0; s[k]; ++k)
        {
            if (s[k] == '{')
                b_level++;
            else if (s[k] == '}')
                b_level--;

            /* s[0] is the opening brace, so s[k-1] is never */
            /* examined when k is 0 */
            need = ((s[k] == '"') && (s[k-1] != '\\') && (b_level == 1))
                    ? 3 : 1;
            if ((n + need) > MAX_TOKEN)
            {                   /* protecting quotes made it too long */
                t[n] = '\0';
                error("BibTeX string too long");
                resync();
                return (t);
            }
            if (need == 3)
                t[n++] = '{', t[n++] = '"', t[n++] = '}';
            else
                t[n++] = s[k];
        }
        t[0] = '"';             /* outer braces become quotation marks */
        t[n-1] = '"';
        t[n] = '\0';
        if (n >= STD_MAX_TOKEN)
            warning("String length exceeds standard BibTeX limit");
        return (t);
    }
    else
    {
        if (k >= STD_MAX_TOKEN)
            warning("String length exceeds standard BibTeX limit");
        return (s);
    }
}


/* Print the usage message and a pointer to the manual page on
   stderr, then exit with EXIT_SUCCESS (the -help and -? switches). */
static void
help()
{
    static const char more[] =
        "For more details, on UNIX, do\n\tman bibclean\n";

    usage();
    (void)fputs(more,stderr);
    exit(EXIT_SUCCESS);
}


/* Program entry point.

   After the command-line switches have been processed, stdin is
   copied to stdout by alternating between verbatim text and BibTeX
   entries until end of file.  The exit code is EXIT_FAILURE if any
   error was reported, and EXIT_SUCCESS otherwise. */
int
main(argc,argv)
int argc;
char* argv[];
{
    int status;

#ifdef vms
    extern char **cmd_lin();

    argv = cmd_lin( "", &argc, argv );
#endif

    do_args(argc,argv);

    for (new_entry(); eofile == NO; )
    {
        do_other();
        do_BibTeX_entry();
    }
    put_char(EOF);              /* flush all buffered output */

    status = error_count ? EXIT_FAILURE : EXIT_SUCCESS;
    exit (status);
    return (status);            /* not reached */
}


static void
new_entry()                     /* initialize for new BibTeX @name{...} */
{
    at_level = 0;
    at_line_number = output_line_number;
    brace_level = 0;
}


/* Output one character and update the output column; all output
   flows through this function.

   c  character to write.  A newline zeroes the column and advances
      output_line_number; a tab advances the column to the next
      multiple of 8; anything else advances it by one. */
static void
out_c(c)
int c;
{
    switch (c)
    {
    case '\n':
        column = 0;
        output_line_number++;
        break;

    case '\t':
        column = (column + 8) & ~7;
        break;

    default:
        column++;
        break;
    }
    put_char(c);
}


/* Output a string, wrapping long lines.

   s  NUL-terminated text to write.

   A white-space character (blank, tab, form feed, newline) is
   replaced by a line wrap when it and the word that follows would
   reach MAX_COLUMN.  After a comma, colon, semicolon or period, the
   line is wrapped when the following word would reach MAX_COLUMN.
   The tests are ">= MAX_COLUMN" so as to leave one column free for a
   comma following the string. */
static void
out_s(s)
const char *s;
{
    const char *p;
    int is_blank;
    int is_punct;

    for (p = s; *p != '\0'; ++p)
    {
        is_blank = (*p == ' ') || (*p == '\t') ||
                   (*p == '\f') || (*p == '\n');
        is_punct = (*p == ',') || (*p == ':') ||
                   (*p == ';') || (*p == '.');

        if (is_blank)
        {                       /* the blank itself may become the wrap */
            if ((column + 1 + word_length(p+1)) < MAX_COLUMN)
                out_c(*p);
            else
                wrap_line();
        }
        else
        {
            out_c(*p);
            if (is_punct && ((column + word_length(p+1)) >= MAX_COLUMN))
                wrap_line();
        }
    }
}


/* Push the last get_char() value back onto the input stream, and
   reverse the status adjustments that get_char() made for it.

   c  the character to push back.  non_white_chars is decremented for
      anything that is not white space; eofile is cleared for EOF; a
      newline steps input_line_number back; and curly braces restore
      brace_level. */
static void
put_back(c)
int c;
{
    ungetc(c,stdin);

    if (!isspace(c))
        non_white_chars--;

    switch (c)
    {
    case EOF:
        eofile = NO;
        break;

    case '\n':
        input_line_number--;
        break;

    case '{':
        brace_level--;
        break;

    case '}':
        brace_level++;
        break;

    default:
        break;
    }
}


/* Append c to the output buffer, trimming trailing blanks from each
   line, and write the buffer to stdout when it is full or when c is
   EOF.

   c  character to output, or EOF to flush everything buffered.

   The buffer is written with fwrite() and an explicit length, so a
   NUL byte in the data does not cut the output short.

   When the buffer is written only because it is full, any blanks at
   its end are held back rather than written: the line they belong
   to is not finished, and they must still be trimmed if a newline
   follows.  A buffer made up entirely of blanks is written whole so
   that room is always made. */
static void
put_char(c)
int c;
{
    static int buf_length = 0;
    static char buf[MAX_BUFFER+1]; /* 1 extra slot for trailing NUL */
    int held;                   /* trailing blanks kept in the buffer */

    if ((c == EOF) || (buf_length >= MAX_BUFFER))
    {
        held = 0;
        if (c != EOF)
        {
            while ((held < buf_length) && (buf[buf_length-1-held] == ' '))
                held++;
            if (held == buf_length)
                held = 0;       /* nothing but blanks: write them all */
        }
        if (buf_length > held)
            (void)fwrite(buf,1,(size_t)(buf_length - held),stdout);

        /* whatever was held back is all blanks; rebuild it in front */
        for (buf_length = 0; buf_length < held; ++buf_length)
            buf[buf_length] = ' ';

        if (c == EOF)
            return;
    }
    if (c == '\n')              /* trim trailing spaces */
    {
        while ((buf_length > 0) && (buf[buf_length-1] == ' '))
            buf_length--;
    }
    buf[buf_length++] = c;
}


/* Recover from a parsing error: set the resynchronization flag so that
   callers abandon the current entry, then copy the input verbatim to
   the output until do_other() stops at the "@" of a new entry or at
   end of file. */
static void
resync()                        /* copy input to output until new entry met */
{                               /* and set resynchronization flag */
    rflag = YES;
    do_other();                 /* copy text until new entry found */
}


/*******************************************************************
  Compare strings ignoring case, stopping after n characters, or at
  end-of-string, whichever comes first.

  s1, s2   NUL-terminated strings to compare
  n        maximum number of characters to compare

  Returns 0 if the strings match over the characters compared, -1 if
  s1 sorts before s2, and 1 if it sorts after.

  Characters are converted to unsigned char before being handed to
  the <ctype.h> functions, which are not defined for negative
  values, and only upper-case letters are passed to tolower().
*******************************************************************/
int
strnicmp(s1,s2,n)
const char      *s1;
const char      *s2;
size_t          n;
{
    int    c1;
    int    c2;

    for (; (n > 0) && *s1 && *s2; ++s1, ++s2, --n)
    {
        c1 = (unsigned char)*s1;
        c2 = (unsigned char)*s2;
        if (isupper(c1))
            c1 = tolower(c1);
        if (isupper(c2))
            c2 = tolower(c2);
        if (c1 < c2)
            return (-1);
        else if (c1 > c2)
            return (1);
    }
    if (n == 0)            /* first n characters match */
        return (0);
    else if (*s1 == '\0')
        return ((*s2 == '\0') ? 0 : -1);
    else /* (*s2 == '\0') */
        return (1);
}


/* Print a one-line summary of the command-line switches on stderr,
   prefixed by the program name. */
static void
usage()
{
    static const char switches[] =
    "[-author] [-help] ['-?'] [-keep-initials] [-version] <infile >outfile";

    (void)fprintf(stderr, "Usage: %s %s\n", program_name, switches);
}


/* Print the program version banner on stderr (the -version switch).
   If any of HOST, USER, __DATE__ or __TIME__ is defined at compile
   time, a second line is printed, "Compiled by <USER@HOST> on DATE
   TIME", containing whichever of those items are available; HOST is
   shown only when USER is also defined. */
static void
version()
{
    (void)fprintf(stderr,"%s\n",BIBCLEAN_VERSION);

#if defined(HOST) || defined(USER) || defined(__DATE__) || defined(__TIME__)
    (void)fprintf(stderr,"Compiled");

#if defined(USER)
    (void)fprintf(stderr," by <%s",USER);

#if defined(HOST)
    (void)fprintf(stderr,"@%s",HOST);
#endif /* defined(HOST) */

    (void)fprintf(stderr,">");
#endif /* defined(USER) */

#if defined(__DATE__)
    (void)fprintf(stderr," on %s",__DATE__);
#endif /* defined(__DATE__) */

#if defined(__TIME__)
    (void)fprintf(stderr," %s",__TIME__);
#endif /* defined(__TIME__) */

    (void)fprintf(stderr,"\n");
#endif /* defined(HOST)||defined(USER)||defined(__DATE__)||defined(__TIME__) */

}


/* Issue a warning message to stderr.

   msg  text of the warning.

   All buffered output is flushed first.  The message is prefixed by
   "% " and followed by the current input and output line numbers.
   Unlike error(), nothing is written to stdout and the error count
   is not changed. */
static void
warning(msg)
const char *msg;
{
    put_char(EOF);              /* flush all buffered output */

    (void)fprintf(stderr,"%% %s",msg);
    (void)fprintf(stderr," at line %d (input) %d (output)\n",
        input_line_number,output_line_number);
}


/* Return the length of the leading non-blank prefix of s, that is,
   the number of characters before the first white-space character or
   the end of the string.

   s  NUL-terminated string; it is not modified.

   Characters are converted to unsigned char before being handed to
   isspace(), which is not defined for negative values. */
static int
word_length(s)
const char *s;
{
    int n;

    for (n = 0; s[n]; ++n)
    {
        if (isspace((unsigned char)s[n]))
            break;
    }
    return (n);
}


/* Start a new output line and indent it with VALUE_INDENTATION
   blanks, so that a continued value lines up under its first line. */
static void
wrap_line()
{
    int pad = VALUE_INDENTATION;

    out_c('\n');
    while (pad-- > 0)
        out_c(' ');             /* leading indentation */
}
