/***********************************************************************
 @C-file{
    author              = "Nelson H. F. Beebe",
    version             = "0.10",
    date                = "22 April 2000",
    time                = "13:12:52 MDT",
    filename            = "bibcheck.c",
    address             = "Center for Scientific Computing
			   Department of Mathematics
			   University of Utah
			   Salt Lake City, UT 84112
			   USA",
    telephone           = "+1 801 581 5254",
    FAX                 = "+1 801 581 4148",
    checksum            = "36188 1372 3378 31386",
    email               = "beebe@math.utah.edu (Internet)",
    URL                 = "http://www.math.utah.edu/~beebe/",
    codetable           = "ISO/ASCII",
    keywords            = "BibTeX, pattern matching, syntax checking",
    supported           = "yes",
    docstring           = "This program checks for miscellaneous
			   formatting irregularities in one or more
			   BibTeX files, using heuristic pattern
			   matching to detect common problems.

			   lacheck(1) performs similar checks on LaTeX
			   files, and its great utility was the
			   inspiration for this program.

			   Any warning messages produced are sent to
			   stdout; serious errors result in messages
			   on stderr.

			   The input stream is parsed using the
			   biblex(1) lexical analyzer, guaranteeing
			   adherence to the rigorous BibTeX grammar
			   (see Nelson H. F. Beebe, "Bibliography
			   prettyprinting and syntax checking",
			   TUGboat 14(3), 222, October (1993) and
			   TUGboat 14(4), 395--419, December (1993)).

			   Usage:
				   bibcheck [-author] [-copyright]
					[-debug] [-help] [-version]
					BibTeXfile(s)

				   bibcheck [-author] [-copyright]
					[-debug] [-help] [-version]
					<BibTeXfile

			   Command-line options may be abbreviated to
			   unique leading prefixes, and letter case is
			   ignored.

			   If -author is specified, program author
			   information is displayed on stderr, and
			   similarly for -copyright, -help, and
			   -version.

			   If -debug is given, bibcheck will produce a
			   token stream on stdout identical to that
			   from biblex or from bibclean -noprettyprint;
			   bibunlex(1) can reconstruct a BibTeX file
			   from this token stream.

			   The checksum field above contains a CRC-16
			   checksum as the first value, followed by
			   the equivalent of the standard UNIX wc
			   (word count) utility output of lines,
			   words, and characters.  This is produced by
			   Robert Solovay's checksum utility.",
}
***********************************************************************/

#define BIBCHECK_VERSION	"bibcheck Version 0.10 [22-Apr-2000]"

#include <config.h>
#include "xstring.h"
#include "xstdlib.h"
#include "xctype.h"
#include "hash.h"
#include "regexp/regexp.h"

#ifdef MAX
#undef MAX
#endif
/* Larger of a and b.  NOTE: the selected argument is evaluated twice,
   so do not pass expressions with side effects. */
#define MAX(a,b)	(((a) > (b)) ? (a) : (b))

/* True when command-line argument s matches option name t over the
   first max(n, strlen(s)) characters, i.e. s must supply at least n
   leading characters of t and nothing that disagrees with t.
   strnicmp() is presumably a case-insensitive strncmp() supplied by a
   project header -- confirm. */
#define ARGEQUAL(s,t,n)	(strnicmp((s),(t),MAX((n),(int)strlen(s))) == 0)
/* Reduce c to its low 8 bits as an unsigned value. */
#define BYTE_VAL(c)	((unsigned int)((c) & 0xff))
/* <ctype.h> classifiers made safe for plain (possibly negative) chars. */
#define ISDIGIT(c)	isdigit(BYTE_VAL(c))
#define ISPRINT(c)	isprint(BYTE_VAL(c)) /* ensure arg in 0..255 */
#define ISSPACE(c)	isspace(BYTE_VAL(c))
/* Write one character to stdout, discarding fputc()'s result. */
#define output(c)	(void)fputc((int)(c),stdout)

/* Token codes handled by this file.  dolex() obtains them by
   subtracting 1000 from the value returned by yylex().  The numeric
   values index type_name[], so both definitions below (enum for
   Standard C, #define list otherwise) must stay in step with that
   array. */
#if defined(HAVE_STDC)
typedef enum token_list {
    TOKEN_UNKNOWN = 0,
    TOKEN_ABBREV = 1,		/* alphabetical order, starting at 1 */
    TOKEN_AT,
    TOKEN_COMMA,
    TOKEN_COMMENT,
    TOKEN_ENTRY,
    TOKEN_STREQUALS,
    TOKEN_FIELD,
    TOKEN_INCLUDE,
    TOKEN_INLINE,
    TOKEN_KEY,
    TOKEN_LBRACE,
    TOKEN_LITERAL,
    TOKEN_NEWLINE,
    TOKEN_PREAMBLE,
    TOKEN_RBRACE,
    TOKEN_SHARP,
    TOKEN_SPACE,
    TOKEN_STRING,
    TOKEN_VALUE
} token_t;
#else
typedef int token_t;
#define	TOKEN_UNKNOWN	0
#define	TOKEN_ABBREV	1		/* alphabetical order, starting at 1 */
#define	TOKEN_AT	2
#define	TOKEN_COMMA	3
#define	TOKEN_COMMENT	4
#define	TOKEN_ENTRY	5
#define	TOKEN_STREQUALS	6
#define	TOKEN_FIELD	7
#define	TOKEN_INCLUDE	8
#define	TOKEN_INLINE	9
#define	TOKEN_KEY	10
#define	TOKEN_LBRACE	11
#define	TOKEN_LITERAL	12
#define	TOKEN_NEWLINE	13
#define	TOKEN_PREAMBLE	14
#define	TOKEN_RBRACE	15
#define	TOKEN_SHARP	16
#define	TOKEN_SPACE	17
#define	TOKEN_STRING	18
#define	TOKEN_VALUE	19
#endif

/* Printable names of the tokens, written by out_token() in the debug
   token stream.  Entry k names the token whose code is k; note that
   the entry for TOKEN_STREQUALS (6) is spelled "EQUALS". */
static const char *type_name[] =
{				/* must be indexable by TOKEN_xxx */
    "UNKNOWN",
    "ABBREV",			/* alphabetical order, starting at 1 */
    "AT",
    "COMMA",
    "COMMENT",
    "ENTRY",
    "EQUALS",
    "FIELD",
    "INCLUDE",
    "INLINE",
    "KEY",
    "LBRACE",
    "LITERAL",
    "NEWLINE",
    "PREAMBLE",
    "RBRACE",
    "SHARP",
    "SPACE",
    "STRING",
    "VALUE",
};

#define	ERROR_PREFIX	"??"	/* this prefixes all error messages */
#define WARNING_PREFIX	"%%"	/* this prefixes all warning messages */

#ifndef EXIT_SUCCESS
#define EXIT_SUCCESS	0
#endif

#ifndef EXIT_FAILURE
#define EXIT_FAILURE	1
#endif

#define MAX_ABBREV	((HASH_INT)1021)
#define MAX_LABEL	((HASH_INT)8191)/* Each entry takes 8 bytes: keep */
					/* below 64KB total for IBM PC. */
#define MAX_KEYWORD	((HASH_INT)257)

#define MAX_MSG		512		/* message buffer size */

/* One heuristic check: a regular expression and what to do on a match. */
typedef struct REGEXP_ENTRY_S
{
    const char *re;		/* pattern source text */
    regexp *compiled_re;	/* NULL until match() compiles re on first use */
    void (*action) ARGS((void)); /* called by do_value() on a match; may be NULL
				    for entries that are only passed to match() */
} REGEXP_ENTRY;

extern long		line_number;
extern const char	*the_filename;
extern char		yytext[];

#ifndef YYLMAX
#define YYLMAX		32512
#endif

static HASH_TABLE	*abbrev_table;
static long		blanks = 0;
static int		debug = 0;

#if 0
static long		end_line_number = 0;
#endif

static HASH_TABLE	*keyword_table;
static HASH_TABLE	*label_table;
static char		last_keyword[YYLMAX+1];
static int		RLENGTH; /* length of last regexp match, (-1 if none) */
static int		RSTART;  /* index in yytext[] of last match (0 if none) */
static token_t		type = TOKEN_UNKNOWN;

int			main ARGS((int argc, char* argv[]));

static void		add_keyword ARGS((const char *s_));
static void		ampersand ARGS((void));
static void		author ARGS((void));
static void		author_editor ARGS((void));
static void		bad_period ARGS((void));
static void		bad_punctuation ARGS((void));
static void		begin_entry ARGS((void));
static void		braced_letter ARGS((void));
static void		colon_quote ARGS((void));
static void		compact_space ARGS((void));
static void		delete_table_entries ARGS((HASH_TABLE *table_));
static void		do_keyword ARGS((void));
static void		do_label ARGS((void));
static void		do_string ARGS((void));
static void		do_value ARGS((void));
static void		dolex ARGS((void));
static void		editor ARGS((void));
static void		ellipsis ARGS((void));
static void		end_blank ARGS((void));
static void		end_entry ARGS((void));
static HASH_TABLE	*enlarge_table ARGS((HASH_TABLE *table_));
static void		error ARGS((const char *s_));
static void		free_table ARGS((HASH_TABLE *table_));
static void		initials ARGS((void));
static int		in_table ARGS((const char *s_, HASH_TABLE *table_));
static void		install ARGS((const char *key_, const char *value_,
				      HASH_TABLE **ptable_));
static const char	*lookup ARGS((const char *key_, HASH_TABLE *table_));
static char		*lower ARGS((char *s_));
static HASH_TABLE	*make_hash_table ARGS((HASH_INT size_));
static int		match ARGS((REGEXP_ENTRY *r_, const char *s_));
static void		math_mode ARGS((void));
static void		mixed_case ARGS((void));
static void		opt_author ARGS((void));
static void		opt_copyright ARGS((void));
static void		opt_usage ARGS((void));
static void		opt_version ARGS((void));
static void		out_lines ARGS((FILE *fpout_, const char *lines_[]));
static token_t		out_protected_string ARGS((token_t t_));
static token_t		out_token ARGS((token_t t_));
static void		overflow ARGS((void));
static void		pages ARGS((void));
static void		page_range ARGS((void));
static void		percent ARGS((void));
static void		quotation_mark ARGS((void));
static void		sharp ARGS((void));
static void		tab ARGS((void));
static void		tie ARGS((void));
static void		trailing_blank ARGS((void));
static void		warning ARGS((const char *s_));

#if defined(HAVE_FLEX)
int			yylex ARGS((void));
#else
#if (__hppa || _IBMR2 || __sgi || __solaris || sun) && defined(__cplusplus)
/* Most C++ compilers assume that yylex() is a C function instead of a C++ function */
extern "C" int		yylex ARGS((void));
#else
int			yylex ARGS((void));
#endif
#endif

/*
 * Program entry point.
 *
 * Creates the abbreviation, keyword, and label hash tables, then scans
 * the command line.  Arguments matching -author, -copyright, -help, or
 * -version print their text and exit at once; -debug turns on the token
 * dump and moves on.  Every other argument is treated as a file name:
 * it is reopened as stdin and lexed with dolex().  If no file argument
 * was seen, the original stdin is lexed instead.  The process always
 * terminates through exit(EXIT_SUCCESS).
 */
#if defined(HAVE_STDC)
int
main(int argc, char* argv[])
#else
int
main(argc,argv)
int argc;
char* argv[];
#endif
{
    FILE *fp;
    int k;
    int nfiles = 0;

    last_keyword[0] = '\0';
    abbrev_table = make_hash_table(MAX_ABBREV);
    keyword_table = make_hash_table(MAX_KEYWORD);
    label_table = make_hash_table(MAX_LABEL);

    if (argc > 1)		/* files to lex named on command line */
    {
	for (k = 1; k < argc; ++k)
	{
	    char *the_arg;

	    the_arg = argv[k];
	    /* accept GNU-style "--option" by skipping one of the hyphens */
	    if ((the_arg[0] == '-') && (the_arg[1] == '-'))
		the_arg++;
	    if (ARGEQUAL(the_arg,"-author",2))
	    {
		opt_author();
		exit(EXIT_SUCCESS);
	    }
	    else if (ARGEQUAL(the_arg,"-copyright",2))
	    {
		opt_copyright();
		exit(EXIT_SUCCESS);
	    }
	    else if (ARGEQUAL(the_arg,"-debug",2))
	    {
		debug = 1;
		continue;
	    }
	    else if (ARGEQUAL(the_arg,"-help",2))
	    {
		opt_usage();
		exit(EXIT_SUCCESS);
	    }
	    else if (ARGEQUAL(the_arg,"-version",2))
	    {
		opt_version();
		exit(EXIT_SUCCESS);
	    }
	    /* not an option: attach the named file to stdin for yylex() */
	    fp = freopen(the_arg,"r",stdin);
	    /* counted even when the open fails, so that a bad file name
	       does not cause a fallback to reading the original stdin */
	    nfiles++;
	    if (fp == (FILE*)NULL)
	    {
		(void)fprintf(stderr,
		    "\n%s Ignoring open failure on file [%s]\n",
		    ERROR_PREFIX, the_arg);
		(void)perror("perror() says");
	    }
	    else
	    {
		the_filename = (const char*)the_arg;
		dolex();
		/* NOTE(review): fp is stdin here, and the next iteration
		   passes stdin to freopen() again after this close --
		   confirm this is acceptable on all target platforms */
		(void)fclose(fp);
	    }
	}
    }
    if (nfiles == 0)			/* lex stdin */
    {
	the_filename = "stdin";
	dolex();
    }
    free_table(abbrev_table);
    free_table(keyword_table);
    free_table(label_table);
    exit (EXIT_SUCCESS);
    return (0);				/* NOT REACHED */
}

static void
dolex(VOID)
{
    int		n;
    token_t	t;

    delete_table_entries(abbrev_table);
    delete_table_entries(keyword_table);
    delete_table_entries(label_table);

    line_number = 1L;
    while ((n = (token_t)(yylex() - 1000)) > 0)
    {
	RSTART = 0;			/* clear in case warning() is */
	RLENGTH = -1;			/* invoked before match() is  */
	t = (token_t)n;
	if (debug)
	    out_token(t);
	switch (t)
	{
	case TOKEN_ABBREV:
	    if (type == TOKEN_STRING)
		do_string();
	    break;
	case TOKEN_AT:
	    begin_entry();
	    break;
	case TOKEN_ENTRY:
	    type = t;
	    break;
	case TOKEN_FIELD:
	    do_keyword();
	    break;
	case TOKEN_KEY:
	    do_label();
	    break;
	case TOKEN_PREAMBLE:
	    type = t;
	    break;
	case TOKEN_RBRACE:
	    end_entry();
	    break;
	case TOKEN_STRING:
	    type = t;
	    break;
	case TOKEN_VALUE:
	    if (type != TOKEN_PREAMBLE)
		do_value();
	    break;
	case TOKEN_UNKNOWN:
	    warning("unrecognized token");
	    break;
	case TOKEN_COMMA:
	case TOKEN_COMMENT:
	case TOKEN_STREQUALS:
	case TOKEN_INCLUDE:
	case TOKEN_INLINE:
	case TOKEN_LBRACE:
	case TOKEN_LITERAL:
	case TOKEN_NEWLINE:
	case TOKEN_SHARP:
	case TOKEN_SPACE:
	default:
	    break;
	}
    }
}

/*
 * Record that field name `name' was seen on the current line, and make
 * it the current keyword (last_keyword).
 *
 * keyword_table maps each field name of the current entry to a
 * comma-separated list of the line numbers where it occurred.  If the
 * name is already present with a list that differs from the current
 * line number, a duplicate-keyword warning is issued and the current
 * line is appended to the list.  (A repeat on the very same line is
 * silently accepted; do_keyword() relies on this because author(),
 * editor(), and pages() call add_keyword() a second time.)
 *
 * The %.400s precisions bound what is written into msg[]: the line
 * list grows with every duplicate, and an unbounded %s could run past
 * the MAX_MSG-byte buffer.
 */
#if defined(HAVE_STDC)
static void
add_keyword(const char *name)
#else
static void
add_keyword(name)
const char *name;
#endif
{
    char	msg[MAX_MSG];
    const char	*p;

    (void)sprintf(msg, "%ld", (long)line_number);

    if (in_table(name,keyword_table) &&
	!STREQUAL((p = lookup(name,keyword_table)),msg))
    {
	(void)sprintf(msg, "duplicate keyword seen at line(s) %.400s", p);
	warning(msg);
	/* p still points at the stored list, not at msg, so it is safe
	   to rebuild msg from it here */
	(void)sprintf(msg, "%.400s, %ld", p, (long)line_number);
    }
    install(name,msg,&keyword_table);
    (void)strcpy(last_keyword,name);
}

static void
ampersand(VOID)
{
    static REGEXP_ENTRY r = { "^[ \t]*%", (regexp*)NULL, NULL };

    if (match(&r,yytext))
	warning("unbackslashed ampersand");
    end_blank();
}

/* Handle an "author" field: record the keyword, then apply the checks
   shared with editor fields.
   NOTE(review): when reached from do_keyword(), yytext holds the field
   name rather than its value -- confirm that author_editor() is meant
   to examine that text. */
static void
author(VOID)
{
    add_keyword("author");
    author_editor();
}

/*
 * Checks common to author and editor fields, applied to yytext:
 *   - an isolated capital letter that is not followed by a period
 *     (a probable initial missing its dot);
 *   - a comma outside all braces;
 *   - braces that do not balance.
 * The closing-brace warning is repeated for every character scanned
 * while the nesting depth is negative.
 */
static void
author_editor(VOID)
{
    static REGEXP_ENTRY r = { "[^{}0-9A-Za-z.\"][A-Z][^{}0-9A-Za-z.'\\\"]",
			      (regexp*)NULL, NULL };
    int		depth;
    const char	*p;

    if (match(&r,yytext))
	warning("period missing following initial(s)");

    depth = 0;
    for (p = &yytext[0]; *p != '\0'; ++p)
    {
	switch (*p)
	{
	case '{':
	    ++depth;
	    break;
	case '}':
	    --depth;
	    break;
	case ',':
	    if (depth == 0)
		warning("comma at brace-level zero in author/editor names");
	    break;
	default:
	    break;
	}
	if (depth < 0)
	    warning("unbalanced closing brace in author/editor");
    }
    if (depth > 0)
	warning("unbalanced opening brace in author/editor");
}

/* Action for a blank followed by a period.  The space-before-
   punctuation warning is skipped when the current keyword is "lccn".
   end_blank() always runs (bad_punctuation() calls it too; the second
   call finds blanks already reset). */
static void
bad_period(VOID)
{
    if (!STREQUAL(last_keyword,"lccn"))
	bad_punctuation();
    end_blank();
}

/* Action for a blank in front of a punctuation mark: issue the warning
   and reset the blank-line counter. */
static void
bad_punctuation(VOID)
{
    warning("space before punctuation");
    end_blank();
}

/* Called by dolex() for each TOKEN_AT; currently an intentionally empty
   placeholder. */
static void
begin_entry(VOID)
{

}

/* Action for a single braced capital letter attached to a word: warn,
   then reset the blank-line counter. */
static void
braced_letter(VOID)
{
    warning("old-style bracing hinders word searches");
    end_blank();
}

/* Action for a colon followed by an opening `` quotation that starts
   with a capital letter; a warning is given only when the current
   keyword is "title". */
static void
colon_quote(VOID)
{
    if (STREQUAL(last_keyword,"title"))
	warning("...: ``...'' may need protecting brace on first word inside quotation marks");
}

/*
 * Rewrite yytext in place so that every run of whitespace characters
 * (as classified by ISSPACE) becomes exactly one blank.  The result is
 * never longer than the original text.
 */
static void
compact_space(VOID) /* compact runs of space to single blank */
{
    char	*dst;
    char	*src;

    src = (char*)&yytext[0];
    dst = src;
    while (*src != '\0')
    {
	if (ISSPACE(*src))
	{
	    *dst++ = ' ';		/* one blank stands for the run */
	    do
		++src;
	    while (ISSPACE(*src));
	}
	else
	    *dst++ = *src++;
    }
    *dst = '\0';
}

/*
 * Empty a hash table without destroying it: every entry returned by
 * the hash_next() iteration has its key and data strings released and
 * the corresponding pointers reset to NULL.
 */
#if defined(HAVE_STDC)
static void
delete_table_entries(HASH_TABLE *table)
#else
static void
delete_table_entries(table)
HASH_TABLE *table;
#endif
{
    HASH_ENTRY	*entry;

    entry = hash_next(1,table);		/* 1: start a new traversal */
    while (entry != (HASH_ENTRY*)NULL)
    {
	if (entry->hash_key != (const char *)NULL)
	{
	    FREE(entry->hash_key);
	    entry->hash_key = (const char *)NULL;
	}
	if (entry->hash_data != (VOIDP)NULL)
	{
	    FREE(entry->hash_data);
	    entry->hash_data = (VOIDP)NULL;
	}
	entry = hash_next(0,table);	/* 0: continue the traversal */
    }
}

static void
do_keyword(VOID)
{
    const char	*key;

    key = lower(yytext);
    add_keyword(key);
    if (STREQUAL(key,"author"))
	author();
    else if (STREQUAL(key,"editor"))
	editor();
    else if (STREQUAL(key,"pages"))
	pages();
}

/*
 * Handle a citation-label token (yytext).
 *
 * label_table maps each label to a comma-separated list of the line
 * numbers where it was seen.  A label that is already present draws a
 * duplicate warning and has the current line appended to its list.  A
 * label containing anything other than letters, digits, ':' and '-'
 * draws a "non-standard" warning.
 *
 * The label comes straight from the input and can be far longer than
 * msg[], and the line list grows with every duplicate, so all %s
 * conversions carry a precision that keeps the formatted text within
 * MAX_MSG bytes.
 */
static void
do_label(VOID)
{
    static REGEXP_ENTRY label_regexp =
	{ "^[A-Za-z0-9:-]+$", (regexp*)NULL, NULL };
    char	msg[MAX_MSG];
    const char	*p;
    const char	*this_label;

    this_label = &yytext[0];
    if (in_table(this_label, label_table))
    {
	p = lookup(this_label, label_table);
	(void)sprintf(msg,
		      "duplicate citation label [%.200s] at lines %.200s and %ld",
		      (const char*)this_label,
		      p,
		      (long)line_number);
	warning(msg);
	(void)sprintf(msg,"%.480s, %ld",p,(long)line_number);
    }
    else
	(void)sprintf(msg,"%ld",(long)line_number);
    install(this_label,msg,&label_table);

    if (!match(&label_regexp,this_label))
	warning("non-standard citation label");
}

/*
 * Handle the abbreviation name of an @String definition (yytext).
 *
 * abbrev_table maps each abbreviation to a comma-separated list of the
 * line numbers where it was defined.  A name that is already present
 * draws a duplicate warning and has the current line appended.
 *
 * The name comes straight from the input and the line list grows with
 * every duplicate, so all %s conversions carry a precision that keeps
 * the formatted text within the MAX_MSG-byte buffer.
 */
static void
do_string(VOID)
{
    char	msg[MAX_MSG];
    const char	*p;
    const char	*this_abbrev;

    this_abbrev = &yytext[0];
    if (in_table(this_abbrev, abbrev_table))
    {
	p = lookup(this_abbrev, abbrev_table);
	(void)sprintf(msg,
		      "duplicate abbreviation [%.200s] at lines %.200s and %ld",
		      (const char*)this_abbrev,
		      p,
		      (long)line_number);
	warning(msg);
	(void)sprintf(msg,"%.480s, %ld",p,(long)line_number);
    }
    else
	(void)sprintf(msg,"%ld",(long)line_number);
    install(this_abbrev,msg,&abbrev_table);
}

/*
 * Run every heuristic pattern against the value in yytext and call the
 * associated action for each one that matches.  Patterns are tried in
 * table order; the table ends at the entry whose pattern is NULL.
 */
static void
do_value(VOID)
{
    static REGEXP_ENTRY regexp_table[] =
    {
	{ "[.][.][.]",		(regexp*)NULL,		ellipsis },
	{ "[^\\]&",		(regexp*)NULL,		ampersand },
	{ "[^%].*%",		(regexp*)NULL,		percent },
	{ "[^\\]~",		(regexp*)NULL,		tie },
	{ "[^\\]\t",		(regexp*)NULL,		tab },
	{ " \"$",		(regexp*)NULL,		trailing_blank },
	{ "[^\\]#",		(regexp*)NULL,		sharp },
	{ "[$^_]",		(regexp*)NULL,		math_mode },
	{ "{\"}",		(regexp*)NULL,		quotation_mark },
	{ "{[A-Z]}[a-z]|[^\\A-Za-z][A-Za-z]+{[A-Z]}",
				(regexp*)NULL,		braced_letter },
	{ " [.]",		(regexp*)NULL,		bad_period },
	{ " [,?;:!]",		(regexp*)NULL,		bad_punctuation },
	{ "[a-z][A-Z]",		(regexp*)NULL,		mixed_case },
	{ "[A-Z][.][A-Z][.]",	(regexp*)NULL,		initials },
	{ ": *``[A-Z]",		(regexp*)NULL,		colon_quote },
	{ (const char*)NULL,	(regexp*)NULL,		NULL }
    };
    REGEXP_ENTRY *check;

    check = &regexp_table[0];
    while (check->re != (const char*)NULL)
    {
	if (match(check,yytext))
	    (*check->action)();
	++check;
    }
}

/* Handle an "editor" field: record the keyword, then apply the checks
   shared with author fields.
   NOTE(review): when reached from do_keyword(), yytext holds the field
   name rather than its value -- confirm that author_editor() is meant
   to examine that text. */
static void
editor(VOID)
{
    add_keyword("editor");
    author_editor();
}

/* Action for three consecutive periods: suggest \ldots{} instead, then
   reset the blank-line counter. */
static void
ellipsis(VOID)
{
    warning("replace ... by \\ldots{}");
    end_blank();
}

/* Close a run of blank lines: warn if the counter `blanks' exceeds one,
   then reset it to zero.
   NOTE(review): nothing in this file increments `blanks' -- presumably
   a leftover from an earlier line-oriented implementation; confirm. */
static void
end_blank(VOID)
{
    if (blanks > 1)
	warning("multiple blank lines");
    blanks = 0;
}

/* Called by dolex() for each TOKEN_RBRACE: reset the blank-line counter
   and empty keyword_table, so that duplicate-field detection starts
   afresh for whatever follows. */
static void
end_entry(VOID)
{

#if 0
    end_line_number = 0;
#endif

    end_blank();
    delete_table_entries(keyword_table);
}

/*
 * Replace `table' by a larger one holding the same entries.
 *
 * The new size is roughly 4/3 of the old size and always at least five
 * slots larger.  The hashing and comparison function pointers and the
 * statistics counters are carried over.  Each occupied entry is
 * re-inserted by looking its key up in the new table and moving the
 * key and data pointers across (the strings themselves are not
 * copied).  The old table is released with hash_free() and the new
 * table is returned; the caller must use the returned pointer from
 * then on.
 */
#if defined(HAVE_STDC)
static HASH_TABLE*
enlarge_table(HASH_TABLE *table)
#else
static HASH_TABLE*
enlarge_table(table)
HASH_TABLE *table;
#endif
{
    HASH_ENTRY	*h;
    HASH_INT	new_size;
    HASH_TABLE	*new_table;

    /* divide before multiplying; presumably to avoid overflowing
       HASH_INT on large tables -- confirm */
    new_size = table->hash_size / 3;
    new_size *= 4;
    new_size = MAX(new_size,table->hash_size + 5);
    new_table = make_hash_table(new_size);
    new_table->hash_index = table->hash_index;
    new_table->hash_compare = table->hash_compare;
    new_table->n_compare = table->n_compare;
    new_table->n_install = table->n_install;
    new_table->n_lookup = table->n_lookup;
    new_table->n_probe = table->n_probe;
    new_table->max_chain = table->max_chain;

    for (h = hash_next(1,table); h != (HASH_ENTRY*)NULL;
	 h = hash_next(0,table))
    {
	if (h->hash_key != (const char*)NULL)
	{
	    HASH_ENTRY *h_new;

	    /* find the slot this key occupies in the new table */
	    h_new = hash_lookup(h->hash_key, new_table);
	    if (h_new == (HASH_ENTRY*)NULL)
		error("internal error: invalid hash table"); /* does not return */
	    h_new->hash_key = h->hash_key;
	    h_new->hash_data = h->hash_data;
	}
    }
    hash_free(table);
    return (new_table);
}

/*
 * Report a fatal error on stderr in the form
 *     filename:line:??:message
 * and terminate the program with EXIT_FAILURE.  Does not return.
 */
#if defined(HAVE_STDC)
static void
error(const char *message)
#else
static void
error(message)
const char *message;
#endif
{
    (void)fprintf(stderr,
		  "%s:%ld:%s:%s\n",
		  the_filename,
		  (long)line_number,
		  ERROR_PREFIX,
		  message);
    exit(EXIT_FAILURE);
}

/* Destroy a hash table: release the key and data strings of all
   entries, then the table itself. */
#if defined(HAVE_STDC)
static void
free_table(HASH_TABLE *table)
#else
static void
free_table(table)
HASH_TABLE *table;
#endif
{
    delete_table_entries(table);
    hash_free(table);
}

/* Action for two dotted capital initials with nothing between them
   (such as "A.B."). */
static void
initials(VOID)
{
    warning("adjacent dotted initials perhaps need a space");
}

#if defined(HAVE_STDC)
static int
in_table(const char *key, HASH_TABLE *table)
#else
static int
in_table(key,table)
const char *key;
HASH_TABLE *table;
#endif
{
    HASH_ENTRY *h;

    h = hash_lookup(key,table);
    return ((h != (HASH_ENTRY*)NULL) && (h->hash_key != (const char*)NULL));
}

/*
 * Store the pair (key, value) in *ptable, replacing any value already
 * recorded for key.
 *
 * While hash_lookup() yields no slot the table is enlarged, which is
 * why the table is passed by address: *ptable may be replaced.  The
 * strings are kept as private copies.  When a slot already holds a
 * string at least as long as the new one, that storage is overwritten
 * in place; otherwise it is released and a fresh copy is made.
 */
#if defined(HAVE_STDC)
static void
install(const char *key, const char *value, HASH_TABLE **ptable)
#else
static void
install(key, value, ptable)
const char *key;
const char *value;
HASH_TABLE **ptable;
#endif
{
    HASH_ENTRY *slot;

    for (;;)				/* mostly 0, rarely 1, trip loop */
    {
	slot = hash_lookup(key,*ptable);
	if (slot != (HASH_ENTRY*)NULL)
	    break;
	*ptable = enlarge_table(*ptable);
    }

    (*ptable)->n_install++;		/* update statistics */

    if (slot->hash_key == (const char*)NULL)
	slot->hash_key = Strdup(key);
    else if ((int)strlen(key) <= (int)strlen(slot->hash_key))
	(void)strcpy((char *)slot->hash_key, key);	/* reuse space */
    else
    {
	FREE(slot->hash_key);
	slot->hash_key = Strdup(key);
    }

    if (slot->hash_data == (VOIDP)NULL)
	slot->hash_data = (VOIDP)Strdup(value);
    else if ((int)strlen(value) <= (int)strlen((const char*)slot->hash_data))
	(void)strcpy((char *)slot->hash_data, value);	/* reuse space */
    else
    {
	FREE(slot->hash_data);
	slot->hash_data = (VOIDP)Strdup(value);
    }
}

/*
 * Return the value string stored for `key' in `table'.  When the key
 * is absent (no slot, or an unoccupied slot) an empty string is
 * returned, never NULL.  The result points into the table and must not
 * be freed by the caller.
 */
#if defined(HAVE_STDC)
static const char*
lookup(const char *key, HASH_TABLE *table)
#else
static const char*
lookup(key,table)
const char *key;
HASH_TABLE *table;
#endif
{
    HASH_ENTRY	*slot;

    slot = hash_lookup(key,table);
    if ((slot == (HASH_ENTRY*)NULL) || (slot->hash_key == (const char*)NULL))
	return ("");
    return ((const char*)slot->hash_data);
}

/*
 * Convert the string s to lower case in place and return s.
 *
 * Each character is converted to unsigned char before it is handed to
 * isupper()/tolower(): passing a plain char with a negative value
 * (any byte above 0x7f where char is signed) to the <ctype.h>
 * functions is undefined behavior.
 */
#if defined(HAVE_STDC)
static char *
lower(char *s)
#else
static char *
lower(s)
char *s;
#endif
{
    char	*t;

    for (t = s ; *t; ++t)
    {
	if (isupper((unsigned char)*t))
	    *t = (char)tolower((unsigned char)*t);
    }
    return (s);
}

/*
 * Allocate a hash table with `size' entries via hash_alloc().  On
 * allocation failure the program is terminated through error(), so the
 * returned pointer is never NULL.
 */
#if defined(HAVE_STDC)
static HASH_TABLE*
make_hash_table(HASH_INT size)
#else
static HASH_TABLE*
make_hash_table(size)
HASH_INT size;
#endif
{
    HASH_TABLE	*table;
    char	msg[MAX_MSG];

    table = hash_alloc(size,0);
    if (table == (HASH_TABLE*)NULL)
    {
	(void)sprintf(msg,"cannot allocate hash table of %ld entries",
		      (long)size);
	error(msg);			/* does not return */
    }
    return (table);
}

#if defined(HAVE_STDC)
static int
match(REGEXP_ENTRY *r, const char *s)
#else
static int
match(r,s)
REGEXP_ENTRY *r;
const char *s;
#endif
{
    int		result;
    char	msg[MAX_MSG];

    if (r->compiled_re == (regexp*)NULL) /* must be first call */
    {
	if ((r->compiled_re = regcomp(r->re)) == (regexp*)NULL)
	{
	    (void)sprintf(msg,
			  "internal error: could not compile regular expression `%s'",
			  r->re);
	    error(msg);
	}
    }
    result = regexec(r->compiled_re,s);

    RSTART  = result ? (int)(r->compiled_re->startp[0] - s) : 0;
    RLENGTH = result ?
	(int)(r->compiled_re->endp[0] - r->compiled_re->startp[0]) : -1;

    return (result);
}

/*
 * Action for a value containing '$', '^', or '_'.
 *
 * Scans yytext counting math-mode delimiters: a '$' counts when it is
 * not the first character and is not preceded by a backslash, and a
 * "$$" pair counts as a single delimiter.  An odd total means math
 * mode was left open.  A '^' or '_' that is not backslash-escaped and
 * appears while the count is even lies outside math mode.  Values of
 * the "url" and "bibsource" fields are exempt from both checks.
 */
static void
math_mode(VOID)
{
    int		dollars;
    size_t	k;
    size_t	len_value;
    int		subsup_error;
    int		unescaped;
    const char	*value;

    /* Ignore bibsource and URL value strings which may have special
       characters, such as underscore */
    if (STREQUAL(last_keyword,"url") || STREQUAL(last_keyword,"bibsource"))
	return;

    value = &yytext[0];
    len_value = strlen(value);
    dollars = 0;
    subsup_error = 0;
    k = 0;
    while (k < len_value)
    {
	/* true when a preceding character exists and is not a backslash */
	unescaped = (k > 0) && (value[k-1] != '\\');

	if (value[k] == '$')
	{
	    if (unescaped)
		dollars++;
	    if (value[k+1] == '$')
		k++;			/* treat $$ ... $$ like $ ... $ */
	}
	else if (((value[k] == '_') || (value[k] == '^')) &&
		 unescaped &&
		 ((dollars % 2) == 0))
	    subsup_error++;		/* ^ or _ outside of math mode */

	++k;
    }
    if (dollars % 2)
	warning("unclosed math mode");
    if (subsup_error)
	warning("subscript or superscript outside math mode");
}

/*
 * Action for a lower-case letter immediately followed by an upper-case
 * letter.
 *
 * Walks yytext tracking brace depth.  A '}' seen at depth zero is
 * reported as an extra close brace; a depth that is still nonzero at
 * the end is reported as an extra open brace, highlighting the most
 * recent outermost '{'.  Lower/upper letter pairs at depth zero are
 * counted, and if any were found while the current keyword is "title"
 * the value is reported as needing braces.  RSTART and RLENGTH are
 * overwritten when a brace warning is issued.
 *
 * Characters are converted to unsigned char before being passed to
 * islower()/isupper(); handing those functions a negative plain char
 * is undefined behavior.
 */
static void
mixed_case(VOID)
{
    int 	brace_level;
    int		mixed;
    const char	*open_brace_position;
    const char	*value;

    for (brace_level = 0, mixed = 0, value = &yytext[0], open_brace_position = value;
	*value; ++value)
    {
	if (value[0] == '{')
	{
	    if (brace_level == 0)
		open_brace_position = value;
	    brace_level++;
	}
	else if (value[0] == '}')
	{
	    if (brace_level == 0)
	    {
		RSTART = (int)(value - &yytext[0]);
		RLENGTH = 1;
	        warning("extra close brace");
	    }
	    else
		brace_level--;
	}
	else if ((brace_level == 0) &&
		 islower((unsigned char)value[0]) &&
		 isupper((unsigned char)value[1])) /* value[1] is at worst the NUL */
	    mixed++;
    }
    if (brace_level != 0)
    {
	RSTART = (int)(open_brace_position - &yytext[0]);
	RLENGTH = 1;
	warning("extra open brace");
    }
    if ((mixed > 0) && STREQUAL(last_keyword,"title"))
	warning("mixed-case word should be braced");
}

/*
 * Implement the -author option: write the author's name and contact
 * details to stderr.
 *
 * The FAX number now agrees with the one given in the file header;
 * the text previously had its last four digits in a different order.
 */
static void
opt_author(VOID)
{
    static CONST char *author_text[] =
    {
	"Author:\n",
	"\tNelson H. F. Beebe\n",
	"\tCenter for Scientific Computing\n",
	"\tDepartment of Mathematics\n",
	"\tUniversity of Utah\n",
	"\tSalt Lake City, UT 84112\n",
	"\tUSA\n",
	"\tTel: +1 801 581 5254\n",
	"\tFAX: +1 801 581 4148\n",
	"\tEmail: <beebe@math.utah.edu>\n",
	"\tWWW URL: http://www.math.utah.edu/~beebe\n",
	(const char*)NULL,
    };

    out_lines(stderr, author_text);
}

/* Implement the -copyright option: write the boxed copyright and GPL
   notice to stderr. */
static void
opt_copyright(VOID)
{
    static CONST char *copyright_text[] =
    {
	"########################################################################\n",
	"########################################################################\n",
	"########################################################################\n",
	"###                                                                  ###\n",
	"###             bibcheck: check a BibTeX bibliography file           ###\n",
	"###                                                                  ###\n",
	"###              Copyright (C) 2000 Nelson H. F. Beebe               ###\n",
	"###                                                                  ###\n",
	"### This program is covered by the GNU General Public License (GPL), ###\n",
	"### version 2 or later, available as the file COPYING in the program ###\n",
	"### source distribution, and on the Internet at                      ###\n",
	"###                                                                  ###\n",
	"###               ftp://ftp.gnu.org/gnu/GPL                          ###\n",
	"###                                                                  ###\n",
	"###               http://www.gnu.org/copyleft/gpl.html               ###\n",
	"###                                                                  ###\n",
	"### This program is free software; you can redistribute it and/or    ###\n",
	"### modify it under the terms of the GNU General Public License as   ###\n",
	"### published by the Free Software Foundation; either version 2 of   ###\n",
	"### the License, or (at your option) any later version.              ###\n",
	"###                                                                  ###\n",
	"### This program is distributed in the hope that it will be useful,  ###\n",
	"### but WITHOUT ANY WARRANTY; without even the implied warranty of   ###\n",
	"### MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the    ###\n",
	"### GNU General Public License for more details.                     ###\n",
	"###                                                                  ###\n",
	"### You should have received a copy of the GNU General Public        ###\n",
	"### License along with this program; if not, write to the Free       ###\n",
	"### Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,   ###\n",
	"### MA 02111-1307 USA                                                ###\n",
	"########################################################################\n",
	"########################################################################\n",
	"########################################################################\n",
	(const char*)NULL,
    };
    out_lines(stderr, copyright_text);
}

/* Implement the -help option: write a short usage summary to stderr. */
static void
opt_usage(VOID)
{
    static CONST char *usage_text[] =
    {
	"Usage: bibcheck [-author] [-copyright] [-debug] [-help] [-version] [file(s)]\n",
	"The optional command-line switches may be abbreviated, and letter case\n",
	"is ignored.\n",
	(const char*)NULL,
    };
    out_lines(stderr, usage_text);
}

/* Implement the -version option: write the BIBCHECK_VERSION string and
   a newline to stderr. */
static void
opt_version(VOID)
{
    static CONST char *version_text[] =
    {
	BIBCHECK_VERSION,
	"\n",
	(const char*)NULL,
    };
    out_lines(stderr, version_text);
}

/*
 * Write each string of the NULL-terminated array `lines' to the stream
 * fpout, in order.  Nothing is added between or after the strings.
 */
#if defined(HAVE_STDC)
static void
out_lines(FILE *fpout, const char *lines[])
#else
static void
out_lines(fpout, lines)
FILE *fpout;
const char *lines[];
#endif
{
    const char	**next;

    next = lines;
    while (*next != (const char*)NULL)
    {
	(void)fputs(*next, fpout);
	++next;
    }
}

/*
 * Write yytext to stdout as one double-quoted line in which quotes,
 * backslashes, and the control characters \b \f \n \r \t \v appear as
 * backslash escapes, and any other non-printable byte appears as a
 * three-digit octal escape.  For a TOKEN_VALUE whose text does not
 * itself begin with a quote, escaped quote marks are supplied at both
 * ends.  An empty yytext produces no output at all.
 *
 * Always returns TOKEN_VALUE, regardless of t.
 */
#if defined(HAVE_STDC)
static token_t
out_protected_string(token_t t)
#else
static token_t
out_protected_string(t)
token_t t;
#endif
{
    int		add_quotes;
    int		escape;
    char	octal[4 + 1];
    const char	*p;

    if (yytext[0] == (char)'\0')	/* ignore empty tokens */
	return (TOKEN_VALUE);

    /* supply missing quote delimiters? */
    add_quotes = (yytext[0] != '"') && (t == TOKEN_VALUE);

    output('"');
    if (add_quotes)
    {
	output('\\');
	output('\"');
    }
    for (p = (const char*)&yytext[0]; *p != '\0'; ++p)
    {
	/* escape becomes the character to print after a backslash,
	   or stays 0 when no simple escape applies */
	escape = 0;
	switch (*p)
	{
	case '"':
	case '\\':
	    escape = *p;
	    break;
	case '\b':
	    escape = 'b';
	    break;
	case '\f':
	    escape = 'f';
	    break;
	case '\n':
	    escape = 'n';
	    break;
	case '\r':
	    escape = 'r';
	    break;
	case '\t':
	    escape = 't';
	    break;
	case '\v':
	    escape = 'v';
	    break;
	default:
	    break;
	}

	if (escape != 0)
	{
	    output('\\');
	    output(escape);
	}
	else if (ISPRINT(*p))
	    output(*p);
	else
	{
	    (void)sprintf(octal,"\\%03o",BYTE_VAL(*p));
	    output(octal[0]);
	    output(octal[1]);
	    output(octal[2]);
	    output(octal[3]);
	}
    }
    if (add_quotes)
    {
	output('\\');
	output('\"');
    }
    output('"');
    output('\n');
    return (TOKEN_VALUE);
}

#if defined(HAVE_STDC)
static token_t
out_token(token_t t)
#else
static token_t
out_token(t)
token_t t;
#endif
{		/* ALL token output is directed through this function */
    int	n;

    (void)fprintf(stdout,"%d\t%s\t", (int)t, type_name[(int)t]);
    switch (t)
    {
    case TOKEN_AT:
	(void)fprintf(stdout,"\"%s\"\n", yytext);
	break;

    case TOKEN_VALUE:
	if (ISDIGIT(yytext[0]))
	{				/* supply surrounding quotes */
	    n = strlen((const char*)&yytext[0]);
	    if ((n + 3) > YYLMAX)
		overflow();
	    yytext[n+2] = '\0';
	    yytext[n+1] = '"';
	    for (; n > 0; --n)
		yytext[n] = yytext[n-1];
	}
	else
	    compact_space();
	out_protected_string(t);
	break;

    case TOKEN_COMMENT:
    case TOKEN_INCLUDE:
	out_protected_string(t);
	break;

    case TOKEN_ENTRY:
    case TOKEN_PREAMBLE:
    case TOKEN_STRING:
	(void)fprintf(stdout,"\"%s\"\n", yytext);
	break;

    case TOKEN_FIELD:
    case TOKEN_KEY:
	(void)fprintf(stdout,"\"%s\"\n", yytext);
	break;

    case TOKEN_INLINE:
    case TOKEN_NEWLINE:
	line_number++;
	out_protected_string(t);
	(void)fprintf(stdout,"# line %ld \"%s\"\n", line_number, the_filename);
	break;

    case TOKEN_LITERAL:
    default:
	out_protected_string(t);
	break;
    }
    return (t);
}

/* Report that a token no longer fits in the YYLMAX-character text
   buffer and terminate through error().  Does not return.  The message
   carries its own newline in addition to the one error() appends. */
static void
overflow(VOID)
{
    char	msg[MAX_MSG];

    (void)sprintf(msg, "string too long for %ld-character buffer\n",
		  (long)YYLMAX);
    error(msg);
}

/* Handle a "pages" field: record the keyword, then report a single
   hyphen between two digits in yytext.
   NOTE(review): when reached from do_keyword(), yytext holds the field
   name rather than its value, so the pattern has nothing to match --
   confirm the intended text to examine. */
static void
pages(VOID)
{
    static REGEXP_ENTRY r = { "[0-9]-[0-9]", (regexp*)NULL, NULL };
    add_keyword("pages");
    if (match(&r,yytext))
	page_range();
}

/* Warn about a page range written with a single hyphen, then reset the
   blank-line counter. */
static void
page_range(VOID)
{
    warning("hyphen found where en-dash expected");
    end_blank();
}

/* Action for a value containing a percent sign after its first
   character: warn if some '%' is preceded by a character that is
   neither a backslash nor another '%'.  The match() call here replaces
   the RSTART/RLENGTH set by the caller, so the warning highlights the
   offending two-character span. */
static void
percent(VOID)
{
    static REGEXP_ENTRY r = { "[^\\%]%", (regexp*)NULL, NULL };

    if (match(&r,yytext))
	warning("non-backslashed percent");
    end_blank();
}

/* Action for a double quote enclosed in braces ({"}): warn, then reset
   the blank-line counter. */
static void
quotation_mark(VOID)
{
    warning("braced quotation mark");
    end_blank();
}

/* Action for a '#' that is not preceded by a backslash: warn, then
   reset the blank-line counter. */
static void
sharp(VOID)
{
    warning("non-backslashed sharp");
    end_blank();
}

/* Action for a horizontal tab character inside a value: warn, then
   reset the blank-line counter. */
static void
tab(VOID)
{
    warning("horizontal tab");
    end_blank();
}

/* Action for a tilde that is not preceded by a backslash. */
static void
tie(VOID)
{
    warning("ties (tildes) normally provided by BibTeX");
}

/* Action for a blank immediately before the closing quote of a value. */
static void
trailing_blank(VOID)
{
    warning("trailing space");
}

/*
 * Write a warning line to stdout in the form
 *     filename:line:%%:message:<TAB>[text]
 * where text is the current token, yytext.  If a regexp match is on
 * record (RLENGTH > 0), the matched span of yytext is set off with
 * << and >>; otherwise the token is shown unmarked.
 */
#if defined(HAVE_STDC)
static void
warning(const char *message)
#else
static void
warning(message)
const char *message;
#endif
{
    if (RLENGTH <= 0)			/* nothing to highlight */
    {
	(void)fprintf(stdout,
		      "%s:%ld:%s:%-45s:\t[%s]\n",
		      the_filename,
		      (long)line_number,
		      WARNING_PREFIX,
		      message,
		      yytext);
	return;
    }

    /* text before the match, <<matched text>>, text after the match */
    (void)fprintf(stdout,
		  "%s:%ld:%s:%-45s:\t[%.*s%s%.*s%s%s]\n",
		  the_filename,
		  (long)line_number,
		  WARNING_PREFIX,
		  message,
		  RSTART, yytext,
		  "<<",
		  RLENGTH, &yytext[RSTART],
		  ">>",
		  &yytext[RSTART+RLENGTH]);
}
