	/* bibparse lexer - tokenize a BibTeX file for the bibparse prototype grammar */
%{
 /**********************************************************************
 @Lex-file{
    author              = "Nelson H. F. Beebe",
    version             = "1.02",
    date                = "29 November 1994",
    time                = "12:08:14 MST",
    filename            = "biblex.l",
    address             = "Center for Scientific Computing
                           Department of Mathematics
                           University of Utah
                           Salt Lake City, UT 84112
                           USA",
    telephone           = "+1 801 581 5254",
    FAX                 = "+1 801 581 4148",
    checksum            = "00724 705 1596 14522",
    email               = "beebe@math.utah.edu (Internet)",
    codetable           = "ISO/ASCII",
    keywords            = "BibTeX, bibliography, lexical analysis, lexer",
    supported           = "yes",
    docstring           = "This lex file defines a lexical analyzer for
                           a prototype BibTeX grammar.

                           The resulting program reads one or more
                           BibTeX files specified on the command line,
                           or standard input, and produces on the
                           standard output a stream of tokens of the
                           form

                           <small-integer><tab><name><tab><quoted string>

                           This is similar to the format produced by
                           bibclean with the -no-prettyprint option.

                           Such token lines are interspersed with
                           input line identifier lines of the form
                           used by the ANSI/ISO Standard C preprocessor

                           # line 3 <quoted filename>

                           so that the error diagnostics can identify
                           the location in the input file(s).

                           The output stream can be filtered by other
                           utilities, and reconstructed into a BibTeX
                           file with an associated software tool,
                           bibunlex.

                           The checksum field above contains a CRC-16
                           checksum as the first value, followed by the
                           equivalent of the standard UNIX wc (word
                           count) utility output of lines, words, and
                           characters.  This is produced by Robert
                           Solovay's checksum utility.",
 }
 **********************************************************************/

/* NEW_STYLE is nonzero when compiling as C++ or Standard C; it selects the
   prototyped variants of the declarations and definitions in this file.
   Names that are not defined as macros evaluate to 0 inside #if. */
#define NEW_STYLE	(__cplusplus || __STDC__ || c_plusplus)

#if NEW_STYLE
#define VOID	void
/* ARGS(list) keeps the parenthesized parameter list in a prototype */
#define ARGS(parenthesized_list) parenthesized_list
#include <stdlib.h>
#else /* K&R style */
#define VOID
/* K&R declarations carry no parameter information */
#define ARGS(parenthesized_list) ()
#endif /* NEW_STYLE */

/* fallback exit codes for when nothing above has defined them */
#if !defined(EXIT_SUCCESS)
#define EXIT_SUCCESS	0
#define EXIT_FAILURE	1
#endif

#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include "bibyydcl.h"

/* Token codes handed to out_token() and returned (biased by the RETURN
   macro) from the scanner rules.  The enum and the K&R #define list must
   assign the same values, and those values index type_name[]. */
#if NEW_STYLE
typedef enum token_list {
    TOKEN_UNKNOWN = 0,
    TOKEN_ABBREV = 1,		/* alphabetical order, starting at 1 */
    TOKEN_AT,
    TOKEN_COMMA,
    TOKEN_COMMENT,
    TOKEN_ENTRY,
    TOKEN_EQUALS,
    TOKEN_FIELD,
    TOKEN_INCLUDE,
    TOKEN_INLINE,
    TOKEN_KEY,
    TOKEN_LBRACE,
    TOKEN_LITERAL,
    TOKEN_NEWLINE,
    TOKEN_PREAMBLE,
    TOKEN_RBRACE,
    TOKEN_SHARP,
    TOKEN_SPACE,
    TOKEN_STRING,
    TOKEN_VALUE
} token_t;
#else /* K&R style */
typedef int token_t;
#define	TOKEN_UNKNOWN	0
#define	TOKEN_ABBREV	1		/* alphabetical order, starting at 1 */
#define	TOKEN_AT	2
#define	TOKEN_COMMA	3
#define	TOKEN_COMMENT	4
#define	TOKEN_ENTRY	5
#define	TOKEN_EQUALS	6
#define	TOKEN_FIELD	7
#define	TOKEN_INCLUDE	8
#define	TOKEN_INLINE	9
#define	TOKEN_KEY	10
#define	TOKEN_LBRACE	11
#define	TOKEN_LITERAL	12
#define	TOKEN_NEWLINE	13
#define	TOKEN_PREAMBLE	14
#define	TOKEN_RBRACE	15
#define	TOKEN_SHARP	16
#define	TOKEN_SPACE	17
#define	TOKEN_STRING	18
#define	TOKEN_VALUE	19
#endif /* NEW_STYLE */

/* Printable name of each token code.  In this file it is referenced only
   from the disabled (#if 0) printing code in out_token(); it has external
   linkage, so other translation units may use it as well. */
const char *type_name[] =
{				/* must be indexable by TOKEN_xxx */
    "UNKNOWN",
    "ABBREV",			/* alphabetical order, starting at 1 */
    "AT",
    "COMMA",
    "COMMENT",
    "ENTRY",
    "EQUALS",
    "FIELD",
    "INCLUDE",
    "INLINE",
    "KEY",
    "LBRACE",
    "LITERAL",
    "NEWLINE",
    "PREAMBLE",
    "RBRACE",
    "SHARP",
    "SPACE",
    "STRING",
    "VALUE",
};

/* Forward declarations of the helper functions defined after the rules.
   The two declarations inside #if 0 belong to helpers whose definitions
   are disabled the same way. */
#if 0
static void		compact_space ARGS((void));
#endif

static void		eof ARGS((void));
static int		next_char ARGS((void));
static token_t		out_braced_literal ARGS((void));
static token_t		out_braced_string ARGS((void));
static token_t		out_lbrace ARGS((void));
static token_t		out_lparen ARGS((void));

#if 0
static token_t		out_protected_string ARGS((token_t t_));
#endif

static token_t		out_rbrace ARGS((void));
static token_t		out_rparen ARGS((void));
static token_t		out_string ARGS((void));
static token_t		out_token ARGS((token_t t_));
static void		overflow ARGS((void));

/* Input()/Output() hide the name the lex implementation gives its
   character input and output primitives.
   NOTE(review): this test spells the macro `cplusplus' whereas NEW_STYLE
   tests `c_plusplus' -- confirm which spelling is intended. */
#if (defined(__cplusplus) || defined(cplusplus)) && (defined(__solaris) || defined(sun))
#define Input()		lex_input()
#define Output(c)	lex_output(c)
#else
#define Input()		input()
#define Output(c)	output(c)
#endif /* (defined(__cplusplus) || defined(cplusplus)) && defined(__solaris) */

/* a flex scanner compiled as C++ names the input primitive yyinput() */
#if defined(FLEX_SCANNER) && defined(__cplusplus)
#undef Input
#define Input()		yyinput()
#endif

/* Scanner state shared by the rules and the out_xxx() helpers. */
/* brace_level: raised by out_lbrace()/out_lparen(), lowered by
   out_rbrace()/out_rparen() and while out_braced_literal() scans */
static int		brace_level = 0;
/* last_object: most recent COMMENT, INCLUDE, ENTRY, PREAMBLE or STRING
   token; out_token() resets it to TOKEN_UNKNOWN at each TOKEN_AT */
static token_t		last_object = TOKEN_UNKNOWN;
/* last_token: most recent token other than INLINE, SPACE and NEWLINE */
static token_t		last_token = TOKEN_UNKNOWN;
long			line_number = 1L; /* global: used in lexmain() */
/* paren_level: raised by out_lparen() when '(' opens an object */
static int		paren_level = 0;
const char		*the_filename = ""; /* global: used in lexmain() */

/* BYTE_VAL maps a possibly negative char to 0..255 so that it is a valid
   argument for the <ctype.h> classifiers wrapped below. */
#define BYTE_VAL(c)	((unsigned int)((c) & 0xff))
#define EOFILE		0		/* end-of-file from lex input() */
#define ISPRINT(c)	isprint(BYTE_VAL(c)) /* ensure arg in 0..255 */
#define ISDIGIT(c)	isdigit(BYTE_VAL(c))
#define ISSPACE(c)	isspace(BYTE_VAL(c))
#define RETURN(n)	return (1000 + (n))	/* bibparse.y biases by 1000 */

/* YYLMAX is the bound used by the overflow checks of the out_xxx() helpers,
   which append to yytext[] themselves.
   NOTE(review): assumes the lex implementation sizes yytext[] from YYLMAX;
   confirm for scanners where yytext is a pointer into the input buffer. */
#undef YYLMAX
#define YYLMAX	40960
%}
			/* increase transition (%a) and output (%o) table sizes */
%a		3000
%o		6000
				/* abbrev, entry, key, field name syntax: */
				/* a letter followed by any run of the */
				/* characters in the second class below. */
				/* Of the names defined here, O is not */
				/* referenced by any rule in this file and */
				/* S is used only by the INLINE rule. */
N		[A-Za-z][-A-Za-z0-9:.+/']*
					/* BibTeX entry opening delimiter */
O		[({]
					/* one white space character */
					/* \013 == \v, but lex doesn't */
					/* recognize \v */
W		[ \f\r\t\013]
					/* optional `horizontal' space */
S		{W}*

%%
[@]				RETURN (out_token(TOKEN_AT)); /* out_token() also resets last_object */

[Cc][Oo][Mm][Mm][Ee][Nn][Tt]	{ RETURN ((last_token == TOKEN_AT) ?
					out_token(TOKEN_COMMENT) : /* keyword only right after AT */
					out_token(TOKEN_ABBREV)); /* otherwise an ordinary name */
				}

[Ii][Nn][Cc][Ll][Uu][Dd][Ee]	{ RETURN ((last_token == TOKEN_AT) ?
					out_token(TOKEN_INCLUDE) : /* keyword only right after AT */
					out_token(TOKEN_ABBREV)); /* otherwise an ordinary name */
				}

[Pp][Rr][Ee][Aa][Mm][Bb][Ll][Ee] { RETURN ((last_token == TOKEN_AT) ?
					out_token(TOKEN_PREAMBLE) : /* keyword only right after AT */
					out_token(TOKEN_ABBREV)); /* otherwise an ordinary name */
				}

[Ss][Tt][Rr][Ii][Nn][Gg]	{ RETURN ((last_token == TOKEN_AT) ?
					out_token(TOKEN_STRING) : /* keyword only right after AT */
					out_token(TOKEN_ABBREV)); /* otherwise an ordinary name */
				}

{N}				{
				    /* Classify a name by context.  While the most */
				    /* recent object keyword is STRING every name */
				    /* is an abbreviation. */
				    if (last_object == TOKEN_STRING)
					RETURN(out_token(TOKEN_ABBREV));
				    /* Otherwise the preceding non-space token decides: */
				    /* after a comma a field name, after the opening */
				    /* delimiter a citation key, after AT an entry type. */
				    switch (last_token)
				    {
				    case TOKEN_COMMA:
					RETURN(out_token(TOKEN_FIELD));
				    case TOKEN_LBRACE:
					RETURN(out_token(TOKEN_KEY));
				    case TOKEN_AT:
					RETURN(out_token(TOKEN_ENTRY));
				    default:
					RETURN(out_token(TOKEN_ABBREV));
				    }
				}

[0-9]+				RETURN (out_token(TOKEN_VALUE)); /* run of digits */

[%].*[\n]{S}			RETURN (out_token(TOKEN_INLINE)); /* percent sign through newline, plus following blanks */

[#]				RETURN (out_token(TOKEN_SHARP));

["]				RETURN (out_string());

[{]				RETURN (out_lbrace());

[}]				RETURN (out_rbrace());

[(]				RETURN (out_lparen());

[)]				RETURN (out_rparen());

[=]				RETURN (out_token(TOKEN_EQUALS));

[,]				RETURN (out_token(TOKEN_COMMA));

[\n]				RETURN (out_token(TOKEN_NEWLINE));

{W}+				RETURN (out_token(TOKEN_SPACE));

.				RETURN (out_token(TOKEN_LITERAL));
%%
#if 0
/*
 * Disabled code: rewrite yytext[] in place so that every run of white
 * space characters (as classified by ISSPACE) becomes a single blank.
 * The only caller is inside a disabled region of out_token().
 */
static void
compact_space(VOID) /* compact runs of space to single blank */
{
    char *p;			/* read position */
    char *q;			/* write position, never ahead of p */

    for (p = q = (char*)&yytext[0]; *p ; )
    {
	*q++ = ISSPACE(*p) ? ' ' : *p;
	if (ISSPACE(*p))
	{
	    while (ISSPACE(*p))	/* skip the rest of the run */
		++p;
	}
	else
	    ++p;
    }
    *q = '\0';
}
#endif


/*
 * Report that the input ended while a value was still being collected,
 * then terminate the program with EXIT_FAILURE.  Does not return.
 */
static void
eof(VOID)
{
    (void)fputs("End-of-file in value string\n", stderr);
    exit(EXIT_FAILURE);
}


/*
 * Fetch the next input character for the helpers that scan strings and
 * literals by hand.
 *
 * A newline bumps line_number, because characters consumed here never
 * reach the NEWLINE rule.  End-of-file is fatal (see eof()): every caller
 * is in the middle of an unterminated value.
 *
 * Returns the character read.
 */
static int
next_char(VOID)
{
    int c = Input();

    /* Lex implementations differ in how the input primitive reports
       end-of-file: some return 0 (EOFILE), others return EOF.  Accept
       both, so that neither value is stored into yytext[] as data. */
    if ((c == EOFILE) || (c == EOF))
	eof();
    else if (c == '\n')
	line_number++;

    return (c);
}


/*
 * Collect the body of a COMMENT or INCLUDE object as one literal token.
 *
 * Called after the opening delimiter has been matched, with brace_level
 * (and, for a parenthesized object, paren_level) already incremented by
 * the caller.  Characters are read with next_char() and appended to
 * yytext[] from index 1 until the braces balance or, for a parenthesized
 * object, until the parenthesis that opened it is closed.  The delimiters
 * stored in yytext[] are normalized to '{' and '}'.
 *
 * Returns the value of out_token(TOKEN_LITERAL).
 */
static token_t
out_braced_literal(VOID)
{
    int c;
    int n;
    int plevel = paren_level;	/* local count; reaches 0 at the closing ')' */

    for (n = 1; brace_level > 0; )
    {
	c = next_char();
	if (n > (YYLMAX - 2))	/* keep room for this character and the NUL */
	    overflow();
	yytext[n++] = c;
	switch (c)
	{
	case '(':
	    plevel++;
	    break;
	case ')':
	    plevel--;
	    break;
	case '{':
	    brace_level++;
	    break;
	case '}':
	    brace_level--;
	    break;
	default:
	    break;
	}
	if ((paren_level > 0) && (plevel == 0))
	{
	    /* The ')' matching the opening '(' ends the object.  Undo the
	       increments made by out_lparen(); no RBRACE token follows a
	       literal, so nothing else would restore the levels and the
	       next '{' would be mistaken for a braced string. */
	    paren_level--;
	    brace_level--;
	    break;
	}
    }
    yytext[0] = '{';
    yytext[n-1] = '}';		/* overwrite the closing delimiter */
    yytext[n] = '\0';
    return (out_token(TOKEN_LITERAL));
}


/*
 * Collect a brace-delimited value and rewrite it in yytext[] as a
 * double-quoted string.
 *
 * Called for a '{' seen while brace_level is nonzero.  Characters are read
 * with next_char() and appended to yytext[] from index 1 until the braces
 * balance; the outer delimiters are then replaced by '"'.  A '"' at the
 * outermost level is wrapped in braces as it is stored, since it would
 * otherwise end the quoted form early.
 *
 * Returns the value of out_token(TOKEN_VALUE).
 */
static token_t
out_braced_string(VOID)
{					/* convert braced to quoted string */
    int blevel;
    int c;
    int n;

    for (blevel = 1, n = 1; (blevel > 0); )
    {
	c = next_char();
	/* one pass stores up to four characters, and the NUL follows */
	if (n > (YYLMAX - 5))
	    overflow();
	yytext[n++] = c;
	switch (c)
	{
	case '{':
	    blevel++;
	    break;

	case '}':
	    blevel--;
	    break;

	case '"':
	    if (blevel == 1)
	    {
		if (yytext[n-2] == '\\')
		{
		    /* backslash, quote and the following character X
		       become {\"X}; X is stored without being examined
		       by the switch, so a brace there is not counted */
		    c = next_char();
		    yytext[n-2] = '{';
		    yytext[n-1] = '\\';
		    yytext[n++] = '"';
		    yytext[n++] = c;
		    yytext[n++] = '}';
		}
		else
		{
		    /* a bare quote becomes {"} */
		    yytext[n-1] = '{';
		    yytext[n++] = '"';
		    yytext[n++] = '}';
		}
	    }
	    break;

	default:
	    break;
	}
    }
    yytext[0] = '"';
    yytext[n-1] = '"';			/* replaces the closing '}' */
    yytext[n] = '\0';
    return (out_token(TOKEN_VALUE));
}


/*
 * Handle a '{' matched by the scanner.
 *
 * Inside an object (brace_level nonzero) the brace starts a braced value,
 * which out_braced_string() collects.  At the outer level it opens an
 * object: brace_level is raised, and the body of a COMMENT or INCLUDE
 * object is collected whole by out_braced_literal(); any other object
 * gets a plain LBRACE token.
 *
 * Returns the token code produced by the helper that was called.
 */
static token_t
out_lbrace(VOID)
{
    if (brace_level != 0)
	return (out_braced_string());

    brace_level++;
    if ((last_object == TOKEN_COMMENT) || (last_object == TOKEN_INCLUDE))
	return (out_braced_literal());

    return (out_token(TOKEN_LBRACE));
}


/*
 * Handle a '(' matched by the scanner.
 *
 * Directly after an ENTRY, PREAMBLE, STRING, COMMENT or INCLUDE token the
 * parenthesis is the opening delimiter of the object: it is rewritten to
 * '{' in yytext[] and both paren_level and brace_level are raised.  For
 * COMMENT and INCLUDE the body is then collected by out_braced_literal();
 * for the others an LBRACE token is produced.  Anywhere else the
 * parenthesis is passed through as a LITERAL token.
 *
 * Returns the token code produced by the helper that was called.
 */
static token_t
out_lparen(VOID)
{
    int opens_literal;
    int opens_object;

    opens_literal = (last_token == TOKEN_COMMENT) ||
		    (last_token == TOKEN_INCLUDE);
    opens_object = (last_token == TOKEN_ENTRY) ||
		   (last_token == TOKEN_PREAMBLE) ||
		   (last_token == TOKEN_STRING);

    if (!opens_literal && !opens_object)
	return (out_token(TOKEN_LITERAL));

    yytext[0] = '{';
    paren_level++;
    brace_level++;

    if (opens_literal)
	return (out_braced_literal());

    return (out_token(TOKEN_LBRACE));
}


#if 0
/*
 * Disabled code: write yytext[] through Output() as one double-quoted
 * string followed by a newline.
 *
 * Quotes and backslashes are escaped with a backslash, the control
 * characters \b \f \n \r \t \v are written as those two-character escapes,
 * and any other character that is not printable is written as a backslash
 * and three octal digits.  When t is TOKEN_VALUE and yytext[] does not
 * start with '"', an escaped quote is added at both ends of the text.
 * An empty yytext[] produces no output.
 *
 * Returns TOKEN_VALUE in every case.
 */
#if NEW_STYLE
static token_t
out_protected_string(token_t t)
#else /* K&R style */
static token_t
out_protected_string(t)
token_t t;
#endif /* NEW_STYLE */
{
    char octal[4 + 1];		/* backslash, three digits, NUL */
    const char *token = (const char*)&yytext[0];

    if (*token == (char)'\0')	/* ignore empty tokens */
	return (TOKEN_VALUE);
    Output('"');
    if ((yytext[0] != '"') && (t == TOKEN_VALUE))
    {					/* supply missing quote delimiters */
	Output('\\');
	Output('\"');
    }
    for (; *token; ++token)
    {
	switch (*token)
	{
	case '"':
	case '\\':
	    Output('\\');
	    Output(*token);
	    break;
	case '\b':
	    Output('\\');
	    Output('b');
	    break;
	case '\f':
	    Output('\\');
	    Output('f');
	    break;
	case '\n':
	    Output('\\');
	    Output('n');
	    break;
	case '\r':
	    Output('\\');
	    Output('r');
	    break;
	case '\t':
	    Output('\\');
	    Output('t');
	    break;
	case '\v':
	    Output('\\');
	    Output('v');
	    break;
	default:
	    if (ISPRINT(*token))
		Output(*token);
	    else
	    {
		/* BYTE_VAL keeps the value in 0..255, so three octal
		   digits always suffice */
		(void)sprintf(octal,"\\%03o",BYTE_VAL(*token));
		Output(octal[0]);
		Output(octal[1]);
		Output(octal[2]);
		Output(octal[3]);
	    }
	    break;
	}
    }
    if ((yytext[0] != '"') && (t == TOKEN_VALUE))
    {					/* supply missing quote delimiters */
	Output('\\');
	Output('\"');
    }
    Output('"');
    Output('\n');
    return (TOKEN_VALUE);
}
#endif


/*
 * Handle a '}' matched by the scanner.
 *
 * Only a brace seen at brace_level 1 closes the current object: the level
 * is lowered and an RBRACE token is produced.  At any other level the
 * brace is passed through as a LITERAL token and the level is unchanged.
 *
 * Returns the token code produced by out_token().
 */
static token_t
out_rbrace(VOID)
{
    if (brace_level != 1)
	return (out_token(TOKEN_LITERAL));

    --brace_level;
    return (out_token(TOKEN_RBRACE));
}


/*
 * Handle a ')' matched by the scanner.
 *
 * When the parenthesis closes the outermost object opened by '(' (see
 * out_lparen()), it is rewritten to '}' in yytext[], brace_level is
 * lowered and an RBRACE token is produced.  Every other ')' is passed
 * through as a LITERAL token.
 *
 * paren_level is lowered only while it is positive.  out_lparen() does not
 * count a '(' that it passes through as a literal, so an unconditional
 * decrement would drive the level negative on stray text such as "(x)"
 * between objects, and the closing ')' of a later parenthesized object
 * would then no longer bring the level back to zero.
 *
 * Returns the token code produced by out_token().
 */
static token_t
out_rparen(VOID)
{
    if (paren_level > 0)
    {
	paren_level--;
	if (paren_level == 0)
	{
	    yytext[0] = '}';
	    brace_level--;
	    return (out_token(TOKEN_RBRACE));
	}
    }
    return (out_token(TOKEN_LITERAL));
}


/*
 * Collect a double-quoted value into yytext[].
 *
 * Called after the opening '"' has been matched.  Characters are read with
 * next_char() and appended from index 1, up to and including the first '"'
 * found outside any braces; quotes nested in braces do not end the string.
 *
 * Returns the value of out_token(TOKEN_VALUE).
 */
static token_t
out_string(VOID)
{
    int depth = 0;		/* brace nesting inside the string */
    int len = 1;		/* next free index; yytext[0] is the opening quote */
    int ch;

    do
    {
	ch = next_char();
	if (len > (YYLMAX - 2))	/* keep room for this character and the NUL */
	    overflow();
	yytext[len++] = ch;
	if (ch == '{')
	    depth++;
	else if (ch == '}')
	    depth--;
    } while (!((ch == '"') && (depth == 0)));

    yytext[len] = '\0';
    return (out_token(TOKEN_VALUE));
}


/*
 * Record a token in the scanner state and hand its code back.
 *
 * The printing code this function once contained is disabled by #if 0;
 * what remains updates last_object, line_number and last_token.
 *
 * t: code of the token that was just recognized.
 * Returns t unchanged.
 */
#if NEW_STYLE
static token_t
out_token(token_t t)
#else /* K&R style */
static token_t
out_token(t)
token_t t;
#endif /* NEW_STYLE */
{		/* ALL token output is directed through this function */
#if 0
    int n;

    (void)printf("%d\t%s\t", (int)t, type_name[(int)t]);
#endif
    switch (t)
    {
    case TOKEN_AT:
	/* a new object starts: forget the previous object keyword */
	last_object = TOKEN_UNKNOWN;
#if 0
	(void)printf("\"%s\"\n", yytext);
#endif
	break;

    case TOKEN_VALUE:
#if 0
	if (ISDIGIT(yytext[0]))
	{				/* supply surrounding quotes */
	    n = strlen((const char*)&yytext[0]);
	    if ((n + 3) > YYLMAX)
		overflow();
	    yytext[n+2] = '\0';
	    yytext[n+1] = '"';
	    for (; n > 0; --n)
		yytext[n] = yytext[n-1];
	}
	else
	    compact_space();
	out_protected_string(t);
#endif
	break;

    case TOKEN_COMMENT:
    case TOKEN_INCLUDE:
#if 0
	out_protected_string(t);
#endif
	/* remembered so that out_lbrace() collects the body as a literal */
	last_object = t;
	break;

    case TOKEN_ENTRY:
    case TOKEN_PREAMBLE:
    case TOKEN_STRING:
#if 0
	(void)printf("\"%s\"\n", yytext);
#endif
	last_object = t;
	break;

    case TOKEN_FIELD:
    case TOKEN_KEY:
#if 0
	(void)printf("\"%s\"\n", yytext);
#endif
	break;

    case TOKEN_INLINE:
    case TOKEN_NEWLINE:
	/* each of these tokens ends with a newline; newlines consumed
	   through next_char() are counted there instead */
	line_number++;
#if 0
	out_protected_string(t);
	(void)printf("# line %ld \"%s\"\n", line_number, the_filename);
#endif
	break;

    case TOKEN_LITERAL:
    default:
#if 0
	out_protected_string(t);
#endif
	break;
    }
    if (!((t == TOKEN_INLINE) ||
	  (t == TOKEN_SPACE) ||
	  (t == TOKEN_NEWLINE)))
	last_token = t;		/* remember last non-space token type */
    return (t);
}


/*
 * Report that a value does not fit into the YYLMAX-character token buffer,
 * then terminate the program with EXIT_FAILURE.  Does not return.
 *
 * The parameter list is written with VOID, like the prototype and every
 * other definition in this file, so that the definition is a prototype
 * under NEW_STYLE and still valid K&R C otherwise.
 */
static void
overflow(VOID)
{
    (void)fprintf(stderr,
	    "String too long for %ld-character buffer\n",(long)YYLMAX);
    exit (EXIT_FAILURE);
}
