#!/bin/sh
#
#	ref2bib - convert Unix "refer" format
#			to BibTeX "bib" format.
#
#	Written by Jonathan Bowen, Oxford University, October 1987.
#
#	Version 1.1 updated by JPB, October 1988:
#		Bugs found by Peter Hall corrected.
#
#	Copyright (C) 1987, J.P.Bowen
#
#	Permission is granted to copy this shell script for
#	non-profit purposes, provided this header is left intact.
#
#		JANET: bowen@uk.ac.oxford.prg
#		ARPA:  bowen%prg.oxford.ac.uk@nss.cs.ucl.ac.uk
#

# Restrict command lookup to the system directories.
PATH=/bin:/usr/bin:/usr/ucb

# Name used in messages and to build the per-user edit file name.  "$0" is
# quoted so that an invocation path containing blanks or glob characters is
# handed to basename as one operand.
PROGNAME=`basename "$0"`
VERSION="Version 1.1"

# Extension given to named output files; also the initial entry name.
BIB=bib

# Maximum output width; WIDTH is overridden by the width options.
DEFAULTWIDTH=72
WIDTH=$DEFAULTWIDTH

# Option flags.  Each holds the command name "true" or "false" and is run
# directly as a command where it is tested (e.g. "$DEBUG && ...").
DEBUG=false
USENAME=false
NAMEDFILES=false

# parse_option OPTION
#	Apply the single option word OPTION to the option variables
#	(WIDTH, USENAME, DEBUG, NAMEDFILES).
#	-u / -U print the full usage text on standard output and exit 0.
#	An unrecognised option prints the short usage line on standard error
#	and exits 2, so that a caller can detect the mistake.
parse_option()
{
    case "$1" in
    -|-0|-w)	# No maximum width: an empty WIDTH disables folding
	WIDTH=
	;;
    -[1-9]|-[1-9][0-9]|-[1-9][0-9][0-9])	# -N: width of N characters
	WIDTH=`expr "X$1" : 'X-\(.*\)'`
	;;
    -a)		# Name by author and year
	USENAME=true
	;;
    -d)		# Enable debugging
	# Sent to standard error like every other debugging message, so it
	# cannot end up inside the generated BibTeX output.
	echo "DEBUGGING: $VERSION" 1>&2
	DEBUG=true
	;;
    -n)		# Named output files
	NAMEDFILES=true
	;;
    -u|-U)
	echo "Usage: $PROGNAME [ options ] [ file ... ]
Converts Unix \"refer\" format to \"BibTeX\" database format.
-a	name entries by author and year		(default=$USENAME)
-d	enable debugging			(default=$DEBUG)
-n	output to named files (ext \".$BIB\")	(default=$NAMEDFILES)
-w	no maximum width
-u	display usage information
-N	maximum width of N characters (N=1-999)	(default=$DEFAULTWIDTH)"
	exit 0
	;;
    *)
	echo "Usage: $PROGNAME [ -a -[width] ] [ file ... ]" 1>&2
	exit 2
	;;
    esac
}

# Consume every leading argument that starts with "-".  The test is done with
# a quoted case word, so an argument containing blanks or glob characters is
# examined as-is instead of being split before it reaches expr.
while :
do
    case "$1" in
    -*)	parse_option "$1"
	shift
	;;
    *)	break
	;;
    esac
done

# Settings derived after option parsing.
STDIN="-"			# Pseudo file name standing for standard input
NEWFILE=""			# Output file name, filled in per input file
NAME=$BIB			# Entry name until an input file supplies one
EDITFILE=$HOME/.$PROGNAME	# Optional sed script of user edits
GEN="`date -u` on `hostname`"	# Provenance text for the output header

# Report the derived values on standard error when debugging.
if $DEBUG
then
    echo "Generated: <$GEN>" 1>&2
    echo "Width: <$WIDTH>" 1>&2
fi

# Process each file, or if none given, standard input.

# convert_refer
#	Filter that reads refer records on standard input and writes the
#	corresponding BibTeX entries on standard output.
#	Reads the shell variables GEN, NAME, PROGNAME, USENAME (passed to awk
#	with -v, never pasted into the program text), EDITFILE (optional sed
#	script applied to the result) and WIDTH (fold width; empty = no fold).
#	The input is expected to end with a blank line so that the last
#	record is terminated.
convert_refer()
{
# Expand (convert tabs to spaces)
	expand |

# Remove trailing spaces if present
	sed 's/ *$//' |

# Unfold wrapped % fields (except the %X field, which can be long)
	awk '/^$|%X/	{ pcfield=0 }
/^%[^X]/ 	{ pcfield=1 }
/^[^%]/ 	{ if (pcfield > 0) {
			pcfield++
			printf "%s ",last
			last = $0
			next
		  }
		}
		{ print last; last = $0 }
END		{ print last }' |


# Next do the real work.
# The fallback entry name is held in "defname": the obvious name "default"
# is a reserved word in GNU awk and makes the whole program a syntax error.
	awk -v gen="$GEN" -v defname="$NAME" -v progname="$PROGNAME" \
	    -v usename="$USENAME" 'BEGIN {	# Initialization
		printf "@COMMENT{Generated by %s, %s}\n",progname,gen
	}
/^%/	{	# Any % line
	entry=substr($0,4)
	recordtype=substr($0,2,1)
	percent=1
	# xcount is not touched here: it is cleared only at the end of a
	# record, so a %X abstract survives any field that follows it.
	}
/^%A /	{	# Author
	if (author == "") {
		name4 = substr($NF,1,4)
		author = entry
	}
	else author = sprintf("%s and %s",author,entry)
	next
	}
/^%B /	{	# Book
	if (booktitle == "") booktitle = entry
	else booktitle = booktitle " " entry
	entrytype = "INCOLLECTION"
	next
	}
/^%C /	{	# City
	if (address == "") address = entry
	else address = address " " entry
	next
	}
/^%D /	{	# Date
	if (year == "") {
		year = $NF
		year2 = substr($NF,length($NF)-1)
	}
	if (month == "" && NF > 2)
		month = substr(entry,1,length(entry)-length($NF)-1)
	next
	}
/^%E /	{	# Editor
	if (editor == "") editor = entry
	else editor = sprintf("%s and %s",editor,entry)
	if (entrytype == "") entrytype = "BOOK"
	next
	}
/^%F /	{	# Footnote
	if (note == "") note = entry
	else note = note " " entry
	next
	}
/^%G /	{	# Government order number
	next
	}
/^%H /	{	# Header
	if (annote == "") annote = entry
	else annote = annote " " entry
	next
	}
/^%I /	{	# Issuer
	if (institution == "") institution = entry
	else institution = institution " " entry
	next
	}
/^%J /	{	# Journal
	# Only "Conf." and "Conference" mark proceedings; a test for
	# "Proc."/"Proceedings" is not applied.
	if (index(entry,"Conf.") > 0 || \
	    index(entry,"Conference") > 0) {
		if (booktitle == "") booktitle = entry
		else booktitle = sprintf("%s, %s",booktitle,entry)
		entrytype = "INPROCEEDINGS"
	}
	else {
		if (journal == "") journal = entry
		else journal = journal " " entry
		entrytype = "ARTICLE"
	}
	next
	}
/^%K /	{	# Keyword
	if (keywords == "") keywords = entry
	else keywords = sprintf("%s, %s",keywords,entry)
	next
	}
/^%L /	{	# Label
	if (label == "") label = $2
	next
	}
/^%M /	{	# Memorandum
	next
	}
/^%N /	{	# Number
	if (number == "") number = entry
	next
	}
/^%O /	{	# Other (conference)
	if (conference == "") conference = entry
	else conference = conference " " entry
	entrytype = "INPROCEEDINGS"
	next
	}
/^%P /	{	# Page(s)
	# A range or list is a page reference; a bare value is kept
	# separately and printed as the LENGTH field.
	if (index(entry,"-") > 0 || index(entry,",") > 0) {
		if (pages == "") pages = entry
		else pages = sprintf("%s,%s",pages,entry)
	}
	else if (numberofpages == "") numberofpages = entry
	next
	}
/^%Q /	{	# Corporate or foreign author (surname first)
	if (author == "") {
		name4 = substr($2,1,4)
		author = entry
	}
	else author = sprintf("%s and %s",author,entry)
	next
	}
/^%R /	{	# Report, paper or thesis (i.e. unpublished)
	# Later tests override earlier ones, so the last match wins.
	if (index(entry,"Ph") > 0)	entrytype="PHDTHESIS"
	if (index(entry,"hesis") > 0)	entrytype="PHDTHESIS"
	if (index(entry,"Sc") > 0)	entrytype="MASTERSTHESIS"
	if (index(entry,"aster") > 0)	entrytype="MASTERSTHESIS"
	if (index(entry,"ech") > 0)	entrytype="TECHREPORT"
	if (index(entry,"eport") > 0)	entrytype="TECHREPORT"
	if (index(entry,"aper") > 0)	entrytype="UNPUBLISHED"
	if (index(entry,"orking") > 0)	entrytype="UNPUBLISHED"
	if (index(entry,"npub") > 0)	entrytype="UNPUBLISHED"
	if (index(entry,"ocument") > 0)	entrytype="MANUAL"
	if (index(entry,"anual") > 0)	entrytype="MANUAL"
	if (entrytype=="UNPUBLISHED") {
		if (note == "") note = entry
		else note = note " " entry
	}
	else {
		if (type == "") type = entry
	}
	next
	}
/^%S /	{	# Series title
	if (series == "") series = entry
	else series = series " " entry
	if (index(entry,"Tech") > 0)	entrytype="TECHREPORT"
	next
	}
/^%T /	{	# Title
	if (title == "") title = entry
	else title = title " " entry
	next
	}
/^%U /	{	# Unused
	next
	}
/^%V /	{	# Volume
	if (volume == "") volume = entry
	next
	}
/^%X /	{	# Abstract
	abstract[++xcount] = entry
	next
	}
/^%Y /	{	# Unused
	next
	}
/^%Z /	{	# Can be used to supply an entry name
	name = entry
	next
	}
/^[^%]/	{	# Other lines - abstract if started by "%X" field
	if (recordtype == "X") abstract[++xcount] = $0
	next
	}
/^$/	{	# Empty line delimits a record
	if (percent == 1) {	# End of a record
		# Move the issuer into the field that suits the entry type.
		if (index(entrytype,"BOOK") > 0 || \
		    index(entrytype,"INCOLLECTION") > 0) {
			publisher = institution
			institution = ""
		}
		if (entrytype == "" || entrytype == "BOOK") {
			if (pages != "") {
				if (booktitle != "") entrytype = "INCOLLECTION"
				else entrytype = "INBOOK"
			}
		}
		if (entrytype == "MANUAL" || \
		    index(entrytype,"PROCEEDINGS") > 0) {
			organization = institution
			institution = ""
		}
		if (entrytype == "TECHREPORT") {
			if (type == "") {
				# Use the series as the type, minus a final "s"
				type = series
				if (substr(type,length(type)) == "s") {
					type = substr(type,1,length(type)-1)
				}
				series = ""
			}
			if (number == "") {
				number = volume
				volume = ""
			}
		}
		if (index(entrytype,"THESIS") > 0) {
			school = institution
			institution = ""
		}
		if (usename == "true") name = sprintf("%s%s",name4,year2)
		if (name == "") name = defname;
		if (entrytype == "") entrytype = "MISC";
		printf "\n@%s{%s:%03d,\n",entrytype,name,++num
		if (author != "")
			printf "\tAUTHOR = {%s},\n",author;
		if (editor != "")
			printf "\tEDITOR = {%s},\n",editor;
		if (title != "")
			printf "\tTITLE = {%s},\n",title;
		if (booktitle != "")
			printf "\tBOOKTITLE = {%s},\n",booktitle;
		if (journal != "")
			printf "\tJOURNAL = {%s},\n",journal;
		if (volume != "")
			printf "\tVOLUME = {%s},\n",volume;
		if (number != "")
			printf "\tNUMBER = {%s},\n",number;
		if (pages != "")
			printf "\tPAGES = {%s},\n",pages;
		if (type != "")
			printf "\tTYPE = {%s},\n",type;
		if (series != "")
			printf "\tSERIES = {%s},\n",series;
		if (institution != "")
			printf "\tINSTITUTION = {%s},\n",institution;
		if (organization != "")
			printf "\tORGANIZATION = {%s},\n",organization;
		if (school != "")
			printf "\tSCHOOL = {%s},\n",school;
		if (publisher != "")
			printf "\tPUBLISHER = {%s},\n",publisher;
		if (address != "")
			printf "\tADDRESS = {%s},\n",address;
		if (conference != "")
			printf "\tCONFERENCE = {%s},\n",conference;
		# key is never assigned by any rule, so this stays inert
		if (key != "")
			printf "\tKEY = {%s},\n",key;
		if (keywords != "")	 # Non-standard keyword field
			printf "\tKEYWORDS = {%s},\n",keywords;
		if (label != "")	 # Non-standard label field
			printf "\tLABEL = {%s},\n",label;
		if (numberofpages != "") # Non-standard length (in pages) field
			printf "\tLENGTH = {%s},\n",numberofpages;
		if (year != "")
			printf "\tYEAR = {%s},\n",year;
		if (month != "")
			printf "\tMONTH = {%s},\n",month;
		if (annote != "")
			printf "\tANNOTE = {%s},\n",annote;
		if (note != "")
			printf "\tNOTE = {%s},\n",note;
		if (xcount != 0) {
			printf "\tABSTRACT = {%s",abstract[1];
			for (i=2; i<=xcount; i++)
				printf "\n\t\t%s",abstract[i];
			printf "},\n"
		}
#		printf "\tGENERATED = {%s}\n",gen
		printf "}\n"	# End of record
	}
	# Clear all per-record state ready for the next record
	author=""
	booktitle=""
	address=""
	year=""
	month=""
	editor=""
	annote=""
	note=""
	institution=""
	organization=""
	school=""
	publisher=""
	conference=""
	journal=""
	key=""
	keywords=""
	label=""
	number=""
	pages=""
	numberofpages=""
	type=""
	series=""
	title=""
	volume=""
	name=""
	entrytype=""
	name4=""
	year2=""
	percent=0
	xcount=0
	recordtype=""
	}
END {printf "\n"}' |

# Remove trailing comma from last line in each entry.
# Output runs one line behind the input, so an extra (empty) first line is
# produced; the sed script below deletes it.
	awk '/^$|^[^}]/ { print last }
/^}/	{ print substr(last,1,length(last)-1) }
	{ last=$0 }
END	{ print }' |

# Some things can be automatically edited:
#   - delete the first line
#   - put a backslash in front of each of  \ $ & # ^ _
#   - wrap the words LaTeX and TeX as {\LaTeX} and {\TeX}
#   - on TITLE lines (the address starts with a literal tab) put braces round
#     single capitals B-Z and round runs of two or more capitals
#   - rewrite the sequence backslash-0 with the replacement \~
#   - turn "quoted text" into ``quoted text''
#   - turn " - " into " --- " and a hyphen between digits into "--"
	sed '1d
s/\([\\\$&#^_]\)/\\\1/g
s/\([{ ]\)LaTeX\([} :]\)/\1{\\LaTeX}\2/g
s/\([{ ]\)TeX\([} :]\)/\1{\\TeX}\2/g
/^	TITLE = /{
	s/\([{ ]\)\([B-Z]\)\([ :\.,}]\)/\1{\2}\3/g
	s/\([{ ]\)\([A-Z][A-Z][A-Z]*\)\([ :\.,}]\)/\1{\2}\3/g
}
s/\\0/\~/g
s/"\([^"]*\)"/``\1'"''"'/g
s/ - / --- /g
s/\([0-9]\)-\([0-9]\)/\1--\2/g' |

# Other edits can be customised by the user
	if [ -r "$EDITFILE" ] && [ -f "$EDITFILE" ]
	then
		sed -f "$EDITFILE"
	else
		cat
	fi |

# Optionally fold lines.
# pos tracks the output column; it is carried over from one input line to
# the next so that double-tab (abstract) lines are refilled together.
	if [ -n "$WIDTH" ]
	then
		awk -v width="$WIDTH" '
/^[^	]|^$/	{printf "\n%s",$0}	# Print most lines normally
/^	[^	]/ {	# Fold lines starting with a single tab
	    i=1
	    while (i <= NF) {
		if (i > 1) {
		    printf "\n\t\t%s",$i
		    pos = 16+length($i)
		}
		else {
		    printf "\n\t%s",$i
		    pos = 8+length($i)
		}
		if (++i <= NF) {
		    pos += 1+length($i)
		    while (pos <= width) {
			printf " %s",$i
			if (++i > NF) break
			pos += 1+length($i)
		    }
		}
	    }
	}
/^		/ {	# Fold multiple lines starting with a double tab
	    i=1
	    while (i <= NF) {
		pos += 1+length($i)
		if (pos > width) {
		    printf "\n\t\t%s",$i
		    pos = 16+length($i)
		}
		else {
		    printf " %s",$i
		}
		if (++i <= NF) {
		    pos += 1+length($i)
		    while (pos <= width) {
			printf " %s",$i
			if (++i > NF) break
			pos += 1+length($i)
		    }
		}
	    }
	}
END {printf "\n"}'
	else
		cat
	fi
}

# With no file operands, process standard input.  The operands are then
# walked with "$@" so that a name containing blanks or glob characters
# stays one word.
if [ $# -eq 0 ]
then
    set -- "$STDIN"
fi

for FILE in "$@"
do

# First set up shell variables as required
    if [ "$FILE" = "$STDIN" ]
    then
	# Start from the default name again: a name left over from an
	# earlier operand would either suppress standard input (after an
	# unreadable file) or send its output to that file's .bib
	NAME=$BIB
	NEWFILE=$NAME.$BIB
    elif [ -r "$FILE" ] && [ -f "$FILE" ]
    then
	NAME=`basename -- "$FILE"`
	NEWFILE=$FILE.$BIB
    else
	# An empty NAME marks this operand as one to skip
	NAME=""
	echo "$PROGNAME: Can't read $FILE" 1>&2
    fi

# If all is OK, read input and terminate with a blank line.
    if [ -n "$NAME" ]
    then
	if [ "$FILE" = "$STDIN" ]
	then
# If no files given, read from standard input.
	    $DEBUG && echo "Reading from standard input" 1>&2
	    cat
	    echo
	else
	    $DEBUG && echo "Reading <$FILE>" 1>&2
	    cat -- "$FILE"
	    echo
	fi |

# Convert the records
	convert_refer |

# Finally, output to named files or standard output
	if $NAMEDFILES
	then
		$DEBUG && echo "Output to <$NEWFILE>" 1>&2
		cat > "$NEWFILE"
	else
		cat
	fi

    fi
done

exit 0

