### /u/sy/beebe/tex/bst/xbbl-to-bib.awk, Tue Apr 11 16:02:00 2017
### Edit by Nelson H. F. Beebe
### ======================================================================
### Filter a BibTeX .bbl file produced with one of the x-*.bst extended
### styles, and recover usable, if rough, BibTeX entries from it.
###
### Usage:
###     awk -f xbbl-to-bib.awk foo.bbl > foo.bib-maybe
###
### Input is read in paragraph mode (RS = ""), so each blank-line-separated
### block that begins with \bibitem is converted to one BibTeX entry.
###
### [11-Apr-2017]
### ======================================================================

BEGIN { initialize() }

/^\\bibitem/ { do_bibitem($0) }

END { }

### ======================================================================

function clear_array(array, key)
{
    ## Remove every element from array.  Splitting an empty string into
    ## the array is a portable way to empty it.  The key parameter is
    ## unused and is retained only for interface compatibility.

    split("", array, "")
}


function collect_argument(s,    t)
{
    ## Return the argument of the \showXXX control word at the start of s.
    ##
    ## Two input forms are recognized:
    ##
    ##     \showXXX \url|text|      -> text  (no braced argument at all)
    ##     \showXXX{text}           -> text  (via collect_braced_argument())
    ##
    ## The first form must be tested first, because such a control word
    ## has no braced argument of its own, and brace collection would
    ## otherwise pick up the next unrelated brace group in the record.

    if (match(s, "^\\\\[A-Za-z]+[ \t]*\\\\url[|][^|]*[|]"))
    {
        t = substr(s, RSTART, RLENGTH - 1)      # drop the closing bar
        sub("^[^|]*[|]", "", t)                 # drop everything through the opening bar
        return (trim(squeeze_blanks(t)))
    }

    return (collect_braced_argument(s))
}


function collect_braced_argument(s,    b, c, k, last, level, n, t)
{
    ## Starting at the first left brace in s, collect the contiguous string
    ## that ends at a matching right brace (ignoring backslash-protected
    ## braces), discarding the outer braces.  The result has runs of
    ## blanks squeezed and leading and trailing blanks removed.
    ##
    ## If s contains no left brace, index() returns 0, and the scan starts
    ## at the first character with the brace level already at 1, so the
    ## text up to the first unmatched right brace is returned.
    ##
    ## A warning is issued if the braces in s never balance.

    level = 1
    n = index(s, "{")
    last = length(s)
    c = ""

    for (k = n + 1; (k <= last); ++k)
    {
        b = c                   # save previous character
        c = substr(s, k, 1)

        if (b != "\\")          # braces preceded by a backslash do not count
        {
            if (c == "{")
                level++
            else if (c == "}")
                level--
        }

        if (level == 0)
            break
    }

    if (level != 0)
        warning("collect_braced_argument(): nonzero brace level in s = [" s "]")

    ## Characters n+1 .. k-1 lie strictly inside the outer braces (or run
    ## to the end of s when no matching right brace was found)
    t = trim(squeeze_blanks(substr(s, n + 1, k - n - 1)))

    ## print "%% DEBUG: s = [" substr(s,1,40) "...] -> t = [" t "]"

    return (t)
}


function do_bibitem(s,    field, label, rest)
{
    ## Convert one .bbl record s (a paragraph starting with \bibitem) to
    ## a BibTeX entry: fill the global Entry[] array and the global
    ## Bibtype from the \showXXX wrappers found in s, apply a few
    ## field-specific fixups, and then call print_entry().
    ##
    ## Typical value:
    ##
    ## \bibitem{Colson:1726:SAN}
    ## \ifshowBIBTYPE \showBIBTYPE{article}{Colson:1726:SAN} \fi
    ## J.~{Colson, F.R.S.}
    ## \newblock A short account of negativo-affirmative arithmetick.
    ## \newblock {\em Philosophical transactions of the Royal Society of London},
    ##   \showVOLUME{34}\penalty 0 (392--398):\penalty 0 161--173, 1726. \ifshowCODEN
    ##   {\showCODEN{PTRSAV}}. \fi \ifshowISSN {\showISSN{0370-2316}}. \fi
    ## \newblock \ifshowURL {\showURL
    ##   \url|http://arith22.gforge.inria.fr/slides/s2-ercegovac.pdf|}. \fi
    ## \newblock \ifshowDOI {\showDOI \url|10.1098/rstl.1726.0032|}\ifshowDOIPERIOD .
    ##   \fi \fi

    Bibtype = "BIBTYPE"

    gsub("\n", " ", s)

    ## Accept both \bibitem{key} and \bibitem[label]{key}.  Braces in the
    ## patterns are written as bracket expressions, because an unescaped
    ## brace in an extended regular expression is not portable.
    if (match(s, "\\\\bibitem(\\[[^]]*\\])?[{][^{}]*[}]"))
    {
        label = substr(s, RSTART, RLENGTH - 1)  # drop the closing brace
        sub("^.*[{]", "", label)                # greedy: drop through the LAST left brace
        Entry["label"] = label
        ## print "\n\n%% DEBUG: label = [" Entry["label"] "]"
    }

    if (match(s, "\\\\showBIBTYPE[{][^{}]*[}]"))
        Bibtype = substr(s, RSTART + 13, RLENGTH - 14)

    for (field in Macro)
    {
        ## Require a non-letter (or end of record) after the control
        ## word, so that, e.g., \showISSN cannot match inside \showISSNL,
        ## nor \showISBN inside \showISBNXIII.
        if (match(s, (Macro[field] "([^A-Za-z]|$)")))
        {
            rest = substr(s, RSTART)    # copy now: collect_argument() changes RSTART
            Entry[field] = collect_argument(rest)
        }
    }

    ## Special fixups for certain fields

    if ("DOI" in Entry)
    {
        ## Both full DOIs (10.nnnn/...) and short DOIs (e.g., b8sr3k ->
        ## http://dx.doi.org/b8sr3k) receive the resolver prefix; values
        ## that already are URLs, and empty values, are left alone.
        if ((Entry["DOI"] != "") && (Entry["DOI"] !~ "^http"))
            Entry["DOI"] = ("http://dx.doi.org/" Entry["DOI"])
    }

    if ("month" in Entry)
    {
        ## Reduce full English month names to the standard BibTeX
        ## three-letter abbreviations
        if (match(Entry["month"], "^(January|February|March|April|May|June|July|August|September|October|November|December)$"))
            Entry["month"] = tolower(substr(Entry["month"], 1, 3))
    }

    if ("pages" in Entry)
        sub("^ *pages?[ ~]", "", Entry["pages"])

    if ("URL" in Entry)
    {
        ## Handle a braced value of the form \url|...|: remove the \url|
        ## prefix, and then the matching closing bar
        if (sub("^ *\\\\url[|]", "", Entry["URL"]))
            sub("[|] *$", "", Entry["URL"])
    }

    print_entry()
}


function fix_author(s)
{
    ## Placeholder for author/editor name cleanup: returns s unchanged

    return (s)
}


function fix_pages(s)
{
    ## Placeholder for page-range cleanup: returns s unchanged

    return (s)
}


function initialize()
{
    ## One-time setup: select paragraph-mode input, record the current
    ## date for bibdate values, print the output file header, and fill
    ## the global Class[] and Macro[] tables.

    RS = ""                     # records are separated by blank lines

    "date" | getline Current_Date_and_Time
    close("date")

    print "%%% -*-BibTeX-*-"

    Entry_Count = 100000

    ## mapping from lowercase document classes to preferred mixed case names
    Class["article"]            = "Article"
    # Class["BIBTYPE"]          = "Unknown"
    Class["booklet"]            = "Booklet"
    Class["book"]               = "Book"
    Class["inbook"]             = "InBook"
    Class["incollection"]       = "InCollection"
    Class["inproceedings"]      = "InProceedings"
    Class["manual"]             = "Manual"
    Class["mastersthesis"]      = "MastersThesis"
    Class["misc"]               = "Misc"
    Class["periodical"]         = "Periodical"
    Class["phdthesis"]          = "PhdThesis"
    Class["proceedings"]        = "Proceedings"
    Class["techreport"]         = "TechReport"
    Class["unpublished"]        = "Unpublished"

    ## mapping from preferred BibTeX field name to regular-expression for its wrapper
    Macro["address"]            = "\\\\showADDRESS"
    Macro["articleno"]          = "\\\\showARTICLENO"
    Macro["author"]             = "\\\\showAUTHORRAW" # exception
    Macro["bibtype"]            = "\\\\showBIBTYPE"
    Macro["bookpages"]          = "\\\\showBOOKPAGES"
    Macro["booktitle"]          = "\\\\showBOOKTITLE"
    Macro["chapter"]            = "\\\\showCHAPTER"
    Macro["CODEN"]              = "\\\\showCODEN"
    Macro["crossref"]           = "\\\\showCROSSREF"
    Macro["day"]                = "\\\\showDAY"
    Macro["DOI"]                = "\\\\showDOI"
    Macro["edition"]            = "\\\\showEDITION"
    Macro["editor"]             = "\\\\showEDITORRAW" # exception
    Macro["howpublished"]       = "\\\\showHOWPUBLISHED"
    Macro["institution"]        = "\\\\showINSTITUTION"
    Macro["ISBN"]               = "\\\\showISBN"
    Macro["ISBN-13"]            = "\\\\showISBNXIII"
    Macro["ISSN"]               = "\\\\showISSN"
    ## An earlier assignment of "\\\\showISSN-L" to this element was
    ## immediately overwritten by the one below, so only this one is kept
    Macro["ISSN-L"]             = "\\\\showISSNL"
    Macro["journal"]            = "\\\\showJOURNAL"
    Macro["key"]                = "\\\\showKEY"
    Macro["label"]              = "\\\\showLABEL"
    Macro["LCCN"]               = "\\\\showLCCN"
    Macro["month"]              = "\\\\showMONTH"
    Macro["note"]               = "\\\\showNOTE"
    Macro["number"]             = "\\\\showNUMBER"
    Macro["organization"]       = "\\\\showORGANIZATION"
    Macro["pagecount"]          = "\\\\showPAGECOUNT"
    Macro["pagecountone"]       = "\\\\showPAGECOUNTONE"
    Macro["pages"]              = "\\\\showPAGES"
    Macro["price"]              = "\\\\showPRICE"
    Macro["publisher"]          = "\\\\showPUBLISHER"
    Macro["school"]             = "\\\\showSCHOOL"
    Macro["series"]             = "\\\\showSERIES"
    Macro["title"]              = "\\\\showTITLE"
    Macro["type"]               = "\\\\showTYPE"
    Macro["URL"]                = "\\\\showURL"
    Macro["volume"]             = "\\\\showVOLUME"
    Macro["year"]               = "\\\\showYEAR"
}


function print_and_delete_abbrev(key)
{
    ## If Entry[key] exists, print it as an unquoted abbreviation and
    ## remove it from Entry[]

    if (key in Entry)
    {
        print_key_abbrev(key,trim(Entry[key]))
        delete Entry[key]
    }
}


function print_and_delete_item(key)
{
    ## If Entry[key] exists, print it as a quoted value and remove it
    ## from Entry[]

    if (key in Entry)
    {
        print_key_value(key,trim(Entry[key]))
        delete Entry[key]
    }
}


function print_entry(    key,t)
{
    ## Print the current contents of the global Entry[] array as one
    ## BibTeX entry of type Class[Bibtype], and then empty Entry[] so
    ## that it is ready for the next record.

    ## print "%% DEBUG: print_entry()"

    #?? if ( (!("title" in Entry)) || (Entry["title"] == "") )
    #??     return

    # if ((!("author" in Entry)) || (Entry["author"] == ""))
    #     Entry["author"] = "Anonymous"

    # Entry["author"] = fix_author(Entry["author"])
    # Entry["editor"] = fix_author(Entry["editor"])

    # if ("pages" in Entry)
    #     Entry["pages"] = fix_pages(Entry["pages"])

    if (!(Bibtype in Class))
        Class[Bibtype] = "Unknown"

    if (!("label" in Entry))
    {
        warning("print_entry(): no citation label found for this entry")
        Entry["label"] = ""
    }

    printf("\n@%s{%s,\n", Class[Bibtype], Entry["label"])

    delete Entry["label"]
    delete Entry["bibtype"]

    ## Output the most common fields in the order preferred
    ## by biborder (from "man biborder"):

    print_and_delete_item("author")
    print_and_delete_item("editor")
    print_and_delete_item("key")
    print_and_delete_item("booktitle")
    print_and_delete_item("title")
    print_and_delete_item("crossref")
    print_and_delete_item("chapter")
    print_and_delete_item("journal")
    print_and_delete_item("volume")
    print_and_delete_item("type")
    print_and_delete_item("number")
    print_and_delete_item("howpublished")
    print_and_delete_item("institution")
    print_and_delete_item("organization")
    print_and_delete_item("publisher")
    print_and_delete_item("school")
    print_and_delete_item("address")
    print_and_delete_item("edition")
    print_and_delete_item("bookpages")
    print_and_delete_item("pages")
    print_and_delete_item("day")

    ## print_and_delete_item("month")

    ## Special handling for month, for which we prefer to output standard 3-letter abbreviations.
    ## Test membership first, so that a missing month is never created
    ## merely by being examined.
    ## NOTE(review): a value such as "jan 12" is printed unquoted, exactly
    ## as before; confirm that this is the intended output form.
    if (!("month" in Entry) || (Entry["month"] == ""))
        delete Entry["month"]
    else if (Entry["month"] ~ "^[a-z][a-z][a-z]( |$)")
        print_and_delete_abbrev("month")
    else
        print_and_delete_item("month")

    print_and_delete_item("year")
    print_and_delete_item("CODEN")
    print_and_delete_item("DOI")
    print_and_delete_item("EAN")
    print_and_delete_item("ISBN")
    print_and_delete_item("ISBN-13")
    print_and_delete_item("ISSN")
    print_and_delete_item("ISSN-L")
    print_and_delete_item("LCCN")
    print_and_delete_item("MRclass")
    print_and_delete_item("MRnumber")
    print_and_delete_item("MRreviewer")
    print_key_value("bibdate", Current_Date_and_Time)
    print_and_delete_item("bibsource")
    print_and_delete_item("note")
    print_and_delete_item("price")
    print_and_delete_item("series")
    print_and_delete_item("URL")
    print_and_delete_item("ZMnumber")

    ## Output whatever fields remain, in the (unspecified) order in which
    ## awk enumerates them.  Nothing is deleted inside the loop: the
    ## whole array is emptied immediately afterwards.
    for (key in Entry)
        print_key_value(key, trim(Entry[key]))

    print "}"

    clear_array(Entry)
}


function print_key_abbrev(key,abbrev)
{
    ## Print a key/value line whose value is an unquoted abbreviation

    printf(" %-15s%s,\n", key " = ", abbrev)
}


function print_key_value(key,value)
{
    ## Print a key/value line whose value is a quoted string, with runs
    ## of blanks squeezed and leading and trailing blanks removed

    printf(" %-15s\"%s\",\n", key " =", squeeze_blanks(trim(value)))
}


function squeeze_blanks(s)
{
    ## Return s with each run of two or more blanks or tabs reduced to a
    ## single space

    gsub(/[ \t][ \t]+/, " ", s)

    return (s)
}


function trim(s)
{
    ## Return s without leading and trailing blanks and tabs

    sub(/^[ \t]+/, "", s)
    sub(/[ \t]+$/, "", s)

    return (s)
}


function warning(message)
{
    ## Print message on the standard error stream, prefixed by the input
    ## file name (the global Filename, if it is set, else FILENAME) and
    ## the current value of FNR

    ### print FILENAME ":" FNR ":%%" message >"/dev/tty"
    fflush("")
    print ((Filename != "") ? Filename : FILENAME) ":" FNR ":%%" message > "/dev/stderr"
    fflush("")
}