### /u/sy/beebe/tex/bib/check-page-gaps.awk, Sun Nov 15 21:10:26 1998
### Edit by Nelson H. F. Beebe
### ====================================================================
### In a journal-specific bibliography sorted -bypages or -byvolume,
### check for gaps in coverage, possibly indicating missing articles.
###
### Usage:
###     gawk -f check-page-gaps.awk \
###             [-v EMATHFILE=filename] \
###             [-v IGNOREZEROPAGE=nnn ] \
###             [-v INNUMBER=nnn] \
###             [-v MISSINGLAST=nnn ] \
###             [-v NPAGES=nnn ] \
###             BibTeX-file(s)
###
### If EMATHFILE is specified, it is the name of an output file that
### will contain a script to attempt to retrieve missing entries from
### the EMath (Zentralblatt fuer Mathematik) database.
###
### If IGNOREZEROPAGE is specified, and nonzero, then warnings are
### suppressed about page gaps following a zero page number.
###
### If INNUMBER is specified, and nonzero, then only page gaps within
### issues are diagnosed; those between issues are silently ignored.
###
### If MISSINGLAST is specified, and nonzero, then entries with missing
### final page numbers are diagnosed; this is helpful in conjunction
### with the EMATHFILE option.  This option intentionally suppresses
### output of requests for data in any page gaps.
###
### If NPAGES is specified, and nonzero, then that many nearby pages
### are scanned (default: NPAGES=2).
###
### [10-Apr-2002] -- Add -v NPAGES=nnn option.
###
### [10-Feb-2001] -- Add -v IGNOREZEROPAGE=nnn option, and add variables
###                  Last_Number_Range and Number_Range, so that we
###                  can avoid issuing bogus `missing issue' warnings
###                  for issues that are assigned multiple numbers
###                  (e.g., number 1--3, followed by number 4).
###
### [09-Feb-2001] -- Add -v MISSINGLAST=nnn option.
###
### [07-Oct-2000] -- Add function forget_entry() and global variable
###                  In_Article to eliminate bogus warnings in
###                  bibliographies that contain a mixture of
###                  @Article{...} and other document entry types.
###
### [05-Oct-2000] -- Add volume numbers to "missing issue" warning.
###
### [16-Oct-1999] -- add -v EMATHFILE=filename option.
###
### [16-Oct-1999] -- Use %3d format to line up output for improved
###                  readability.
###
### [11-Oct-1999] -- Add INNUMBER option and checking_pair() function.
###
### [18-Nov-1998] -- Update to include volume(number) prefix on page
###                  ranges in messages.
###
###                  Make warning() keep track of volume changes, and
###                  output a single blank line before such changes, for
###                  better output visibility and ease of use.
###
###                  Use LAST_FNR in messages, so that the location
###                  reported lies at the page keyword of the first
###                  entry that mismatches, rather than after the second
###                  of them.
###
### [15-Nov-1998] -- Original version.
### ====================================================================

BEGIN {
    ISSN = "ISSN-goes-here"

    ## NPAGES only takes effect when it is specified and nonzero, so
    ## an unset, zero, negative, or non-numeric value falls back to the
    ## default instead of silently producing empty page lists.
    NPAGES += 0
    if (NPAGES < 1)
        NPAGES = 2
}

## Entry start: only @Article entries take part in the gap analysis.
/^@Article{/            { new_entry(); next }

/^@.*{/                 { forget_entry(); next }

## Field lines: remember the most recent value of each field of interest.
/^ *ISSN *= /           { split(get_value($0),parts," "); ISSN = parts[1]; next }

/^ *pages *= /          { Pages = get_value($0); LAST_PAGES_FNR = FNR; next }

/^ *number *= /         { Number_Range = get_value($0); Number = int(Number_Range); next }

/^ *volume *= /         { Volume = int(get_value($0)); next }

/^ *year *= /           { Year = get_value($0); next }

## Entry end: compare the entry just read against its predecessor.
/^}/                    { if (In_Article) check_pages(); next }

END {
    ## Write the retrieval script only when an output file was requested
    ## and at least one request was collected.
    if (EMATHFILE && EMATHARGS) {
        ## The file name is spliced into two shell command lines below,
        ## so quote it once; names with blanks or shell metacharacters
        ## would otherwise break (or subvert) those commands.
        Quoted_EMATHFILE = shell_quote(EMATHFILE)

        print "#! /bin/sh" > EMATHFILE
        print "\ndate 1>&2\n" >> EMATHFILE
        print "get-emath-entries.sh", ISSN, "\\" >> EMATHFILE
        close(EMATHFILE)        # flush before the shell appends to it

        ## Sort the (year, page-list) requests numerically, drop
        ## duplicates, and discard lines that hold nothing but tabs.
        sort_pipe = ("sort -u -k 1n -k 2n | sed -e '/^\t*$/d' >>" Quoted_EMATHFILE)
        print "\t" EMATHARGS "\n" | sort_pipe
        close(sort_pipe)        # wait for the pipeline to finish

        ## The leading empty line terminates the backslash-continued
        ## argument list produced above.
        print "\ndate 1>&2" >> EMATHFILE
        close(EMATHFILE)

        system("chmod +x " Quoted_EMATHFILE)
    }
}

### ====================================================================

## Append one "year page-list" request, as a backslash-continued
## shell argument line, to the global EMATHARGS.
function add_arg(year,page)
{
    EMATHARGS = EMATHARGS " " year " " page " \\\n\t"
}

## Queue a request for NPAGES consecutive pages starting at page.
function add_nearby_args(year,page,    k,page_list)
{
    ## Because journals sometimes have advertising inserts between
    ## articles, or start articles on odd-numbered pages, or have
    ## inserts at the start or end of issues, we produce search
    ## requests for nearby page numbers.

    year += 0                   # force numeric, and thereby trim any blanks
    page += 0                   # force numeric, and thereby trim any blanks

    if (page < 1)               # force sensible number
        page = 1

    page_list = ""
    for (k = 0; k < NPAGES; ++k)
        page_list = (page_list ((k == 0) ? "" : ",") (page + k))

    add_arg(year,page_list)
}

## Return 1 if the current and previous entries should be compared:
## always when INNUMBER is zero or unset, otherwise only when both
## entries carry the same issue number.
function checking_pair()
{
    if (INNUMBER == 0)
        return (1)

    return ((Number == Last_Number) ? 1 : 0)
}

## Compare Last_Pages with Pages and report overlaps or gaps.
##
## gap is the tolerance: a page gap is reported when the current entry
## starts more than gap pages after the end of the previous one.
function check_gap(gap,    last_n,last_parts,n,parts)
{
    if ((Pages == "") || (Last_Pages == ""))
        return                  # nothing to compare

    last_n = split(Last_Pages, last_parts, "-+")
    n = split(Pages, parts, "-+")

    if (MISSINGLAST) {
        ## Only collect entries whose final page is unknown ("??").
        if ((parts[1] ~ "^[0-9]+$") && (parts[2] ~ "[?]"))
            add_arg(Year,parts[1])
    } else if (last_n >= 2) {
        if (last_parts[2] ~ "[?]")
            ;                   # unknown final page: cannot judge
        else if (!checking_pair())
            ;                   # different issues, and INNUMBER is set
        else if (IGNOREZEROPAGE && (last_id() ~ " 0--0$"))
            ;                   # gap follows a zero page number
        else if (int(last_parts[2]) > int(parts[1])) {
            warning("overlapping page ranges [" last_id() "] [" this_id() "]")
            # Retrieve both references again to get corrected page ranges
            add_arg(Year,parts[1])
            add_arg(Year,last_parts[1])
        } else if ((int(last_parts[2]) + gap) < int(parts[1])) {
            warning("page gap of " sprintf("%3d",(int(parts[1]) - int(last_parts[2]) - 1)) \
                    " between [" last_id() "] [" this_id() "]")
            add_nearby_args(Year, last_parts[2] + 1)
        }
    } else if (checking_pair()) {
        warning("possible gap between [" last_id() "] [" this_id() "]")
        ## The previous entry has no final page here (last_n < 2), so
        ## last_parts[2] does not exist; search from the page after its
        ## one known page rather than from page 1.
        add_nearby_args(Year, last_parts[1] + 1)
    }
}

## Called at the end of each @Article entry: choose the gap tolerance
## from the relationship between the issue numbers, then check pages.
function check_pages(    n,parts)
{
    ## In order to detect missing articles at the start of a new volume,
    ## we create a bogus last entry for page 0 of the new volume.

    if (Volume != Last_Volume) {
        Last_Volume = Volume
        Last_Number = Number
        Last_Number_Range = Number_Range
        Last_Pages = "0--0"
    }

    ## From here on Volume == Last_Volume always holds.

    if (Number == Last_Number)
        check_gap(2)
    else if (Number == (Last_Number + 1))
        check_gap(3)
    else {
        ## The previous issue may carry several numbers (e.g., 1--3 or
        ## 1/2); it is then adjacent if this issue follows the last one.
        n = split(Last_Number_Range,parts,"[-/]+")
        if ((n == 2) && (Number == (int(parts[2]) + 1)))
            check_gap(3)
        else if ((n == 1) || (n == 2))
            warning("missing issues between " Volume "(" Last_Number_Range ")" \
                    " and " Volume "(" Number_Range ")")
        else
            warning("could not parse number range [" Last_Number_Range "]")
    }

    LAST_FNR = LAST_PAGES_FNR
}

## Called at the start of every non-@Article entry: leave article mode
## and clear the remembered previous-entry data.
function forget_entry()
{
    In_Article = 0
    Last_Volume = ""
    Last_Number = ""
    Last_Number_Range = ""
    Last_Pages = ""
}

## Return the text after the first double quote on line, with an
## optional closing quote and optional trailing comma removed.
function get_value(line,    s)
{
    s = substr(line,index(line,"\"")+1)
    sub(/\"?,?$/,"",s)
    ## print "DEBUG: v = [" s "]"
    return (s)
}

## Format an entry label as "volume(number) pages".
function id(volume,number,pages)
{
    return (volume "(" number ") " pages)
}

## Label of the previous entry.
function last_id()
{
    return (id(Last_Volume,Last_Number_Range,Last_Pages))
}

## Called at the start of each @Article entry: the field values still
## held from the preceding entry become the "last" values.
function new_entry()
{
    In_Article = 1
    Last_Volume = Volume
    Last_Number = Number
    Last_Number_Range = Number_Range
    Last_Pages = Pages
}

## Return s wrapped in single quotes for safe use as one shell word;
## each embedded single quote becomes '\''.
function shell_quote(s,    k,n,piece,quoted)
{
    n = split(s, piece, "'")
    quoted = "'" piece[1]
    for (k = 2; k <= n; ++k)
        quoted = quoted "'\\''" piece[k]
    return (quoted "'")
}

## Label of the current entry.  Uses the full number range, as
## last_id() does, so that a multi-number issue is labelled the same
## way whether it is the current or the previous entry.
function this_id()
{
    return (id(Volume,Number_Range,Pages))
}

## Print message prefixed by the file name and the line number of the
## previous entry's pages field; emit one blank line first whenever the
## volume differs from that of the preceding warning.
function warning(message)
{
    if (Last_Warned_Volume != Last_Volume)
        print ""
    print FILENAME ":" LAST_FNR ":%%:" message
    Last_Warned_Volume = Last_Volume
}