# /u/sy/beebe/tex/bib/find-page-matches.awk, Fri Oct 8 07:03:57 1999
# Edit by Nelson H. F. Beebe
# ======================================================================
# Search BibTeX entries for matches between the first page number in the
# same or immediately neighboring volumes.  Databases frequently have
# errors in citation years, and this program may help to turn up
# entries that should be corrected and merged.
#
# Usage:
#       gawk -f find-page-matches.awk \
#               [-v MATCHAUTHOR=1] \
#               [-v MATCHLETTER=1] \
#               [BibTeX-file(s) or < BibTeX-file]
#
# If MATCHAUTHOR is set to a nonzero value, warnings are suppressed if
# the author names extracted from BibNet-Project-style citation labels
# do not match.
#
# If MATCHLETTER is set to a nonzero value, warnings are suppressed if
# the initial letter of author names extracted from BibNet-Project-style
# citation labels, and converted to lowercase, does not match.
#
# The output on stderr is suitable for parsing by the emacs
# compile-library commands.
#
# This program should normally be run on only one file at a time,
# unless you really want to check for matches between different files.
#
# [13-Oct-1999] -- add MATCHLETTER option
# [08-Oct-1999] -- adapted from find-author-page-matches.awk
# [26-Sep-1999] -- original version as find-author-page-matches.awk
# ======================================================================

# Entry header line of the form "@type{Author:YYYY:tag,".  The first
# letter of the entry type may not be "S" or "s" (presumably so that
# @String definitions are skipped -- confirm against the databases used).
# The brace is written as [{] because an unescaped "{" in an extended
# regular expression is not portable across awk implementations.
/^@[A-RT-Za-rt-z][A-Za-z]+[{][A-Za-z0-9][-A-Za-z0-9]+:[12][0-9][0-9x][0-9x]:[A-Za-z]+,/ \
                        { begin_entry($0); next }

/^ *pages *= *"/        { do_pages($0); next }

/^ *volume *= *"/       { do_volume($0); next }

/^ *year *= *"/         { do_year($0); next }

# A line holding only a closing brace ends the current entry.
/^ *} *$/               { end_entry(); next }

# ======================================================================

# Return the part of citation label s that precedes its first colon
# (the author part of an Author:YYYY:tag label).
function author_from_citation_label(s)
{
    sub(":.*$","",s)
    return (s)
}


# Start a new entry: extract the citation label from header line s,
# remember the line number of the header, and clear the per-entry
# Volume, Year, and Page values.
function begin_entry(s)
{
    CitationLabel = s
    # Strip through the FIRST brace only: a greedy ".*{" would cut at the
    # last brace on the line if field text followed the label.
    sub("^[^{]*[{]","",CitationLabel)
    sub(",.*$","",CitationLabel)
    ## warning("DEBUG: CitationLabel = [" CitationLabel "]")
    Entry_FNR = FNR
    Volume = Year = Page = ""
}


# Record the first page number of a pages = "..." line.
function do_pages(s)
{
    Page = get_value(s)
    ## warning("DEBUG: Page = [" Page "]")
}


# Record the (first) volume number of a volume = "..." line.
function do_volume(s)
{
    Volume = get_value(s)
    ## warning("DEBUG: Volume = [" Volume "]")
}


# Record the (first) year of a year = "..." line.
function do_year(s)
{
    Year = get_value(s)
    ## warning("DEBUG: Year = [" Year "]")
}


# Finish the current entry: compare its (volume,page) and then its
# (year,page) against previously recorded entries, issuing at most one
# warning per entry.  An entry is recorded in a table only when none of
# the probed neighboring keys is already present in that table.
#
# key and hit are local variables.
function end_entry(    key, hit)
{
    ## warning("DEBUG: CitationLabel = [" CitationLabel "] Page = [" Page "] Volume = [" Volume "] Year = [" Year "]")

    # Ignore entries with incomplete information
    if ( (CitationLabel == "") || (Page == "") || (Volume == "") || (Year == "") )
        return

    Warnings = 0

    # Check volume/page consistency
    key = find_neighbor_key(Volume, Page, Volume_Page)
    if (key != "")
    {
        split(key, hit, SUBSEP)
        volume_match_warning(CitationLabel, hit[1], hit[2])
    }
    else
    {
        Volume_Page[Volume,Page] = FILENAME ":" Entry_FNR
        Volume_Page_CitationLabel[Volume,Page] = CitationLabel
    }

    if (Warnings > 0)
        return          # we don't want multiple messages for the same entry

    # Check year/page consistency
    key = find_neighbor_key(Year, Page, Year_Page)
    if (key != "")
    {
        split(key, hit, SUBSEP)
        year_match_warning(CitationLabel, hit[1], hit[2])
    }
    else
    {
        Year_Page[Year,Page] = FILENAME ":" Entry_FNR
        Year_Page_CitationLabel[Year,Page] = CitationLabel
    }
}


# Helper for end_entry(): look in table for the key (major,page) or one
# of its eight neighbors (major and/or page off by one), probing in the
# order exact, major-1, major+1, page+1, page-1, and then the four
# diagonal combinations.  Return the first key found, in the form
# major SUBSEP page, or "" if none is present.
#
# Off-by-one probes are made only for values that begin with a digit:
# a value such as "xii" or "A12" converts to 0 in arithmetic, so probing
# its "neighbors" would wrongly compare it with 1 and -1.
#
# Everything after table in the parameter list is a local variable.
function find_neighbor_key(major, page, table,    d, i, m, major_numeric, n, offsets, p, page_numeric)
{
    major_numeric = (major ~ /^[0-9]/)
    page_numeric = (page ~ /^[0-9]/)

    n = split("0 0,-1 0,1 0,0 1,0 -1,-1 1,-1 -1,1 1,1 -1", offsets, ",")
    for (i = 1; i <= n; i++)
    {
        split(offsets[i], d, " ")
        d[1] += 0
        d[2] += 0
        if ( ((d[1] != 0) && !major_numeric) || ((d[2] != 0) && !page_numeric) )
            continue

        # Keep the original string when the offset is zero, so that a
        # value such as "007" is looked up exactly as it was stored.
        m = (d[1] == 0) ? major : (major + d[1])
        p = (d[2] == 0) ? page : (page + d[2])
        if ((m,p) in table)
            return (m SUBSEP p)
    }
    return ("")
}


# Return the first component of the quoted value on field line s: the
# text between the opening quote and the first "-", ",", or "/" (or the
# closing quote), with surrounding blanks removed.  For example,
# pages = "123--130", yields 123.
#
# The trailing comma after the closing quote is optional, so the last
# field of an entry is handled as well.  parts is a local variable.
function get_value(s,    parts)
{
    sub("^[^\"]*\"","",s)
    sub("\" *,? *$","",s)
    split(s,parts,"[-,/]")
    gsub("^[ \t]+|[ \t]+$","",parts[1])
    return (parts[1])
}


# Warn that the entry labelled citationlabel collides with the entry
# previously recorded under Volume_Page[volume,page], and count the
# warning in Warnings.  Nothing is reported (or counted) when
# MATCHAUTHOR is nonzero and the author parts of the two labels differ,
# or when MATCHLETTER is nonzero and their lowercased first letters
# differ.
#
# Everything after page in the parameter list is a local variable.
function volume_match_warning(citationlabel,volume,page,    earlier_author, earlier_label, this_author)
{
    earlier_label = Volume_Page_CitationLabel[volume,page]
    this_author = author_from_citation_label(citationlabel)
    earlier_author = author_from_citation_label(earlier_label)

    if (MATCHAUTHOR && (this_author != earlier_author))
        return

    if (MATCHLETTER && \
        (tolower(substr(this_author,1,1)) != tolower(substr(earlier_author,1,1))))
        return

    warning(sprintf("%-31s\t%-31s\t%s", (citationlabel "," volume "," page), \
                    earlier_label, \
                    Volume_Page[volume,page]))
    Warnings++
}

# Print message on stderr, prefixed by the current file name and the
# line number at which the current entry began, in the
# "file:line:<TAB>message" form.
function warning(message,    where)
{
    where = FILENAME ":" Entry_FNR
    # Alternative destination, kept for reference:
    # print FILENAME ":" Entry_FNR ":\t" message >"/dev/tty"
    print where ":\t" message >"/dev/stderr"
}


# Warn that the entry labelled citationlabel collides with the entry
# previously recorded under Year_Page[year,page], and count the warning
# in Warnings.  Nothing is reported (or counted) when MATCHAUTHOR is
# nonzero and the author parts of the two labels differ, or when
# MATCHLETTER is nonzero and their lowercased first letters differ.
#
# Everything after page in the parameter list is a local variable.
function year_match_warning(citationlabel,year,page,    earlier_author, earlier_label, this_author)
{
    earlier_label = Year_Page_CitationLabel[year,page]
    this_author = author_from_citation_label(citationlabel)
    earlier_author = author_from_citation_label(earlier_label)

    if (MATCHAUTHOR && (this_author != earlier_author))
        return

    if (MATCHLETTER && \
        (tolower(substr(this_author,1,1)) != tolower(substr(earlier_author,1,1))))
        return

    warning(sprintf("%-31s\t%-31s\t%s", (citationlabel "," year "," page), \
                    earlier_label, \
                    Year_Page[year,page]))
    Warnings++
}