### -*-awk-*- ### ==================================================================== ### @Awk-file{ ### author = "Nelson H. F. Beebe", ### version = "1.01", ### date = "23 December 2002", ### time = "09:14:45 MST", ### filename = "ls-to-index-html-table.awk", ### copyright = "Copyright (C) 2000 Nelson H. F. Beebe", ### license = "GNU General Public License (version 2 or ### later)", ### address = "Center for Scientific Computing ### University of Utah ### Department of Mathematics, 322 INSCC ### 155 S 1400 E RM 233 ### Salt Lake City, UT 84112-0090 ### USA", ### telephone = "+1 801 581 5254", ### FAX = "+1 801 585 1640, +1 801 581 4148", ### URL = "http://www.math.utah.edu/~beebe", ### checksum = "07601 820 2683 23978", ### email = "beebe@math.utah.edu, beebe@acm.org, ### beebe@ieee.org (Internet)", ### codetable = "ISO/ASCII", ### keywords = "World-Wide Web index.html files", ### supported = "yes", ### docstring = "This program converts the output of `ls -1 *' ### to an HTML fragment on stdout suitable for ### inclusion in index.html, with manual addition ### of descriptive text. ### ### Any .PACKAGEINDEX file in the current ### directory is read to determine the default ### descriptions for each package. Provided that ### file is kept up-to-date, then completely ### automated construction of prettyprinted ### HTML-grammar-conformant index.html files is ### possible with this program. ### ### Usage: ### ls [--l --full-time] PATTERNS | \ ### awk \ ### [-v AFTER=file] \ ### [-v BEFORE=file] \ ### [-v DIRCOLS=n] \ ### [-v INDENT=n] \ ### [-v PACKAGEINDEX=file] \ ### [-v TITLE=title] \ ### -f ls-to-index-html-table.awk >outfile ### ### If the GNU -l --full-time form is used, then ### the output table will record the date of last ### modification of each file. 
### ### The output begins with a section labeled ### `Package directory' with a DIRCOLS-column ### table of package names that serves as a ### directory into the entries in the following ### `Package descriptions' section. That ### directory will not be complete unless ### PATTERNS == *. By default, DIRCOLS = 3. ### ### The optional AFTER variable defines an HTML ### file fragment (default: /dev/null) whose ### contents are inserted after the `Package ### descriptions' section. It should normally ### begin with an H2-level header. The recommended ### name is `index.after'; a file with that name ### will not itself be indexed. ### ### The optional BEFORE variable defines an HTML ### file fragment (default: /dev/null) whose ### contents are inserted before the `Package ### directory' section. It should normally begin ### with an H2-level header. The recommended ### name is `index.before'; a file with that name ### will not itself be indexed. ### ### The optional TITLE variable defines the ### content of the <TITLE> ... </TITLE> ### environment, and the initial <H1> ... </H1>
### environment. ### ### Entries in the `Package descriptions' table ### are tagged with NAME anchors, so that other ### HTML files can refer to them using URLs ### ending in ``index.html#PACKAGENAME''. This ### is a very convenient way to offer a stable ### URL that will bring the reader to a ### description that leads to various archive ### formats, as well as providing nearby ### information about related package versions. ### Should a package be moved from the current ### directory, you can leave behind a single ### .html file that leads the reader on to its ### new location. ### ### Output lines are indented according to their ### logical level using a minimum number of ### leading tabs and spaces. By default, each ### level corresponds to 8 columns (one tab), but ### that can be altered by assigning a different ### value on the command line to the INDENT ### variable. ### ### The package description file defaults to ### .PACKAGEINDEX, although that name can be ### overridden by the PACKAGEINDEX command-line ### option. It is intentionally named with a ### leading dot, so that directory listing ### commands don't show it, and it is read to ### find package descriptions that go into the ### index.html fragment output by this program. ### ### The PACKAGEINDEX file consists of lines of ### the forms: ### ### blank or empty lines ### ### % comment (to end-of-line) ### ### TITLE Package title ### ### PACKAGE-VERSION Description and ### optional more ### description ### ### PACKAGE Description and ### optional more ### description ### ### The only significant input column is 1: ### PACKAGE must start there. Continued ### description lines need only begin with at ### least one space or tab. ### ### The PACKAGE name must not contain any HTML ### markup. There is no need to describe the ### individual archive file formats or their ### associated listing files; that information ### will be supplied automatically.
### ### The description should consist of complete ### sentences, and may include HTML markup ### (usually for fonts), if that is desirable. ### It should NOT contain: ### ### * any mention of archive formats, since that ### information is repetitive, and should be ### separately documented in the index.html file; ### ### * any file time stamp information; that can ### be supplied automatically by ### ls-to-index-html-table.awk; ### ### * mention of manual page documentation, since ### that information too can be supplied ### automatically. ### ### When a version number is omitted, the ### description applies to all versions of the ### package. That reduces the work required to ### maintain this file, and in most cases, ### ensures that no further modifications are ### needed when a new version of a package is ### installed, unless additional version-specific ### remarks are required. ### ### The -VERSION portion of the PACKAGE-VERSION ### field beginning in column 1 matches the ### regular expression ### ``-([0-9]+[-_.])+[0-9]+$''. ### ### A PACKAGE-VERSION section takes precedence ### over a similarly-named PACKAGE section, but ### applies only to that specific VERSION. ### ### The checksum field above contains a CRC-16 ### checksum as the first value, followed by the ### equivalent of the standard UNIX wc (word ### count) utility output of lines, words, and ### characters.
###                        This is produced by Robert
###                        Solovay's checksum utility.",
###  }
### ====================================================================

## NOTE(review): this file was recovered from a copy in which line
## breaks had been collapsed and HTML markup had been stripped from
## string literals.  Markup that could be reconstructed from the
## surrounding code (closing tags, NAME anchors, file hyperlinks) has
## been restored; places where the original literal text could not be
## determined are marked with NOTE(review) comments below.

BEGIN           { initialize() }

## Input records are classified purely by their field count.
NF == 1         { add_entry($1, ""); next }                             # "ls -1" input

NF == 9         { add_entry($9, $6 " " substr($7, 1, 8)); next }        # new "ls -l --full-time" input

NF == 11        { add_entry($11, $8 "-" $7 "-" $10 " " $9); next }      # old "ls -l --full-time" input

END             { terminate() }

#=======================================================================

function add_entry(fullname,timestamp)
{
    ## Record one file from the ls listing.  The first file of each
    ## Basename opens a new table row (name cell, optional timestamp
    ## cell, description cell); later files with the same Basename just
    ## add another [extension] link to the still-open description cell.
    ##
    ## Sets the globals Basename, Extension, Package, Last_Basename and
    ## Last_Package, and appends to TOC[].

    ## Ignore temporary files and editor backup files.  The original
    ## pattern was "^foo*|.*~|#", whose first alternative matched every
    ## name beginning with "fo"; the glob foo* was evidently intended.
    if (fullname ~ "^foo|~|#")
        return

    ## index, index.html, index.before, index.after, ... are never indexed.
    if (fullname ~ "^index$|^index[.].*$")
        return

    ## Package naming conventions:
    ##     if   fullname  == "foobar-5.3.2.tar.gz"
    ##     then Basename  == "foobar-5.3.2"
    ##     and  Package   == "foobar"
    ##     and  Extension == "tar.gz"

    Extension = archive_extension(fullname)
    Basename = fullname
    if (Extension != "")        # strip "." Extension from the end
        Basename = substr(fullname, 1, length(fullname) - length(Extension) - 1)
    Package = package_name(Basename)

    ##?? print "DEBUG: Basename = [" Basename "] Last_Basename = [" Last_Basename "]"

    if (Basename == Last_Basename)      # another format of the same package
    {
        open_TD()
        add_href(fullname,Extension)
        return
    }

    ## New package: finish the previous row (the close_xx() calls are
    ## no-ops when nothing is open) and start a fresh one.
    open_BODY()
    open_TABLE()
    close_TD()
    close_TR()

    Last_Basename = Basename
    Last_Package = Package
    TOC[++NTOC] = Basename

    open_TR()
    open_TD()
    ## NOTE(review): markup that originally wrapped Basename here was
    ## lost and cannot be determined from this copy.
    output("" Basename "")
    close_TD()

    ## The table looks neater if the timestamp has a separate
    ## column, rather than being embedded in the description text.
    if (timestamp != "")
    {
        open_TD()
        ## NOTE(review): markup around the timestamp was lost as well.
        output("" timestamp "")
        close_TD()
    }

    open_TD()

    ## We put the <A NAME="..."> locator in the description
    ## cell, instead of in the preceding, and more logical, package
    ## name cell, because some browsers position the locator line at
    ## the top of the screen, which leaves a multiline description
    ## cell cut off at the top, forcing the user to manually scroll
    ## the screen to read the description, sigh...
    ##
    ## However, there is an additional problem.  The HTML grammars
    ## sensibly permit the NAME anchor to have empty content, since
    ## it is, after all, only a position marker.  Unfortunately,
    ## some browsers (including netscape) ignore such anchors.  If
    ## the content is made a nonbreakable space (&nbsp;), then the
    ## anchor is handled correctly, but then the cell begins with a
    ## visible space.
    ##
    ## In order to help the reader, who is after all the most
    ## important one, we therefore make an ugly hack: we put the
    ## anchor at the end of the first description line, where the
    ## visible space may be less noticeable.  That nasty code is
    ## hidden inside output_description(), and the two add_name()
    ## calls below are commented out, to preserve their
    ## partially-logical location.

    ## add_name(Basename,"")

    ## Supply a NAME location for a URL of the form
    ##     http://hostname/path/to/directory/index.html#PACKAGENAME
    ## where PACKAGENAME lacks any version number.
    ## if (!(Package in Name_Used))
    ##     add_name(Package,"")

    ## Output any package description according to decreasing
    ## precedence of package naming.
    if (fullname in Description)                # e.g., foobar-5.3.2.tar.gz
        output_description(Description[fullname])
    else if (Basename in Description)           # e.g., foobar-5.3.2
        output_description(Description[Basename])
    else if (Package in Description)            # e.g., foobar
        output_description(Description[Package])
    else
        output_description("")

    add_href(fullname,Extension)
}

function archive_extension(name,    k,known,list,n,suffix)
{
    ## Return the recognized archive/listing extension of name (without
    ## its leading dot), or "" if name has none.  00dir.html and
    ## 00tdir.html are deliberately treated as having no extension.

    list = "arc arc-lst html jar jar-lst shar shar-lst tar.bz2 tar.gz"
    list = list " tar-lst zip zip-lst zoo zoo-lst"
    n = split(list, known, " ")

    for (k = 1; k <= n; ++k)
    {
        suffix = "." known[k]
        if ((length(name) >= length(suffix)) && \
            (substr(name, length(name) - length(suffix) + 1) == suffix))
        {
            if ((known[k] == "html") && (name ~ "^00t?dir[.]html"))
                return ("")
            return (known[k])
        }
    }
    return ("")
}

function add_href(fullname,extension,    label)
{
    ## Output a typewriter-font hyperlink to fullname, labeled with its
    ## extension in square brackets ("[file]" if it has none).
    ## The <A HREF=...> wrapper had been stripped from this copy,
    ## leaving fullname unused and the files unreachable from the index.

    label = (extension == "") ? "file" : extension

    begin_tag("TT")
    output("<A HREF=\"" fullname "\">[" label "]</A>")
    end_tag("TT")
}

function add_name(name,description)
{
    ## Output a NAME anchor for name, wrapped around description (or as
    ## an empty one-line anchor if description is empty), warning if
    ## name has already been used as an anchor.

    if (name in Name_Used)
        warning("duplicate <A NAME=\"" name "\">...</A>")
    Name_Used[name]++

    if (description == "")      # use one-liner for empty description
        output("<A NAME=\"" name "\"></A>")
    else
    {
        begin_tag("A NAME=\"" name "\"")
        output(description)
        end_tag("A")
    }
}

function begin_tag(tag)
{
    ## Output <tag> (tag may carry attributes) and indent what follows.
    output("<" tag ">")
    HTML_Level++
}

function close_BODY()
{
    ## Close the BODY and HTML environments, if open_BODY() opened them.
    if (BODY_Open)
    {
        end_tag("BODY")
        end_tag("HTML")
        BODY_Open = 0
    }
}

function close_TABLE()
{
    if (TABLE_Open)
    {
        end_tag("TABLE")
        TABLE_Open = 0
    }
}

function close_TD()
{
    if (TD_Open)
    {
        end_tag("TD")
        TD_Open = 0
    }
}

function close_TR()
{
    if (TR_Open)
    {
        end_tag("TR")
        TR_Open = 0
    }
}

function detab(s)
{
    ## Return s with each tab replaced by a single space.
    gsub("\t"," ",s)
    return (s)
}

function end_tag(tag)
{
    ## Undo one indentation level and output </tag>; tag must be the
    ## bare element name, without attributes.  (This copy had lost the
    ## closing-tag literal, so no end tags were being emitted at all.)
    HTML_Level--
    output("</" tag ">")
}

function indentation(level,    columns,k,nspaces,ntabs,s)
{
    ## Return a string of tabs and blanks for indenting to level.  Each
    ## indentation level corresponds to INDENT columns, and a tab is
    ## taken to be 8 columns wide.  The padding is built with loops so
    ## that neither the tab count nor the blank count is limited by the
    ## length of a literal string.

    columns = level * INDENT
    ntabs = int(columns / 8)
    nspaces = int(columns % 8)

    s = ""
    for (k = 1; k <= ntabs; ++k)
        s = s "\t"
    for (k = 1; k <= nspaces; ++k)
        s = s " "
    return (s)
}

function initialize()
{
    ## Set defaults for the command-line variables, collect environment
    ## information, reset global state, and load Description[] from the
    ## PACKAGEINDEX file.

    ## Handle the command-line customizations

    DIRCOLS = ((DIRCOLS == "") ? 3 : (0 + DIRCOLS))
    if (DIRCOLS <= 0)
        DIRCOLS = 1

    INDENT = ((INDENT == "") ? 8 : (0 + INDENT))        # user customizable
    if (INDENT < 0)
        INDENT = 8

    if (PACKAGEINDEX == "")
        PACKAGEINDEX = ".PACKAGEINDEX"  # where the package descriptions reside

    ## Handle remaining internal initializations

    "date" | getline Current_Date_and_Time
    close("date")

    ## NOTE(review): User, Hostname, Mailhost and Current_Directory are
    ## not referenced anywhere else in this copy; presumably they fed
    ## the header text that was lost from open_BODY().
    if ("USER" in ENVIRON)
        User = ENVIRON["USER"]
    else if ("LOGNAME" in ENVIRON)
        User = ENVIRON["LOGNAME"]
    else
        User = "unknown"

    if ("HOST" in ENVIRON)
        Hostname = ENVIRON["HOST"]
    else
    {
        "hostname" | getline Hostname
        close("hostname")
        if (Hostname == "")
            Hostname = "unknown"
    }

    if ("INDOMAIN" in ENVIRON)
        Mailhost = ENVIRON["INDOMAIN"]
    else
        Mailhost = Hostname

    if ("PWD" in ENVIRON)
        Current_Directory = ENVIRON["PWD"]
    else if ("cwd" in ENVIRON)
        Current_Directory = ENVIRON["cwd"]
    else
        Current_Directory = "/unknown/directory"

    NTOC = 0

    BODY_Open = 0
    TD_Open = 0
    TR_Open = 0
    TABLE_Open = 0

    Basename = ""
    Package = ""

    Last_Basename = ""
    Last_Package = ""

    ## Read the package descriptions in order to initialize the
    ## Description[] array.

    while ((getline < PACKAGEINDEX) > 0)
    {
        if (NF == 0)                    # blank or empty line
            continue

        if ($0 ~ /^%/)                  # comment line
            continue

        if (index($0,$1) == 1)          # then have package or package-version
        {
            Basename = $1
            Package = package_name(Basename)
            Description[Basename] = trim(detab(substr($0,1+length($1))))
            if (Package == Basename)    # explicit version-independent entry
                Generic_Description[Package] = 1
        }
        else if ((Basename != "") && ($0 ~ /^[ \t]/))
        {                               # then description continuation line
            Description[Basename] = Description[Basename] "\n" indentation(5) trim(detab($0))
        }
        else                            # continuation before any package
            continue

        ## A version-specific entry also serves as the fallback for its
        ## unversioned package name, but must never replace an explicit
        ## PACKAGE entry: it applies only to that specific VERSION.
        if ((Package != Basename) && !(Package in Generic_Description))
            Description[Package] = Description[Basename]
    }
    close(PACKAGEINDEX)                 # was close("index"), which closed nothing

    ## The command-line TITLE variable can be set from the package
    ## description file:

    if (TITLE == "")
    {
        if ("TITLE" in Description)
            TITLE = Description["TITLE"]
        else
            TITLE = ""
    }

    ## Provide a few default descriptions for standard files in the Utah
    ## Web/FTP tree.

    set_default_description("00dir.cmd","Alphabetical FTP get commands")
    set_default_description("00dir.html","Alphabetical file listing")
    set_default_description("00dir.lst","Alphabetical file listing")
    set_default_description("00tdir.cmd","Reverse-time-ordered FTP get commands")
    set_default_description("00tdir.html","Reverse-time-ordered file listing")
    set_default_description("00tdir.lst","Reverse-time-ordered file listing")
}

function name_ref(name)
{
    ## Return an NAME anchor with content of one visible space (to fool
    ## defective browsers), and warn if name has been used before this
    ## way (in which case, return an empty string).

    if (name in Name_Used)
    {
        warning("duplicate <A NAME=\"" name "\">...</A>")
        return ("")
    }

    Name_Used[name]++
    return ("<A NAME=\"" name "\">&nbsp;</A>")
}

function open_BODY(    k)
{
    ## On first call, emit the document prologue, HEAD with TITLE, the
    ## start of BODY with an H1 title and a last-update paragraph, and
    ## any BEFORE fragment, then flush everything saved so far.

    if (!BODY_Open)
    {
        HTML_Level = 0

        ## NOTE(review): these sixteen lines were originally non-empty
        ## literals (presumably a DOCTYPE and an HTML comment banner)
        ## whose text was lost; they are kept as empty lines because
        ## the original wording cannot be recovered from this copy.
        for (k = 1; k <= 16; ++k)
            output("")

        begin_tag("HTML")
        begin_tag("HEAD")
        begin_tag("TITLE")
        output(TITLE)
        end_tag("TITLE")
        output("")              # NOTE(review): literal lost here too
        end_tag("HEAD")

        begin_tag("BODY")
        begin_tag("H1")
        output(TITLE)
        end_tag("H1")

        begin_tag("P")
        output("Last update: " Current_Date_and_Time "")
        end_tag("P")

        print_before()
        print_saved_lines(1)
        BODY_Open = 1
    }
}

function open_TABLE()
{
    ## On first call, emit the `Package descriptions' heading and open
    ## the bordered description table.

    if (!TABLE_Open)
    {
        begin_tag("H2")
        begin_tag("A NAME=\"package-descriptions\"")
        output("Package descriptions")
        end_tag("A")
        end_tag("H2")
        begin_tag("TABLE BORDER=1")
        TABLE_Open = 1
    }
}

function open_TD()
{
    if (!TD_Open)
    {
        begin_tag("TD")
        TD_Open = 1
    }
}

function open_TR()
{
    if (!TR_Open)
    {
        output("")              # NOTE(review): literal possibly lost here
        begin_tag("TR")
        TR_Open = 1
    }
}

function output(line)
{
    ## Save line, indented to the current HTML nesting level, for later
    ## printing by print_saved_lines().
    Saved_Line[++N_Saved_Line] = indentation(HTML_Level) line
}

function output_description(description,    anchors,n)
{
    ## Output the package description, sneaking in one or two NAME
    ## anchors at the end of the first line, so that they are less
    ## visible.  This ugly hack is a response to browser brain damage!

    if (description == "")      # the PACKAGEINDEX file is out-of-date
        warning("no description available for " Package " in " PACKAGEINDEX)

    n = index(description,"\n")
    if (n == 0)                 # then one-line description
        n = length(description) + 1

    ## Build the anchors in separate statements: name_ref(Basename)
    ## updates Name_Used[], and the test for the Package anchor must see
    ## that update.  awk does not promise an evaluation order within a
    ## single concatenation expression.
    anchors = name_ref(Basename)
    if (!(Package in Name_Used))
        anchors = anchors name_ref(Package)

    output(substr(description,1,n-1) anchors substr(description,n))
}

function package_name(basename)
{
    ## Return basename stripped of a trailing -VERSION (for example,
    ## -5.3.2) and then of a trailing lowercase .extension.
    sub("-([0-9]+[-_.])+[0-9]+$","",basename)
    sub("[.][a-z]+$","",basename)
    return (basename)
}

function print_after()
{
    print_before_or_after(AFTER)
}

function print_before()
{
    print_before_or_after(BEFORE)
}

function print_before_or_after(infile,    line,old_HTML_Level)
{
    ## Copy the HTML fragment file infile to the saved output lines,
    ## unless it is unset or /dev/null.

    if ((infile != "") && (infile != "/dev/null"))
    {
        old_HTML_Level = HTML_Level
        HTML_Level = 0          # so input indentation is preserved!
        ##?? print "DEBUG: Reading " infile > "/dev/stderr"
        while ((getline line < infile) > 0)
            output(line)
        close(infile)
        HTML_Level = old_HTML_Level
    }
}

function print_saved_lines(start,    k)
{
    ## Print saved lines start..N_Saved_Line, and then discard them.
    for (k = start; k <= N_Saved_Line; ++k)
        print Saved_Line[k]
    N_Saved_Line = start - 1
}

function print_TOC(    j,k,n,start)
{
    ## Generate the `Package directory' table, filled column by column
    ## with DIRCOLS columns, and print it ahead of the already-saved
    ## `Package descriptions' lines, which are then printed as well.

    start = N_Saved_Line + 1    # directory lines are saved after the table lines

    if (NTOC > 0)
    {
        begin_tag("H2")
        begin_tag("A NAME=\"package-directory\"")
        output("Package directory")
        end_tag("A")
        end_tag("H2")

        begin_tag("TABLE")
        n = int((NTOC + (DIRCOLS - 1))/DIRCOLS)         # number of rows
        for (k = 1; k <= n; ++k)
        {
            begin_tag("TR")
            for (j = 0; j < DIRCOLS; ++j)
                print_TOC_cell(k + j*n)
            end_tag("TR")
        }
        end_tag("TABLE")
    }

    print_saved_lines(start)    # the directory first ...
    print_saved_lines(1)        # ... and then the descriptions
}

function print_TOC_cell(n)
{
    ## Output the directory cell for TOC[n], linking to its NAME anchor
    ## in the descriptions table; do nothing if n is out of range or the
    ## cell has already been printed.

    if ((n in TOC) && !(n in TOC_cell_printed))
    {
        begin_tag("TD")
        begin_tag("A HREF=\"#" TOC[n] "\"")
        output(TOC[n])
        end_tag("A")
        end_tag("TD")
        TOC_cell_printed[n]++
    }
}

function set_default_description(file,descr)
{
    ## Supply descr for file unless PACKAGEINDEX already described it.
    if (!(file in Description))
        Description[file] = descr
}

function terminate()
{
    ## Close any open table elements, then print the directory, the
    ## descriptions, the AFTER fragment, and the document trailer.
    close_TD()
    close_TR()
    close_TABLE()
    print_TOC()
    print_after()
    close_BODY()
    print_saved_lines(1)
}

function trim(s)
{
    ## Return s without leading and trailing blanks and tabs.
    gsub(/^[ \t]+/,"",s)
    gsub(/[ \t]+$/,"",s)
    return (s)
}

function warning(message)
{
    ## Report message on stderr, tagged with the current input position.
    print FILENAME ":" FNR ":%%" message >"/dev/stderr"
}