### -*-awk-*-
### ====================================================================
### @Awk-file{
### author = "Nelson H. F. Beebe",
### version = "1.01",
### date = "23 December 2002",
### time = "09:14:45 MST",
### filename = "ls-to-index-html-table.awk",
### copyright = "Copyright (C) 2000 Nelson H. F. Beebe",
### license = "GNU General Public License (version 2 or
### later)",
### address = "Center for Scientific Computing
### University of Utah
### Department of Mathematics, 322 INSCC
### 155 S 1400 E RM 233
### Salt Lake City, UT 84112-0090
### USA",
### telephone = "+1 801 581 5254",
### FAX = "+1 801 585 1640, +1 801 581 4148",
### URL = "http://www.math.utah.edu/~beebe",
### checksum = "07601 820 2683 23978",
### email = "beebe@math.utah.edu, beebe@acm.org,
### beebe@ieee.org (Internet)",
### codetable = "ISO/ASCII",
### keywords = "World-Wide Web index.html files",
### supported = "yes",
### docstring = "This program converts the output of `ls -1 *'
### to an HTML fragment on stdout suitable for
### inclusion in index.html, with manual addition
### of descriptive text.
###
### Any .PACKAGEINDEX file in the current
### directory is read to determine the default
### descriptions for each package. Provided that
### file is kept up-to-date, then completely
### automated construction of prettyprinted
### HTML-grammar-conformant index.html files is
### possible with this program.
###
### Usage:
### ls [--l --full-time] PATTERNS | \
### awk \
### [-v AFTER=file] \
### [-v BEFORE=file] \
### [-v DIRCOLS=n] \
### [-v INDENT=n] \
### [-v PACKAGEINDEX=file] \
### [-v TITLE=title] \
### -f ls-to-index-html-table.awk >outfile
###
### If the GNU -l --full-time form is used, then
### the output table will record the date of last
### modification of each file.
###
### The output begins with a section labeled
### `Package directory' with a DIRCOLS-column
### table of package names that serves as a
### directory into the entries in the following
### `Package descriptions' section. That
### directory will not be complete unless
### PATTERNS == *. By default, DIRCOLS = 3.
###
### The optional AFTER variable defines an HTML
### file fragment (default: /dev/null) whose
### contents are inserted after the `Package
### descriptions' section. It should normally
### begin with an H2-level header. The recommended
### name is `index.after'; a file with that name
### will not itself be indexed.
###
### The optional BEFORE variable defines an HTML
### file fragment (default: /dev/null) whose
### contents are inserted before the `Package
### directory' section. It should normally begin
### with an H2-level header. The recommended
### name is `index.before'; a file with that name
### will not itself be indexed.
###
### The optional TITLE variable defines the
### content of the
### <TITLE> ... </TITLE>
### environment, and the initial
### <H1> ... </H1>
### environment.
###
### Entries in the `Package descriptions' table
### are tagged with NAME anchors, so that other
### HTML files can refer to them using URLs
### ending in ``index.html#PACKAGENAME''. This
### is a very convenient way to offer a stable
### URL that will bring the reader to a
### description that leads to various archive
### formats, as well as providing nearby
### information about related package versions.
### Should a package be moved from the current
### directory, you can leave behind a single
### .html file that leads the reader on to its
### new location.
###
### Output lines are indented according to their
### logical level using a minimum number of
### leading tabs and spaces. By default, each
### level corresponds to 8 columns (one tab), but
### that can be altered by assigning a different
### value on the command line to the INDENT
### variable.
###
### The package description file defaults to
### .PACKAGEINDEX, although that name can be
### overridden by the PACKAGEINDEX command-line
### option. It is intentionally named with a
### leading dot, so that directory listing
### commands don't show it, and it is read to
### find package descriptions that go into the
### index.html fragment output by that program.
###
### The PACKAGEINDEX file consists of lines of
### the forms:
###
### blank or empty lines
###
### % comment (to end-of-line)
###
### TITLE Package title
###
### PACKAGE-VERSION Description and
### optional more
### description
###
### PACKAGE Description and
### optional more
### description
###
### The only significant input column is 1:
### PACKAGE or PACKAGE-VERSION must start there. Continued
### description lines need only begin with at
### least one space or tab.
###
### The PACKAGE name must not contain any HTML
### markup. There is no need to describe the
### individual archive file formats or their
### associated listing files; that information
### will be supplied automatically.
###
### The description should consist of complete
### sentences, and may include HTML markup
### (usually for fonts), if that is desirable.
### It should NOT contain:
###
### * any mention of archive formats, since that
### information is repetitive, and should be
### separately documented in the index.html file;
###
### * any file time stamp information; that can
### be supplied automatically by
### ls-to-index-html-table.awk;
###
### * mention of manual page documentation, since
### that information too can be supplied
### automatically.
###
### When a version number is omitted, the
### description applies to all versions of the
### package. That reduces the work required to
### maintain this file, and in most cases,
### ensures that no further modifications are
### needed when a new version of a package is
### installed, unless additional version-specific
### remarks are required.
###
### The -VERSION portion of the field beginning
### in column 1 matches the regular expression
### ``-([0-9]+[-_.])+[0-9]+$''.
###
### PACKAGE-VERSION takes precedence over a
### similarly-named PACKAGE section, but
### applies only to that specific VERSION.
###
### The checksum field above contains a CRC-16
### checksum as the first value, followed by the
### equivalent of the standard UNIX wc (word
### count) utility output of lines, words, and
### characters. This is produced by Robert
### Solovay's checksum utility.",
### }
### ====================================================================
## Set up all state before the first input line is read.
BEGIN { initialize() }

## Classify each listing line by its field count and record the file
## that it names.  Lines with any other field count are ignored.
{
    if (NF == 1)			# "ls -1" input
    {
        add_entry($1, "")
        next
    }
    else if (NF == 9)			# new "ls -l --full-time" input
    {
        add_entry($9, $6 " " substr($7, 1, 8))
        next
    }
    else if (NF == 11)			# old "ls -l --full-time" input
    {
        add_entry($11, $8 "-" $7 "-" $10 " " $9)
        next
    }
}

## Flush everything that is still buffered.
END { terminate() }
#=======================================================================
function add_entry(fullname,timestamp, k,len,n,suffix,tail)
{
    ## Record one listed file in the `Package descriptions' table.
    ##
    ##   fullname   file name as it appeared in the listing
    ##   timestamp  last-modification time, or "" if not available
    ##
    ## The first file seen for a given Basename starts a new table row
    ## (name cell, optional timestamp cell, description cell).  A file
    ## whose Basename equals that of the immediately preceding file
    ## only appends another link to the still-open description cell,
    ## so only consecutive input lines are grouped.
    ##
    ## Sets the globals Basename, Extension, and Package (read by
    ## output_description()), and updates Last_Basename, Last_Package,
    ## and the TOC[] array.

    ## Ignore temporary files (foo*), editor backup files (*~), and
    ## names containing #.  The prefix test is "^foo", not "^foo*":
    ## as a regular expression the latter means `fo' followed by zero
    ## or more `o', which silently dropped every file whose name
    ## merely starts with "fo".
    if (fullname ~ "^foo|.*~|#")
        return

    ## The index files themselves are never indexed.
    if (fullname ~ "^index$|^index[.].*$")
        return

    ## Package naming conventions:
    ## if fullname == "foobar-5.3.2.tar.gz"
    ## then Basename == "foobar-5.3.2"
    ## and Package == "foobar"
    ## and Extension == "tar.gz"
    Basename = fullname
    Extension = ""
    n = split("arc arc-lst html jar jar-lst shar shar-lst tar.bz2 " \
              "tar.gz tar-lst zip zip-lst zoo zoo-lst", suffix, " ")
    for (k = 1; k <= n; ++k)
    {
        tail = "." suffix[k]
        len = length(Basename) - length(tail)
        if ((len >= 0) && (substr(Basename, len + 1) == tail))
        {
            ## The 00dir.html and 00tdir.html listings keep their
            ## .html suffix as part of Basename.
            if ((suffix[k] != "html") || (Basename !~ "^00t?dir[.]html"))
            {
                Extension = suffix[k]
                Basename = substr(Basename, 1, len)
            }
            break		# a name can end in only one known suffix
        }
    }
    Package = package_name(Basename)

    if (Basename == Last_Basename)
    {
        ## Another archive format of the current entry: just add a
        ## link to the description cell.
        open_TD()
        add_href(fullname,Extension)
        return
    }

    ## First file for this Basename: make sure that the document and
    ## table headers exist, finish the previous row (if any), and
    ## start a new one.
    open_BODY()
    open_TABLE()
    if (Last_Basename != "")
    {
        close_TD()
        close_TR()
    }
    Last_Basename = Basename
    TOC[++NTOC] = Basename
    open_TR()
    open_TD()
    output(Basename)
    Last_Package = Package
    close_TD()

    ## The table looks neater if the timestamp has a separate
    ## column, rather than being embedded in the description text.
    if (timestamp != "")
    {
        open_TD()
        output(timestamp)
        close_TD()
    }

    open_TD()
    ## We put the <A NAME="...">...</A> locator in the description
    ## cell, instead of in the preceding, and more logical, package
    ## name cell, because some browsers position the locator line at
    ## the top of the screen, which leaves a multiline description
    ## cell cut off at the top, forcing the user to manually scroll
    ## the screen to read the description, sigh...
    ##
    ## However, there is an additional problem.  The HTML grammars
    ## sensibly permit the NAME anchor to have empty content, since
    ## it is, after all, only a position marker.  Unfortunately,
    ## some browsers (including netscape) ignore such anchors.  If
    ## the content is made a nonbreakable space (&nbsp;), then the
    ## anchor is handled correctly, but then the cell begins with a
    ## visible space.
    ##
    ## In order to help the reader, who is after all the most
    ## important one, we therefore make an ugly hack: we put the
    ## anchor at the end of the first description line, where the
    ## visible space may be less noticeable.  That nasty code is
    ## hidden inside output_description(), which supplies a NAME
    ## location for both Basename and (once only) the version-free
    ## Package, so that URLs of the form
    ## http://hostname/path/to/directory/index.html#PACKAGENAME
    ## work.  The logical places for those anchors would be:
    ## add_name(Basename,"")
    ## add_name(Package,"")

    ## Output any package description according to decreasing
    ## precedence of package naming.
    if (fullname in Description)		# e.g., foobar-5.3.2.tar.gz
        output_description(Description[fullname])
    else if (Basename in Description)		# e.g., foobar-5.3.2
        output_description(Description[Basename])
    else if (Package in Description)		# e.g., foobar
        output_description(Description[Package])
    else
        output_description("")
    add_href(fullname,Extension)
}
function add_href(fullname,extension)
{
    ## Append to the current cell a bracketed, typewriter-font
    ## hyperlink to the file fullname.  The link text is the archive
    ## extension, or "file" when extension is empty.
    ##
    ## The link target is fullname itself, i.e., a URL relative to the
    ## directory that holds the generated index.html.
    begin_tag("TT")
    output("[<A HREF=\"" fullname "\">" \
           ((extension == "") ? "file" : extension) \
           "</A>]")
    end_tag("TT")
}
function add_name(name,description)
{
    ## Output a NAME anchor for name, wrapped around description.
    ## A name that has already been given an anchor draws a warning,
    ## but the anchor is output anyway.  Every use is counted in
    ## Name_Used[].
    if (name in Name_Used)
        warning("duplicate <A NAME=\"" name "\">...</A>")
    Name_Used[name]++
    if (description == "")		# use one-liner for empty description
        output("<A NAME=\"" name "\"></A>")
    else
    {
        begin_tag("A NAME=\"" name "\"")
        output(description)
        end_tag("A")
    }
}
function begin_tag(tag)
{
    ## Emit the opening <tag> at the current nesting level, and then
    ## indent whatever follows one level deeper.  The argument may
    ## carry attributes as well as the element name.
    output("<" tag ">")
    HTML_Level += 1
}
function close_BODY()
{
    ## Close the BODY and HTML elements if open_BODY() opened them.
    ## The flag is cleared, as in the other close_XXX() functions, so
    ## that a repeated call cannot emit the closing tags twice.
    if (BODY_Open)
    {
        end_tag("BODY")
        end_tag("HTML")
        BODY_Open = 0
    }
}
function close_TABLE()
{
    ## End the descriptions table, unless no table is open.
    if (!TABLE_Open)
        return
    end_tag("TABLE")
    TABLE_Open = 0
}
function close_TD()
{
    ## End the current table cell, unless no cell is open.
    if (!TD_Open)
        return
    end_tag("TD")
    TD_Open = 0
}
function close_TR()
{
    ## End the current table row, unless no row is open.
    if (!TR_Open)
        return
    end_tag("TR")
    TR_Open = 0
}
function detab(s)
{
    ## Return a copy of s in which each tab is replaced by a space.
    gsub(/\t/, " ", s)
    return (s)
}
function end_tag(tag)
{
    ## Return to the enclosing nesting level, and emit the closing
    ## </tag> there, so that it lines up with the matching opening tag
    ## written by begin_tag().  The argument is the bare element name,
    ## without attributes.
    HTML_Level--
    output("</" tag ">")
}
function indentation(level, columns,ntabs,nspaces,s)
{
    ## Return a string of tabs and blanks for indenting to level.  Each
    ## indentation level corresponds to INDENT columns, and a tab
    ## stands for 8 columns, so the result uses the minimum number of
    ## characters: as many tabs as fit, then up to 7 blanks.
    ##
    ## The string is built up character by character rather than cut
    ## from fixed-length constants, so that neither the nesting depth
    ## nor the number of trailing blanks is limited.  A nonpositive
    ## level produces an empty string.
    columns = level * INDENT
    ntabs = int(columns / 8)
    nspaces = int(columns % 8)
    s = ""
    while (ntabs-- > 0)
        s = s "\t"
    while (nspaces-- > 0)
        s = s " "
    return (s)
}
function initialize()
{
    ## One-time setup, called from the BEGIN rule: apply defaults to
    ## the command-line variables, collect some facts about the
    ## environment, reset the output state, and load the package
    ## descriptions from the PACKAGEINDEX file into Description[].

    ## Handle the command-line customizations
    DIRCOLS = ((DIRCOLS == "") ? 3 : (0 + DIRCOLS))
    if (DIRCOLS <= 0)
        DIRCOLS = 1
    INDENT = ((INDENT == "") ? 8 : (0 + INDENT))	# user customizable
    if (INDENT < 0)
        INDENT = 8
    if (PACKAGEINDEX == "")
        PACKAGEINDEX = ".PACKAGEINDEX"	# where the package descriptions reside

    ## Handle remaining internal initializations
    "date" | getline Current_Date_and_Time
    close("date")

    if ("USER" in ENVIRON)
        User = ENVIRON["USER"]
    else if ("LOGNAME" in ENVIRON)
        User = ENVIRON["LOGNAME"]
    else
        User = "unknown"

    if ("HOST" in ENVIRON)
        Hostname = ENVIRON["HOST"]
    else
    {
        "hostname" | getline Hostname
        close("hostname")
        if (Hostname == "")
            Hostname = "unknown"
    }

    if ("INDOMAIN" in ENVIRON)
        Mailhost = ENVIRON["INDOMAIN"]
    else
        Mailhost = Hostname

    if ("PWD" in ENVIRON)
        Current_Directory = ENVIRON["PWD"]
    else if ("cwd" in ENVIRON)
        Current_Directory = ENVIRON["cwd"]
    else
        Current_Directory = "/unknown/directory"

    NTOC = 0
    BODY_Open = 0
    TD_Open = 0
    TR_Open = 0
    TABLE_Open = 0
    Basename = ""
    Package = ""
    Last_Basename = ""
    Last_Package = ""

    ## Read the package descriptions in order to initialize the
    ## Description[] array.  A missing or unreadable file simply
    ## yields no descriptions.
    while ((getline < PACKAGEINDEX) > 0)
    {
        if ($0 ~ /^[ \t]*$/)		# blank or empty line
            continue
        if ($0 ~ /^%/)			# comment line
            continue
        if (index($0,$1) == 1)		# then have package or package-version
        {
            Basename = $1
            Package = package_name(Basename)
            Description[Basename] = trim(detab(substr($0,1+length($1))))
            Description[Package] = Description[Basename]
        }
        else if ((Basename != "") && ($0 ~ /^[ \t]/))
        {				# then description continuation line
            ## Either a space or a tab in column 1 marks a
            ## continuation.  Continuation lines are stored already
            ## indented to level 5.
            Description[Basename] = Description[Basename] "\n" indentation(5) trim(detab($0))
            Description[Package] = Description[Basename]
        }
    }
    ## Close the file that was actually read, not some other name.
    close(PACKAGEINDEX)

    ## The command-line TITLE variable can be set from the package
    ## description file:
    if ((TITLE == "") && ("TITLE" in Description))
        TITLE = Description["TITLE"]

    ## Provide a few default descriptions for standard files in the Utah
    ## Web/FTP tree.
    set_default_description("00dir.cmd","Alphabetical FTP get commands")
    set_default_description("00dir.html","Alphabetical file listing")
    set_default_description("00dir.lst","Alphabetical file listing")
    set_default_description("00tdir.cmd","Reverse-time-ordered FTP get commands")
    set_default_description("00tdir.html","Reverse-time-ordered file listing")
    set_default_description("00tdir.lst","Reverse-time-ordered file listing")
}
function name_ref(name)
{
    ## Return a NAME anchor with content of one visible space (to fool
    ## defective browsers that ignore anchors with empty content), and
    ## warn if name has been used before this way (in which case,
    ## return an empty string).  A first use is recorded in
    ## Name_Used[].
    if (name in Name_Used)
    {
        warning("duplicate <A NAME=\"" name "\">...</A>")
        return ("")
    }
    else
    {
        Name_Used[name]++
        return ("<A NAME=\"" name "\">&nbsp;</A>")
    }
}
function open_BODY( k)
{
    ## Emit the document preamble exactly once: sixteen empty lines,
    ## the HEAD with its TITLE, the start of the BODY with an H1
    ## heading and a `Last update' paragraph, and then the contents
    ## of the BEFORE file.  Everything buffered so far is then
    ## flushed to stdout.
    if (BODY_Open)
        return

    HTML_Level = 0
    for (k = 1; k <= 16; ++k)
        output("")

    begin_tag("HTML")

    begin_tag("HEAD")
    begin_tag("TITLE")
    output(TITLE)
    end_tag("TITLE")
    output("")
    end_tag("HEAD")

    begin_tag("BODY")

    begin_tag("H1")
    output(TITLE)
    end_tag("H1")

    begin_tag("P")
    output("Last update: " Current_Date_and_Time "")
    end_tag("P")

    print_before()
    print_saved_lines(1)
    BODY_Open = 1
}
function open_TABLE()
{
    ## Start the `Package descriptions' section: an H2 heading that
    ## carries the package-descriptions anchor, followed by the
    ## opening of a bordered table.  Does nothing if the table is
    ## already open.
    if (TABLE_Open)
        return

    begin_tag("H2")
    begin_tag("A NAME=\"package-descriptions\"")
    output("Package descriptions")
    end_tag("A")
    end_tag("H2")

    begin_tag("TABLE BORDER=1")
    TABLE_Open = 1
}
function open_TD()
{
    ## Start a table cell, unless one is already open.
    if (TD_Open)
        return
    begin_tag("TD")
    TD_Open = 1
}
function open_TR()
{
    ## Start a table row, unless one is already open.  An empty line
    ## is written ahead of each new row.
    if (TR_Open)
        return
    output("")
    begin_tag("TR")
    TR_Open = 1
}
function output(line)
{
    ## Append line, indented for the current HTML nesting level, to
    ## the buffer of saved lines.  Nothing reaches stdout until
    ## print_saved_lines() is called.
    N_Saved_Line++
    Saved_Line[N_Saved_Line] = indentation(HTML_Level) line
}
function output_description(description, n,anchors)
{
    ## Output the package description, sneaking in one or two NAME
    ## anchors at the end of the first line, so that they are less
    ## visible.  This ugly hack is a response to browser brain damage!
    ##
    ## The anchors are for the global Basename and, if it has not
    ## been given one already, the global Package.
    if (description == "")		# the PACKAGEINDEX file is out-of-date
        warning("no description available for " Package " in " PACKAGEINDEX)

    n = index(description,"\n")
    if (n == 0)				# then one-line description
        n = length(description) + 1

    ## Build the anchors in separate statements.  name_ref(Basename)
    ## updates Name_Used[], and the following test reads it (Package
    ## and Basename are equal for an unversioned name), but awk does
    ## not define the order in which the operands of a concatenation
    ## are evaluated.
    anchors = name_ref(Basename)
    if (!(Package in Name_Used))
        anchors = anchors name_ref(Package)

    output(substr(description,1,n-1) anchors substr(description,n))
}
function package_name(basename)
{
    ## Reduce basename to a bare package name: first drop a trailing
    ## version number (a hyphen and at least two numeric components
    ## joined by `-', `_', or `.'), and then drop one trailing
    ## lowercase-letter extension, if there is one.
    sub(/-([0-9]+[-_.])+[0-9]+$/, "", basename)
    sub(/[.][a-z]+$/, "", basename)
    return (basename)
}
function print_after()
{
    ## Buffer the contents of the file named by the AFTER variable.
    print_before_or_after(AFTER)
}
function print_before()
{
    ## Buffer the contents of the file named by the BEFORE variable.
    print_before_or_after(BEFORE)
}
function print_before_or_after(infile, line,old_HTML_Level)
{
    ## Copy the lines of infile into the output buffer without adding
    ## any indentation.  An empty name, or /dev/null, means that there
    ## is nothing to copy.
    if (infile == "")
        return
    if (infile == "/dev/null")
        return

    old_HTML_Level = HTML_Level
    HTML_Level = 0			# so input indentation is preserved!
    ##?? print "DEBUG: Reading " infile > "/dev/stderr"
    while ((getline line < infile) > 0)
    {
        output(line)
    }
    close(infile)
    HTML_Level = old_HTML_Level		# resume normal indentation
}
function print_saved_lines(start, k)
{
    ## Write the buffered lines numbered start and up to stdout, and
    ## then shorten the buffer so that it ends just before start.
    k = start
    while (k <= N_Saved_Line)
    {
        print Saved_Line[k]
        k++
    }
    N_Saved_Line = start - 1
}
function print_TOC( j,k,n,start)
{
    ## Build the `Package directory' table from TOC[] and write it to
    ## stdout AHEAD of the lines that are already buffered.  The
    ## directory lines are appended to the buffer first, printed from
    ## their starting position (which also removes them from the
    ## buffer), and only then is the earlier part of the buffer
    ## printed.
    start = N_Saved_Line + 1

    if (NTOC > 0)
    {
        begin_tag("H2")
        begin_tag("A NAME=\"package-directory\"")
        output("Package directory")
        end_tag("A")
        end_tag("H2")

        begin_tag("TABLE")
        ## n is the number of rows needed for NTOC entries in DIRCOLS
        ## columns.  Entries run down each column, so row k holds
        ## entries k, k+n, k+2n, ...
        n = int((NTOC + (DIRCOLS - 1))/DIRCOLS)
        k = 1
        while (k <= n)
        {
            begin_tag("TR")
            j = 0
            while (j < DIRCOLS)
            {
                print_TOC_cell(k + j*n)
                j++
            }
            end_tag("TR")
            k++
        }
        end_tag("TABLE")
    }

    print_saved_lines(start)		# the directory, if any
    print_saved_lines(1)		# everything buffered before it
}
function print_TOC_cell(n)
{
    ## Output one directory cell that links to the description of
    ## entry TOC[n].  Nothing is output for an index with no entry,
    ## or for an entry whose cell has been output before.
    if (!(n in TOC))
        return
    if (n in TOC_cell_printed)
        return

    begin_tag("TD")
    begin_tag("A HREF=\"#" TOC[n] "\"")
    output(TOC[n])
    end_tag("A")
    end_tag("TD")
    TOC_cell_printed[n]++
}
function set_default_description(file,descr)
{
    ## Give file the description descr, unless it already has one.
    if (file in Description)
        return
    Description[file] = descr
}
function terminate()
{
    ## End-of-input processing, called from the END rule.

    ## Finish whatever part of the descriptions table is still open.
    close_TD()
    close_TR()
    close_TABLE()

    ## Write the package directory followed by the buffered
    ## descriptions, then the AFTER file, then the closing tags.
    print_TOC()
    print_after()
    close_BODY()

    ## Flush whatever is left in the buffer.
    print_saved_lines(1)
}
function trim(s)
{
    ## Return a copy of s without leading and trailing blanks and tabs.
    ## Each pattern is anchored, so a single substitution suffices.
    sub(/^[ \t]+/, "", s)
    sub(/[ \t]+$/, "", s)
    return (s)
}
function warning(message)
{
    ## Write message to stderr, prefixed by the current input file
    ## name and line number and a "%%" marker.
    print (FILENAME ":" FNR ":%%" message) > "/dev/stderr"
}