Home | History | Annotate | Line # | Download | only in testdir
funstack.awk revision 1.1.1.1
      1 ### ====================================================================
      2 ###  @Awk-file{
      3 ###     author          = "Nelson H. F. Beebe",
      4 ###     version         = "1.00",
      5 ###     date            = "09 October 1996",
      6 ###     time            = "15:57:06 MDT",
      7 ###     filename        = "journal-toc.awk",
      8 ###     address         = "Center for Scientific Computing
      9 ###                        Department of Mathematics
     10 ###                        University of Utah
     11 ###                        Salt Lake City, UT 84112
     12 ###                        USA",
     13 ###     telephone       = "+1 801 581 5254",
     14 ###     FAX             = "+1 801 581 4148",
     15 ###     URL             = "http://www.math.utah.edu/~beebe",
     16 ###     checksum        = "25092 977 3357 26493",
     17 ###     email           = "beebe@math.utah.edu (Internet)",
     18 ###     codetable       = "ISO/ASCII",
     19 ###     keywords        = "BibTeX, bibliography, HTML, journal table of
     20 ###                        contents",
     21 ###     supported       = "yes",
     22 ###     docstring       = "Create a journal cover table of contents from
     23 ###                        @Article{...} entries in a journal BibTeX
     24 ###                        .bib file for checking the bibliography
     25 ###                        database against the actual journal covers.
     26 ###                        The output can be either plain text, or HTML.
     27 ###
     28 ###                        Usage:
     29 ###                            bibclean -max-width 0 BibTeX-file(s) | \
     30 ###                                bibsort -byvolume | \
     31 ###                                awk -f journal-toc.awk \
     32 ###                                    [-v HTML=nnn] [-v INDENT=nnn] \
     33 ###                                    [-v BIBFILEURL=url] >foo.toc
     34 ###
     35 ###                            or if the bibliography is already sorted
     36 ###                            by volume,
     37 ###
     38 ###                            bibclean -max-width 0 BibTeX-file(s) | \
     39 ###                                awk -f journal-toc.awk \
     40 ###                                    [-v HTML=nnn] [-v INDENT=nnn] \
     41 ###                                    [-v BIBFILEURL=url] >foo.toc
     42 ###
     43 ###                        A non-zero value of the command-line option,
     44 ###                        HTML=nnn, results in HTML output instead of
     45 ###                        the default plain ASCII text (corresponding
     46 ###                        to HTML=0).
     47 ###
     48 ###                        The INDENT=nnn command-line option specifies
     49 ###                        the number of blanks to indent each logical
     50 ###                        level of HTML.  The default is INDENT=4.
     51 ###                        INDENT=0 suppresses indentation.  The INDENT
     52 ###                        option has no effect when the default HTML=0
     53 ###                        (plain text output) option is in effect.
     54 ###
     55 ###                        When HTML output is selected, the
     56 ###                        BIBFILEURL=url command-line option provides a
     57 ###                        way to request hypertext links from table of
     58 ###                        contents page numbers to the complete BibTeX
     59 ###                        entry for the article.  These links are
     60 ###                        created by appending a sharp (#) and the
     61 ###                        citation label to the BIBFILEURL value, which
     62 ###                        conforms with the practice of
     63 ###                        bibtex-to-html.awk.
     64 ###
     65 ###                        The HTML output form may be useful as a more
     66 ###                        compact representation of journal article
     67 ###                        bibliography data than the original BibTeX
     68 ###                        file provides.  Of course, the
     69 ###                        table-of-contents format provides less
     70 ###                        information, and is considerably more
     71 ###                        troublesome for a computer program to parse.
     72 ###
     73 ###                        When URL key values are provided, they will
     74 ###                        be used to create hypertext links around
     75 ###                        article titles.  This supports journals that
     76 ###                        provide article contents on the World-Wide
     77 ###                        Web.
     78 ###
     79 ###                        For parsing simplicity, this program requires
     80 ###                        that BibTeX
     81 ###
     82 ###                            key = "value"
     83 ###
     84 ###                        and
     85 ###
     86 ###                            @String{name = "value"}
     87 ###
     88 ###                        specifications be entirely contained on
     89 ###                        single lines, which is readily provided by
     90 ###                        the `bibclean -max-width 0' filter.  It also
     91 ###                        requires that bibliography entries begin and
     92 ###                        end at the start of a line, and that
     93 ###                        quotation marks, rather than balanced braces,
     94 ###                        delimit string values.  This is a
     95 ###                        conventional format that again can be
     96 ###                        guaranteed by bibclean.
     97 ###
     98 ###                        This program requires `new' awk, as described
     99 ###                        in the book
    100 ###
    101 ###                            Alfred V. Aho, Brian W. Kernighan, and
    102 ###                            Peter J. Weinberger,
    103 ###                            ``The AWK Programming Language'',
    104 ###                            Addison-Wesley (1988), ISBN
    105 ###                            0-201-07981-X,
    106 ###
    107 ###                        such as provided by programs named (GNU)
    108 ###                        gawk, nawk, and recent AT&T awk.
    109 ###
    110 ###                        The checksum field above contains a CRC-16
    111 ###                        checksum as the first value, followed by the
    112 ###                        equivalent of the standard UNIX wc (word
    113 ###                        count) utility output of lines, words, and
    114 ###                        characters.  This is produced by Robert
    115 ###                        Solovay's checksum utility.",
    116 ###  }
    117 ### ====================================================================
    118 
# Set up options, constants and lookup tables before any input is read.
BEGIN {
	initialize()
}

# Entry headers.  Every action finishes with next, so the catch-all
# "@" rule must stay below the specific entry types.

# @String{abbrev = "expansion"}
/^ *@ *[Ss][Tt][Rr][Ii][Nn][Gg] *{/ {
	do_String()
	next
}

# @Preamble is skipped
/^ *@ *[Pp][Rr][Ee][Aa][Mm][Bb][Ll][Ee]/ {
	next
}

# @Article{label,
/^ *@ *[Aa][Rr][Tt][Ii][Cc][Ll][Ee]/ {
	do_Article()
	next
}

# any other entry type
/^ *@/ {
	do_Other()
	next
}

# key = value lines, one rule per field of interest

/^ *author *= *\"/ {
	do_author()
	next
}

/^ *journal *= */ {
	do_journal()
	next
}

/^ *volume *= *\"/ {
	do_volume()
	next
}

/^ *number *= *\"/ {
	do_number()
	next
}

/^ *year *= *\"/ {
	do_year()
	next
}

/^ *month *= */ {
	do_month()
	next
}

/^ *title *= *\"/ {
	do_title()
	next
}

/^ *pages *= *\"/ {
	do_pages()
	next
}

/^ *URL *= *\"/ {
	do_URL()
	next
}

# A line holding only a closing brace ends the entry; a table of
# contents line is produced only when that entry was an @Article.
/^ *} *$/ {
	if (In_Article)
		do_end_entry()
	next
}

# All input consumed: in HTML mode the buffered page is written now.
END {
	terminate()
}
    150 
    151 
    152 ########################################################################
    153 # NB: The programming conventions for variables in this program are:   #
    154 #	UPPERCASE		global constants and user options      #
    155 #	Initialuppercase	global variables                       #
    156 #	lowercase		local variables                        #
    157 # Any deviation is an error!                                           #
    158 ########################################################################
    159 
    160 
function do_Article()
{
	# Start of an @Article entry.  The citation label is what remains
	# of the line once everything through the opening brace, and the
	# trailing comma with its surrounding blanks, have been removed.
	Citation_label = $0
	sub(/^[^\{]*{/,"",Citation_label)
	sub(/ *, *$/,"",Citation_label)

	# Forget all field values left over from the previous entry
	Author = Title = Journal = Volume = Number = Month = Year = Pages = Url = ""

	In_Article = 1
}
    179 
    180 
function do_author()
{
	# author = "...": keep the quoted value, with TeX markup translated
	Author = get_value($0)
	Author = TeX_to_HTML(Author)
}
    185 
    186 
function do_end_entry( i,nauthors,names,page_field)
{
	# End of an @Article entry: emit its table of contents line(s).
	nauthors = split(Author,names," and ")

	if (Number != Last_number)	# first article of a new issue
		do_new_issue()

	# Every author except the last gets a line of his own ending in "and"
	i = 1
	while (i < nauthors)
	{
		print_toc_line(names[i] " and", "", "")
		i++
	}

	# Hypertext link material wrapped around the title by print_toc_line()
	Title_prefix = html_begin_title()
	Title_suffix = html_end_title()

	page_field = html_begin_pages() Pages html_end_pages()

	if (html_length(Title) > (MAX_TITLE_CHARS + MIN_LEADERS))
		do_long_title(names[nauthors], Title, page_field)	# must be split over lines
	else
		print_toc_line(names[nauthors], Title, page_field)	# fits on one line
}
    201 
    202 
function do_journal()
{
	# The journal value is either a quoted name or a @String abbreviation
	if ($0 !~ /[=] *"/)
	{	# journal = journal-abbreviation,
		Journal = get_abbrev($0)
		if (Journal in String)	# known abbreviation: use its expansion
			Journal = String[Journal]
	}
	else	# journal = "quoted journal name",
		Journal = get_value($0)

	gsub(/\\-/,"",Journal)	# drop TeX discretionary hyphens
}
    215 
    216 
function do_long_title(author,title,pages, piece,cut,limit)
{
	# Print a title that is too long for one line as several lines.
	# The author appears only on the first line and the page field
	# only on the last one.
	limit = MAX_TITLE_CHARS + MIN_LEADERS
	title = trim(title)
	while (length(title) > 0)
	{
		cut = html_breakpoint(title,limit)
		piece = substr(title,1,cut)
		title = substr(title,cut+1)
		sub(/^ +/,"",title)		# the next line must not start with blanks
		if (length(title) == 0)		# nothing left: this is the final line
			print_toc_line(author, piece, pages)
		else
			print_toc_line(author, piece, "")
		author = ""
	}
}
    230 
    231 
function do_month( i,count,names,name)
{
	# month = "..." or month = abbrev, possibly several abbreviations
	# joined by # "\slash" # or # "-" #.  The result in Month is the
	# list of expanded month names separated by " / ".
	if ($0 ~ /[=] *"/)
		Month = get_value($0)
	else
		Month = get_abbrev($0)

	gsub(/[\"]/,"",Month)
	gsub(/ *# *\\slash *# */," / ",Month)
	gsub(/ *# *-+ *# */," / ",Month)

	count = split(Month,names," */ *")
	Month = ""
	for (i = 1; i <= count; i++)
	{
		name = names[i]
		if (name in Month_expansion)	# jan -> January, etc.
			name = Month_expansion[name]
		if (i > 1)
			Month = Month " / "
		Month = Month name
	}
}
    244 
    245 
function do_new_issue()
{
	# Emit the heading that introduces a new journal issue.
	Last_number = Number

	if (!HTML)
	{	# plain text: blank line, journal, issue line, blank line
		print_line("")
		print_line(Journal)
		print_line(strip_html(vol_no_month_year()))
		print_line("")
		return
	}

	if (Volume != Last_volume)	# extra break between volumes
	{
		Last_volume = Volume
		print_line(prefix(2) "<BR>")
	}

	# Close the previous issue's contents, then write the <H1> heading
	html_end_toc()
	html_begin_issue()
	print_line(prefix(2) Journal "<BR>")
	print_line(strip_html(vol_no_month_year()))
	html_end_issue()

	# Record the issue in the page index and open its contents listing
	html_toc_entry()
	html_begin_toc()
}
    277 
    278 
function do_number( value)
{
	# number = "...": remember the issue number
	value = get_value($0)
	Number = value
}
    283 
    284 
function do_Other()
{
	# Any entry type other than @Article: its closing brace must not
	# produce a table of contents line.
	In_Article = 0
}
    289 
    290 
function do_pages( value)
{
	# pages = "...": an unknown final page, written "--??", is dropped
	value = get_value($0)
	sub(/--[?][?]/,"",value)
	Pages = value
}
    296 
    297 
function do_String( key)
{
	# @String{key = "value"}: reduce the record to key = "value" and
	# store the expansion under its abbreviation.
	sub(/^[^\{]*\{/,"")	# (target defaults to $0) drop through the open brace
	sub(/\} *$/,"")		# drop the closing brace and any blanks after it
	key = get_key($0)
	String[key] = get_value($0)
}
    304 
    305 
function do_title()
{
	# title = "...": keep the quoted value, with TeX markup translated
	Title = get_value($0)
	Title = TeX_to_HTML(Title)
}
    310 
    311 
function do_URL( list)
{
	# URL = "...": when several URLs are given, separated by commas
	# or semicolons, only the first one is kept.
	split(get_value($0),list,"[,;]")
	Url = trim(list[1])
}
    318 
    319 
function do_volume( value)
{
	# volume = "...": remember the volume number
	value = get_value($0)
	Volume = value
}
    324 
    325 
function do_year( value)
{
	# year = "...": remember the publication year
	value = get_value($0)
	Year = value
}
    330 
    331 
function get_abbrev(s)
{
	# Return the unquoted abbreviation from a ``key = abbrev,'' line.

	# remove the key, the equals sign and the blanks that follow it
	sub(/^[^=]*= */,"",s)

	# remove trailing blanks and the optional comma
	sub(/ *,? *$/,"",s)

	return s
}
    339 
    340 
function get_key(s)
{
	# Return the key from a ``key = "value",'' line.

	sub(/^ */,"",s)		# leading blanks go first ...
	sub(/ *=.*$/,"",s)	# ... then everything from the equals sign on

	return s
}
    348 
    349 
function get_value(s)
{
	# Return the text between the quotation marks of a
	# ``key = "value",'' line.

	# remove everything through the opening quote, and blanks after it
	sub(/^[^\"]*\" */,"",s)

	# remove blanks before the closing quote, the quote itself, the
	# optional comma, and any trailing blanks
	sub(/ *\",? *$/,"",s)

	return s
}
    357 
    358 
function html_accents(s)
{
	# Return s with braced TeX accent groups, such as {\'e}, {\`a} or
	# {\"U}, replaced by the corresponding ISO Latin-1 SGML entities
	# (&eacute;, &agrave;, &Uuml;, ...).  Text without a backslash is
	# returned unchanged.
	#
	# The grave accent is written with a backquote in TeX, so the
	# patterns for it must contain a literal backquote.  A pattern of
	# the form /{\\\a}/ is wrong: there \a is an escape sequence, not
	# a backquote followed by the letter.

	if (index(s,"\\") == 0)			# important optimization
		return (s)

	# Convert common lower-case accented letters according to the
	# table on p. 169 of Peter Flynn's ``The World Wide Web
	# Handbook'', International Thomson Computer Press, 1995, ISBN
	# 1-85032-205-8.  The official table of ISO Latin 1 SGML
	# entities used in HTML can be found in the file
	# /usr/local/lib/html-check/lib/ISOlat1.sgml (your path
	# may differ).

	gsub(/{\\`a}/,	"\\&agrave;",	s)
	gsub(/{\\'a}/,	"\\&aacute;",	s)
	gsub(/{\\[\^]a}/,"\\&acirc;",	s)
	gsub(/{\\~a}/,	"\\&atilde;",	s)
	gsub(/{\\\"a}/,	"\\&auml;",	s)
	gsub(/{\\aa}/,	"\\&aring;",	s)
	gsub(/{\\ae}/,	"\\&aelig;",	s)

	gsub(/{\\c{c}}/,"\\&ccedil;",	s)

	gsub(/{\\`e}/,	"\\&egrave;",	s)
	gsub(/{\\'e}/,	"\\&eacute;",	s)
	gsub(/{\\[\^]e}/,"\\&ecirc;",	s)
	gsub(/{\\\"e}/,	"\\&euml;",	s)

	gsub(/{\\`i}/,	"\\&igrave;",	s)
	gsub(/{\\'i}/,	"\\&iacute;",	s)
	gsub(/{\\[\^]i}/,"\\&icirc;",	s)
	gsub(/{\\\"i}/,	"\\&iuml;",	s)

	# ignore eth and thorn

	gsub(/{\\~n}/,	"\\&ntilde;",	s)

	gsub(/{\\`o}/,	"\\&ograve;",	s)
	gsub(/{\\'o}/,	"\\&oacute;",	s)
	gsub(/{\\[\^]o}/, "\\&ocirc;",	s)
	gsub(/{\\~o}/,	"\\&otilde;",	s)
	gsub(/{\\\"o}/,	"\\&ouml;",	s)
	gsub(/{\\o}/,	"\\&oslash;",	s)

	gsub(/{\\`u}/,	"\\&ugrave;",	s)
	gsub(/{\\'u}/,	"\\&uacute;",	s)
	gsub(/{\\[\^]u}/,"\\&ucirc;",	s)
	gsub(/{\\\"u}/,	"\\&uuml;",	s)

	gsub(/{\\'y}/,	"\\&yacute;",	s)
	gsub(/{\\\"y}/,	"\\&yuml;",	s)

	# Now do the same for upper-case accents

	gsub(/{\\`A}/,	"\\&Agrave;",	s)
	gsub(/{\\'A}/,	"\\&Aacute;",	s)
	gsub(/{\\[\^]A}/,	"\\&Acirc;",	s)
	gsub(/{\\~A}/,	"\\&Atilde;",	s)
	gsub(/{\\\"A}/,	"\\&Auml;",	s)
	gsub(/{\\AA}/,	"\\&Aring;",	s)
	gsub(/{\\AE}/,	"\\&AElig;",	s)

	gsub(/{\\c{C}}/,"\\&Ccedil;",	s)

	gsub(/{\\`E}/,	"\\&Egrave;",	s)
	gsub(/{\\'E}/,	"\\&Eacute;",	s)
	gsub(/{\\[\^]E}/,	"\\&Ecirc;",	s)
	gsub(/{\\\"E}/,	"\\&Euml;",	s)

	gsub(/{\\`I}/,	"\\&Igrave;",	s)
	gsub(/{\\'I}/,	"\\&Iacute;",	s)
	gsub(/{\\[\^]I}/,	"\\&Icirc;",	s)
	gsub(/{\\\"I}/,	"\\&Iuml;",	s)

	# ignore eth and thorn

	gsub(/{\\~N}/,	"\\&Ntilde;",	s)

	gsub(/{\\`O}/,	"\\&Ograve;",	s)
	gsub(/{\\'O}/,	"\\&Oacute;",	s)
	gsub(/{\\[\^]O}/,	"\\&Ocirc;",	s)
	gsub(/{\\~O}/,	"\\&Otilde;",	s)
	gsub(/{\\\"O}/,	"\\&Ouml;",	s)
	gsub(/{\\O}/,	"\\&Oslash;",	s)

	gsub(/{\\`U}/,	"\\&Ugrave;",	s)
	gsub(/{\\'U}/,	"\\&Uacute;",	s)
	gsub(/{\\[\^]U}/,	"\\&Ucirc;",	s)
	gsub(/{\\\"U}/,	"\\&Uuml;",	s)

	gsub(/{\\'Y}/,	"\\&Yacute;",	s)

	gsub(/{\\ss}/,	"\\&szlig;",	s)

	# Others not mentioned in Flynn's book
	gsub(/{\\'\\i}/,"\\&iacute;",	s)
	gsub(/{\\'\\j}/,"j",		s)

	return (s)
}
    458 
    459 
function html_begin_issue( indent)
{
	# Open the heading of a new issue: a rule, then <H1> holding a
	# named anchor that the page index links to.
	indent = prefix(2)
	print_line("")
	print_line(indent "<HR>")
	print_line("")
	print_line(indent "<H1>")
	print_line(prefix(3) "<A NAME=\"" html_label() "\">")
}
    468 
    469 
function html_begin_pages()
{
	# Opening tag of the link from the page numbers to the BibTeX
	# entry; empty unless HTML output with a BIBFILEURL was requested.
	if (!HTML || (BIBFILEURL == ""))
		return ("")
	return ("<A HREF=\"" BIBFILEURL "#" Citation_label "\">")
}
    474 
    475 
function html_begin_pre()
{
	# Enter preformatted text.  In_PRE is raised before the tag is
	# written and stays set until html_end_pre() clears it.
	In_PRE = 1
	print_line("<PRE>")
}
    481 
    482 
function html_begin_title()
{
	# Opening tag of the link around an article title; empty unless
	# HTML output is selected and the entry supplied a URL.
	if (!HTML || (Url == ""))
		return ("")
	return ("<A HREF=\"" Url "\">")
}
    487 
    488 
function html_begin_toc()
{
	# Start an issue's contents listing: close any listing that is
	# still open, then open a fresh preformatted section.
	html_end_toc()
	html_begin_pre()
}
    494 
    495 
function html_body( i)
{
	# Write out the buffered body lines in the order they were stored
	i = 0
	while (++i <= BodyLines)
		print Body[i]
}
    501 
function html_breakpoint(title,maxlength, best,pos,len,start)
{
	# Return the largest character position in title AFTER which the
	# title can be broken across lines without exceeding maxlength
	# visible characters.  A title that fits is returned whole.
	len = length(title)
	if (html_length(title) <= maxlength)
		return (len)

	# Forward scan.  HTML markup makes the raw string longer than its
	# visible text, so the last usable blank may lie beyond position
	# maxlength.  Starting there keeps the number of (expensive)
	# html_length() calls small.
	start = min(maxlength,len)
	best = 0
	pos = start
	while (pos < len)
	{
		if (substr(title,pos+1,1) == " ")	# could break after pos
		{
			if (html_length(substr(title,1,pos)) > maxlength)
				break			# too far: keep the previous candidate
			best = pos
		}
		pos++
	}
	if (best != 0)
		return (best)

	# Nothing found going forward: look backward for a blank instead
	pos = start - 1
	while ((pos > 0) && (substr(title,pos+1,1) != " "))
		pos--

	# Without any blank at all the entire string must go on one line
	return ((pos < 1) ? len : pos)
}
    542 
    543 
    544 
function html_end_issue( close_anchor,close_heading)
{
	# Close the anchor and heading opened by html_begin_issue()
	close_anchor = prefix(3) "</A>"
	close_heading = prefix(2) "</H1>"
	print_line(close_anchor)
	print_line(close_heading)
}
    550 
    551 
function html_end_pages()
{
	# Closing tag that matches html_begin_pages()
	if (!HTML || (BIBFILEURL == ""))
		return ("")
	return ("</A>")
}
    556 
    557 
function html_end_pre()
{
	# Leave preformatted text, if it is currently open
	if (!In_PRE)
		return
	print_line("</PRE>")
	In_PRE = 0
}
    566 
    567 
function html_end_title()
{
	# Closing tag that matches html_begin_title()
	if (!HTML || (Url == ""))
		return ("")
	return ("</A>")
}
    572 
    573 
function html_end_toc()
{
	# An issue's contents listing is just a preformatted section
	html_end_pre()
}
    578 
    579 
function html_fonts(s, arg,control_word,k,level,n,open_brace,last,tag)
{
	# Return s with TeX font changes translated to HTML.  Two forms
	# are recognized: declarations inside a group, {\xxx ...}, looked
	# up in Font_decl_map, and commands with a braced argument,
	# \xxx{...}, looked up in Font_cmd_map.  A map value of "toupper"
	# upper-cases the argument, an empty value drops the markup, and
	# any other value is used as the name of the surrounding HTML tag.
	#
	# Brace groups that are not font changes (for example a
	# case-protecting {TeX}) are left alone, and the scan continues
	# behind their opening brace so that font changes later in the
	# string are still translated.  The argument of a font change is
	# not itself searched for nested font changes.

	open_brace = index(s,"{")
	if (open_brace == 0)			# important optimization
		return (s)

	# Find the matching close brace; k ends up one past it, or at
	# length(s)+1 when the braces are unbalanced.
	level = 1
	for (k = open_brace + 1; (level != 0) && (k <= length(s)); ++k)
	{
		if (substr(s,k,1) == "{")
			level++
		else if (substr(s,k,1) == "}")
			level--
	}

	# Position of the last character INSIDE the group: the closing
	# brace itself (at k-1 when balanced) is not part of the argument.
	last = (level == 0) ? k - 2 : k - 1

	for (control_word in Font_decl_map)	# look for {\xxx ...}
	{
		if (substr(s,open_brace+1,length(control_word)+1) ~ \
			("\\" control_word "[^A-Za-z]"))
		{
			n = open_brace + 1 + length(control_word)
			arg = trim(substr(s,n,last - n + 1))
			tag = Font_decl_map[control_word]
			if (tag == "toupper")		# arg -> ARG
				arg = toupper(arg)
			else if (tag != "")		# arg -> <TAG>arg</TAG>
				arg = "<" tag ">" arg "</" tag ">"
			return (substr(s,1,open_brace-1) arg html_fonts(substr(s,k)))
		}
	}

	for (control_word in Font_cmd_map)	# look for \xxx{...}
	{
		if (substr(s,open_brace - length(control_word),length(control_word)) ~ \
			("\\" control_word))
		{
			n = open_brace + 1
			arg = trim(substr(s,n,last - n + 1))
			tag = Font_cmd_map[control_word]
			if (tag == "toupper")		# arg -> ARG
				arg = toupper(arg)
			else if (tag != "")		# arg -> <TAG>arg</TAG>
				arg = "<" tag ">" arg "</" tag ">"
			n = open_brace - length(control_word) - 1
			return (substr(s,1,n) arg html_fonts(substr(s,k)))
		}
	}

	# Not a font change: keep the text through this opening brace and
	# examine whatever follows it.
	return (substr(s,1,open_brace) html_fonts(substr(s,open_brace+1)))
}
    627 
    628 
function html_header( cmd)
{
	# Print the HTML prologue: provenance comments, the document type
	# and the <HEAD> section, ending with the opening <BODY> tag.
	#
	# Sets the globals USER, HOSTNAME, DATE and PERSONAL_NAME from the
	# environment and from the output of external commands.  Each
	# command pipe is closed as soon as its first line has been read.

	USER = ENVIRON["USER"]
	if (USER == "")
	    USER = ENVIRON["LOGNAME"]
	if (USER == "")
	    USER = "????"

	cmd = "hostname"
	cmd | getline HOSTNAME
	close(cmd)

	cmd = "date"
	cmd | getline DATE
	close(cmd)

	# USER comes from the environment and is pasted into a shell
	# command line, so the password database is consulted only when
	# it looks like a plain login name.  Otherwise PERSONAL_NAME is
	# simply left as it was.
	if (USER ~ /^[A-Za-z0-9._-]+$/)
	{
		cmd = "ypcat passwd | grep '^" USER ":' | awk -F: '{print $5}'"
		cmd | getline PERSONAL_NAME
		close(cmd)
		if (PERSONAL_NAME == "")	# no NIS answer: try the local file
		{
			cmd = "grep  '^" USER ":' /etc/passwd | awk -F: '{print $5}'"
			cmd | getline PERSONAL_NAME
			close(cmd)
		}
	}

	print "<!-- WARNING: Do NOT edit this file.  It was converted from -->"
	print "<!-- BibTeX format to HTML by journal-toc.awk version " VERSION_NUMBER " " VERSION_DATE " -->"
	print "<!-- on " DATE " -->"
	print "<!-- for " PERSONAL_NAME " (" USER "@" HOSTNAME ") -->"
	print ""
	print ""
	print "<!DOCTYPE HTML public \"-//IETF//DTD HTML//EN\">"
	print ""
	print "<HTML>"
	print prefix(1) "<HEAD>"
	print prefix(2) "<TITLE>"
	print prefix(3)  Journal
	print prefix(2) "</TITLE>"
	print prefix(2) "<LINK REV=\"made\" HREF=\"mailto:" USER "@" HOSTNAME "\">"
	print prefix(1) "</HEAD>"
	print ""
	print prefix(1) "<BODY>"
}
    661 
    662 
function html_label( anchor)
{
	# Build the anchor name of the current issue from its volume,
	# number, month and year.  Only letters, digits and the
	# punctuation ( ) : , ; . / - are kept.
	anchor = Volume "(" Number "):" Month ":" Year
	gsub(/[^A-Za-z0-9():,;.\/\-]/,"",anchor)
	return anchor
}
    669 
    670 
function html_length(s)
{
	# Return the visible length of s.  In HTML mode tags occupy no
	# space, and each SGML entity such as &eacute; counts as the one
	# character it is displayed as.  (Counting an entity as zero
	# characters would misalign the leaders and page numbers of
	# titles that contain accented letters.)  In plain text mode the
	# result is simply length(s).
	if (HTML)
	{
		gsub(/<\/?[^>]*>/,"",s)		# remove SGML tags
		gsub(/&[A-Za-z0-9]+;/,"x",s)	# one placeholder per SGML entity
	}
	return (length(s))
}
    680 
    681 
function html_toc( indent)
{
	# Print the page index: a heading followed by the links that
	# html_toc_entry() collected in HTML_TOC.
	indent = prefix(2)
	print indent "<H1>"
	print prefix(3) "Table of contents for issues of " Journal
	print indent "</H1>"
	print HTML_TOC
}
    689 
    690 
function html_toc_entry( link)
{
	# Append to HTML_TOC one line that links to the current issue's anchor
	link = "        <A HREF=\"#" html_label() "\">" vol_no_month_year() "</A><BR>"
	HTML_TOC = HTML_TOC link "\n"
}
    697 
    698 
function html_trailer()
{
	# Finish the page: close an open <PRE>, then <BODY> and <HTML>
	html_end_pre()
	print prefix(1) "</BODY>"
	print "</HTML>"
}
    705 
    706 
function initialize()
{
	# Establish user options, layout constants and lookup tables.
	#
	# HTML and INDENT may be preset on the command line (-v); HTML is
	# forced to a number, INDENT defaults to 4 and is set to 0 for
	# plain text output.  Also fills Month_expansion (BibTeX month
	# abbreviation -> name) and the two font maps used by
	# html_fonts().  In the font maps the value is an HTML tag name,
	# "toupper" to upper-case the argument, or "" to drop the markup.

	# NB: Update these when the program changes
	VERSION_DATE = "[09-Oct-1996]"
	VERSION_NUMBER = "1.00"

	HTML = (HTML == "") ? 0 : (0 + HTML)

	if (INDENT == "")
		INDENT = 4

	if (HTML == 0)
		INDENT = 0	# indentation suppressed in ASCII mode

	LEADERS = " . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ."

	MAX_TITLE_CHARS = 36	# 36 produces a 79-char output line when there is
				# just an initial page number.  If this is
				# increased, the LEADERS string may need to be
				# lengthened.

	MIN_LEADERS = 4		# Minimum number of characters from LEADERS
				# required when leaders are used.  The total
				# number of characters that can appear in a
				# title line is MAX_TITLE_CHARS + MIN_LEADERS.
				# Leaders are omitted when the title length is
				# between MAX_TITLE_CHARS and this sum.

	MIN_LEADERS_SPACE = "        "	# must be at least MIN_LEADERS characters long

	Month_expansion["jan"]	= "January"
	Month_expansion["feb"]	= "February"
	Month_expansion["mar"]	= "March"
	Month_expansion["apr"]	= "April"
	Month_expansion["may"]	= "May"
	Month_expansion["jun"]	= "June"
	Month_expansion["jul"]	= "July"
	Month_expansion["aug"]	= "August"
	Month_expansion["sep"]	= "September"
	Month_expansion["oct"]	= "October"
	Month_expansion["nov"]	= "November"
	Month_expansion["dec"]	= "December"

	Font_cmd_map["\\emph"]		= "EM"
	Font_cmd_map["\\textbf"]	= "B"
	Font_cmd_map["\\textit"]	= "I"
	Font_cmd_map["\\textmd"]	= ""
	Font_cmd_map["\\textrm"]	= ""
	Font_cmd_map["\\textsc"]	= "toupper"
	Font_cmd_map["\\textsl"]	= "I"
	Font_cmd_map["\\texttt"]	= "TT"	# typewriter text, as for \tt and \ttfamily
	Font_cmd_map["\\textup"]	= ""

	Font_decl_map["\\bf"]		= "B"
	Font_decl_map["\\em"]		= "EM"
	Font_decl_map["\\it"]		= "I"
	Font_decl_map["\\rm"]		= ""
	Font_decl_map["\\sc"]		= "toupper"
	Font_decl_map["\\sf"]		= ""
	Font_decl_map["\\tt"]		= "TT"
	Font_decl_map["\\itshape"]	= "I"
	Font_decl_map["\\upshape"]	= ""
	Font_decl_map["\\slshape"]	= "I"
	Font_decl_map["\\scshape"]	= "toupper"
	Font_decl_map["\\mdseries"]	= ""
	Font_decl_map["\\bfseries"]	= "B"
	Font_decl_map["\\rmfamily"]	= ""
	Font_decl_map["\\sffamily"]	= ""
	Font_decl_map["\\ttfamily"]	= "TT"
}
    777 
function min(a,b)
{
	# Return the smaller of a and b (b when they compare equal)
	if (a < b)
		return a
	return b
}
    782 
    783 
function prefix(level)
{
	# Return the blanks that indent HTML at the given nesting level:
	# INDENT blanks per level, limited by the length of the blank
	# string below.  Inside <PRE> nothing is indented.
	if (In_PRE)
		return ("")
	return (substr("                                                            ", \
		1, INDENT * level))
}
    794 
    795 
function print_line(line)
{
	# Plain text is written at once.  HTML lines are kept in Body[]
	# because the page index, printed first, is known only at the end.
	if (!HTML)
	{
		print line
		return
	}
	Body[++BodyLines] = line
}
    803 
    804 
function print_toc_line(author,title,pages, odd,fill,visible,line,width)
{
	# Format one contents line: the author right-justified in 31
	# columns, then the title and, on the last line of a title, leader
	# dots and the page field.
	#
	# When we have a multiline title, the hypertext link goes only
	# on the first line.  A multiline hypertext link looks awful
	# because of long underlines under the leading indentation.

	line = sprintf("%31s   %s%s%s", author, Title_prefix, title, Title_suffix)

	if (pages != "")			# last title line, with page number
	{
		visible = html_length(title)	# potentially expensive
		odd = visible % 2		# extra space for aligned leader dots
		width = MAX_TITLE_CHARS + MIN_LEADERS - odd - visible
		if (visible <= MAX_TITLE_CHARS)	# room for leader dots
			fill = substr(LEADERS, 1, width)
		else				# title (almost) fills line: blanks only
			fill = substr(MIN_LEADERS_SPACE, 1, width)
		line = line sprintf("%s%s %4s", (odd ? " " : ""), fill, pages)
	}

	Title_prefix = ""	# the link material is used
	Title_suffix = ""	# once only

	# Efficiency note: the body is stored in an array.  An earlier
	# version accumulated it in one scalar ("Body = Body t"), which
	# profiling showed to be the major hot spot; array storage made
	# the program more than twice as fast.
	if (HTML)
		Body[++BodyLines] = line
	else
		print line
}
    842 
    843 
function protect_SGML_characters(s)
{
	# Return s with the four SGML reserved characters replaced by
	# entities.  The ampersand has to be handled before the others,
	# whose replacements themselves contain ampersands.
	gsub(/&/,"\\&amp;",s)
	gsub(/</,"\\&lt;",s)
	gsub(/>/,"\\&gt;",s)
	gsub(/\"/,"\\&quot;",s)
	return s
}
    852 
    853 
function strip_braces(s, k)
{
	# Return s without its non-backslashed braces: the opening braces
	# are removed first, then the closing ones.
	s = strip_char(s,"{")
	return (strip_char(s,"}"))
}
    859 
    860 
function strip_char(s,c, k,out)
{
	# Return s with every instance of the character c removed, except
	# instances immediately preceded by a backslash, which are kept
	# together with the backslash.
	#
	# The string is consumed from the left: out collects the finished
	# part, s holds what remains to be examined.
	out = ""
	while ((k = index(s,c)) > 0)
	{
		if (substr(s,k-1,1) == "\\")	# backslashed char: preserve it
			out = out substr(s,1,k)
		else				# plain char: drop it
			out = out substr(s,1,k-1)
		s = substr(s,k+1)
	}
	return (out s)
}
    873 
    874 
function strip_html(s)
{
	# Return s with everything of the form <...> or </...> removed
	gsub(/<\/?[^>]*>/,"",s)
	return s
}
    880 
    881 
function terminate()
{
	# End of input.  Plain text has already been written line by
	# line; for HTML the page is assembled and printed now.
	if (!HTML)
		return

	html_end_pre()

	HTML = 0	# NB: stop line buffering
	html_header()
	html_toc()
	html_body()
	html_trailer()
}
    895 
    896 
function TeX_to_HTML(s, k,n,parts)
{
	# Return s with its TeX markup translated for output.  Text
	# outside math mode goes through TeX_to_HTML_nonmath() and loses
	# its braces; text inside $...$ or $$...$$ goes through
	# TeX_to_HTML_math() and keeps its dollar signs.

	# First convert SGML reserved characters to SGML entities.  (TeX
	# ampersands, written \&, are dealt with by the two helpers.)
	if (HTML)
	{
	    gsub(/>/,	"\\&gt;",	s)
	    gsub(/</,	"\\&lt;",	s)
	    gsub(/"/,	"\\&quot;",	s)
	}

	# Display math is temporarily written with THREE dollars on each
	# side.  Splitting at single dollars then keeps the odd/even
	# alternation intact: the math text lands in an even-numbered
	# part, exactly like inline math.  With only two dollars the
	# display math text would land in an odd-numbered part and be
	# processed as ordinary text.
	gsub(/[$][$]/,"$$$",s)
	n = split(s,parts,/[$]/)# split into non-math (odd) and math (even) parts

	s = ""
	for (k = 1; k <= n; ++k) # unbrace non-math part, leaving math mode intact
		s = s ((k > 1) ? "$" : "") \
			((k % 2) ? strip_braces(TeX_to_HTML_nonmath(parts[k])) : \
			TeX_to_HTML_math(parts[k]))

	gsub(/[$][$][$]/,"$$",s) # restore display math

	return (s)
}
    920 
    921 
function TeX_to_HTML_math(s)
{
	# Translate math-mode text.  For now the only change is that TeX
	# ampersands (\&) become SGML entities; HTML 3 could support some
	# real math translation.
	gsub(/\\&/,"\\&amp;",s)
	return s
}
    930 
    931 
function TeX_to_HTML_nonmath(s)
{
	# Translate text outside math mode.  Only strings that contain a
	# backslash can hold TeX markup; all others are returned at once.
	if (index(s,"\\") == 0)			# important optimization
		return (s)

	gsub(/\\slash +/,"/",s)			# TeX slash -> /
	gsub(/ *\\emdash +/," --- ",s)		# BibNet emdash -> ---
	gsub(/\\%/,"%",s)			# \% -> %
	gsub(/\\[$]/,"$",s)			# \$ -> $
	gsub(/\\#/,"#",s)			# \# -> #

	if (!HTML)
	{	# plain ASCII text output: discard all TeX markup
		gsub(/\\\&/, "\\&", s)		# \& -> &
		gsub(/\\[a-z][a-z] +/,"",s)	# remove TeX font changes
		gsub(/\\[^A-Za-z]/,"",s)	# remove remaining TeX control symbols
		return (s)
	}

	# HTML output: translate TeX markup to HTML
	gsub(/\\&/,"\\&amp;",s)			# \& -> SGML entity
	s = html_accents(s)
	return (html_fonts(s))
}
    958 
    959 
function trim(s)
{
	# Return s without leading and trailing blanks and tabs.  Both
	# patterns are anchored, so each can match only once.
	sub(/^[ \t]+/,"",s)
	sub(/[ \t]+$/,"",s)
	return s
}
    966 
    967 
function vol_no_month_year( text)
{
	# Return the line that identifies the current issue, e.g.
	# ``Volume 12,  Number 3, March, 1996'' in plain text mode.
	text = "Volume " wrap(Volume)
	text = text ",  Number " wrap(Number)
	text = text ", " wrap(Month)
	text = text ", " wrap(Year)
	return text
}
    972 
    973 
function wrap(value)
{
	# Emphasize value with <STRONG> in HTML mode; otherwise return it as is
	if (HTML)
		return ("<STRONG>" value "</STRONG>")
	return (value)
}
    978