funstack.awk revision 1.1.1.1 1 ### ====================================================================
2 ### @Awk-file{
3 ### author = "Nelson H. F. Beebe",
4 ### version = "1.00",
5 ### date = "09 October 1996",
6 ### time = "15:57:06 MDT",
7 ### filename = "journal-toc.awk",
8 ### address = "Center for Scientific Computing
9 ### Department of Mathematics
10 ### University of Utah
11 ### Salt Lake City, UT 84112
12 ### USA",
13 ### telephone = "+1 801 581 5254",
14 ### FAX = "+1 801 581 4148",
15 ### URL = "http://www.math.utah.edu/~beebe",
16 ### checksum = "25092 977 3357 26493",
17 ### email = "beebe (at] math.utah.edu (Internet)",
18 ### codetable = "ISO/ASCII",
19 ### keywords = "BibTeX, bibliography, HTML, journal table of
20 ### contents",
21 ### supported = "yes",
22 ### docstring = "Create a journal cover table of contents from
23 ### @Article{...} entries in a journal BibTeX
24 ### .bib file for checking the bibliography
25 ### database against the actual journal covers.
26 ### The output can be either plain text, or HTML.
27 ###
28 ### Usage:
29 ### bibclean -max-width 0 BibTeX-file(s) | \
30 ### bibsort -byvolume | \
31 ### awk -f journal-toc.awk \
32 ### [-v HTML=nnn] [-v INDENT=nnn] \
33 ### [-v BIBFILEURL=url] >foo.toc
34 ###
35 ### or if the bibliography is already sorted
36 ### by volume,
37 ###
38 ### bibclean -max-width 0 BibTeX-file(s) | \
39 ### awk -f journal-toc.awk \
40 ### [-v HTML=nnn] [-v INDENT=nnn] \
41 ### [-v BIBFILEURL=url] >foo.toc
42 ###
43 ### A non-zero value of the command-line option,
44 ### HTML=nnn, results in HTML output instead of
45 ### the default plain ASCII text (corresponding
46 ### to HTML=0).
47 ###
48 ### The INDENT=nnn command-line option specifies
49 ### the number of blanks to indent each logical
50 ### level of HTML. The default is INDENT=4.
51 ### INDENT=0 suppresses indentation. The INDENT
52 ### option has no effect when the default HTML=0
53 ### (plain text output) option is in effect.
54 ###
55 ### When HTML output is selected, the
56 ### BIBFILEURL=url command-line option provides a
57 ### way to request hypertext links from table of
58 ### contents page numbers to the complete BibTeX
59 ### entry for the article. These links are
60 ### created by appending a sharp (#) and the
61 ### citation label to the BIBFILEURL value, which
62 ### conforms with the practice of
63 ### bibtex-to-html.awk.
64 ###
65 ### The HTML output form may be useful as a more
66 ### compact representation of journal article
67 ### bibliography data than the original BibTeX
68 ### file provides. Of course, the
69 ### table-of-contents format provides less
70 ### information, and is considerably more
71 ### troublesome for a computer program to parse.
72 ###
73 ### When URL key values are provided, they will
74 ### be used to create hypertext links around
75 ### article titles. This supports journals that
76 ### provide article contents on the World-Wide
77 ### Web.
78 ###
79 ### For parsing simplicity, this program requires
80 ### that BibTeX
81 ###
82 ### key = "value"
83 ###
84 ### and
85 ###
86 ### @String{name = "value"}
87 ###
88 ### specifications be entirely contained on
89 ### single lines, which is readily provided by
90 ### the `bibclean -max-width 0' filter. It also
91 ### requires that bibliography entries begin and
92 ### end at the start of a line, and that
93 ### quotation marks, rather than balanced braces,
94 ### delimit string values. This is a
95 ### conventional format that again can be
96 ### guaranteed by bibclean.
97 ###
98 ### This program requires `new' awk, as described
99 ### in the book
100 ###
101 ### Alfred V. Aho, Brian W. Kernighan, and
102 ### Peter J. Weinberger,
103 ### ``The AWK Programming Language'',
104 ### Addison-Wesley (1988), ISBN
105 ### 0-201-07981-X,
106 ###
107 ### such as provided by programs named (GNU)
108 ### gawk, nawk, and recent AT&T awk.
109 ###
110 ### The checksum field above contains a CRC-16
111 ### checksum as the first value, followed by the
112 ### equivalent of the standard UNIX wc (word
113 ### count) utility output of lines, words, and
114 ### characters. This is produced by Robert
115 ### Solovay's checksum utility.",
116 ### }
117 ### ====================================================================
118
# Main program.  Each input line is handed to the handler for the BibTeX
# construct it starts.  Rules are tried top to bottom and every rule
# ends with `next', so at most one handler ever sees a given line.

BEGIN {
    initialize()
}

# Entry headers: @String, @Preamble, @Article, then any other @type.
/^ *@ *[Ss][Tt][Rr][Ii][Nn][Gg] *{/ {
    do_String()
    next
}

/^ *@ *[Pp][Rr][Ee][Aa][Mm][Bb][Ll][Ee]/ {
    next
}

/^ *@ *[Aa][Rr][Tt][Ii][Cc][Ll][Ee]/ {
    do_Article()
    next
}

/^ *@/ {
    do_Other()
    next
}

# key = value lines that contribute to the table of contents.
/^ *author *= *\"/ {
    do_author()
    next
}

/^ *journal *= */ {
    do_journal()
    next
}

/^ *volume *= *\"/ {
    do_volume()
    next
}

/^ *number *= *\"/ {
    do_number()
    next
}

/^ *year *= *\"/ {
    do_year()
    next
}

/^ *month *= */ {
    do_month()
    next
}

/^ *title *= *\"/ {
    do_title()
    next
}

/^ *pages *= *\"/ {
    do_pages()
    next
}

/^ *URL *= *\"/ {
    do_URL()
    next
}

# A closing brace alone on a line ends the current entry; only
# @Article entries produce output.
/^ *} *$/ {
    if (In_Article)
        do_end_entry()
    next
}

END {
    terminate()
}
150
151
152 ########################################################################
153 # NB: The programming conventions for variables in this program are: #
154 # UPPERCASE global constants and user options #
155 # Initialuppercase global variables #
156 # lowercase local variables #
157 # Any deviation is an error! #
158 ########################################################################
159
160
# Begin a new @Article entry.  The citation label is the text of the
# current input line between the opening brace and the trailing comma.
# All per-article globals are reset so that fields missing from this
# entry do not inherit values from the previous one.
function do_Article()
{
    Citation_label = $0
    sub(/^[^\{]*{/,"",Citation_label)   # drop everything through the open brace
    sub(/ *, *$/,"",Citation_label)     # drop the trailing comma and blanks

    Author = Title = Journal = ""
    Volume = Number = Month = Year = ""
    Pages = Url = ""

    In_Article = 1
}
179
180
# Record the author list of the current entry, converted from TeX
# markup by TeX_to_HTML().
function do_author(   raw)
{
    raw = get_value($0)
    Author = TeX_to_HTML(raw)
}
185
186
# Emit the table-of-contents lines for the article just completed.
#
# A new issue heading is produced first whenever the issue number OR the
# volume differs from the previous article.  Comparing the number alone
# misses a volume change when consecutive volumes reuse an issue number
# or carry none.  Last_entry_volume is private to this function;
# do_new_issue() keeps its own Last_volume for HTML spacing.
#
# Authors are split on " and ": all but the last get a line of their
# own, and the last shares a line with the title and page number.
# Titles longer than MAX_TITLE_CHARS + MIN_LEADERS visible characters
# are folded by do_long_title().
function do_end_entry(   k,n,page_field,parts)
{
    if ((Last_number != Number) || (Last_entry_volume != Volume))
    {
        Last_entry_volume = Volume
        do_new_issue()
    }

    n = split(Author,parts," and ")
    for (k = 1; k < n; ++k)
        print_toc_line(parts[k] " and", "", "")

    # Hypertext wrappers are consumed by the first title line printed.
    Title_prefix = html_begin_title()
    Title_suffix = html_end_title()
    page_field = html_begin_pages() Pages html_end_pages()

    if (html_length(Title) <= (MAX_TITLE_CHARS + MIN_LEADERS)) # complete title fits on line
        print_toc_line(parts[n], Title, page_field)
    else                        # need to split long title over multiple lines
        do_long_title(parts[n], Title, page_field)
}
201
202
# Record the journal name.  The value is either a quoted string or a
# bare abbreviation, which is replaced by its @String expansion when
# one was seen earlier in the input.
function do_journal()
{
    if ($0 !~ /[=] *"/)         # have journal = journal-abbreviation,
    {
        Journal = get_abbrev($0)
        if (Journal in String)  # replace abbrev by its expansion
            Journal = String[Journal]
    }
    else                        # have journal = "quoted journal name",
        Journal = get_value($0)

    gsub(/\\-/,"",Journal)      # remove discretionary hyphens
}
215
216
# Print a title that is too long for one line as several lines.
#   author   printed on the first line only
#   title    full title text, folded at blanks chosen by html_breakpoint()
#   pages    page field, printed on the last line only
function do_long_title(author,title,pages,   chunk,cut)
{
    title = trim(title)         # discard leading and trailing space
    while (title != "")
    {
        cut = html_breakpoint(title,MAX_TITLE_CHARS+MIN_LEADERS)
        chunk = substr(title,1,cut)
        title = substr(title,cut+1)
        sub(/^ +/,"",title)     # discard any leading space

        if (title == "")        # nothing left: this is the last line
            print_toc_line(author, chunk, pages)
        else
            print_toc_line(author, chunk, "")

        author = ""             # author appears only once
    }
}
230
231
# Record the month.  The value may be quoted or a bare abbreviation,
# and may join several months with BibTeX `#' concatenation (for
# example jan # "\slash " # feb).  Each component that is a key of
# Month_expansion is replaced by the full month name; components are
# rejoined with " / ".
function do_month(   k,n,name,parts)
{
    if ($0 ~ /[=] *"/)
        Month = get_value($0)
    else
        Month = get_abbrev($0)

    gsub(/[\"]/,"",Month)
    gsub(/ *# *\\slash *# */," / ",Month)
    gsub(/ *# *-+ *# */," / ",Month)

    n = split(Month,parts," */ *")
    Month = ""
    for (k = 1; k <= n; ++k)
    {
        name = parts[k]
        if (name in Month_expansion)
            name = Month_expansion[name]
        if (k > 1)
            Month = Month " / "
        Month = Month name
    }
}
244
245
# Emit the heading that introduces a new journal issue and remember the
# issue number in Last_number.  Plain-text mode prints the journal name
# and the volume/number/month/year line between blank lines.  HTML mode
# closes the open contents block, writes an <H1> heading with an
# anchor, adds an entry to the issue index and opens a new block.
function do_new_issue()
{
    Last_number = Number

    if (!HTML)
    {
        print_line("")
        print_line(Journal)
        print_line(strip_html(vol_no_month_year()))
        print_line("")
        return
    }

    if (Last_volume != Volume)  # extra break between volumes
    {
        Last_volume = Volume
        print_line(prefix(2) "<BR>")
    }
    html_end_toc()
    html_begin_issue()
    print_line(prefix(2) Journal "<BR>")

    print_line(strip_html(vol_no_month_year()))

    html_end_issue()
    html_toc_entry()
    html_begin_toc()
}
277
278
# Record the issue number of the current entry.
function do_number(   value)
{
    value = get_value($0)
    Number = value
}
283
284
# An entry of any type other than @Article has started: clear the flag
# so that its closing brace produces no table-of-contents line.
function do_Other()
{
    In_Article = 0
}
289
290
# Record the page range, dropping an unknown ending page ("--??").
function do_pages(   range)
{
    range = get_value($0)
    sub(/--[?][?]/,"",range)
    Pages = range
}
296
297
# Remember an @String{name = "value"} definition in String[] so that
# do_journal() can expand journal abbreviations.  The current input
# record is edited in place.
function do_String(   name,value)
{
    sub(/^[^\{]*\{/,"",$0)      # discard up to and including open brace
    sub(/\} *$/,"",$0)          # discard trailing brace and any blanks after it

    name = get_key($0)
    value = get_value($0)
    String[name] = value
}
304
305
# Record the article title, converted from TeX markup by TeX_to_HTML().
function do_title(   raw)
{
    raw = get_value($0)
    Title = TeX_to_HTML(raw)
}
310
311
# Record the article URL.  When the value lists several URLs separated
# by commas or semicolons, only the first is kept.
function do_URL(   parts)
{
    split(get_value($0),parts,"[,;]")   # in case we have multiple URLs
    Url = trim(parts[1])
}
318
319
# Record the volume of the current entry.
function do_volume(   value)
{
    value = get_value($0)
    Volume = value
}
324
325
# Record the publication year of the current entry.
function do_year(   value)
{
    value = get_value($0)
    Year = value
}
330
331
# Return the unquoted abbreviation from a line of the form
# ``key = abbrev,''.
function get_abbrev(s)
{
    # Strip the key, the equals sign, and the blanks that follow it.
    sub(/^[^=]*= */,"",s)

    # Strip trailing blanks and the optional comma that ends the field.
    sub(/ *,? *$/,"",s)

    return (s)
}
339
340
# Return the key from a line of the form ``key = "value",''.
function get_key(s)
{
    sub(/ *=.*$/,"",s)          # discard everything from the equals sign on
    sub(/^ */,"",s)             # discard leading space

    return (s)
}
348
349
# Return the quoted value from a line of the form ``key = "value",''.
# The surrounding quotes and any blanks just inside them are removed.
function get_value(s)
{
    # Strip everything through the opening quote, plus blanks after it.
    sub(/^[^\"]*\" */,"",s)

    # Strip blanks before the closing quote, the quote itself, the
    # optional comma, and trailing blanks.
    sub(/ *\",? *$/,"",s)

    return (s)
}
357
358
# Replace braced TeX accent groups in s, such as {\'e} or {\"u}, by the
# corresponding ISO Latin 1 SGML character entities (&eacute;, &uuml;)
# and return the result.
#
# The letter set follows the table on p. 169 of Peter Flynn's ``The
# World Wide Web Handbook'', International Thomson Computer Press,
# 1995, ISBN 1-85032-205-8.  Eth and thorn are not handled.
#
# Replacement strings are written "\\&name;": gsub() then sees `\&',
# which yields a literal ampersand instead of the matched text.
function html_accents(s)
{
    if (index(s,"\\") == 0)     # important optimization: no TeX markup at all
        return (s)

    # Lower-case accented letters

    gsub(/{\\`a}/,    "\\&agrave;", s)
    gsub(/{\\'a}/,    "\\&aacute;", s)
    gsub(/{\\[\^]a}/, "\\&acirc;",  s)
    gsub(/{\\~a}/,    "\\&atilde;", s)
    gsub(/{\\\"a}/,   "\\&auml;",   s)
    gsub(/{\\aa}/,    "\\&aring;",  s)
    gsub(/{\\ae}/,    "\\&aelig;",  s)

    gsub(/{\\c{c}}/,  "\\&ccedil;", s)

    gsub(/{\\`e}/,    "\\&egrave;", s)
    gsub(/{\\'e}/,    "\\&eacute;", s)
    gsub(/{\\[\^]e}/, "\\&ecirc;",  s)
    gsub(/{\\\"e}/,   "\\&euml;",   s)

    gsub(/{\\`i}/,    "\\&igrave;", s)
    gsub(/{\\'i}/,    "\\&iacute;", s)
    gsub(/{\\[\^]i}/, "\\&icirc;",  s)
    gsub(/{\\\"i}/,   "\\&iuml;",   s)

    gsub(/{\\~n}/,    "\\&ntilde;", s)

    gsub(/{\\`o}/,    "\\&ograve;", s)
    gsub(/{\\'o}/,    "\\&oacute;", s)
    gsub(/{\\[\^]o}/, "\\&ocirc;",  s)
    gsub(/{\\~o}/,    "\\&otilde;", s)
    gsub(/{\\\"o}/,   "\\&ouml;",   s)
    gsub(/{\\o}/,     "\\&oslash;", s)

    gsub(/{\\`u}/,    "\\&ugrave;", s)
    gsub(/{\\'u}/,    "\\&uacute;", s)
    gsub(/{\\[\^]u}/, "\\&ucirc;",  s)
    gsub(/{\\\"u}/,   "\\&uuml;",   s)

    gsub(/{\\'y}/,    "\\&yacute;", s)
    gsub(/{\\\"y}/,   "\\&yuml;",   s)

    # Upper-case accented letters

    gsub(/{\\`A}/,    "\\&Agrave;", s)
    gsub(/{\\'A}/,    "\\&Aacute;", s)
    gsub(/{\\[\^]A}/, "\\&Acirc;",  s)
    gsub(/{\\~A}/,    "\\&Atilde;", s)
    gsub(/{\\\"A}/,   "\\&Auml;",   s)
    gsub(/{\\AA}/,    "\\&Aring;",  s)
    gsub(/{\\AE}/,    "\\&AElig;",  s)

    gsub(/{\\c{C}}/,  "\\&Ccedil;", s)

    gsub(/{\\`E}/,    "\\&Egrave;", s)
    gsub(/{\\'E}/,    "\\&Eacute;", s)
    gsub(/{\\[\^]E}/, "\\&Ecirc;",  s)
    gsub(/{\\\"E}/,   "\\&Euml;",   s)

    gsub(/{\\`I}/,    "\\&Igrave;", s)
    gsub(/{\\'I}/,    "\\&Iacute;", s)
    gsub(/{\\[\^]I}/, "\\&Icirc;",  s)
    gsub(/{\\\"I}/,   "\\&Iuml;",   s)

    gsub(/{\\~N}/,    "\\&Ntilde;", s)

    gsub(/{\\`O}/,    "\\&Ograve;", s)
    gsub(/{\\'O}/,    "\\&Oacute;", s)
    gsub(/{\\[\^]O}/, "\\&Ocirc;",  s)
    gsub(/{\\~O}/,    "\\&Otilde;", s)
    gsub(/{\\\"O}/,   "\\&Ouml;",   s)
    gsub(/{\\O}/,     "\\&Oslash;", s)

    gsub(/{\\`U}/,    "\\&Ugrave;", s)
    gsub(/{\\'U}/,    "\\&Uacute;", s)
    gsub(/{\\[\^]U}/, "\\&Ucirc;",  s)
    gsub(/{\\\"U}/,   "\\&Uuml;",   s)

    gsub(/{\\'Y}/,    "\\&Yacute;", s)

    gsub(/{\\ss}/,    "\\&szlig;",  s)

    # Others not mentioned in Flynn's book: accents on dotless i and j
    gsub(/{\\'\\i}/,  "\\&iacute;", s)
    gsub(/{\\'\\j}/,  "j",          s)

    return (s)
}
458
459
# Open the HTML heading of a new issue: a horizontal rule, then an <H1>
# holding a named anchor whose label comes from html_label().  The
# heading is closed later by html_end_issue().
function html_begin_issue(   indent)
{
    indent = prefix(2)

    print_line("")
    print_line(indent "<HR>")
    print_line("")
    print_line(indent "<H1>")
    print_line(prefix(3) "<A NAME=\"" html_label() "\">")
}
468
469
# Return the opening anchor tag that links a page number to the BibTeX
# entry at BIBFILEURL#Citation_label, or "" when HTML output is off or
# no BIBFILEURL was given.
function html_begin_pages()
{
    if (HTML && (BIBFILEURL != ""))
        return ("<A HREF=\"" BIBFILEURL "#" Citation_label "\">")
    return ("")
}
474
475
# Open a <PRE> block and set In_PRE, which makes prefix() return no
# indentation until html_end_pre() closes the block.
function html_begin_pre()
{
    print_line("<PRE>")
    In_PRE = 1
}
481
482
# Return the opening anchor tag that links an article title to Url, or
# "" when HTML output is off or the entry has no URL.
function html_begin_title()
{
    if (HTML && (Url != ""))
        return ("<A HREF=\"" Url "\">")
    return ("")
}
487
488
# Start the contents block for an issue: close any block that is still
# open, then open a fresh <PRE> block.
function html_begin_toc()
{
    html_end_toc()      # never nest <PRE> blocks
    html_begin_pre()
}
494
495
# Write out the HTML body lines buffered in Body[1..BodyLines].
function html_body(   k)
{
    k = 1
    while (k <= BodyLines)
    {
        print Body[k]
        ++k
    }
}
501
# Return the largest character position in title AFTER which the title
# can be broken across lines without exceeding maxlength visible
# characters (as measured by html_length()).  When the whole title
# fits, or no blank to break at exists, the full length is returned.
function html_breakpoint(title,maxlength,   break_after,k,len,start)
{
    len = length(title)
    if (html_length(title) <= maxlength)        # title fits on one line
        return (len)

    # HTML markup does not count as visible text, so the break point
    # may lie beyond maxlength.  Scan forward from the earliest
    # candidate and call the expensive html_length() only at blanks.
    start = min(maxlength,len)
    break_after = 0
    k = start
    while (k < len)
    {
        if (substr(title,k+1,1) == " ")         # could break after position k
        {
            if (html_length(substr(title,1,k)) > maxlength)
                break                           # advanced too far
            break_after = k
        }
        ++k
    }
    if (break_after != 0)
        return (break_after)

    # No break point found by the forward scan, so look backward for
    # the nearest blank.
    k = start - 1
    while ((k > 0) && (substr(title,k+1,1) != " "))
        --k

    if (k < 1)                  # no blank at all: must print entire string
        return (len)
    return (k)
}
542
543
544
# Close the anchor and the <H1> opened by html_begin_issue().
function html_end_issue(   anchor_indent,heading_indent)
{
    anchor_indent = prefix(3)
    heading_indent = prefix(2)

    print_line(anchor_indent "</A>")
    print_line(heading_indent "</H1>")
}
550
551
# Return the closing tag that matches html_begin_pages(): "</A>" when
# HTML output is on and BIBFILEURL is set, otherwise "".
function html_end_pages()
{
    if (HTML && (BIBFILEURL != ""))
        return ("</A>")
    return ("")
}
556
557
# Close the current <PRE> block, if one is open, and clear In_PRE.
# Calling this when no block is open does nothing.
function html_end_pre()
{
    if (!In_PRE)
        return

    In_PRE = 0
    print_line("</PRE>")
}
566
567
# Return the closing tag that matches html_begin_title(): "</A>" when
# HTML output is on and the entry has a URL, otherwise "".
function html_end_title()
{
    if (HTML && (Url != ""))
        return ("</A>")
    return ("")
}
572
573
# End the contents block of the current issue, which is kept inside a
# <PRE> block.
function html_end_toc()
{
    html_end_pre()      # harmless when no block is open
}
578
579
# Translate TeX font changes in s into HTML markup and return the
# result.  Two forms are recognized at each opening brace:
#
#     {\xxx ...}    declarations listed in Font_decl_map
#     \xxx{...}     commands listed in Font_cmd_map
#
# The map value selects the translation: "toupper" upper-cases the
# argument, any other non-empty value TAG wraps it as <TAG>...</TAG>,
# and "" drops the font change.  The text after the group is processed
# recursively.
#
# A brace group that is NOT a font change is left in place and scanning
# continues after its opening brace.  Returning at the first such group
# would leave every later font change in the string untranslated.
#
# The argument still carries its closing brace; TeX_to_HTML() removes
# braces afterwards with strip_braces().
function html_fonts(s,   arg,control_word,k,level,n,open_brace,tag)
{
    open_brace = index(s,"{")
    if (open_brace == 0)        # important optimization: nothing to do
        return (s)

    # Find the end of the group: afterwards {...} spans open_brace ... (k-1)
    level = 1
    for (k = open_brace + 1; (level != 0) && (k <= length(s)); ++k)
    {
        if (substr(s,k,1) == "{")
            level++
        else if (substr(s,k,1) == "}")
            level--
    }

    for (control_word in Font_decl_map) # look for {\xxx ...}
    {
        # Map keys start with a backslash, so "\\" control_word is a
        # dynamic regexp that matches the control word literally.
        if (substr(s,open_brace+1,length(control_word)+1) ~ \
            ("\\" control_word "[^A-Za-z]"))
        {
            n = open_brace + 1 + length(control_word)
            arg = trim(substr(s,n,k - n))
            tag = Font_decl_map[control_word]
            if (tag == "toupper")       # arg -> ARG
                arg = toupper(arg)
            else if (tag != "")         # arg -> <TAG>arg</TAG>
                arg = "<" tag ">" arg "</" tag ">"
            return (substr(s,1,open_brace-1) arg html_fonts(substr(s,k)))
        }
    }

    for (control_word in Font_cmd_map)  # look for \xxx{...}
    {
        if (substr(s,open_brace - length(control_word),length(control_word)) ~ \
            ("\\" control_word))
        {
            n = open_brace + 1
            arg = trim(substr(s,n,k - n))
            tag = Font_cmd_map[control_word]
            if (tag == "toupper")       # arg -> ARG
                arg = toupper(arg)
            else if (tag != "")         # arg -> <TAG>arg</TAG>
                arg = "<" tag ">" arg "</" tag ">"
            n = open_brace - length(control_word) - 1   # text before the control word
            return (substr(s,1,n) arg html_fonts(substr(s,k)))
        }
    }

    # Ordinary brace group: keep it and look for font changes after it.
    return (substr(s,1,open_brace) html_fonts(substr(s,open_brace+1)))
}
627
628
# Print the HTML prologue: provenance comments, DOCTYPE, and a <HEAD>
# whose title is the journal name, followed by the opening <BODY> tag.
#
# Sets the globals USER, HOSTNAME, DATE and PERSONAL_NAME.  The user
# name comes from the environment; host name and date come from the
# `hostname' and `date' commands; the personal name is field 5 of the
# user's passwd entry, tried first through ypcat and then /etc/passwd.
#
# Every command pipe is closed after its single read.  The passwd
# lookup is skipped unless USER consists only of letters, digits, `.',
# `_' and `-', because the name is interpolated into a shell command
# and comes from the environment.
function html_header(   cmd)
{
    USER = ENVIRON["USER"]
    if (USER == "")
        USER = ENVIRON["LOGNAME"]
    if (USER == "")
        USER = "????"

    cmd = "hostname"
    cmd | getline HOSTNAME
    close(cmd)

    cmd = "date"
    cmd | getline DATE
    close(cmd)

    if (USER ~ /^[A-Za-z0-9._-]+$/)     # safe to place inside shell quotes
    {
        cmd = "ypcat passwd | grep '^" USER ":' | awk -F: '{print $5}'"
        cmd | getline PERSONAL_NAME
        close(cmd)
        if (PERSONAL_NAME == "")
        {
            cmd = "grep '^" USER ":' /etc/passwd | awk -F: '{print $5}'"
            cmd | getline PERSONAL_NAME
            close(cmd)
        }
    }

    print "<!-- WARNING: Do NOT edit this file. It was converted from -->"
    print "<!-- BibTeX format to HTML by journal-toc.awk version " VERSION_NUMBER " " VERSION_DATE " -->"
    print "<!-- on " DATE " -->"
    print "<!-- for " PERSONAL_NAME " (" USER "@" HOSTNAME ") -->"
    print ""
    print ""
    print "<!DOCTYPE HTML public \"-//IETF//DTD HTML//EN\">"
    print ""
    print "<HTML>"
    print prefix(1) "<HEAD>"
    print prefix(2) "<TITLE>"
    print prefix(3) Journal
    print prefix(2) "</TITLE>"
    print prefix(2) "<LINK REV=\"made\" HREF=\"mailto:" USER "@" HOSTNAME "\">"
    print prefix(1) "</HEAD>"
    print ""
    print prefix(1) "<BODY>"
}
661
662
# Return the anchor name for the current issue, built from Volume,
# Number, Month and Year.  Every character other than letters, digits
# and ( ) : , ; . / - is deleted.
function html_label(   anchor)
{
    anchor = Volume "(" Number "):" Month ":" Year
    gsub(/[^A-Za-z0-9():,;.\/\-]/,"",anchor)
    return (anchor)
}
669
670
# Return the visible length of s.  In HTML mode, SGML tags and
# character entities are removed before counting; in plain-text mode
# the result is simply length(s).
function html_length(s)
{
    if (!HTML)
        return (length(s))

    gsub(/<\/?[^>]*>/,"",s)             # remove SGML tags
    gsub(/&[A-Za-z0-9]+;/,"",s)         # remove SGML entities
    return (length(s))
}
680
681
# Print the index of issues: an <H1> heading naming the journal,
# followed by the links collected in HTML_TOC by html_toc_entry().
function html_toc(   heading_indent)
{
    heading_indent = prefix(2)

    print heading_indent "<H1>"
    print prefix(3) "Table of contents for issues of " Journal
    print heading_indent "</H1>"
    print HTML_TOC
}
689
690
# Append one line to HTML_TOC: a link to the anchor of the current
# issue, labelled with its volume, number, month and year.
function html_toc_entry(   link)
{
    link = " <A HREF=\"#" html_label() "\">"
    link = link vol_no_month_year()
    link = link "</A><BR>" "\n"

    HTML_TOC = HTML_TOC link
}
697
698
# Finish the HTML document: close any open <PRE> block, then print the
# closing </BODY> and </HTML> tags.
function html_trailer(   body_indent)
{
    html_end_pre()

    body_indent = prefix(1)
    print body_indent "</BODY>"
    print "</HTML>"
}
705
706
# Set up the global constants, the user options HTML and INDENT, and
# the lookup tables used by the rest of the program:
#
#   Month_expansion[abbrev]   BibTeX month abbreviation -> month name
#   Font_cmd_map[\xxx]        \xxx{...} command -> HTML tag
#   Font_decl_map[\xxx]       {\xxx ...} declaration -> HTML tag
#
# In the font maps, "toupper" means upper-case the argument and ""
# means drop the font change.
#
# Two values differ from the earlier text of this function:
#   * \texttt maps to TT, the tag already used for \tt and \ttfamily
#     (it mapped to "t", which is not an HTML tag);
#   * MIN_LEADERS_SPACE holds MIN_LEADERS blanks, as its own comment
#     requires (it held a single blank).
function initialize()
{
    # NB: Update these when the program changes
    VERSION_DATE = "[09-Oct-1996]"
    VERSION_NUMBER = "1.00"

    HTML = (HTML == "") ? 0 : (0 + HTML)

    if (INDENT == "")
        INDENT = 4

    if (HTML == 0)
        INDENT = 0      # indentation suppressed in ASCII mode

    LEADERS = " . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ."

    MAX_TITLE_CHARS = 36        # 36 produces a 79-char output line when there is
                                # just an initial page number.  If this is
                                # increased, the LEADERS string may need to be
                                # lengthened.

    MIN_LEADERS = 4             # Minimum number of characters from LEADERS
                                # required when leaders are used.  The total
                                # number of characters that can appear in a
                                # title line is MAX_TITLE_CHARS + MIN_LEADERS.
                                # Leaders are omitted when the title length is
                                # between MAX_TITLE_CHARS and this sum.

    MIN_LEADERS_SPACE = "    "  # must be at least MIN_LEADERS characters long

    Month_expansion["jan"] = "January"
    Month_expansion["feb"] = "February"
    Month_expansion["mar"] = "March"
    Month_expansion["apr"] = "April"
    Month_expansion["may"] = "May"
    Month_expansion["jun"] = "June"
    Month_expansion["jul"] = "July"
    Month_expansion["aug"] = "August"
    Month_expansion["sep"] = "September"
    Month_expansion["oct"] = "October"
    Month_expansion["nov"] = "November"
    Month_expansion["dec"] = "December"

    Font_cmd_map["\\emph"] = "EM"
    Font_cmd_map["\\textbf"] = "B"
    Font_cmd_map["\\textit"] = "I"
    Font_cmd_map["\\textmd"] = ""
    Font_cmd_map["\\textrm"] = ""
    Font_cmd_map["\\textsc"] = "toupper"
    Font_cmd_map["\\textsl"] = "I"
    Font_cmd_map["\\texttt"] = "TT"
    Font_cmd_map["\\textup"] = ""

    Font_decl_map["\\bf"] = "B"
    Font_decl_map["\\em"] = "EM"
    Font_decl_map["\\it"] = "I"
    Font_decl_map["\\rm"] = ""
    Font_decl_map["\\sc"] = "toupper"
    Font_decl_map["\\sf"] = ""
    Font_decl_map["\\tt"] = "TT"
    Font_decl_map["\\itshape"] = "I"
    Font_decl_map["\\upshape"] = ""
    Font_decl_map["\\slshape"] = "I"
    Font_decl_map["\\scshape"] = "toupper"
    Font_decl_map["\\mdseries"] = ""
    Font_decl_map["\\bfseries"] = "B"
    Font_decl_map["\\rmfamily"] = ""
    Font_decl_map["\\sffamily"] = ""
    Font_decl_map["\\ttfamily"] = "TT"
}
777
# Return the smaller of a and b; b is returned when they compare equal.
function min(a,b)
{
    if (a < b)
        return a
    return b
}
782
783
# Return the indentation for nesting depth `level': INDENT * level
# blanks, limited to 60.  Inside a <PRE> block (In_PRE set) the result
# is always empty.
#
# The blanks are produced with sprintf(), so the width does not depend
# on the length of a string literal in the source.  A one-blank literal
# here limited every indentation to a single blank.
function prefix(level,   width)
{
    if (In_PRE)
        return ("")

    width = INDENT * level
    if (width <= 0)             # INDENT=0 suppresses indentation
        return ("")

    return (substr(sprintf("%60s",""), 1, width))
}
794
795
# Output one line.  Plain-text mode prints it at once.  HTML mode
# appends it to Body[], because the issue index printed ahead of the
# body is complete only after all input has been read.
function print_line(line)
{
    if (!HTML)
    {
        print line
        return
    }
    BodyLines++
    Body[BodyLines] = line
}
803
804
# Format and output one table-of-contents line.
#   author   right-justified in a 31-character column (may be "")
#   title    title text for this line (may be "")
#   pages    page field; when "", the line has no leaders and no pages
#
# Title_prefix and Title_suffix wrap the title and are then cleared, so
# that for a multi-line title the hypertext link goes on the first line
# only.  A multi-line link looks awful because of long underlines under
# the leading indentation.
function print_toc_line(author,title,pages,   extra,leaders,n,t)
{
    if (pages != "")            # last title line, with page number
    {
        n = html_length(title)  # potentially expensive
        extra = n % 2           # extra space for aligned leader dots
        if (n > MAX_TITLE_CHARS)        # title (almost) fills line, so no leaders
            leaders = substr(MIN_LEADERS_SPACE,1, \
                             (MAX_TITLE_CHARS + MIN_LEADERS - extra - n))
        else                            # then need leaders
            leaders = substr(LEADERS, 1, \
                             MAX_TITLE_CHARS + MIN_LEADERS - extra - n)
        t = sprintf("%31s %s%s%s%s%s %4s", \
                    author, Title_prefix, title, Title_suffix, \
                    (extra ? " " : ""), leaders, pages)
    }
    else                        # no leaders needed in title lines other than last one
        t = sprintf("%31s %s%s%s", author, Title_prefix, title, Title_suffix)

    Title_prefix = ""           # forget any hypertext
    Title_suffix = ""           # link material

    # Efficiency note: the body is stored in an array, not in one
    # growing string.  An earlier "Body = Body t" was the major hot spot
    # and the array made the program more than twice as fast.
    if (!HTML)
        print t
    else
        Body[++BodyLines] = t
}
842
843
# Return s with the four SGML reserved characters & < > " replaced by
# the entities &amp; &lt; &gt; &quot;.
#
# The ampersand is converted first; otherwise the ampersands of the
# other three entities would be converted again.  Replacements are
# written "\\&name;": gsub() then sees `\&', which yields a literal
# ampersand instead of the matched text.
function protect_SGML_characters(s)
{
    gsub(/&/,"\\&amp;",s)       # NB: this one MUST be first
    gsub(/</,"\\&lt;",s)
    gsub(/>/,"\\&gt;",s)
    gsub(/\"/,"\\&quot;",s)
    return (s)
}
852
853
# Return s with every non-backslashed brace removed: first the opening
# braces, then the closing ones.
function strip_braces(s,   unopened)
{
    unopened = strip_char(s,"{")
    return (strip_char(unopened,"}"))
}
859
860
# Return s with every instance of the character c removed, except
# instances directly preceded by a backslash, which are kept together
# with the backslash.
#
# The backslashed case used to recurse as strip_char(s,k+1,c), passing
# the whole string and a number where the character belongs; that
# appended a second copy of s to the result.  The scan is now a loop
# that consumes s from the left, so there is no recursion to get wrong.
function strip_char(s,c,   k,out)
{
    out = ""
    while ((k = index(s,c)) > 0)
    {
        if ((k > 1) && (substr(s,k-1,1) == "\\"))
            out = out substr(s,1,k)     # preserve backslashed char
        else
            out = out substr(s,1,k-1)   # copy text before c, drop c
        s = substr(s,k+1)
    }
    return (out s)
}
873
874
# Return s with every SGML tag (<...> or </...>) removed.  Character
# entities are left alone.
function strip_html(s)
{
    gsub(/<\/?[^>]*>/,"",s)     # delete opening and closing tags alike
    return (s)
}
880
881
# End-of-input processing.  Plain-text output has already been printed,
# so there is nothing to do.  In HTML mode the buffered document is
# written in order: header, issue index, body, trailer.
function terminate()
{
    if (!HTML)
        return

    html_end_pre()      # close the last contents block while still buffering

    HTML = 0            # NB: stop line buffering
    html_header()
    html_toc()
    html_body()
    html_trailer()
}
895
896
# Convert the TeX markup in s for output and return the result.
#
# In HTML mode the reserved characters > < " are first replaced by the
# entities &gt; &lt; &quot;.  Ampersands are handled later by
# TeX_to_HTML_math() and TeX_to_HTML_nonmath().  A quote that follows a
# backslash is the TeX umlaut accent (as in {\"u}) and is kept as it
# is, so that html_accents() can still recognize it.
#
# The string is then split at dollar signs into non-math (odd) and math
# (even) parts.  Display math $$...$$ is first widened to $$$...$$$:
# the extra, empty parts keep the odd/even alternation correct.
# Non-math parts lose their braces; math parts keep them.
function TeX_to_HTML(s,   k,n,parts)
{
    # First convert the SGML reserved characters to SGML entities
    if (HTML)
    {
        gsub(/>/, "\\&gt;", s)
        gsub(/</, "\\&lt;", s)

        # Convert quotes piecewise between \" accents, then rejoin with
        # the accents unchanged.
        n = split(s,parts,/\\\"/)
        s = ""
        for (k = 1; k <= n; ++k)
        {
            gsub(/\"/, "\\&quot;", parts[k])
            s = s ((k > 1) ? "\\\"" : "") parts[k]
        }
    }

    gsub(/[$][$]/,"$$$",s)      # change display math to triple dollars for split
    n = split(s,parts,/[$]/)    # split into non-math (odd) and math (even) parts

    s = ""
    for (k = 1; k <= n; ++k)    # unbrace non-math part, leaving math mode intact
        s = s ((k > 1) ? "$" : "") \
            ((k % 2) ? strip_braces(TeX_to_HTML_nonmath(parts[k])) : \
             TeX_to_HTML_math(parts[k]))

    gsub(/[$][$][$]/,"$$",s)    # restore display math

    return (s)
}
920
921
# Convert the math-mode text s for output and return the result.
# Mostly a dummy for now, but HTML 3 could support some math
# translation.  The only change made is TeX's \& -> &amp;.
#
# The replacement is written "\\&amp;": gsub() then sees `\&', which
# yields a literal ampersand instead of the matched text.
function TeX_to_HTML_math(s)
{
    gsub(/\\&/,"\\&amp;",s)     # reduce TeX ampersands to SGML entities

    return (s)
}
930
931
# Convert the non-math text s for output and return the result.
#
# In both modes \slash, \emdash, \%, \$ and \# become conventional
# characters.  After that:
#   HTML mode   \& becomes &amp;, accents become character entities
#               (html_accents) and font changes become tags (html_fonts);
#   plain text  \& becomes &, and remaining two-letter font changes and
#               control symbols are deleted.
#
# Strings without a backslash are returned unchanged.
function TeX_to_HTML_nonmath(s)
{
    if (index(s,"\\") == 0)     # important optimization: no TeX markup at all
        return (s)

    gsub(/\\slash +/,"/",s)             # replace TeX slashes with conventional ones
    gsub(/ *\\emdash +/," --- ",s)      # replace BibNet emdashes with conventional ones
    gsub(/\\%/,"%",s)                   # reduce TeX percents to conventional ones
    gsub(/\\[$]/,"$",s)                 # reduce TeX dollars to conventional ones
    gsub(/\\#/,"#",s)                   # reduce TeX sharps to conventional ones

    if (HTML)                           # translate TeX markup to HTML
    {
        # "\\&amp;" reaches gsub() as `\&amp;': a literal ampersand
        # followed by "amp;", not the matched text.
        gsub(/\\&/,"\\&amp;",s)         # reduce TeX ampersands to SGML entities
        s = html_accents(s)
        s = html_fonts(s)
    }
    else                                # plain ASCII text output: discard all TeX markup
    {
        gsub(/\\\&/, "\\&", s)          # reduce TeX ampersands to conventional ones

        gsub(/\\[a-z][a-z] +/,"",s)     # remove TeX font changes
        gsub(/\\[^A-Za-z]/,"",s)        # remove remaining TeX control symbols
    }

    return (s)
}
958
959
# Return s without its leading and trailing blanks and tabs.
function trim(s)
{
    # Each pattern is anchored and can match at most once, so sub()
    # is sufficient.
    sub(/^[ \t]+/,"",s)
    sub(/[ \t]+$/,"",s)
    return (s)
}
966
967
# Return the issue description line, for example
# "Volume 12, Number 3, March, 1996".  Each value is passed through
# wrap(), which emphasizes it in HTML mode.
function vol_no_month_year(   line)
{
    line = "Volume " wrap(Volume)
    line = line ", Number " wrap(Number)
    line = line ", " wrap(Month)
    line = line ", " wrap(Year)
    return (line)
}
972
973
# Return value enclosed in <STRONG> tags in HTML mode, and unchanged
# in plain-text mode.
function wrap(value)
{
    if (HTML)
        return ("<STRONG>" value "</STRONG>")
    return (value)
}
978