xref: /illumos-gate/usr/src/test/util-tests/tests/awk/gnu/funstack.awk (revision f73e1ebf60792a8bdb2d559097c3131b68c09318)
1### ====================================================================
2###  @Awk-file{
3###     author          = "Nelson H. F. Beebe",
4###     version         = "1.00",
5###     date            = "09 October 1996",
6###     time            = "15:57:06 MDT",
7###     filename        = "journal-toc.awk",
8###     address         = "Center for Scientific Computing
9###                        Department of Mathematics
10###                        University of Utah
11###                        Salt Lake City, UT 84112
12###                        USA",
13###     telephone       = "+1 801 581 5254",
14###     FAX             = "+1 801 581 4148",
15###     URL             = "http://www.math.utah.edu/~beebe",
16###     checksum        = "25092 977 3357 26493",
17###     email           = "beebe@math.utah.edu (Internet)",
18###     codetable       = "ISO/ASCII",
19###     keywords        = "BibTeX, bibliography, HTML, journal table of
20###                        contents",
21###     supported       = "yes",
22###     docstring       = "Create a journal cover table of contents from
23###                        <at>Article{...} entries in a journal BibTeX
24###                        .bib file for checking the bibliography
25###                        database against the actual journal covers.
26###                        The output can be either plain text, or HTML.
27###
28###                        Usage:
29###                            bibclean -max-width 0 BibTeX-file(s) | \
30###                                bibsort -byvolume | \
31###                                awk -f journal-toc.awk \
32###                                    [-v HTML=nnn] [-v INDENT=nnn] \
33###                                    [-v BIBFILEURL=url] >foo.toc
34###
35###                            or if the bibliography is already sorted
36###                            by volume,
37###
38###                            bibclean -max-width 0 BibTeX-file(s) | \
39###                                awk -f journal-toc.awk \
40###                                    [-v HTML=nnn] [-v INDENT=nnn] \
41###                                    [-v BIBFILEURL=url] >foo.toc
42###
43###                        A non-zero value of the command-line option,
44###                        HTML=nnn, results in HTML output instead of
45###                        the default plain ASCII text (corresponding
###                        to HTML=0).
47###
48###                        The INDENT=nnn command-line option specifies
49###                        the number of blanks to indent each logical
50###                        level of HTML.  The default is INDENT=4.
51###                        INDENT=0 suppresses indentation.  The INDENT
52###                        option has no effect when the default HTML=0
53###                        (plain text output) option is in effect.
54###
55###                        When HTML output is selected, the
56###                        BIBFILEURL=url command-line option provides a
57###                        way to request hypertext links from table of
58###                        contents page numbers to the complete BibTeX
59###                        entry for the article.  These links are
60###                        created by appending a sharp (#) and the
61###                        citation label to the BIBFILEURL value, which
62###                        conforms with the practice of
63###                        bibtex-to-html.awk.
64###
65###                        The HTML output form may be useful as a more
66###                        compact representation of journal article
67###                        bibliography data than the original BibTeX
68###                        file provides.  Of course, the
69###                        table-of-contents format provides less
70###                        information, and is considerably more
71###                        troublesome for a computer program to parse.
72###
73###                        When URL key values are provided, they will
74###                        be used to create hypertext links around
75###                        article titles.  This supports journals that
76###                        provide article contents on the World-Wide
77###                        Web.
78###
79###                        For parsing simplicity, this program requires
80###                        that BibTeX
81###
82###                            key = "value"
83###
84###                        and
85###
86###                            @String{name = "value"}
87###
88###                        specifications be entirely contained on
89###                        single lines, which is readily provided by
90###                        the `bibclean -max-width 0' filter.  It also
91###                        requires that bibliography entries begin and
92###                        end at the start of a line, and that
93###                        quotation marks, rather than balanced braces,
94###                        delimit string values.  This is a
95###                        conventional format that again can be
96###                        guaranteed by bibclean.
97###
98###                        This program requires `new' awk, as described
99###                        in the book
100###
101###                            Alfred V. Aho, Brian W. Kernighan, and
102###                            Peter J. Weinberger,
103###                            ``The AWK Programming Language'',
104###                            Addison-Wesley (1988), ISBN
105###                            0-201-07981-X,
106###
107###                        such as provided by programs named (GNU)
108###                        gawk, nawk, and recent AT&T awk.
109###
110###                        The checksum field above contains a CRC-16
111###                        checksum as the first value, followed by the
112###                        equivalent of the standard UNIX wc (word
113###                        count) utility output of lines, words, and
114###                        characters.  This is produced by Robert
115###                        Solovay's checksum utility.",
116###  }
117### ====================================================================
118
# Main program.  Input is expected to be bibclean-normalized BibTeX with
# one key = "value" pair per line.  Rule order matters: each rule ends in
# "next", so the first pattern that matches a line consumes it.

BEGIN {
	initialize()
}

# @String{abbrev = "expansion"}
/^ *@ *[Ss][Tt][Rr][Ii][Nn][Gg] *\{/ {
	do_String()
	next
}

# @Preamble lines are ignored
/^ *@ *[Pp][Rr][Ee][Aa][Mm][Bb][Ll][Ee]/ {
	next
}

# @Article{label,
/^ *@ *[Aa][Rr][Tt][Ii][Cc][Ll][Ee]/ {
	do_Article()
	next
}

# Any other @Entry type
/^ *@/ {
	do_Other()
	next
}

/^ *author *= *\"/ {
	do_author()
	next
}

/^ *journal *= */ {
	do_journal()
	next
}

/^ *volume *= *\"/ {
	do_volume()
	next
}

/^ *number *= *\"/ {
	do_number()
	next
}

/^ *year *= *\"/ {
	do_year()
	next
}

/^ *month *= */ {
	do_month()
	next
}

/^ *title *= *\"/ {
	do_title()
	next
}

/^ *pages *= *\"/ {
	do_pages()
	next
}

/^ *URL *= *\"/ {
	do_URL()
	next
}

# Closing brace of an entry: emit it only if it was an @Article
/^ *} *$/ {
	if (In_Article)
		do_end_entry()
	next
}

END {
	terminate()
}
150
151
152########################################################################
153# NB: The programming conventions for variables in this program are:   #
154#	UPPERCASE		global constants and user options      #
155#	Initialuppercase	global variables                       #
156#	lowercase		local variables                        #
157# Any deviation is an error!                                           #
158########################################################################
159
160
function do_Article()
{
	# Start of an @Article entry: extract the citation label from the
	# ``@Article{label,'' line and clear all per-entry fields.

	Citation_label = $0
	sub(/^[^\{]*\{/,"",Citation_label)	# drop everything through the open brace
	sub(/ *, *$/,"",Citation_label)		# drop the trailing comma

	Author = Title = Journal = Volume = Number = Month = Year = Pages = Url = ""

	In_Article = 1
}
179
180
function do_author()
{	# Record the author list of the current entry, converted from TeX markup
	Author = get_value($0)
	Author = TeX_to_HTML(Author)
}
185
186
function do_end_entry( k,n,parts)
{
	# End of an @Article entry: emit its table-of-contents line(s).
	# A change in issue number first triggers a new issue heading.

	if (Last_number != Number)
		do_new_issue()

	n = split(Author,parts," and ")

	# every author except the last goes on a line of its own
	k = 0
	while (++k < n)
		print_toc_line(parts[k] " and", "", "")

	# the hypertext wrapper is consumed by the next print_toc_line() call
	Title_prefix = html_begin_title()
	Title_suffix = html_end_title()

	if (html_length(Title) > (MAX_TITLE_CHARS + MIN_LEADERS))
		# need to split long title over multiple lines
		do_long_title(parts[n], Title, html_begin_pages() Pages html_end_pages())
	else
		# complete title fits on line
		print_toc_line(parts[n], Title, html_begin_pages() Pages html_end_pages())
}
201
202
function do_journal()
{
	# Record the journal name, which is either a quoted string or an
	# @String abbreviation that is expanded when its definition is known.

	if ($0 !~ /[=] *"/)	# have journal = journal-abbreviation,
	{
		Journal = get_abbrev($0)
		if (Journal in String)
			Journal = String[Journal]
	}
	else			# have journal = "quoted journal name",
		Journal = get_value($0)

	gsub(/\\-/,"",Journal)	# remove discretionary hyphens
}
215
216
function do_long_title(author,title,pages, last_title,n)
{
	# Print a title that is too long for one line as several lines.
	# The author appears only on the first line and the page number
	# only on the last.

	for (title = trim(title); length(title) > 0; author = "")
	{
		n = html_breakpoint(title,MAX_TITLE_CHARS+MIN_LEADERS)
		last_title = substr(title,1,n)
		title = substr(title,n+1)
		sub(/^ +/,"",title)		# discard any leading space

		if (length(title) == 0)		# that was the final piece
			print_toc_line(author, last_title, pages)
		else
			print_toc_line(author, last_title, "")
	}
}
230
231
function do_month( k,n,parts)
{
	# Record the month, expanding the standard three-letter BibTeX
	# abbreviations.  Compound values joined with ``# "--" #'' or
	# ``# \slash #'' become "Month / Month".

	if ($0 ~ /[=] *"/)
		Month = get_value($0)
	else
		Month = get_abbrev($0)

	gsub(/[\"]/,"",Month)
	gsub(/ *# *\\slash *# */," / ",Month)
	gsub(/ *# *-+ *# */," / ",Month)

	n = split(Month,parts," */ *")
	Month = ""
	for (k = 1; k <= n; ++k)
	{
		if (k > 1)
			Month = Month " / "
		if (parts[k] in Month_expansion)
			Month = Month Month_expansion[parts[k]]
		else
			Month = Month parts[k]
	}
}
244
245
function do_new_issue()
{
	# Emit the heading for a new journal issue: journal name plus
	# volume/number/month/year.  In HTML mode this also closes the
	# previous contents block, adds an entry to the issue index, and
	# opens a new contents block.

	Last_number = Number

	if (!HTML)
	{
		print_line("")
		print_line(Journal)
		print_line(strip_html(vol_no_month_year()))
		print_line("")
		return
	}

	if (Last_volume != Volume)
	{
		Last_volume = Volume
		print_line(prefix(2) "<BR>")
	}
	html_end_toc()
	html_begin_issue()
	print_line(prefix(2) Journal "<BR>")

	print_line(strip_html(vol_no_month_year()))

	html_end_issue()
	html_toc_entry()
	html_begin_toc()
}
277
278
function do_number()
{	# Record the issue number from ``number = "value",''
	Number = get_value($0)
}
283
284
function do_Other()
{	# Some entry other than @Article has started, so its closing brace
	# must not produce a table-of-contents line
	In_Article = 0
}
289
290
function do_pages()
{	# Record the page range, dropping an unknown ending page (``--??'')
	Pages = get_value($0)
	sub(/--[?][?]/,"",Pages)
}
296
297
function do_String()
{
	# Remember an @String{abbrev = "expansion"} definition in String[].
	# NB: this edits the current record ($0) in place.

	sub(/^[^\{]*\{/,"")	# discard up to and including open brace
	sub(/\} *$/,"")		# discard trailing brace and any blanks after it
	String[get_key($0)] = get_value($0)
}
304
305
function do_title()
{	# Record the article title, converted from TeX markup
	Title = get_value($0)
	Title = TeX_to_HTML(Title)
}
310
311
function do_URL( parts)
{	# Record the article URL; when several are listed (separated by
	# commas or semicolons), only the first one is kept
	split(get_value($0),parts,"[,;]")
	Url = trim(parts[1])
}
318
319
function do_volume()
{	# Record the volume from ``volume = "value",''
	Volume = get_value($0)
}
324
325
function do_year()
{	# Record the year from ``year = "value",''
	Year = get_value($0)
}
330
331
function get_abbrev(s)
{
	# Return abbrev from ``key = abbrev,''

	sub(/^[^=]*= */,"",s)	# discard the key, the equals sign, and following blanks
	sub(/ *,? *$/,"",s)	# discard trailing blanks and the optional comma

	return (s)
}
339
340
function get_key(s)
{
	# Return key from ``key = "value",''

	sub(/^ */,"",s)		# discard leading blanks
	sub(/ *=.*$/,"",s)	# discard everything from the equals sign onward

	return (s)
}
348
349
function get_value(s)
{
	# Return value from ``key = "value",''

	sub(/^[^\"]*\" */,"",s)	# discard everything through the opening quote
				# and any blanks that follow it
	sub(/ *\",? *$/,"",s)	# discard trailing blanks, the closing quote,
				# and the optional comma

	return (s)
}
357
358
function html_accents(s)
{
	# Return s with braced TeX accent control sequences, such as
	# {\'e} or {\"U}, replaced by the corresponding ISO Latin 1 SGML
	# entities (&eacute;, &Uuml;, ...).  Text without a backslash is
	# returned unchanged.
	#
	# The grave accent is the TeX control symbol backslash-backtick.
	# All braces are escaped so that no awk implementation can
	# mistake them for interval expressions.

	if (index(s,"\\") > 0)			# important optimization
	{
		# Convert common lower-case accented letters according to the
		# table on p. 169 of Peter Flynn's ``The World Wide Web
		# Handbook'', International Thomson Computer Press, 1995, ISBN
		# 1-85032-205-8.  The official table of ISO Latin 1 SGML
		# entities used in HTML can be found in the file
		# /usr/local/lib/html-check/lib/ISOlat1.sgml (your path
		# may differ).

		gsub(/\{\\`a\}/,	"\\&agrave;",	s)
		gsub(/\{\\'a\}/,	"\\&aacute;",	s)
		gsub(/\{\\[\^]a\}/,	"\\&acirc;",	s)
		gsub(/\{\\~a\}/,	"\\&atilde;",	s)
		gsub(/\{\\\"a\}/,	"\\&auml;",	s)
		gsub(/\{\\aa\}/,	"\\&aring;",	s)
		gsub(/\{\\ae\}/,	"\\&aelig;",	s)

		gsub(/\{\\c\{c\}\}/,	"\\&ccedil;",	s)

		gsub(/\{\\`e\}/,	"\\&egrave;",	s)
		gsub(/\{\\'e\}/,	"\\&eacute;",	s)
		gsub(/\{\\[\^]e\}/,	"\\&ecirc;",	s)
		gsub(/\{\\\"e\}/,	"\\&euml;",	s)

		gsub(/\{\\`i\}/,	"\\&igrave;",	s)
		gsub(/\{\\'i\}/,	"\\&iacute;",	s)
		gsub(/\{\\[\^]i\}/,	"\\&icirc;",	s)
		gsub(/\{\\\"i\}/,	"\\&iuml;",	s)

		# ignore eth and thorn

		gsub(/\{\\~n\}/,	"\\&ntilde;",	s)

		gsub(/\{\\`o\}/,	"\\&ograve;",	s)
		gsub(/\{\\'o\}/,	"\\&oacute;",	s)
		gsub(/\{\\[\^]o\}/,	"\\&ocirc;",	s)
		gsub(/\{\\~o\}/,	"\\&otilde;",	s)
		gsub(/\{\\\"o\}/,	"\\&ouml;",	s)
		gsub(/\{\\o\}/,		"\\&oslash;",	s)

		gsub(/\{\\`u\}/,	"\\&ugrave;",	s)
		gsub(/\{\\'u\}/,	"\\&uacute;",	s)
		gsub(/\{\\[\^]u\}/,	"\\&ucirc;",	s)
		gsub(/\{\\\"u\}/,	"\\&uuml;",	s)

		gsub(/\{\\'y\}/,	"\\&yacute;",	s)
		gsub(/\{\\\"y\}/,	"\\&yuml;",	s)

		# Now do the same for upper-case accents

		gsub(/\{\\`A\}/,	"\\&Agrave;",	s)
		gsub(/\{\\'A\}/,	"\\&Aacute;",	s)
		gsub(/\{\\[\^]A\}/,	"\\&Acirc;",	s)
		gsub(/\{\\~A\}/,	"\\&Atilde;",	s)
		gsub(/\{\\\"A\}/,	"\\&Auml;",	s)
		gsub(/\{\\AA\}/,	"\\&Aring;",	s)
		gsub(/\{\\AE\}/,	"\\&AElig;",	s)

		gsub(/\{\\c\{C\}\}/,	"\\&Ccedil;",	s)

		gsub(/\{\\`E\}/,	"\\&Egrave;",	s)
		gsub(/\{\\'E\}/,	"\\&Eacute;",	s)
		gsub(/\{\\[\^]E\}/,	"\\&Ecirc;",	s)
		gsub(/\{\\\"E\}/,	"\\&Euml;",	s)

		gsub(/\{\\`I\}/,	"\\&Igrave;",	s)
		gsub(/\{\\'I\}/,	"\\&Iacute;",	s)
		gsub(/\{\\[\^]I\}/,	"\\&Icirc;",	s)
		gsub(/\{\\\"I\}/,	"\\&Iuml;",	s)

		# ignore eth and thorn

		gsub(/\{\\~N\}/,	"\\&Ntilde;",	s)

		gsub(/\{\\`O\}/,	"\\&Ograve;",	s)
		gsub(/\{\\'O\}/,	"\\&Oacute;",	s)
		gsub(/\{\\[\^]O\}/,	"\\&Ocirc;",	s)
		gsub(/\{\\~O\}/,	"\\&Otilde;",	s)
		gsub(/\{\\\"O\}/,	"\\&Ouml;",	s)
		gsub(/\{\\O\}/,		"\\&Oslash;",	s)

		gsub(/\{\\`U\}/,	"\\&Ugrave;",	s)
		gsub(/\{\\'U\}/,	"\\&Uacute;",	s)
		gsub(/\{\\[\^]U\}/,	"\\&Ucirc;",	s)
		gsub(/\{\\\"U\}/,	"\\&Uuml;",	s)

		gsub(/\{\\'Y\}/,	"\\&Yacute;",	s)

		gsub(/\{\\ss\}/,	"\\&szlig;",	s)

		# Others not mentioned in Flynn's book
		gsub(/\{\\'\\i\}/,	"\\&iacute;",	s)	# accented dotless i
		gsub(/\{\\'\\j\}/,	"j",		s)	# no entity: fall back to plain j
	}
	return (s)
}
458
459
function html_begin_issue()
{
	# Open the heading of a new issue: a horizontal rule, then an <H1>
	# holding a named anchor that the issue index links to.

	print_line("")
	print_line(prefix(2) "<HR>")
	print_line("")
	print_line(prefix(2) "<H1>")
	print_line(prefix(3) "<A NAME=\"" html_label() "\">")
}
468
469
function html_begin_pages()
{
	# Return the opening anchor that links a page number to the BibTeX
	# entry, or an empty string when no such link is wanted.

	if (HTML && (BIBFILEURL != ""))
		return ("<A HREF=\"" BIBFILEURL "#" Citation_label "\">")
	return ("")
}
474
475
function html_begin_pre()
{	# Open a preformatted block; while In_PRE is set, prefix() returns
	# no indentation
	In_PRE = 1
	print_line("<PRE>")
}
481
482
function html_begin_title()
{
	# Return the opening anchor that links a title to the article's
	# URL, or an empty string when no such link is wanted.

	if (HTML && (Url != ""))
		return ("<A HREF=\"" Url "\">")
	return ("")
}
487
488
function html_begin_toc()
{	# Start the contents block of an issue, first closing any block
	# that is still open
	html_end_toc()
	html_begin_pre()
}
494
495
function html_body( k)
{	# Write out, in order, every line buffered in Body[]
	k = 0
	while (++k <= BodyLines)
		print Body[k]
}
501
function html_breakpoint(title,maxlength, break_after,k)
{
	# Return the largest character position in title AFTER which we
	# can break the title across lines, without exceeding maxlength
	# visible characters.

	if (html_length(title) <= maxlength)	# title fits on one line
		return (length(title))

	# Forward scan.  With HTML markup present, the raw position where
	# the visible length reaches maxlength is not known in advance, so
	# start at maxlength and advance over candidate blanks, calling
	# the expensive html_length() only at those candidates.
	break_after = 0
	for (k = min(maxlength,length(title)); k < length(title); ++k)
	{
		if (substr(title,k+1,1) != " ")
			continue		# cannot break after position k
		if (html_length(substr(title,1,k)) > maxlength)
			break			# advanced too far: keep last break_after
		break_after = k
	}
	if (break_after != 0)
		return (break_after)

	# Backward scan: look for the last blank before maxlength.
	k = min(maxlength,length(title)) - 1
	while ((k > 0) && (substr(title,k+1,1) != " "))
		--k

	if (k < 1)				# no break point found
		return (length(title))		# so must print entire string
	return (k)
}
542
543
544
function html_end_issue()
{	# Close the anchor and heading opened by html_begin_issue()
	print_line(prefix(3) "</A>")
	print_line(prefix(2) "</H1>")
}
550
551
function html_end_pages()
{	# Return the closing tag that matches html_begin_pages()
	if (HTML && (BIBFILEURL != ""))
		return ("</A>")
	return ("")
}
556
557
function html_end_pre()
{	# Close the preformatted block, if one is open
	if (!In_PRE)
		return
	print_line("</PRE>")
	In_PRE = 0
}
566
567
function html_end_title()
{	# Return the closing tag that matches html_begin_title()
	if (HTML && (Url != ""))
		return ("</A>")
	return ("")
}
572
573
function html_end_toc()
{	# Finish the contents block of an issue by closing its <PRE> block
	html_end_pre()
}
578
579
function html_fonts(s, arg,control_word,k,level,n,open_brace)
{
	# Translate TeX font changes in s to HTML markup and return the
	# result.  Both the declaration form {\xxx ...} and the command
	# form \xxx{...} are recognized, using Font_decl_map[] and
	# Font_cmd_map[].  A map value of "toupper" upper-cases the
	# argument, an empty value drops the font change, and any other
	# value is used as the HTML tag name.  Only the first brace group
	# is examined; if it is a font change, the text after it is
	# processed recursively.

	open_brace = index(s,"{")
	if (open_brace == 0)			# important optimization
		return (s)

	# find the end of the first brace group
	level = 1
	k = open_brace + 1
	while ((level != 0) && (k <= length(s)))
	{
		if (substr(s,k,1) == "{")
			level++
		else if (substr(s,k,1) == "}")
			level--
		++k
	}

	# {...} is now found at open_brace ... (k-1)
	for (control_word in Font_decl_map)	# look for {\xxx ...}
	{
		if (substr(s,open_brace+1,length(control_word)+1) !~ \
			("\\" control_word "[^A-Za-z]"))
			continue

		n = open_brace + 1 + length(control_word)
		arg = trim(substr(s,n,k - n))
		if (Font_decl_map[control_word] == "toupper") # arg -> ARG
			arg = toupper(arg)
		else if (Font_decl_map[control_word] != "") # arg -> <TAG>arg</TAG>
			arg = "<" Font_decl_map[control_word] ">" arg "</" Font_decl_map[control_word] ">"
		return (substr(s,1,open_brace-1) arg html_fonts(substr(s,k)))
	}

	for (control_word in Font_cmd_map)	# look for \xxx{...}
	{
		if (substr(s,open_brace - length(control_word),length(control_word)) !~ \
			("\\" control_word))
			continue

		n = open_brace + 1
		arg = trim(substr(s,n,k - n))
		if (Font_cmd_map[control_word] == "toupper") # arg -> ARG
			arg = toupper(arg)
		else if (Font_cmd_map[control_word] != "") # arg -> <TAG>arg</TAG>
			arg = "<" Font_cmd_map[control_word] ">" arg "</" Font_cmd_map[control_word] ">"
		n = open_brace - length(control_word) - 1
		return (substr(s,1,n) arg html_fonts(substr(s,k)))
	}

	return (s)
}
627
628
function html_header( cmd)
{
	# Print the HTML prologue: provenance comments (program version,
	# date, user and host), the DOCTYPE, and the <HEAD> section, ending
	# with the opening <BODY> tag.
	#
	# User, host and date come from the environment and from external
	# commands.  Each command pipe is closed as soon as its first
	# output line has been read, so that no pipes or child processes
	# are left open.
	#
	# NOTE(review): USER is interpolated unquoted into shell command
	# lines below.  It comes from the caller's own environment, but a
	# value containing shell metacharacters would be run by the shell.

	USER = ENVIRON["USER"]
	if (USER == "")
	    USER = ENVIRON["LOGNAME"]
	if (USER == "")
	    USER = "????"

	cmd = "hostname"
	cmd | getline HOSTNAME
	close(cmd)

	cmd = "date"
	cmd | getline DATE
	close(cmd)

	# Look the user's personal name up in the NIS password map first,
	# and fall back to the local password file.
	cmd = "ypcat passwd | grep '^" USER ":' | awk -F: '{print $5}'"
	cmd | getline PERSONAL_NAME
	close(cmd)
	if (PERSONAL_NAME == "")
	{
	    cmd = "grep  '^" USER ":' /etc/passwd | awk -F: '{print $5}'"
	    cmd | getline PERSONAL_NAME
	    close(cmd)
	}


	print "<!-- WARNING: Do NOT edit this file.  It was converted from -->"
	print "<!-- BibTeX format to HTML by journal-toc.awk version " VERSION_NUMBER " " VERSION_DATE " -->"
	print "<!-- on " DATE " -->"
	print "<!-- for " PERSONAL_NAME " (" USER "@" HOSTNAME ") -->"
	print ""
	print ""
	print "<!DOCTYPE HTML public \"-//IETF//DTD HTML//EN\">"
	print ""
	print "<HTML>"
	print prefix(1) "<HEAD>"
	print prefix(2) "<TITLE>"
	print prefix(3)  Journal
	print prefix(2) "</TITLE>"
	print prefix(2) "<LINK REV=\"made\" HREF=\"mailto:" USER "@" HOSTNAME "\">"
	print prefix(1) "</HEAD>"
	print ""
	print prefix(1) "<BODY>"
}
661
662
function html_label( label)
{
	# Return the anchor name for the current issue, built from volume,
	# number, month and year, keeping only letters, digits, and the
	# punctuation characters ( ) : , ; . / -

	label = sprintf("%s(%s):%s:%s", Volume, Number, Month, Year)
	gsub(/[^[:alnum:]():,;.\/\-]/,"",label)
	return (label)
}
670
671
function html_length(s)
{
	# Return the visible length of s, ignoring any HTML markup.
	#
	# In HTML mode, tags occupy no space on the rendered line, while an
	# SGML entity such as &eacute; or &#233; renders as exactly ONE
	# character.  Tags are therefore deleted, and each entity is
	# replaced by a one-character placeholder before the length is
	# taken.  In plain-text mode the raw length is returned.

	if (HTML)
	{
		gsub(/<\/?[^>]*>/,"",s)			# remove SGML tags
		gsub(/&#?[[:alnum:]]+;/,"x",s)		# each SGML entity shows as one character
	}
	return (length(s))
}
682
683
function html_toc()
{
	# Print the index of issues: a heading followed by the links that
	# html_toc_entry() accumulated in HTML_TOC.

	print prefix(2) "<H1>"
	print prefix(3) "Table of contents for issues of " Journal
	print prefix(2) "</H1>"

	print HTML_TOC
}
691
692
function html_toc_entry()
{
	# Append to HTML_TOC a link to the current issue, one entry per
	# line.  The link target is the anchor name from html_label().
	#
	# The entry is indented with prefix(2) so that it honours the
	# INDENT option like the rest of the HTML output; with the default
	# INDENT=4 that is 8 blanks.  prefix() returns no indentation
	# while a <PRE> block is open.

	HTML_TOC = HTML_TOC prefix(2) "<A HREF=\"#" html_label() "\">"
	HTML_TOC = HTML_TOC vol_no_month_year()
	HTML_TOC = HTML_TOC "</A><BR>" "\n"
}
699
700
function html_trailer()
{	# Close any open <PRE> block, then the document body and the document
	html_end_pre()

	print prefix(1) "</BODY>"
	print "</HTML>"
}
707
708
function initialize()
{
	# Set up global constants, user-option defaults, and the lookup
	# tables: Month_expansion[] for month abbreviations, Font_cmd_map[]
	# for \xxx{...} font commands, and Font_decl_map[] for {\xxx ...}
	# font declarations.
	#
	# In the font maps, the value is the HTML tag name to use, or
	# "toupper" to upper-case the argument, or "" to drop the font
	# change.

	# NB: Update these when the program changes
	VERSION_DATE = "[09-Oct-1996]"
	VERSION_NUMBER = "1.00"

	HTML = (HTML == "") ? 0 : (0 + HTML)

	if (INDENT == "")
		INDENT = 4

	if (HTML == 0)
		INDENT = 0	# indentation suppressed in ASCII mode

	LEADERS = " . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ."

	MAX_TITLE_CHARS = 36	# 36 produces a 79-char output line when there is
				# just an initial page number.  If this is
				# increased, the LEADERS string may need to be
				# lengthened.

	MIN_LEADERS = 4		# Minimum number of characters from LEADERS
				# required when leaders are used.  The total
				# number of characters that can appear in a
				# title line is MAX_TITLE_CHARS + MIN_LEADERS.
				# Leaders are omitted when the title length is
				# between MAX_TITLE_CHARS and this sum.

	MIN_LEADERS_SPACE = "        "	# must be at least MIN_LEADERS characters long

	Month_expansion["jan"]	= "January"
	Month_expansion["feb"]	= "February"
	Month_expansion["mar"]	= "March"
	Month_expansion["apr"]	= "April"
	Month_expansion["may"]	= "May"
	Month_expansion["jun"]	= "June"
	Month_expansion["jul"]	= "July"
	Month_expansion["aug"]	= "August"
	Month_expansion["sep"]	= "September"
	Month_expansion["oct"]	= "October"
	Month_expansion["nov"]	= "November"
	Month_expansion["dec"]	= "December"

	Font_cmd_map["\\emph"]		= "EM"
	Font_cmd_map["\\textbf"]	= "B"
	Font_cmd_map["\\textit"]	= "I"
	Font_cmd_map["\\textmd"]	= ""
	Font_cmd_map["\\textrm"]	= ""
	Font_cmd_map["\\textsc"]	= "toupper"
	Font_cmd_map["\\textsl"]	= "I"
	Font_cmd_map["\\texttt"]	= "TT"	# typewriter text: same tag as \tt and \ttfamily
	Font_cmd_map["\\textup"]	= ""

	Font_decl_map["\\bf"]		= "B"
	Font_decl_map["\\em"]		= "EM"
	Font_decl_map["\\it"]		= "I"
	Font_decl_map["\\rm"]		= ""
	Font_decl_map["\\sc"]		= "toupper"
	Font_decl_map["\\sf"]		= ""
	Font_decl_map["\\tt"]		= "TT"
	Font_decl_map["\\itshape"]	= "I"
	Font_decl_map["\\upshape"]	= ""
	Font_decl_map["\\slshape"]	= "I"
	Font_decl_map["\\scshape"]	= "toupper"
	Font_decl_map["\\mdseries"]	= ""
	Font_decl_map["\\bfseries"]	= "B"
	Font_decl_map["\\rmfamily"]	= ""
	Font_decl_map["\\sffamily"]	= ""
	Font_decl_map["\\ttfamily"]	= "TT"
}
779
function min(a,b)
{	# Return the smaller of a and b (b when they compare equal)
	if (a < b)
		return a
	return b
}
784
785
function prefix(level)
{
	# Return the indentation for the given nesting level: INDENT * level
	# blanks, but never more than 60.  Inside a <PRE> block no
	# indentation is produced.

	if (In_PRE)
		return ("")

	return (substr("                                                            ", \
		1, INDENT * level))
}
796
797
function print_line(line)
{
	# Output one line.  In HTML mode the line is buffered in Body[],
	# because the issue index, which precedes the body in the output,
	# is only complete at the end of the input.

	if (!HTML)
	{
		print line
		return
	}
	Body[++BodyLines] = line
}
805
806
function print_toc_line(author,title,pages, extra,leaders,n,t)
{
	# Format and output one table-of-contents line: a right-justified
	# author column, the title, and (when pages is non-empty) dot
	# leaders followed by the page number.
	#
	# When we have a multiline title, the hypertext link goes only
	# on the first line.  A multiline hypertext link looks awful
	# because of long underlines under the leading indentation.

	if (pages != "")			# last title line, with page number
	{
		n = html_length(title)		# potentially expensive
		extra = n % 2			# extra space for aligned leader dots

		# The fill is the same width in both cases; only its source
		# differs.  Here n <= MAX_TITLE_CHARS makes min(MAX_TITLE_CHARS,n)
		# equal to n.
		if (n > MAX_TITLE_CHARS)	# title (almost) fills line, so no leaders
			leaders = substr(MIN_LEADERS_SPACE,1, \
					 (MAX_TITLE_CHARS + MIN_LEADERS - extra - n))
		else				# then need leaders
			leaders = substr(LEADERS, 1, \
					 MAX_TITLE_CHARS + MIN_LEADERS - extra - n)

		t = sprintf("%31s   %s%s%s%s%s %4s", \
			    author, Title_prefix, title, Title_suffix, \
			    (extra ? " " : ""), leaders, pages)
	}
	else		# no leaders needed in title lines other than the last one
		t = sprintf("%31s   %s%s%s", author, Title_prefix, title, Title_suffix)

	# forget any hypertext link material
	Title_prefix = ""
	Title_suffix = ""

	# Efficiency note: an earlier version accumulated the body in a
	# single scalar like this: "Body = Body t".  Profiling revealed
	# this statement as the major hot spot, and the change to array
	# storage made the program more than twice as fast.
	if (!HTML)
		print t
	else
		Body[++BodyLines] = t
}
844
845
function protect_SGML_characters(s)
{
	# Return s with the four SGML reserved characters replaced by
	# entities.  The ampersand MUST be converted first; otherwise the
	# ampersands of the other entities would be converted again.

	gsub(/&/,"\\&amp;",s)
	gsub(/</,"\\&lt;",s)
	gsub(/>/,"\\&gt;",s)
	gsub(/\"/,"\\&quot;",s)

	return (s)
}
854
855
function strip_braces(s, k)
{
	# Strip non-backslashed braces from s and return the result

	k = strip_char(s,"{")		# first the open braces
	return (strip_char(k,"}"))	# then the close braces
}
861
862
function strip_char(s,c, k,out)
{
	# Strip non-backslashed instances of the single character c from
	# s, and return the result.  An instance of c directly preceded by
	# a backslash is preserved, together with its backslash.
	#
	# s is consumed from the left: everything up to each instance of c
	# is moved to out, with or without the instance itself, and the
	# scan resumes on the remainder.  This is iterative, so very long
	# strings cannot run into a recursion limit.

	out = ""
	while ((k = index(s,c)) > 0)		# found the character
	{
		if (substr(s,k-1,1) != "\\")	# not backslashed
			out = out substr(s,1,k-1)	# so remove it
		else				# preserve backslashed char
			out = out substr(s,1,k)
		s = substr(s,k+1)
	}
	return (out s)
}
875
876
function strip_html(s)
{	# Return s with every SGML tag (<...> and </...>) removed
	gsub(/<\/?[^>]*>/,"",s)

	return (s)
}
882
883
function terminate()
{
	# End-of-input processing.  Plain-text output has already been
	# printed line by line, so there is nothing more to do.  In HTML
	# mode the buffered document is written out now: header, issue
	# index, body, trailer.

	if (!HTML)
		return

	html_end_pre()

	HTML = 0	# NB: stop line buffering
	html_header()
	html_toc()
	html_body()
	html_trailer()
}
897
898
function TeX_to_HTML(s, k,n,parts)
{
	# Convert a TeX-marked-up BibTeX value to the output form and
	# return it.  Text outside math mode goes through
	# TeX_to_HTML_nonmath() and has its braces stripped; text inside
	# $...$ or $$...$$ goes through TeX_to_HTML_math().

	# First convert SGML reserved characters to SGML entities.  The
	# ampersand is handled later, by the math and non-math converters.
	if (HTML)
	{
	    gsub(/>/,	"\\&gt;",	s)
	    gsub(/</,	"\\&lt;",	s)
	    gsub(/"/,	"\\&quot;",	s)
	}

	# Splitting at every dollar sign puts non-math text in the odd
	# parts and math text in the even parts ONLY if each math delimiter
	# contributes an odd number of dollar signs.  Display math ($$) is
	# therefore widened to triple dollars for the split, and narrowed
	# again afterwards.
	gsub(/[$][$]/,"$$$",s)	# change display math to triple dollars for split
	n = split(s,parts,/[$]/)# split into non-math (odd) and math (even) parts

	s = ""
	for (k = 1; k <= n; ++k) # unbrace non-math part, leaving math mode intact
		s = s ((k > 1) ? "$" : "") \
			((k % 2) ? strip_braces(TeX_to_HTML_nonmath(parts[k])) : \
			TeX_to_HTML_math(parts[k]))

	gsub(/[$][$][$]/,"$$",s) # restore display math

	return (s)
}
922
923
function TeX_to_HTML_math(s)
{
	# Convert math-mode text.  Mostly a dummy for now, but HTML 3
	# could support some math translation.  The only change made is
	# that TeX ampersands (\&) are reduced to SGML entities.

	gsub(/\\&/,"\\&amp;",s)

	return (s)
}
932
933
function TeX_to_HTML_nonmath(s)
{
	# Convert text outside math mode.  Common TeX control symbols are
	# reduced to plain characters; then, in HTML mode, accents and font
	# changes are translated to HTML, and in plain-text mode the
	# remaining TeX markup is discarded.

	if (index(s,"\\") == 0)			# important optimization
		return (s)

	gsub(/\\slash +/,"/",s)		# replace TeX slashes with conventional ones
	gsub(/ *\\emdash +/," --- ",s)	# replace BibNet emdashes with conventional ones
	gsub(/\\%/,"%",s)		# reduce TeX percents to conventional ones
	gsub(/\\[$]/,"$",s)		# reduce TeX dollars to conventional ones
	gsub(/\\#/,"#",s)		# reduce TeX sharps to conventional ones

	if (!HTML)			# plain ASCII text output: discard all TeX markup
	{
		gsub(/\\\&/, "\\&", s)	# reduce TeX ampersands to conventional ones
		gsub(/\\[[:lower:]][[:lower:]] +/,"",s) # remove TeX font changes
		gsub(/\\[^[:alpha:]]/,"",s) # remove remaining TeX control symbols
		return (s)
	}

	# translate TeX markup to HTML
	gsub(/\\&/,"\\&amp;",s)		# reduce TeX ampersands to SGML entities
	s = html_accents(s)
	return (html_fonts(s))
}
962
963
function trim(s)
{	# Return s without leading and trailing blanks and tabs
	sub(/^[ \t]+/,"",s)
	sub(/[ \t]+$/,"",s)

	return (s)
}
970
971
function vol_no_month_year()
{
	# Return the issue description ``Volume v,  Number n, month, year'',
	# with each value passed through wrap()

	return (sprintf("Volume %s,  Number %s, %s, %s", \
		wrap(Volume), wrap(Number), wrap(Month), wrap(Year)))
}
976
977
function wrap(value)
{	# Return value, enclosed in <STRONG>...</STRONG> when in HTML mode
	if (HTML)
		return ("<STRONG>" value "</STRONG>")
	return (value)
}
982