Home | History | Annotate | Line # | Download | only in dist
      1 /* Id: html.c,v 1.275 2021/09/09 14:47:24 schwarze Exp  */
      2 /*
      3  * Copyright (c) 2008-2011, 2014 Kristaps Dzonsons <kristaps (at) bsd.lv>
      4  * Copyright (c) 2011-2015, 2017-2021 Ingo Schwarze <schwarze (at) openbsd.org>
      5  *
      6  * Permission to use, copy, modify, and distribute this software for any
      7  * purpose with or without fee is hereby granted, provided that the above
      8  * copyright notice and this permission notice appear in all copies.
      9  *
     10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
     11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
     12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
     13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
     14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
     15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
     16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
     17  *
     18  * Common functions for mandoc(1) HTML formatters.
     19  * For use by individual formatters and by the main program.
     20  */
     21 #include "config.h"
     22 
     23 #include <sys/types.h>
     24 #include <sys/stat.h>
     25 
     26 #include <assert.h>
     27 #include <ctype.h>
     28 #include <stdarg.h>
     29 #include <stddef.h>
     30 #include <stdio.h>
     31 #include <stdint.h>
     32 #include <stdlib.h>
     33 #include <string.h>
     34 #include <unistd.h>
     35 
     36 #include "mandoc_aux.h"
     37 #include "mandoc_ohash.h"
     38 #include "mandoc.h"
     39 #include "roff.h"
     40 #include "out.h"
     41 #include "html.h"
     42 #include "manconf.h"
     43 #include "main.h"
     44 
/*
 * Static description of one HTML element type: the element name
 * as written in start and end tags, and a bit mask of the HTML_*
 * flags defined below that controls nesting checks, line breaks
 * and indentation when the element is opened and closed.
 */
struct	htmldata {
	const char	 *name;
	int		  flags;
#define	HTML_INPHRASE	 (1 << 0)  /* Can appear in phrasing context. */
#define	HTML_TOPHRASE	 (1 << 1)  /* Establishes phrasing context. */
#define	HTML_NOSTACK	 (1 << 2)  /* Does not have an end tag. */
#define	HTML_NLBEFORE	 (1 << 3)  /* Output line break before opening. */
#define	HTML_NLBEGIN	 (1 << 4)  /* Output line break after opening. */
#define	HTML_NLEND	 (1 << 5)  /* Output line break before closing. */
#define	HTML_NLAFTER	 (1 << 6)  /* Output line break after closing. */
/* Convenience combinations of the four line break flags. */
#define	HTML_NLAROUND	 (HTML_NLBEFORE | HTML_NLAFTER)
#define	HTML_NLINSIDE	 (HTML_NLBEGIN | HTML_NLEND)
#define	HTML_NLALL	 (HTML_NLAROUND | HTML_NLINSIDE)
#define	HTML_INDENT	 (1 << 7)  /* Indent content by two spaces. */
#define	HTML_NOINDENT	 (1 << 8)  /* Exception: never indent content. */
};
     61 
/*
 * Properties of all supported element types.
 * The table is indexed with enum htmltag values, for example
 * htmltags[TAG_PRE], so the entries presumably have to stay in
 * the order of that enumeration - confirm against html.h before
 * adding or moving entries.
 */
static	const struct htmldata htmltags[TAG_MAX] = {
	{"html",	HTML_NLALL},
	{"head",	HTML_NLALL | HTML_INDENT},
	{"meta",	HTML_NOSTACK | HTML_NLALL},
	{"link",	HTML_NOSTACK | HTML_NLALL},
	{"style",	HTML_NLALL | HTML_INDENT},
	{"title",	HTML_NLAROUND},
	{"body",	HTML_NLALL},
	{"div",		HTML_NLAROUND},
	{"section",	HTML_NLALL},
	{"table",	HTML_NLALL | HTML_INDENT},
	{"tr",		HTML_NLALL | HTML_INDENT},
	{"td",		HTML_NLAROUND},
	{"li",		HTML_NLAROUND | HTML_INDENT},
	{"ul",		HTML_NLALL | HTML_INDENT},
	{"ol",		HTML_NLALL | HTML_INDENT},
	{"dl",		HTML_NLALL | HTML_INDENT},
	{"dt",		HTML_NLAROUND},
	{"dd",		HTML_NLAROUND | HTML_INDENT},
	{"h1",		HTML_TOPHRASE | HTML_NLAROUND},
	{"h2",		HTML_TOPHRASE | HTML_NLAROUND},
	{"p",		HTML_TOPHRASE | HTML_NLAROUND | HTML_INDENT},
	{"pre",		HTML_TOPHRASE | HTML_NLAROUND | HTML_NOINDENT},
	/* Phrasing elements that can themselves contain phrasing content. */
	{"a",		HTML_INPHRASE | HTML_TOPHRASE},
	{"b",		HTML_INPHRASE | HTML_TOPHRASE},
	{"cite",	HTML_INPHRASE | HTML_TOPHRASE},
	{"code",	HTML_INPHRASE | HTML_TOPHRASE},
	{"i",		HTML_INPHRASE | HTML_TOPHRASE},
	{"small",	HTML_INPHRASE | HTML_TOPHRASE},
	{"span",	HTML_INPHRASE | HTML_TOPHRASE},
	{"var",		HTML_INPHRASE | HTML_TOPHRASE},
	{"br",		HTML_INPHRASE | HTML_NOSTACK | HTML_NLALL},
	{"hr",		HTML_INPHRASE | HTML_NOSTACK},
	{"mark",	HTML_INPHRASE },
	{"math",	HTML_INPHRASE | HTML_NLALL | HTML_INDENT},
	/* Elements with no flags set: no line breaks, no indentation. */
	{"mrow",	0},
	{"mi",		0},
	{"mn",		0},
	{"mo",		0},
	{"msup",	0},
	{"msub",	0},
	{"msubsup",	0},
	{"mfrac",	0},
	{"msqrt",	0},
	{"mfenced",	0},
	{"mtable",	0},
	{"mtr",		0},
	{"mtd",		0},
	{"munderover",	0},
	{"munder",	0},
	{"mover",	0},
};
    114 
/*
 * Avoid duplicate HTML id= attributes.
 * Each distinct identifier handed out by html_make_id() in unique
 * mode gets one entry in the id_unique hash table, keyed by the
 * id string.  The table is initialized in html_alloc() and
 * html_reset() and emptied in html_reset_internal().
 */

struct	id_entry {
	int	 ord;	/* Ordinal number of the latest occurrence. */
	char	 id[];	/* The id= attribute without any ordinal suffix. */
};
static	struct ohash	 id_unique;
    122 
/* Forward declarations: state handling and low level output. */
static	void	 html_reset_internal(struct html *);
static	void	 print_byte(struct html *, char);
static	void	 print_endword(struct html *);
static	void	 print_indent(struct html *);
static	void	 print_word(struct html *, const char *);

/* Forward declarations: tags, escaping, URIs and fonts. */
static	void	 print_ctag(struct html *, struct tag *);
static	int	 print_escape(struct html *, char);
static	int	 print_encode(struct html *, const char *, const char *, int);
static	void	 print_href(struct html *, const char *, const char *, int);
static	void	 print_metaf(struct html *);
    134 
    135 
    136 void *
    137 html_alloc(const struct manoutput *outopts)
    138 {
    139 	struct html	*h;
    140 
    141 	h = mandoc_calloc(1, sizeof(struct html));
    142 
    143 	h->tag = NULL;
    144 	h->metac = h->metal = ESCAPE_FONTROMAN;
    145 	h->style = outopts->style;
    146 	if ((h->base_man1 = outopts->man) == NULL)
    147 		h->base_man2 = NULL;
    148 	else if ((h->base_man2 = strchr(h->base_man1, ';')) != NULL)
    149 		*h->base_man2++ = '\0';
    150 	h->base_includes = outopts->includes;
    151 	if (outopts->fragment)
    152 		h->oflags |= HTML_FRAGMENT;
    153 	if (outopts->toc)
    154 		h->oflags |= HTML_TOC;
    155 
    156 	mandoc_ohash_init(&id_unique, 4, offsetof(struct id_entry, id));
    157 
    158 	return h;
    159 }
    160 
    161 static void
    162 html_reset_internal(struct html *h)
    163 {
    164 	struct tag	*tag;
    165 	struct id_entry	*entry;
    166 	unsigned int	 slot;
    167 
    168 	while ((tag = h->tag) != NULL) {
    169 		h->tag = tag->next;
    170 		free(tag);
    171 	}
    172 	entry = ohash_first(&id_unique, &slot);
    173 	while (entry != NULL) {
    174 		free(entry);
    175 		entry = ohash_next(&id_unique, &slot);
    176 	}
    177 	ohash_delete(&id_unique);
    178 }
    179 
    180 void
    181 html_reset(void *p)
    182 {
    183 	html_reset_internal(p);
    184 	mandoc_ohash_init(&id_unique, 4, offsetof(struct id_entry, id));
    185 }
    186 
/*
 * Destroy a formatter obtained from html_alloc():
 * release the per-document state, then the formatter itself.
 */
void
html_free(void *p)
{
	struct html	*h;

	h = p;
	html_reset_internal(h);
	free(h);
}
    193 
    194 void
    195 print_gen_head(struct html *h)
    196 {
    197 	struct tag	*t;
    198 
    199 	print_otag(h, TAG_META, "?", "charset", "utf-8");
    200 	print_otag(h, TAG_META, "??", "name", "viewport",
    201 	    "content", "width=device-width, initial-scale=1.0");
    202 	if (h->style != NULL) {
    203 		print_otag(h, TAG_LINK, "?h??", "rel", "stylesheet",
    204 		    h->style, "type", "text/css", "media", "all");
    205 		return;
    206 	}
    207 
    208 	/*
    209 	 * Print a minimal embedded style sheet.
    210 	 */
    211 
    212 	t = print_otag(h, TAG_STYLE, "");
    213 	print_text(h, "table.head, table.foot { width: 100%; }");
    214 	print_endline(h);
    215 	print_text(h, "td.head-rtitle, td.foot-os { text-align: right; }");
    216 	print_endline(h);
    217 	print_text(h, "td.head-vol { text-align: center; }");
    218 	print_endline(h);
    219 	print_text(h, ".Nd, .Bf, .Op { display: inline; }");
    220 	print_endline(h);
    221 	print_text(h, ".Pa, .Ad { font-style: italic; }");
    222 	print_endline(h);
    223 	print_text(h, ".Ms { font-weight: bold; }");
    224 	print_endline(h);
    225 	print_text(h, ".Bl-diag ");
    226 	print_byte(h, '>');
    227 	print_text(h, " dt { font-weight: bold; }");
    228 	print_endline(h);
    229 	print_text(h, "code.Nm, .Fl, .Cm, .Ic, code.In, .Fd, .Fn, .Cd "
    230 	    "{ font-weight: bold; font-family: inherit; }");
    231 	print_tagq(h, t);
    232 }
    233 
    234 int
    235 html_setfont(struct html *h, enum mandoc_esc font)
    236 {
    237 	switch (font) {
    238 	case ESCAPE_FONTPREV:
    239 		font = h->metal;
    240 		break;
    241 	case ESCAPE_FONTITALIC:
    242 	case ESCAPE_FONTBOLD:
    243 	case ESCAPE_FONTBI:
    244 	case ESCAPE_FONTROMAN:
    245 	case ESCAPE_FONTCR:
    246 	case ESCAPE_FONTCB:
    247 	case ESCAPE_FONTCI:
    248 		break;
    249 	case ESCAPE_FONT:
    250 		font = ESCAPE_FONTROMAN;
    251 		break;
    252 	default:
    253 		return 0;
    254 	}
    255 	h->metal = h->metac;
    256 	h->metac = font;
    257 	return 1;
    258 }
    259 
    260 static void
    261 print_metaf(struct html *h)
    262 {
    263 	if (h->metaf) {
    264 		print_tagq(h, h->metaf);
    265 		h->metaf = NULL;
    266 	}
    267 	switch (h->metac) {
    268 	case ESCAPE_FONTITALIC:
    269 		h->metaf = print_otag(h, TAG_I, "");
    270 		break;
    271 	case ESCAPE_FONTBOLD:
    272 		h->metaf = print_otag(h, TAG_B, "");
    273 		break;
    274 	case ESCAPE_FONTBI:
    275 		h->metaf = print_otag(h, TAG_B, "");
    276 		print_otag(h, TAG_I, "");
    277 		break;
    278 	case ESCAPE_FONTCR:
    279 		h->metaf = print_otag(h, TAG_SPAN, "c", "Li");
    280 		break;
    281 	case ESCAPE_FONTCB:
    282 		h->metaf = print_otag(h, TAG_SPAN, "c", "Li");
    283 		print_otag(h, TAG_B, "");
    284 		break;
    285 	case ESCAPE_FONTCI:
    286 		h->metaf = print_otag(h, TAG_SPAN, "c", "Li");
    287 		print_otag(h, TAG_I, "");
    288 		break;
    289 	default:
    290 		break;
    291 	}
    292 }
    293 
    294 void
    295 html_close_paragraph(struct html *h)
    296 {
    297 	struct tag	*this, *next;
    298 	int		 flags;
    299 
    300 	this = h->tag;
    301 	for (;;) {
    302 		next = this->next;
    303 		flags = htmltags[this->tag].flags;
    304 		if (flags & (HTML_INPHRASE | HTML_TOPHRASE))
    305 			print_ctag(h, this);
    306 		if ((flags & HTML_INPHRASE) == 0)
    307 			break;
    308 		this = next;
    309 	}
    310 }
    311 
    312 /*
    313  * ROFF_nf switches to no-fill mode, ROFF_fi to fill mode.
    314  * TOKEN_NONE does not switch.  The old mode is returned.
    315  */
    316 enum roff_tok
    317 html_fillmode(struct html *h, enum roff_tok want)
    318 {
    319 	struct tag	*t;
    320 	enum roff_tok	 had;
    321 
    322 	for (t = h->tag; t != NULL; t = t->next)
    323 		if (t->tag == TAG_PRE)
    324 			break;
    325 
    326 	had = t == NULL ? ROFF_fi : ROFF_nf;
    327 
    328 	if (want != had) {
    329 		switch (want) {
    330 		case ROFF_fi:
    331 			print_tagq(h, t);
    332 			break;
    333 		case ROFF_nf:
    334 			html_close_paragraph(h);
    335 			print_otag(h, TAG_PRE, "");
    336 			break;
    337 		case TOKEN_NONE:
    338 			break;
    339 		default:
    340 			abort();
    341 		}
    342 	}
    343 	return had;
    344 }
    345 
    346 /*
    347  * Allocate a string to be used for the "id=" attribute of an HTML
    348  * element and/or as a segment identifier for a URI in an <a> element.
    349  * The function may fail and return NULL if the node lacks text data
    350  * to create the attribute from.
    351  * The caller is responsible for free(3)ing the returned string.
    352  *
    353  * If the "unique" argument is non-zero, the "id_unique" ohash table
    354  * is used for de-duplication.  If the "unique" argument is 1,
    355  * it is the first time the function is called for this tag and
    356  * location, so if an ordinal suffix is needed, it is incremented.
    357  * If the "unique" argument is 2, it is the second time the function
    358  * is called for this tag and location, so the ordinal suffix
    359  * remains unchanged.
    360  */
    361 char *
    362 html_make_id(const struct roff_node *n, int unique)
    363 {
    364 	const struct roff_node	*nch;
    365 	struct id_entry		*entry;
    366 	char			*buf, *cp;
    367 	size_t			 len;
    368 	unsigned int		 slot;
    369 
    370 	if (n->tag != NULL)
    371 		buf = mandoc_strdup(n->tag);
    372 	else {
    373 		switch (n->tok) {
    374 		case MDOC_Sh:
    375 		case MDOC_Ss:
    376 		case MDOC_Sx:
    377 		case MAN_SH:
    378 		case MAN_SS:
    379 			for (nch = n->child; nch != NULL; nch = nch->next)
    380 				if (nch->type != ROFFT_TEXT)
    381 					return NULL;
    382 			buf = NULL;
    383 			deroff(&buf, n);
    384 			if (buf == NULL)
    385 				return NULL;
    386 			break;
    387 		default:
    388 			if (n->child == NULL || n->child->type != ROFFT_TEXT)
    389 				return NULL;
    390 			buf = mandoc_strdup(n->child->string);
    391 			break;
    392 		}
    393 	}
    394 
    395 	/*
    396 	 * In ID attributes, only use ASCII characters that are
    397 	 * permitted in URL-fragment strings according to the
    398 	 * explicit list at:
    399 	 * https://url.spec.whatwg.org/#url-fragment-string
    400 	 * In addition, reserve '~' for ordinal suffixes.
    401 	 */
    402 
    403 	for (cp = buf; *cp != '\0'; cp++)
    404 		if (isalnum((unsigned char)*cp) == 0 &&
    405 		    strchr("!$&'()*+,-./:;=?@_", *cp) == NULL)
    406 			*cp = '_';
    407 
    408 	if (unique == 0)
    409 		return buf;
    410 
    411 	/* Avoid duplicate HTML id= attributes. */
    412 
    413 	slot = ohash_qlookup(&id_unique, buf);
    414 	if ((entry = ohash_find(&id_unique, slot)) == NULL) {
    415 		len = strlen(buf) + 1;
    416 		entry = mandoc_malloc(sizeof(*entry) + len);
    417 		entry->ord = 1;
    418 		memcpy(entry->id, buf, len);
    419 		ohash_insert(&id_unique, slot, entry);
    420 	} else if (unique == 1)
    421 		entry->ord++;
    422 
    423 	if (entry->ord > 1) {
    424 		cp = buf;
    425 		mandoc_asprintf(&buf, "%s~%d", cp, entry->ord);
    426 		free(cp);
    427 	}
    428 	return buf;
    429 }
    430 
    431 static int
    432 print_escape(struct html *h, char c)
    433 {
    434 
    435 	switch (c) {
    436 	case '<':
    437 		print_word(h, "&lt;");
    438 		break;
    439 	case '>':
    440 		print_word(h, "&gt;");
    441 		break;
    442 	case '&':
    443 		print_word(h, "&amp;");
    444 		break;
    445 	case '"':
    446 		print_word(h, "&quot;");
    447 		break;
    448 	case ASCII_NBRSP:
    449 		print_word(h, "&nbsp;");
    450 		break;
    451 	case ASCII_HYPH:
    452 		print_byte(h, '-');
    453 		break;
    454 	case ASCII_BREAK:
    455 		break;
    456 	default:
    457 		return 0;
    458 	}
    459 	return 1;
    460 }
    461 
/*
 * Write the bytes from p up to, but not including, pend to the
 * output.  Bytes that need special treatment are handed to
 * print_escape(), and escape sequences introduced by a backslash
 * are decoded with mandoc_escape() and replaced by their output
 * representation.
 * If pend is NULL, the input extends to its terminating NUL byte.
 * If norecurse is non-zero, font escapes are consumed without
 * changing the font and without opening any font elements.
 * Returns 1 if a no-space escape was found directly before
 * a NUL byte, or 0 otherwise.
 */
static int
print_encode(struct html *h, const char *p, const char *pend, int norecurse)
{
	char		 numbuf[16];
	const char	*seq;
	size_t		 sz;
	int		 c, len, breakline, nospace;
	enum mandoc_esc	 esc;
	/* Bytes that end a run of plain bytes; NUL-terminated for strcspn. */
	static const char rejs[10] = { ' ', '\\', '<', '>', '&', '"',
		ASCII_NBRSP, ASCII_HYPH, ASCII_BREAK, '\0' };

	if (pend == NULL)
		pend = strchr(p, '\0');

	breakline = 0;
	nospace = 0;

	while (p < pend) {
		/*
		 * A pending skip request swallows the next input
		 * byte, unless that byte starts an escape sequence.
		 */
		if (HTML_SKIPCHAR & h->flags && '\\' != *p) {
			h->flags &= ~HTML_SKIPCHAR;
			p++;
			continue;
		}

		/* Copy the run of plain bytes up to the next special one. */
		for (sz = strcspn(p, rejs); sz-- && p < pend; p++)
			print_byte(h, *p);

		/*
		 * A pending break request takes effect at the next
		 * blank or at the end of the input: print a <br/>
		 * element and skip the blanks that follow.
		 */
		if (breakline &&
		    (p >= pend || *p == ' ' || *p == ASCII_NBRSP)) {
			print_otag(h, TAG_BR, "");
			breakline = 0;
			while (p < pend && (*p == ' ' || *p == ASCII_NBRSP))
				p++;
			continue;
		}

		if (p >= pend)
			break;

		/* A blank ends the current output word. */
		if (*p == ' ') {
			print_endword(h);
			p++;
			continue;
		}

		/*
		 * Let print_escape() deal with the byte if it can.
		 * Either way, p now points right after that byte.
		 */
		if (print_escape(h, *p++))
			continue;

		/* What remains is decoded as an escape sequence. */
		esc = mandoc_escape(&p, &seq, &len);
		switch (esc) {
		case ESCAPE_FONT:
		case ESCAPE_FONTPREV:
		case ESCAPE_FONTBOLD:
		case ESCAPE_FONTITALIC:
		case ESCAPE_FONTBI:
		case ESCAPE_FONTROMAN:
		case ESCAPE_FONTCR:
		case ESCAPE_FONTCB:
		case ESCAPE_FONTCI:
			if (0 == norecurse) {
				/* No whitespace around the font elements. */
				h->flags |= HTML_NOSPACE;
				if (html_setfont(h, esc))
					print_metaf(h);
				h->flags &= ~HTML_NOSPACE;
			}
			continue;
		case ESCAPE_SKIPCHAR:
			h->flags |= HTML_SKIPCHAR;
			continue;
		case ESCAPE_ERROR:
			continue;
		default:
			break;
		}

		/* A pending skip request swallows this escape sequence. */
		if (h->flags & HTML_SKIPCHAR) {
			h->flags &= ~HTML_SKIPCHAR;
			continue;
		}

		/* Translate the escape sequence to a character code in c. */
		switch (esc) {
		case ESCAPE_UNICODE:
			/* Skip past "u" header. */
			c = mchars_num2uc(seq + 1, len - 1);
			break;
		case ESCAPE_NUMBERED:
			c = mchars_num2char(seq, len);
			if (c < 0)
				continue;
			break;
		case ESCAPE_SPECIAL:
			c = mchars_spec2cp(seq, len);
			if (c <= 0)
				continue;
			break;
		case ESCAPE_UNDEF:
			c = *seq;
			break;
		case ESCAPE_DEVICE:
			print_word(h, "html");
			continue;
		case ESCAPE_BREAK:
			breakline = 1;
			continue;
		case ESCAPE_NOSPACE:
			/* Only reported when directly followed by a NUL byte. */
			if ('\0' == *p)
				nospace = 1;
			continue;
		case ESCAPE_OVERSTRIKE:
			/* Only the last byte of the sequence is printed. */
			if (len == 0)
				continue;
			c = seq[len - 1];
			break;
		default:
			continue;
		}
		/*
		 * Replace control characters other than the tab and
		 * the codes from 0x7F to 0x9F with U+FFFD.
		 */
		if ((c < 0x20 && c != 0x09) ||
		    (c > 0x7E && c < 0xA0))
			c = 0xFFFD;
		/*
		 * Codes above 0x7E are written as hexadecimal character
		 * references, all others like ordinary input bytes.
		 */
		if (c > 0x7E) {
			(void)snprintf(numbuf, sizeof(numbuf), "&#x%.4X;", c);
			print_word(h, numbuf);
		} else if (print_escape(h, c) == 0)
			print_byte(h, c);
	}

	return nospace;
}
    590 
    591 static void
    592 print_href(struct html *h, const char *name, const char *sec, int man)
    593 {
    594 	struct stat	 sb;
    595 	const char	*p, *pp;
    596 	char		*filename;
    597 
    598 	if (man) {
    599 		pp = h->base_man1;
    600 		if (h->base_man2 != NULL) {
    601 			mandoc_asprintf(&filename, "%s.%s", name, sec);
    602 			if (stat(filename, &sb) == -1)
    603 				pp = h->base_man2;
    604 			free(filename);
    605 		}
    606 	} else
    607 		pp = h->base_includes;
    608 
    609 	while ((p = strchr(pp, '%')) != NULL) {
    610 		print_encode(h, pp, p, 1);
    611 		if (man && p[1] == 'S') {
    612 			if (sec == NULL)
    613 				print_byte(h, '1');
    614 			else
    615 				print_encode(h, sec, NULL, 1);
    616 		} else if ((man && p[1] == 'N') ||
    617 		    (man == 0 && p[1] == 'I'))
    618 			print_encode(h, name, NULL, 1);
    619 		else
    620 			print_encode(h, p, p + 2, 1);
    621 		pp = p + 2;
    622 	}
    623 	if (*pp != '\0')
    624 		print_encode(h, pp, NULL, 1);
    625 }
    626 
    627 struct tag *
    628 print_otag(struct html *h, enum htmltag tag, const char *fmt, ...)
    629 {
    630 	va_list		 ap;
    631 	struct tag	*t;
    632 	const char	*attr;
    633 	char		*arg1, *arg2;
    634 	int		 style_written, tflags;
    635 
    636 	tflags = htmltags[tag].flags;
    637 
    638 	/* Flow content is not allowed in phrasing context. */
    639 
    640 	if ((tflags & HTML_INPHRASE) == 0) {
    641 		for (t = h->tag; t != NULL; t = t->next) {
    642 			if (t->closed)
    643 				continue;
    644 			assert((htmltags[t->tag].flags & HTML_TOPHRASE) == 0);
    645 			break;
    646 		}
    647 
    648 	/*
    649 	 * Always wrap phrasing elements in a paragraph
    650 	 * unless already contained in some flow container;
    651 	 * never put them directly into a section.
    652 	 */
    653 
    654 	} else if (tflags & HTML_TOPHRASE && h->tag->tag == TAG_SECTION)
    655 		print_otag(h, TAG_P, "c", "Pp");
    656 
    657 	/* Push this tag onto the stack of open scopes. */
    658 
    659 	if ((tflags & HTML_NOSTACK) == 0) {
    660 		t = mandoc_malloc(sizeof(struct tag));
    661 		t->tag = tag;
    662 		t->next = h->tag;
    663 		t->refcnt = 0;
    664 		t->closed = 0;
    665 		h->tag = t;
    666 	} else
    667 		t = NULL;
    668 
    669 	if (tflags & HTML_NLBEFORE)
    670 		print_endline(h);
    671 	if (h->col == 0)
    672 		print_indent(h);
    673 	else if ((h->flags & HTML_NOSPACE) == 0) {
    674 		if (h->flags & HTML_KEEP)
    675 			print_word(h, "&#x00A0;");
    676 		else {
    677 			if (h->flags & HTML_PREKEEP)
    678 				h->flags |= HTML_KEEP;
    679 			print_endword(h);
    680 		}
    681 	}
    682 
    683 	if ( ! (h->flags & HTML_NONOSPACE))
    684 		h->flags &= ~HTML_NOSPACE;
    685 	else
    686 		h->flags |= HTML_NOSPACE;
    687 
    688 	/* Print out the tag name and attributes. */
    689 
    690 	print_byte(h, '<');
    691 	print_word(h, htmltags[tag].name);
    692 
    693 	va_start(ap, fmt);
    694 
    695 	while (*fmt != '\0' && *fmt != 's') {
    696 
    697 		/* Parse attributes and arguments. */
    698 
    699 		arg1 = va_arg(ap, char *);
    700 		arg2 = NULL;
    701 		switch (*fmt++) {
    702 		case 'c':
    703 			attr = "class";
    704 			break;
    705 		case 'h':
    706 			attr = "href";
    707 			break;
    708 		case 'i':
    709 			attr = "id";
    710 			break;
    711 		case '?':
    712 			attr = arg1;
    713 			arg1 = va_arg(ap, char *);
    714 			break;
    715 		default:
    716 			abort();
    717 		}
    718 		if (*fmt == 'M')
    719 			arg2 = va_arg(ap, char *);
    720 		if (arg1 == NULL)
    721 			continue;
    722 
    723 		/* Print the attributes. */
    724 
    725 		print_byte(h, ' ');
    726 		print_word(h, attr);
    727 		print_byte(h, '=');
    728 		print_byte(h, '"');
    729 		switch (*fmt) {
    730 		case 'I':
    731 			print_href(h, arg1, NULL, 0);
    732 			fmt++;
    733 			break;
    734 		case 'M':
    735 			print_href(h, arg1, arg2, 1);
    736 			fmt++;
    737 			break;
    738 		case 'R':
    739 			print_byte(h, '#');
    740 			print_encode(h, arg1, NULL, 1);
    741 			fmt++;
    742 			break;
    743 		default:
    744 			print_encode(h, arg1, NULL, 1);
    745 			break;
    746 		}
    747 		print_byte(h, '"');
    748 	}
    749 
    750 	style_written = 0;
    751 	while (*fmt++ == 's') {
    752 		arg1 = va_arg(ap, char *);
    753 		arg2 = va_arg(ap, char *);
    754 		if (arg2 == NULL)
    755 			continue;
    756 		print_byte(h, ' ');
    757 		if (style_written == 0) {
    758 			print_word(h, "style=\"");
    759 			style_written = 1;
    760 		}
    761 		print_word(h, arg1);
    762 		print_byte(h, ':');
    763 		print_byte(h, ' ');
    764 		print_word(h, arg2);
    765 		print_byte(h, ';');
    766 	}
    767 	if (style_written)
    768 		print_byte(h, '"');
    769 
    770 	va_end(ap);
    771 
    772 	/* Accommodate for "well-formed" singleton escaping. */
    773 
    774 	if (htmltags[tag].flags & HTML_NOSTACK)
    775 		print_byte(h, '/');
    776 
    777 	print_byte(h, '>');
    778 
    779 	if (tflags & HTML_NLBEGIN)
    780 		print_endline(h);
    781 	else
    782 		h->flags |= HTML_NOSPACE;
    783 
    784 	if (tflags & HTML_INDENT)
    785 		h->indent++;
    786 	if (tflags & HTML_NOINDENT)
    787 		h->noindent++;
    788 
    789 	return t;
    790 }
    791 
    792 /*
    793  * Print an element with an optional "id=" attribute.
    794  * If the element has phrasing content and an "id=" attribute,
    795  * also add a permalink: outside if it can be in phrasing context,
    796  * inside otherwise.
    797  */
    798 struct tag *
    799 print_otag_id(struct html *h, enum htmltag elemtype, const char *cattr,
    800     struct roff_node *n)
    801 {
    802 	struct roff_node *nch;
    803 	struct tag	*ret, *t;
    804 	char		*id, *href;
    805 
    806 	ret = NULL;
    807 	id = href = NULL;
    808 	if (n->flags & NODE_ID)
    809 		id = html_make_id(n, 1);
    810 	if (n->flags & NODE_HREF)
    811 		href = id == NULL ? html_make_id(n, 2) : id;
    812 	if (href != NULL && htmltags[elemtype].flags & HTML_INPHRASE)
    813 		ret = print_otag(h, TAG_A, "chR", "permalink", href);
    814 	t = print_otag(h, elemtype, "ci", cattr, id);
    815 	if (ret == NULL) {
    816 		ret = t;
    817 		if (href != NULL && (nch = n->child) != NULL) {
    818 			/* man(7) is safe, it tags phrasing content only. */
    819 			if (n->tok > MDOC_MAX ||
    820 			    htmltags[elemtype].flags & HTML_TOPHRASE)
    821 				nch = NULL;
    822 			else  /* For mdoc(7), beware of nested blocks. */
    823 				while (nch != NULL && nch->type == ROFFT_TEXT)
    824 					nch = nch->next;
    825 			if (nch == NULL)
    826 				print_otag(h, TAG_A, "chR", "permalink", href);
    827 		}
    828 	}
    829 	free(id);
    830 	if (id == NULL)
    831 		free(href);
    832 	return ret;
    833 }
    834 
    835 static void
    836 print_ctag(struct html *h, struct tag *tag)
    837 {
    838 	int	 tflags;
    839 
    840 	if (tag->closed == 0) {
    841 		tag->closed = 1;
    842 		if (tag == h->metaf)
    843 			h->metaf = NULL;
    844 		if (tag == h->tblt)
    845 			h->tblt = NULL;
    846 
    847 		tflags = htmltags[tag->tag].flags;
    848 		if (tflags & HTML_INDENT)
    849 			h->indent--;
    850 		if (tflags & HTML_NOINDENT)
    851 			h->noindent--;
    852 		if (tflags & HTML_NLEND)
    853 			print_endline(h);
    854 		print_indent(h);
    855 		print_byte(h, '<');
    856 		print_byte(h, '/');
    857 		print_word(h, htmltags[tag->tag].name);
    858 		print_byte(h, '>');
    859 		if (tflags & HTML_NLAFTER)
    860 			print_endline(h);
    861 	}
    862 	if (tag->refcnt == 0) {
    863 		h->tag = tag->next;
    864 		free(tag);
    865 	}
    866 }
    867 
/*
 * Write the document type declaration, then end the output line.
 */
void
print_gen_decls(struct html *h)
{
	print_word(h, "<!DOCTYPE html>");
	print_endline(h);
}
    874 
    875 void
    876 print_gen_comment(struct html *h, struct roff_node *n)
    877 {
    878 	int	 wantblank;
    879 
    880 	print_word(h, "<!-- This is an automatically generated file."
    881 	    "  Do not edit.");
    882 	h->indent = 1;
    883 	wantblank = 0;
    884 	while (n != NULL && n->type == ROFFT_COMMENT) {
    885 		if (strstr(n->string, "-->") == NULL &&
    886 		    (wantblank || *n->string != '\0')) {
    887 			print_endline(h);
    888 			print_indent(h);
    889 			print_word(h, n->string);
    890 			wantblank = *n->string != '\0';
    891 		}
    892 		n = n->next;
    893 	}
    894 	if (wantblank)
    895 		print_endline(h);
    896 	print_word(h, " -->");
    897 	print_endline(h);
    898 	h->indent = 0;
    899 }
    900 
/*
 * Print text without a permalink;
 * shorthand for print_tagged_text() with a NULL node.
 */
void
print_text(struct html *h, const char *word)
{
	print_tagged_text(h, word, NULL);
}
    906 
/*
 * Print text in the current font.
 * If n is not NULL and html_make_id() yields an identifier for it,
 * the text is wrapped in an <a> element of class "permalink"
 * pointing to that identifier.
 * The text is passed through print_encode(), so roff(7) escape
 * sequences are decoded and special characters are escaped.
 */
void
print_tagged_text(struct html *h, const char *word, struct roff_node *n)
{
	struct tag	*t;
	char		*href;

	/*
	 * Always wrap text in a paragraph unless already contained in
	 * some flow container; never put it directly into a section.
	 */

	if (h->tag->tag == TAG_SECTION)
		print_otag(h, TAG_P, "c", "Pp");

	/* Output whitespace before this text? */

	if (h->col && (h->flags & HTML_NOSPACE) == 0) {
		if ( ! (HTML_KEEP & h->flags)) {
			if (HTML_PREKEEP & h->flags)
				h->flags |= HTML_KEEP;
			print_endword(h);
		} else
			print_word(h, "&#x00A0;");
	}

	/*
	 * Optionally switch fonts, optionally write a permalink, then
	 * print the text, optionally surrounded by HTML whitespace.
	 */

	assert(h->metaf == NULL);
	print_metaf(h);
	print_indent(h);

	if (n != NULL && (href = html_make_id(n, 2)) != NULL) {
		t = print_otag(h, TAG_A, "chR", "permalink", href);
		free(href);
	} else
		t = NULL;

	/*
	 * print_encode() returns non-zero if the text asks for
	 * no whitespace and no line break right after it.
	 */

	if ( ! print_encode(h, word, NULL, 0)) {
		if ( ! (h->flags & HTML_NONOSPACE))
			h->flags &= ~HTML_NOSPACE;
		h->flags &= ~HTML_NONEWLINE;
	} else
		h->flags |= HTML_NOSPACE | HTML_NONEWLINE;

	/*
	 * Close the font element, which also closes everything
	 * opened after it, or else the permalink on its own.
	 */

	if (h->metaf != NULL) {
		print_tagq(h, h->metaf);
		h->metaf = NULL;
	} else if (t != NULL)
		print_tagq(h, t);

	h->flags &= ~HTML_IGNDELIM;
}
    962 
    963 void
    964 print_tagq(struct html *h, const struct tag *until)
    965 {
    966 	struct tag	*this, *next;
    967 
    968 	for (this = h->tag; this != NULL; this = next) {
    969 		next = this == until ? NULL : this->next;
    970 		print_ctag(h, this);
    971 	}
    972 }
    973 
    974 /*
    975  * Close out all open elements up to but excluding suntil.
    976  * Note that a paragraph just inside stays open together with it
    977  * because paragraphs include subsequent phrasing content.
    978  */
    979 void
    980 print_stagq(struct html *h, const struct tag *suntil)
    981 {
    982 	struct tag	*this, *next;
    983 
    984 	for (this = h->tag; this != NULL; this = next) {
    985 		next = this->next;
    986 		if (this == suntil || (next == suntil &&
    987 		    (this->tag == TAG_P || this->tag == TAG_PRE)))
    988 			break;
    989 		print_ctag(h, this);
    990 	}
    991 }
    992 
    993 
    994 /***********************************************************************
    995  * Low level output functions.
    996  * They implement line breaking using a short static buffer.
    997  ***********************************************************************/
    998 
    999 /*
   1000  * Buffer one HTML output byte.
   1001  * If the buffer is full, flush and deactivate it and start a new line.
   1002  * If the buffer is inactive, print directly.
   1003  */
   1004 static void
   1005 print_byte(struct html *h, char c)
   1006 {
   1007 	if ((h->flags & HTML_BUFFER) == 0) {
   1008 		putchar(c);
   1009 		h->col++;
   1010 		return;
   1011 	}
   1012 
   1013 	if (h->col + h->bufcol < sizeof(h->buf)) {
   1014 		h->buf[h->bufcol++] = c;
   1015 		return;
   1016 	}
   1017 
   1018 	putchar('\n');
   1019 	h->col = 0;
   1020 	print_indent(h);
   1021 	putchar(' ');
   1022 	putchar(' ');
   1023 	fwrite(h->buf, h->bufcol, 1, stdout);
   1024 	putchar(c);
   1025 	h->col = (h->indent + 1) * 2 + h->bufcol + 1;
   1026 	h->bufcol = 0;
   1027 	h->flags &= ~HTML_BUFFER;
   1028 }
   1029 
   1030 /*
   1031  * If something was printed on the current output line, end it.
   1032  * Not to be called right after print_indent().
   1033  */
   1034 void
   1035 print_endline(struct html *h)
   1036 {
   1037 	if (h->col == 0)
   1038 		return;
   1039 
   1040 	if (h->bufcol) {
   1041 		putchar(' ');
   1042 		fwrite(h->buf, h->bufcol, 1, stdout);
   1043 		h->bufcol = 0;
   1044 	}
   1045 	putchar('\n');
   1046 	h->col = 0;
   1047 	h->flags |= HTML_NOSPACE;
   1048 	h->flags &= ~HTML_BUFFER;
   1049 }
   1050 
   1051 /*
   1052  * Flush the HTML output buffer.
   1053  * If it is inactive, activate it.
   1054  */
   1055 static void
   1056 print_endword(struct html *h)
   1057 {
   1058 	if (h->noindent) {
   1059 		print_byte(h, ' ');
   1060 		return;
   1061 	}
   1062 
   1063 	if ((h->flags & HTML_BUFFER) == 0) {
   1064 		h->col++;
   1065 		h->flags |= HTML_BUFFER;
   1066 	} else if (h->bufcol) {
   1067 		putchar(' ');
   1068 		fwrite(h->buf, h->bufcol, 1, stdout);
   1069 		h->col += h->bufcol + 1;
   1070 	}
   1071 	h->bufcol = 0;
   1072 }
   1073 
   1074 /*
   1075  * If at the beginning of a new output line,
   1076  * perform indentation and mark the line as containing output.
   1077  * Make sure to really produce some output right afterwards,
   1078  * but do not use print_otag() for producing it.
   1079  */
   1080 static void
   1081 print_indent(struct html *h)
   1082 {
   1083 	size_t	 i;
   1084 
   1085 	if (h->col || h->noindent)
   1086 		return;
   1087 
   1088 	h->col = h->indent * 2;
   1089 	for (i = 0; i < h->col; i++)
   1090 		putchar(' ');
   1091 }
   1092 
/*
 * Pass a string to print_byte() one byte at a time, so it is
 * printed or buffered according to the state of the output buffer.
 * No escaping takes place.
 */
static void
print_word(struct html *h, const char *cp)
{
	const char	*byte;

	for (byte = cp; *byte != '\0'; byte++)
		print_byte(h, *byte);
}
   1103