Home | History | Annotate | Line # | Download | only in dist
man.c revision 1.1.1.13.6.1
      1 /*	Id: man.c,v 1.122 2013/12/31 23:23:10 schwarze Exp  */
      2 /*
      3  * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps (at) bsd.lv>
      4  *
      5  * Permission to use, copy, modify, and distribute this software for any
      6  * purpose with or without fee is hereby granted, provided that the above
      7  * copyright notice and this permission notice appear in all copies.
      8  *
      9  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
     10  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
     11  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
     12  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
     13  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
     14  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
     15  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
     16  */
     17 #ifdef HAVE_CONFIG_H
     18 #include "config.h"
     19 #endif
     20 
     21 #include <sys/types.h>
     22 
     23 #include <assert.h>
     24 #include <stdarg.h>
     25 #include <stdlib.h>
     26 #include <stdio.h>
     27 #include <string.h>
     28 
     29 #include "man.h"
     30 #include "mandoc.h"
     31 #include "libman.h"
     32 #include "libmandoc.h"
     33 
     34 const	char *const __man_macronames[MAN_MAX] = {
     35 	"br",		"TH",		"SH",		"SS",
     36 	"TP", 		"LP",		"PP",		"P",
     37 	"IP",		"HP",		"SM",		"SB",
     38 	"BI",		"IB",		"BR",		"RB",
     39 	"R",		"B",		"I",		"IR",
     40 	"RI",		"na",		"sp",		"nf",
     41 	"fi",		"RE",		"RS",		"DT",
     42 	"UC",		"PD",		"AT",		"in",
     43 	"ft",		"OP",		"EX",		"EE",
     44 	"UR",		"UE"
     45 	};
     46 
     47 const	char * const *man_macronames = __man_macronames;
     48 
/* Node construction, linkage and destruction. */
static	struct man_node	*man_node_alloc(struct man *, int, int,
				enum man_type, enum mant);
static	int		 man_node_append(struct man *, struct man_node *);
static	void		 man_node_unlink(struct man *, struct man_node *);
static	void		 man_node_free(struct man_node *);

/* Parser state setup and teardown. */
static	void		 man_alloc1(struct man *);
static	void		 man_free1(struct man *);

/* Per-line parsing of macro lines and text lines. */
static	int		 man_pmacro(struct man *, int, char *, int);
static	int		 man_ptext(struct man *, int, char *, int);
static	int		 man_descope(struct man *, int, int);
     61 
     62 
     63 const struct man_node *
     64 man_node(const struct man *man)
     65 {
     66 
     67 	assert( ! (MAN_HALT & man->flags));
     68 	return(man->first);
     69 }
     70 
     71 
     72 const struct man_meta *
     73 man_meta(const struct man *man)
     74 {
     75 
     76 	assert( ! (MAN_HALT & man->flags));
     77 	return(&man->meta);
     78 }
     79 
     80 
/*
 * Return the parser to its freshly allocated state: release the tree
 * and the meta data strings, then set up a new empty root node.
 * The "parse" and "roff" members are not touched.
 */
void
man_reset(struct man *man)
{
	man_free1(man);		/* Drop the old tree and meta data. */
	man_alloc1(man);	/* Start over with an empty root. */
}
     88 
     89 
/*
 * Destroy a parser: release everything it owns via man_free1(),
 * then release the parser structure itself.
 */
void
man_free(struct man *man)
{
	man_free1(man);		/* Tree and meta data strings. */
	free(man);		/* The container. */
}
     97 
     98 
     99 struct man *
    100 man_alloc(struct roff *roff, struct mparse *parse)
    101 {
    102 	struct man	*p;
    103 
    104 	p = mandoc_calloc(1, sizeof(struct man));
    105 
    106 	man_hash_init();
    107 	p->parse = parse;
    108 	p->roff = roff;
    109 
    110 	man_alloc1(p);
    111 	return(p);
    112 }
    113 
    114 
    115 int
    116 man_endparse(struct man *man)
    117 {
    118 
    119 	assert( ! (MAN_HALT & man->flags));
    120 	if (man_macroend(man))
    121 		return(1);
    122 	man->flags |= MAN_HALT;
    123 	return(0);
    124 }
    125 
    126 
    127 int
    128 man_parseln(struct man *man, int ln, char *buf, int offs)
    129 {
    130 
    131 	man->flags |= MAN_NEWLINE;
    132 
    133 	assert( ! (MAN_HALT & man->flags));
    134 
    135 	return (roff_getcontrol(man->roff, buf, &offs) ?
    136 			man_pmacro(man, ln, buf, offs) :
    137 			man_ptext(man, ln, buf, offs));
    138 }
    139 
    140 
    141 static void
    142 man_free1(struct man *man)
    143 {
    144 
    145 	if (man->first)
    146 		man_node_delete(man, man->first);
    147 	if (man->meta.title)
    148 		free(man->meta.title);
    149 	if (man->meta.source)
    150 		free(man->meta.source);
    151 	if (man->meta.date)
    152 		free(man->meta.date);
    153 	if (man->meta.vol)
    154 		free(man->meta.vol);
    155 	if (man->meta.msec)
    156 		free(man->meta.msec);
    157 }
    158 
    159 
    160 static void
    161 man_alloc1(struct man *man)
    162 {
    163 
    164 	memset(&man->meta, 0, sizeof(struct man_meta));
    165 	man->flags = 0;
    166 	man->last = mandoc_calloc(1, sizeof(struct man_node));
    167 	man->first = man->last;
    168 	man->last->type = MAN_ROOT;
    169 	man->last->tok = MAN_MAX;
    170 	man->next = MAN_NEXT_CHILD;
    171 }
    172 
    173 
    174 static int
    175 man_node_append(struct man *man, struct man_node *p)
    176 {
    177 
    178 	assert(man->last);
    179 	assert(man->first);
    180 	assert(MAN_ROOT != p->type);
    181 
    182 	switch (man->next) {
    183 	case (MAN_NEXT_SIBLING):
    184 		man->last->next = p;
    185 		p->prev = man->last;
    186 		p->parent = man->last->parent;
    187 		break;
    188 	case (MAN_NEXT_CHILD):
    189 		man->last->child = p;
    190 		p->parent = man->last;
    191 		break;
    192 	default:
    193 		abort();
    194 		/* NOTREACHED */
    195 	}
    196 
    197 	assert(p->parent);
    198 	p->parent->nchild++;
    199 
    200 	if ( ! man_valid_pre(man, p))
    201 		return(0);
    202 
    203 	switch (p->type) {
    204 	case (MAN_HEAD):
    205 		assert(MAN_BLOCK == p->parent->type);
    206 		p->parent->head = p;
    207 		break;
    208 	case (MAN_TAIL):
    209 		assert(MAN_BLOCK == p->parent->type);
    210 		p->parent->tail = p;
    211 		break;
    212 	case (MAN_BODY):
    213 		assert(MAN_BLOCK == p->parent->type);
    214 		p->parent->body = p;
    215 		break;
    216 	default:
    217 		break;
    218 	}
    219 
    220 	man->last = p;
    221 
    222 	switch (p->type) {
    223 	case (MAN_TBL):
    224 		/* FALLTHROUGH */
    225 	case (MAN_TEXT):
    226 		if ( ! man_valid_post(man))
    227 			return(0);
    228 		break;
    229 	default:
    230 		break;
    231 	}
    232 
    233 	return(1);
    234 }
    235 
    236 
    237 static struct man_node *
    238 man_node_alloc(struct man *man, int line, int pos,
    239 		enum man_type type, enum mant tok)
    240 {
    241 	struct man_node *p;
    242 
    243 	p = mandoc_calloc(1, sizeof(struct man_node));
    244 	p->line = line;
    245 	p->pos = pos;
    246 	p->type = type;
    247 	p->tok = tok;
    248 
    249 	if (MAN_NEWLINE & man->flags)
    250 		p->flags |= MAN_LINE;
    251 	man->flags &= ~MAN_NEWLINE;
    252 	return(p);
    253 }
    254 
    255 
    256 int
    257 man_elem_alloc(struct man *man, int line, int pos, enum mant tok)
    258 {
    259 	struct man_node *p;
    260 
    261 	p = man_node_alloc(man, line, pos, MAN_ELEM, tok);
    262 	if ( ! man_node_append(man, p))
    263 		return(0);
    264 	man->next = MAN_NEXT_CHILD;
    265 	return(1);
    266 }
    267 
    268 
    269 int
    270 man_tail_alloc(struct man *man, int line, int pos, enum mant tok)
    271 {
    272 	struct man_node *p;
    273 
    274 	p = man_node_alloc(man, line, pos, MAN_TAIL, tok);
    275 	if ( ! man_node_append(man, p))
    276 		return(0);
    277 	man->next = MAN_NEXT_CHILD;
    278 	return(1);
    279 }
    280 
    281 
    282 int
    283 man_head_alloc(struct man *man, int line, int pos, enum mant tok)
    284 {
    285 	struct man_node *p;
    286 
    287 	p = man_node_alloc(man, line, pos, MAN_HEAD, tok);
    288 	if ( ! man_node_append(man, p))
    289 		return(0);
    290 	man->next = MAN_NEXT_CHILD;
    291 	return(1);
    292 }
    293 
    294 
    295 int
    296 man_body_alloc(struct man *man, int line, int pos, enum mant tok)
    297 {
    298 	struct man_node *p;
    299 
    300 	p = man_node_alloc(man, line, pos, MAN_BODY, tok);
    301 	if ( ! man_node_append(man, p))
    302 		return(0);
    303 	man->next = MAN_NEXT_CHILD;
    304 	return(1);
    305 }
    306 
    307 
    308 int
    309 man_block_alloc(struct man *man, int line, int pos, enum mant tok)
    310 {
    311 	struct man_node *p;
    312 
    313 	p = man_node_alloc(man, line, pos, MAN_BLOCK, tok);
    314 	if ( ! man_node_append(man, p))
    315 		return(0);
    316 	man->next = MAN_NEXT_CHILD;
    317 	return(1);
    318 }
    319 
    320 int
    321 man_word_alloc(struct man *man, int line, int pos, const char *word)
    322 {
    323 	struct man_node	*n;
    324 
    325 	n = man_node_alloc(man, line, pos, MAN_TEXT, MAN_MAX);
    326 	n->string = roff_strdup(man->roff, word);
    327 
    328 	if ( ! man_node_append(man, n))
    329 		return(0);
    330 
    331 	man->next = MAN_NEXT_SIBLING;
    332 	return(1);
    333 }
    334 
    335 
    336 /*
    337  * Free all of the resources held by a node.  This does NOT unlink a
    338  * node from its context; for that, see man_node_unlink().
    339  */
    340 static void
    341 man_node_free(struct man_node *p)
    342 {
    343 
    344 	if (p->string)
    345 		free(p->string);
    346 	free(p);
    347 }
    348 
    349 
    350 void
    351 man_node_delete(struct man *man, struct man_node *p)
    352 {
    353 
    354 	while (p->child)
    355 		man_node_delete(man, p->child);
    356 
    357 	man_node_unlink(man, p);
    358 	man_node_free(p);
    359 }
    360 
    361 int
    362 man_addeqn(struct man *man, const struct eqn *ep)
    363 {
    364 	struct man_node	*n;
    365 
    366 	assert( ! (MAN_HALT & man->flags));
    367 
    368 	n = man_node_alloc(man, ep->ln, ep->pos, MAN_EQN, MAN_MAX);
    369 	n->eqn = ep;
    370 
    371 	if ( ! man_node_append(man, n))
    372 		return(0);
    373 
    374 	man->next = MAN_NEXT_SIBLING;
    375 	return(man_descope(man, ep->ln, ep->pos));
    376 }
    377 
    378 int
    379 man_addspan(struct man *man, const struct tbl_span *sp)
    380 {
    381 	struct man_node	*n;
    382 
    383 	assert( ! (MAN_HALT & man->flags));
    384 
    385 	n = man_node_alloc(man, sp->line, 0, MAN_TBL, MAN_MAX);
    386 	n->span = sp;
    387 
    388 	if ( ! man_node_append(man, n))
    389 		return(0);
    390 
    391 	man->next = MAN_NEXT_SIBLING;
    392 	return(man_descope(man, sp->line, 0));
    393 }
    394 
    395 static int
    396 man_descope(struct man *man, int line, int offs)
    397 {
    398 	/*
    399 	 * Co-ordinate what happens with having a next-line scope open:
    400 	 * first close out the element scope (if applicable), then close
    401 	 * out the block scope (also if applicable).
    402 	 */
    403 
    404 	if (MAN_ELINE & man->flags) {
    405 		man->flags &= ~MAN_ELINE;
    406 		if ( ! man_unscope(man, man->last->parent, MANDOCERR_MAX))
    407 			return(0);
    408 	}
    409 
    410 	if ( ! (MAN_BLINE & man->flags))
    411 		return(1);
    412 	man->flags &= ~MAN_BLINE;
    413 
    414 	if ( ! man_unscope(man, man->last->parent, MANDOCERR_MAX))
    415 		return(0);
    416 	return(man_body_alloc(man, line, offs, man->last->tok));
    417 }
    418 
    419 static int
    420 man_ptext(struct man *man, int line, char *buf, int offs)
    421 {
    422 	int		 i;
    423 
    424 	/* Literal free-form text whitespace is preserved. */
    425 
    426 	if (MAN_LITERAL & man->flags) {
    427 		if ( ! man_word_alloc(man, line, offs, buf + offs))
    428 			return(0);
    429 		return(man_descope(man, line, offs));
    430 	}
    431 
    432 	for (i = offs; ' ' == buf[i]; i++)
    433 		/* Skip leading whitespace. */ ;
    434 
    435 	/*
    436 	 * Blank lines are ignored right after headings
    437 	 * but add a single vertical space elsewhere.
    438 	 */
    439 
    440 	if ('\0' == buf[i]) {
    441 		/* Allocate a blank entry. */
    442 		if (MAN_SH != man->last->tok &&
    443 		    MAN_SS != man->last->tok) {
    444 			if ( ! man_elem_alloc(man, line, offs, MAN_sp))
    445 				return(0);
    446 			man->next = MAN_NEXT_SIBLING;
    447 		}
    448 		return(1);
    449 	}
    450 
    451 	/*
    452 	 * Warn if the last un-escaped character is whitespace. Then
    453 	 * strip away the remaining spaces (tabs stay!).
    454 	 */
    455 
    456 	i = (int)strlen(buf);
    457 	assert(i);
    458 
    459 	if (' ' == buf[i - 1] || '\t' == buf[i - 1]) {
    460 		if (i > 1 && '\\' != buf[i - 2])
    461 			man_pmsg(man, line, i - 1, MANDOCERR_EOLNSPACE);
    462 
    463 		for (--i; i && ' ' == buf[i]; i--)
    464 			/* Spin back to non-space. */ ;
    465 
    466 		/* Jump ahead of escaped whitespace. */
    467 		i += '\\' == buf[i] ? 2 : 1;
    468 
    469 		buf[i] = '\0';
    470 	}
    471 
    472 	if ( ! man_word_alloc(man, line, offs, buf + offs))
    473 		return(0);
    474 
    475 	/*
    476 	 * End-of-sentence check.  If the last character is an unescaped
    477 	 * EOS character, then flag the node as being the end of a
    478 	 * sentence.  The front-end will know how to interpret this.
    479 	 */
    480 
    481 	assert(i);
    482 	if (mandoc_eos(buf, (size_t)i))
    483 		man->last->flags |= MAN_EOS;
    484 
    485 	return(man_descope(man, line, offs));
    486 }
    487 
    488 static int
    489 man_pmacro(struct man *man, int ln, char *buf, int offs)
    490 {
    491 	int		 i, ppos;
    492 	enum mant	 tok;
    493 	char		 mac[5];
    494 	struct man_node	*n;
    495 
    496 	if ('"' == buf[offs]) {
    497 		man_pmsg(man, ln, offs, MANDOCERR_BADCOMMENT);
    498 		return(1);
    499 	} else if ('\0' == buf[offs])
    500 		return(1);
    501 
    502 	ppos = offs;
    503 
    504 	/*
    505 	 * Copy the first word into a nil-terminated buffer.
    506 	 * Stop copying when a tab, space, or eoln is encountered.
    507 	 */
    508 
    509 	i = 0;
    510 	while (i < 4 && '\0' != buf[offs] &&
    511 			' ' != buf[offs] && '\t' != buf[offs])
    512 		mac[i++] = buf[offs++];
    513 
    514 	mac[i] = '\0';
    515 
    516 	tok = (i > 0 && i < 4) ? man_hash_find(mac) : MAN_MAX;
    517 
    518 	if (MAN_MAX == tok) {
    519 		mandoc_vmsg(MANDOCERR_MACRO, man->parse, ln,
    520 				ppos, "%s", buf + ppos - 1);
    521 		return(1);
    522 	}
    523 
    524 	/* The macro is sane.  Jump to the next word. */
    525 
    526 	while (buf[offs] && ' ' == buf[offs])
    527 		offs++;
    528 
    529 	/*
    530 	 * Trailing whitespace.  Note that tabs are allowed to be passed
    531 	 * into the parser as "text", so we only warn about spaces here.
    532 	 */
    533 
    534 	if ('\0' == buf[offs] && ' ' == buf[offs - 1])
    535 		man_pmsg(man, ln, offs - 1, MANDOCERR_EOLNSPACE);
    536 
    537 	/*
    538 	 * Remove prior ELINE macro, as it's being clobbered by a new
    539 	 * macro.  Note that NSCOPED macros do not close out ELINE
    540 	 * macros---they don't print text---so we let those slip by.
    541 	 */
    542 
    543 	if ( ! (MAN_NSCOPED & man_macros[tok].flags) &&
    544 			man->flags & MAN_ELINE) {
    545 		n = man->last;
    546 		assert(MAN_TEXT != n->type);
    547 
    548 		/* Remove repeated NSCOPED macros causing ELINE. */
    549 
    550 		if (MAN_NSCOPED & man_macros[n->tok].flags)
    551 			n = n->parent;
    552 
    553 		mandoc_vmsg(MANDOCERR_LINESCOPE, man->parse, n->line,
    554 		    n->pos, "%s breaks %s", man_macronames[tok],
    555 		    man_macronames[n->tok]);
    556 
    557 		man_node_delete(man, n);
    558 		man->flags &= ~MAN_ELINE;
    559 	}
    560 
    561 	/*
    562 	 * Remove prior BLINE macro that is being clobbered.
    563 	 */
    564 	if ((man->flags & MAN_BLINE) &&
    565 	    (MAN_BSCOPE & man_macros[tok].flags)) {
    566 		n = man->last;
    567 
    568 		/* Might be a text node like 8 in
    569 		 * .TP 8
    570 		 * .SH foo
    571 		 */
    572 		if (MAN_TEXT == n->type)
    573 			n = n->parent;
    574 
    575 		/* Remove element that didn't end BLINE, if any. */
    576 		if ( ! (MAN_BSCOPE & man_macros[n->tok].flags))
    577 			n = n->parent;
    578 
    579 		assert(MAN_HEAD == n->type);
    580 		n = n->parent;
    581 		assert(MAN_BLOCK == n->type);
    582 		assert(MAN_SCOPED & man_macros[n->tok].flags);
    583 
    584 		mandoc_vmsg(MANDOCERR_LINESCOPE, man->parse, n->line,
    585 		    n->pos, "%s breaks %s", man_macronames[tok],
    586 		    man_macronames[n->tok]);
    587 
    588 		man_node_delete(man, n);
    589 		man->flags &= ~MAN_BLINE;
    590 	}
    591 
    592 	/*
    593 	 * Save the fact that we're in the next-line for a block.  In
    594 	 * this way, embedded roff instructions can "remember" state
    595 	 * when they exit.
    596 	 */
    597 
    598 	if (MAN_BLINE & man->flags)
    599 		man->flags |= MAN_BPLINE;
    600 
    601 	/* Call to handler... */
    602 
    603 	assert(man_macros[tok].fp);
    604 	if ( ! (*man_macros[tok].fp)(man, tok, ln, ppos, &offs, buf))
    605 		goto err;
    606 
    607 	/*
    608 	 * We weren't in a block-line scope when entering the
    609 	 * above-parsed macro, so return.
    610 	 */
    611 
    612 	if ( ! (MAN_BPLINE & man->flags)) {
    613 		man->flags &= ~MAN_ILINE;
    614 		return(1);
    615 	}
    616 	man->flags &= ~MAN_BPLINE;
    617 
    618 	/*
    619 	 * If we're in a block scope, then allow this macro to slip by
    620 	 * without closing scope around it.
    621 	 */
    622 
    623 	if (MAN_ILINE & man->flags) {
    624 		man->flags &= ~MAN_ILINE;
    625 		return(1);
    626 	}
    627 
    628 	/*
    629 	 * If we've opened a new next-line element scope, then return
    630 	 * now, as the next line will close out the block scope.
    631 	 */
    632 
    633 	if (MAN_ELINE & man->flags)
    634 		return(1);
    635 
    636 	/* Close out the block scope opened in the prior line.  */
    637 
    638 	assert(MAN_BLINE & man->flags);
    639 	man->flags &= ~MAN_BLINE;
    640 
    641 	if ( ! man_unscope(man, man->last->parent, MANDOCERR_MAX))
    642 		return(0);
    643 	return(man_body_alloc(man, ln, ppos, man->last->tok));
    644 
    645 err:	/* Error out. */
    646 
    647 	man->flags |= MAN_HALT;
    648 	return(0);
    649 }
    650 
    651 /*
    652  * Unlink a node from its context.  If "man" is provided, the last parse
    653  * point will also be adjusted accordingly.
    654  */
    655 static void
    656 man_node_unlink(struct man *man, struct man_node *n)
    657 {
    658 
    659 	/* Adjust siblings. */
    660 
    661 	if (n->prev)
    662 		n->prev->next = n->next;
    663 	if (n->next)
    664 		n->next->prev = n->prev;
    665 
    666 	/* Adjust parent. */
    667 
    668 	if (n->parent) {
    669 		n->parent->nchild--;
    670 		if (n->parent->child == n)
    671 			n->parent->child = n->prev ? n->prev : n->next;
    672 	}
    673 
    674 	/* Adjust parse point, if applicable. */
    675 
    676 	if (man && man->last == n) {
    677 		/*XXX: this can occur when bailing from validation. */
    678 		/*assert(NULL == n->next);*/
    679 		if (n->prev) {
    680 			man->last = n->prev;
    681 			man->next = MAN_NEXT_SIBLING;
    682 		} else {
    683 			man->last = n->parent;
    684 			man->next = MAN_NEXT_CHILD;
    685 		}
    686 	}
    687 
    688 	if (man && man->first == n)
    689 		man->first = NULL;
    690 }
    691 
    692 const struct mparse *
    693 man_mparse(const struct man *man)
    694 {
    695 
    696 	assert(man && man->parse);
    697 	return(man->parse);
    698 }
    699