roff.c revision 1.2 1 /* $Vendor-Id: roff.c,v 1.131 2011/04/05 22:22:33 schwarze Exp $ */
2 /*
3 * Copyright (c) 2010, 2011 Kristaps Dzonsons <kristaps (at) bsd.lv>
4 * Copyright (c) 2010, 2011 Ingo Schwarze <schwarze (at) openbsd.org>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18 #ifdef HAVE_CONFIG_H
19 #include "config.h"
20 #endif
21
22 #include <assert.h>
23 #include <errno.h>
24 #include <ctype.h>
25 #include <limits.h>
26 #include <stdlib.h>
27 #include <string.h>
28 #include <stdio.h>
29
30 #include "mandoc.h"
31 #include "libroff.h"
32 #include "libmandoc.h"
33
/* Number of entries in the rstack[] array of struct roff. */
#define	RSTACK_MAX	128
35
/*
 * Tokens for the roff requests handled in this file.
 * The values are used as indices into the roffs[] table, so the
 * order here has to match the order of that table's initialisers.
 * ROFF_USERDEF stands for a user-defined macro; ROFF_MAX doubles as
 * the "no such request" result of the lookup functions.
 */
enum	rofft {
	ROFF_ad,
	ROFF_am,
	ROFF_ami,
	ROFF_am1,
	ROFF_de,
	ROFF_dei,
	ROFF_de1,
	ROFF_ds,
	ROFF_el,
	ROFF_hy,
	ROFF_ie,
	ROFF_if,
	ROFF_ig,
	ROFF_it,
	ROFF_ne,
	ROFF_nh,
	ROFF_nr,
	ROFF_ns,
	ROFF_ps,
	ROFF_rm,
	ROFF_so,
	ROFF_ta,
	ROFF_tr,
	ROFF_TS,
	ROFF_TE,
	ROFF_T_,
	ROFF_EQ,
	ROFF_EN,
	ROFF_cblock,
	ROFF_ccond, /* FIXME: remove this. */
	ROFF_USERDEF,
	ROFF_MAX
};
70
/*
 * Evaluation rule attached to a scope: under ROFFRULE_DENY the
 * conditional handlers drop the scope's lines (ROFF_IGN), under
 * ROFFRULE_ALLOW they pass them on (ROFF_CONT).
 */
enum	roffrule {
	ROFFRULE_ALLOW,
	ROFFRULE_DENY
};
75
/*
 * One entry of the singly linked list of user-defined strings and
 * macros that hangs off struct roff (first_string).
 */
struct	roffstr {
	char		*name; /* key of symbol */
	char		*string; /* current value */
	struct roffstr	*next; /* next in list */
};
81
/*
 * Parser context: the stack of open scopes, the rule stack shared by
 * `ie'/`el', the user-defined strings, and the lists of tables and
 * equations collected so far.
 */
struct	roff {
	struct mparse	*parse; /* parse point */
	struct roffnode	*last; /* leaf of stack */
	enum roffrule	 rstack[RSTACK_MAX]; /* stack of !`ie' rules */
	int		 rstackpos; /* position in rstack; -1 when empty */
	struct regset	*regs; /* read/writable registers */
	struct roffstr	*first_string; /* user-defined strings & macros */
	const char	*current_string; /* value of last called user macro */
	struct tbl_node	*first_tbl; /* first table parsed */
	struct tbl_node	*last_tbl; /* last table parsed */
	struct tbl_node	*tbl; /* current table being parsed */
	struct eqn_node	*last_eqn; /* last equation parsed */
	struct eqn_node	*first_eqn; /* first equation parsed */
	struct eqn_node	*eqn; /* current equation being parsed */
};
97
/*
 * One open scope (block or conditional) on the stack rooted at
 * struct roff's `last' member.  `name' and `end' are heap strings
 * owned by the node and released in roffnode_pop().
 */
struct	roffnode {
	enum rofft	 tok; /* type of node */
	struct roffnode	*parent; /* up one in stack */
	int		 line; /* parse line */
	int		 col; /* parse col */
	char		*name; /* node name, e.g. macro name */
	char		*end; /* end-rules: custom token */
	int		 endspan; /* end-rules: next-line or infty */
	enum roffrule	 rule; /* current evaluation rule */
};
108
/*
 * Common parameter list of every request handler, and the function
 * pointer type built from it.  Handlers are stored in roffs[] and
 * invoked through that table.
 */
#define	ROFF_ARGS	 struct roff *r, /* parse ctx */ \
			 enum rofft tok, /* tok of macro */ \
			 char **bufp, /* input buffer */ \
			 size_t *szp, /* size of input buffer */ \
			 int ln, /* parse line */ \
			 int ppos, /* original pos in buffer */ \
			 int pos, /* current pos in buffer */ \
			 int *offs /* reset offset of buffer data */

typedef	enum rofferr (*roffproc)(ROFF_ARGS);
119
/*
 * Description of one request: its name, the three handlers the line
 * parser may dispatch to, its flags, and the link used to chain
 * entries that share a hash bucket.
 */
struct	roffmac {
	const char	*name; /* macro name */
	roffproc	 proc; /* process new macro */
	roffproc	 text; /* process as child text of macro */
	roffproc	 sub; /* process as child of macro */
	int		 flags;
#define	ROFFMAC_STRUCT	(1 << 0) /* always interpret */
	struct roffmac	*next; /* next entry in the same hash bucket */
};
129
/*
 * Forward declarations: the request handlers referenced by the
 * roffs[] table below and the string/name helpers they rely on.
 */
static	enum rofferr	 roff_block(ROFF_ARGS);
static	enum rofferr	 roff_block_text(ROFF_ARGS);
static	enum rofferr	 roff_block_sub(ROFF_ARGS);
static	enum rofferr	 roff_cblock(ROFF_ARGS);
static	enum rofferr	 roff_ccond(ROFF_ARGS);
static	enum rofferr	 roff_cond(ROFF_ARGS);
static	enum rofferr	 roff_cond_text(ROFF_ARGS);
static	enum rofferr	 roff_cond_sub(ROFF_ARGS);
static	enum rofferr	 roff_ds(ROFF_ARGS);
static	enum roffrule	 roff_evalcond(const char *, int *);
static	void		 roff_freestr(struct roff *);
static	char		*roff_getname(struct roff *, char **, int, int);
static	const char	*roff_getstrn(const struct roff *,
				const char *, size_t);
static	enum rofferr	 roff_line_ignore(ROFF_ARGS);
static	enum rofferr	 roff_nr(ROFF_ARGS);
static	int		 roff_res(struct roff *,
				char **, size_t *, int);
static	enum rofferr	 roff_rm(ROFF_ARGS);
static	void		 roff_setstr(struct roff *,
				const char *, const char *, int);
static	enum rofferr	 roff_so(ROFF_ARGS);
static	enum rofferr	 roff_TE(ROFF_ARGS);
static	enum rofferr	 roff_TS(ROFF_ARGS);
static	enum rofferr	 roff_EQ(ROFF_ARGS);
static	enum rofferr	 roff_EN(ROFF_ARGS);
static	enum rofferr	 roff_T_(ROFF_ARGS);
static	enum rofferr	 roff_userdef(ROFF_ARGS);
158
/* See roff_hash_find() */

/*
 * Requests are bucketed by the first character of their name; only
 * characters in the range ASCII_LO..ASCII_HI are accepted as a first
 * character, giving HASHWIDTH buckets.
 */
#define	ASCII_HI	 126
#define	ASCII_LO	 33
#define	HASHWIDTH	(ASCII_HI - ASCII_LO + 1)

/* Bucket heads; entries are chained through roffmac.next. */
static	struct roffmac	*hash[HASHWIDTH];
166
/*
 * Request table, indexed by enum rofft (same order as the enum).
 * Columns: name, handler for the request itself (proc), handler for
 * text lines while its scope is open (text), handler for control
 * lines while its scope is open (sub), flags, hash chain link.
 * The last entry has no name: it is the handler for user-defined
 * macros and is never entered into the hash table.
 */
static	struct roffmac	 roffs[ROFF_MAX] = {
	{ "ad", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "am", roff_block, roff_block_text, roff_block_sub, 0, NULL },
	{ "ami", roff_block, roff_block_text, roff_block_sub, 0, NULL },
	{ "am1", roff_block, roff_block_text, roff_block_sub, 0, NULL },
	{ "de", roff_block, roff_block_text, roff_block_sub, 0, NULL },
	{ "dei", roff_block, roff_block_text, roff_block_sub, 0, NULL },
	{ "de1", roff_block, roff_block_text, roff_block_sub, 0, NULL },
	{ "ds", roff_ds, NULL, NULL, 0, NULL },
	{ "el", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
	{ "hy", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "ie", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
	{ "if", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
	{ "ig", roff_block, roff_block_text, roff_block_sub, 0, NULL },
	{ "it", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "ne", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "nh", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "nr", roff_nr, NULL, NULL, 0, NULL },
	{ "ns", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "ps", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "rm", roff_rm, NULL, NULL, 0, NULL },
	{ "so", roff_so, NULL, NULL, 0, NULL },
	{ "ta", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "tr", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "TS", roff_TS, NULL, NULL, 0, NULL },
	{ "TE", roff_TE, NULL, NULL, 0, NULL },
	{ "T&", roff_T_, NULL, NULL, 0, NULL },
	{ "EQ", roff_EQ, NULL, NULL, 0, NULL },
	{ "EN", roff_EN, NULL, NULL, 0, NULL },
	{ ".", roff_cblock, NULL, NULL, 0, NULL },
	{ "\\}", roff_ccond, NULL, NULL, 0, NULL },
	{ NULL, roff_userdef, NULL, NULL, 0, NULL },
};
200
/*
 * Forward declarations of the remaining file-local helpers: context
 * teardown, the request hash table, the scope stack and the parsers.
 */
static	void		 roff_free1(struct roff *);
static	enum rofft	 roff_hash_find(const char *, size_t);
static	void		 roff_hash_init(void);
static	void		 roffnode_cleanscope(struct roff *);
static	void		 roffnode_push(struct roff *, enum rofft,
				const char *, int, int);
static	void		 roffnode_pop(struct roff *);
static	enum rofft	 roff_parse(struct roff *, const char *, int *);
static	int		 roff_parse_nat(const char *, unsigned int *);
210
/*
 * See roff_hash_find().
 * Bucket index for the name `p': its first character, offset by
 * ASCII_LO.  The argument is parenthesised so that expressions such
 * as `s + 1' index the intended string rather than expanding to
 * `s + 1[0]'.  The caller must have checked that the first character
 * lies within ASCII_LO..ASCII_HI.
 */
#define	ROFF_HASH(p)	((p)[0] - ASCII_LO)
213
214 static void
215 roff_hash_init(void)
216 {
217 struct roffmac *n;
218 int buc, i;
219
220 for (i = 0; i < (int)ROFF_USERDEF; i++) {
221 assert(roffs[i].name[0] >= ASCII_LO);
222 assert(roffs[i].name[0] <= ASCII_HI);
223
224 buc = ROFF_HASH(roffs[i].name);
225
226 if (NULL != (n = hash[buc])) {
227 for ( ; n->next; n = n->next)
228 /* Do nothing. */ ;
229 n->next = &roffs[i];
230 } else
231 hash[buc] = &roffs[i];
232 }
233 }
234
235
236 /*
237 * Look up a roff token by its name. Returns ROFF_MAX if no macro by
238 * the nil-terminated string name could be found.
239 */
240 static enum rofft
241 roff_hash_find(const char *p, size_t s)
242 {
243 int buc;
244 struct roffmac *n;
245
246 /*
247 * libroff has an extremely simple hashtable, for the time
248 * being, which simply keys on the first character, which must
249 * be printable, then walks a chain. It works well enough until
250 * optimised.
251 */
252
253 if (p[0] < ASCII_LO || p[0] > ASCII_HI)
254 return(ROFF_MAX);
255
256 buc = ROFF_HASH(p);
257
258 if (NULL == (n = hash[buc]))
259 return(ROFF_MAX);
260 for ( ; n; n = n->next)
261 if (0 == strncmp(n->name, p, s) && '\0' == n->name[(int)s])
262 return((enum rofft)(n - roffs));
263
264 return(ROFF_MAX);
265 }
266
267
268 /*
269 * Pop the current node off of the stack of roff instructions currently
270 * pending.
271 */
272 static void
273 roffnode_pop(struct roff *r)
274 {
275 struct roffnode *p;
276
277 assert(r->last);
278 p = r->last;
279
280 r->last = r->last->parent;
281 free(p->name);
282 free(p->end);
283 free(p);
284 }
285
286
287 /*
288 * Push a roff node onto the instruction stack. This must later be
289 * removed with roffnode_pop().
290 */
291 static void
292 roffnode_push(struct roff *r, enum rofft tok, const char *name,
293 int line, int col)
294 {
295 struct roffnode *p;
296
297 p = mandoc_calloc(1, sizeof(struct roffnode));
298 p->tok = tok;
299 if (name)
300 p->name = mandoc_strdup(name);
301 p->parent = r->last;
302 p->line = line;
303 p->col = col;
304 p->rule = p->parent ? p->parent->rule : ROFFRULE_DENY;
305
306 r->last = p;
307 }
308
309
310 static void
311 roff_free1(struct roff *r)
312 {
313 struct tbl_node *t;
314 struct eqn_node *e;
315
316 while (NULL != (t = r->first_tbl)) {
317 r->first_tbl = t->next;
318 tbl_free(t);
319 }
320
321 r->first_tbl = r->last_tbl = r->tbl = NULL;
322
323 while (NULL != (e = r->first_eqn)) {
324 r->first_eqn = e->next;
325 eqn_free(e);
326 }
327
328 r->first_eqn = r->last_eqn = r->eqn = NULL;
329
330 while (r->last)
331 roffnode_pop(r);
332
333 roff_freestr(r);
334 }
335
336
337 void
338 roff_reset(struct roff *r)
339 {
340
341 roff_free1(r);
342 }
343
344
/*
 * Destroy a context: release everything it owns, then the context
 * structure itself.
 */
void
roff_free(struct roff *r)
{

	roff_free1(r);
	free(r);
}
352
353
354 struct roff *
355 roff_alloc(struct regset *regs, struct mparse *parse)
356 {
357 struct roff *r;
358
359 r = mandoc_calloc(1, sizeof(struct roff));
360 r->regs = regs;
361 r->parse = parse;
362 r->rstackpos = -1;
363
364 roff_hash_init();
365 return(r);
366 }
367
368
/*
 * Expand the first defined user string referenced on the line.
 * References have the forms `\*x', `\*(xx' and `\*[name]'.
 *
 * Returns 0 after one substitution, in which case *bufp has been
 * replaced by a newly allocated line and *szp updated; returns 1 if
 * nothing was substituted (no reference, an undefined string, or a
 * truncated escape).
 */
static int
roff_res(struct roff *r, char **bufp, size_t *szp, int pos)
{
	const char	*esc;	/* backslash that opens the escape */
	const char	*name;	/* first byte of the string name */
	const char	*scan;	/* read cursor */
	const char	*val;	/* replacement text */
	char		*out;
	size_t		 outsz;
	int		 len;	/* length of the name */
	int		 fixed;	/* name length; 0 means "up to ']'" */

	scan = *bufp + pos;

	for (;;) {
		esc = strchr(scan, '\\');
		if (esc == NULL)
			return 1;
		scan = esc + 1;

		/*
		 * Only `\*' is of interest.  Any other escaped
		 * character is skipped as well, because being escaped
		 * it cannot open a new escape sequence.
		 */

		if (*scan == '\0')
			return 1;
		if (*scan++ != '*')
			continue;

		/* The next character selects the form of the name. */

		if (*scan == '\0')
			return 1;
		if (*scan == '(') {
			fixed = 2;
			scan++;
		} else if (*scan == '[') {
			fixed = 0;
			scan++;
		} else
			fixed = 1;
		name = scan;

		/* Walk over the name; a premature end is an error. */

		for (len = 0; fixed ? len < fixed : *scan != ']';
		    len++, scan++)
			if (*scan == '\0')
				return 1;

		/* Undefined strings are left alone; keep looking. */

		val = roff_getstrn(r, name, (size_t)len);
		if (val == NULL) {
			if (fixed)
				scan--;
			continue;
		}

		/* Assemble: text before the escape, value, the rest. */

		outsz = *szp + strlen(val) + 1;
		out = mandoc_malloc(outsz);

		strlcpy(out, *bufp, (size_t)(esc - *bufp + 1));
		strlcat(out, val, outsz);
		if (fixed == 0)
			scan++;		/* step over the closing ']' */
		strlcat(out, scan, outsz);

		free(*bufp);
		*bufp = out;
		*szp = outsz;
		return 0;
	}
}
464
465
466 enum rofferr
467 roff_parseln(struct roff *r, int ln, char **bufp,
468 size_t *szp, int pos, int *offs)
469 {
470 enum rofft t;
471 enum rofferr e;
472 int ppos, ctl;
473
474 /*
475 * Run the reserved-word filter only if we have some reserved
476 * words to fill in.
477 */
478
479 if (r->first_string && ! roff_res(r, bufp, szp, pos))
480 return(ROFF_REPARSE);
481
482 ppos = pos;
483 ctl = mandoc_getcontrol(*bufp, &pos);
484
485 /*
486 * First, if a scope is open and we're not a macro, pass the
487 * text through the macro's filter. If a scope isn't open and
488 * we're not a macro, just let it through.
489 * Finally, if there's an equation scope open, divert it into it
490 * no matter our state.
491 */
492
493 if (r->last && ! ctl) {
494 t = r->last->tok;
495 assert(roffs[t].text);
496 e = (*roffs[t].text)
497 (r, t, bufp, szp, ln, pos, pos, offs);
498 assert(ROFF_IGN == e || ROFF_CONT == e);
499 if (ROFF_CONT != e)
500 return(e);
501 if (r->eqn)
502 return(eqn_read(&r->eqn, ln, *bufp, pos));
503 if (r->tbl)
504 return(tbl_read(r->tbl, ln, *bufp, pos));
505 return(ROFF_CONT);
506 } else if ( ! ctl) {
507 if (r->eqn)
508 return(eqn_read(&r->eqn, ln, *bufp, pos));
509 if (r->tbl)
510 return(tbl_read(r->tbl, ln, *bufp, pos));
511 return(ROFF_CONT);
512 } else if (r->eqn)
513 return(eqn_read(&r->eqn, ln, *bufp, ppos));
514
515 /*
516 * If a scope is open, go to the child handler for that macro,
517 * as it may want to preprocess before doing anything with it.
518 * Don't do so if an equation is open.
519 */
520
521 if (r->last) {
522 t = r->last->tok;
523 assert(roffs[t].sub);
524 return((*roffs[t].sub)
525 (r, t, bufp, szp,
526 ln, ppos, pos, offs));
527 }
528
529 /*
530 * Lastly, as we've no scope open, try to look up and execute
531 * the new macro. If no macro is found, simply return and let
532 * the compilers handle it.
533 */
534
535 if (ROFF_MAX == (t = roff_parse(r, *bufp, &pos)))
536 return(ROFF_CONT);
537
538 assert(roffs[t].proc);
539 return((*roffs[t].proc)
540 (r, t, bufp, szp,
541 ln, ppos, pos, offs));
542 }
543
544
545 void
546 roff_endparse(struct roff *r)
547 {
548
549 if (r->last)
550 mandoc_msg(MANDOCERR_SCOPEEXIT, r->parse,
551 r->last->line, r->last->col, NULL);
552
553 if (r->eqn) {
554 mandoc_msg(MANDOCERR_SCOPEEXIT, r->parse,
555 r->eqn->eqn.line, r->eqn->eqn.pos, NULL);
556 eqn_end(r->eqn);
557 r->eqn = NULL;
558 }
559
560 if (r->tbl) {
561 mandoc_msg(MANDOCERR_SCOPEEXIT, r->parse,
562 r->tbl->line, r->tbl->pos, NULL);
563 tbl_end(r->tbl);
564 r->tbl = NULL;
565 }
566 }
567
568 /*
569 * Parse a roff node's type from the input buffer. This must be in the
570 * form of ".foo xxx" in the usual way.
571 */
572 static enum rofft
573 roff_parse(struct roff *r, const char *buf, int *pos)
574 {
575 const char *mac;
576 size_t maclen;
577 enum rofft t;
578
579 if ('\0' == buf[*pos] || '"' == buf[*pos])
580 return(ROFF_MAX);
581
582 mac = buf + *pos;
583 maclen = strcspn(mac, " \\\t\0");
584
585 t = (r->current_string = roff_getstrn(r, mac, maclen))
586 ? ROFF_USERDEF : roff_hash_find(mac, maclen);
587
588 *pos += (int)maclen;
589
590 while (buf[*pos] && ' ' == buf[*pos])
591 (*pos)++;
592
593 return(t);
594 }
595
596
/*
 * Parse `buf' as a non-negative decimal integer that fits into an
 * int.  On success the value is stored in *res and 1 is returned;
 * otherwise 0 is returned and *res is left untouched.
 */
static int
roff_parse_nat(const char *buf, unsigned int *res)
{
	char	*end;
	long	 num;

	errno = 0;
	num = strtol(buf, &end, 10);

	/* Empty input and trailing garbage are rejected. */
	if (*buf == '\0' || *end != '\0')
		return 0;

	/* So is anything strtol(3) could not represent. */
	if (errno == ERANGE && (num == LONG_MAX || num == LONG_MIN))
		return 0;

	/* And anything outside 0..INT_MAX. */
	if (num < 0 || num > INT_MAX)
		return 0;

	*res = (unsigned int)num;
	return 1;
}
615
616
617 /* ARGSUSED */
618 static enum rofferr
619 roff_cblock(ROFF_ARGS)
620 {
621
622 /*
623 * A block-close `..' should only be invoked as a child of an
624 * ignore macro, otherwise raise a warning and just ignore it.
625 */
626
627 if (NULL == r->last) {
628 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
629 return(ROFF_IGN);
630 }
631
632 switch (r->last->tok) {
633 case (ROFF_am):
634 /* FALLTHROUGH */
635 case (ROFF_ami):
636 /* FALLTHROUGH */
637 case (ROFF_am1):
638 /* FALLTHROUGH */
639 case (ROFF_de):
640 /* ROFF_de1 is remapped to ROFF_de in roff_block(). */
641 /* FALLTHROUGH */
642 case (ROFF_dei):
643 /* FALLTHROUGH */
644 case (ROFF_ig):
645 break;
646 default:
647 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
648 return(ROFF_IGN);
649 }
650
651 if ((*bufp)[pos])
652 mandoc_msg(MANDOCERR_ARGSLOST, r->parse, ln, pos, NULL);
653
654 roffnode_pop(r);
655 roffnode_cleanscope(r);
656 return(ROFF_IGN);
657
658 }
659
660
661 static void
662 roffnode_cleanscope(struct roff *r)
663 {
664
665 while (r->last) {
666 if (--r->last->endspan < 0)
667 break;
668 roffnode_pop(r);
669 }
670 }
671
672
673 /* ARGSUSED */
674 static enum rofferr
675 roff_ccond(ROFF_ARGS)
676 {
677
678 if (NULL == r->last) {
679 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
680 return(ROFF_IGN);
681 }
682
683 switch (r->last->tok) {
684 case (ROFF_el):
685 /* FALLTHROUGH */
686 case (ROFF_ie):
687 /* FALLTHROUGH */
688 case (ROFF_if):
689 break;
690 default:
691 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
692 return(ROFF_IGN);
693 }
694
695 if (r->last->endspan > -1) {
696 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
697 return(ROFF_IGN);
698 }
699
700 if ((*bufp)[pos])
701 mandoc_msg(MANDOCERR_ARGSLOST, r->parse, ln, pos, NULL);
702
703 roffnode_pop(r);
704 roffnode_cleanscope(r);
705 return(ROFF_IGN);
706 }
707
708
709 /* ARGSUSED */
710 static enum rofferr
711 roff_block(ROFF_ARGS)
712 {
713 int sv;
714 size_t sz;
715 char *name;
716
717 name = NULL;
718
719 if (ROFF_ig != tok) {
720 if ('\0' == (*bufp)[pos]) {
721 mandoc_msg(MANDOCERR_NOARGS, r->parse, ln, ppos, NULL);
722 return(ROFF_IGN);
723 }
724
725 /*
726 * Re-write `de1', since we don't really care about
727 * groff's strange compatibility mode, into `de'.
728 */
729
730 if (ROFF_de1 == tok)
731 tok = ROFF_de;
732 if (ROFF_de == tok)
733 name = *bufp + pos;
734 else
735 mandoc_msg(MANDOCERR_REQUEST, r->parse, ln, ppos,
736 roffs[tok].name);
737
738 while ((*bufp)[pos] && ! isspace((unsigned char)(*bufp)[pos]))
739 pos++;
740
741 while (isspace((unsigned char)(*bufp)[pos]))
742 (*bufp)[pos++] = '\0';
743 }
744
745 roffnode_push(r, tok, name, ln, ppos);
746
747 /*
748 * At the beginning of a `de' macro, clear the existing string
749 * with the same name, if there is one. New content will be
750 * added from roff_block_text() in multiline mode.
751 */
752
753 if (ROFF_de == tok)
754 roff_setstr(r, name, "", 0);
755
756 if ('\0' == (*bufp)[pos])
757 return(ROFF_IGN);
758
759 /* If present, process the custom end-of-line marker. */
760
761 sv = pos;
762 while ((*bufp)[pos] && ! isspace((unsigned char)(*bufp)[pos]))
763 pos++;
764
765 /*
766 * Note: groff does NOT like escape characters in the input.
767 * Instead of detecting this, we're just going to let it fly and
768 * to hell with it.
769 */
770
771 assert(pos > sv);
772 sz = (size_t)(pos - sv);
773
774 if (1 == sz && '.' == (*bufp)[sv])
775 return(ROFF_IGN);
776
777 r->last->end = mandoc_malloc(sz + 1);
778
779 memcpy(r->last->end, *bufp + sv, sz);
780 r->last->end[(int)sz] = '\0';
781
782 if ((*bufp)[pos])
783 mandoc_msg(MANDOCERR_ARGSLOST, r->parse, ln, pos, NULL);
784
785 return(ROFF_IGN);
786 }
787
788
789 /* ARGSUSED */
790 static enum rofferr
791 roff_block_sub(ROFF_ARGS)
792 {
793 enum rofft t;
794 int i, j;
795
796 /*
797 * First check whether a custom macro exists at this level. If
798 * it does, then check against it. This is some of groff's
799 * stranger behaviours. If we encountered a custom end-scope
800 * tag and that tag also happens to be a "real" macro, then we
801 * need to try interpreting it again as a real macro. If it's
802 * not, then return ignore. Else continue.
803 */
804
805 if (r->last->end) {
806 for (i = pos, j = 0; r->last->end[j]; j++, i++)
807 if ((*bufp)[i] != r->last->end[j])
808 break;
809
810 if ('\0' == r->last->end[j] &&
811 ('\0' == (*bufp)[i] ||
812 ' ' == (*bufp)[i] ||
813 '\t' == (*bufp)[i])) {
814 roffnode_pop(r);
815 roffnode_cleanscope(r);
816
817 while (' ' == (*bufp)[i] || '\t' == (*bufp)[i])
818 i++;
819
820 pos = i;
821 if (ROFF_MAX != roff_parse(r, *bufp, &pos))
822 return(ROFF_RERUN);
823 return(ROFF_IGN);
824 }
825 }
826
827 /*
828 * If we have no custom end-query or lookup failed, then try
829 * pulling it out of the hashtable.
830 */
831
832 if (ROFF_MAX == (t = roff_parse(r, *bufp, &pos)))
833 return(ROFF_IGN);
834
835 /*
836 * Macros other than block-end are only significant
837 * in `de' blocks; elsewhere, simply throw them away.
838 */
839 if (ROFF_cblock != t) {
840 if (ROFF_de == tok)
841 roff_setstr(r, r->last->name, *bufp + ppos, 1);
842 return(ROFF_IGN);
843 }
844
845 assert(roffs[t].proc);
846 return((*roffs[t].proc)(r, t, bufp, szp,
847 ln, ppos, pos, offs));
848 }
849
850
851 /* ARGSUSED */
852 static enum rofferr
853 roff_block_text(ROFF_ARGS)
854 {
855
856 if (ROFF_de == tok)
857 roff_setstr(r, r->last->name, *bufp + pos, 1);
858
859 return(ROFF_IGN);
860 }
861
862
863 /* ARGSUSED */
864 static enum rofferr
865 roff_cond_sub(ROFF_ARGS)
866 {
867 enum rofft t;
868 enum roffrule rr;
869
870 rr = r->last->rule;
871
872 /*
873 * Clean out scope. If we've closed ourselves, then don't
874 * continue.
875 */
876
877 roffnode_cleanscope(r);
878
879 if (ROFF_MAX == (t = roff_parse(r, *bufp, &pos))) {
880 if ('\\' == (*bufp)[pos] && '}' == (*bufp)[pos + 1])
881 return(roff_ccond
882 (r, ROFF_ccond, bufp, szp,
883 ln, pos, pos + 2, offs));
884 return(ROFFRULE_DENY == rr ? ROFF_IGN : ROFF_CONT);
885 }
886
887 /*
888 * A denied conditional must evaluate its children if and only
889 * if they're either structurally required (such as loops and
890 * conditionals) or a closing macro.
891 */
892 if (ROFFRULE_DENY == rr)
893 if ( ! (ROFFMAC_STRUCT & roffs[t].flags))
894 if (ROFF_ccond != t)
895 return(ROFF_IGN);
896
897 assert(roffs[t].proc);
898 return((*roffs[t].proc)(r, t, bufp, szp,
899 ln, ppos, pos, offs));
900 }
901
902
903 /* ARGSUSED */
904 static enum rofferr
905 roff_cond_text(ROFF_ARGS)
906 {
907 char *ep, *st;
908 enum roffrule rr;
909
910 rr = r->last->rule;
911
912 /*
913 * We display the value of the text if out current evaluation
914 * scope permits us to do so.
915 */
916
917 /* FIXME: use roff_ccond? */
918
919 st = &(*bufp)[pos];
920 if (NULL == (ep = strstr(st, "\\}"))) {
921 roffnode_cleanscope(r);
922 return(ROFFRULE_DENY == rr ? ROFF_IGN : ROFF_CONT);
923 }
924
925 if (ep == st || (ep > st && '\\' != *(ep - 1)))
926 roffnode_pop(r);
927
928 roffnode_cleanscope(r);
929 return(ROFFRULE_DENY == rr ? ROFF_IGN : ROFF_CONT);
930 }
931
932
933 static enum roffrule
934 roff_evalcond(const char *v, int *pos)
935 {
936
937 switch (v[*pos]) {
938 case ('n'):
939 (*pos)++;
940 return(ROFFRULE_ALLOW);
941 case ('e'):
942 /* FALLTHROUGH */
943 case ('o'):
944 /* FALLTHROUGH */
945 case ('t'):
946 (*pos)++;
947 return(ROFFRULE_DENY);
948 default:
949 break;
950 }
951
952 while (v[*pos] && ' ' != v[*pos])
953 (*pos)++;
954 return(ROFFRULE_DENY);
955 }
956
957 /* ARGSUSED */
958 static enum rofferr
959 roff_line_ignore(ROFF_ARGS)
960 {
961
962 if (ROFF_it == tok)
963 mandoc_msg(MANDOCERR_REQUEST, r->parse, ln, ppos, "it");
964
965 return(ROFF_IGN);
966 }
967
968 /* ARGSUSED */
969 static enum rofferr
970 roff_cond(ROFF_ARGS)
971 {
972 int sv;
973 enum roffrule rule;
974
975 /*
976 * An `.el' has no conditional body: it will consume the value
977 * of the current rstack entry set in prior `ie' calls or
978 * defaults to DENY.
979 *
980 * If we're not an `el', however, then evaluate the conditional.
981 */
982
983 rule = ROFF_el == tok ?
984 (r->rstackpos < 0 ?
985 ROFFRULE_DENY : r->rstack[r->rstackpos--]) :
986 roff_evalcond(*bufp, &pos);
987
988 sv = pos;
989 while (' ' == (*bufp)[pos])
990 pos++;
991
992 /*
993 * Roff is weird. If we have just white-space after the
994 * conditional, it's considered the BODY and we exit without
995 * really doing anything. Warn about this. It's probably
996 * wrong.
997 */
998
999 if ('\0' == (*bufp)[pos] && sv != pos) {
1000 mandoc_msg(MANDOCERR_NOARGS, r->parse, ln, ppos, NULL);
1001 return(ROFF_IGN);
1002 }
1003
1004 roffnode_push(r, tok, NULL, ln, ppos);
1005
1006 r->last->rule = rule;
1007
1008 /*
1009 * An if-else will put the NEGATION of the current evaluated
1010 * conditional into the stack of rules.
1011 */
1012
1013 if (ROFF_ie == tok) {
1014 if (r->rstackpos == RSTACK_MAX - 1) {
1015 mandoc_msg(MANDOCERR_MEM,
1016 r->parse, ln, ppos, NULL);
1017 return(ROFF_ERR);
1018 }
1019 r->rstack[++r->rstackpos] =
1020 ROFFRULE_DENY == r->last->rule ?
1021 ROFFRULE_ALLOW : ROFFRULE_DENY;
1022 }
1023
1024 /* If the parent has false as its rule, then so do we. */
1025
1026 if (r->last->parent && ROFFRULE_DENY == r->last->parent->rule)
1027 r->last->rule = ROFFRULE_DENY;
1028
1029 /*
1030 * Determine scope. If we're invoked with "\{" trailing the
1031 * conditional, then we're in a multiline scope. Else our scope
1032 * expires on the next line.
1033 */
1034
1035 r->last->endspan = 1;
1036
1037 if ('\\' == (*bufp)[pos] && '{' == (*bufp)[pos + 1]) {
1038 r->last->endspan = -1;
1039 pos += 2;
1040 }
1041
1042 /*
1043 * If there are no arguments on the line, the next-line scope is
1044 * assumed.
1045 */
1046
1047 if ('\0' == (*bufp)[pos])
1048 return(ROFF_IGN);
1049
1050 /* Otherwise re-run the roff parser after recalculating. */
1051
1052 *offs = pos;
1053 return(ROFF_RERUN);
1054 }
1055
1056
1057 /* ARGSUSED */
1058 static enum rofferr
1059 roff_ds(ROFF_ARGS)
1060 {
1061 char *name, *string;
1062
1063 /*
1064 * A symbol is named by the first word following the macro
1065 * invocation up to a space. Its value is anything after the
1066 * name's trailing whitespace and optional double-quote. Thus,
1067 *
1068 * [.ds foo "bar " ]
1069 *
1070 * will have `bar " ' as its value.
1071 */
1072
1073 string = *bufp + pos;
1074 name = roff_getname(r, &string, ln, pos);
1075 if ('\0' == *name)
1076 return(ROFF_IGN);
1077
1078 /* Read past initial double-quote. */
1079 if ('"' == *string)
1080 string++;
1081
1082 /* The rest is the value. */
1083 roff_setstr(r, name, string, 0);
1084 return(ROFF_IGN);
1085 }
1086
1087
1088 /* ARGSUSED */
1089 static enum rofferr
1090 roff_nr(ROFF_ARGS)
1091 {
1092 const char *key;
1093 char *val;
1094 struct reg *rg;
1095
1096 val = *bufp + pos;
1097 key = roff_getname(r, &val, ln, pos);
1098 rg = r->regs->regs;
1099
1100 if (0 == strcmp(key, "nS")) {
1101 rg[(int)REG_nS].set = 1;
1102 if ( ! roff_parse_nat(val, &rg[(int)REG_nS].v.u))
1103 rg[(int)REG_nS].v.u = 0;
1104 }
1105
1106 return(ROFF_IGN);
1107 }
1108
1109 /* ARGSUSED */
1110 static enum rofferr
1111 roff_rm(ROFF_ARGS)
1112 {
1113 const char *name;
1114 char *cp;
1115
1116 cp = *bufp + pos;
1117 while ('\0' != *cp) {
1118 name = roff_getname(r, &cp, ln, (int)(cp - *bufp));
1119 if ('\0' != *name)
1120 roff_setstr(r, name, NULL, 0);
1121 }
1122 return(ROFF_IGN);
1123 }
1124
1125 /* ARGSUSED */
1126 static enum rofferr
1127 roff_TE(ROFF_ARGS)
1128 {
1129
1130 if (NULL == r->tbl)
1131 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
1132 else
1133 tbl_end(r->tbl);
1134
1135 r->tbl = NULL;
1136 return(ROFF_IGN);
1137 }
1138
1139 /* ARGSUSED */
1140 static enum rofferr
1141 roff_T_(ROFF_ARGS)
1142 {
1143
1144 if (NULL == r->tbl)
1145 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
1146 else
1147 tbl_restart(ppos, ln, r->tbl);
1148
1149 return(ROFF_IGN);
1150 }
1151
1152 /* ARGSUSED */
1153 static enum rofferr
1154 roff_EQ(ROFF_ARGS)
1155 {
1156 struct eqn_node *e;
1157
1158 assert(NULL == r->eqn);
1159 e = eqn_alloc(ppos, ln);
1160
1161 if (r->last_eqn)
1162 r->last_eqn->next = e;
1163 else
1164 r->first_eqn = r->last_eqn = e;
1165
1166 r->eqn = r->last_eqn = e;
1167 return(ROFF_IGN);
1168 }
1169
1170 /* ARGSUSED */
1171 static enum rofferr
1172 roff_EN(ROFF_ARGS)
1173 {
1174
1175 mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
1176 return(ROFF_IGN);
1177 }
1178
1179 /* ARGSUSED */
1180 static enum rofferr
1181 roff_TS(ROFF_ARGS)
1182 {
1183 struct tbl_node *t;
1184
1185 if (r->tbl) {
1186 mandoc_msg(MANDOCERR_SCOPEBROKEN, r->parse, ln, ppos, NULL);
1187 tbl_end(r->tbl);
1188 }
1189
1190 t = tbl_alloc(ppos, ln, r->parse);
1191
1192 if (r->last_tbl)
1193 r->last_tbl->next = t;
1194 else
1195 r->first_tbl = r->last_tbl = t;
1196
1197 r->tbl = r->last_tbl = t;
1198 return(ROFF_IGN);
1199 }
1200
1201 /* ARGSUSED */
1202 static enum rofferr
1203 roff_so(ROFF_ARGS)
1204 {
1205 char *name;
1206
1207 mandoc_msg(MANDOCERR_SO, r->parse, ln, ppos, NULL);
1208
1209 /*
1210 * Handle `so'. Be EXTREMELY careful, as we shouldn't be
1211 * opening anything that's not in our cwd or anything beneath
1212 * it. Thus, explicitly disallow traversing up the file-system
1213 * or using absolute paths.
1214 */
1215
1216 name = *bufp + pos;
1217 if ('/' == *name || strstr(name, "../") || strstr(name, "/..")) {
1218 mandoc_msg(MANDOCERR_SOPATH, r->parse, ln, pos, NULL);
1219 return(ROFF_ERR);
1220 }
1221
1222 *offs = pos;
1223 return(ROFF_SO);
1224 }
1225
1226 /* ARGSUSED */
1227 static enum rofferr
1228 roff_userdef(ROFF_ARGS)
1229 {
1230 const char *arg[9];
1231 char *cp, *n1, *n2;
1232 int i;
1233
1234 /*
1235 * Collect pointers to macro argument strings
1236 * and null-terminate them.
1237 */
1238 cp = *bufp + pos;
1239 for (i = 0; i < 9; i++)
1240 arg[i] = '\0' == *cp ? "" :
1241 mandoc_getarg(r->parse, &cp, ln, &pos);
1242
1243 /*
1244 * Expand macro arguments.
1245 */
1246 *szp = 0;
1247 n1 = cp = mandoc_strdup(r->current_string);
1248 while (NULL != (cp = strstr(cp, "\\$"))) {
1249 i = cp[2] - '1';
1250 if (0 > i || 8 < i) {
1251 /* Not an argument invocation. */
1252 cp += 2;
1253 continue;
1254 }
1255
1256 *szp = strlen(n1) - 3 + strlen(arg[i]) + 1;
1257 n2 = mandoc_malloc(*szp);
1258
1259 strlcpy(n2, n1, (size_t)(cp - n1 + 1));
1260 strlcat(n2, arg[i], *szp);
1261 strlcat(n2, cp + 3, *szp);
1262
1263 cp = n2 + (cp - n1);
1264 free(n1);
1265 n1 = n2;
1266 }
1267
1268 /*
1269 * Replace the macro invocation
1270 * by the expanded macro.
1271 */
1272 free(*bufp);
1273 *bufp = n1;
1274 if (0 == *szp)
1275 *szp = strlen(*bufp) + 1;
1276
1277 return(*szp > 1 && '\n' == (*bufp)[(int)*szp - 2] ?
1278 ROFF_REPARSE : ROFF_APPEND);
1279 }
1280
1281 static char *
1282 roff_getname(struct roff *r, char **cpp, int ln, int pos)
1283 {
1284 char *name, *cp;
1285
1286 name = *cpp;
1287 if ('\0' == *name)
1288 return(name);
1289
1290 /* Read until end of name. */
1291 for (cp = name; '\0' != *cp && ' ' != *cp; cp++) {
1292 if ('\\' != *cp)
1293 continue;
1294 cp++;
1295 if ('\\' == *cp)
1296 continue;
1297 mandoc_msg(MANDOCERR_NAMESC, r->parse, ln, pos, NULL);
1298 *cp = '\0';
1299 name = cp;
1300 }
1301
1302 /* Nil-terminate name. */
1303 if ('\0' != *cp)
1304 *(cp++) = '\0';
1305
1306 /* Read past spaces. */
1307 while (' ' == *cp)
1308 cp++;
1309
1310 *cpp = cp;
1311 return(name);
1312 }
1313
1314 /*
1315 * Store *string into the user-defined string called *name.
1316 * In multiline mode, append to an existing entry and append '\n';
1317 * else replace the existing entry, if there is one.
1318 * To clear an existing entry, call with (*r, *name, NULL, 0).
1319 */
1320 static void
1321 roff_setstr(struct roff *r, const char *name, const char *string,
1322 int multiline)
1323 {
1324 struct roffstr *n;
1325 char *c;
1326 size_t oldch, newch;
1327
1328 /* Search for an existing string with the same name. */
1329 n = r->first_string;
1330 while (n && strcmp(name, n->name))
1331 n = n->next;
1332
1333 if (NULL == n) {
1334 /* Create a new string table entry. */
1335 n = mandoc_malloc(sizeof(struct roffstr));
1336 n->name = mandoc_strdup(name);
1337 n->string = NULL;
1338 n->next = r->first_string;
1339 r->first_string = n;
1340 } else if (0 == multiline) {
1341 /* In multiline mode, append; else replace. */
1342 free(n->string);
1343 n->string = NULL;
1344 }
1345
1346 if (NULL == string)
1347 return;
1348
1349 /*
1350 * One additional byte for the '\n' in multiline mode,
1351 * and one for the terminating '\0'.
1352 */
1353 newch = strlen(string) + (multiline ? 2u : 1u);
1354 if (NULL == n->string) {
1355 n->string = mandoc_malloc(newch);
1356 *n->string = '\0';
1357 oldch = 0;
1358 } else {
1359 oldch = strlen(n->string);
1360 n->string = mandoc_realloc(n->string, oldch + newch);
1361 }
1362
1363 /* Skip existing content in the destination buffer. */
1364 c = n->string + (int)oldch;
1365
1366 /* Append new content to the destination buffer. */
1367 while (*string) {
1368 /*
1369 * Rudimentary roff copy mode:
1370 * Handle escaped backslashes.
1371 */
1372 if ('\\' == *string && '\\' == *(string + 1))
1373 string++;
1374 *c++ = *string++;
1375 }
1376
1377 /* Append terminating bytes. */
1378 if (multiline)
1379 *c++ = '\n';
1380 *c = '\0';
1381 }
1382
1383 static const char *
1384 roff_getstrn(const struct roff *r, const char *name, size_t len)
1385 {
1386 const struct roffstr *n;
1387
1388 n = r->first_string;
1389 while (n && (strncmp(name, n->name, len) || '\0' != n->name[(int)len]))
1390 n = n->next;
1391
1392 return(n ? n->string : NULL);
1393 }
1394
1395 static void
1396 roff_freestr(struct roff *r)
1397 {
1398 struct roffstr *n, *nn;
1399
1400 for (n = r->first_string; n; n = nn) {
1401 free(n->name);
1402 free(n->string);
1403 nn = n->next;
1404 free(n);
1405 }
1406
1407 r->first_string = NULL;
1408 }
1409
1410 const struct tbl_span *
1411 roff_span(const struct roff *r)
1412 {
1413
1414 return(r->tbl ? tbl_span(r->tbl) : NULL);
1415 }
1416
1417 const struct eqn *
1418 roff_eqn(const struct roff *r)
1419 {
1420
1421 return(r->last_eqn ? &r->last_eqn->eqn : NULL);
1422 }
1423