checknr.c revision 1.16 1 /* $NetBSD: checknr.c,v 1.16 2005/03/30 14:18:41 wiz Exp $ */
2
3 /*
4 * Copyright (c) 1980, 1993
5 * The Regents of the University of California. All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of the University nor the names of its contributors
16 * may be used to endorse or promote products derived from this software
17 * without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 */
31
32 #include <sys/cdefs.h>
33 #ifndef lint
34 __COPYRIGHT("@(#) Copyright (c) 1980, 1993\n\
35 The Regents of the University of California. All rights reserved.\n");
36 #endif /* not lint */
37
38 #ifndef lint
39 #if 0
40 static char sccsid[] = "@(#)checknr.c 8.1 (Berkeley) 6/6/93";
41 #else
42 __RCSID("$NetBSD: checknr.c,v 1.16 2005/03/30 14:18:41 wiz Exp $");
43 #endif
44 #endif /* not lint */
45
46 /*
47 * checknr: check an nroff/troff input file for matching macro calls.
48 * we also attempt to match size and font changes, but only the embedded
49 * kind. These must end in \s0 and \fP resp. Maybe more sophistication
50 * later but for now think of these restrictions as contributions to
51 * structured typesetting.
52 */
53 #include <ctype.h>
54 #include <stdio.h>
55 #include <stdlib.h>
56 #include <string.h>
57
58 #define MAXSTK 100 /* Stack size */
59 #define MAXBR 100 /* Max number of bracket pairs known */
60 #define MAXCMDS 500 /* Max number of commands known */
61
62 /*
63 * The stack on which we remember what we've seen so far.
64 */
65 struct stkstr {
66 int opno; /* number of opening bracket */
67 int pl; /* '+', '-', ' ' for \s, 1 for \f, 0 for .ft */
68 int parm; /* parm to size, font, etc */
69 int lno; /* line number the thing came in in */
70 } stk[MAXSTK];
71 int stktop;
72
73 /*
74 * The kinds of opening and closing brackets.
75 */
76 struct brstr {
77 char *opbr;
78 char *clbr;
79 } br[MAXBR] = {
80 /* A few bare bones troff commands */
81 #define SZ 0
82 { "sz", "sz"}, /* also \s */
83 #define FT 1
84 { "ft", "ft"}, /* also \f */
85 /* the -mm package */
86 {"AL", "LE"},
87 {"AS", "AE"},
88 {"BL", "LE"},
89 {"BS", "BE"},
90 {"DF", "DE"},
91 {"DL", "LE"},
92 {"DS", "DE"},
93 {"FS", "FE"},
94 {"ML", "LE"},
95 {"NS", "NE"},
96 {"RL", "LE"},
97 {"VL", "LE"},
98 /* the -ms package */
99 {"AB", "AE"},
100 {"BD", "DE"},
101 {"CD", "DE"},
102 {"DS", "DE"},
103 {"FS", "FE"},
104 {"ID", "DE"},
105 {"KF", "KE"},
106 {"KS", "KE"},
107 {"LD", "DE"},
108 {"LG", "NL"},
109 {"QS", "QE"},
110 {"RS", "RE"},
111 {"SM", "NL"},
112 {"XA", "XE"},
113 {"XS", "XE"},
114 /* The -me package */
115 {"(b", ")b"},
116 {"(c", ")c"},
117 {"(d", ")d"},
118 {"(f", ")f"},
119 {"(l", ")l"},
120 {"(q", ")q"},
121 {"(x", ")x"},
122 {"(z", ")z"},
123 /* The -mdoc package */
124 {"Ao", "Ac"},
125 {"Bd", "Ed"},
126 {"Bk", "Ek"},
127 {"Bo", "Bc"},
128 {"Do", "Dc"},
129 {"Fo", "Fc"},
130 {"Oo", "Oc"},
131 {"Po", "Pc"},
132 {"Qo", "Qc"},
133 {"Rs", "Re"},
134 {"So", "Sc"},
135 {"Xo", "Xc"},
136 /* Things needed by preprocessors */
137 {"EQ", "EN"},
138 {"TS", "TE"},
139 /* Refer */
140 {"[", "]"},
141 {0, 0}
142 };
143
144 /*
145 * All commands known to nroff, plus macro packages.
146 * Used so we can complain about unrecognized commands.
147 */
148 char *knowncmds[MAXCMDS] = {
149 "$c", "$f", "$h", "$p", "$s", "%A", "%B", "%C", "%D", "%I", "%J", "%N",
150 "%O", "%P", "%Q", "%R", "%T", "%V", "(b", "(c", "(d", "(f", "(l", "(q",
151 "(t", "(x", "(z", ")b", ")c", ")d", ")f", ")l", ")q", ")t", ")x",
152 ")z", "++", "+c", "1C", "1c", "2C", "2c", "@(", "@)", "@C", "@D",
153 "@F", "@I", "@M", "@c", "@e", "@f", "@h", "@m", "@n", "@o", "@p",
154 "@r", "@t", "@z", "AB", "AE", "AF", "AI", "AL", "AM", "AS", "AT",
155 "AU", "AX", "Ac", "Ad", "An", "Ao", "Ap", "Aq", "Ar", "At", "B" , "B1",
156 "B2", "BD", "BE", "BG", "BL", "BS", "BT", "BX", "Bc", "Bd", "Bf",
157 "Bk", "Bl", "Bo", "Bq", "Bsx", "Bx", "C1", "C2", "CD", "CM", "CT",
158 "Cd", "Cm", "D" , "D1", "DA", "DE", "DF", "DL", "DS", "DT", "Db", "Dc",
159 "Dd", "Dl", "Do", "Dq", "Dt", "Dv", "EC", "EF", "EG", "EH", "EM",
160 "EN", "EQ", "EX", "Ec", "Ed", "Ef", "Ek", "El", "Em", "Eo", "Er",
161 "Ev", "FA", "FD", "FE", "FG", "FJ", "FK", "FL", "FN", "FO", "FQ",
162 "FS", "FV", "FX", "Fa", "Fc", "Fd", "Fl", "Fn", "Fo", "Ft", "Fx",
163 "H" , "HC", "HD", "HM", "HO", "HU", "I" , "ID", "IE", "IH", "IM",
164 "IP", "IX", "IZ", "Ic", "In", "It", "KD", "KE", "KF", "KQ", "KS", "LB",
165 "LC", "LD", "LE", "LG", "LI", "LP", "Lb", "Li", "MC", "ME", "MF",
166 "MH", "ML", "MR", "MT", "ND", "NE", "NH", "NL", "NP", "NS", "Nd",
167 "Nm", "No", "Ns", "Nx", "OF", "OH", "OK", "OP", "Oc", "Oo", "Op",
168 "Os", "Ot", "Ox", "P" , "P1", "PF", "PH", "PP", "PT", "PX", "PY",
169 "Pa", "Pc", "Pf", "Po", "Pp", "Pq", "QE", "QP", "QS", "Qc", "Ql",
170 "Qo", "Qq", "R" , "RA", "RC", "RE", "RL", "RP", "RQ", "RS", "RT",
171 "Re", "Rs", "S" , "S0", "S2", "S3", "SA", "SG", "SH", "SK", "SM",
172 "SP", "SY", "Sc", "Sh", "Sm", "So", "Sq", "Ss", "St", "Sx", "Sy",
173 "T&", "TA", "TB", "TC", "TD", "TE", "TH", "TL", "TM", "TP", "TQ",
174 "TR", "TS", "TX", "Tn", "UL", "US", "UX", "Ud", "Ux", "VL", "Va", "Vt",
175 "WC", "WH", "XA", "XD", "XE", "XF", "XK", "XP", "XS", "Xc", "Xo",
176 "Xr", "[" , "[-", "[0", "[1", "[2", "[3", "[4", "[5", "[<", "[>",
177 "[]", "\\{", "\\}", "]" , "]-", "]<", "]>", "][", "ab", "ac", "ad", "af", "am",
178 "ar", "as", "b" , "ba", "bc", "bd", "bi", "bl", "bp", "br", "bx",
179 "c.", "c2", "cc", "ce", "cf", "ch", "cs", "ct", "cu", "da", "de",
180 "di", "dl", "dn", "ds", "dt", "dw", "dy", "ec", "ef", "eh", "el",
181 "em", "eo", "ep", "ev", "ex", "fc", "fi", "fl", "fo", "fp", "ft",
182 "fz", "hc", "he", "hl", "hp", "ht", "hw", "hx", "hy", "i" , "ie",
183 "if", "ig", "in", "ip", "it", "ix", "lc", "lg", "li", "ll", "ln",
184 "lo", "lp", "ls", "lt", "m1", "m2", "m3", "m4", "mc", "mk", "mo",
185 "n1", "n2", "na", "ne", "nf", "nh", "nl", "nm", "nn", "np", "nr",
186 "ns", "nx", "of", "oh", "os", "pa", "pc", "pi", "pl", "pm", "pn",
187 "po", "pp", "ps", "q" , "r" , "rb", "rd", "re", "rm", "rn", "ro",
188 "rr", "rs", "rt", "sb", "sc", "sh", "sk", "so", "sp", "ss", "st",
189 "sv", "sz", "ta", "tc", "th", "ti", "tl", "tm", "tp", "tr", "u",
190 "uf", "uh", "ul", "vs", "wh", "xp", "yr", 0
191 };
192
193 int lineno; /* current line number in input file */
194 char *cfilename; /* name of current file */
195 int nfiles; /* number of files to process */
196 int fflag; /* -f: ignore \f */
197 int sflag; /* -s: ignore \s */
198 int ncmds; /* size of knowncmds */
199 int slot; /* slot in knowncmds found by binsrch */
200
201 void addcmd(char *);
202 void addmac(char *);
203 int binsrch(char *);
204 void checkknown(char *);
205 void chkcmd(char *, char *);
206 void complain(int);
207 int eq(const void *, const void *);
208 int main(int, char **);
209 void nomatch(char *);
210 void pe(int);
211 void process(FILE *);
212 void prop(int);
213 void usage(void);
214
215 int
216 main(int argc, char **argv)
217 {
218 FILE *f;
219 int i;
220 char *cp;
221 char b1[4];
222
223 /* Figure out how many known commands there are */
224 while (knowncmds[ncmds])
225 ncmds++;
226 while (argc > 1 && argv[1][0] == '-') {
227 switch(argv[1][1]) {
228
229 /* -a: add pairs of macros */
230 case 'a':
231 i = strlen(argv[1]) - 2;
232 if (i % 6 != 0)
233 usage();
234 /* look for empty macro slots */
235 for (i=0; br[i].opbr; i++)
236 ;
237 for (cp=argv[1]+3; cp[-1]; cp += 6) {
238 if (i >= MAXBR)
239 errx(1, "too many pairs");
240 if ((br[i].opbr = malloc(3)) == NULL)
241 err(1, "malloc");
242 strlcpy(br[i].opbr, cp, 3);
243 if ((br[i].clbr = malloc(3)) == NULL)
244 err(1, "malloc");
245 strlcpy(br[i].clbr, cp+3, 3);
246 addmac(br[i].opbr); /* knows pairs are also known cmds */
247 addmac(br[i].clbr);
248 i++;
249 }
250 break;
251
252 /* -c: add known commands */
253 case 'c':
254 i = strlen(argv[1]) - 2;
255 if (i % 3 != 0)
256 usage();
257 for (cp=argv[1]+3; cp[-1]; cp += 3) {
258 if (cp[2] && cp[2] != '.')
259 usage();
260 strncpy(b1, cp, 2);
261 addmac(b1);
262 }
263 break;
264
265 /* -f: ignore font changes */
266 case 'f':
267 fflag = 1;
268 break;
269
270 /* -s: ignore size changes */
271 case 's':
272 sflag = 1;
273 break;
274 default:
275 usage();
276 }
277 argc--; argv++;
278 }
279
280 nfiles = argc - 1;
281
282 if (nfiles > 0) {
283 for (i=1; i<argc; i++) {
284 cfilename = argv[i];
285 f = fopen(cfilename, "r");
286 if (f == NULL)
287 perror(cfilename);
288 else {
289 process(f);
290 fclose(f);
291 }
292 }
293 } else {
294 cfilename = "stdin";
295 process(stdin);
296 }
297 exit(0);
298 }
299
300 void
301 usage(void)
302 {
303 (void)fprintf(stderr,
304 "usage: %s [-fs] [-a.xx.yy.xx.yy...] [-c.xx.xx.xx...] file\n",
305 getprogname());
306 exit(1);
307 }
308
309 void
310 process(FILE *f)
311 {
312 int i, n;
313 char line[256]; /* the current line */
314 char mac[5]; /* The current macro or nroff command */
315 int pl;
316
317 stktop = -1;
318 for (lineno = 1; fgets(line, sizeof line, f); lineno++) {
319 if (line[0] == '.') {
320 /*
321 * find and isolate the macro/command name.
322 */
323 strncpy(mac, line+1, 4);
324 if (isspace((unsigned char)mac[0])) {
325 pe(lineno);
326 printf("Empty command\n");
327 } else if (isspace((unsigned char)mac[1])) {
328 mac[1] = 0;
329 } else if (isspace((unsigned char)mac[2])) {
330 mac[2] = 0;
331 } else if (mac[0] != '\\' || mac[1] != '\"') {
332 pe(lineno);
333 printf("Command too long\n");
334 }
335
336 /*
337 * Is it a known command?
338 */
339 checkknown(mac);
340
341 /*
342 * Should we add it?
343 */
344 if (eq(mac, "de"))
345 addcmd(line);
346
347 chkcmd(line, mac);
348 }
349
350 /*
351 * At this point we process the line looking
352 * for \s and \f.
353 */
354 for (i=0; line[i]; i++)
355 if (line[i]=='\\' && (i==0 || line[i-1]!='\\')) {
356 if (!sflag && line[++i]=='s') {
357 pl = line[++i];
358 if (isdigit((unsigned char)pl)) {
359 n = pl - '0';
360 pl = ' ';
361 } else
362 n = 0;
363 while (isdigit((unsigned char)line[++i]))
364 n = 10 * n + line[i] - '0';
365 i--;
366 if (n == 0) {
367 if (stk[stktop].opno == SZ) {
368 stktop--;
369 } else {
370 pe(lineno);
371 printf("unmatched \\s0\n");
372 }
373 } else {
374 stk[++stktop].opno = SZ;
375 stk[stktop].pl = pl;
376 stk[stktop].parm = n;
377 stk[stktop].lno = lineno;
378 }
379 } else if (!fflag && line[i]=='f') {
380 n = line[++i];
381 if (n == 'P') {
382 if (stk[stktop].opno == FT) {
383 stktop--;
384 } else {
385 pe(lineno);
386 printf("unmatched \\fP\n");
387 }
388 } else {
389 stk[++stktop].opno = FT;
390 stk[stktop].pl = 1;
391 stk[stktop].parm = n;
392 stk[stktop].lno = lineno;
393 }
394 }
395 }
396 }
397 /*
398 * We've hit the end and look at all this stuff that hasn't been
399 * matched yet! Complain, complain.
400 */
401 for (i=stktop; i>=0; i--) {
402 complain(i);
403 }
404 }
405
406 void
407 complain(int i)
408 {
409 pe(stk[i].lno);
410 printf("Unmatched ");
411 prop(i);
412 printf("\n");
413 }
414
415 void
416 prop(int i)
417 {
418 if (stk[i].pl == 0)
419 printf(".%s", br[stk[i].opno].opbr);
420 else switch(stk[i].opno) {
421 case SZ:
422 printf("\\s%c%d", stk[i].pl, stk[i].parm);
423 break;
424 case FT:
425 printf("\\f%c", stk[i].parm);
426 break;
427 default:
428 printf("Bug: stk[%d].opno = %d = .%s, .%s",
429 i, stk[i].opno, br[stk[i].opno].opbr,
430 br[stk[i].opno].clbr);
431 }
432 }
433
434 void
435 chkcmd(char *line, char *mac)
436 {
437 int i;
438
439 /*
440 * Check to see if it matches top of stack.
441 */
442 if (stktop >= 0 && eq(mac, br[stk[stktop].opno].clbr))
443 stktop--; /* OK. Pop & forget */
444 else {
445 /* No. Maybe it's an opener */
446 for (i=0; br[i].opbr; i++) {
447 if (eq(mac, br[i].opbr)) {
448 /* Found. Push it. */
449 stktop++;
450 stk[stktop].opno = i;
451 stk[stktop].pl = 0;
452 stk[stktop].parm = 0;
453 stk[stktop].lno = lineno;
454 break;
455 }
456 /*
457 * Maybe it's an unmatched closer.
458 * NOTE: this depends on the fact
459 * that none of the closers can be
460 * openers too.
461 */
462 if (eq(mac, br[i].clbr)) {
463 nomatch(mac);
464 break;
465 }
466 }
467 }
468 }
469
470 void
471 nomatch(char *mac)
472 {
473 int i, j;
474
475 /*
476 * Look for a match further down on stack
477 * If we find one, it suggests that the stuff in
478 * between is supposed to match itself.
479 */
480 for (j=stktop; j>=0; j--)
481 if (eq(mac,br[stk[j].opno].clbr)) {
482 /* Found. Make a good diagnostic. */
483 if (j == stktop-2) {
484 /*
485 * Check for special case \fx..\fR and don't
486 * complain.
487 */
488 if (stk[j+1].opno==FT && stk[j+1].parm!='R'
489 && stk[j+2].opno==FT && stk[j+2].parm=='R') {
490 stktop = j -1;
491 return;
492 }
493 /*
494 * We have two unmatched frobs. Chances are
495 * they were intended to match, so we mention
496 * them together.
497 */
498 pe(stk[j+1].lno);
499 prop(j+1);
500 printf(" does not match %d: ", stk[j+2].lno);
501 prop(j+2);
502 printf("\n");
503 } else for (i=j+1; i <= stktop; i++) {
504 complain(i);
505 }
506 stktop = j-1;
507 return;
508 }
509 /* Didn't find one. Throw this away. */
510 pe(lineno);
511 printf("Unmatched .%s\n", mac);
512 }
513
514 /* eq: are two strings equal? */
515 int
516 eq(const void *s1, const void *s2)
517 {
518 return (strcmp((char *)s1, (char *)s2) == 0);
519 }
520
521 /* print the first part of an error message, given the line number */
522 void
523 pe(int pelineno)
524 {
525 if (nfiles > 1)
526 printf("%s: ", cfilename);
527 printf("%d: ", pelineno);
528 }
529
530 void
531 checkknown(char *mac)
532 {
533
534 if (eq(mac, "."))
535 return;
536 if (binsrch(mac) >= 0)
537 return;
538 if (mac[0] == '\\' && mac[1] == '"') /* comments */
539 return;
540
541 pe(lineno);
542 printf("Unknown command: .%s\n", mac);
543 }
544
545 /*
546 * We have a .de xx line in "line". Add xx to the list of known commands.
547 */
548 void
549 addcmd(char *line)
550 {
551 char *mac;
552
553 /* grab the macro being defined */
554 mac = line+4;
555 while (isspace((unsigned char)*mac))
556 mac++;
557 if (*mac == 0) {
558 pe(lineno);
559 printf("illegal define: %s\n", line);
560 return;
561 }
562 mac[2] = 0;
563 if (isspace((unsigned char)mac[1]) || mac[1] == '\\')
564 mac[1] = 0;
565 if (ncmds >= MAXCMDS) {
566 printf("Only %d known commands allowed\n", MAXCMDS);
567 exit(1);
568 }
569 addmac(mac);
570 }
571
572 /*
573 * Add mac to the list. We should really have some kind of tree
574 * structure here but this is a quick-and-dirty job and I just don't
575 * have time to mess with it. (I wonder if this will come back to haunt
576 * me someday?) Anyway, I claim that .de is fairly rare in user
577 * nroff programs, and the register loop below is pretty fast.
578 */
579 void
580 addmac(char *mac)
581 {
582 char **src, **dest, **loc;
583
584 if (binsrch(mac) >= 0){ /* it's OK to redefine something */
585 #ifdef DEBUG
586 printf("binsrch(%s) -> already in table\n", mac);
587 #endif /* DEBUG */
588 return;
589 }
590 /* binsrch sets slot as a side effect */
591 #ifdef DEBUG
592 printf("binsrch(%s) -> %d\n", mac, slot);
593 #endif
594 loc = &knowncmds[slot];
595 src = &knowncmds[ncmds-1];
596 dest = src+1;
597 while (dest > loc)
598 *dest-- = *src--;
599 if ((*loc = strdup(mac)) == NULL)
600 err(1, "strdup");
601 ncmds++;
602 #ifdef DEBUG
603 printf("after: %s %s %s %s %s, %d cmds\n", knowncmds[slot-2],
604 knowncmds[slot-1], knowncmds[slot], knowncmds[slot+1],
605 knowncmds[slot+2], ncmds);
606 #endif
607 }
608
609 /*
610 * Do a binary search in knowncmds for mac.
611 * If found, return the index. If not, return -1.
612 */
613 int
614 binsrch(char *mac)
615 {
616 char *p; /* pointer to current cmd in list */
617 int d; /* difference if any */
618 int mid; /* mid point in binary search */
619 int top, bot; /* boundaries of bin search, inclusive */
620
621 top = ncmds-1;
622 bot = 0;
623 while (top >= bot) {
624 mid = (top+bot)/2;
625 p = knowncmds[mid];
626 d = p[0] - mac[0];
627 if (d == 0)
628 d = p[1] - mac[1];
629 if (d == 0)
630 return mid;
631 if (d < 0)
632 bot = mid + 1;
633 else
634 top = mid - 1;
635 }
636 slot = bot; /* place it would have gone */
637 return -1;
638 }
639