checknr.c revision 1.21 1 /* $NetBSD: checknr.c,v 1.21 2013/08/11 06:39:47 dholland Exp $ */
2
3 /*
4 * Copyright (c) 1980, 1993
5 * The Regents of the University of California. All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of the University nor the names of its contributors
16 * may be used to endorse or promote products derived from this software
17 * without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 */
31
32 #include <sys/cdefs.h>
33 #ifndef lint
34 __COPYRIGHT("@(#) Copyright (c) 1980, 1993\
35 The Regents of the University of California. All rights reserved.");
36 #endif /* not lint */
37
38 #ifndef lint
39 #if 0
40 static char sccsid[] = "@(#)checknr.c 8.1 (Berkeley) 6/6/93";
41 #else
42 __RCSID("$NetBSD: checknr.c,v 1.21 2013/08/11 06:39:47 dholland Exp $");
43 #endif
44 #endif /* not lint */
45
46 /*
47 * checknr: check an nroff/troff input file for matching macro calls.
48 * we also attempt to match size and font changes, but only the embedded
49 * kind. These must end in \s0 and \fP resp. Maybe more sophistication
50 * later but for now think of these restrictions as contributions to
51 * structured typesetting.
52 */
53 #include <ctype.h>
54 #include <err.h>
55 #include <stdio.h>
56 #include <stdlib.h>
57 #include <string.h>
58
59 #define MAXSTK 100 /* Stack size */
60 #define MAXBR 100 /* Max number of bracket pairs known */
61 #define MAXCMDS 500 /* Max number of commands known */
62
63 /*
64 * The stack on which we remember what we've seen so far.
65 */
66 struct stkstr {
67 int opno; /* number of opening bracket */
68 int pl; /* '+', '-', ' ' for \s, 1 for \f, 0 for .ft */
69 int parm; /* parm to size, font, etc */
70 int lno; /* line number the thing came in in */
71 } stk[MAXSTK];
72 int stktop;
73
74 /*
75 * The kinds of opening and closing brackets.
76 */
77 struct brstr {
78 const char *opbr;
79 const char *clbr;
80 } br[MAXBR] = {
81 /* A few bare bones troff commands */
82 #define SZ 0
83 { "sz", "sz"}, /* also \s */
84 #define FT 1
85 { "ft", "ft"}, /* also \f */
86 /* the -mm package */
87 {"AL", "LE"},
88 {"AS", "AE"},
89 {"BL", "LE"},
90 {"BS", "BE"},
91 {"DF", "DE"},
92 {"DL", "LE"},
93 {"DS", "DE"},
94 {"FS", "FE"},
95 {"ML", "LE"},
96 {"NS", "NE"},
97 {"RL", "LE"},
98 {"VL", "LE"},
99 /* the -ms package */
100 {"AB", "AE"},
101 {"BD", "DE"},
102 {"CD", "DE"},
103 {"DS", "DE"},
104 {"FS", "FE"},
105 {"ID", "DE"},
106 {"KF", "KE"},
107 {"KS", "KE"},
108 {"LD", "DE"},
109 {"LG", "NL"},
110 {"QS", "QE"},
111 {"RS", "RE"},
112 {"SM", "NL"},
113 {"XA", "XE"},
114 {"XS", "XE"},
115 /* The -me package */
116 {"(b", ")b"},
117 {"(c", ")c"},
118 {"(d", ")d"},
119 {"(f", ")f"},
120 {"(l", ")l"},
121 {"(q", ")q"},
122 {"(x", ")x"},
123 {"(z", ")z"},
124 /* The -mdoc package */
125 {"Ao", "Ac"},
126 {"Bd", "Ed"},
127 {"Bk", "Ek"},
128 {"Bo", "Bc"},
129 {"Do", "Dc"},
130 {"Fo", "Fc"},
131 {"Oo", "Oc"},
132 {"Po", "Pc"},
133 {"Qo", "Qc"},
134 {"Rs", "Re"},
135 {"So", "Sc"},
136 {"Xo", "Xc"},
137 /* Things needed by preprocessors */
138 {"EQ", "EN"},
139 {"TS", "TE"},
140 /* Refer */
141 {"[", "]"},
142 {0, 0}
143 };
144
145 /*
146 * All commands known to nroff, plus macro packages.
147 * Used so we can complain about unrecognized commands.
148 */
149 const char *knowncmds[MAXCMDS] = {
150 "$c", "$f", "$h", "$p", "$s", "%A", "%B", "%C", "%D", "%I", "%J", "%N",
151 "%O", "%P", "%Q", "%R", "%T", "%V", "(b", "(c", "(d", "(f", "(l", "(q",
152 "(t", "(x", "(z", ")b", ")c", ")d", ")f", ")l", ")q", ")t", ")x",
153 ")z", "++", "+c", "1C", "1c", "2C", "2c", "@(", "@)", "@C", "@D",
154 "@F", "@I", "@M", "@c", "@e", "@f", "@h", "@m", "@n", "@o", "@p",
155 "@r", "@t", "@z", "AB", "AE", "AF", "AI", "AL", "AM", "AS", "AT",
156 "AU", "AX", "Ac", "Ad", "An", "Ao", "Ap", "Aq", "Ar", "At", "B" , "B1",
157 "B2", "BD", "BE", "BG", "BL", "BS", "BT", "BX", "Bc", "Bd", "Bf",
158 "Bk", "Bl", "Bo", "Bq", "Bsx", "Bx", "C1", "C2", "CD", "CM", "CT",
159 "Cd", "Cm", "D" , "D1", "DA", "DE", "DF", "DL", "DS", "DT", "Db", "Dc",
160 "Dd", "Dl", "Do", "Dq", "Dt", "Dv", "EC", "EF", "EG", "EH", "EM",
161 "EN", "EQ", "EX", "Ec", "Ed", "Ef", "Ek", "El", "Em", "Eo", "Er",
162 "Ev", "FA", "FD", "FE", "FG", "FJ", "FK", "FL", "FN", "FO", "FQ",
163 "FS", "FV", "FX", "Fa", "Fc", "Fd", "Fl", "Fn", "Fo", "Ft", "Fx",
164 "H" , "HC", "HD", "HM", "HO", "HU", "I" , "ID", "IE", "IH", "IM",
165 "IP", "IX", "IZ", "Ic", "In", "It", "KD", "KE", "KF", "KQ", "KS", "LB",
166 "LC", "LD", "LE", "LG", "LI", "LP", "Lb", "Li", "MC", "ME", "MF",
167 "MH", "ML", "MR", "MT", "ND", "NE", "NH", "NL", "NP", "NS", "Nd",
168 "Nm", "No", "Ns", "Nx", "OF", "OH", "OK", "OP", "Oc", "Oo", "Op",
169 "Os", "Ot", "Ox", "P" , "P1", "PF", "PH", "PP", "PT", "PX", "PY",
170 "Pa", "Pc", "Pf", "Po", "Pp", "Pq", "QE", "QP", "QS", "Qc", "Ql",
171 "Qo", "Qq", "R" , "RA", "RC", "RE", "RL", "RP", "RQ", "RS", "RT",
172 "Re", "Rs", "S" , "S0", "S2", "S3", "SA", "SG", "SH", "SK", "SM",
173 "SP", "SY", "Sc", "Sh", "Sm", "So", "Sq", "Ss", "St", "Sx", "Sy",
174 "T&", "TA", "TB", "TC", "TD", "TE", "TH", "TL", "TM", "TP", "TQ",
175 "TR", "TS", "TX", "Tn", "UL", "US", "UX", "Ud", "Ux", "VL", "Va", "Vt",
176 "WC", "WH", "XA", "XD", "XE", "XF", "XK", "XP", "XS", "Xc", "Xo",
177 "Xr", "[" , "[-", "[0", "[1", "[2", "[3", "[4", "[5", "[<", "[>",
178 "[]", "\\{", "\\}", "]" , "]-", "]<", "]>", "][", "ab", "ac", "ad", "af", "am",
179 "ar", "as", "b" , "ba", "bc", "bd", "bi", "bl", "bp", "br", "bx",
180 "c.", "c2", "cc", "ce", "cf", "ch", "cs", "ct", "cu", "da", "de",
181 "di", "dl", "dn", "ds", "dt", "dw", "dy", "ec", "ef", "eh", "el",
182 "em", "eo", "ep", "ev", "ex", "fc", "fi", "fl", "fo", "fp", "ft",
183 "fz", "hc", "he", "hl", "hp", "ht", "hw", "hx", "hy", "i" , "ie",
184 "if", "ig", "in", "ip", "it", "ix", "lc", "lg", "li", "ll", "ln",
185 "lo", "lp", "ls", "lt", "m1", "m2", "m3", "m4", "mc", "mk", "mo",
186 "n1", "n2", "na", "ne", "nf", "nh", "nl", "nm", "nn", "np", "nr",
187 "ns", "nx", "of", "oh", "os", "pa", "pc", "pi", "pl", "pm", "pn",
188 "po", "pp", "ps", "q" , "r" , "rb", "rd", "re", "rm", "rn", "ro",
189 "rr", "rs", "rt", "sb", "sc", "sh", "sk", "so", "sp", "ss", "st",
190 "sv", "sz", "ta", "tc", "th", "ti", "tl", "tm", "tp", "tr", "u",
191 "uf", "uh", "ul", "vs", "wh", "xp", "yr", 0
192 };
193
194 int lineno; /* current line number in input file */
195 const char *cfilename; /* name of current file */
196 int nfiles; /* number of files to process */
197 int fflag; /* -f: ignore \f */
198 int sflag; /* -s: ignore \s */
199 int ncmds; /* size of knowncmds */
200 int slot; /* slot in knowncmds found by binsrch */
201
202 void addcmd(char *);
203 void addmac(const char *);
204 int binsrch(const char *);
205 void checkknown(char *);
206 void chkcmd(char *, char *);
207 void complain(int);
208 static int eq(const char *, const char *);
209 int main(int, char **);
210 void nomatch(char *);
211 void pe(int);
212 void process(FILE *);
213 void prop(int);
214 void usage(void);
215
216 int
217 main(int argc, char **argv)
218 {
219 FILE *f;
220 int i;
221 char *cp;
222 char b1[4];
223
224 /* Figure out how many known commands there are */
225 while (knowncmds[ncmds])
226 ncmds++;
227 while (argc > 1 && argv[1][0] == '-') {
228 switch(argv[1][1]) {
229
230 /* -a: add pairs of macros */
231 case 'a':
232 i = strlen(argv[1]) - 2;
233 if (i % 6 != 0)
234 usage();
235 /* look for empty macro slots */
236 for (i=0; br[i].opbr; i++)
237 ;
238 for (cp=argv[1]+3; cp[-1]; cp += 6) {
239 char *tmp;
240
241 if (i >= MAXBR)
242 errx(1, "too many pairs");
243 if ((tmp = malloc(3)) == NULL)
244 err(1, "malloc");
245 strlcpy(tmp, cp, 3);
246 br[i].opbr = tmp;
247 if ((tmp = malloc(3)) == NULL)
248 err(1, "malloc");
249 strlcpy(tmp, cp+3, 3);
250 br[i].clbr = tmp;
251 addmac(br[i].opbr); /* knows pairs are also known cmds */
252 addmac(br[i].clbr);
253 i++;
254 }
255 break;
256
257 /* -c: add known commands */
258 case 'c':
259 i = strlen(argv[1]) - 2;
260 if (i % 3 != 0)
261 usage();
262 for (cp=argv[1]+3; cp[-1]; cp += 3) {
263 if (cp[2] && cp[2] != '.')
264 usage();
265 strncpy(b1, cp, 2);
266 addmac(b1);
267 }
268 break;
269
270 /* -f: ignore font changes */
271 case 'f':
272 fflag = 1;
273 break;
274
275 /* -s: ignore size changes */
276 case 's':
277 sflag = 1;
278 break;
279 default:
280 usage();
281 }
282 argc--; argv++;
283 }
284
285 nfiles = argc - 1;
286
287 if (nfiles > 0) {
288 for (i=1; i<argc; i++) {
289 cfilename = argv[i];
290 f = fopen(cfilename, "r");
291 if (f == NULL)
292 perror(cfilename);
293 else {
294 process(f);
295 fclose(f);
296 }
297 }
298 } else {
299 cfilename = "stdin";
300 process(stdin);
301 }
302 exit(0);
303 }
304
305 void
306 usage(void)
307 {
308 (void)fprintf(stderr,
309 "usage: %s [-fs] [-a.xx.yy.xx.yy...] [-c.xx.xx.xx...] file\n",
310 getprogname());
311 exit(1);
312 }
313
314 void
315 process(FILE *f)
316 {
317 int i, n;
318 char line[256]; /* the current line */
319 char mac[5]; /* The current macro or nroff command */
320 int pl;
321
322 stktop = -1;
323 for (lineno = 1; fgets(line, sizeof line, f); lineno++) {
324 if (line[0] == '.') {
325 /*
326 * find and isolate the macro/command name.
327 */
328 strncpy(mac, line+1, 4);
329 if (isspace((unsigned char)mac[0])) {
330 pe(lineno);
331 printf("Empty command\n");
332 } else if (isspace((unsigned char)mac[1])) {
333 mac[1] = 0;
334 } else if (isspace((unsigned char)mac[2])) {
335 mac[2] = 0;
336 } else if (mac[0] != '\\' || mac[1] != '\"') {
337 pe(lineno);
338 printf("Command too long\n");
339 }
340
341 /*
342 * Is it a known command?
343 */
344 checkknown(mac);
345
346 /*
347 * Should we add it?
348 */
349 if (eq(mac, "de"))
350 addcmd(line);
351
352 chkcmd(line, mac);
353 }
354
355 /*
356 * At this point we process the line looking
357 * for \s and \f.
358 */
359 for (i=0; line[i]; i++)
360 if (line[i]=='\\' && (i==0 || line[i-1]!='\\')) {
361 if (!sflag && line[++i]=='s') {
362 pl = line[++i];
363 if (isdigit((unsigned char)pl)) {
364 n = pl - '0';
365 pl = ' ';
366 } else
367 n = 0;
368 while (isdigit((unsigned char)line[++i]))
369 n = 10 * n + line[i] - '0';
370 i--;
371 if (n == 0) {
372 if (stktop >= 0 &&
373 stk[stktop].opno == SZ) {
374 stktop--;
375 } else {
376 pe(lineno);
377 printf("unmatched \\s0\n");
378 }
379 } else {
380 stk[++stktop].opno = SZ;
381 stk[stktop].pl = pl;
382 stk[stktop].parm = n;
383 stk[stktop].lno = lineno;
384 }
385 } else if (!fflag && line[i]=='f') {
386 n = line[++i];
387 if (n == 'P') {
388 if (stktop >= 0 &&
389 stk[stktop].opno == FT) {
390 stktop--;
391 } else {
392 pe(lineno);
393 printf("unmatched \\fP\n");
394 }
395 } else {
396 stk[++stktop].opno = FT;
397 stk[stktop].pl = 1;
398 stk[stktop].parm = n;
399 stk[stktop].lno = lineno;
400 }
401 }
402 }
403 }
404 /*
405 * We've hit the end and look at all this stuff that hasn't been
406 * matched yet! Complain, complain.
407 */
408 for (i=stktop; i>=0; i--) {
409 complain(i);
410 }
411 }
412
413 void
414 complain(int i)
415 {
416 pe(stk[i].lno);
417 printf("Unmatched ");
418 prop(i);
419 printf("\n");
420 }
421
422 void
423 prop(int i)
424 {
425 if (stk[i].pl == 0)
426 printf(".%s", br[stk[i].opno].opbr);
427 else switch(stk[i].opno) {
428 case SZ:
429 printf("\\s%c%d", stk[i].pl, stk[i].parm);
430 break;
431 case FT:
432 printf("\\f%c", stk[i].parm);
433 break;
434 default:
435 printf("Bug: stk[%d].opno = %d = .%s, .%s",
436 i, stk[i].opno, br[stk[i].opno].opbr,
437 br[stk[i].opno].clbr);
438 }
439 }
440
441 void
442 chkcmd(char *line, char *mac)
443 {
444 int i;
445
446 /*
447 * Check to see if it matches top of stack.
448 */
449 if (stktop >= 0 && eq(mac, br[stk[stktop].opno].clbr))
450 stktop--; /* OK. Pop & forget */
451 else {
452 /* No. Maybe it's an opener */
453 for (i=0; br[i].opbr; i++) {
454 if (eq(mac, br[i].opbr)) {
455 /* Found. Push it. */
456 stktop++;
457 stk[stktop].opno = i;
458 stk[stktop].pl = 0;
459 stk[stktop].parm = 0;
460 stk[stktop].lno = lineno;
461 break;
462 }
463 /*
464 * Maybe it's an unmatched closer.
465 * NOTE: this depends on the fact
466 * that none of the closers can be
467 * openers too.
468 */
469 if (eq(mac, br[i].clbr)) {
470 nomatch(mac);
471 break;
472 }
473 }
474 }
475 }
476
477 void
478 nomatch(char *mac)
479 {
480 int i, j;
481
482 /*
483 * Look for a match further down on stack
484 * If we find one, it suggests that the stuff in
485 * between is supposed to match itself.
486 */
487 for (j=stktop; j>=0; j--)
488 if (eq(mac,br[stk[j].opno].clbr)) {
489 /* Found. Make a good diagnostic. */
490 if (j == stktop-2) {
491 /*
492 * Check for special case \fx..\fR and don't
493 * complain.
494 */
495 if (stk[j+1].opno==FT && stk[j+1].parm!='R'
496 && stk[j+2].opno==FT && stk[j+2].parm=='R') {
497 stktop = j -1;
498 return;
499 }
500 /*
501 * We have two unmatched frobs. Chances are
502 * they were intended to match, so we mention
503 * them together.
504 */
505 pe(stk[j+1].lno);
506 prop(j+1);
507 printf(" does not match %d: ", stk[j+2].lno);
508 prop(j+2);
509 printf("\n");
510 } else for (i=j+1; i <= stktop; i++) {
511 complain(i);
512 }
513 stktop = j-1;
514 return;
515 }
516 /* Didn't find one. Throw this away. */
517 pe(lineno);
518 printf("Unmatched .%s\n", mac);
519 }
520
521 /* eq: are two strings equal? */
522 static int
523 eq(const char *s1, const char *s2)
524 {
525 return strcmp(s1, s2) == 0;
526 }
527
528 /* print the first part of an error message, given the line number */
529 void
530 pe(int pelineno)
531 {
532 if (nfiles > 1)
533 printf("%s: ", cfilename);
534 printf("%d: ", pelineno);
535 }
536
537 void
538 checkknown(char *mac)
539 {
540
541 if (eq(mac, "."))
542 return;
543 if (binsrch(mac) >= 0)
544 return;
545 if (mac[0] == '\\' && mac[1] == '"') /* comments */
546 return;
547
548 pe(lineno);
549 printf("Unknown command: .%s\n", mac);
550 }
551
552 /*
553 * We have a .de xx line in "line". Add xx to the list of known commands.
554 */
555 void
556 addcmd(char *line)
557 {
558 char *mac;
559
560 /* grab the macro being defined */
561 mac = line+4;
562 while (isspace((unsigned char)*mac))
563 mac++;
564 if (*mac == 0) {
565 pe(lineno);
566 printf("illegal define: %s\n", line);
567 return;
568 }
569 mac[2] = 0;
570 if (isspace((unsigned char)mac[1]) || mac[1] == '\\')
571 mac[1] = 0;
572 if (ncmds >= MAXCMDS) {
573 printf("Only %d known commands allowed\n", MAXCMDS);
574 exit(1);
575 }
576 addmac(mac);
577 }
578
579 /*
580 * Add mac to the list. We should really have some kind of tree
581 * structure here but this is a quick-and-dirty job and I just don't
582 * have time to mess with it. (I wonder if this will come back to haunt
583 * me someday?) Anyway, I claim that .de is fairly rare in user
584 * nroff programs, and the register loop below is pretty fast.
585 */
586 void
587 addmac(const char *mac)
588 {
589 const char **src, **dest, **loc;
590
591 if (binsrch(mac) >= 0){ /* it's OK to redefine something */
592 #ifdef DEBUG
593 printf("binsrch(%s) -> already in table\n", mac);
594 #endif /* DEBUG */
595 return;
596 }
597 /* binsrch sets slot as a side effect */
598 #ifdef DEBUG
599 printf("binsrch(%s) -> %d\n", mac, slot);
600 #endif
601 loc = &knowncmds[slot];
602 src = &knowncmds[ncmds-1];
603 dest = src+1;
604 while (dest > loc)
605 *dest-- = *src--;
606 if ((*loc = strdup(mac)) == NULL)
607 err(1, "strdup");
608 ncmds++;
609 #ifdef DEBUG
610 printf("after: %s %s %s %s %s, %d cmds\n", knowncmds[slot-2],
611 knowncmds[slot-1], knowncmds[slot], knowncmds[slot+1],
612 knowncmds[slot+2], ncmds);
613 #endif
614 }
615
616 /*
617 * Do a binary search in knowncmds for mac.
618 * If found, return the index. If not, return -1.
619 */
620 int
621 binsrch(const char *mac)
622 {
623 const char *p; /* pointer to current cmd in list */
624 int d; /* difference if any */
625 int mid; /* mid point in binary search */
626 int top, bot; /* boundaries of bin search, inclusive */
627
628 top = ncmds-1;
629 bot = 0;
630 while (top >= bot) {
631 mid = (top+bot)/2;
632 p = knowncmds[mid];
633 d = p[0] - mac[0];
634 if (d == 0)
635 d = p[1] - mac[1];
636 if (d == 0)
637 return mid;
638 if (d < 0)
639 bot = mid + 1;
640 else
641 top = mid - 1;
642 }
643 slot = bot; /* place it would have gone */
644 return -1;
645 }
646