/*	$NetBSD: checknr.c,v 1.10 2002/01/21 18:28:00 wiz Exp $	*/
2
3 /*
4 * Copyright (c) 1980, 1993
5 * The Regents of the University of California. All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. All advertising materials mentioning features or use of this software
16 * must display the following acknowledgement:
17 * This product includes software developed by the University of
18 * California, Berkeley and its contributors.
19 * 4. Neither the name of the University nor the names of its contributors
20 * may be used to endorse or promote products derived from this software
21 * without specific prior written permission.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * SUCH DAMAGE.
34 */
35
36 #include <sys/cdefs.h>
37 #ifndef lint
38 __COPYRIGHT("@(#) Copyright (c) 1980, 1993\n\
39 The Regents of the University of California. All rights reserved.\n");
40 #endif /* not lint */
41
42 #ifndef lint
43 #if 0
44 static char sccsid[] = "@(#)checknr.c 8.1 (Berkeley) 6/6/93";
45 #else
46 __RCSID("$NetBSD: checknr.c,v 1.10 2002/01/21 18:28:00 wiz Exp $");
47 #endif
48 #endif /* not lint */
49
/*
 * checknr: check an nroff/troff input file for matching macro calls.
 * We also attempt to match size and font changes, but only the embedded
 * kind (the \s and \f escapes).  These must end in \s0 and \fP
 * respectively.  Maybe more sophistication later, but for now think of
 * these restrictions as contributions to structured typesetting.
 */
57 #include <ctype.h>
58 #include <stdio.h>
59 #include <stdlib.h>
60 #include <string.h>
61
/* Fixed capacities of the tables used throughout this file. */
enum {
	MAXSTK	= 100,	/* Stack size */
	MAXBR	= 100,	/* Max number of bracket pairs known */
	MAXCMDS	= 500	/* Max number of commands known */
};
65
/*
 * One entry of the stack on which we remember the openers seen so far
 * that have not yet been closed.
 */
struct stkstr {
	int	opno;	/* index into br[] of the opening bracket */
	int	pl;	/* 0 for a dot-command; 1 for \f; '+', '-' or ' ' for \s */
	int	parm;	/* parameter: point size for \s, font letter for \f */
	int	lno;	/* input line number on which the opener appeared */
};

struct stkstr	stk[MAXSTK];	/* the stack itself */
int		stktop;		/* index of the top entry; process() starts it at -1 */
76
/*
 * The kinds of opening and closing brackets.
 *
 * Each entry pairs the name of an opening request with the name of the
 * request that closes it.  The table ends at the first entry whose opbr
 * is a null pointer; main() appends the pairs given with -a after the
 * built-in ones.  Slots SZ and FT double as the identities of the \s and
 * \f escapes on the stack.
 */
#define SZ	0	/* br[] slot for size changes; also \s */
#define FT	1	/* br[] slot for font changes; also \f */

struct brstr {
	char	*opbr;	/* opening request */
	char	*clbr;	/* matching closing request */
};

struct brstr br[MAXBR] = {
	/* A few bare bones troff commands (slots SZ and FT, in that order) */
	{ "sz", "sz" },
	{ "ft", "ft" },

	/* the -mm package */
	{ "AL", "LE" }, { "AS", "AE" }, { "BL", "LE" }, { "BS", "BE" },
	{ "DF", "DE" }, { "DL", "LE" }, { "DS", "DE" }, { "FS", "FE" },
	{ "ML", "LE" }, { "NS", "NE" }, { "RL", "LE" }, { "VL", "LE" },

	/* the -ms package */
	{ "AB", "AE" }, { "BD", "DE" }, { "CD", "DE" }, { "DS", "DE" },
	{ "FS", "FE" }, { "ID", "DE" }, { "KF", "KE" }, { "KS", "KE" },
	{ "LD", "DE" }, { "LG", "NL" }, { "QS", "QE" }, { "RS", "RE" },
	{ "SM", "NL" }, { "XA", "XE" }, { "XS", "XE" },

	/* The -me package */
	{ "(b", ")b" }, { "(c", ")c" }, { "(d", ")d" }, { "(f", ")f" },
	{ "(l", ")l" }, { "(q", ")q" }, { "(x", ")x" }, { "(z", ")z" },

	/* The -mdoc package */
	{ "Ao", "Ac" }, { "Bd", "Ed" }, { "Bk", "Ek" }, { "Bo", "Bc" },
	{ "Do", "Dc" }, { "Fo", "Fc" }, { "Oo", "Oc" }, { "Po", "Pc" },
	{ "Qo", "Qc" }, { "Rs", "Re" }, { "So", "Sc" }, { "Xo", "Xc" },

	/* Things needed by preprocessors */
	{ "EQ", "EN" }, { "TS", "TE" },

	/* Refer */
	{ "[", "]" },

	/* end of table */
	{ NULL, NULL },
};
147
148 /*
149 * All commands known to nroff, plus macro packages.
150 * Used so we can complain about unrecognized commands.
151 */
152 char *knowncmds[MAXCMDS] = {
153 "$c", "$f", "$h", "$p", "$s", "%A", "%B", "%C", "%D", "%I", "%J", "%N",
154 "%O", "%P", "%Q", "%R", "%T", "%V", "(b", "(c", "(d", "(f", "(l", "(q",
155 "(t", "(x", "(z", ")b", ")c", ")d", ")f", ")l", ")q", ")t", ")x",
156 ")z", "++", "+c", "1C", "1c", "2C", "2c", "@(", "@)", "@C", "@D",
157 "@F", "@I", "@M", "@c", "@e", "@f", "@h", "@m", "@n", "@o", "@p",
158 "@r", "@t", "@z", "AB", "AE", "AF", "AI", "AL", "AM", "AS", "AT",
159 "AU", "AX", "Ac", "Ad", "An", "Ao", "Ap", "Aq", "Ar", "At", "B" , "B1",
160 "B2", "BD", "BE", "BG", "BL", "BS", "BT", "BX", "Bc", "Bd", "Bf",
161 "Bk", "Bl", "Bo", "Bq", "Bsx", "Bx", "C1", "C2", "CD", "CM", "CT",
162 "Cd", "Cm", "D" , "D1", "DA", "DE", "DF", "DL", "DS", "DT", "Db", "Dc",
163 "Dd", "Dl", "Do", "Dq", "Dt", "Dv", "EC", "EF", "EG", "EH", "EM",
164 "EN", "EQ", "EX", "Ec", "Ed", "Ef", "Ek", "El", "Em", "Eo", "Er",
165 "Ev", "FA", "FD", "FE", "FG", "FJ", "FK", "FL", "FN", "FO", "FQ",
166 "FS", "FV", "FX", "Fa", "Fc", "Fd", "Fl", "Fn", "Fo", "Ft", "Fx",
167 "H" , "HC", "HD", "HM", "HO", "HU", "I" , "ID", "IE", "IH", "IM",
168 "IP", "IX", "IZ", "Ic", "It", "KD", "KE", "KF", "KQ", "KS", "LB",
169 "LC", "LD", "LE", "LG", "LI", "LP", "Lb", "Li", "MC", "ME", "MF",
170 "MH", "ML", "MR", "MT", "ND", "NE", "NH", "NL", "NP", "NS", "Nd",
171 "Nm", "No", "Ns", "Nx", "OF", "OH", "OK", "OP", "Oc", "Oo", "Op",
172 "Os", "Ot", "Ox", "P" , "P1", "PF", "PH", "PP", "PT", "PX", "PY",
173 "Pa", "Pc", "Pf", "Po", "Pp", "Pq", "QE", "QP", "QS", "Qc", "Ql",
174 "Qo", "Qq", "R" , "RA", "RC", "RE", "RL", "RP", "RQ", "RS", "RT",
175 "Re", "Rs", "S" , "S0", "S2", "S3", "SA", "SG", "SH", "SK", "SM",
176 "SP", "SY", "Sc", "Sh", "Sm", "So", "Sq", "Ss", "St", "Sx", "Sy",
177 "T&", "TA", "TB", "TC", "TD", "TE", "TH", "TL", "TM", "TP", "TQ",
178 "TR", "TS", "TX", "Tn", "UL", "US", "UX", "Ud", "Ux", "VL", "Va", "Vt",
179 "WC", "WH", "XA", "XD", "XE", "XF", "XK", "XP", "XS", "Xc", "Xo",
180 "Xr", "[" , "[-", "[0", "[1", "[2", "[3", "[4", "[5", "[<", "[>",
181 "[]", "\\{", "\\}", "]" , "]-", "]<", "]>", "][", "ab", "ac", "ad", "af", "am",
182 "ar", "as", "b" , "ba", "bc", "bd", "bi", "bl", "bp", "br", "bx",
183 "c.", "c2", "cc", "ce", "cf", "ch", "cs", "ct", "cu", "da", "de",
184 "di", "dl", "dn", "ds", "dt", "dw", "dy", "ec", "ef", "eh", "el",
185 "em", "eo", "ep", "ev", "ex", "fc", "fi", "fl", "fo", "fp", "ft",
186 "fz", "hc", "he", "hl", "hp", "ht", "hw", "hx", "hy", "i" , "ie",
187 "if", "ig", "in", "ip", "it", "ix", "lc", "lg", "li", "ll", "ln",
188 "lo", "lp", "ls", "lt", "m1", "m2", "m3", "m4", "mc", "mk", "mo",
189 "n1", "n2", "na", "ne", "nf", "nh", "nl", "nm", "nn", "np", "nr",
190 "ns", "nx", "of", "oh", "os", "pa", "pc", "pi", "pl", "pm", "pn",
191 "po", "pp", "ps", "q" , "r" , "rb", "rd", "re", "rm", "rn", "ro",
192 "rr", "rs", "rt", "sb", "sc", "sh", "sk", "so", "sp", "ss", "st",
193 "sv", "sz", "ta", "tc", "th", "ti", "tl", "tm", "tp", "tr", "u",
194 "uf", "uh", "ul", "vs", "wh", "xp", "yr", 0
195 };
196
/* State shared by the routines below. */
int	lineno;		/* current line number in input file */
char	*cfilename;	/* name of current file */
int	nfiles;		/* number of files to process */
int	fflag;		/* -f: ignore \f */
int	sflag;		/* -s: ignore \s */
int	ncmds;		/* number of entries in use in knowncmds */
int	slot;		/* insertion point in knowncmds left by a failed binsrch */

/* Routines defined in this file, in alphabetical order. */
void	addcmd(char *line);
void	addmac(char *mac);
int	binsrch(char *mac);
void	checkknown(char *mac);
void	chkcmd(char *line, char *mac);
void	complain(int i);
int	eq(const void *s1, const void *s2);
int	main(int argc, char **argv);
void	nomatch(char *mac);
void	pe(int pelineno);
void	process(FILE *f);
void	prop(int i);
void	usage(void);
218
219 int
220 main(int argc, char **argv)
221 {
222 FILE *f;
223 int i;
224 char *cp;
225 char b1[4];
226
227 /* Figure out how many known commands there are */
228 while (knowncmds[ncmds])
229 ncmds++;
230 while (argc > 1 && argv[1][0] == '-') {
231 switch(argv[1][1]) {
232
233 /* -a: add pairs of macros */
234 case 'a':
235 i = strlen(argv[1]) - 2;
236 if (i % 6 != 0)
237 usage();
238 /* look for empty macro slots */
239 for (i=0; br[i].opbr; i++)
240 ;
241 for (cp=argv[1]+3; cp[-1]; cp += 6) {
242 br[i].opbr = malloc(3);
243 strncpy(br[i].opbr, cp, 2);
244 br[i].clbr = malloc(3);
245 strncpy(br[i].clbr, cp+3, 2);
246 addmac(br[i].opbr); /* knows pairs are also known cmds */
247 addmac(br[i].clbr);
248 i++;
249 }
250 break;
251
252 /* -c: add known commands */
253 case 'c':
254 i = strlen(argv[1]) - 2;
255 if (i % 3 != 0)
256 usage();
257 for (cp=argv[1]+3; cp[-1]; cp += 3) {
258 if (cp[2] && cp[2] != '.')
259 usage();
260 strncpy(b1, cp, 2);
261 addmac(b1);
262 }
263 break;
264
265 /* -f: ignore font changes */
266 case 'f':
267 fflag = 1;
268 break;
269
270 /* -s: ignore size changes */
271 case 's':
272 sflag = 1;
273 break;
274 default:
275 usage();
276 }
277 argc--; argv++;
278 }
279
280 nfiles = argc - 1;
281
282 if (nfiles > 0) {
283 for (i=1; i<argc; i++) {
284 cfilename = argv[i];
285 f = fopen(cfilename, "r");
286 if (f == NULL)
287 perror(cfilename);
288 else
289 process(f);
290 fclose(f);
291 }
292 } else {
293 cfilename = "stdin";
294 process(stdin);
295 }
296 exit(0);
297 }
298
/*
 * Print the one-line synopsis on standard output and terminate the
 * program with exit status 1.  Does not return.
 */
void
usage(void)
{
	fputs("Usage: checknr -s -f -a.xx.yy.xx.yy... -c.xx.xx.xx...\n", stdout);
	exit(1);
}
305
306 void
307 process(FILE *f)
308 {
309 int i, n;
310 char line[256]; /* the current line */
311 char mac[5]; /* The current macro or nroff command */
312 int pl;
313
314 stktop = -1;
315 for (lineno = 1; fgets(line, sizeof line, f); lineno++) {
316 if (line[0] == '.') {
317 /*
318 * find and isolate the macro/command name.
319 */
320 strncpy(mac, line+1, 4);
321 if (isspace((unsigned char)mac[0])) {
322 pe(lineno);
323 printf("Empty command\n");
324 } else if (isspace((unsigned char)mac[1])) {
325 mac[1] = 0;
326 } else if (isspace((unsigned char)mac[2])) {
327 mac[2] = 0;
328 } else if (mac[0] != '\\' || mac[1] != '\"') {
329 pe(lineno);
330 printf("Command too long\n");
331 }
332
333 /*
334 * Is it a known command?
335 */
336 checkknown(mac);
337
338 /*
339 * Should we add it?
340 */
341 if (eq(mac, "de"))
342 addcmd(line);
343
344 chkcmd(line, mac);
345 }
346
347 /*
348 * At this point we process the line looking
349 * for \s and \f.
350 */
351 for (i=0; line[i]; i++)
352 if (line[i]=='\\' && (i==0 || line[i-1]!='\\')) {
353 if (!sflag && line[++i]=='s') {
354 pl = line[++i];
355 if (isdigit((unsigned char)pl)) {
356 n = pl - '0';
357 pl = ' ';
358 } else
359 n = 0;
360 while (isdigit((unsigned char)line[++i]))
361 n = 10 * n + line[i] - '0';
362 i--;
363 if (n == 0) {
364 if (stk[stktop].opno == SZ) {
365 stktop--;
366 } else {
367 pe(lineno);
368 printf("unmatched \\s0\n");
369 }
370 } else {
371 stk[++stktop].opno = SZ;
372 stk[stktop].pl = pl;
373 stk[stktop].parm = n;
374 stk[stktop].lno = lineno;
375 }
376 } else if (!fflag && line[i]=='f') {
377 n = line[++i];
378 if (n == 'P') {
379 if (stk[stktop].opno == FT) {
380 stktop--;
381 } else {
382 pe(lineno);
383 printf("unmatched \\fP\n");
384 }
385 } else {
386 stk[++stktop].opno = FT;
387 stk[stktop].pl = 1;
388 stk[stktop].parm = n;
389 stk[stktop].lno = lineno;
390 }
391 }
392 }
393 }
394 /*
395 * We've hit the end and look at all this stuff that hasn't been
396 * matched yet! Complain, complain.
397 */
398 for (i=stktop; i>=0; i--) {
399 complain(i);
400 }
401 }
402
403 void
404 complain(int i)
405 {
406 pe(stk[i].lno);
407 printf("Unmatched ");
408 prop(i);
409 printf("\n");
410 }
411
412 void
413 prop(int i)
414 {
415 if (stk[i].pl == 0)
416 printf(".%s", br[stk[i].opno].opbr);
417 else switch(stk[i].opno) {
418 case SZ:
419 printf("\\s%c%d", stk[i].pl, stk[i].parm);
420 break;
421 case FT:
422 printf("\\f%c", stk[i].parm);
423 break;
424 default:
425 printf("Bug: stk[%d].opno = %d = .%s, .%s",
426 i, stk[i].opno, br[stk[i].opno].opbr,
427 br[stk[i].opno].clbr);
428 }
429 }
430
431 void
432 chkcmd(char *line, char *mac)
433 {
434 int i;
435
436 /*
437 * Check to see if it matches top of stack.
438 */
439 if (stktop >= 0 && eq(mac, br[stk[stktop].opno].clbr))
440 stktop--; /* OK. Pop & forget */
441 else {
442 /* No. Maybe it's an opener */
443 for (i=0; br[i].opbr; i++) {
444 if (eq(mac, br[i].opbr)) {
445 /* Found. Push it. */
446 stktop++;
447 stk[stktop].opno = i;
448 stk[stktop].pl = 0;
449 stk[stktop].parm = 0;
450 stk[stktop].lno = lineno;
451 break;
452 }
453 /*
454 * Maybe it's an unmatched closer.
455 * NOTE: this depends on the fact
456 * that none of the closers can be
457 * openers too.
458 */
459 if (eq(mac, br[i].clbr)) {
460 nomatch(mac);
461 break;
462 }
463 }
464 }
465 }
466
467 void
468 nomatch(char *mac)
469 {
470 int i, j;
471
472 /*
473 * Look for a match further down on stack
474 * If we find one, it suggests that the stuff in
475 * between is supposed to match itself.
476 */
477 for (j=stktop; j>=0; j--)
478 if (eq(mac,br[stk[j].opno].clbr)) {
479 /* Found. Make a good diagnostic. */
480 if (j == stktop-2) {
481 /*
482 * Check for special case \fx..\fR and don't
483 * complain.
484 */
485 if (stk[j+1].opno==FT && stk[j+1].parm!='R'
486 && stk[j+2].opno==FT && stk[j+2].parm=='R') {
487 stktop = j -1;
488 return;
489 }
490 /*
491 * We have two unmatched frobs. Chances are
492 * they were intended to match, so we mention
493 * them together.
494 */
495 pe(stk[j+1].lno);
496 prop(j+1);
497 printf(" does not match %d: ", stk[j+2].lno);
498 prop(j+2);
499 printf("\n");
500 } else for (i=j+1; i <= stktop; i++) {
501 complain(i);
502 }
503 stktop = j-1;
504 return;
505 }
506 /* Didn't find one. Throw this away. */
507 pe(lineno);
508 printf("Unmatched .%s\n", mac);
509 }
510
/*
 * eq: compare two NUL-terminated strings.
 * Returns 1 when they are identical and 0 otherwise.
 */
int
eq(const void *s1, const void *s2)
{
	const char *left = s1;
	const char *right = s2;

	return !strcmp(left, right);
}
517
518 /* print the first part of an error message, given the line number */
519 void
520 pe(int pelineno)
521 {
522 if (nfiles > 1)
523 printf("%s: ", cfilename);
524 printf("%d: ", pelineno);
525 }
526
527 void
528 checkknown(char *mac)
529 {
530
531 if (eq(mac, "."))
532 return;
533 if (binsrch(mac) >= 0)
534 return;
535 if (mac[0] == '\\' && mac[1] == '"') /* comments */
536 return;
537
538 pe(lineno);
539 printf("Unknown command: .%s\n", mac);
540 }
541
542 /*
543 * We have a .de xx line in "line". Add xx to the list of known commands.
544 */
545 void
546 addcmd(char *line)
547 {
548 char *mac;
549
550 /* grab the macro being defined */
551 mac = line+4;
552 while (isspace((unsigned char)*mac))
553 mac++;
554 if (*mac == 0) {
555 pe(lineno);
556 printf("illegal define: %s\n", line);
557 return;
558 }
559 mac[2] = 0;
560 if (isspace((unsigned char)mac[1]) || mac[1] == '\\')
561 mac[1] = 0;
562 if (ncmds >= MAXCMDS) {
563 printf("Only %d known commands allowed\n", MAXCMDS);
564 exit(1);
565 }
566 addmac(mac);
567 }
568
569 /*
570 * Add mac to the list. We should really have some kind of tree
571 * structure here but this is a quick-and-dirty job and I just don't
572 * have time to mess with it. (I wonder if this will come back to haunt
573 * me someday?) Anyway, I claim that .de is fairly rare in user
574 * nroff programs, and the register loop below is pretty fast.
575 */
576 void
577 addmac(char *mac)
578 {
579 char **src, **dest, **loc;
580
581 if (binsrch(mac) >= 0){ /* it's OK to redefine something */
582 #ifdef DEBUG
583 printf("binsrch(%s) -> already in table\n", mac);
584 #endif /* DEBUG */
585 return;
586 }
587 /* binsrch sets slot as a side effect */
588 #ifdef DEBUG
589 printf("binsrch(%s) -> %d\n", mac, slot);
590 #endif
591 loc = &knowncmds[slot];
592 src = &knowncmds[ncmds-1];
593 dest = src+1;
594 while (dest > loc)
595 *dest-- = *src--;
596 *loc = malloc(3);
597 strcpy(*loc, mac);
598 ncmds++;
599 #ifdef DEBUG
600 printf("after: %s %s %s %s %s, %d cmds\n", knowncmds[slot-2],
601 knowncmds[slot-1], knowncmds[slot], knowncmds[slot+1],
602 knowncmds[slot+2], ncmds);
603 #endif
604 }
605
606 /*
607 * Do a binary search in knowncmds for mac.
608 * If found, return the index. If not, return -1.
609 */
610 int
611 binsrch(char *mac)
612 {
613 char *p; /* pointer to current cmd in list */
614 int d; /* difference if any */
615 int mid; /* mid point in binary search */
616 int top, bot; /* boundaries of bin search, inclusive */
617
618 top = ncmds-1;
619 bot = 0;
620 while (top >= bot) {
621 mid = (top+bot)/2;
622 p = knowncmds[mid];
623 d = p[0] - mac[0];
624 if (d == 0)
625 d = p[1] - mac[1];
626 if (d == 0)
627 return mid;
628 if (d < 0)
629 bot = mid + 1;
630 else
631 top = mid - 1;
632 }
633 slot = bot; /* place it would have gone */
634 return -1;
635 }
636