checknr.c revision 1.2 1 /*
2 * Copyright (c) 1980 The Regents of the University of California.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 * must display the following acknowledgement:
15 * This product includes software developed by the University of
16 * California, Berkeley and its contributors.
17 * 4. Neither the name of the University nor the names of its contributors
18 * may be used to endorse or promote products derived from this software
19 * without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 */
33
#ifndef lint
/* Copyright notice kept as a global string. */
char copyright[] =
	"@(#) Copyright (c) 1980 The Regents of the University of California.\n"
	"All rights reserved.\n";

/*static char sccsid[] = "from: @(#)checknr.c 5.4 (Berkeley) 6/1/90";*/
/* Revision identification string of this file. */
static char rcsid[] =
	"$Id: checknr.c,v 1.2 1993/08/01 18:18:04 mycroft Exp $";
#endif /* not lint */
44
45 /*
46 * checknr: check an nroff/troff input file for matching macro calls.
47 * we also attempt to match size and font changes, but only the embedded
48 * kind. These must end in \s0 and \fP resp. Maybe more sophistication
49 * later but for now think of these restrictions as contributions to
50 * structured typesetting.
51 */
52 #include <stdio.h>
53 #include <ctype.h>
54
/* Fixed capacities of the statically sized tables in this file. */
enum {
	MAXSTK = 100,	/* Stack size */
	MAXBR = 100,	/* Max number of bracket pairs known */
	MAXCMDS = 500	/* Max number of commands known */
};
58
/*
 * One remembered opener: a bracket command, a \s size change or a
 * \f font change that has been seen but not closed yet.
 */
struct stkstr {
	int opno;	/* index into br[] of the opening bracket */
	int pl;		/* '+', '-', ' ' for \s, 1 for \f, 0 for a dot command */
	int parm;	/* parm to size, font, etc */
	int lno;	/* input line number the opener was seen on */
};

/*
 * The stack on which we remember what we've seen so far, and the index
 * of its top entry (-1 while the stack is empty).
 */
struct stkstr stk[MAXSTK];
int stktop;
69
/*
 * The kinds of opening and closing brackets.
 * The table ends at the first entry whose opbr is null; the unused
 * entries behind it are where the -a option stores additional pairs.
 */
struct brstr {
	char *opbr;	/* name of the command that opens the bracket */
	char *clbr;	/* name of the command that closes it */
};

/* Indices of the two bare troff entries; \s and \f are filed under them. */
#define SZ 0
#define FT 1

struct brstr br[MAXBR] = {
	/* A few bare bones troff commands */
	{ "sz", "sz" },		/* SZ, also \s */
	{ "ft", "ft" },		/* FT, also \f */
	/* the -mm package */
	{ "AL", "LE" },
	{ "AS", "AE" },
	{ "BL", "LE" },
	{ "BS", "BE" },
	{ "DF", "DE" },
	{ "DL", "LE" },
	{ "DS", "DE" },
	{ "FS", "FE" },
	{ "ML", "LE" },
	{ "NS", "NE" },
	{ "RL", "LE" },
	{ "VL", "LE" },
	/* the -ms package */
	{ "AB", "AE" },
	{ "BD", "DE" },
	{ "CD", "DE" },
	{ "DS", "DE" },
	{ "FS", "FE" },
	{ "ID", "DE" },
	{ "KF", "KE" },
	{ "KS", "KE" },
	{ "LD", "DE" },
	{ "LG", "NL" },
	{ "QS", "QE" },
	{ "RS", "RE" },
	{ "SM", "NL" },
	{ "XA", "XE" },
	{ "XS", "XE" },
	/* The -me package */
	{ "(b", ")b" },
	{ "(c", ")c" },
	{ "(d", ")d" },
	{ "(f", ")f" },
	{ "(l", ")l" },
	{ "(q", ")q" },
	{ "(x", ")x" },
	{ "(z", ")z" },
	/* Things needed by preprocessors */
	{ "EQ", "EN" },
	{ "TS", "TE" },
	/* Refer */
	{ "[", "]" },
	/* end of table */
	{ 0, 0 }
};
127
128 /*
129 * All commands known to nroff, plus macro packages.
130 * Used so we can complain about unrecognized commands.
131 */
132 char *knowncmds[MAXCMDS] = {
133 "$c", "$f", "$h", "$p", "$s", "(b", "(c", "(d", "(f", "(l", "(q", "(t",
134 "(x", "(z", ")b", ")c", ")d", ")f", ")l", ")q", ")t", ")x", ")z", "++",
135 "+c", "1C", "1c", "2C", "2c", "@(", "@)", "@C", "@D", "@F", "@I", "@M",
136 "@c", "@e", "@f", "@h", "@m", "@n", "@o", "@p", "@r", "@t", "@z", "AB",
137 "AE", "AF", "AI", "AL", "AM", "AS", "AT", "AU", "AX", "B", "B1", "B2",
138 "BD", "BE", "BG", "BL", "BS", "BT", "BX", "C1", "C2", "CD", "CM", "CT",
139 "D", "DA", "DE", "DF", "DL", "DS", "DT", "EC", "EF", "EG", "EH", "EM",
140 "EN", "EQ", "EX", "FA", "FD", "FE", "FG", "FJ", "FK", "FL", "FN", "FO",
141 "FQ", "FS", "FV", "FX", "H", "HC", "HD", "HM", "HO", "HU", "I", "ID",
142 "IE", "IH", "IM", "IP", "IX", "IZ", "KD", "KE", "KF", "KQ", "KS", "LB",
143 "LC", "LD", "LE", "LG", "LI", "LP", "MC", "ME", "MF", "MH", "ML", "MR",
144 "MT", "ND", "NE", "NH", "NL", "NP", "NS", "OF", "OH", "OK", "OP", "P",
145 "P1", "PF", "PH", "PP", "PT", "PX", "PY", "QE", "QP", "QS", "R", "RA",
146 "RC", "RE", "RL", "RP", "RQ", "RS", "RT", "S", "S0", "S2", "S3", "SA",
147 "SG", "SH", "SK", "SM", "SP", "SY", "T&", "TA", "TB", "TC", "TD", "TE",
148 "TH", "TL", "TM", "TP", "TQ", "TR", "TS", "TX", "UL", "US", "UX", "VL",
149 "WC", "WH", "XA", "XD", "XE", "XF", "XK", "XP", "XS", "[", "[-", "[0",
150 "[1", "[2", "[3", "[4", "[5", "[<", "[>", "[]", "]", "]-", "]<", "]>",
151 "][", "ab", "ac", "ad", "af", "am", "ar", "as", "b", "ba", "bc", "bd",
152 "bi", "bl", "bp", "br", "bx", "c.", "c2", "cc", "ce", "cf", "ch", "cs",
153 "ct", "cu", "da", "de", "di", "dl", "dn", "ds", "dt", "dw", "dy", "ec",
154 "ef", "eh", "el", "em", "eo", "ep", "ev", "ex", "fc", "fi", "fl", "fo",
155 "fp", "ft", "fz", "hc", "he", "hl", "hp", "ht", "hw", "hx", "hy", "i",
156 "ie", "if", "ig", "in", "ip", "it", "ix", "lc", "lg", "li", "ll", "ln",
157 "lo", "lp", "ls", "lt", "m1", "m2", "m3", "m4", "mc", "mk", "mo", "n1",
158 "n2", "na", "ne", "nf", "nh", "nl", "nm", "nn", "np", "nr", "ns", "nx",
159 "of", "oh", "os", "pa", "pc", "pi", "pl", "pm", "pn", "po", "pp", "ps",
160 "q", "r", "rb", "rd", "re", "rm", "rn", "ro", "rr", "rs", "rt", "sb",
161 "sc", "sh", "sk", "so", "sp", "ss", "st", "sv", "sz", "ta", "tc", "th",
162 "ti", "tl", "tm", "tp", "tr", "u", "uf", "uh", "ul", "vs", "wh", "xp",
163 "yr", 0
164 };
165
/* The input currently being checked. */
char *cfilename;	/* name of current file */
char line[256];		/* the current line */
int lineno;		/* current line number in input file */

/* Settings taken from the command line. */
int nfiles;		/* number of files to process */
int fflag;		/* -f: ignore \f */
int sflag;		/* -s: ignore \s */

/* Bookkeeping for the knowncmds table. */
int ncmds;		/* number of entries in use in knowncmds */
int slot;		/* insertion point left behind by a failed binsrch */

char *malloc();
176
177 main(argc, argv)
178 int argc;
179 char **argv;
180 {
181 FILE *f;
182 int i;
183 char *cp;
184 char b1[4];
185
186 /* Figure out how many known commands there are */
187 while (knowncmds[ncmds])
188 ncmds++;
189 while (argc > 1 && argv[1][0] == '-') {
190 switch(argv[1][1]) {
191
192 /* -a: add pairs of macros */
193 case 'a':
194 i = strlen(argv[1]) - 2;
195 if (i % 6 != 0)
196 usage();
197 /* look for empty macro slots */
198 for (i=0; br[i].opbr; i++)
199 ;
200 for (cp=argv[1]+3; cp[-1]; cp += 6) {
201 br[i].opbr = malloc(3);
202 strncpy(br[i].opbr, cp, 2);
203 br[i].clbr = malloc(3);
204 strncpy(br[i].clbr, cp+3, 2);
205 addmac(br[i].opbr); /* knows pairs are also known cmds */
206 addmac(br[i].clbr);
207 i++;
208 }
209 break;
210
211 /* -c: add known commands */
212 case 'c':
213 i = strlen(argv[1]) - 2;
214 if (i % 3 != 0)
215 usage();
216 for (cp=argv[1]+3; cp[-1]; cp += 3) {
217 if (cp[2] && cp[2] != '.')
218 usage();
219 strncpy(b1, cp, 2);
220 addmac(b1);
221 }
222 break;
223
224 /* -f: ignore font changes */
225 case 'f':
226 fflag = 1;
227 break;
228
229 /* -s: ignore size changes */
230 case 's':
231 sflag = 1;
232 break;
233 default:
234 usage();
235 }
236 argc--; argv++;
237 }
238
239 nfiles = argc - 1;
240
241 if (nfiles > 0) {
242 for (i=1; i<argc; i++) {
243 cfilename = argv[i];
244 f = fopen(cfilename, "r");
245 if (f == NULL)
246 perror(cfilename);
247 else
248 process(f);
249 }
250 } else {
251 cfilename = "stdin";
252 process(stdin);
253 }
254 exit(0);
255 }
256
/*
 * usage: print the command synopsis on standard output and terminate
 * the program with exit status 1.  It never returns; the int return
 * type only matches the implicit declaration its callers rely on.
 */
int
usage(void)
{
	printf("Usage: checknr -s -f -a.xx.yy.xx.yy... -c.xx.xx.xx...\n");
	exit(1);
	/* NOTREACHED */
	return 1;
}
262
263 process(f)
264 FILE *f;
265 {
266 register int i, n;
267 char mac[5]; /* The current macro or nroff command */
268 int pl;
269
270 stktop = -1;
271 for (lineno = 1; fgets(line, sizeof line, f); lineno++) {
272 if (line[0] == '.') {
273 /*
274 * find and isolate the macro/command name.
275 */
276 strncpy(mac, line+1, 4);
277 if (isspace(mac[0])) {
278 pe(lineno);
279 printf("Empty command\n");
280 } else if (isspace(mac[1])) {
281 mac[1] = 0;
282 } else if (isspace(mac[2])) {
283 mac[2] = 0;
284 } else if (mac[0] != '\\' || mac[1] != '\"') {
285 pe(lineno);
286 printf("Command too long\n");
287 }
288
289 /*
290 * Is it a known command?
291 */
292 checkknown(mac);
293
294 /*
295 * Should we add it?
296 */
297 if (eq(mac, "de"))
298 addcmd(line);
299
300 chkcmd(line, mac);
301 }
302
303 /*
304 * At this point we process the line looking
305 * for \s and \f.
306 */
307 for (i=0; line[i]; i++)
308 if (line[i]=='\\' && (i==0 || line[i-1]!='\\')) {
309 if (!sflag && line[++i]=='s') {
310 pl = line[++i];
311 if (isdigit(pl)) {
312 n = pl - '0';
313 pl = ' ';
314 } else
315 n = 0;
316 while (isdigit(line[++i]))
317 n = 10 * n + line[i] - '0';
318 i--;
319 if (n == 0) {
320 if (stk[stktop].opno == SZ) {
321 stktop--;
322 } else {
323 pe(lineno);
324 printf("unmatched \\s0\n");
325 }
326 } else {
327 stk[++stktop].opno = SZ;
328 stk[stktop].pl = pl;
329 stk[stktop].parm = n;
330 stk[stktop].lno = lineno;
331 }
332 } else if (!fflag && line[i]=='f') {
333 n = line[++i];
334 if (n == 'P') {
335 if (stk[stktop].opno == FT) {
336 stktop--;
337 } else {
338 pe(lineno);
339 printf("unmatched \\fP\n");
340 }
341 } else {
342 stk[++stktop].opno = FT;
343 stk[stktop].pl = 1;
344 stk[stktop].parm = n;
345 stk[stktop].lno = lineno;
346 }
347 }
348 }
349 }
350 /*
351 * We've hit the end and look at all this stuff that hasn't been
352 * matched yet! Complain, complain.
353 */
354 for (i=stktop; i>=0; i--) {
355 complain(i);
356 }
357 }
358
359 complain(i)
360 {
361 pe(stk[i].lno);
362 printf("Unmatched ");
363 prop(i);
364 printf("\n");
365 }
366
367 prop(i)
368 {
369 if (stk[i].pl == 0)
370 printf(".%s", br[stk[i].opno].opbr);
371 else switch(stk[i].opno) {
372 case SZ:
373 printf("\\s%c%d", stk[i].pl, stk[i].parm);
374 break;
375 case FT:
376 printf("\\f%c", stk[i].parm);
377 break;
378 default:
379 printf("Bug: stk[%d].opno = %d = .%s, .%s",
380 i, stk[i].opno, br[stk[i].opno].opbr, br[stk[i].opno].clbr);
381 }
382 }
383
384 chkcmd(line, mac)
385 char *line;
386 char *mac;
387 {
388 register int i, n;
389
390 /*
391 * Check to see if it matches top of stack.
392 */
393 if (stktop >= 0 && eq(mac, br[stk[stktop].opno].clbr))
394 stktop--; /* OK. Pop & forget */
395 else {
396 /* No. Maybe it's an opener */
397 for (i=0; br[i].opbr; i++) {
398 if (eq(mac, br[i].opbr)) {
399 /* Found. Push it. */
400 stktop++;
401 stk[stktop].opno = i;
402 stk[stktop].pl = 0;
403 stk[stktop].parm = 0;
404 stk[stktop].lno = lineno;
405 break;
406 }
407 /*
408 * Maybe it's an unmatched closer.
409 * NOTE: this depends on the fact
410 * that none of the closers can be
411 * openers too.
412 */
413 if (eq(mac, br[i].clbr)) {
414 nomatch(mac);
415 break;
416 }
417 }
418 }
419 }
420
421 nomatch(mac)
422 char *mac;
423 {
424 register int i, j;
425
426 /*
427 * Look for a match further down on stack
428 * If we find one, it suggests that the stuff in
429 * between is supposed to match itself.
430 */
431 for (j=stktop; j>=0; j--)
432 if (eq(mac,br[stk[j].opno].clbr)) {
433 /* Found. Make a good diagnostic. */
434 if (j == stktop-2) {
435 /*
436 * Check for special case \fx..\fR and don't
437 * complain.
438 */
439 if (stk[j+1].opno==FT && stk[j+1].parm!='R'
440 && stk[j+2].opno==FT && stk[j+2].parm=='R') {
441 stktop = j -1;
442 return;
443 }
444 /*
445 * We have two unmatched frobs. Chances are
446 * they were intended to match, so we mention
447 * them together.
448 */
449 pe(stk[j+1].lno);
450 prop(j+1);
451 printf(" does not match %d: ", stk[j+2].lno);
452 prop(j+2);
453 printf("\n");
454 } else for (i=j+1; i <= stktop; i++) {
455 complain(i);
456 }
457 stktop = j-1;
458 return;
459 }
460 /* Didn't find one. Throw this away. */
461 pe(lineno);
462 printf("Unmatched .%s\n", mac);
463 }
464
/* eq: return 1 when the two strings have the same contents, 0 otherwise. */
int
eq(const char *s1, const char *s2)
{
	return strcmp(s1, s2) == 0;
}
471
472 /* print the first part of an error message, given the line number */
473 pe(lineno)
474 int lineno;
475 {
476 if (nfiles > 1)
477 printf("%s: ", cfilename);
478 printf("%d: ", lineno);
479 }
480
481 checkknown(mac)
482 char *mac;
483 {
484
485 if (eq(mac, "."))
486 return;
487 if (binsrch(mac) >= 0)
488 return;
489 if (mac[0] == '\\' && mac[1] == '"') /* comments */
490 return;
491
492 pe(lineno);
493 printf("Unknown command: .%s\n", mac);
494 }
495
496 /*
497 * We have a .de xx line in "line". Add xx to the list of known commands.
498 */
499 addcmd(line)
500 char *line;
501 {
502 char *mac;
503
504 /* grab the macro being defined */
505 mac = line+4;
506 while (isspace(*mac))
507 mac++;
508 if (*mac == 0) {
509 pe(lineno);
510 printf("illegal define: %s\n", line);
511 return;
512 }
513 mac[2] = 0;
514 if (isspace(mac[1]) || mac[1] == '\\')
515 mac[1] = 0;
516 if (ncmds >= MAXCMDS) {
517 printf("Only %d known commands allowed\n", MAXCMDS);
518 exit(1);
519 }
520 addmac(mac);
521 }
522
523 /*
524 * Add mac to the list. We should really have some kind of tree
525 * structure here but this is a quick-and-dirty job and I just don't
526 * have time to mess with it. (I wonder if this will come back to haunt
527 * me someday?) Anyway, I claim that .de is fairly rare in user
528 * nroff programs, and the register loop below is pretty fast.
529 */
530 addmac(mac)
531 char *mac;
532 {
533 register char **src, **dest, **loc;
534
535 if (binsrch(mac) >= 0){ /* it's OK to redefine something */
536 #ifdef DEBUG
537 printf("binsrch(%s) -> already in table\n", mac);
538 #endif DEBUG
539 return;
540 }
541 /* binsrch sets slot as a side effect */
542 #ifdef DEBUG
543 printf("binsrch(%s) -> %d\n", mac, slot);
544 #endif
545 loc = &knowncmds[slot];
546 src = &knowncmds[ncmds-1];
547 dest = src+1;
548 while (dest > loc)
549 *dest-- = *src--;
550 *loc = malloc(3);
551 strcpy(*loc, mac);
552 ncmds++;
553 #ifdef DEBUG
554 printf("after: %s %s %s %s %s, %d cmds\n", knowncmds[slot-2], knowncmds[slot-1], knowncmds[slot], knowncmds[slot+1], knowncmds[slot+2], ncmds);
555 #endif
556 }
557
558 /*
559 * Do a binary search in knowncmds for mac.
560 * If found, return the index. If not, return -1.
561 */
562 binsrch(mac)
563 char *mac;
564 {
565 register char *p; /* pointer to current cmd in list */
566 register int d; /* difference if any */
567 register int mid; /* mid point in binary search */
568 register int top, bot; /* boundaries of bin search, inclusive */
569
570 top = ncmds-1;
571 bot = 0;
572 while (top >= bot) {
573 mid = (top+bot)/2;
574 p = knowncmds[mid];
575 d = p[0] - mac[0];
576 if (d == 0)
577 d = p[1] - mac[1];
578 if (d == 0)
579 return mid;
580 if (d < 0)
581 bot = mid + 1;
582 else
583 top = mid - 1;
584 }
585 slot = bot; /* place it would have gone */
586 return -1;
587 }
588