Home | History | Annotate | Line # | Download | only in checknr
checknr.c revision 1.3
      1 /*
      2  * Copyright (c) 1980 The Regents of the University of California.
      3  * All rights reserved.
      4  *
      5  * Redistribution and use in source and binary forms, with or without
      6  * modification, are permitted provided that the following conditions
      7  * are met:
      8  * 1. Redistributions of source code must retain the above copyright
      9  *    notice, this list of conditions and the following disclaimer.
     10  * 2. Redistributions in binary form must reproduce the above copyright
     11  *    notice, this list of conditions and the following disclaimer in the
     12  *    documentation and/or other materials provided with the distribution.
     13  * 3. All advertising materials mentioning features or use of this software
     14  *    must display the following acknowledgement:
     15  *	This product includes software developed by the University of
     16  *	California, Berkeley and its contributors.
     17  * 4. Neither the name of the University nor the names of its contributors
     18  *    may be used to endorse or promote products derived from this software
     19  *    without specific prior written permission.
     20  *
     21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     31  * SUCH DAMAGE.
     32  */
     33 
#ifndef lint
/*
 * Identification strings compiled into the object unless `lint' is
 * defined: the copyright notice and the revision-control id.
 */
char copyright[] =
	"@(#) Copyright (c) 1980 The Regents of the University of California.\n"
	" All rights reserved.\n";

/*static char sccsid[] = "from: @(#)checknr.c	5.4 (Berkeley) 6/1/90";*/
static char rcsid[] = "$Id: checknr.c,v 1.3 1994/12/24 15:57:02 cgd Exp $";
#endif /* not lint */
     44 
/*
 * checknr: check an nroff/troff input file for matching macro calls.
 * We also attempt to match size and font changes, but only the embedded
 * kind.  These must end in \s0 and \fP respectively.  Maybe more
 * sophistication later, but for now think of these restrictions as
 * contributions to structured typesetting.
 */
     52 #include <stdio.h>
     53 #include <string.h>
     54 #include <ctype.h>
     55 
/* Fixed capacities of the tables used by checknr. */
enum {
	MAXSTK = 100,	/* Stack size */
	MAXBR = 100,	/* Max number of bracket pairs known */
	MAXCMDS = 500	/* Max number of commands known */
};
     59 
/*
 * The stack of openers seen so far that have not yet been closed.
 * One entry is pushed for every opening macro, \s size change or
 * \f font change, and popped again when its closer turns up.
 */
struct stkstr {
	int opno;	/* index into br[] of the opening bracket */
	int pl;		/* '+', '-', ' ' for \s, 1 for \f, 0 for a macro such as .ft */
	int parm;	/* parm to size, font, etc; 0 for macros */
	int lno;	/* input line number the opener was seen on */
};

struct stkstr stk[MAXSTK];	/* the stack itself */
int stktop;			/* index of the top entry, -1 when the stack is empty */
     70 
/*
 * The kinds of opening and closing brackets.
 *
 * Each entry pairs an opening macro name with the macro that closes it.
 * The table is searched linearly and ends at the first entry whose
 * opbr is null; the -a option appends further pairs at that point.
 * SZ and FT are the indices of the first two entries, which are also
 * used for the in-line \s and \f escapes.
 */
#define SZ	0
#define FT	1

struct brstr {
	char *opbr;	/* name of the opening macro */
	char *clbr;	/* name of the macro that closes it */
};

struct brstr br[MAXBR] = {
	/* A few bare bones troff commands */
	{ "sz",	"sz" },	/* also \s */
	{ "ft",	"ft" },	/* also \f */
	/* the -mm package */
	{ "AL",	"LE" },
	{ "AS",	"AE" },
	{ "BL",	"LE" },
	{ "BS",	"BE" },
	{ "DF",	"DE" },
	{ "DL",	"LE" },
	{ "DS",	"DE" },
	{ "FS",	"FE" },
	{ "ML",	"LE" },
	{ "NS",	"NE" },
	{ "RL",	"LE" },
	{ "VL",	"LE" },
	/* the -ms package */
	{ "AB",	"AE" },
	{ "BD",	"DE" },
	{ "CD",	"DE" },
	{ "DS",	"DE" },
	{ "FS",	"FE" },
	{ "ID",	"DE" },
	{ "KF",	"KE" },
	{ "KS",	"KE" },
	{ "LD",	"DE" },
	{ "LG",	"NL" },
	{ "QS",	"QE" },
	{ "RS",	"RE" },
	{ "SM",	"NL" },
	{ "XA",	"XE" },
	{ "XS",	"XE" },
	/* The -me package */
	{ "(b",	")b" },
	{ "(c",	")c" },
	{ "(d",	")d" },
	{ "(f",	")f" },
	{ "(l",	")l" },
	{ "(q",	")q" },
	{ "(x",	")x" },
	{ "(z",	")z" },
	/* Things needed by preprocessors */
	{ "EQ",	"EN" },
	{ "TS",	"TE" },
	/* Refer */
	{ "[",	"]" },
	/* end of table */
	{ NULL,	NULL }
};
    128 
    129 /*
    130  * All commands known to nroff, plus macro packages.
    131  * Used so we can complain about unrecognized commands.
    132  */
    133 char *knowncmds[MAXCMDS] = {
    134 "$c", "$f", "$h", "$p", "$s", "(b", "(c", "(d", "(f", "(l", "(q", "(t",
    135 "(x", "(z", ")b", ")c", ")d", ")f", ")l", ")q", ")t", ")x", ")z", "++",
    136 "+c", "1C", "1c", "2C", "2c", "@(", "@)", "@C", "@D", "@F", "@I", "@M",
    137 "@c", "@e", "@f", "@h", "@m", "@n", "@o", "@p", "@r", "@t", "@z", "AB",
    138 "AE", "AF", "AI", "AL", "AM", "AS", "AT", "AU", "AX", "B",  "B1", "B2",
    139 "BD", "BE", "BG", "BL", "BS", "BT", "BX", "C1", "C2", "CD", "CM", "CT",
    140 "D",  "DA", "DE", "DF", "DL", "DS", "DT", "EC", "EF", "EG", "EH", "EM",
    141 "EN", "EQ", "EX", "FA", "FD", "FE", "FG", "FJ", "FK", "FL", "FN", "FO",
    142 "FQ", "FS", "FV", "FX", "H",  "HC", "HD", "HM", "HO", "HU", "I",  "ID",
    143 "IE", "IH", "IM", "IP", "IX", "IZ", "KD", "KE", "KF", "KQ", "KS", "LB",
    144 "LC", "LD", "LE", "LG", "LI", "LP", "MC", "ME", "MF", "MH", "ML", "MR",
    145 "MT", "ND", "NE", "NH", "NL", "NP", "NS", "OF", "OH", "OK", "OP", "P",
    146 "P1", "PF", "PH", "PP", "PT", "PX", "PY", "QE", "QP", "QS", "R",  "RA",
    147 "RC", "RE", "RL", "RP", "RQ", "RS", "RT", "S",  "S0", "S2", "S3", "SA",
    148 "SG", "SH", "SK", "SM", "SP", "SY", "T&", "TA", "TB", "TC", "TD", "TE",
    149 "TH", "TL", "TM", "TP", "TQ", "TR", "TS", "TX", "UL", "US", "UX", "VL",
    150 "WC", "WH", "XA", "XD", "XE", "XF", "XK", "XP", "XS", "[",  "[-", "[0",
    151 "[1", "[2", "[3", "[4", "[5", "[<", "[>", "[]", "]",  "]-", "]<", "]>",
    152 "][", "ab", "ac", "ad", "af", "am", "ar", "as", "b",  "ba", "bc", "bd",
    153 "bi", "bl", "bp", "br", "bx", "c.", "c2", "cc", "ce", "cf", "ch", "cs",
    154 "ct", "cu", "da", "de", "di", "dl", "dn", "ds", "dt", "dw", "dy", "ec",
    155 "ef", "eh", "el", "em", "eo", "ep", "ev", "ex", "fc", "fi", "fl", "fo",
    156 "fp", "ft", "fz", "hc", "he", "hl", "hp", "ht", "hw", "hx", "hy", "i",
    157 "ie", "if", "ig", "in", "ip", "it", "ix", "lc", "lg", "li", "ll", "ln",
    158 "lo", "lp", "ls", "lt", "m1", "m2", "m3", "m4", "mc", "mk", "mo", "n1",
    159 "n2", "na", "ne", "nf", "nh", "nl", "nm", "nn", "np", "nr", "ns", "nx",
    160 "of", "oh", "os", "pa", "pc", "pi", "pl", "pm", "pn", "po", "pp", "ps",
    161 "q",  "r",  "rb", "rd", "re", "rm", "rn", "ro", "rr", "rs", "rt", "sb",
    162 "sc", "sh", "sk", "so", "sp", "ss", "st", "sv", "sz", "ta", "tc", "th",
    163 "ti", "tl", "tm", "tp", "tr", "u",  "uf", "uh", "ul", "vs", "wh", "xp",
    164 "yr", 0
    165 };
    166 
/*
 * Program-wide state shared by the routines below.
 */
int	lineno;		/* current line number in input file */
char	line[256];	/* the current line */
char	*cfilename;	/* name of current file */
int	nfiles;		/* number of files to process */
int	fflag;		/* -f: ignore \f */
int	sflag;		/* -s: ignore \s */
int	ncmds;		/* number of entries in use in knowncmds */
int	slot;		/* where binsrch says a missing command would go */

/*
 * Library routines that live in <stdlib.h>, which this file does not
 * include.  They are declared with their standard prototypes (size_t
 * comes from <stdio.h>) so that malloc's argument is converted to
 * size_t and neither routine is used through an implicit declaration.
 */
void	*malloc(size_t);
void	exit(int);

/*
 * Forward declarations for the routines defined below.  They are all
 * called before their definitions, which C99 and later no longer allow
 * without a declaration in scope.
 */
int	usage(void);
int	process(FILE *f);
int	complain(int i);
int	prop(int i);
int	chkcmd(char *line, char *mac);
int	nomatch(char *mac);
int	eq(char *s1, char *s2);
int	pe(int lineno);
int	checkknown(char *mac);
int	addcmd(char *line);
int	addmac(char *mac);
int	binsrch(char *mac);
    177 
    178 main(argc, argv)
    179 int argc;
    180 char **argv;
    181 {
    182 	FILE *f;
    183 	int i;
    184 	char *cp;
    185 	char b1[4];
    186 
    187 	/* Figure out how many known commands there are */
    188 	while (knowncmds[ncmds])
    189 		ncmds++;
    190 	while (argc > 1 && argv[1][0] == '-') {
    191 		switch(argv[1][1]) {
    192 
    193 		/* -a: add pairs of macros */
    194 		case 'a':
    195 			i = strlen(argv[1]) - 2;
    196 			if (i % 6 != 0)
    197 				usage();
    198 			/* look for empty macro slots */
    199 			for (i=0; br[i].opbr; i++)
    200 				;
    201 			for (cp=argv[1]+3; cp[-1]; cp += 6) {
    202 				br[i].opbr = malloc(3);
    203 				strncpy(br[i].opbr, cp, 2);
    204 				br[i].clbr = malloc(3);
    205 				strncpy(br[i].clbr, cp+3, 2);
    206 				addmac(br[i].opbr);	/* knows pairs are also known cmds */
    207 				addmac(br[i].clbr);
    208 				i++;
    209 			}
    210 			break;
    211 
    212 		/* -c: add known commands */
    213 		case 'c':
    214 			i = strlen(argv[1]) - 2;
    215 			if (i % 3 != 0)
    216 				usage();
    217 			for (cp=argv[1]+3; cp[-1]; cp += 3) {
    218 				if (cp[2] && cp[2] != '.')
    219 					usage();
    220 				strncpy(b1, cp, 2);
    221 				addmac(b1);
    222 			}
    223 			break;
    224 
    225 		/* -f: ignore font changes */
    226 		case 'f':
    227 			fflag = 1;
    228 			break;
    229 
    230 		/* -s: ignore size changes */
    231 		case 's':
    232 			sflag = 1;
    233 			break;
    234 		default:
    235 			usage();
    236 		}
    237 		argc--; argv++;
    238 	}
    239 
    240 	nfiles = argc - 1;
    241 
    242 	if (nfiles > 0) {
    243 		for (i=1; i<argc; i++) {
    244 			cfilename = argv[i];
    245 			f = fopen(cfilename, "r");
    246 			if (f == NULL)
    247 				perror(cfilename);
    248 			else
    249 				process(f);
    250 		}
    251 	} else {
    252 		cfilename = "stdin";
    253 		process(stdin);
    254 	}
    255 	exit(0);
    256 }
    257 
/*
 * usage: print the command synopsis on standard output and exit with
 * status 1.  Does not return.
 */
int
usage(void)
{
	printf("Usage: checknr -s -f -a.xx.yy.xx.yy... -c.xx.xx.xx...\n");
	exit(1);
	/* NOTREACHED */
}
    263 
    264 process(f)
    265 FILE *f;
    266 {
    267 	register int i, n;
    268 	char mac[5];	/* The current macro or nroff command */
    269 	int pl;
    270 
    271 	stktop = -1;
    272 	for (lineno = 1; fgets(line, sizeof line, f); lineno++) {
    273 		if (line[0] == '.') {
    274 			/*
    275 			 * find and isolate the macro/command name.
    276 			 */
    277 			strncpy(mac, line+1, 4);
    278 			if (isspace(mac[0])) {
    279 				pe(lineno);
    280 				printf("Empty command\n");
    281 			} else if (isspace(mac[1])) {
    282 				mac[1] = 0;
    283 			} else if (isspace(mac[2])) {
    284 				mac[2] = 0;
    285 			} else if (mac[0] != '\\' || mac[1] != '\"') {
    286 				pe(lineno);
    287 				printf("Command too long\n");
    288 			}
    289 
    290 			/*
    291 			 * Is it a known command?
    292 			 */
    293 			checkknown(mac);
    294 
    295 			/*
    296 			 * Should we add it?
    297 			 */
    298 			if (eq(mac, "de"))
    299 				addcmd(line);
    300 
    301 			chkcmd(line, mac);
    302 		}
    303 
    304 		/*
    305 		 * At this point we process the line looking
    306 		 * for \s and \f.
    307 		 */
    308 		for (i=0; line[i]; i++)
    309 			if (line[i]=='\\' && (i==0 || line[i-1]!='\\')) {
    310 				if (!sflag && line[++i]=='s') {
    311 					pl = line[++i];
    312 					if (isdigit(pl)) {
    313 						n = pl - '0';
    314 						pl = ' ';
    315 					} else
    316 						n = 0;
    317 					while (isdigit(line[++i]))
    318 						n = 10 * n + line[i] - '0';
    319 					i--;
    320 					if (n == 0) {
    321 						if (stk[stktop].opno == SZ) {
    322 							stktop--;
    323 						} else {
    324 							pe(lineno);
    325 							printf("unmatched \\s0\n");
    326 						}
    327 					} else {
    328 						stk[++stktop].opno = SZ;
    329 						stk[stktop].pl = pl;
    330 						stk[stktop].parm = n;
    331 						stk[stktop].lno = lineno;
    332 					}
    333 				} else if (!fflag && line[i]=='f') {
    334 					n = line[++i];
    335 					if (n == 'P') {
    336 						if (stk[stktop].opno == FT) {
    337 							stktop--;
    338 						} else {
    339 							pe(lineno);
    340 							printf("unmatched \\fP\n");
    341 						}
    342 					} else {
    343 						stk[++stktop].opno = FT;
    344 						stk[stktop].pl = 1;
    345 						stk[stktop].parm = n;
    346 						stk[stktop].lno = lineno;
    347 					}
    348 				}
    349 			}
    350 	}
    351 	/*
    352 	 * We've hit the end and look at all this stuff that hasn't been
    353 	 * matched yet!  Complain, complain.
    354 	 */
    355 	for (i=stktop; i>=0; i--) {
    356 		complain(i);
    357 	}
    358 }
    359 
    360 complain(i)
    361 {
    362 	pe(stk[i].lno);
    363 	printf("Unmatched ");
    364 	prop(i);
    365 	printf("\n");
    366 }
    367 
    368 prop(i)
    369 {
    370 	if (stk[i].pl == 0)
    371 		printf(".%s", br[stk[i].opno].opbr);
    372 	else switch(stk[i].opno) {
    373 	case SZ:
    374 		printf("\\s%c%d", stk[i].pl, stk[i].parm);
    375 		break;
    376 	case FT:
    377 		printf("\\f%c", stk[i].parm);
    378 		break;
    379 	default:
    380 		printf("Bug: stk[%d].opno = %d = .%s, .%s",
    381 			i, stk[i].opno, br[stk[i].opno].opbr, br[stk[i].opno].clbr);
    382 	}
    383 }
    384 
    385 chkcmd(line, mac)
    386 char *line;
    387 char *mac;
    388 {
    389 	register int i, n;
    390 
    391 	/*
    392 	 * Check to see if it matches top of stack.
    393 	 */
    394 	if (stktop >= 0 && eq(mac, br[stk[stktop].opno].clbr))
    395 		stktop--;	/* OK. Pop & forget */
    396 	else {
    397 		/* No. Maybe it's an opener */
    398 		for (i=0; br[i].opbr; i++) {
    399 			if (eq(mac, br[i].opbr)) {
    400 				/* Found. Push it. */
    401 				stktop++;
    402 				stk[stktop].opno = i;
    403 				stk[stktop].pl = 0;
    404 				stk[stktop].parm = 0;
    405 				stk[stktop].lno = lineno;
    406 				break;
    407 			}
    408 			/*
    409 			 * Maybe it's an unmatched closer.
    410 			 * NOTE: this depends on the fact
    411 			 * that none of the closers can be
    412 			 * openers too.
    413 			 */
    414 			if (eq(mac, br[i].clbr)) {
    415 				nomatch(mac);
    416 				break;
    417 			}
    418 		}
    419 	}
    420 }
    421 
    422 nomatch(mac)
    423 char *mac;
    424 {
    425 	register int i, j;
    426 
    427 	/*
    428 	 * Look for a match further down on stack
    429 	 * If we find one, it suggests that the stuff in
    430 	 * between is supposed to match itself.
    431 	 */
    432 	for (j=stktop; j>=0; j--)
    433 		if (eq(mac,br[stk[j].opno].clbr)) {
    434 			/* Found.  Make a good diagnostic. */
    435 			if (j == stktop-2) {
    436 				/*
    437 				 * Check for special case \fx..\fR and don't
    438 				 * complain.
    439 				 */
    440 				if (stk[j+1].opno==FT && stk[j+1].parm!='R'
    441 				 && stk[j+2].opno==FT && stk[j+2].parm=='R') {
    442 					stktop = j -1;
    443 					return;
    444 				}
    445 				/*
    446 				 * We have two unmatched frobs.  Chances are
    447 				 * they were intended to match, so we mention
    448 				 * them together.
    449 				 */
    450 				pe(stk[j+1].lno);
    451 				prop(j+1);
    452 				printf(" does not match %d: ", stk[j+2].lno);
    453 				prop(j+2);
    454 				printf("\n");
    455 			} else for (i=j+1; i <= stktop; i++) {
    456 				complain(i);
    457 			}
    458 			stktop = j-1;
    459 			return;
    460 		}
    461 	/* Didn't find one.  Throw this away. */
    462 	pe(lineno);
    463 	printf("Unmatched .%s\n", mac);
    464 }
    465 
/*
 * eq: are two strings equal?
 * Returns 1 when s1 and s2 hold the same characters, 0 otherwise.
 */
int
eq(char *s1, char *s2)
{
	return strcmp(s1, s2) == 0;
}
    472 
    473 /* print the first part of an error message, given the line number */
    474 pe(lineno)
    475 int lineno;
    476 {
    477 	if (nfiles > 1)
    478 		printf("%s: ", cfilename);
    479 	printf("%d: ", lineno);
    480 }
    481 
    482 checkknown(mac)
    483 char *mac;
    484 {
    485 
    486 	if (eq(mac, "."))
    487 		return;
    488 	if (binsrch(mac) >= 0)
    489 		return;
    490 	if (mac[0] == '\\' && mac[1] == '"')	/* comments */
    491 		return;
    492 
    493 	pe(lineno);
    494 	printf("Unknown command: .%s\n", mac);
    495 }
    496 
    497 /*
    498  * We have a .de xx line in "line".  Add xx to the list of known commands.
    499  */
    500 addcmd(line)
    501 char *line;
    502 {
    503 	char *mac;
    504 
    505 	/* grab the macro being defined */
    506 	mac = line+4;
    507 	while (isspace(*mac))
    508 		mac++;
    509 	if (*mac == 0) {
    510 		pe(lineno);
    511 		printf("illegal define: %s\n", line);
    512 		return;
    513 	}
    514 	mac[2] = 0;
    515 	if (isspace(mac[1]) || mac[1] == '\\')
    516 		mac[1] = 0;
    517 	if (ncmds >= MAXCMDS) {
    518 		printf("Only %d known commands allowed\n", MAXCMDS);
    519 		exit(1);
    520 	}
    521 	addmac(mac);
    522 }
    523 
    524 /*
    525  * Add mac to the list.  We should really have some kind of tree
    526  * structure here but this is a quick-and-dirty job and I just don't
    527  * have time to mess with it.  (I wonder if this will come back to haunt
    528  * me someday?)  Anyway, I claim that .de is fairly rare in user
    529  * nroff programs, and the register loop below is pretty fast.
    530  */
    531 addmac(mac)
    532 char *mac;
    533 {
    534 	register char **src, **dest, **loc;
    535 
    536 	if (binsrch(mac) >= 0){	/* it's OK to redefine something */
    537 #ifdef DEBUG
    538 		printf("binsrch(%s) -> already in table\n", mac);
    539 #endif DEBUG
    540 		return;
    541 	}
    542 	/* binsrch sets slot as a side effect */
    543 #ifdef DEBUG
    544 printf("binsrch(%s) -> %d\n", mac, slot);
    545 #endif
    546 	loc = &knowncmds[slot];
    547 	src = &knowncmds[ncmds-1];
    548 	dest = src+1;
    549 	while (dest > loc)
    550 		*dest-- = *src--;
    551 	*loc = malloc(3);
    552 	strcpy(*loc, mac);
    553 	ncmds++;
    554 #ifdef DEBUG
    555 printf("after: %s %s %s %s %s, %d cmds\n", knowncmds[slot-2], knowncmds[slot-1], knowncmds[slot], knowncmds[slot+1], knowncmds[slot+2], ncmds);
    556 #endif
    557 }
    558 
    559 /*
    560  * Do a binary search in knowncmds for mac.
    561  * If found, return the index.  If not, return -1.
    562  */
    563 binsrch(mac)
    564 char *mac;
    565 {
    566 	register char *p;	/* pointer to current cmd in list */
    567 	register int d;		/* difference if any */
    568 	register int mid;	/* mid point in binary search */
    569 	register int top, bot;	/* boundaries of bin search, inclusive */
    570 
    571 	top = ncmds-1;
    572 	bot = 0;
    573 	while (top >= bot) {
    574 		mid = (top+bot)/2;
    575 		p = knowncmds[mid];
    576 		d = p[0] - mac[0];
    577 		if (d == 0)
    578 			d = p[1] - mac[1];
    579 		if (d == 0)
    580 			return mid;
    581 		if (d < 0)
    582 			bot = mid + 1;
    583 		else
    584 			top = mid - 1;
    585 	}
    586 	slot = bot;	/* place it would have gone */
    587 	return -1;
    588 }
    589