Home | History | Annotate | Line # | Download | only in checknr
checknr.c revision 1.2
      1 /*
      2  * Copyright (c) 1980 The Regents of the University of California.
      3  * All rights reserved.
      4  *
      5  * Redistribution and use in source and binary forms, with or without
      6  * modification, are permitted provided that the following conditions
      7  * are met:
      8  * 1. Redistributions of source code must retain the above copyright
      9  *    notice, this list of conditions and the following disclaimer.
     10  * 2. Redistributions in binary form must reproduce the above copyright
     11  *    notice, this list of conditions and the following disclaimer in the
     12  *    documentation and/or other materials provided with the distribution.
     13  * 3. All advertising materials mentioning features or use of this software
     14  *    must display the following acknowledgement:
     15  *	This product includes software developed by the University of
     16  *	California, Berkeley and its contributors.
     17  * 4. Neither the name of the University nor the names of its contributors
     18  *    may be used to endorse or promote products derived from this software
     19  *    without specific prior written permission.
     20  *
     21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     31  * SUCH DAMAGE.
     32  */
     33 
#ifndef lint
/* Identification strings embedded in the object file; omitted under lint. */
char copyright[] =
	"@(#) Copyright (c) 1980 The Regents of the University of California.\n"
	" All rights reserved.\n";

/*static char sccsid[] = "from: @(#)checknr.c	5.4 (Berkeley) 6/1/90";*/
static char rcsid[] =
	"$Id: checknr.c,v 1.2 1993/08/01 18:18:04 mycroft Exp $";
#endif /* not lint */
     44 
     45 /*
     46  * checknr: check an nroff/troff input file for matching macro calls.
     47  * we also attempt to match size and font changes, but only the embedded
     48  * kind.  These must end in \s0 and \fP resp.  Maybe more sophistication
     49  * later but for now think of these restrictions as contributions to
     50  * structured typesetting.
     51  */
#include <ctype.h>
#include <stdio.h>
#include <string.h>
     54 
     55 #define MAXSTK	100	/* Stack size */
     56 #define MAXBR	100	/* Max number of bracket pairs known */
     57 #define MAXCMDS	500	/* Max number of commands known */
     58 
/*
 * One entry for every bracket that has been opened and not yet closed.
 */
struct stkstr {
	int opno;	/* index into br[] of the opening bracket */
	int pl;		/* '+', '-', ' ' for \s, 1 for \f, 0 for .ft */
	int parm;	/* parm to size, font, etc */
	int lno;	/* input line on which the bracket was opened */
};

struct stkstr stk[MAXSTK];	/* the stack of open brackets */
int stktop;			/* index of the top entry of stk[] */
     69 
/*
 * The kinds of opening and closing brackets.
 * The table ends at the first entry whose opbr is a null pointer.
 */
struct brstr {
	char *opbr;	/* name of the opening macro */
	char *clbr;	/* name of the macro that closes it */
};

/* Indices of the two entries that also stand for embedded escapes. */
#define SZ	0
#define FT	1

struct brstr br[MAXBR] = {
	/* A few bare bones troff commands */
	{ "sz",	"sz" },	/* also \s */
	{ "ft",	"ft" },	/* also \f */
	/* the -mm package */
	{ "AL",	"LE" },
	{ "AS",	"AE" },
	{ "BL",	"LE" },
	{ "BS",	"BE" },
	{ "DF",	"DE" },
	{ "DL",	"LE" },
	{ "DS",	"DE" },
	{ "FS",	"FE" },
	{ "ML",	"LE" },
	{ "NS",	"NE" },
	{ "RL",	"LE" },
	{ "VL",	"LE" },
	/* the -ms package */
	{ "AB",	"AE" },
	{ "BD",	"DE" },
	{ "CD",	"DE" },
	{ "DS",	"DE" },
	{ "FS",	"FE" },
	{ "ID",	"DE" },
	{ "KF",	"KE" },
	{ "KS",	"KE" },
	{ "LD",	"DE" },
	{ "LG",	"NL" },
	{ "QS",	"QE" },
	{ "RS",	"RE" },
	{ "SM",	"NL" },
	{ "XA",	"XE" },
	{ "XS",	"XE" },
	/* The -me package */
	{ "(b",	")b" },
	{ "(c",	")c" },
	{ "(d",	")d" },
	{ "(f",	")f" },
	{ "(l",	")l" },
	{ "(q",	")q" },
	{ "(x",	")x" },
	{ "(z",	")z" },
	/* Things needed by preprocessors */
	{ "EQ",	"EN" },
	{ "TS",	"TE" },
	/* Refer */
	{ "[",	"]" },
	{ NULL,	NULL }
};
    127 
    128 /*
    129  * All commands known to nroff, plus macro packages.
    130  * Used so we can complain about unrecognized commands.
    131  */
    132 char *knowncmds[MAXCMDS] = {
    133 "$c", "$f", "$h", "$p", "$s", "(b", "(c", "(d", "(f", "(l", "(q", "(t",
    134 "(x", "(z", ")b", ")c", ")d", ")f", ")l", ")q", ")t", ")x", ")z", "++",
    135 "+c", "1C", "1c", "2C", "2c", "@(", "@)", "@C", "@D", "@F", "@I", "@M",
    136 "@c", "@e", "@f", "@h", "@m", "@n", "@o", "@p", "@r", "@t", "@z", "AB",
    137 "AE", "AF", "AI", "AL", "AM", "AS", "AT", "AU", "AX", "B",  "B1", "B2",
    138 "BD", "BE", "BG", "BL", "BS", "BT", "BX", "C1", "C2", "CD", "CM", "CT",
    139 "D",  "DA", "DE", "DF", "DL", "DS", "DT", "EC", "EF", "EG", "EH", "EM",
    140 "EN", "EQ", "EX", "FA", "FD", "FE", "FG", "FJ", "FK", "FL", "FN", "FO",
    141 "FQ", "FS", "FV", "FX", "H",  "HC", "HD", "HM", "HO", "HU", "I",  "ID",
    142 "IE", "IH", "IM", "IP", "IX", "IZ", "KD", "KE", "KF", "KQ", "KS", "LB",
    143 "LC", "LD", "LE", "LG", "LI", "LP", "MC", "ME", "MF", "MH", "ML", "MR",
    144 "MT", "ND", "NE", "NH", "NL", "NP", "NS", "OF", "OH", "OK", "OP", "P",
    145 "P1", "PF", "PH", "PP", "PT", "PX", "PY", "QE", "QP", "QS", "R",  "RA",
    146 "RC", "RE", "RL", "RP", "RQ", "RS", "RT", "S",  "S0", "S2", "S3", "SA",
    147 "SG", "SH", "SK", "SM", "SP", "SY", "T&", "TA", "TB", "TC", "TD", "TE",
    148 "TH", "TL", "TM", "TP", "TQ", "TR", "TS", "TX", "UL", "US", "UX", "VL",
    149 "WC", "WH", "XA", "XD", "XE", "XF", "XK", "XP", "XS", "[",  "[-", "[0",
    150 "[1", "[2", "[3", "[4", "[5", "[<", "[>", "[]", "]",  "]-", "]<", "]>",
    151 "][", "ab", "ac", "ad", "af", "am", "ar", "as", "b",  "ba", "bc", "bd",
    152 "bi", "bl", "bp", "br", "bx", "c.", "c2", "cc", "ce", "cf", "ch", "cs",
    153 "ct", "cu", "da", "de", "di", "dl", "dn", "ds", "dt", "dw", "dy", "ec",
    154 "ef", "eh", "el", "em", "eo", "ep", "ev", "ex", "fc", "fi", "fl", "fo",
    155 "fp", "ft", "fz", "hc", "he", "hl", "hp", "ht", "hw", "hx", "hy", "i",
    156 "ie", "if", "ig", "in", "ip", "it", "ix", "lc", "lg", "li", "ll", "ln",
    157 "lo", "lp", "ls", "lt", "m1", "m2", "m3", "m4", "mc", "mk", "mo", "n1",
    158 "n2", "na", "ne", "nf", "nh", "nl", "nm", "nn", "np", "nr", "ns", "nx",
    159 "of", "oh", "os", "pa", "pc", "pi", "pl", "pm", "pn", "po", "pp", "ps",
    160 "q",  "r",  "rb", "rd", "re", "rm", "rn", "ro", "rr", "rs", "rt", "sb",
    161 "sc", "sh", "sk", "so", "sp", "ss", "st", "sv", "sz", "ta", "tc", "th",
    162 "ti", "tl", "tm", "tp", "tr", "u",  "uf", "uh", "ul", "vs", "wh", "xp",
    163 "yr", 0
    164 };
    165 
int	lineno;		/* current line number in input file */
char	line[256];	/* the current line */
char	*cfilename;	/* name of current file */
int	nfiles;		/* number of files to process */
int	fflag;		/* -f: ignore \f */
int	sflag;		/* -s: ignore \s */
int	ncmds;		/* number of entries in use in knowncmds */
int	slot;		/* where binsrch says a missing command would go */

char	*malloc();
void	exit();

/*
 * Every function below is called before its definition appears, so
 * declare them all here rather than relying on implicit declarations.
 * The parameter lists are left empty on purpose: that form is compatible
 * with the old-style definitions used throughout this file.
 */
int	usage(), process(), complain(), prop(), chkcmd(), nomatch();
int	eq(), pe(), checkknown(), addcmd(), addmac(), binsrch();
    176 
    177 main(argc, argv)
    178 int argc;
    179 char **argv;
    180 {
    181 	FILE *f;
    182 	int i;
    183 	char *cp;
    184 	char b1[4];
    185 
    186 	/* Figure out how many known commands there are */
    187 	while (knowncmds[ncmds])
    188 		ncmds++;
    189 	while (argc > 1 && argv[1][0] == '-') {
    190 		switch(argv[1][1]) {
    191 
    192 		/* -a: add pairs of macros */
    193 		case 'a':
    194 			i = strlen(argv[1]) - 2;
    195 			if (i % 6 != 0)
    196 				usage();
    197 			/* look for empty macro slots */
    198 			for (i=0; br[i].opbr; i++)
    199 				;
    200 			for (cp=argv[1]+3; cp[-1]; cp += 6) {
    201 				br[i].opbr = malloc(3);
    202 				strncpy(br[i].opbr, cp, 2);
    203 				br[i].clbr = malloc(3);
    204 				strncpy(br[i].clbr, cp+3, 2);
    205 				addmac(br[i].opbr);	/* knows pairs are also known cmds */
    206 				addmac(br[i].clbr);
    207 				i++;
    208 			}
    209 			break;
    210 
    211 		/* -c: add known commands */
    212 		case 'c':
    213 			i = strlen(argv[1]) - 2;
    214 			if (i % 3 != 0)
    215 				usage();
    216 			for (cp=argv[1]+3; cp[-1]; cp += 3) {
    217 				if (cp[2] && cp[2] != '.')
    218 					usage();
    219 				strncpy(b1, cp, 2);
    220 				addmac(b1);
    221 			}
    222 			break;
    223 
    224 		/* -f: ignore font changes */
    225 		case 'f':
    226 			fflag = 1;
    227 			break;
    228 
    229 		/* -s: ignore size changes */
    230 		case 's':
    231 			sflag = 1;
    232 			break;
    233 		default:
    234 			usage();
    235 		}
    236 		argc--; argv++;
    237 	}
    238 
    239 	nfiles = argc - 1;
    240 
    241 	if (nfiles > 0) {
    242 		for (i=1; i<argc; i++) {
    243 			cfilename = argv[i];
    244 			f = fopen(cfilename, "r");
    245 			if (f == NULL)
    246 				perror(cfilename);
    247 			else
    248 				process(f);
    249 		}
    250 	} else {
    251 		cfilename = "stdin";
    252 		process(stdin);
    253 	}
    254 	exit(0);
    255 }
    256 
/* usage: print the command synopsis on standard output and exit with status 1 */
usage()
{
	fputs("Usage: checknr -s -f -a.xx.yy.xx.yy... -c.xx.xx.xx...\n",
	    stdout);
	exit(1);
}
    262 
    263 process(f)
    264 FILE *f;
    265 {
    266 	register int i, n;
    267 	char mac[5];	/* The current macro or nroff command */
    268 	int pl;
    269 
    270 	stktop = -1;
    271 	for (lineno = 1; fgets(line, sizeof line, f); lineno++) {
    272 		if (line[0] == '.') {
    273 			/*
    274 			 * find and isolate the macro/command name.
    275 			 */
    276 			strncpy(mac, line+1, 4);
    277 			if (isspace(mac[0])) {
    278 				pe(lineno);
    279 				printf("Empty command\n");
    280 			} else if (isspace(mac[1])) {
    281 				mac[1] = 0;
    282 			} else if (isspace(mac[2])) {
    283 				mac[2] = 0;
    284 			} else if (mac[0] != '\\' || mac[1] != '\"') {
    285 				pe(lineno);
    286 				printf("Command too long\n");
    287 			}
    288 
    289 			/*
    290 			 * Is it a known command?
    291 			 */
    292 			checkknown(mac);
    293 
    294 			/*
    295 			 * Should we add it?
    296 			 */
    297 			if (eq(mac, "de"))
    298 				addcmd(line);
    299 
    300 			chkcmd(line, mac);
    301 		}
    302 
    303 		/*
    304 		 * At this point we process the line looking
    305 		 * for \s and \f.
    306 		 */
    307 		for (i=0; line[i]; i++)
    308 			if (line[i]=='\\' && (i==0 || line[i-1]!='\\')) {
    309 				if (!sflag && line[++i]=='s') {
    310 					pl = line[++i];
    311 					if (isdigit(pl)) {
    312 						n = pl - '0';
    313 						pl = ' ';
    314 					} else
    315 						n = 0;
    316 					while (isdigit(line[++i]))
    317 						n = 10 * n + line[i] - '0';
    318 					i--;
    319 					if (n == 0) {
    320 						if (stk[stktop].opno == SZ) {
    321 							stktop--;
    322 						} else {
    323 							pe(lineno);
    324 							printf("unmatched \\s0\n");
    325 						}
    326 					} else {
    327 						stk[++stktop].opno = SZ;
    328 						stk[stktop].pl = pl;
    329 						stk[stktop].parm = n;
    330 						stk[stktop].lno = lineno;
    331 					}
    332 				} else if (!fflag && line[i]=='f') {
    333 					n = line[++i];
    334 					if (n == 'P') {
    335 						if (stk[stktop].opno == FT) {
    336 							stktop--;
    337 						} else {
    338 							pe(lineno);
    339 							printf("unmatched \\fP\n");
    340 						}
    341 					} else {
    342 						stk[++stktop].opno = FT;
    343 						stk[stktop].pl = 1;
    344 						stk[stktop].parm = n;
    345 						stk[stktop].lno = lineno;
    346 					}
    347 				}
    348 			}
    349 	}
    350 	/*
    351 	 * We've hit the end and look at all this stuff that hasn't been
    352 	 * matched yet!  Complain, complain.
    353 	 */
    354 	for (i=stktop; i>=0; i--) {
    355 		complain(i);
    356 	}
    357 }
    358 
    359 complain(i)
    360 {
    361 	pe(stk[i].lno);
    362 	printf("Unmatched ");
    363 	prop(i);
    364 	printf("\n");
    365 }
    366 
    367 prop(i)
    368 {
    369 	if (stk[i].pl == 0)
    370 		printf(".%s", br[stk[i].opno].opbr);
    371 	else switch(stk[i].opno) {
    372 	case SZ:
    373 		printf("\\s%c%d", stk[i].pl, stk[i].parm);
    374 		break;
    375 	case FT:
    376 		printf("\\f%c", stk[i].parm);
    377 		break;
    378 	default:
    379 		printf("Bug: stk[%d].opno = %d = .%s, .%s",
    380 			i, stk[i].opno, br[stk[i].opno].opbr, br[stk[i].opno].clbr);
    381 	}
    382 }
    383 
    384 chkcmd(line, mac)
    385 char *line;
    386 char *mac;
    387 {
    388 	register int i, n;
    389 
    390 	/*
    391 	 * Check to see if it matches top of stack.
    392 	 */
    393 	if (stktop >= 0 && eq(mac, br[stk[stktop].opno].clbr))
    394 		stktop--;	/* OK. Pop & forget */
    395 	else {
    396 		/* No. Maybe it's an opener */
    397 		for (i=0; br[i].opbr; i++) {
    398 			if (eq(mac, br[i].opbr)) {
    399 				/* Found. Push it. */
    400 				stktop++;
    401 				stk[stktop].opno = i;
    402 				stk[stktop].pl = 0;
    403 				stk[stktop].parm = 0;
    404 				stk[stktop].lno = lineno;
    405 				break;
    406 			}
    407 			/*
    408 			 * Maybe it's an unmatched closer.
    409 			 * NOTE: this depends on the fact
    410 			 * that none of the closers can be
    411 			 * openers too.
    412 			 */
    413 			if (eq(mac, br[i].clbr)) {
    414 				nomatch(mac);
    415 				break;
    416 			}
    417 		}
    418 	}
    419 }
    420 
    421 nomatch(mac)
    422 char *mac;
    423 {
    424 	register int i, j;
    425 
    426 	/*
    427 	 * Look for a match further down on stack
    428 	 * If we find one, it suggests that the stuff in
    429 	 * between is supposed to match itself.
    430 	 */
    431 	for (j=stktop; j>=0; j--)
    432 		if (eq(mac,br[stk[j].opno].clbr)) {
    433 			/* Found.  Make a good diagnostic. */
    434 			if (j == stktop-2) {
    435 				/*
    436 				 * Check for special case \fx..\fR and don't
    437 				 * complain.
    438 				 */
    439 				if (stk[j+1].opno==FT && stk[j+1].parm!='R'
    440 				 && stk[j+2].opno==FT && stk[j+2].parm=='R') {
    441 					stktop = j -1;
    442 					return;
    443 				}
    444 				/*
    445 				 * We have two unmatched frobs.  Chances are
    446 				 * they were intended to match, so we mention
    447 				 * them together.
    448 				 */
    449 				pe(stk[j+1].lno);
    450 				prop(j+1);
    451 				printf(" does not match %d: ", stk[j+2].lno);
    452 				prop(j+2);
    453 				printf("\n");
    454 			} else for (i=j+1; i <= stktop; i++) {
    455 				complain(i);
    456 			}
    457 			stktop = j-1;
    458 			return;
    459 		}
    460 	/* Didn't find one.  Throw this away. */
    461 	pe(lineno);
    462 	printf("Unmatched .%s\n", mac);
    463 }
    464 
/*
 * eq: are two strings equal?
 * Returns 1 if s1 and s2 hold the same characters, 0 otherwise.
 * Neither string is modified.
 */
int
eq(const char *s1, const char *s2)
{
	return strcmp(s1, s2) == 0;
}
    471 
    472 /* print the first part of an error message, given the line number */
    473 pe(lineno)
    474 int lineno;
    475 {
    476 	if (nfiles > 1)
    477 		printf("%s: ", cfilename);
    478 	printf("%d: ", lineno);
    479 }
    480 
    481 checkknown(mac)
    482 char *mac;
    483 {
    484 
    485 	if (eq(mac, "."))
    486 		return;
    487 	if (binsrch(mac) >= 0)
    488 		return;
    489 	if (mac[0] == '\\' && mac[1] == '"')	/* comments */
    490 		return;
    491 
    492 	pe(lineno);
    493 	printf("Unknown command: .%s\n", mac);
    494 }
    495 
    496 /*
    497  * We have a .de xx line in "line".  Add xx to the list of known commands.
    498  */
    499 addcmd(line)
    500 char *line;
    501 {
    502 	char *mac;
    503 
    504 	/* grab the macro being defined */
    505 	mac = line+4;
    506 	while (isspace(*mac))
    507 		mac++;
    508 	if (*mac == 0) {
    509 		pe(lineno);
    510 		printf("illegal define: %s\n", line);
    511 		return;
    512 	}
    513 	mac[2] = 0;
    514 	if (isspace(mac[1]) || mac[1] == '\\')
    515 		mac[1] = 0;
    516 	if (ncmds >= MAXCMDS) {
    517 		printf("Only %d known commands allowed\n", MAXCMDS);
    518 		exit(1);
    519 	}
    520 	addmac(mac);
    521 }
    522 
    523 /*
    524  * Add mac to the list.  We should really have some kind of tree
    525  * structure here but this is a quick-and-dirty job and I just don't
    526  * have time to mess with it.  (I wonder if this will come back to haunt
    527  * me someday?)  Anyway, I claim that .de is fairly rare in user
    528  * nroff programs, and the register loop below is pretty fast.
    529  */
    530 addmac(mac)
    531 char *mac;
    532 {
    533 	register char **src, **dest, **loc;
    534 
    535 	if (binsrch(mac) >= 0){	/* it's OK to redefine something */
    536 #ifdef DEBUG
    537 		printf("binsrch(%s) -> already in table\n", mac);
    538 #endif DEBUG
    539 		return;
    540 	}
    541 	/* binsrch sets slot as a side effect */
    542 #ifdef DEBUG
    543 printf("binsrch(%s) -> %d\n", mac, slot);
    544 #endif
    545 	loc = &knowncmds[slot];
    546 	src = &knowncmds[ncmds-1];
    547 	dest = src+1;
    548 	while (dest > loc)
    549 		*dest-- = *src--;
    550 	*loc = malloc(3);
    551 	strcpy(*loc, mac);
    552 	ncmds++;
    553 #ifdef DEBUG
    554 printf("after: %s %s %s %s %s, %d cmds\n", knowncmds[slot-2], knowncmds[slot-1], knowncmds[slot], knowncmds[slot+1], knowncmds[slot+2], ncmds);
    555 #endif
    556 }
    557 
    558 /*
    559  * Do a binary search in knowncmds for mac.
    560  * If found, return the index.  If not, return -1.
    561  */
    562 binsrch(mac)
    563 char *mac;
    564 {
    565 	register char *p;	/* pointer to current cmd in list */
    566 	register int d;		/* difference if any */
    567 	register int mid;	/* mid point in binary search */
    568 	register int top, bot;	/* boundaries of bin search, inclusive */
    569 
    570 	top = ncmds-1;
    571 	bot = 0;
    572 	while (top >= bot) {
    573 		mid = (top+bot)/2;
    574 		p = knowncmds[mid];
    575 		d = p[0] - mac[0];
    576 		if (d == 0)
    577 			d = p[1] - mac[1];
    578 		if (d == 0)
    579 			return mid;
    580 		if (d < 0)
    581 			bot = mid + 1;
    582 		else
    583 			top = mid - 1;
    584 	}
    585 	slot = bot;	/* place it would have gone */
    586 	return -1;
    587 }
    588