Home | History | Annotate | Line # | Download | only in checknr
checknr.c revision 1.7
      1 /*	$NetBSD: checknr.c,v 1.7 2001/01/16 02:50:29 cgd Exp $	*/
      2 
      3 /*
      4  * Copyright (c) 1980, 1993
      5  *	The Regents of the University of California.  All rights reserved.
      6  *
      7  * Redistribution and use in source and binary forms, with or without
      8  * modification, are permitted provided that the following conditions
      9  * are met:
     10  * 1. Redistributions of source code must retain the above copyright
     11  *    notice, this list of conditions and the following disclaimer.
     12  * 2. Redistributions in binary form must reproduce the above copyright
     13  *    notice, this list of conditions and the following disclaimer in the
     14  *    documentation and/or other materials provided with the distribution.
     15  * 3. All advertising materials mentioning features or use of this software
     16  *    must display the following acknowledgement:
     17  *	This product includes software developed by the University of
     18  *	California, Berkeley and its contributors.
     19  * 4. Neither the name of the University nor the names of its contributors
     20  *    may be used to endorse or promote products derived from this software
     21  *    without specific prior written permission.
     22  *
     23  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     24  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     26  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     27  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     28  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     29  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     30  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     31  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     32  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     33  * SUCH DAMAGE.
     34  */
     35 
     36 #include <sys/cdefs.h>
     37 #ifndef lint
     38 __COPYRIGHT("@(#) Copyright (c) 1980, 1993\n\
     39 	The Regents of the University of California.  All rights reserved.\n");
     40 #endif /* not lint */
     41 
     42 #ifndef lint
     43 #if 0
     44 static char sccsid[] = "@(#)checknr.c	8.1 (Berkeley) 6/6/93";
     45 #else
     46 __RCSID("$NetBSD: checknr.c,v 1.7 2001/01/16 02:50:29 cgd Exp $");
     47 #endif
     48 #endif /* not lint */
     49 
     50 /*
     51  * checknr: check an nroff/troff input file for matching macro calls.
     52  * we also attempt to match size and font changes, but only the embedded
     53  * kind.  These must end in \s0 and \fP resp.  Maybe more sophistication
     54  * later but for now think of these restrictions as contributions to
     55  * structured typesetting.
     56  */
     57 #include <ctype.h>
     58 #include <stdio.h>
     59 #include <stdlib.h>
     60 #include <string.h>
     61 
/* Fixed capacities of the checker's statically sized tables. */
enum {
	MAXSTK = 100,	/* entries in the stk[] stack */
	MAXBR = 100,	/* entries in the br[] bracket-pair table */
	MAXCMDS = 500	/* entries in the knowncmds[] table */
};
     65 
/*
 * Stack of openers that are still waiting for their closer.
 * stk[0..stktop] are the live entries; stktop is -1 when the stack is
 * empty (process() resets it at the start of every file).
 */
struct stkstr {
	int opno;	/* index into br[] of the opener */
	int pl;		/* '+', '-', ' ' for \s, 1 for \f, 0 for a dot command */
	int parm;	/* argument of the size or font change */
	int lno;	/* input line on which the opener was seen */
};

struct stkstr stk[MAXSTK];
int stktop;
     76 
/*
 * Table of opening/closing macro pairs, terminated by an entry whose
 * opbr is null.  The first two slots stand for size and font changes;
 * SZ and FT are their indices.
 */
#define SZ	0
#define FT	1

struct brstr {
	char *opbr;	/* name of the opening macro */
	char *clbr;	/* name of the macro that closes it */
};

struct brstr br[MAXBR] = {
	/* A few bare bones troff commands */
	{ "sz", "sz" },		/* SZ: also \s */
	{ "ft", "ft" },		/* FT: also \f */
	/* the -mm package */
	{ "AL", "LE" },
	{ "AS", "AE" },
	{ "BL", "LE" },
	{ "BS", "BE" },
	{ "DF", "DE" },
	{ "DL", "LE" },
	{ "DS", "DE" },
	{ "FS", "FE" },
	{ "ML", "LE" },
	{ "NS", "NE" },
	{ "RL", "LE" },
	{ "VL", "LE" },
	/* the -ms package */
	{ "AB", "AE" },
	{ "BD", "DE" },
	{ "CD", "DE" },
	{ "DS", "DE" },
	{ "FS", "FE" },
	{ "ID", "DE" },
	{ "KF", "KE" },
	{ "KS", "KE" },
	{ "LD", "DE" },
	{ "LG", "NL" },
	{ "QS", "QE" },
	{ "RS", "RE" },
	{ "SM", "NL" },
	{ "XA", "XE" },
	{ "XS", "XE" },
	/* The -me package */
	{ "(b", ")b" },
	{ "(c", ")c" },
	{ "(d", ")d" },
	{ "(f", ")f" },
	{ "(l", ")l" },
	{ "(q", ")q" },
	{ "(x", ")x" },
	{ "(z", ")z" },
	/* Things needed by preprocessors */
	{ "EQ", "EN" },
	{ "TS", "TE" },
	/* Refer */
	{ "[", "]" },
	/* end-of-table marker */
	{ NULL, NULL },
};
    134 
    135 /*
    136  * All commands known to nroff, plus macro packages.
    137  * Used so we can complain about unrecognized commands.
    138  */
    139 char *knowncmds[MAXCMDS] = {
    140 "$c", "$f", "$h", "$p", "$s", "(b", "(c", "(d", "(f", "(l", "(q", "(t",
    141 "(x", "(z", ")b", ")c", ")d", ")f", ")l", ")q", ")t", ")x", ")z", "++",
    142 "+c", "1C", "1c", "2C", "2c", "@(", "@)", "@C", "@D", "@F", "@I", "@M",
    143 "@c", "@e", "@f", "@h", "@m", "@n", "@o", "@p", "@r", "@t", "@z", "AB",
    144 "AE", "AF", "AI", "AL", "AM", "AS", "AT", "AU", "AX", "B",  "B1", "B2",
    145 "BD", "BE", "BG", "BL", "BS", "BT", "BX", "C1", "C2", "CD", "CM", "CT",
    146 "D",  "DA", "DE", "DF", "DL", "DS", "DT", "EC", "EF", "EG", "EH", "EM",
    147 "EN", "EQ", "EX", "FA", "FD", "FE", "FG", "FJ", "FK", "FL", "FN", "FO",
    148 "FQ", "FS", "FV", "FX", "H",  "HC", "HD", "HM", "HO", "HU", "I",  "ID",
    149 "IE", "IH", "IM", "IP", "IX", "IZ", "KD", "KE", "KF", "KQ", "KS", "LB",
    150 "LC", "LD", "LE", "LG", "LI", "LP", "MC", "ME", "MF", "MH", "ML", "MR",
    151 "MT", "ND", "NE", "NH", "NL", "NP", "NS", "OF", "OH", "OK", "OP", "P",
    152 "P1", "PF", "PH", "PP", "PT", "PX", "PY", "QE", "QP", "QS", "R",  "RA",
    153 "RC", "RE", "RL", "RP", "RQ", "RS", "RT", "S",  "S0", "S2", "S3", "SA",
    154 "SG", "SH", "SK", "SM", "SP", "SY", "T&", "TA", "TB", "TC", "TD", "TE",
    155 "TH", "TL", "TM", "TP", "TQ", "TR", "TS", "TX", "UL", "US", "UX", "VL",
    156 "WC", "WH", "XA", "XD", "XE", "XF", "XK", "XP", "XS", "[",  "[-", "[0",
    157 "[1", "[2", "[3", "[4", "[5", "[<", "[>", "[]", "]",  "]-", "]<", "]>",
    158 "][", "ab", "ac", "ad", "af", "am", "ar", "as", "b",  "ba", "bc", "bd",
    159 "bi", "bl", "bp", "br", "bx", "c.", "c2", "cc", "ce", "cf", "ch", "cs",
    160 "ct", "cu", "da", "de", "di", "dl", "dn", "ds", "dt", "dw", "dy", "ec",
    161 "ef", "eh", "el", "em", "eo", "ep", "ev", "ex", "fc", "fi", "fl", "fo",
    162 "fp", "ft", "fz", "hc", "he", "hl", "hp", "ht", "hw", "hx", "hy", "i",
    163 "ie", "if", "ig", "in", "ip", "it", "ix", "lc", "lg", "li", "ll", "ln",
    164 "lo", "lp", "ls", "lt", "m1", "m2", "m3", "m4", "mc", "mk", "mo", "n1",
    165 "n2", "na", "ne", "nf", "nh", "nl", "nm", "nn", "np", "nr", "ns", "nx",
    166 "of", "oh", "os", "pa", "pc", "pi", "pl", "pm", "pn", "po", "pp", "ps",
    167 "q",  "r",  "rb", "rd", "re", "rm", "rn", "ro", "rr", "rs", "rt", "sb",
    168 "sc", "sh", "sk", "so", "sp", "ss", "st", "sv", "sz", "ta", "tc", "th",
    169 "ti", "tl", "tm", "tp", "tr", "u",  "uf", "uh", "ul", "vs", "wh", "xp",
    170 "yr", 0
    171 };
    172 
/* State shared by all of the routines in this file. */
int lineno;		/* number of the input line being checked */
char line[256];		/* text of the input line being checked */
char *cfilename;	/* name of the file being checked */
int nfiles;		/* how many file operands were given */
int fflag;		/* set by -f: leave \f alone */
int sflag;		/* set by -s: leave \s alone */
int ncmds;		/* number of entries in use in knowncmds */
int slot;		/* where a failed binsrch says the name belongs */
    181 
/* Forward declarations for the functions defined below. */
void	addcmd(char *line);
void	addmac(char *mac);
int	binsrch(char *mac);
void	checkknown(char *mac);
void	chkcmd(char *line, char *mac);
void	complain(int i);
int	eq(const void *s1, const void *s2);
int	main(int argc, char **argv);
void	nomatch(char *mac);
void	pe(int lno);
void	process(FILE *f);
void	prop(int i);
void	usage(void);
    195 
    196 int
    197 main(argc, argv)
    198 	int argc;
    199 	char **argv;
    200 {
    201 	FILE *f;
    202 	int i;
    203 	char *cp;
    204 	char b1[4];
    205 
    206 	/* Figure out how many known commands there are */
    207 	while (knowncmds[ncmds])
    208 		ncmds++;
    209 	while (argc > 1 && argv[1][0] == '-') {
    210 		switch(argv[1][1]) {
    211 
    212 		/* -a: add pairs of macros */
    213 		case 'a':
    214 			i = strlen(argv[1]) - 2;
    215 			if (i % 6 != 0)
    216 				usage();
    217 			/* look for empty macro slots */
    218 			for (i=0; br[i].opbr; i++)
    219 				;
    220 			for (cp=argv[1]+3; cp[-1]; cp += 6) {
    221 				br[i].opbr = malloc(3);
    222 				strncpy(br[i].opbr, cp, 2);
    223 				br[i].clbr = malloc(3);
    224 				strncpy(br[i].clbr, cp+3, 2);
    225 				addmac(br[i].opbr);	/* knows pairs are also known cmds */
    226 				addmac(br[i].clbr);
    227 				i++;
    228 			}
    229 			break;
    230 
    231 		/* -c: add known commands */
    232 		case 'c':
    233 			i = strlen(argv[1]) - 2;
    234 			if (i % 3 != 0)
    235 				usage();
    236 			for (cp=argv[1]+3; cp[-1]; cp += 3) {
    237 				if (cp[2] && cp[2] != '.')
    238 					usage();
    239 				strncpy(b1, cp, 2);
    240 				addmac(b1);
    241 			}
    242 			break;
    243 
    244 		/* -f: ignore font changes */
    245 		case 'f':
    246 			fflag = 1;
    247 			break;
    248 
    249 		/* -s: ignore size changes */
    250 		case 's':
    251 			sflag = 1;
    252 			break;
    253 		default:
    254 			usage();
    255 		}
    256 		argc--; argv++;
    257 	}
    258 
    259 	nfiles = argc - 1;
    260 
    261 	if (nfiles > 0) {
    262 		for (i=1; i<argc; i++) {
    263 			cfilename = argv[i];
    264 			f = fopen(cfilename, "r");
    265 			if (f == NULL)
    266 				perror(cfilename);
    267 			else
    268 				process(f);
    269 		}
    270 	} else {
    271 		cfilename = "stdin";
    272 		process(stdin);
    273 	}
    274 	exit(0);
    275 }
    276 
/* Write the option synopsis to standard output and exit with status 1. */
void
usage(void)
{
	fputs("Usage: checknr -s -f -a.xx.yy.xx.yy... -c.xx.xx.xx...\n",
	    stdout);
	exit(1);
}
    283 
    284 void
    285 process(f)
    286 	FILE *f;
    287 {
    288 	int i, n;
    289 	char mac[5];	/* The current macro or nroff command */
    290 	int pl;
    291 
    292 	stktop = -1;
    293 	for (lineno = 1; fgets(line, sizeof line, f); lineno++) {
    294 		if (line[0] == '.') {
    295 			/*
    296 			 * find and isolate the macro/command name.
    297 			 */
    298 			strncpy(mac, line+1, 4);
    299 			if (isspace((unsigned char)mac[0])) {
    300 				pe(lineno);
    301 				printf("Empty command\n");
    302 			} else if (isspace((unsigned char)mac[1])) {
    303 				mac[1] = 0;
    304 			} else if (isspace((unsigned char)mac[2])) {
    305 				mac[2] = 0;
    306 			} else if (mac[0] != '\\' || mac[1] != '\"') {
    307 				pe(lineno);
    308 				printf("Command too long\n");
    309 			}
    310 
    311 			/*
    312 			 * Is it a known command?
    313 			 */
    314 			checkknown(mac);
    315 
    316 			/*
    317 			 * Should we add it?
    318 			 */
    319 			if (eq(mac, "de"))
    320 				addcmd(line);
    321 
    322 			chkcmd(line, mac);
    323 		}
    324 
    325 		/*
    326 		 * At this point we process the line looking
    327 		 * for \s and \f.
    328 		 */
    329 		for (i=0; line[i]; i++)
    330 			if (line[i]=='\\' && (i==0 || line[i-1]!='\\')) {
    331 				if (!sflag && line[++i]=='s') {
    332 					pl = line[++i];
    333 					if (isdigit((unsigned char)pl)) {
    334 						n = pl - '0';
    335 						pl = ' ';
    336 					} else
    337 						n = 0;
    338 					while (isdigit((unsigned char)line[++i]))
    339 						n = 10 * n + line[i] - '0';
    340 					i--;
    341 					if (n == 0) {
    342 						if (stk[stktop].opno == SZ) {
    343 							stktop--;
    344 						} else {
    345 							pe(lineno);
    346 							printf("unmatched \\s0\n");
    347 						}
    348 					} else {
    349 						stk[++stktop].opno = SZ;
    350 						stk[stktop].pl = pl;
    351 						stk[stktop].parm = n;
    352 						stk[stktop].lno = lineno;
    353 					}
    354 				} else if (!fflag && line[i]=='f') {
    355 					n = line[++i];
    356 					if (n == 'P') {
    357 						if (stk[stktop].opno == FT) {
    358 							stktop--;
    359 						} else {
    360 							pe(lineno);
    361 							printf("unmatched \\fP\n");
    362 						}
    363 					} else {
    364 						stk[++stktop].opno = FT;
    365 						stk[stktop].pl = 1;
    366 						stk[stktop].parm = n;
    367 						stk[stktop].lno = lineno;
    368 					}
    369 				}
    370 			}
    371 	}
    372 	/*
    373 	 * We've hit the end and look at all this stuff that hasn't been
    374 	 * matched yet!  Complain, complain.
    375 	 */
    376 	for (i=stktop; i>=0; i--) {
    377 		complain(i);
    378 	}
    379 }
    380 
    381 void
    382 complain(i)
    383 	int i;
    384 {
    385 	pe(stk[i].lno);
    386 	printf("Unmatched ");
    387 	prop(i);
    388 	printf("\n");
    389 }
    390 
    391 void
    392 prop(i)
    393 	int i;
    394 {
    395 	if (stk[i].pl == 0)
    396 		printf(".%s", br[stk[i].opno].opbr);
    397 	else switch(stk[i].opno) {
    398 	case SZ:
    399 		printf("\\s%c%d", stk[i].pl, stk[i].parm);
    400 		break;
    401 	case FT:
    402 		printf("\\f%c", stk[i].parm);
    403 		break;
    404 	default:
    405 		printf("Bug: stk[%d].opno = %d = .%s, .%s",
    406 			i, stk[i].opno, br[stk[i].opno].opbr,
    407 			br[stk[i].opno].clbr);
    408 	}
    409 }
    410 
    411 void
    412 chkcmd(line, mac)
    413 	char *line;
    414 	char *mac;
    415 {
    416 	int i;
    417 
    418 	/*
    419 	 * Check to see if it matches top of stack.
    420 	 */
    421 	if (stktop >= 0 && eq(mac, br[stk[stktop].opno].clbr))
    422 		stktop--;	/* OK. Pop & forget */
    423 	else {
    424 		/* No. Maybe it's an opener */
    425 		for (i=0; br[i].opbr; i++) {
    426 			if (eq(mac, br[i].opbr)) {
    427 				/* Found. Push it. */
    428 				stktop++;
    429 				stk[stktop].opno = i;
    430 				stk[stktop].pl = 0;
    431 				stk[stktop].parm = 0;
    432 				stk[stktop].lno = lineno;
    433 				break;
    434 			}
    435 			/*
    436 			 * Maybe it's an unmatched closer.
    437 			 * NOTE: this depends on the fact
    438 			 * that none of the closers can be
    439 			 * openers too.
    440 			 */
    441 			if (eq(mac, br[i].clbr)) {
    442 				nomatch(mac);
    443 				break;
    444 			}
    445 		}
    446 	}
    447 }
    448 
    449 void
    450 nomatch(mac)
    451 	char *mac;
    452 {
    453 	int i, j;
    454 
    455 	/*
    456 	 * Look for a match further down on stack
    457 	 * If we find one, it suggests that the stuff in
    458 	 * between is supposed to match itself.
    459 	 */
    460 	for (j=stktop; j>=0; j--)
    461 		if (eq(mac,br[stk[j].opno].clbr)) {
    462 			/* Found.  Make a good diagnostic. */
    463 			if (j == stktop-2) {
    464 				/*
    465 				 * Check for special case \fx..\fR and don't
    466 				 * complain.
    467 				 */
    468 				if (stk[j+1].opno==FT && stk[j+1].parm!='R'
    469 				 && stk[j+2].opno==FT && stk[j+2].parm=='R') {
    470 					stktop = j -1;
    471 					return;
    472 				}
    473 				/*
    474 				 * We have two unmatched frobs.  Chances are
    475 				 * they were intended to match, so we mention
    476 				 * them together.
    477 				 */
    478 				pe(stk[j+1].lno);
    479 				prop(j+1);
    480 				printf(" does not match %d: ", stk[j+2].lno);
    481 				prop(j+2);
    482 				printf("\n");
    483 			} else for (i=j+1; i <= stktop; i++) {
    484 				complain(i);
    485 			}
    486 			stktop = j-1;
    487 			return;
    488 		}
    489 	/* Didn't find one.  Throw this away. */
    490 	pe(lineno);
    491 	printf("Unmatched .%s\n", mac);
    492 }
    493 
/* eq: return 1 if the two NUL-terminated strings are identical, else 0. */
int
eq(const void *s1, const void *s2)
{
	const char *left = s1;
	const char *right = s2;

	return !strcmp(left, right);
}
    501 
    502 /* print the first part of an error message, given the line number */
    503 void
    504 pe(lineno)
    505 	int lineno;
    506 {
    507 	if (nfiles > 1)
    508 		printf("%s: ", cfilename);
    509 	printf("%d: ", lineno);
    510 }
    511 
    512 void
    513 checkknown(mac)
    514 	char *mac;
    515 {
    516 
    517 	if (eq(mac, "."))
    518 		return;
    519 	if (binsrch(mac) >= 0)
    520 		return;
    521 	if (mac[0] == '\\' && mac[1] == '"')	/* comments */
    522 		return;
    523 
    524 	pe(lineno);
    525 	printf("Unknown command: .%s\n", mac);
    526 }
    527 
    528 /*
    529  * We have a .de xx line in "line".  Add xx to the list of known commands.
    530  */
    531 void
    532 addcmd(line)
    533 	char *line;
    534 {
    535 	char *mac;
    536 
    537 	/* grab the macro being defined */
    538 	mac = line+4;
    539 	while (isspace((unsigned char)*mac))
    540 		mac++;
    541 	if (*mac == 0) {
    542 		pe(lineno);
    543 		printf("illegal define: %s\n", line);
    544 		return;
    545 	}
    546 	mac[2] = 0;
    547 	if (isspace((unsigned char)mac[1]) || mac[1] == '\\')
    548 		mac[1] = 0;
    549 	if (ncmds >= MAXCMDS) {
    550 		printf("Only %d known commands allowed\n", MAXCMDS);
    551 		exit(1);
    552 	}
    553 	addmac(mac);
    554 }
    555 
    556 /*
    557  * Add mac to the list.  We should really have some kind of tree
    558  * structure here but this is a quick-and-dirty job and I just don't
    559  * have time to mess with it.  (I wonder if this will come back to haunt
    560  * me someday?)  Anyway, I claim that .de is fairly rare in user
    561  * nroff programs, and the register loop below is pretty fast.
    562  */
    563 void
    564 addmac(mac)
    565 	char *mac;
    566 {
    567 	char **src, **dest, **loc;
    568 
    569 	if (binsrch(mac) >= 0){	/* it's OK to redefine something */
    570 #ifdef DEBUG
    571 		printf("binsrch(%s) -> already in table\n", mac);
    572 #endif /* DEBUG */
    573 		return;
    574 	}
    575 	/* binsrch sets slot as a side effect */
    576 #ifdef DEBUG
    577 printf("binsrch(%s) -> %d\n", mac, slot);
    578 #endif
    579 	loc = &knowncmds[slot];
    580 	src = &knowncmds[ncmds-1];
    581 	dest = src+1;
    582 	while (dest > loc)
    583 		*dest-- = *src--;
    584 	*loc = malloc(3);
    585 	strcpy(*loc, mac);
    586 	ncmds++;
    587 #ifdef DEBUG
    588 printf("after: %s %s %s %s %s, %d cmds\n", knowncmds[slot-2], knowncmds[slot-1], knowncmds[slot], knowncmds[slot+1], knowncmds[slot+2], ncmds);
    589 #endif
    590 }
    591 
    592 /*
    593  * Do a binary search in knowncmds for mac.
    594  * If found, return the index.  If not, return -1.
    595  */
    596 int
    597 binsrch(mac)
    598 	char *mac;
    599 {
    600 	char *p;	/* pointer to current cmd in list */
    601 	int d;		/* difference if any */
    602 	int mid;	/* mid point in binary search */
    603 	int top, bot;	/* boundaries of bin search, inclusive */
    604 
    605 	top = ncmds-1;
    606 	bot = 0;
    607 	while (top >= bot) {
    608 		mid = (top+bot)/2;
    609 		p = knowncmds[mid];
    610 		d = p[0] - mac[0];
    611 		if (d == 0)
    612 			d = p[1] - mac[1];
    613 		if (d == 0)
    614 			return mid;
    615 		if (d < 0)
    616 			bot = mid + 1;
    617 		else
    618 			top = mid - 1;
    619 	}
    620 	slot = bot;	/* place it would have gone */
    621 	return -1;
    622 }
    623