Home | History | Annotate | Line # | Download | only in m4
main.c revision 1.34
      1 /*	$NetBSD: main.c,v 1.34 2003/08/07 11:14:32 agc Exp $	*/
      2 /*	$OpenBSD: main.c,v 1.51 2001/10/06 10:52:25 espie Exp $	*/
      3 
      4 /*-
      5  * Copyright (c) 1989, 1993
      6  *	The Regents of the University of California.  All rights reserved.
      7  *
      8  * This code is derived from software contributed to Berkeley by
      9  * Ozan Yigit at York University.
     10  *
     11  * Redistribution and use in source and binary forms, with or without
     12  * modification, are permitted provided that the following conditions
     13  * are met:
     14  * 1. Redistributions of source code must retain the above copyright
     15  *    notice, this list of conditions and the following disclaimer.
     16  * 2. Redistributions in binary form must reproduce the above copyright
     17  *    notice, this list of conditions and the following disclaimer in the
     18  *    documentation and/or other materials provided with the distribution.
     19  * 3. Neither the name of the University nor the names of its contributors
     20  *    may be used to endorse or promote products derived from this software
     21  *    without specific prior written permission.
     22  *
     23  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     24  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     26  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     27  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     28  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     29  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     30  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     31  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     32  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     33  * SUCH DAMAGE.
     34  */
     35 
     36 #include <sys/cdefs.h>
     37 #if defined(__COPYRIGHT) && !defined(lint)
     38 __COPYRIGHT("@(#) Copyright (c) 1989, 1993\n\
     39 	The Regents of the University of California.  All rights reserved.\n");
     40 #endif /* not lint */
     41 
     42 #if defined(__RCSID) && !defined(lint)
     43 #if 0
     44 static char sccsid[] = "@(#)main.c	8.1 (Berkeley) 6/6/93";
     45 #else
     46 __RCSID("$NetBSD: main.c,v 1.34 2003/08/07 11:14:32 agc Exp $");
     47 #endif
     48 #endif /* not lint */
     49 
     50 /*
     51  * main.c
     52  * Facility: m4 macro processor
     53  * by: oz
     54  */
     55 
     56 #include <sys/types.h>
     57 #include <assert.h>
     58 #include <ctype.h>
     59 #include <errno.h>
     60 #include <signal.h>
     61 #include <stddef.h>
     62 #include <stdio.h>
     63 #include <stdlib.h>
     64 #include <string.h>
     65 #include "mdef.h"
     66 #include "stdd.h"
     67 #include "extern.h"
     68 #include "pathnames.h"
     69 
/*
 * Global interpreter state.  Everything here except STACKMAX has external
 * linkage; STACKMAX is private to this file and is changed only by
 * enlarge_stack().
 */
ndptr hashtab[HASHSIZE];	/* hash table for macros etc.  */
stae *mstack;		 	/* stack of m4 machine         */
char *sstack;		 	/* shadow stack, for string space extension */
static size_t STACKMAX;		/* current maximum size of stack */
int sp; 			/* current m4  stack pointer   */
int fp; 			/* m4 call frame pointer       */
struct input_file infile[MAXINP];/* input file stack (0=stdin)  */
FILE **outfile;			/* diversion array(0=bitbucket)*/
int maxout;			/* upper bound used when scanning outfile[];
				 * zeroed in main() before resizedivs() */
FILE *active;			/* active output file pointer  */
int ilevel = 0; 		/* input file stack pointer    */
int oindex = 0; 		/* diversion index..	       */
char *null = "";                /* as it says.. just a null..  */
char *m4wraps = "";             /* m4wrap string default..     */
int m4prefix = 0;		/* prefix keywords with m4_    */
char lquote[MAXCCHARS+1] = {LQUOTE};	/* left quote character  (`)   */
char rquote[MAXCCHARS+1] = {RQUOTE};	/* right quote character (')   */
char scommt[MAXCCHARS+1] = {SCOMMT};	/* start character for comment */
char ecommt[MAXCCHARS+1] = {ECOMMT};	/* end character for comment   */
     89 
/*
 * Table of builtin macros.  Each entry pairs a builtin name with its type
 * code.  An entry whose type is OR'ed with NOARGS is installed by
 * initkwds() without the NEEDARGS flag, so macro() will expand it even
 * when it is not followed by an opening parenthesis; every other builtin
 * is only expanded when called with an argument list.
 *
 * When -P is given, initkwds() rewrites each knam in place to carry the
 * "m4_" prefix, which is why the table is not const.
 */
struct keyblk keywrds[] = {	/* m4 keywords to be installed */
	{ "include",      INCLTYPE },
	{ "sinclude",     SINCTYPE },
	{ "define",       DEFITYPE },
	{ "defn",         DEFNTYPE },
	{ "divert",       DIVRTYPE | NOARGS },
	{ "expr",         EXPRTYPE },
	{ "eval",         EXPRTYPE },
	{ "substr",       SUBSTYPE },
	{ "ifelse",       IFELTYPE },
	{ "ifdef",        IFDFTYPE },
	{ "len",          LENGTYPE },
	{ "incr",         INCRTYPE },
	{ "decr",         DECRTYPE },
	{ "dnl",          DNLNTYPE | NOARGS },
	{ "changequote",  CHNQTYPE | NOARGS },
	{ "changecom",    CHNCTYPE | NOARGS },
	{ "index",        INDXTYPE },
#ifdef EXTENDED
	{ "paste",        PASTTYPE },
	{ "spaste",       SPASTYPE },
    	/* Newer extensions, needed to handle gnu-m4 scripts */
	{ "indir",        INDIRTYPE},
	{ "builtin",      BUILTINTYPE},
	{ "patsubst",	  PATSTYPE},
	{ "regexp",	  REGEXPTYPE},
	{ "esyscmd",	  ESYSCMDTYPE},
	{ "__file__",	  FILENAMETYPE | NOARGS},
	{ "__line__",	  LINETYPE | NOARGS},
#endif
	{ "popdef",       POPDTYPE },
	{ "pushdef",      PUSDTYPE },
	{ "dumpdef",      DUMPTYPE | NOARGS },
	{ "shift",        SHIFTYPE | NOARGS },
	{ "translit",     TRNLTYPE },
	{ "undefine",     UNDFTYPE },
	{ "undivert",     UNDVTYPE | NOARGS },
	{ "divnum",       DIVNTYPE | NOARGS },
	{ "maketemp",     MKTMTYPE },
	{ "errprint",     ERRPTYPE | NOARGS },
	{ "m4wrap",       M4WRTYPE | NOARGS },
	{ "m4exit",       EXITTYPE | NOARGS },
	{ "syscmd",       SYSCTYPE },
	{ "sysval",       SYSVTYPE | NOARGS },
	{ "traceon",	  TRACEONTYPE | NOARGS },
	{ "traceoff",	  TRACEOFFTYPE | NOARGS },

	/* platform self-identification macro, chosen at compile time */
#if defined(unix) || defined(__unix__)
	{ "unix",         SELFTYPE | NOARGS },
#else
#ifdef vms
	{ "vms",          SELFTYPE | NOARGS },
#endif
#endif
};

/* number of entries in keywrds[]; note the result has type size_t */
#define MAXKEYS	(sizeof(keywrds)/sizeof(struct keyblk))
    147 
/* getopt(3) state, declared by hand here */
extern int optind;
extern char *optarg;

/*
 * Positions (input name and line number) of currently open quotes and
 * parentheses, kept so that an "unclosed ..." diagnostic can say where each
 * one was opened.  Only the first MAXRECORD nesting levels are remembered:
 * record() ignores deeper levels and dump_stack() prints "..." for them.
 */
#define MAXRECORD 50
static struct position {
	char *name;		/* set from CURRENT_NAME by record() */
	unsigned long line;	/* set from CURRENT_LINE by record() */
} quotes[MAXRECORD], paren[MAXRECORD];

static void record __P((struct position *, int));
static void dump_stack __P((struct position *, int));

static void macro __P((void));
static void initkwds __P((void));
static ndptr inspect __P((int, char *));
static int do_look_ahead __P((int, const char *));

/* not called by name in this file; presumably used by the stack macros
 * (pushf/pushs) from mdef.h -- confirm there */
static void enlarge_stack __P((void));

int main __P((int, char *[]));
    168 
    169 int
    170 main(argc,argv)
    171 	int argc;
    172 	char *argv[];
    173 {
    174 	int c;
    175 	int n;
    176 	char *p;
    177 
    178 	if (signal(SIGINT, SIG_IGN) != SIG_IGN)
    179 		signal(SIGINT, onintr);
    180 
    181 	/*
    182 	 * We need to know if -P is there before checking -D and -U.
    183 	 */
    184 	while ((c = getopt(argc, argv, "D:I:PU:d:go:t:")) != -1)
    185 		if (c == 'P')
    186 			m4prefix = 1;
    187 	optind = 1;
    188 
    189 	initkwds();
    190 	initspaces();
    191 	STACKMAX = INITSTACKMAX;
    192 
    193 	mstack = (stae *)xalloc(sizeof(stae) * STACKMAX);
    194 	sstack = (char *)xalloc(STACKMAX);
    195 
    196 	maxout = 0;
    197 	outfile = NULL;
    198 	resizedivs(MAXOUT);
    199 
    200 	while ((c = getopt(argc, argv, "D:I:PU:d:go:t:")) != -1)
    201 		switch (c) {
    202 		case 'D':               /* define something..*/
    203 			for (p = optarg; *p; p++)
    204 				if (*p == '=')
    205 					break;
    206 			if (*p)
    207 				*p++ = EOS;
    208 			dodefine(optarg, p);
    209 			break;
    210 		case 'I':
    211 			addtoincludepath(optarg);
    212 			break;
    213 		case 'P':
    214 			break;
    215 		case 'U':               /* undefine...       */
    216 			remhash(optarg, TOP);
    217 			break;
    218 		case 'd':
    219 			set_trace_flags(optarg);
    220 			break;
    221 		case 'g':
    222 			mimic_gnu = 1;
    223 			break;
    224 		case 'o':
    225 			trace_file(optarg);
    226                         break;
    227 		case 't':
    228 			mark_traced(optarg, 1);
    229 			break;
    230 		case '?':
    231 		default:
    232 			usage(argv[0]);
    233 		}
    234 
    235         argc -= optind;
    236         argv += optind;
    237 
    238 	active = stdout;		/* default active output     */
    239 	bbase[0] = bufbase;
    240         if (!argc) {
    241  		sp = -1;		/* stack pointer initialized */
    242 		fp = 0; 		/* frame pointer initialized */
    243 		set_input(infile+0, stdin, "stdin");
    244 					/* default input (naturally) */
    245 		macro();
    246 	} else
    247 		for (; argc--; ++argv) {
    248 			p = *argv;
    249 			if (p[0] == '-' && p[1] == EOS)
    250 				set_input(infile, stdin, "stdin");
    251 			else if (fopen_trypath(infile, p) == NULL)
    252 				err(1, "%s", p);
    253 			sp = -1;
    254 			fp = 0;
    255 			macro();
    256 		    	release_input(infile);
    257 		}
    258 
    259 	if (*m4wraps) { 		/* anything for rundown ??   */
    260 		ilevel = 0;		/* in case m4wrap includes.. */
    261 		bufbase = bp = buf;	/* use the entire buffer   */
    262 		pbstr(m4wraps); 	/* user-defined wrapup act   */
    263 		macro();		/* last will and testament   */
    264 	}
    265 
    266 	if (active != stdout)
    267 		active = stdout;	/* reset output just in case */
    268 	for (n = 1; n < maxout; n++)	/* default wrap-up: undivert */
    269 		if (outfile[n] != NULL)
    270 			getdiv(n);
    271 					/* remove bitbucket if used  */
    272 	if (outfile[0] != NULL) {
    273 		(void) fclose(outfile[0]);
    274 	}
    275 
    276 	return 0;
    277 }
    278 
/*
 * do_look_ahead - try to match the multi-character delimiter `token'.
 *
 * The caller has already read `t', which must equal token[0].  The rest of
 * the delimiter is read from the input one character at a time.
 * Used for comment and quoting delimiters.
 *
 * Returns 1 if the whole of `token' was present (its characters stay
 *           consumed);
 *         0 otherwise, after pushing back the mismatching character and
 *           every character of the delimiter read after the first one, so
 *           that the input looks as if only `t' had been read.
 */
static int
do_look_ahead(int t, const char *token)
{
	int pos;	/* index into token of the character being matched */

	assert((unsigned char)t == (unsigned char)token[0]);

	for (pos = 1; token[pos] != '\0'; pos++) {
		t = gpbc();
		if (t != EOF && (unsigned char)t == (unsigned char)token[pos])
			continue;

		/* mismatch: undo in reverse order of reading */
		putback(t);
		while (--pos > 0)
			putback(token[pos]);
		return 0;
	}
	return 1;
}
    306 
/*
 * LOOK_AHEAD(t, token) - true when the character `t' just read starts the
 * delimiter string `token' and the remainder of the delimiter follows in
 * the input (see do_look_ahead()).  EOF never matches.
 *
 * Both arguments and the whole expansion are parenthesized so the macro
 * can be used with compound arguments and inside larger expressions.
 * `t' and `token' are evaluated more than once: pass expressions without
 * side effects.
 */
#define LOOK_AHEAD(t, token) ((t) != EOF && 			\
    (unsigned char)(t) == (unsigned char)(token)[0] && 		\
    do_look_ahead((t), (token)))
    310 
/*
 * macro - the work horse..
 *
 * Reads the current input one character at a time with gpbc() until end of
 * input at the outermost input level, and dispatches on what it sees:
 *
 *  - an identifier ('_' or a letter): collected and looked up by inspect();
 *    either copied through as text or turned into a call frame;
 *  - EOF: error if a macro call is still open, otherwise drop back to the
 *    including file or stop;
 *  - an opening quote: the quoted text is copied with the outermost pair
 *    of quotes removed;
 *  - a comment while no call is open (sp < 0): copied verbatim to `active';
 *  - anything else while no call is open: written straight to `active';
 *  - anything else while a call is open: parentheses and commas delimit
 *    the arguments being collected, other characters are saved as
 *    argument text.
 *
 * A completed call is handed to eval() and the stacks are rewound through
 * PREVEP/PREVSP/PREVFP.  Does not return a value; fatal conditions end the
 * program through errx()/exit().
 */
static void
macro()
{
	char token[MAXTOK+1];	/* identifier text filled in by inspect() */
	int t, l;		/* current character / look-ahead character */
	ndptr p;		/* definition found for token, or nil */
	int  nlpar;		/* current quote nesting depth */

	cycle {
		t = gpbc();
		if (t == '_' || isalpha(t)) {
			p = inspect(t, token);
			/* peek at the character following a known macro name */
			if (p != nil)
				putback(l = gpbc());
			/*
			 * Plain text if the name is not a macro, or if it
			 * needs arguments but no '(' follows.  `l' is only
			 * read when p != nil, i.e. after it has been set.
			 */
			if (p == nil || (l != LPAREN &&
			    (p->type & NEEDARGS) != 0))
				outputstr(token);
			else {
		/*
		 * real thing.. First build a call frame:
		 */
				pushf(fp);	/* previous call frm */
				pushf(p->type); /* type of the call  */
				pushf(0);	/* parenthesis level */
				fp = sp;	/* new frame pointer */
		/*
		 * now push the string arguments:
		 */
				pushs1(p->defn);	/* defn string */
				pushs1(p->name);	/* macro name  */
				pushs(ep);	      	/* start next..*/

				/*
				 * Called without an argument list: evaluate
				 * right away with just the definition and
				 * the name (argc == 2), then pop the frame.
				 */
				if (l != LPAREN && PARLEV == 0)  {
				    /* no bracks  */
					chrsave(EOS);

					if (sp == STACKMAX)
						errx(1, "internal stack overflow");
					eval((const char **) mstack+fp+1, 2,
					    CALTYP);

					ep = PREVEP;	/* flush strspace */
					sp = PREVSP;	/* previous sp..  */
					fp = PREVFP;	/* rewind stack...*/
				}
			}
		} else if (t == EOF) {
			/* a non-empty stack means a call was never closed */
			if (sp > -1) {
				warnx( "unexpected end of input, unclosed parenthesis:");
				dump_stack(paren, PARLEV);
				exit(1);
			}
			if (ilevel <= 0)
				break;			/* all done thanks.. */
			/* end of an included file: resume the includer */
			release_input(infile+ilevel--);
			bufbase = bbase[ilevel];
			continue;
		}
	/*
	 * non-alpha token possibly seen..
	 * [the order of else if .. stmts is important.]
	 */
		else if (LOOK_AHEAD(t,lquote)) {	/* strip quotes */
			nlpar = 0;
			record(quotes, nlpar++);
			/*
			 * Opening quote: scan forward until matching
			 * closing quote has been found.
			 */
			do {

				l = gpbc();
				if (LOOK_AHEAD(l,rquote)) {
					/* inner closing quotes are kept,
					 * the outermost one is dropped */
					if (--nlpar > 0)
						outputstr(rquote);
				} else if (LOOK_AHEAD(l,lquote)) {
					/* nested opening quote: kept */
					record(quotes, nlpar++);
					outputstr(lquote);
				} else if (l == EOF) {
					if (nlpar == 1)
						warnx("unclosed quote:");
					else
						warnx("%d unclosed quotes:", nlpar);
					dump_stack(quotes, nlpar);
					exit(1);
				} else {
					/* quoted text: to the output when no
					 * call is open, else to the argument
					 * being collected */
					if (nlpar > 0) {
						if (sp < 0)
							putc(l, active);
						else
							CHRSAVE(l);
					}
				}
			}
			while (nlpar != 0);
		}

		/* comment outside any macro call: copy it through untouched;
		 * an EOF inside the comment silently ends it */
		else if (sp < 0 && LOOK_AHEAD(t, scommt)) {
			fputs(scommt, active);

			for(;;) {
				t = gpbc();
				if (LOOK_AHEAD(t, ecommt)) {
					fputs(ecommt, active);
					break;
				}
				if (t == EOF)
					break;
				putc(t, active);
			}
		}

		else if (sp < 0) {		/* not in a macro at all */
			putc(t, active);	/* output directly..	 */
		}

		/* from here on a macro call is open and arguments are being
		 * collected */
		else switch(t) {

		case LPAREN:
			/* a nested '(' is part of the argument text; the
			 * one opening the argument list is not saved */
			if (PARLEV > 0)
				chrsave(t);
			while (isspace(l = gpbc()))
				;		/* skip blank, tab, nl.. */
			putback(l);
			record(paren, PARLEV++);
			break;

		case RPAREN:
			if (--PARLEV > 0)
				chrsave(t);
			else {			/* end of argument list */
				chrsave(EOS);

				if (sp == STACKMAX)
					errx(1, "internal stack overflow");

				/* sp-fp entries were pushed for this call */
				eval((const char **) mstack+fp+1, sp-fp,
				    CALTYP);

				ep = PREVEP;	/* flush strspace */
				sp = PREVSP;	/* previous sp..  */
				fp = PREVFP;	/* rewind stack...*/
			}
			break;

		case COMMA:
			/* only a comma at the call's own parenthesis level
			 * separates arguments; whitespace after it is
			 * skipped */
			if (PARLEV == 1) {
				chrsave(EOS);		/* new argument   */
				while (isspace(l = gpbc()))
					;
				putback(l);
				pushs(ep);
			} else
				chrsave(t);
			break;

		default:
			/* a comment inside an argument is saved verbatim,
			 * delimiters included, as argument text */
			if (LOOK_AHEAD(t, scommt)) {
				char *p;
				for (p = scommt; *p; p++)
					chrsave(*p);
				for(;;) {
					t = gpbc();
					if (LOOK_AHEAD(t, ecommt)) {
						for (p = ecommt; *p; p++)
							chrsave(*p);
						break;
					}
					if (t == EOF)
					    break;
					CHRSAVE(t);
				}
			} else
				CHRSAVE(t);		/* stack the char */
			break;
		}
	}
}
    492 
    493 /*
    494  * output string directly, without pushing it for reparses.
    495  */
    496 void
    497 outputstr(s)
    498 	const char *s;
    499 {
    500 	if (sp < 0)
    501 		while (*s)
    502 			putc(*s++, active);
    503 	else
    504 		while (*s)
    505 			CHRSAVE(*s++);
    506 }
    507 
    508 /*
    509  * build an input token..
    510  * consider only those starting with _ or A-Za-z. This is a
    511  * combo with lookup to speed things up.
    512  */
    513 static ndptr
    514 inspect(c, tp)
    515 	int c;
    516 	char *tp;
    517 {
    518 	char *name = tp;
    519 	char *etp = tp+MAXTOK;
    520 	ndptr p;
    521 	unsigned int h;
    522 
    523 	h = *tp++ = c;
    524 
    525 	while ((isalnum(c = gpbc()) || c == '_') && tp < etp)
    526 		h = (h << 5) + h + (*tp++ = c);
    527 	if (c != EOF)
    528 		PUTBACK(c);
    529 	*tp = EOS;
    530 	/* token is too long, it won't match anything, but it can still
    531 	 * be output. */
    532 	if (tp == ep) {
    533 		outputstr(name);
    534 		while (isalnum(c = gpbc()) || c == '_') {
    535 			if (sp < 0)
    536 				putc(c, active);
    537 			else
    538 				CHRSAVE(c);
    539 		}
    540 		*name = EOS;
    541 		return nil;
    542 	}
    543 
    544 	for (p = hashtab[h % HASHSIZE]; p != nil; p = p->nxtptr)
    545 		if (h == p->hv && STREQ(name, p->name))
    546 			break;
    547 	return p;
    548 }
    549 
    550 /*
    551  * initkwds - initialise m4 keywords as fast as possible.
    552  * This very similar to install, but without certain overheads,
    553  * such as calling lookup. Malloc is not used for storing the
    554  * keyword strings, since we simply use the static pointers
    555  * within keywrds block.
    556  */
    557 static void
    558 initkwds()
    559 {
    560 	size_t i;
    561 	unsigned int h;
    562 	ndptr p;
    563 	char *k;
    564 
    565 	for (i = 0; i < MAXKEYS; i++) {
    566 		k = (char *)keywrds[i].knam;
    567 		if (m4prefix) {
    568 			size_t klen = strlen(k);
    569 			char *newk = malloc(klen + 4);
    570 
    571 			if (snprintf(newk, klen+4, "m4_%s", k) == -1)
    572 				err(1, "snprintf");
    573 			keywrds[i].knam = newk;
    574 			k = newk;
    575 		}
    576 		h = hash(k);
    577 		p = (ndptr) xalloc(sizeof(struct ndblock));
    578 		p->nxtptr = hashtab[h % HASHSIZE];
    579 		hashtab[h % HASHSIZE] = p;
    580 		p->name = xstrdup(keywrds[i].knam);
    581 		p->defn = null;
    582 		p->hv = h;
    583 		p->type = keywrds[i].ktyp & TYPEMASK;
    584 		if ((keywrds[i].ktyp & NOARGS) == 0)
    585 			p->type |= NEEDARGS;
    586 	}
    587 }
    588 
    589 /* Look up a builtin type, even if overridden by the user */
    590 int
    591 builtin_type(key)
    592 	const char *key;
    593 {
    594 	int i;
    595 
    596 	for (i = 0; i != MAXKEYS; i++)
    597 		if (STREQ(keywrds[i].knam, key))
    598 			return keywrds[i].ktyp;
    599 	return -1;
    600 }
    601 
    602 const char *
    603 builtin_realname(n)
    604 	int n;
    605 {
    606 	int i;
    607 
    608 	for (i = 0; i != MAXKEYS; i++)
    609 		if (((keywrds[i].ktyp ^ n) & TYPEMASK) == 0)
    610 			return keywrds[i].knam;
    611 	return NULL;
    612 }
    613 
    614 static void
    615 record(t, lev)
    616 	struct position *t;
    617 	int lev;
    618 {
    619 	if (lev < MAXRECORD) {
    620 		t[lev].name = CURRENT_NAME;
    621 		t[lev].line = CURRENT_LINE;
    622 	}
    623 }
    624 
    625 static void
    626 dump_stack(t, lev)
    627 	struct position *t;
    628 	int lev;
    629 {
    630 	int i;
    631 
    632 	for (i = 0; i < lev; i++) {
    633 		if (i == MAXRECORD) {
    634 			fprintf(stderr, "   ...\n");
    635 			break;
    636 		}
    637 		fprintf(stderr, "   %s at line %lu\n",
    638 			t[i].name, t[i].line);
    639 	}
    640 }
    641 
    642 
    643 static void
    644 enlarge_stack()
    645 {
    646 	STACKMAX *= 2;
    647 	mstack = realloc(mstack, sizeof(stae) * STACKMAX);
    648 	sstack = realloc(sstack, STACKMAX);
    649 	if (mstack == NULL || sstack == NULL)
    650 		errx(1, "Evaluation stack overflow (%lu)",
    651 		    (unsigned long)STACKMAX);
    652 }
    653