Home | History | Annotate | Line # | Download | only in m4
main.c revision 1.39
      1 /*	$OpenBSD: main.c,v 1.77 2009/10/14 17:19:47 sthen Exp $	*/
      2 /*	$NetBSD: main.c,v 1.39 2009/11/06 15:13:27 joerg Exp $	*/
      3 
      4 /*-
      5  * Copyright (c) 1989, 1993
      6  *	The Regents of the University of California.  All rights reserved.
      7  *
      8  * This code is derived from software contributed to Berkeley by
      9  * Ozan Yigit at York University.
     10  *
     11  * Redistribution and use in source and binary forms, with or without
     12  * modification, are permitted provided that the following conditions
     13  * are met:
     14  * 1. Redistributions of source code must retain the above copyright
     15  *    notice, this list of conditions and the following disclaimer.
     16  * 2. Redistributions in binary form must reproduce the above copyright
     17  *    notice, this list of conditions and the following disclaimer in the
     18  *    documentation and/or other materials provided with the distribution.
     19  * 3. Neither the name of the University nor the names of its contributors
     20  *    may be used to endorse or promote products derived from this software
     21  *    without specific prior written permission.
     22  *
     23  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     24  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     26  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     27  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     28  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     29  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     30  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     31  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     32  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     33  * SUCH DAMAGE.
     34  */
     35 
     36 /*
     37  * main.c
     38  * Facility: m4 macro processor
     39  * by: oz
     40  */
     41 #if HAVE_NBTOOL_CONFIG_H
     42 #include "nbtool_config.h"
     43 #endif
     44 #include <sys/cdefs.h>
     45 __RCSID("$NetBSD: main.c,v 1.39 2009/11/06 15:13:27 joerg Exp $");
     46 #include <assert.h>
     47 #include <signal.h>
     48 #include <err.h>
     49 #include <errno.h>
     50 #include <unistd.h>
     51 #include <stdio.h>
     52 #include <ctype.h>
     53 #include <string.h>
     54 #include <stddef.h>
     55 #include <stdint.h>
     56 #include <stdlib.h>
     57 #include <ohash.h>
     58 #include "mdef.h"
     59 #include "stdd.h"
     60 #include "extern.h"
     61 #include "pathnames.h"
     62 
/*
 * Global state of the m4 machine defined by this file.
 */
ndptr hashtab[HASHSIZE];	/* hash table for macros etc.  */
stae *mstack;		 	/* stack of m4 machine, allocated in main() */
char *sstack;		 	/* shadow stack, for string space extension */
static size_t STACKMAX;		/* current maximum size of stack (entries) */
int sp; 			/* current m4 stack pointer; < 0 when not in a macro */
int fp; 			/* m4 call frame pointer       */
struct input_file infile[MAXINP];/* input file stack (0=stdin)  */
FILE **outfile;			/* diversion array(0=bitbucket)*/
int maxout;			/* number of slots in outfile  */
FILE *active;			/* active output file pointer  */
int ilevel = 0; 		/* input file stack pointer (index into infile) */
int oindex = 0; 		/* diversion index..	       */
const char *null = "";          /* shared empty string         */
char **m4wraps = NULL;		/* m4wraps array.     	       */
int maxwraps = 0;		/* size of m4wraps array       */
int wrapindex = 0;		/* current offset in m4wraps   */
char lquote[MAXCCHARS+1] = {LQUOTE};	/* left quote delimiter  (`)   */
char rquote[MAXCCHARS+1] = {RQUOTE};	/* right quote delimiter (')   */
char scommt[MAXCCHARS+1] = {SCOMMT};	/* start delimiter for comment */
char ecommt[MAXCCHARS+1] = {ECOMMT};	/* end delimiter for comment   */
int  synch_lines = 0;		/* line synchronisation for C preprocessor (-s) */
int  prefix_builtins = 0;	/* -P option to prefix builtin keywords */
     85 
/*
 * One entry of the builtin keyword table: the macro name and its type
 * code, optionally or'ed with the NOARGS flag.
 */
struct keyblk {
        const char *knam;	/* keyword name */
        int	ktyp;           /* keyword type */
};
     90 
/*
 * Table of builtin macros installed by initkwds().
 *
 * An entry flagged NOARGS is installed without NEEDARGS.  For every other
 * entry initkwds() adds NEEDARGS, and macro() then treats the name as
 * plain text unless it is immediately followed by a left parenthesis.
 * Note that "expr" and "eval" share the same type code.
 */
struct keyblk keywrds[] = {	/* m4 keywords to be installed */
	{ "include",      INCLTYPE },
	{ "sinclude",     SINCTYPE },
	{ "define",       DEFITYPE },
	{ "defn",         DEFNTYPE },
	{ "divert",       DIVRTYPE | NOARGS },
	{ "expr",         EXPRTYPE },
	{ "eval",         EXPRTYPE },
	{ "substr",       SUBSTYPE },
	{ "ifelse",       IFELTYPE },
	{ "ifdef",        IFDFTYPE },
	{ "len",          LENGTYPE },
	{ "incr",         INCRTYPE },
	{ "decr",         DECRTYPE },
	{ "dnl",          DNLNTYPE | NOARGS },
	{ "changequote",  CHNQTYPE | NOARGS },
	{ "changecom",    CHNCTYPE | NOARGS },
	{ "index",        INDXTYPE },
#ifdef EXTENDED
	{ "paste",        PASTTYPE },
	{ "spaste",       SPASTYPE },
    	/* Newer extensions, needed to handle gnu-m4 scripts */
	{ "indir",        INDIRTYPE},
	{ "builtin",      BUILTINTYPE},
	{ "patsubst",	  PATSTYPE},
	{ "regexp",	  REGEXPTYPE},
	{ "esyscmd",	  ESYSCMDTYPE},
	{ "__file__",	  FILENAMETYPE | NOARGS},
	{ "__line__",	  LINETYPE | NOARGS},
#endif
	{ "popdef",       POPDTYPE },
	{ "pushdef",      PUSDTYPE },
	{ "dumpdef",      DUMPTYPE | NOARGS },
	{ "shift",        SHIFTYPE | NOARGS },
	{ "translit",     TRNLTYPE },
	{ "undefine",     UNDFTYPE },
	{ "undivert",     UNDVTYPE | NOARGS },
	{ "divnum",       DIVNTYPE | NOARGS },
	{ "maketemp",     MKTMTYPE },
	{ "errprint",     ERRPTYPE | NOARGS },
	{ "m4wrap",       M4WRTYPE | NOARGS },
	{ "m4exit",       EXITTYPE | NOARGS },
	{ "syscmd",       SYSCTYPE },
	{ "sysval",       SYSVTYPE | NOARGS },
	{ "traceon",	  TRACEONTYPE | NOARGS },
	{ "traceoff",	  TRACEOFFTYPE | NOARGS },

	/* The platform name is predefined as a macro of type SELFTYPE. */
#if defined(unix) || defined(__unix__)
	{ "unix",         SELFTYPE | NOARGS },
#else
#ifdef vms
	{ "vms",          SELFTYPE | NOARGS },
#endif
#endif
};

/* Number of entries in keywrds[]. */
#define MAXKEYS	(sizeof(keywrds)/sizeof(struct keyblk))
    148 
    149 extern int optind;
    150 extern char *optarg;
    151 
/*
 * Diagnostic bookkeeping: for each nesting level, the input name and line
 * at which an opening quote (quotes[]) or an opening parenthesis (paren[])
 * was seen.  record() fills the slots and dump_stack() prints them when an
 * unterminated construct is reported.  Only the first MAXRECORD levels are
 * remembered.
 */
#define MAXRECORD 50
static struct position {
	char *name;		/* input name at the time of recording */
	unsigned long line;	/* input line at the time of recording */
} quotes[MAXRECORD], paren[MAXRECORD];
    157 
    158 static void record(struct position *, int);
    159 static void dump_stack(struct position *, int);
    160 
    161 static void macro(void);
    162 static void initkwds(void);
    163 static ndptr inspect(int, char *);
    164 static int do_look_ahead(int, const char *);
    165 static void reallyoutputstr(const char *);
    166 static void reallyputchar(int);
    167 
    168 static void enlarge_stack(void);
    169 
    170 int main(int, char *[]);
    171 
    172 int
    173 main(int argc, char *argv[])
    174 {
    175 	int c;
    176 	int n;
    177 	char *p;
    178 
    179 	setprogname(argv[0]);
    180 
    181 	if (signal(SIGINT, SIG_IGN) != SIG_IGN)
    182 		signal(SIGINT, onintr);
    183 
    184 	init_macros();
    185 	initspaces();
    186 	STACKMAX = INITSTACKMAX;
    187 
    188 	mstack = (stae *)xalloc(sizeof(stae) * STACKMAX, NULL);
    189 	sstack = (char *)xalloc(STACKMAX, NULL);
    190 
    191 	maxout = 0;
    192 	outfile = NULL;
    193 	resizedivs(MAXOUT);
    194 
    195 	while ((c = getopt(argc, argv, "gst:d:D:U:o:I:P")) != -1)
    196 		switch(c) {
    197 
    198 		case 'D':               /* define something..*/
    199 			for (p = optarg; *p; p++)
    200 				if (*p == '=')
    201 					break;
    202 			if (*p)
    203 				*p++ = EOS;
    204 			dodefine(optarg, p);
    205 			break;
    206 		case 'I':
    207 			addtoincludepath(optarg);
    208 			break;
    209 		case 'P':
    210 			prefix_builtins = 1;
    211 			break;
    212 		case 'U':               /* undefine...       */
    213 			macro_popdef(optarg);
    214 			break;
    215 		case 'g':
    216 			mimic_gnu = 1;
    217 			break;
    218 		case 'd':
    219 			set_trace_flags(optarg);
    220 			break;
    221 		case 's':
    222 			synch_lines = 1;
    223 			break;
    224 		case 't':
    225 			mark_traced(optarg, 1);
    226 			break;
    227 		case 'o':
    228 			trace_file(optarg);
    229                         break;
    230 		case '?':
    231 			usage();
    232 		}
    233 
    234         argc -= optind;
    235         argv += optind;
    236 
    237 	initkwds();
    238 	if (mimic_gnu)
    239 		setup_builtin("format", FORMATTYPE);
    240 
    241 	active = stdout;		/* default active output     */
    242 	bbase[0] = bufbase;
    243         if (!argc) {
    244  		sp = -1;		/* stack pointer initialized */
    245 		fp = 0; 		/* frame pointer initialized */
    246 		set_input(infile+0, stdin, "stdin");
    247 					/* default input (naturally) */
    248 		macro();
    249 	} else
    250 		for (; argc--; ++argv) {
    251 			p = *argv;
    252 			if (p[0] == '-' && p[1] == EOS)
    253 				set_input(infile, stdin, "stdin");
    254 			else if (fopen_trypath(infile, p) == NULL)
    255 				err(1, "%s", p);
    256 			sp = -1;
    257 			fp = 0;
    258 			macro();
    259 		    	release_input(infile);
    260 		}
    261 
    262 	if (wrapindex) {
    263 		int i;
    264 
    265 		ilevel = 0;		/* in case m4wrap includes.. */
    266 		bufbase = bp = buf;	/* use the entire buffer   */
    267 		if (mimic_gnu) {
    268 			while (wrapindex != 0) {
    269 				for (i = 0; i < wrapindex; i++)
    270 					pbstr(m4wraps[i]);
    271 				wrapindex =0;
    272 				macro();
    273 			}
    274 		} else {
    275 			for (i = 0; i < wrapindex; i++) {
    276 				pbstr(m4wraps[i]);
    277 				macro();
    278 		    	}
    279 		}
    280 	}
    281 
    282 	if (active != stdout)
    283 		active = stdout;	/* reset output just in case */
    284 	for (n = 1; n < maxout; n++)	/* default wrap-up: undivert */
    285 		if (outfile[n] != NULL)
    286 			getdiv(n);
    287 					/* remove bitbucket if used  */
    288 	if (outfile[0] != NULL) {
    289 		(void) fclose(outfile[0]);
    290 	}
    291 
    292 	return 0;
    293 }
    294 
/*
 * Check whether the (possibly multi-character) delimiter `token' is next
 * on the input.  The caller has already read `t', which must equal
 * `token[0]'.  Used for comment and quoting delimiters.
 *
 * Returns 1 if the rest of `token' follows; those characters have then
 *           been consumed from the input.
 *         0 otherwise; every character read here has been pushed back,
 *           so only `t' itself remains consumed.
 */
static int
do_look_ahead(int t, const char *token)
{
	int pos;	/* index of the delimiter character being matched */
	int k;

	assert((unsigned char)t == (unsigned char)token[0]);

	for (pos = 1; token[pos] != '\0'; pos++) {
		t = gpbc();
		if (t != EOF && (unsigned char)t == (unsigned char)token[pos])
			continue;

		/*
		 * Mismatch: push back the offending character first, then
		 * the already matched characters token[pos-1] .. token[1],
		 * so that they will be read again in their original order.
		 */
		pushback(t);
		for (k = pos - 1; k >= 1; k--)
			pushback(token[k]);
		return 0;
	}
	return 1;
}
    320 
/*
 * LOOK_AHEAD(t, token): true when the character `t' just read starts the
 * delimiter `token' and the remainder of `token' follows on the input
 * (see do_look_ahead()).  The cheap first-character comparison is done
 * inline so that the function is only called for plausible candidates.
 *
 * Both parameters are fully parenthesized so that expression arguments
 * expand correctly.  `t' is evaluated more than once, so it must not have
 * side effects.
 */
#define LOOK_AHEAD(t, token) ((t) != EOF && 		\
    (unsigned char)(t)==(unsigned char)(token)[0] && 	\
    do_look_ahead((t),(token)))
    324 
    325 /*
    326  * macro - the work horse..
    327  */
    328 static void
    329 macro(void)
    330 {
    331 	char token[MAXTOK+1];
    332 	int t, l;
    333 	ndptr p;
    334 	int  nlpar;
    335 
    336 	cycle {
    337 		t = gpbc();
    338 
    339 		if (LOOK_AHEAD(t,lquote)) {	/* strip quotes */
    340 			nlpar = 0;
    341 			record(quotes, nlpar++);
    342 			/*
    343 			 * Opening quote: scan forward until matching
    344 			 * closing quote has been found.
    345 			 */
    346 			do {
    347 
    348 				l = gpbc();
    349 				if (LOOK_AHEAD(l,rquote)) {
    350 					if (--nlpar > 0)
    351 						outputstr(rquote);
    352 				} else if (LOOK_AHEAD(l,lquote)) {
    353 					record(quotes, nlpar++);
    354 					outputstr(lquote);
    355 				} else if (l == EOF) {
    356 					if (nlpar == 1)
    357 						warnx("unclosed quote:");
    358 					else
    359 						warnx("%d unclosed quotes:", nlpar);
    360 					dump_stack(quotes, nlpar);
    361 					exit(1);
    362 				} else {
    363 					if (nlpar > 0) {
    364 						if (sp < 0)
    365 							reallyputchar(l);
    366 						else
    367 							CHRSAVE(l);
    368 					}
    369 				}
    370 			}
    371 			while (nlpar != 0);
    372 		} else if (sp < 0 && LOOK_AHEAD(t, scommt)) {
    373 			reallyoutputstr(scommt);
    374 
    375 			for(;;) {
    376 				t = gpbc();
    377 				if (LOOK_AHEAD(t, ecommt)) {
    378 					reallyoutputstr(ecommt);
    379 					break;
    380 				}
    381 				if (t == EOF)
    382 					break;
    383 				reallyputchar(t);
    384 			}
    385 		} else if (t == '_' || isalpha(t)) {
    386 			p = inspect(t, token);
    387 			if (p != NULL)
    388 				pushback(l = gpbc());
    389 			if (p == NULL || (l != LPAREN &&
    390 			    (macro_getdef(p)->type & NEEDARGS) != 0))
    391 				outputstr(token);
    392 			else {
    393 		/*
    394 		 * real thing.. First build a call frame:
    395 		 */
    396 				pushf(fp);	/* previous call frm */
    397 				pushf(macro_getdef(p)->type); /* type of the call  */
    398 				pushf(is_traced(p));
    399 				pushf(0);	/* parenthesis level */
    400 				fp = sp;	/* new frame pointer */
    401 		/*
    402 		 * now push the string arguments:
    403 		 */
    404 				pushs1(macro_getdef(p)->defn);	/* defn string */
    405 				pushs1((char *)macro_name(p));	/* macro name  */
    406 				pushs(ep);	      	/* start next..*/
    407 
    408 				if (l != LPAREN && PARLEV == 0)  {
    409 				    /* no bracks  */
    410 					chrsave(EOS);
    411 
    412 					if ((size_t)sp == STACKMAX)
    413 						errx(1, "internal stack overflow");
    414 					eval((const char **) mstack+fp+1, 2,
    415 					    CALTYP, TRACESTATUS);
    416 
    417 					ep = PREVEP;	/* flush strspace */
    418 					sp = PREVSP;	/* previous sp..  */
    419 					fp = PREVFP;	/* rewind stack...*/
    420 				}
    421 			}
    422 		} else if (t == EOF) {
    423 			if (sp > -1 && ilevel <= 0) {
    424 				warnx( "unexpected end of input, unclosed parenthesis:");
    425 				dump_stack(paren, PARLEV);
    426 				exit(1);
    427 			}
    428 			if (ilevel <= 0)
    429 				break;			/* all done thanks.. */
    430 			release_input(infile+ilevel--);
    431 			emit_synchline();
    432 			bufbase = bbase[ilevel];
    433 			continue;
    434 		} else if (sp < 0) {		/* not in a macro at all */
    435 			reallyputchar(t);	/* output directly..	 */
    436 		}
    437 
    438 		else switch(t) {
    439 
    440 		case LPAREN:
    441 			if (PARLEV > 0)
    442 				chrsave(t);
    443 			while (isspace(l = gpbc())) /* skip blank, tab, nl.. */
    444 				if (PARLEV > 0)
    445 					chrsave(l);
    446 			pushback(l);
    447 			record(paren, PARLEV++);
    448 			break;
    449 
    450 		case RPAREN:
    451 			if (--PARLEV > 0)
    452 				chrsave(t);
    453 			else {			/* end of argument list */
    454 				chrsave(EOS);
    455 
    456 				if ((size_t)sp == STACKMAX)
    457 					errx(1, "internal stack overflow");
    458 
    459 				eval((const char **) mstack+fp+1, sp-fp,
    460 				    CALTYP, TRACESTATUS);
    461 
    462 				ep = PREVEP;	/* flush strspace */
    463 				sp = PREVSP;	/* previous sp..  */
    464 				fp = PREVFP;	/* rewind stack...*/
    465 			}
    466 			break;
    467 
    468 		case COMMA:
    469 			if (PARLEV == 1) {
    470 				chrsave(EOS);		/* new argument   */
    471 				while (isspace(l = gpbc()))
    472 					;
    473 				pushback(l);
    474 				pushs(ep);
    475 			} else
    476 				chrsave(t);
    477 			break;
    478 
    479 		default:
    480 			if (LOOK_AHEAD(t, scommt)) {
    481 				char *q;
    482 				for (q = scommt; *q; p++)
    483 					chrsave(*q);
    484 				for(;;) {
    485 					t = gpbc();
    486 					if (LOOK_AHEAD(t, ecommt)) {
    487 						for (q = ecommt; *q; q++)
    488 							chrsave(*q);
    489 						break;
    490 					}
    491 					if (t == EOF)
    492 					    break;
    493 					CHRSAVE(t);
    494 				}
    495 			} else
    496 				CHRSAVE(t);		/* stack the char */
    497 			break;
    498 		}
    499 	}
    500 }
    501 
    502 /*
    503  * output string directly, without pushing it for reparses.
    504  */
    505 void
    506 outputstr(const char *s)
    507 {
    508 	if (sp < 0)
    509 		reallyoutputstr(s);
    510 	else
    511 		while (*s)
    512 			CHRSAVE(*s++);
    513 }
    514 
    515 void
    516 reallyoutputstr(const char *s)
    517 {
    518 	if (synch_lines) {
    519 		while (*s) {
    520 			fputc(*s, active);
    521 			if (*s++ == '\n') {
    522 				infile[ilevel].synch_lineno++;
    523 				if (infile[ilevel].synch_lineno !=
    524 				    infile[ilevel].lineno)
    525 					do_emit_synchline();
    526 			}
    527 		}
    528 	} else
    529 		fputs(s, active);
    530 }
    531 
    532 void
    533 reallyputchar(int c)
    534 {
    535 	putc(c, active);
    536 	if (synch_lines && c == '\n') {
    537 		infile[ilevel].synch_lineno++;
    538 		if (infile[ilevel].synch_lineno != infile[ilevel].lineno)
    539 			do_emit_synchline();
    540 	}
    541 }
    542 
    543 /*
    544  * build an input token..
    545  * consider only those starting with _ or A-Za-z.
    546  */
    547 static ndptr
    548 inspect(int c, char *tp)
    549 {
    550 	char *name = tp;
    551 	char *etp = tp+MAXTOK;
    552 	ndptr p;
    553 
    554 	*tp++ = c;
    555 
    556 	while ((isalnum(c = gpbc()) || c == '_') && tp < etp)
    557 		*tp++ = c;
    558 	if (c != EOF)
    559 		PUSHBACK(c);
    560 	*tp = EOS;
    561 	/* token is too long, it won't match anything, but it can still
    562 	 * be output. */
    563 	if (tp == ep) {
    564 		outputstr(name);
    565 		while (isalnum(c = gpbc()) || c == '_') {
    566 			if (sp < 0)
    567 				reallyputchar(c);
    568 			else
    569 				CHRSAVE(c);
    570 		}
    571 		*name = EOS;
    572 		return NULL;
    573 	}
    574 
    575 	p = ohash_find(&macros, ohash_qlookupi(&macros, name, (const char **)&tp));
    576 	if (p == NULL)
    577 		return NULL;
    578 	if (macro_getdef(p) == NULL)
    579 		return NULL;
    580 	return p;
    581 }
    582 
    583 /*
    584  * initkwds - initialise m4 keywords as fast as possible.
    585  * This very similar to install, but without certain overheads,
    586  * such as calling lookup. Malloc is not used for storing the
    587  * keyword strings, since we simply use the static pointers
    588  * within keywrds block.
    589  */
    590 static void
    591 initkwds(void)
    592 {
    593 	unsigned int type;
    594 	size_t i;
    595 
    596 	for (i = 0; i < MAXKEYS; i++) {
    597 		type = keywrds[i].ktyp & TYPEMASK;
    598 		if ((keywrds[i].ktyp & NOARGS) == 0)
    599 			type |= NEEDARGS;
    600 		setup_builtin(keywrds[i].knam, type);
    601 	}
    602 }
    603 
    604 static void
    605 record(struct position *t, int lev)
    606 {
    607 	if (lev < MAXRECORD) {
    608 		t[lev].name = CURRENT_NAME;
    609 		t[lev].line = CURRENT_LINE;
    610 	}
    611 }
    612 
    613 static void
    614 dump_stack(struct position *t, int lev)
    615 {
    616 	int i;
    617 
    618 	for (i = 0; i < lev; i++) {
    619 		if (i == MAXRECORD) {
    620 			fprintf(stderr, "   ...\n");
    621 			break;
    622 		}
    623 		fprintf(stderr, "   %s at line %lu\n",
    624 			t[i].name, t[i].line);
    625 	}
    626 }
    627 
    628 
    629 static void
    630 enlarge_stack(void)
    631 {
    632 	STACKMAX += STACKMAX/2;
    633 	mstack = xrealloc(mstack, sizeof(stae) * STACKMAX,
    634 	    "Evaluation stack overflow (%lu)",
    635 	    (unsigned long)STACKMAX);
    636 	sstack = xrealloc(sstack, STACKMAX,
    637 	    "Evaluation stack overflow (%lu)",
    638 	    (unsigned long)STACKMAX);
    639 }
    640