Home | History | Annotate | Line # | Download | only in m4
main.c revision 1.1
      1 /*
      2  * Copyright (c) 1989 The Regents of the University of California.
      3  * All rights reserved.
      4  *
      5  * This code is derived from software contributed to Berkeley by
      6  * Ozan Yigit.
      7  *
      8  * Redistribution and use in source and binary forms, with or without
      9  * modification, are permitted provided that the following conditions
     10  * are met:
     11  * 1. Redistributions of source code must retain the above copyright
     12  *    notice, this list of conditions and the following disclaimer.
     13  * 2. Redistributions in binary form must reproduce the above copyright
     14  *    notice, this list of conditions and the following disclaimer in the
     15  *    documentation and/or other materials provided with the distribution.
     16  * 3. All advertising materials mentioning features or use of this software
     17  *    must display the following acknowledgement:
     18  *	This product includes software developed by the University of
     19  *	California, Berkeley and its contributors.
     20  * 4. Neither the name of the University nor the names of its contributors
     21  *    may be used to endorse or promote products derived from this software
     22  *    without specific prior written permission.
     23  *
     24  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     34  * SUCH DAMAGE.
     35  */
     36 
     37 #ifndef lint
     38 static char sccsid[] = "@(#)main.c	5.6 (Berkeley) 3/6/91";
     39 #endif /* not lint */
     40 
     41 /*
     42  * main.c
     43  * Facility: m4 macro processor
     44  * by: oz
     45  */
     46 
     47 #include <signal.h>
     48 #include <unistd.h>
     49 #include <stdio.h>
     50 #include <stdlib.h>
     51 #include <string.h>
     52 #include "mdef.h"
     53 #include "pathnames.h"
     54 
     55 /*
     56  * m4 - macro processor
     57  *
     58  * PD m4 is based on the macro tool distributed with the software
     59  * tools (VOS) package, and described in the "SOFTWARE TOOLS" and
     60  * "SOFTWARE TOOLS IN PASCAL" books. It has been expanded to include
     61  * most of the command set of SysV m4, the standard UN*X macro processor.
     62  *
     63  * Since both PD m4 and UN*X m4 are based on SOFTWARE TOOLS macro,
     64  * there may be certain implementation similarities between
     65  * the two. The PD m4 was produced without ANY references to m4
     66  * sources.
     67  *
     68  * References:
     69  *
     70  *	Software Tools distribution: macro
     71  *
     72  *	Kernighan, Brian W. and P. J. Plauger, SOFTWARE
     73  *	TOOLS IN PASCAL, Addison-Wesley, Mass. 1981
     74  *
     75  *	Kernighan, Brian W. and P. J. Plauger, SOFTWARE
     76  *	TOOLS, Addison-Wesley, Mass. 1976
     77  *
     78  *	Kernighan, Brian W. and Dennis M. Ritchie,
     79  *	THE M4 MACRO PROCESSOR, Unix Programmer's Manual,
     80  *	Seventh Edition, Vol. 2, Bell Telephone Labs, 1979
     81  *
     82  *	System V man page for M4
     83  *
     84  * Modification History:
     85  *
     86  * Jan 28 1986 Oz	Break the whole thing into little
     87  *			pieces, for easier (?) maintenance.
     88  *
     89  * Dec 12 1985 Oz	Optimize the code, try to squeeze
     90  *			few microseconds out..
     91  *
     92  * Dec 05 1985 Oz	Add getopt interface, define (-D),
     93  *			undefine (-U) options.
     94  *
     95  * Oct 21 1985 Oz	Clean up various bugs, add comment handling.
     96  *
     97  * June 7 1985 Oz	Add some of SysV m4 stuff (m4wrap, pushdef,
     98  *			popdef, decr, shift etc.).
     99  *
    100  * June 5 1985 Oz	Initial cut.
    101  *
    102  * Implementation Notes:
    103  *
    104  * [1]	PD m4 uses a different (and simpler) stack mechanism than the one
    105  *	described in Software Tools and Software Tools in Pascal books.
    106  *	The triple stack nonsense is replaced with a single stack containing
    107  *	the call frames and the arguments. Each frame is back-linked to a
    108  * 	previous stack frame, which enables us to rewind the stack after
    109  * 	each nested call is completed. Each argument is a character pointer
    110  *	to the beginning of the argument string within the string space.
    111  *	The only exceptions to this are (*) arg 0 and arg 1, which are
    112  * 	the macro definition and macro name strings, stored dynamically
    113  *	for the hash table.
    114  *
    115  *	    .					   .
    116  *	|   .	|  <-- sp			|  .  |
    117  *	+-------+				+-----+
    118  *	| arg 3 ------------------------------->| str |
    119  *	+-------+				|  .  |
    120  *	| arg 2 --------------+ 		   .
    121  *	+-------+	      |
    122  *	    *		      |			|     |
    123  *	+-------+	      | 		+-----+
    124  *	| plev	|  <-- fp     +---------------->| str |
    125  *	+-------+				|  .  |
    126  *	| type	|				   .
    127  *	+-------+
    128  *	| prcf	-----------+		plev: paren level
    129  *	+-------+  	   |		type: call type
    130  *	|   .	| 	   |		prcf: prev. call frame
    131  *	    .	   	   |
    132  *	+-------+	   |
    133  *	|	<----------+
    134  *	+-------+
    135  *
    136  * [2]	We have three types of null values:
    137  *
    138  *		nil  - nodeblock pointer type 0
    139  *		null - null string ("")
    140  *		NULL - Stdio-defined NULL
    141  *
    142  */
    143 
    144 ndptr hashtab[HASHSIZE];	/* hash table for macros etc.  */
    145 char buf[BUFSIZE];		/* push-back buffer	       */
    146 char *bp = buf; 		/* first available character   */
    147 char *endpbb = buf+BUFSIZE;	/* end of push-back buffer     */
    148 stae mstack[STACKMAX+1]; 	/* stack of m4 machine         */
    149 char strspace[STRSPMAX+1];	/* string space for evaluation */
    150 char *ep = strspace;		/* first free char in strspace */
    151 char *endest= strspace+STRSPMAX;/* end of string space	       */
    152 int sp; 			/* current m4  stack pointer   */
    153 int fp; 			/* m4 call frame pointer       */
    154 FILE *infile[MAXINP];		/* input file stack (0=stdin)  */
    155 FILE *outfile[MAXOUT];		/* diversion array(0=bitbucket)*/
    156 FILE *active;			/* active output file pointer  */
    157 char *m4temp;			/* filename for diversions     */
    158 int ilevel = 0; 		/* input file stack pointer    */
    159 int oindex = 0; 		/* diversion index..	       */
    160 char *null = "";                /* as it says.. just a null..  */
    161 char *m4wraps = "";             /* m4wrap string default..     */
    162 char lquote = LQUOTE;		/* left quote character  (`)   */
    163 char rquote = RQUOTE;		/* right quote character (')   */
    164 char scommt = SCOMMT;		/* start character for comment */
    165 char ecommt = ECOMMT;		/* end character for comment   */
    166 struct keyblk keywrds[] = {	/* m4 keywords to be installed */
    167 	"include",      INCLTYPE,
    168 	"sinclude",     SINCTYPE,
    169 	"define",       DEFITYPE,
    170 	"defn",         DEFNTYPE,
    171 	"divert",       DIVRTYPE,
    172 	"expr",         EXPRTYPE,
    173 	"eval",         EXPRTYPE,
    174 	"substr",       SUBSTYPE,
    175 	"ifelse",       IFELTYPE,
    176 	"ifdef",        IFDFTYPE,
    177 	"len",          LENGTYPE,
    178 	"incr",         INCRTYPE,
    179 	"decr",         DECRTYPE,
    180 	"dnl",          DNLNTYPE,
    181 	"changequote",  CHNQTYPE,
    182 	"changecom",    CHNCTYPE,
    183 	"index",        INDXTYPE,
    184 #ifdef EXTENDED
    185 	"paste",        PASTTYPE,
    186 	"spaste",       SPASTYPE,
    187 #endif
    188 	"popdef",       POPDTYPE,
    189 	"pushdef",      PUSDTYPE,
    190 	"dumpdef",      DUMPTYPE,
    191 	"shift",        SHIFTYPE,
    192 	"translit",     TRNLTYPE,
    193 	"undefine",     UNDFTYPE,
    194 	"undivert",     UNDVTYPE,
    195 	"divnum",       DIVNTYPE,
    196 	"maketemp",     MKTMTYPE,
    197 	"errprint",     ERRPTYPE,
    198 	"m4wrap",       M4WRTYPE,
    199 	"m4exit",       EXITTYPE,
    200 	"syscmd",       SYSCTYPE,
    201 	"sysval",       SYSVTYPE,
    202 	"unix",         MACRTYPE,
    203 };
    204 
    205 #define MAXKEYS	(sizeof(keywrds)/sizeof(struct keyblk))
    206 
    207 extern ndptr lookup();
    208 extern ndptr addent();
    209 extern void onintr();
    210 
    211 extern int optind;
    212 extern char *optarg;
    213 
    214 main(argc,argv)
    215 	int argc;
    216 	char **argv;
    217 {
    218 	register int c;
    219 	register int n;
    220 	char *p;
    221 
    222 	if (signal(SIGINT, SIG_IGN) != SIG_IGN)
    223 		signal(SIGINT, onintr);
    224 #ifdef NONZEROPAGES
    225 	initm4();
    226 #endif
    227 	initkwds();
    228 
    229 	while ((c = getopt(argc, argv, "tD:U:o:")) != EOF)
    230 		switch(c) {
    231 
    232 		case 'D':               /* define something..*/
    233 			for (p = optarg; *p; p++)
    234 				if (*p == '=')
    235 					break;
    236 			if (*p)
    237 				*p++ = EOS;
    238 			dodefine(optarg, p);
    239 			break;
    240 		case 'U':               /* undefine...       */
    241 			remhash(optarg, TOP);
    242 			break;
    243 		case 'o':		/* specific output   */
    244 		case '?':
    245 		default:
    246 			usage();
    247 		}
    248 
    249 	infile[0] = stdin;		/* default input (naturally) */
    250 	active = stdout;		/* default active output     */
    251 	m4temp = mktemp(strdup(DIVNAM));/* filename for diversions   */
    252 
    253 	sp = -1;			/* stack pointer initialized */
    254 	fp = 0; 			/* frame pointer initialized */
    255 
    256 	macro();			/* get some work done here   */
    257 
    258 	if (*m4wraps) { 		/* anything for rundown ??   */
    259 		ilevel = 0;		/* in case m4wrap includes.. */
    260 		putback(EOF);		/* eof is a must !!	     */
    261 		pbstr(m4wraps); 	/* user-defined wrapup act   */
    262 		macro();		/* last will and testament   */
    263 	}
    264 
    265 	if (active != stdout)
    266 		active = stdout;	/* reset output just in case */
    267 	for (n = 1; n < MAXOUT; n++)	/* default wrap-up: undivert */
    268 		if (outfile[n] != NULL)
    269 			getdiv(n);
    270 					/* remove bitbucket if used  */
    271 	if (outfile[0] != NULL) {
    272 		(void) fclose(outfile[0]);
    273 		m4temp[UNIQUE] = '0';
    274 		(void) unlink(m4temp);
    275 	}
    276 
    277 	exit(0);
    278 }
    279 
    280 ndptr inspect();	/* forward ... */
    281 
    282 /*
    283  * macro - the work horse..
    284  *
    285  */
    286 macro() {
    287 	char token[MAXTOK];
    288 	register char *s;
    289 	register int t, l;
    290 	register ndptr p;
    291 	register int  nlpar;
    292 
    293 	cycle {
    294 		if ((t = gpbc()) == '_' || isalpha(t)) {
    295 			putback(t);
    296 			if ((p = inspect(s = token)) == nil) {
    297 				if (sp < 0)
    298 					while (*s)
    299 						putc(*s++, active);
    300 				else
    301 					while (*s)
    302 						chrsave(*s++);
    303 			}
    304 			else {
    305 		/*
    306 		 * real thing.. First build a call frame:
    307 		 *
    308 		 */
    309 				pushf(fp);	/* previous call frm */
    310 				pushf(p->type); /* type of the call  */
    311 				pushf(0);	/* parenthesis level */
    312 				fp = sp;	/* new frame pointer */
    313 		/*
    314 		 * now push the string arguments:
    315 		 *
    316 		 */
    317 				pushs(p->defn);	      /* defn string */
    318 				pushs(p->name);	      /* macro name  */
    319 				pushs(ep);	      /* start next..*/
    320 
    321 				putback(l = gpbc());
    322 				if (l != LPAREN)  {   /* add bracks  */
    323 					putback(RPAREN);
    324 					putback(LPAREN);
    325 				}
    326 			}
    327 		}
    328 		else if (t == EOF) {
    329 			if (sp > -1)
    330 				error("m4: unexpected end of input");
    331 			if (--ilevel < 0)
    332 				break;			/* all done thanks.. */
    333 			(void) fclose(infile[ilevel+1]);
    334 			continue;
    335 		}
    336 	/*
    337 	 * non-alpha single-char token seen..
    338 	 * [the order of else if .. stmts is
    339 	 * important.]
    340 	 *
    341 	 */
    342 		else if (t == lquote) { 		/* strip quotes */
    343 			nlpar = 1;
    344 			do {
    345 				if ((l = gpbc()) == rquote)
    346 					nlpar--;
    347 				else if (l == lquote)
    348 					nlpar++;
    349 				else if (l == EOF)
    350 					error("m4: missing right quote");
    351 				if (nlpar > 0) {
    352 					if (sp < 0)
    353 						putc(l, active);
    354 					else
    355 						chrsave(l);
    356 				}
    357 			}
    358 			while (nlpar != 0);
    359 		}
    360 
    361 		else if (sp < 0) {		/* not in a macro at all */
    362 			if (t == scommt) {	/* comment handling here */
    363 				putc(t, active);
    364 				while ((t = gpbc()) != ecommt)
    365 					putc(t, active);
    366 			}
    367 			putc(t, active);	/* output directly..	 */
    368 		}
    369 
    370 		else switch(t) {
    371 
    372 		case LPAREN:
    373 			if (PARLEV > 0)
    374 				chrsave(t);
    375 			while (isspace(l = gpbc()))
    376 				;		/* skip blank, tab, nl.. */
    377 			putback(l);
    378 			PARLEV++;
    379 			break;
    380 
    381 		case RPAREN:
    382 			if (--PARLEV > 0)
    383 				chrsave(t);
    384 			else {			/* end of argument list */
    385 				chrsave(EOS);
    386 
    387 				if (sp == STACKMAX)
    388 					error("m4: internal stack overflow");
    389 
    390 				if (CALTYP == MACRTYPE)
    391 					expand(mstack+fp+1, sp-fp);
    392 				else
    393 					eval(mstack+fp+1, sp-fp, CALTYP);
    394 
    395 				ep = PREVEP;	/* flush strspace */
    396 				sp = PREVSP;	/* previous sp..  */
    397 				fp = PREVFP;	/* rewind stack...*/
    398 			}
    399 			break;
    400 
    401 		case COMMA:
    402 			if (PARLEV == 1)	{
    403 				chrsave(EOS);		/* new argument   */
    404 				while (isspace(l = gpbc()))
    405 					;
    406 				putback(l);
    407 				pushs(ep);
    408 			}
    409 			break;
    410 		default:
    411 			chrsave(t);			/* stack the char */
    412 			break;
    413 		}
    414 	}
    415 }
    416 
    417 
    418 /*
    419  * build an input token..
    420  * consider only those starting with _ or A-Za-z. This is a
    421  * combo with lookup to speed things up.
    422  */
    423 ndptr
    424 inspect(tp)
    425 register char *tp;
    426 {
    427 	register int h = 0;
    428 	register char c;
    429 	register char *name = tp;
    430 	register char *etp = tp+MAXTOK;
    431 	register ndptr p;
    432 
    433 	while (tp < etp && (isalnum(c = gpbc()) || c == '_'))
    434 		h += (*tp++ = c);
    435 	putback(c);
    436 	if (tp == etp)
    437 		error("m4: token too long");
    438 	*tp = EOS;
    439 	for (p = hashtab[h%HASHSIZE]; p != nil; p = p->nxtptr)
    440 		if (strcmp(name, p->name) == 0)
    441 			break;
    442 	return(p);
    443 }
    444 
#ifdef NONZEROPAGES
/*
 * initm4 - clear the tables by hand.
 *
 * Sets every hashtab slot to nil and every outfile slot to NULL.
 * Only compiled in for systems that do not provide demand-zero pages.
 */
initm4()
{
	register int slot;

	slot = 0;
	while (slot < HASHSIZE) {
		hashtab[slot] = nil;
		slot++;
	}

	slot = 0;
	while (slot < MAXOUT) {
		outfile[slot] = NULL;
		slot++;
	}
}
#endif
    461 
    462 /*
    463  * initkwds - initialise m4 keywords as fast as possible.
    464  * This very similar to install, but without certain overheads,
    465  * such as calling lookup. Malloc is not used for storing the
    466  * keyword strings, since we simply use the static  pointers
    467  * within keywrds block. We also assume that there is enough memory
    468  * to at least install the keywords (i.e. malloc won't fail).
    469  *
    470  */
    471 initkwds() {
    472 	register int i;
    473 	register int h;
    474 	register ndptr p;
    475 
    476 	for (i = 0; i < MAXKEYS; i++) {
    477 		h = hash(keywrds[i].knam);
    478 		p = (ndptr) malloc(sizeof(struct ndblock));
    479 		p->nxtptr = hashtab[h];
    480 		hashtab[h] = p;
    481 		p->name = keywrds[i].knam;
    482 		p->defn = null;
    483 		p->type = keywrds[i].ktyp | STATIC;
    484 	}
    485 }
    486