main.c revision 1.1 1 1.1 cgd /*
2 1.1 cgd * Copyright (c) 1989 The Regents of the University of California.
3 1.1 cgd * All rights reserved.
4 1.1 cgd *
5 1.1 cgd * This code is derived from software contributed to Berkeley by
6 1.1 cgd * Ozan Yigit.
7 1.1 cgd *
8 1.1 cgd * Redistribution and use in source and binary forms, with or without
9 1.1 cgd * modification, are permitted provided that the following conditions
10 1.1 cgd * are met:
11 1.1 cgd * 1. Redistributions of source code must retain the above copyright
12 1.1 cgd * notice, this list of conditions and the following disclaimer.
13 1.1 cgd * 2. Redistributions in binary form must reproduce the above copyright
14 1.1 cgd * notice, this list of conditions and the following disclaimer in the
15 1.1 cgd * documentation and/or other materials provided with the distribution.
16 1.1 cgd * 3. All advertising materials mentioning features or use of this software
17 1.1 cgd * must display the following acknowledgement:
18 1.1 cgd * This product includes software developed by the University of
19 1.1 cgd * California, Berkeley and its contributors.
20 1.1 cgd * 4. Neither the name of the University nor the names of its contributors
21 1.1 cgd * may be used to endorse or promote products derived from this software
22 1.1 cgd * without specific prior written permission.
23 1.1 cgd *
24 1.1 cgd * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 1.1 cgd * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 1.1 cgd * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 1.1 cgd * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 1.1 cgd * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 1.1 cgd * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 1.1 cgd * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 1.1 cgd * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 1.1 cgd * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 1.1 cgd * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 1.1 cgd * SUCH DAMAGE.
35 1.1 cgd */
36 1.1 cgd
37 1.1 cgd #ifndef lint
38 1.1 cgd static char sccsid[] = "@(#)main.c 5.6 (Berkeley) 3/6/91";
39 1.1 cgd #endif /* not lint */
40 1.1 cgd
41 1.1 cgd /*
42 1.1 cgd * main.c
43 1.1 cgd * Facility: m4 macro processor
44 1.1 cgd * by: oz
45 1.1 cgd */
46 1.1 cgd
47 1.1 cgd #include <signal.h>
48 1.1 cgd #include <unistd.h>
49 1.1 cgd #include <stdio.h>
50 1.1 cgd #include <stdlib.h>
51 1.1 cgd #include <string.h>
52 1.1 cgd #include "mdef.h"
53 1.1 cgd #include "pathnames.h"
54 1.1 cgd
55 1.1 cgd /*
56 1.1 cgd * m4 - macro processor
57 1.1 cgd *
58 1.1 cgd * PD m4 is based on the macro tool distributed with the software
59 1.1 cgd * tools (VOS) package, and described in the "SOFTWARE TOOLS" and
60 1.1 cgd * "SOFTWARE TOOLS IN PASCAL" books. It has been expanded to include
61 1.1 cgd * most of the command set of SysV m4, the standard UN*X macro processor.
62 1.1 cgd *
63 1.1 cgd * Since both PD m4 and UN*X m4 are based on SOFTWARE TOOLS macro,
64 1.1 cgd * there may be certain implementation similarities between
65 1.1 cgd * the two. The PD m4 was produced without ANY references to m4
66 1.1 cgd * sources.
67 1.1 cgd *
68 1.1 cgd * References:
69 1.1 cgd *
70 1.1 cgd * Software Tools distribution: macro
71 1.1 cgd *
72 1.1 cgd * Kernighan, Brian W. and P. J. Plauger, SOFTWARE
73 1.1 cgd * TOOLS IN PASCAL, Addison-Wesley, Mass. 1981
74 1.1 cgd *
75 1.1 cgd * Kernighan, Brian W. and P. J. Plauger, SOFTWARE
76 1.1 cgd * TOOLS, Addison-Wesley, Mass. 1976
77 1.1 cgd *
78 1.1 cgd * Kernighan, Brian W. and Dennis M. Ritchie,
79 1.1 cgd * THE M4 MACRO PROCESSOR, Unix Programmer's Manual,
80 1.1 cgd * Seventh Edition, Vol. 2, Bell Telephone Labs, 1979
81 1.1 cgd *
82 1.1 cgd * System V man page for M4
83 1.1 cgd *
84 1.1 cgd * Modification History:
85 1.1 cgd *
86 1.1 cgd * Jan 28 1986 Oz Break the whole thing into little
87 1.1 cgd * pieces, for easier (?) maintenance.
88 1.1 cgd *
89 1.1 cgd * Dec 12 1985 Oz Optimize the code, try to squeeze
90 1.1 cgd * few microseconds out..
91 1.1 cgd *
92 1.1 cgd * Dec 05 1985 Oz Add getopt interface, define (-D),
93 1.1 cgd * undefine (-U) options.
94 1.1 cgd *
95 1.1 cgd * Oct 21 1985 Oz Clean up various bugs, add comment handling.
96 1.1 cgd *
97 1.1 cgd * June 7 1985 Oz Add some of SysV m4 stuff (m4wrap, pushdef,
98 1.1 cgd * popdef, decr, shift etc.).
99 1.1 cgd *
100 1.1 cgd * June 5 1985 Oz Initial cut.
101 1.1 cgd *
102 1.1 cgd * Implementation Notes:
103 1.1 cgd *
104 1.1 cgd * [1] PD m4 uses a different (and simpler) stack mechanism than the one
105 1.1 cgd * described in Software Tools and Software Tools in Pascal books.
106 1.1 cgd * The triple stack nonsense is replaced with a single stack containing
107 1.1 cgd * the call frames and the arguments. Each frame is back-linked to a
108 1.1 cgd * previous stack frame, which enables us to rewind the stack after
109 1.1 cgd * each nested call is completed. Each argument is a character pointer
110 1.1 cgd * to the beginning of the argument string within the string space.
111 1.1 cgd * The only exceptions to this are (*) arg 0 and arg 1, which are
112 1.1 cgd * the macro definition and macro name strings, stored dynamically
113 1.1 cgd * for the hash table.
114 1.1 cgd *
115 1.1 cgd * . .
116 1.1 cgd * | . | <-- sp | . |
117 1.1 cgd * +-------+ +-----+
118 1.1 cgd * | arg 3 ------------------------------->| str |
119 1.1 cgd * +-------+ | . |
120 1.1 cgd * | arg 2 --------------+ .
121 1.1 cgd * +-------+ |
122 1.1 cgd * * | | |
123 1.1 cgd * +-------+ | +-----+
124 1.1 cgd * | plev | <-- fp +---------------->| str |
125 1.1 cgd * +-------+ | . |
126 1.1 cgd * | type | .
127 1.1 cgd * +-------+
128 1.1 cgd * | prcf -----------+ plev: paren level
129 1.1 cgd * +-------+ | type: call type
130 1.1 cgd * | . | | prcf: prev. call frame
131 1.1 cgd * . |
132 1.1 cgd * +-------+ |
133 1.1 cgd * | <----------+
134 1.1 cgd * +-------+
135 1.1 cgd *
136 1.1 cgd * [2] We have three types of null values:
137 1.1 cgd *
138 1.1 cgd * nil - nodeblock pointer type 0
139 1.1 cgd * null - null string ("")
140 1.1 cgd * NULL - Stdio-defined NULL
141 1.1 cgd *
142 1.1 cgd */
143 1.1 cgd
144 1.1 cgd ndptr hashtab[HASHSIZE]; /* hash table for macros etc. */
145 1.1 cgd char buf[BUFSIZE]; /* push-back buffer */
146 1.1 cgd char *bp = buf; /* first available character */
147 1.1 cgd char *endpbb = buf+BUFSIZE; /* end of push-back buffer */
148 1.1 cgd stae mstack[STACKMAX+1]; /* stack of m4 machine */
149 1.1 cgd char strspace[STRSPMAX+1]; /* string space for evaluation */
150 1.1 cgd char *ep = strspace; /* first free char in strspace */
151 1.1 cgd char *endest= strspace+STRSPMAX;/* end of string space */
152 1.1 cgd int sp; /* current m4 stack pointer */
153 1.1 cgd int fp; /* m4 call frame pointer */
154 1.1 cgd FILE *infile[MAXINP]; /* input file stack (0=stdin) */
155 1.1 cgd FILE *outfile[MAXOUT]; /* diversion array(0=bitbucket)*/
156 1.1 cgd FILE *active; /* active output file pointer */
157 1.1 cgd char *m4temp; /* filename for diversions */
158 1.1 cgd int ilevel = 0; /* input file stack pointer */
159 1.1 cgd int oindex = 0; /* diversion index.. */
160 1.1 cgd char *null = ""; /* as it says.. just a null.. */
161 1.1 cgd char *m4wraps = ""; /* m4wrap string default.. */
162 1.1 cgd char lquote = LQUOTE; /* left quote character (`) */
163 1.1 cgd char rquote = RQUOTE; /* right quote character (') */
164 1.1 cgd char scommt = SCOMMT; /* start character for comment */
165 1.1 cgd char ecommt = ECOMMT; /* end character for comment */
166 1.1 cgd struct keyblk keywrds[] = { /* m4 keywords to be installed */
167 1.1 cgd "include", INCLTYPE,
168 1.1 cgd "sinclude", SINCTYPE,
169 1.1 cgd "define", DEFITYPE,
170 1.1 cgd "defn", DEFNTYPE,
171 1.1 cgd "divert", DIVRTYPE,
172 1.1 cgd "expr", EXPRTYPE,
173 1.1 cgd "eval", EXPRTYPE,
174 1.1 cgd "substr", SUBSTYPE,
175 1.1 cgd "ifelse", IFELTYPE,
176 1.1 cgd "ifdef", IFDFTYPE,
177 1.1 cgd "len", LENGTYPE,
178 1.1 cgd "incr", INCRTYPE,
179 1.1 cgd "decr", DECRTYPE,
180 1.1 cgd "dnl", DNLNTYPE,
181 1.1 cgd "changequote", CHNQTYPE,
182 1.1 cgd "changecom", CHNCTYPE,
183 1.1 cgd "index", INDXTYPE,
184 1.1 cgd #ifdef EXTENDED
185 1.1 cgd "paste", PASTTYPE,
186 1.1 cgd "spaste", SPASTYPE,
187 1.1 cgd #endif
188 1.1 cgd "popdef", POPDTYPE,
189 1.1 cgd "pushdef", PUSDTYPE,
190 1.1 cgd "dumpdef", DUMPTYPE,
191 1.1 cgd "shift", SHIFTYPE,
192 1.1 cgd "translit", TRNLTYPE,
193 1.1 cgd "undefine", UNDFTYPE,
194 1.1 cgd "undivert", UNDVTYPE,
195 1.1 cgd "divnum", DIVNTYPE,
196 1.1 cgd "maketemp", MKTMTYPE,
197 1.1 cgd "errprint", ERRPTYPE,
198 1.1 cgd "m4wrap", M4WRTYPE,
199 1.1 cgd "m4exit", EXITTYPE,
200 1.1 cgd "syscmd", SYSCTYPE,
201 1.1 cgd "sysval", SYSVTYPE,
202 1.1 cgd "unix", MACRTYPE,
203 1.1 cgd };
204 1.1 cgd
205 1.1 cgd #define MAXKEYS (sizeof(keywrds)/sizeof(struct keyblk))
206 1.1 cgd
207 1.1 cgd extern ndptr lookup();
208 1.1 cgd extern ndptr addent();
209 1.1 cgd extern void onintr();
210 1.1 cgd
211 1.1 cgd extern int optind;
212 1.1 cgd extern char *optarg;
213 1.1 cgd
214 1.1 cgd main(argc,argv)
215 1.1 cgd int argc;
216 1.1 cgd char **argv;
217 1.1 cgd {
218 1.1 cgd register int c;
219 1.1 cgd register int n;
220 1.1 cgd char *p;
221 1.1 cgd
222 1.1 cgd if (signal(SIGINT, SIG_IGN) != SIG_IGN)
223 1.1 cgd signal(SIGINT, onintr);
224 1.1 cgd #ifdef NONZEROPAGES
225 1.1 cgd initm4();
226 1.1 cgd #endif
227 1.1 cgd initkwds();
228 1.1 cgd
229 1.1 cgd while ((c = getopt(argc, argv, "tD:U:o:")) != EOF)
230 1.1 cgd switch(c) {
231 1.1 cgd
232 1.1 cgd case 'D': /* define something..*/
233 1.1 cgd for (p = optarg; *p; p++)
234 1.1 cgd if (*p == '=')
235 1.1 cgd break;
236 1.1 cgd if (*p)
237 1.1 cgd *p++ = EOS;
238 1.1 cgd dodefine(optarg, p);
239 1.1 cgd break;
240 1.1 cgd case 'U': /* undefine... */
241 1.1 cgd remhash(optarg, TOP);
242 1.1 cgd break;
243 1.1 cgd case 'o': /* specific output */
244 1.1 cgd case '?':
245 1.1 cgd default:
246 1.1 cgd usage();
247 1.1 cgd }
248 1.1 cgd
249 1.1 cgd infile[0] = stdin; /* default input (naturally) */
250 1.1 cgd active = stdout; /* default active output */
251 1.1 cgd m4temp = mktemp(strdup(DIVNAM));/* filename for diversions */
252 1.1 cgd
253 1.1 cgd sp = -1; /* stack pointer initialized */
254 1.1 cgd fp = 0; /* frame pointer initialized */
255 1.1 cgd
256 1.1 cgd macro(); /* get some work done here */
257 1.1 cgd
258 1.1 cgd if (*m4wraps) { /* anything for rundown ?? */
259 1.1 cgd ilevel = 0; /* in case m4wrap includes.. */
260 1.1 cgd putback(EOF); /* eof is a must !! */
261 1.1 cgd pbstr(m4wraps); /* user-defined wrapup act */
262 1.1 cgd macro(); /* last will and testament */
263 1.1 cgd }
264 1.1 cgd
265 1.1 cgd if (active != stdout)
266 1.1 cgd active = stdout; /* reset output just in case */
267 1.1 cgd for (n = 1; n < MAXOUT; n++) /* default wrap-up: undivert */
268 1.1 cgd if (outfile[n] != NULL)
269 1.1 cgd getdiv(n);
270 1.1 cgd /* remove bitbucket if used */
271 1.1 cgd if (outfile[0] != NULL) {
272 1.1 cgd (void) fclose(outfile[0]);
273 1.1 cgd m4temp[UNIQUE] = '0';
274 1.1 cgd (void) unlink(m4temp);
275 1.1 cgd }
276 1.1 cgd
277 1.1 cgd exit(0);
278 1.1 cgd }
279 1.1 cgd
280 1.1 cgd ndptr inspect(); /* forward ... */
281 1.1 cgd
282 1.1 cgd /*
283 1.1 cgd * macro - the work horse..
284 1.1 cgd *
285 1.1 cgd */
286 1.1 cgd macro() {
287 1.1 cgd char token[MAXTOK];
288 1.1 cgd register char *s;
289 1.1 cgd register int t, l;
290 1.1 cgd register ndptr p;
291 1.1 cgd register int nlpar;
292 1.1 cgd
293 1.1 cgd cycle {
294 1.1 cgd if ((t = gpbc()) == '_' || isalpha(t)) {
295 1.1 cgd putback(t);
296 1.1 cgd if ((p = inspect(s = token)) == nil) {
297 1.1 cgd if (sp < 0)
298 1.1 cgd while (*s)
299 1.1 cgd putc(*s++, active);
300 1.1 cgd else
301 1.1 cgd while (*s)
302 1.1 cgd chrsave(*s++);
303 1.1 cgd }
304 1.1 cgd else {
305 1.1 cgd /*
306 1.1 cgd * real thing.. First build a call frame:
307 1.1 cgd *
308 1.1 cgd */
309 1.1 cgd pushf(fp); /* previous call frm */
310 1.1 cgd pushf(p->type); /* type of the call */
311 1.1 cgd pushf(0); /* parenthesis level */
312 1.1 cgd fp = sp; /* new frame pointer */
313 1.1 cgd /*
314 1.1 cgd * now push the string arguments:
315 1.1 cgd *
316 1.1 cgd */
317 1.1 cgd pushs(p->defn); /* defn string */
318 1.1 cgd pushs(p->name); /* macro name */
319 1.1 cgd pushs(ep); /* start next..*/
320 1.1 cgd
321 1.1 cgd putback(l = gpbc());
322 1.1 cgd if (l != LPAREN) { /* add bracks */
323 1.1 cgd putback(RPAREN);
324 1.1 cgd putback(LPAREN);
325 1.1 cgd }
326 1.1 cgd }
327 1.1 cgd }
328 1.1 cgd else if (t == EOF) {
329 1.1 cgd if (sp > -1)
330 1.1 cgd error("m4: unexpected end of input");
331 1.1 cgd if (--ilevel < 0)
332 1.1 cgd break; /* all done thanks.. */
333 1.1 cgd (void) fclose(infile[ilevel+1]);
334 1.1 cgd continue;
335 1.1 cgd }
336 1.1 cgd /*
337 1.1 cgd * non-alpha single-char token seen..
338 1.1 cgd * [the order of else if .. stmts is
339 1.1 cgd * important.]
340 1.1 cgd *
341 1.1 cgd */
342 1.1 cgd else if (t == lquote) { /* strip quotes */
343 1.1 cgd nlpar = 1;
344 1.1 cgd do {
345 1.1 cgd if ((l = gpbc()) == rquote)
346 1.1 cgd nlpar--;
347 1.1 cgd else if (l == lquote)
348 1.1 cgd nlpar++;
349 1.1 cgd else if (l == EOF)
350 1.1 cgd error("m4: missing right quote");
351 1.1 cgd if (nlpar > 0) {
352 1.1 cgd if (sp < 0)
353 1.1 cgd putc(l, active);
354 1.1 cgd else
355 1.1 cgd chrsave(l);
356 1.1 cgd }
357 1.1 cgd }
358 1.1 cgd while (nlpar != 0);
359 1.1 cgd }
360 1.1 cgd
361 1.1 cgd else if (sp < 0) { /* not in a macro at all */
362 1.1 cgd if (t == scommt) { /* comment handling here */
363 1.1 cgd putc(t, active);
364 1.1 cgd while ((t = gpbc()) != ecommt)
365 1.1 cgd putc(t, active);
366 1.1 cgd }
367 1.1 cgd putc(t, active); /* output directly.. */
368 1.1 cgd }
369 1.1 cgd
370 1.1 cgd else switch(t) {
371 1.1 cgd
372 1.1 cgd case LPAREN:
373 1.1 cgd if (PARLEV > 0)
374 1.1 cgd chrsave(t);
375 1.1 cgd while (isspace(l = gpbc()))
376 1.1 cgd ; /* skip blank, tab, nl.. */
377 1.1 cgd putback(l);
378 1.1 cgd PARLEV++;
379 1.1 cgd break;
380 1.1 cgd
381 1.1 cgd case RPAREN:
382 1.1 cgd if (--PARLEV > 0)
383 1.1 cgd chrsave(t);
384 1.1 cgd else { /* end of argument list */
385 1.1 cgd chrsave(EOS);
386 1.1 cgd
387 1.1 cgd if (sp == STACKMAX)
388 1.1 cgd error("m4: internal stack overflow");
389 1.1 cgd
390 1.1 cgd if (CALTYP == MACRTYPE)
391 1.1 cgd expand(mstack+fp+1, sp-fp);
392 1.1 cgd else
393 1.1 cgd eval(mstack+fp+1, sp-fp, CALTYP);
394 1.1 cgd
395 1.1 cgd ep = PREVEP; /* flush strspace */
396 1.1 cgd sp = PREVSP; /* previous sp.. */
397 1.1 cgd fp = PREVFP; /* rewind stack...*/
398 1.1 cgd }
399 1.1 cgd break;
400 1.1 cgd
401 1.1 cgd case COMMA:
402 1.1 cgd if (PARLEV == 1) {
403 1.1 cgd chrsave(EOS); /* new argument */
404 1.1 cgd while (isspace(l = gpbc()))
405 1.1 cgd ;
406 1.1 cgd putback(l);
407 1.1 cgd pushs(ep);
408 1.1 cgd }
409 1.1 cgd break;
410 1.1 cgd default:
411 1.1 cgd chrsave(t); /* stack the char */
412 1.1 cgd break;
413 1.1 cgd }
414 1.1 cgd }
415 1.1 cgd }
416 1.1 cgd
417 1.1 cgd
418 1.1 cgd /*
419 1.1 cgd * build an input token..
420 1.1 cgd * consider only those starting with _ or A-Za-z. This is a
421 1.1 cgd * combo with lookup to speed things up.
422 1.1 cgd */
423 1.1 cgd ndptr
424 1.1 cgd inspect(tp)
425 1.1 cgd register char *tp;
426 1.1 cgd {
427 1.1 cgd register int h = 0;
428 1.1 cgd register char c;
429 1.1 cgd register char *name = tp;
430 1.1 cgd register char *etp = tp+MAXTOK;
431 1.1 cgd register ndptr p;
432 1.1 cgd
433 1.1 cgd while (tp < etp && (isalnum(c = gpbc()) || c == '_'))
434 1.1 cgd h += (*tp++ = c);
435 1.1 cgd putback(c);
436 1.1 cgd if (tp == etp)
437 1.1 cgd error("m4: token too long");
438 1.1 cgd *tp = EOS;
439 1.1 cgd for (p = hashtab[h%HASHSIZE]; p != nil; p = p->nxtptr)
440 1.1 cgd if (strcmp(name, p->name) == 0)
441 1.1 cgd break;
442 1.1 cgd return(p);
443 1.1 cgd }
444 1.1 cgd
#ifdef NONZEROPAGES
/*
 * initm4 - initialize various tables.  Useful only if your system
 * does not know anything about demand-zero pages.
 *
 * Sets every hashtab[] slot to nil and every outfile[] slot to NULL.
 * Always returns 0; the return type is spelled out only because the
 * function is called before its definition and so is implicitly int.
 */
int
initm4()
{
	register int i;

	for (i = 0; i < HASHSIZE; i++)
		hashtab[i] = nil;
	for (i = 0; i < MAXOUT; i++)
		outfile[i] = NULL;
	return 0;
}
#endif
461 1.1 cgd
462 1.1 cgd /*
463 1.1 cgd * initkwds - initialise m4 keywords as fast as possible.
464 1.1 cgd * This very similar to install, but without certain overheads,
465 1.1 cgd * such as calling lookup. Malloc is not used for storing the
466 1.1 cgd * keyword strings, since we simply use the static pointers
467 1.1 cgd * within keywrds block. We also assume that there is enough memory
468 1.1 cgd * to at least install the keywords (i.e. malloc won't fail).
469 1.1 cgd *
470 1.1 cgd */
471 1.1 cgd initkwds() {
472 1.1 cgd register int i;
473 1.1 cgd register int h;
474 1.1 cgd register ndptr p;
475 1.1 cgd
476 1.1 cgd for (i = 0; i < MAXKEYS; i++) {
477 1.1 cgd h = hash(keywrds[i].knam);
478 1.1 cgd p = (ndptr) malloc(sizeof(struct ndblock));
479 1.1 cgd p->nxtptr = hashtab[h];
480 1.1 cgd hashtab[h] = p;
481 1.1 cgd p->name = keywrds[i].knam;
482 1.1 cgd p->defn = null;
483 1.1 cgd p->type = keywrds[i].ktyp | STATIC;
484 1.1 cgd }
485 1.1 cgd }
486