unifdef.c revision 1.11 1 /* $NetBSD: unifdef.c,v 1.11 2003/07/30 08:21:47 itojun Exp $ */
2
3 /*
4 * Copyright (c) 2002, 2003 Tony Finch <dot (at) dotat.at>
5 * Copyright (c) 1985, 1993
6 * The Regents of the University of California. All rights reserved.
7 *
8 * This code is derived from software contributed to Berkeley by
9 * Dave Yost. It was rewritten to support ANSI C by Tony Finch.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 * 3. All advertising materials mentioning features or use of this software
20 * must display the following acknowledgement:
21 * This product includes software developed by the University of
22 * California, Berkeley and its contributors.
23 * 4. Neither the name of the University nor the names of its contributors
24 * may be used to endorse or promote products derived from this software
25 * without specific prior written permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
28 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
31 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
32 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
33 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
34 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
35 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
36 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
37 * SUCH DAMAGE.
38 */
39
40 #include <sys/cdefs.h>
41
42 #ifndef lint
43 #if 0
44 static const char copyright[] =
45 "@(#) Copyright (c) 1985, 1993\n\
46 The Regents of the University of California. All rights reserved.\n";
47 #endif
48 #ifdef __IDSTRING
49 __IDSTRING(Berkeley, "@(#)unifdef.c 8.1 (Berkeley) 6/6/93");
50 __IDSTRING(NetBSD, "$NetBSD: unifdef.c,v 1.11 2003/07/30 08:21:47 itojun Exp $");
51 __IDSTRING(dotat, "$dotat: things/unifdef.c,v 1.161 2003/07/01 15:32:48 fanf2 Exp $");
52 #endif
53 #endif /* not lint */
54 #ifdef __FBSDID
55 __FBSDID("$FreeBSD: src/usr.bin/unifdef/unifdef.c,v 1.18 2003/07/01 15:30:43 fanf Exp $");
56 #endif
57
58 /*
59 * unifdef - remove ifdef'ed lines
60 *
61 * Wishlist:
62 * provide an option which will append the name of the
63 * appropriate symbol after #else's and #endif's
64 * provide an option which will check symbols after
65 * #else's and #endif's to see that they match their
66 * corresponding #ifdef or #ifndef
67 * generate #line directives in place of deleted code
68 *
69 * The first two items above require better buffer handling, which would
70 * also make it possible to handle all "dodgy" directives correctly.
71 */
72
73 #include <ctype.h>
74 #include <err.h>
75 #include <stdarg.h>
76 #include <stdio.h>
77 #include <stdlib.h>
78 #include <string.h>
79 #include <unistd.h>
80
81 #include "stdbool.h"
82
/*
 * Classification of one input line.  The first ten values describe
 * preprocessor conditionals; adding LT_DODGY to any of them yields the
 * "dodgy" (directive not confined to one line) variant of that type.
 */
typedef enum {
	LT_TRUEI = 0,	/* a true #if with ignore flag */
	LT_FALSEI,	/* a false #if with ignore flag */
	LT_IF,		/* an unknown #if */
	LT_TRUE,	/* a true #if */
	LT_FALSE,	/* a false #if */
	LT_ELIF,	/* an unknown #elif */
	LT_ELTRUE,	/* a true #elif */
	LT_ELFALSE,	/* a false #elif */
	LT_ELSE,	/* #else */
	LT_ENDIF,	/* #endif */
	LT_DODGY,	/* flag: directive is not on one line */
	LT_DODGY_LAST = LT_DODGY + LT_ENDIF,	/* highest dodgy value */
	LT_PLAIN,	/* ordinary line */
	LT_EOF,		/* end of file */
	LT_COUNT	/* number of line types (table dimension) */
} Linetype;

/* Printable names for debugging output, indexed by Linetype. */
static char const * const linetype_name[] = {
	/* plain directives */
	"TRUEI",
	"FALSEI",
	"IF",
	"TRUE",
	"FALSE",
	"ELIF",
	"ELTRUE",
	"ELFALSE",
	"ELSE",
	"ENDIF",
	/* the same directives with LT_DODGY added */
	"DODGY TRUEI",
	"DODGY FALSEI",
	"DODGY IF",
	"DODGY TRUE",
	"DODGY FALSE",
	"DODGY ELIF",
	"DODGY ELTRUE",
	"DODGY ELFALSE",
	"DODGY ELSE",
	"DODGY ENDIF",
	/* non-directives */
	"PLAIN",
	"EOF"
};
111
/*
 * State of #if processing for one nesting level; used as the first
 * index of the transition table.
 */
typedef enum {
	IS_OUTSIDE = 0,		/* not inside any #if group */
	IS_FALSE_PREFIX,	/* false #if followed by false #elifs */
	IS_TRUE_PREFIX,		/* first non-false #(el)if is true */
	IS_PASS_MIDDLE,		/* first non-false #(el)if is unknown */
	IS_FALSE_MIDDLE,	/* a false #elif after a pass state */
	IS_TRUE_MIDDLE,		/* a true #elif after a pass state */
	IS_PASS_ELSE,		/* an else after a pass state */
	IS_FALSE_ELSE,		/* an else after a true state */
	IS_TRUE_ELSE,		/* an else after only false states */
	IS_FALSE_TRAILER,	/* #elifs after a true are false */
	IS_COUNT		/* number of states (table dimension) */
} Ifstate;

/* Printable names for debugging output, indexed by Ifstate. */
static char const * const ifstate_name[] = {
	"OUTSIDE",
	"FALSE_PREFIX",
	"TRUE_PREFIX",
	"PASS_MIDDLE",
	"FALSE_MIDDLE",
	"TRUE_MIDDLE",
	"PASS_ELSE",
	"FALSE_ELSE",
	"TRUE_ELSE",
	"FALSE_TRAILER"
};
133
/*
 * State of the comment parser.  NO_COMMENT is deliberately zero/false
 * so the state can be tested directly in a condition.
 */
typedef enum {
	NO_COMMENT = false,	/* outside a comment */
	C_COMMENT,		/* in a comment like this one */
	CXX_COMMENT,		/* between // and end of line */
	STARTING_COMMENT,	/* just after slash-backslash-newline */
	FINISHING_COMMENT	/* star-backslash-newline in a C comment */
} Comment_state;

/* Printable names for debugging output, indexed by Comment_state. */
static char const * const comment_name[] = {
	"NO",
	"C",
	"CXX",
	"STARTING",
	"FINISHING"
};
146
/*
 * State of the preprocessor line parser: how much of the current
 * logical line has been seen so far.
 */
typedef enum {
	LS_START = 0,	/* only space and comments on this line */
	LS_HASH,	/* only space, comments, and a hash */
	LS_DIRTY	/* this line can't be a preprocessor line */
} Line_state;

/* Printable names for debugging output, indexed by Line_state. */
static char const * const linestate_name[] = {
	"START",
	"HASH",
	"DIRTY"
};
157
/*
 * Fixed capacities, based on the minimum translation limits from
 * ISO/IEC 9899:1999 5.2.4.1.
 */
#define	MAXDEPTH	64	/* maximum #if nesting */
#define	MAXLINE		4096	/* maximum length of line */
#define	MAXSYMS		4096	/* maximum number of symbols */

/*
 * Extra room kept at the end of the tline buffer: when a keyword is
 * edited in place the replacement text is sometimes longer than the
 * original.
 */
#define	EDITSLOP	10
170
171 /*
172 * Globals.
173 */
174
175 static bool complement; /* -c: do the complement */
176 static bool debugging; /* -d: debugging reports */
177 static bool iocccok; /* -e: fewer IOCCC errors */
178 static bool killconsts; /* -k: eval constant #ifs */
179 static bool lnblank; /* -l: blank deleted lines */
180 static bool symlist; /* -s: output symbol list */
181 static bool text; /* -t: this is a text file */
182
183 static const char *symname[MAXSYMS]; /* symbol name */
184 static const char *value[MAXSYMS]; /* -Dsym=value */
185 static bool ignore[MAXSYMS]; /* -iDsym or -iUsym */
186 static int nsyms; /* number of symbols */
187
188 static FILE *input; /* input file pointer */
189 static const char *filename; /* input file name */
190 static int linenum; /* current line number */
191
192 static char tline[MAXLINE+EDITSLOP];/* input buffer plus space */
193 static char *keyword; /* used for editing #elif's */
194
195 static Comment_state incomment; /* comment parser state */
196 static Line_state linestate; /* #if line parser state */
197 static Ifstate ifstate[MAXDEPTH]; /* #if processor state */
198 static bool ignoring[MAXDEPTH]; /* ignore comments state */
199 static int stifline[MAXDEPTH]; /* start of current #if */
200 static int depth; /* current #if nesting */
201 static bool keepthis; /* don't delete constant #if */
202
203 static int exitstat; /* program exit status */
204
205 static void addsym(bool, bool, char *);
206 static void debug(const char *, ...);
207 static void done(void);
208 static void error(const char *);
209 static int findsym(const char *);
210 static void flushline(bool);
211 static Linetype getline(void);
212 static Linetype ifeval(const char **);
213 static void ignoreoff(void);
214 static void ignoreon(void);
215 static void keywordedit(const char *);
216 static void nest(void);
217 static void process(void);
218 static const char *skipcomment(const char *);
219 static const char *skipsym(const char *);
220 static void state(Ifstate);
221 static int strlcmp(const char *, const char *, size_t);
222 static void usage(void);
223
/*
 * True if character c cannot be part of an identifier, i.e. it is
 * neither a letter, a digit, nor an underscore.  The argument is fully
 * parenthesized so that any expression may be passed; note that it is
 * still evaluated more than once.
 */
#define endsym(c) (!isalnum((unsigned char)(c)) && (c) != '_')
225
226 /*
227 * The main program.
228 */
229 int
230 main(int argc, char *argv[])
231 {
232 int opt;
233
234 while ((opt = getopt(argc, argv, "i:D:U:I:cdeklst")) != -1)
235 switch (opt) {
236 case 'i': /* treat stuff controlled by these symbols as text */
237 /*
238 * For strict backwards-compatibility the U or D
239 * should be immediately after the -i but it doesn't
240 * matter much if we relax that requirement.
241 */
242 opt = *optarg++;
243 if (opt == 'D')
244 addsym(true, true, optarg);
245 else if (opt == 'U')
246 addsym(true, false, optarg);
247 else
248 usage();
249 break;
250 case 'D': /* define a symbol */
251 addsym(false, true, optarg);
252 break;
253 case 'U': /* undef a symbol */
254 addsym(false, false, optarg);
255 break;
256 case 'I':
257 /* no-op for compatibility with cpp */
258 break;
259 case 'c': /* treat -D as -U and vice versa */
260 complement = true;
261 break;
262 case 'd':
263 debugging = true;
264 break;
265 case 'e': /* fewer errors from dodgy lines */
266 iocccok = true;
267 break;
268 case 'k': /* process constant #ifs */
269 killconsts = true;
270 break;
271 case 'l': /* blank deleted lines instead of omitting them */
272 lnblank = true;
273 break;
274 case 's': /* only output list of symbols that control #ifs */
275 symlist = true;
276 break;
277 case 't': /* don't parse C comments */
278 text = true;
279 break;
280 default:
281 usage();
282 }
283 argc -= optind;
284 argv += optind;
285 if (nsyms == 0 && !symlist) {
286 warnx("must -D or -U at least one symbol");
287 usage();
288 }
289 if (argc > 1) {
290 errx(2, "can only do one file");
291 } else if (argc == 1 && strcmp(*argv, "-") != 0) {
292 filename = *argv;
293 input = fopen(filename, "r");
294 if (input == NULL)
295 err(2, "can't open %s", filename);
296 } else {
297 filename = "[stdin]";
298 input = stdin;
299 }
300 process();
301 abort(); /* bug */
302 }
303
/*
 * Print the usage summary on stderr and exit with status 2.
 */
static void
usage(void)
{
	static const char synopsis[] =
	    "usage: unifdef [-cdeklst]"
	    " [-Dsym[=val]] [-Usym] [-iDsym[=val]] [-iUsym] ... [file]\n";

	fputs(synopsis, stderr);
	exit(2);
}
311
312 /*
313 * A state transition function alters the global #if processing state
314 * in a particular way. The table below is indexed by the current
315 * processing state and the type of the current line.
316 *
317 * Nesting is handled by keeping a stack of states; some transition
318 * functions increase or decrease the depth. They also maintain the
319 * ignore state on a stack. In some complicated cases they have to
320 * alter the preprocessor directive, as follows.
321 *
322 * When we have processed a group that starts off with a known-false
323 * #if/#elif sequence (which has therefore been deleted) followed by a
324 * #elif that we don't understand and therefore must keep, we edit the
325 * latter into a #if to keep the nesting correct.
326 *
327 * When we find a true #elif in a group, the following block will
328 * always be kept and the rest of the sequence after the next #elif or
329 * #else will be discarded. We edit the #elif into a #else and the
330 * following directive to #endif since this has the desired behaviour.
331 *
332 * "Dodgy" directives are split across multiple lines, the most common
333 * example being a multi-line comment hanging off the right of the
334 * directive. We can handle them correctly only if there is no change
335 * from printing to dropping (or vice versa) caused by that directive.
336 * If the directive is the first of a group we have a choice between
337 * failing with an error, or passing it through unchanged instead of
338 * evaluating it. The latter is not the default to avoid questions from
339 * users about unifdef unexpectedly leaving behind preprocessor directives.
340 */
341 typedef void state_fn(void);
342
343 /* report an error */
344 static void Eelif (void) { error("Inappropriate #elif"); }
345 static void Eelse (void) { error("Inappropriate #else"); }
346 static void Eendif(void) { error("Inappropriate #endif"); }
347 static void Eeof (void) { error("Premature EOF"); }
348 static void Eioccc(void) { error("Obfuscated preprocessor control line"); }
349 /* plain line handling */
350 static void print (void) { flushline(true); }
351 static void drop (void) { flushline(false); }
352 /* output lacks group's start line */
353 static void Strue (void) { drop(); ignoreoff(); state(IS_TRUE_PREFIX); }
354 static void Sfalse(void) { drop(); ignoreoff(); state(IS_FALSE_PREFIX); }
355 static void Selse (void) { drop(); state(IS_TRUE_ELSE); }
356 /* print/pass this block */
357 static void Pelif (void) { print(); ignoreoff(); state(IS_PASS_MIDDLE); }
358 static void Pelse (void) { print(); state(IS_PASS_ELSE); }
359 static void Pendif(void) { print(); --depth; }
360 /* discard this block */
361 static void Dfalse(void) { drop(); ignoreoff(); state(IS_FALSE_TRAILER); }
362 static void Delif (void) { drop(); ignoreoff(); state(IS_FALSE_MIDDLE); }
363 static void Delse (void) { drop(); state(IS_FALSE_ELSE); }
364 static void Dendif(void) { drop(); --depth; }
365 /* first line of group */
366 static void Fdrop (void) { nest(); Dfalse(); }
367 static void Fpass (void) { nest(); Pelif(); }
368 static void Ftrue (void) { nest(); Strue(); }
369 static void Ffalse(void) { nest(); Sfalse(); }
370 /* variable pedantry for obfuscated lines */
371 static void Oiffy (void) { if (iocccok) Fpass(); else Eioccc(); ignoreon(); }
372 static void Oif (void) { if (iocccok) Fpass(); else Eioccc(); }
373 static void Oelif (void) { if (iocccok) Pelif(); else Eioccc(); }
374 /* ignore comments in this block */
375 static void Idrop (void) { Fdrop(); ignoreon(); }
376 static void Itrue (void) { Ftrue(); ignoreon(); }
377 static void Ifalse(void) { Ffalse(); ignoreon(); }
378 /* edit this line */
379 static void Mpass (void) { strncpy(keyword, "if ", 4); Pelif(); }
380 static void Mtrue (void) { keywordedit("else\n"); state(IS_TRUE_MIDDLE); }
381 static void Melif (void) { keywordedit("endif\n"); state(IS_FALSE_TRAILER); }
382 static void Melse (void) { keywordedit("endif\n"); state(IS_FALSE_ELSE); }
383
384 static state_fn * const trans_table[IS_COUNT][LT_COUNT] = {
385 /* IS_OUTSIDE */
386 { Itrue, Ifalse,Fpass, Ftrue, Ffalse,Eelif, Eelif, Eelif, Eelse, Eendif,
387 Oiffy, Oiffy, Fpass, Oif, Oif, Eelif, Eelif, Eelif, Eelse, Eendif,
388 print, done },
389 /* IS_FALSE_PREFIX */
390 { Idrop, Idrop, Fdrop, Fdrop, Fdrop, Mpass, Strue, Sfalse,Selse, Dendif,
391 Idrop, Idrop, Fdrop, Fdrop, Fdrop, Mpass, Eioccc,Eioccc,Eioccc,Eioccc,
392 drop, Eeof },
393 /* IS_TRUE_PREFIX */
394 { Itrue, Ifalse,Fpass, Ftrue, Ffalse,Dfalse,Dfalse,Dfalse,Delse, Dendif,
395 Oiffy, Oiffy, Fpass, Oif, Oif, Eioccc,Eioccc,Eioccc,Eioccc,Eioccc,
396 print, Eeof },
397 /* IS_PASS_MIDDLE */
398 { Itrue, Ifalse,Fpass, Ftrue, Ffalse,Pelif, Mtrue, Delif, Pelse, Pendif,
399 Oiffy, Oiffy, Fpass, Oif, Oif, Pelif, Oelif, Oelif, Pelse, Pendif,
400 print, Eeof },
401 /* IS_FALSE_MIDDLE */
402 { Idrop, Idrop, Fdrop, Fdrop, Fdrop, Pelif, Mtrue, Delif, Pelse, Pendif,
403 Idrop, Idrop, Fdrop, Fdrop, Fdrop, Eioccc,Eioccc,Eioccc,Eioccc,Eioccc,
404 drop, Eeof },
405 /* IS_TRUE_MIDDLE */
406 { Itrue, Ifalse,Fpass, Ftrue, Ffalse,Melif, Melif, Melif, Melse, Pendif,
407 Oiffy, Oiffy, Fpass, Oif, Oif, Eioccc,Eioccc,Eioccc,Eioccc,Pendif,
408 print, Eeof },
409 /* IS_PASS_ELSE */
410 { Itrue, Ifalse,Fpass, Ftrue, Ffalse,Eelif, Eelif, Eelif, Eelse, Pendif,
411 Oiffy, Oiffy, Fpass, Oif, Oif, Eelif, Eelif, Eelif, Eelse, Pendif,
412 print, Eeof },
413 /* IS_FALSE_ELSE */
414 { Idrop, Idrop, Fdrop, Fdrop, Fdrop, Eelif, Eelif, Eelif, Eelse, Dendif,
415 Idrop, Idrop, Fdrop, Fdrop, Fdrop, Eelif, Eelif, Eelif, Eelse, Eioccc,
416 drop, Eeof },
417 /* IS_TRUE_ELSE */
418 { Itrue, Ifalse,Fpass, Ftrue, Ffalse,Eelif, Eelif, Eelif, Eelse, Dendif,
419 Oiffy, Oiffy, Fpass, Oif, Oif, Eelif, Eelif, Eelif, Eelse, Eioccc,
420 print, Eeof },
421 /* IS_FALSE_TRAILER */
422 { Idrop, Idrop, Fdrop, Fdrop, Fdrop, Dfalse,Dfalse,Dfalse,Delse, Dendif,
423 Idrop, Idrop, Fdrop, Fdrop, Fdrop, Dfalse,Dfalse,Dfalse,Delse, Eioccc,
424 drop, Eeof }
425 /*TRUEI FALSEI IF TRUE FALSE ELIF ELTRUE ELFALSE ELSE ENDIF
426 TRUEI FALSEI IF TRUE FALSE ELIF ELTRUE ELFALSE ELSE ENDIF (DODGY)
427 PLAIN EOF */
428 };
429
430 /*
431 * State machine utility functions
432 */
433 static void
434 done(void)
435 {
436 if (incomment)
437 error("EOF in comment");
438 exit(exitstat);
439 }
440 static void
441 ignoreoff(void)
442 {
443 ignoring[depth] = ignoring[depth-1];
444 }
445 static void
446 ignoreon(void)
447 {
448 ignoring[depth] = true;
449 }
450 static void
451 keywordedit(const char *replacement)
452 {
453 strlcpy(keyword, replacement, tline + sizeof(tline) - keyword);
454 print();
455 }
456 static void
457 nest(void)
458 {
459 depth += 1;
460 if (depth >= MAXDEPTH)
461 error("Too many levels of nesting");
462 stifline[depth] = linenum;
463 }
464 static void
465 state(Ifstate is)
466 {
467 ifstate[depth] = is;
468 }
469
470 /*
471 * Write a line to the output or not, according to command line options.
472 */
473 static void
474 flushline(bool keep)
475 {
476 if (symlist)
477 return;
478 if (keep ^ complement)
479 fputs(tline, stdout);
480 else {
481 if (lnblank)
482 putc('\n', stdout);
483 exitstat = 1;
484 }
485 }
486
487 /*
488 * The driver for the state machine.
489 */
490 static void
491 process(void)
492 {
493 Linetype lineval;
494
495 for (;;) {
496 linenum++;
497 lineval = getline();
498 trans_table[ifstate[depth]][lineval]();
499 debug("process %s -> %s depth %d",
500 linetype_name[lineval],
501 ifstate_name[ifstate[depth]], depth);
502 }
503 }
504
505 /*
506 * Parse a line and determine its type. We keep the preprocessor line
507 * parser state between calls in the global variable linestate, with
508 * help from skipcomment().
509 */
510 static Linetype
511 getline(void)
512 {
513 const char *cp;
514 int cursym;
515 int kwlen;
516 Linetype retval;
517 Comment_state wascomment;
518
519 if (fgets(tline, MAXLINE, input) == NULL)
520 return (LT_EOF);
521 retval = LT_PLAIN;
522 wascomment = incomment;
523 cp = skipcomment(tline);
524 if (linestate == LS_START) {
525 if (*cp == '#') {
526 linestate = LS_HASH;
527 cp = skipcomment(cp + 1);
528 } else if (*cp != '\0')
529 linestate = LS_DIRTY;
530 }
531 if (!incomment && linestate == LS_HASH) {
532 keyword = tline + (cp - tline);
533 cp = skipsym(cp);
534 kwlen = cp - keyword;
535 /* no way can we deal with a continuation inside a keyword */
536 if (strncmp(cp, "\\\n", 2) == 0)
537 Eioccc();
538 if (strlcmp("ifdef", keyword, kwlen) == 0 ||
539 strlcmp("ifndef", keyword, kwlen) == 0) {
540 cp = skipcomment(cp);
541 if ((cursym = findsym(cp)) < 0)
542 retval = LT_IF;
543 else {
544 retval = (keyword[2] == 'n')
545 ? LT_FALSE : LT_TRUE;
546 if (value[cursym] == NULL)
547 retval = (retval == LT_TRUE)
548 ? LT_FALSE : LT_TRUE;
549 if (ignore[cursym])
550 retval = (retval == LT_TRUE)
551 ? LT_TRUEI : LT_FALSEI;
552 }
553 cp = skipsym(cp);
554 } else if (strlcmp("if", keyword, kwlen) == 0)
555 retval = ifeval(&cp);
556 else if (strlcmp("elif", keyword, kwlen) == 0)
557 retval = ifeval(&cp) - LT_IF + LT_ELIF;
558 else if (strlcmp("else", keyword, kwlen) == 0)
559 retval = LT_ELSE;
560 else if (strlcmp("endif", keyword, kwlen) == 0)
561 retval = LT_ENDIF;
562 else {
563 linestate = LS_DIRTY;
564 retval = LT_PLAIN;
565 }
566 cp = skipcomment(cp);
567 if (*cp != '\0') {
568 linestate = LS_DIRTY;
569 if (retval == LT_TRUE || retval == LT_FALSE ||
570 retval == LT_TRUEI || retval == LT_FALSEI)
571 retval = LT_IF;
572 if (retval == LT_ELTRUE || retval == LT_ELFALSE)
573 retval = LT_ELIF;
574 }
575 if (retval != LT_PLAIN && (wascomment || incomment)) {
576 retval += LT_DODGY;
577 if (incomment)
578 linestate = LS_DIRTY;
579 }
580 /* skipcomment should have changed the state */
581 if (linestate == LS_HASH)
582 abort(); /* bug */
583 }
584 if (linestate == LS_DIRTY) {
585 while (*cp != '\0')
586 cp = skipcomment(cp + 1);
587 }
588 debug("parser %s comment %s line",
589 comment_name[incomment], linestate_name[linestate]);
590 return (retval);
591 }
592
/*
 * The binary operators supported by the expression evaluator.  Each
 * returns 1 or 0.  Note that if support for division is added then we
 * also need short-circuiting booleans because of divide-by-zero.
 */
static int
op_lt(int a, int b)
{
	return (a < b);
}

static int
op_gt(int a, int b)
{
	return (a > b);
}

static int
op_le(int a, int b)
{
	return (a <= b);
}

static int
op_ge(int a, int b)
{
	return (a >= b);
}

static int
op_eq(int a, int b)
{
	return (a == b);
}

static int
op_ne(int a, int b)
{
	return (a != b);
}

static int
op_or(int a, int b)
{
	return (a || b);
}

static int
op_and(int a, int b)
{
	return (a && b);
}
606
607 /*
608 * An evaluation function takes three arguments, as follows: (1) a pointer to
609 * an element of the precedence table which lists the operators at the current
610 * level of precedence; (2) a pointer to an integer which will receive the
611 * value of the expression; and (3) a pointer to a char* that points to the
612 * expression to be evaluated and that is updated to the end of the expression
613 * when evaluation is complete. The function returns LT_FALSE if the value of
614 * the expression is zero, LT_TRUE if it is non-zero, or LT_IF if the
615 * expression could not be evaluated.
616 */
617 struct ops;
618
619 typedef Linetype eval_fn(const struct ops *, int *, const char **);
620
621 static eval_fn eval_table, eval_unary;
622
623 /*
624 * The precedence table. Expressions involving binary operators are evaluated
625 * in a table-driven way by eval_table. When it evaluates a subexpression it
626 * calls the inner function with its first argument pointing to the next
627 * element of the table. Innermost expressions have special non-table-driven
628 * handling.
629 */
630 static const struct ops {
631 eval_fn *inner;
632 struct op {
633 const char *str;
634 int (*fn)(int, int);
635 } op[5];
636 } eval_ops[] = {
637 { eval_table, { { "||", op_or } } },
638 { eval_table, { { "&&", op_and } } },
639 { eval_table, { { "==", op_eq },
640 { "!=", op_ne } } },
641 { eval_unary, { { "<=", op_le },
642 { ">=", op_ge },
643 { "<", op_lt },
644 { ">", op_gt } } }
645 };
646
647 /*
648 * Function for evaluating the innermost parts of expressions,
649 * viz. !expr (expr) defined(symbol) symbol number
650 * We reset the keepthis flag when we find a non-constant subexpression.
651 */
652 static Linetype
653 eval_unary(const struct ops *ops, int *valp, const char **cpp)
654 {
655 const char *cp;
656 char *ep;
657 int sym;
658
659 cp = skipcomment(*cpp);
660 if (*cp == '!') {
661 debug("eval%d !", ops - eval_ops);
662 cp++;
663 if (eval_unary(ops, valp, &cp) == LT_IF)
664 return (LT_IF);
665 *valp = !*valp;
666 } else if (*cp == '(') {
667 cp++;
668 debug("eval%d (", ops - eval_ops);
669 if (eval_table(eval_ops, valp, &cp) == LT_IF)
670 return (LT_IF);
671 cp = skipcomment(cp);
672 if (*cp++ != ')')
673 return (LT_IF);
674 } else if (isdigit((unsigned char)*cp)) {
675 debug("eval%d number", ops - eval_ops);
676 *valp = strtol(cp, &ep, 0);
677 cp = skipsym(cp);
678 } else if (strncmp(cp, "defined", 7) == 0 && endsym(cp[7])) {
679 cp = skipcomment(cp+7);
680 debug("eval%d defined", ops - eval_ops);
681 if (*cp++ != '(')
682 return (LT_IF);
683 cp = skipcomment(cp);
684 sym = findsym(cp);
685 if (sym < 0 && !symlist)
686 return (LT_IF);
687 *valp = (value[sym] != NULL);
688 cp = skipsym(cp);
689 cp = skipcomment(cp);
690 if (*cp++ != ')')
691 return (LT_IF);
692 keepthis = false;
693 } else if (!endsym(*cp)) {
694 debug("eval%d symbol", ops - eval_ops);
695 sym = findsym(cp);
696 if (sym < 0 && !symlist)
697 return (LT_IF);
698 if (value[sym] == NULL)
699 *valp = 0;
700 else {
701 *valp = strtol(value[sym], &ep, 0);
702 if (*ep != '\0' || ep == value[sym])
703 return (LT_IF);
704 }
705 cp = skipsym(cp);
706 keepthis = false;
707 } else {
708 debug("eval%d bad expr", ops - eval_ops);
709 return (LT_IF);
710 }
711
712 *cpp = cp;
713 debug("eval%d = %d", ops - eval_ops, *valp);
714 return (*valp ? LT_TRUE : LT_FALSE);
715 }
716
717 /*
718 * Table-driven evaluation of binary operators.
719 */
720 static Linetype
721 eval_table(const struct ops *ops, int *valp, const char **cpp)
722 {
723 const struct op *op;
724 const char *cp;
725 int val;
726
727 debug("eval%d", ops - eval_ops);
728 cp = *cpp;
729 if (ops->inner(ops+1, valp, &cp) == LT_IF)
730 return (LT_IF);
731 for (;;) {
732 cp = skipcomment(cp);
733 for (op = ops->op; op->str != NULL; op++)
734 if (strncmp(cp, op->str, strlen(op->str)) == 0)
735 break;
736 if (op->str == NULL)
737 break;
738 cp += strlen(op->str);
739 debug("eval%d %s", ops - eval_ops, op->str);
740 if (ops->inner(ops+1, &val, &cp) == LT_IF)
741 return (LT_IF);
742 *valp = op->fn(*valp, val);
743 }
744
745 *cpp = cp;
746 debug("eval%d = %d", ops - eval_ops, *valp);
747 return (*valp ? LT_TRUE : LT_FALSE);
748 }
749
750 /*
751 * Evaluate the expression on a #if or #elif line. If we can work out
752 * the result we return LT_TRUE or LT_FALSE accordingly, otherwise we
753 * return just a generic LT_IF.
754 */
755 static Linetype
756 ifeval(const char **cpp)
757 {
758 int ret;
759 int val;
760
761 debug("eval %s", *cpp);
762 keepthis = killconsts ? false : true;
763 ret = eval_table(eval_ops, &val, cpp);
764 debug("eval = %d", val);
765 return (keepthis ? LT_IF : ret);
766 }
767
768 /*
769 * Skip over comments and stop at the next character position that is
770 * not whitespace. Between calls we keep the comment state in the
771 * global variable incomment, and we also adjust the global variable
772 * linestate when we see a newline.
773 * XXX: doesn't cope with the buffer splitting inside a state transition.
774 */
775 static const char *
776 skipcomment(const char *cp)
777 {
778 if (text || ignoring[depth]) {
779 for (; isspace((unsigned char)*cp); cp++)
780 if (*cp == '\n')
781 linestate = LS_START;
782 return (cp);
783 }
784 while (*cp != '\0')
785 /* don't reset to LS_START after a line continuation */
786 if (strncmp(cp, "\\\n", 2) == 0)
787 cp += 2;
788 else switch (incomment) {
789 case NO_COMMENT:
790 if (strncmp(cp, "/\\\n", 3) == 0) {
791 incomment = STARTING_COMMENT;
792 cp += 3;
793 } else if (strncmp(cp, "/*", 2) == 0) {
794 incomment = C_COMMENT;
795 cp += 2;
796 } else if (strncmp(cp, "//", 2) == 0) {
797 incomment = CXX_COMMENT;
798 cp += 2;
799 } else if (strncmp(cp, "\n", 1) == 0) {
800 linestate = LS_START;
801 cp += 1;
802 } else if (strchr(" \t", *cp) != NULL) {
803 cp += 1;
804 } else
805 return (cp);
806 continue;
807 case CXX_COMMENT:
808 if (strncmp(cp, "\n", 1) == 0) {
809 incomment = NO_COMMENT;
810 linestate = LS_START;
811 }
812 cp += 1;
813 continue;
814 case C_COMMENT:
815 if (strncmp(cp, "*\\\n", 3) == 0) {
816 incomment = FINISHING_COMMENT;
817 cp += 3;
818 } else if (strncmp(cp, "*/", 2) == 0) {
819 incomment = NO_COMMENT;
820 cp += 2;
821 } else
822 cp += 1;
823 continue;
824 case STARTING_COMMENT:
825 if (*cp == '*') {
826 incomment = C_COMMENT;
827 cp += 1;
828 } else if (*cp == '/') {
829 incomment = CXX_COMMENT;
830 cp += 1;
831 } else {
832 incomment = NO_COMMENT;
833 linestate = LS_DIRTY;
834 }
835 continue;
836 case FINISHING_COMMENT:
837 if (*cp == '/') {
838 incomment = NO_COMMENT;
839 cp += 1;
840 } else
841 incomment = C_COMMENT;
842 continue;
843 default:
844 abort(); /* bug */
845 }
846 return (cp);
847 }
848
/*
 * Skip over an identifier: return a pointer to the first character at
 * or after cp for which endsym() is true.
 */
static const char *
skipsym(const char *cp)
{
	for (; !endsym(*cp); cp++)
		continue;
	return (cp);
}
859
860 /*
861 * Look for the symbol in the symbol table. If is is found, we return
862 * the symbol table index, else we return -1.
863 */
864 static int
865 findsym(const char *str)
866 {
867 const char *cp;
868 int symind;
869
870 cp = skipsym(str);
871 if (cp == str)
872 return (-1);
873 if (symlist)
874 printf("%.*s\n", (int)(cp-str), str);
875 for (symind = 0; symind < nsyms; ++symind) {
876 if (strlcmp(symname[symind], str, cp-str) == 0) {
877 debug("findsym %s %s", symname[symind],
878 value[symind] ? value[symind] : "");
879 return (symind);
880 }
881 }
882 return (-1);
883 }
884
885 /*
886 * Add a symbol to the symbol table.
887 */
888 static void
889 addsym(bool ignorethis, bool definethis, char *sym)
890 {
891 int symind;
892 char *val;
893
894 symind = findsym(sym);
895 if (symind < 0) {
896 if (nsyms >= MAXSYMS)
897 errx(2, "too many symbols");
898 symind = nsyms++;
899 }
900 symname[symind] = sym;
901 ignore[symind] = ignorethis;
902 val = sym + (skipsym(sym) - sym);
903 if (definethis) {
904 if (*val == '=') {
905 value[symind] = val+1;
906 *val = '\0';
907 } else if (*val == '\0')
908 value[symind] = "";
909 else
910 usage();
911 } else {
912 if (*val != '\0')
913 usage();
914 value[symind] = NULL;
915 }
916 }
917
/*
 * Compare s with n characters of t.
 * The same as strncmp() except that it checks that s[n] == '\0', so
 * the result is zero only when s is exactly the first n characters
 * of t.
 */
static int
strlcmp(const char *s, const char *t, size_t n)
{
	size_t i;

	for (i = 0; i < n && t[i] != '\0'; i++) {
		if (s[i] != t[i])
			return ((unsigned char)s[i] - (unsigned char)t[i]);
	}
	/* non-zero if s continues beyond the part that was compared */
	return ((unsigned char)s[i]);
}
932
933 /*
934 * Diagnostics.
935 */
936 static void
937 debug(const char *msg, ...)
938 {
939 va_list ap;
940
941 if (debugging) {
942 va_start(ap, msg);
943 vwarnx(msg, ap);
944 va_end(ap);
945 }
946 }
947
948 static void
949 error(const char *msg)
950 {
951 if (depth == 0)
952 warnx("%s: %d: %s", filename, linenum, msg);
953 else
954 warnx("%s: %d: %s (#if line %d depth %d)",
955 filename, linenum, msg, stifline[depth], depth);
956 errx(2, "output may be truncated");
957 }
958