Home | History | Annotate | Line # | Download | only in unifdef
unifdef.c revision 1.11
      1 /*	$NetBSD: unifdef.c,v 1.11 2003/07/30 08:21:47 itojun Exp $	*/
      2 
      3 /*
      4  * Copyright (c) 2002, 2003 Tony Finch <dot (at) dotat.at>
      5  * Copyright (c) 1985, 1993
      6  *	The Regents of the University of California.  All rights reserved.
      7  *
      8  * This code is derived from software contributed to Berkeley by
      9  * Dave Yost. It was rewritten to support ANSI C by Tony Finch.
     10  *
     11  * Redistribution and use in source and binary forms, with or without
     12  * modification, are permitted provided that the following conditions
     13  * are met:
     14  * 1. Redistributions of source code must retain the above copyright
     15  *    notice, this list of conditions and the following disclaimer.
     16  * 2. Redistributions in binary form must reproduce the above copyright
     17  *    notice, this list of conditions and the following disclaimer in the
     18  *    documentation and/or other materials provided with the distribution.
     19  * 3. All advertising materials mentioning features or use of this software
     20  *    must display the following acknowledgement:
     21  *	This product includes software developed by the University of
     22  *	California, Berkeley and its contributors.
     23  * 4. Neither the name of the University nor the names of its contributors
     24  *    may be used to endorse or promote products derived from this software
     25  *    without specific prior written permission.
     26  *
     27  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     28  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     29  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     30  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     31  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     32  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     33  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     34  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     35  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     36  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     37  * SUCH DAMAGE.
     38  */
     39 
     40 #include <sys/cdefs.h>
     41 
     42 #ifndef lint
     43 #if 0
     44 static const char copyright[] =
     45 "@(#) Copyright (c) 1985, 1993\n\
     46 	The Regents of the University of California.  All rights reserved.\n";
     47 #endif
     48 #ifdef __IDSTRING
     49 __IDSTRING(Berkeley, "@(#)unifdef.c	8.1 (Berkeley) 6/6/93");
     50 __IDSTRING(NetBSD, "$NetBSD: unifdef.c,v 1.11 2003/07/30 08:21:47 itojun Exp $");
     51 __IDSTRING(dotat, "$dotat: things/unifdef.c,v 1.161 2003/07/01 15:32:48 fanf2 Exp $");
     52 #endif
     53 #endif /* not lint */
     54 #ifdef __FBSDID
     55 __FBSDID("$FreeBSD: src/usr.bin/unifdef/unifdef.c,v 1.18 2003/07/01 15:30:43 fanf Exp $");
     56 #endif
     57 
     58 /*
     59  * unifdef - remove ifdef'ed lines
     60  *
     61  *  Wishlist:
     62  *      provide an option which will append the name of the
     63  *        appropriate symbol after #else's and #endif's
     64  *      provide an option which will check symbols after
     65  *        #else's and #endif's to see that they match their
     66  *        corresponding #ifdef or #ifndef
     67  *      generate #line directives in place of deleted code
     68  *
     69  *   The first two items above require better buffer handling, which would
     70  *     also make it possible to handle all "dodgy" directives correctly.
     71  */
     72 
     73 #include <ctype.h>
     74 #include <err.h>
     75 #include <stdarg.h>
     76 #include <stdio.h>
     77 #include <stdlib.h>
     78 #include <string.h>
     79 #include <unistd.h>
     80 
     81 #include "stdbool.h"
     82 
     83 /* types of input lines: */
     84 typedef enum {
     85 	LT_TRUEI,		/* a true #if with ignore flag */
     86 	LT_FALSEI,		/* a false #if with ignore flag */
     87 	LT_IF,			/* an unknown #if */
     88 	LT_TRUE,		/* a true #if */
     89 	LT_FALSE,		/* a false #if */
     90 	LT_ELIF,		/* an unknown #elif */
     91 	LT_ELTRUE,		/* a true #elif */
     92 	LT_ELFALSE,		/* a false #elif */
     93 	LT_ELSE,		/* #else */
     94 	LT_ENDIF,		/* #endif */
     95 	LT_DODGY,		/* flag: directive is not on one line */
     96 	LT_DODGY_LAST = LT_DODGY + LT_ENDIF,
     97 	LT_PLAIN,		/* ordinary line */
     98 	LT_EOF,			/* end of file */
     99 	LT_COUNT
    100 } Linetype;
    101 
    102 static char const * const linetype_name[] = {
    103 	"TRUEI", "FALSEI", "IF", "TRUE", "FALSE",
    104 	"ELIF", "ELTRUE", "ELFALSE", "ELSE", "ENDIF",
    105 	"DODGY TRUEI", "DODGY FALSEI",
    106 	"DODGY IF", "DODGY TRUE", "DODGY FALSE",
    107 	"DODGY ELIF", "DODGY ELTRUE", "DODGY ELFALSE",
    108 	"DODGY ELSE", "DODGY ENDIF",
    109 	"PLAIN", "EOF"
    110 };
    111 
    112 /* state of #if processing */
    113 typedef enum {
    114 	IS_OUTSIDE,
    115 	IS_FALSE_PREFIX,	/* false #if followed by false #elifs */
    116 	IS_TRUE_PREFIX,		/* first non-false #(el)if is true */
    117 	IS_PASS_MIDDLE,		/* first non-false #(el)if is unknown */
    118 	IS_FALSE_MIDDLE,	/* a false #elif after a pass state */
    119 	IS_TRUE_MIDDLE,		/* a true #elif after a pass state */
    120 	IS_PASS_ELSE,		/* an else after a pass state */
    121 	IS_FALSE_ELSE,		/* an else after a true state */
    122 	IS_TRUE_ELSE,		/* an else after only false states */
    123 	IS_FALSE_TRAILER,	/* #elifs after a true are false */
    124 	IS_COUNT
    125 } Ifstate;
    126 
    127 static char const * const ifstate_name[] = {
    128 	"OUTSIDE", "FALSE_PREFIX", "TRUE_PREFIX",
    129 	"PASS_MIDDLE", "FALSE_MIDDLE", "TRUE_MIDDLE",
    130 	"PASS_ELSE", "FALSE_ELSE", "TRUE_ELSE",
    131 	"FALSE_TRAILER"
    132 };
    133 
    134 /* state of comment parser */
    135 typedef enum {
    136 	NO_COMMENT = false,	/* outside a comment */
    137 	C_COMMENT,		/* in a comment like this one */
    138 	CXX_COMMENT,		/* between // and end of line */
    139 	STARTING_COMMENT,	/* just after slash-backslash-newline */
    140 	FINISHING_COMMENT	/* star-backslash-newline in a C comment */
    141 } Comment_state;
    142 
    143 static char const * const comment_name[] = {
    144 	"NO", "C", "CXX", "STARTING", "FINISHING"
    145 };
    146 
    147 /* state of preprocessor line parser */
    148 typedef enum {
    149 	LS_START,		/* only space and comments on this line */
    150 	LS_HASH,		/* only space, comments, and a hash */
    151 	LS_DIRTY		/* this line can't be a preprocessor line */
    152 } Line_state;
    153 
    154 static char const * const linestate_name[] = {
    155 	"START", "HASH", "DIRTY"
    156 };
    157 
    158 /*
    159  * Minimum translation limits from ISO/IEC 9899:1999 5.2.4.1
    160  */
    161 #define	MAXDEPTH        64			/* maximum #if nesting */
    162 #define	MAXLINE         4096			/* maximum length of line */
    163 #define	MAXSYMS         4096			/* maximum number of symbols */
    164 
    165 /*
    166  * Sometimes when editing a keyword the replacement text is longer, so
    167  * we leave some space at the end of the tline buffer to accommodate this.
    168  */
    169 #define	EDITSLOP        10
    170 
    171 /*
    172  * Globals.
    173  */
    174 
    175 static bool             complement;		/* -c: do the complement */
    176 static bool             debugging;		/* -d: debugging reports */
    177 static bool             iocccok;		/* -e: fewer IOCCC errors */
    178 static bool             killconsts;		/* -k: eval constant #ifs */
    179 static bool             lnblank;		/* -l: blank deleted lines */
    180 static bool             symlist;		/* -s: output symbol list */
    181 static bool             text;			/* -t: this is a text file */
    182 
    183 static const char      *symname[MAXSYMS];	/* symbol name */
    184 static const char      *value[MAXSYMS];		/* -Dsym=value */
    185 static bool             ignore[MAXSYMS];	/* -iDsym or -iUsym */
    186 static int              nsyms;			/* number of symbols */
    187 
    188 static FILE            *input;			/* input file pointer */
    189 static const char      *filename;		/* input file name */
    190 static int              linenum;		/* current line number */
    191 
    192 static char             tline[MAXLINE+EDITSLOP];/* input buffer plus space */
    193 static char            *keyword;		/* used for editing #elif's */
    194 
    195 static Comment_state    incomment;		/* comment parser state */
    196 static Line_state       linestate;		/* #if line parser state */
    197 static Ifstate          ifstate[MAXDEPTH];	/* #if processor state */
    198 static bool             ignoring[MAXDEPTH];	/* ignore comments state */
    199 static int              stifline[MAXDEPTH];	/* start of current #if */
    200 static int              depth;			/* current #if nesting */
    201 static bool             keepthis;		/* don't delete constant #if */
    202 
    203 static int              exitstat;		/* program exit status */
    204 
    205 static void             addsym(bool, bool, char *);
    206 static void             debug(const char *, ...);
    207 static void             done(void);
    208 static void             error(const char *);
    209 static int              findsym(const char *);
    210 static void             flushline(bool);
    211 static Linetype         getline(void);
    212 static Linetype         ifeval(const char **);
    213 static void             ignoreoff(void);
    214 static void             ignoreon(void);
    215 static void             keywordedit(const char *);
    216 static void             nest(void);
    217 static void             process(void);
    218 static const char      *skipcomment(const char *);
    219 static const char      *skipsym(const char *);
    220 static void             state(Ifstate);
    221 static int              strlcmp(const char *, const char *, size_t);
    222 static void             usage(void);
    223 
    224 #define endsym(c) (!isalpha((unsigned char)c) && !isdigit((unsigned char)c) && c != '_')
    225 
    226 /*
    227  * The main program.
    228  */
    229 int
    230 main(int argc, char *argv[])
    231 {
    232 	int opt;
    233 
    234 	while ((opt = getopt(argc, argv, "i:D:U:I:cdeklst")) != -1)
    235 		switch (opt) {
    236 		case 'i': /* treat stuff controlled by these symbols as text */
    237 			/*
    238 			 * For strict backwards-compatibility the U or D
    239 			 * should be immediately after the -i but it doesn't
    240 			 * matter much if we relax that requirement.
    241 			 */
    242 			opt = *optarg++;
    243 			if (opt == 'D')
    244 				addsym(true, true, optarg);
    245 			else if (opt == 'U')
    246 				addsym(true, false, optarg);
    247 			else
    248 				usage();
    249 			break;
    250 		case 'D': /* define a symbol */
    251 			addsym(false, true, optarg);
    252 			break;
    253 		case 'U': /* undef a symbol */
    254 			addsym(false, false, optarg);
    255 			break;
    256 		case 'I':
    257 			/* no-op for compatibility with cpp */
    258 			break;
    259 		case 'c': /* treat -D as -U and vice versa */
    260 			complement = true;
    261 			break;
    262 		case 'd':
    263 			debugging = true;
    264 			break;
    265 		case 'e': /* fewer errors from dodgy lines */
    266 			iocccok = true;
    267 			break;
    268 		case 'k': /* process constant #ifs */
    269 			killconsts = true;
    270 			break;
    271 		case 'l': /* blank deleted lines instead of omitting them */
    272 			lnblank = true;
    273 			break;
    274 		case 's': /* only output list of symbols that control #ifs */
    275 			symlist = true;
    276 			break;
    277 		case 't': /* don't parse C comments */
    278 			text = true;
    279 			break;
    280 		default:
    281 			usage();
    282 		}
    283 	argc -= optind;
    284 	argv += optind;
    285 	if (nsyms == 0 && !symlist) {
    286 		warnx("must -D or -U at least one symbol");
    287 		usage();
    288 	}
    289 	if (argc > 1) {
    290 		errx(2, "can only do one file");
    291 	} else if (argc == 1 && strcmp(*argv, "-") != 0) {
    292 		filename = *argv;
    293 		input = fopen(filename, "r");
    294 		if (input == NULL)
    295 			err(2, "can't open %s", filename);
    296 	} else {
    297 		filename = "[stdin]";
    298 		input = stdin;
    299 	}
    300 	process();
    301 	abort(); /* bug */
    302 }
    303 
    304 static void
    305 usage(void)
    306 {
    307 	fprintf(stderr, "usage: unifdef [-cdeklst]"
    308 	    " [-Dsym[=val]] [-Usym] [-iDsym[=val]] [-iUsym] ... [file]\n");
    309 	exit(2);
    310 }
    311 
    312 /*
    313  * A state transition function alters the global #if processing state
    314  * in a particular way. The table below is indexed by the current
    315  * processing state and the type of the current line.
    316  *
    317  * Nesting is handled by keeping a stack of states; some transition
    318  * functions increase or decrease the depth. They also maintain the
    319  * ignore state on a stack. In some complicated cases they have to
    320  * alter the preprocessor directive, as follows.
    321  *
    322  * When we have processed a group that starts off with a known-false
    323  * #if/#elif sequence (which has therefore been deleted) followed by a
    324  * #elif that we don't understand and therefore must keep, we edit the
    325  * latter into a #if to keep the nesting correct.
    326  *
    327  * When we find a true #elif in a group, the following block will
    328  * always be kept and the rest of the sequence after the next #elif or
    329  * #else will be discarded. We edit the #elif into a #else and the
    330  * following directive to #endif since this has the desired behaviour.
    331  *
    332  * "Dodgy" directives are split across multiple lines, the most common
    333  * example being a multi-line comment hanging off the right of the
    334  * directive. We can handle them correctly only if there is no change
    335  * from printing to dropping (or vice versa) caused by that directive.
    336  * If the directive is the first of a group we have a choice between
    337  * failing with an error, or passing it through unchanged instead of
    338  * evaluating it. The latter is not the default to avoid questions from
    339  * users about unifdef unexpectedly leaving behind preprocessor directives.
    340  */
    341 typedef void state_fn(void);
    342 
    343 /* report an error */
    344 static void Eelif (void) { error("Inappropriate #elif"); }
    345 static void Eelse (void) { error("Inappropriate #else"); }
    346 static void Eendif(void) { error("Inappropriate #endif"); }
    347 static void Eeof  (void) { error("Premature EOF"); }
    348 static void Eioccc(void) { error("Obfuscated preprocessor control line"); }
    349 /* plain line handling */
    350 static void print (void) { flushline(true); }
    351 static void drop  (void) { flushline(false); }
    352 /* output lacks group's start line */
    353 static void Strue (void) { drop();  ignoreoff(); state(IS_TRUE_PREFIX); }
    354 static void Sfalse(void) { drop();  ignoreoff(); state(IS_FALSE_PREFIX); }
    355 static void Selse (void) { drop();               state(IS_TRUE_ELSE); }
    356 /* print/pass this block */
    357 static void Pelif (void) { print(); ignoreoff(); state(IS_PASS_MIDDLE); }
    358 static void Pelse (void) { print();              state(IS_PASS_ELSE); }
    359 static void Pendif(void) { print(); --depth; }
    360 /* discard this block */
    361 static void Dfalse(void) { drop();  ignoreoff(); state(IS_FALSE_TRAILER); }
    362 static void Delif (void) { drop();  ignoreoff(); state(IS_FALSE_MIDDLE); }
    363 static void Delse (void) { drop();               state(IS_FALSE_ELSE); }
    364 static void Dendif(void) { drop();  --depth; }
    365 /* first line of group */
    366 static void Fdrop (void) { nest();  Dfalse(); }
    367 static void Fpass (void) { nest();  Pelif(); }
    368 static void Ftrue (void) { nest();  Strue(); }
    369 static void Ffalse(void) { nest();  Sfalse(); }
    370 /* variable pedantry for obfuscated lines */
    371 static void Oiffy (void) { if (iocccok) Fpass(); else Eioccc(); ignoreon(); }
    372 static void Oif   (void) { if (iocccok) Fpass(); else Eioccc(); }
    373 static void Oelif (void) { if (iocccok) Pelif(); else Eioccc(); }
    374 /* ignore comments in this block */
    375 static void Idrop (void) { Fdrop();  ignoreon(); }
    376 static void Itrue (void) { Ftrue();  ignoreon(); }
    377 static void Ifalse(void) { Ffalse(); ignoreon(); }
    378 /* edit this line */
    379 static void Mpass (void) { strncpy(keyword, "if  ", 4); Pelif(); }
    380 static void Mtrue (void) { keywordedit("else\n");  state(IS_TRUE_MIDDLE); }
    381 static void Melif (void) { keywordedit("endif\n"); state(IS_FALSE_TRAILER); }
    382 static void Melse (void) { keywordedit("endif\n"); state(IS_FALSE_ELSE); }
    383 
    384 static state_fn * const trans_table[IS_COUNT][LT_COUNT] = {
        /*
         * The state transition table.  process() calls
         * trans_table[ifstate[depth]][lineval]() for every input line, so
         * each row below corresponds to one Ifstate (in enum order) and
         * each column to one Linetype (in enum order).
         *
         * Every row is written as three source lines: the ten ordinary
         * directive types, then the same ten types with LT_DODGY added
         * (directive not contained on one line), then LT_PLAIN and LT_EOF.
         * The column legend is in the comment at the bottom of the table.
         */
    385 /* IS_OUTSIDE */
    386 { Itrue, Ifalse,Fpass, Ftrue, Ffalse,Eelif, Eelif, Eelif, Eelse, Eendif,
    387   Oiffy, Oiffy, Fpass, Oif,   Oif,   Eelif, Eelif, Eelif, Eelse, Eendif,
    388   print, done },
    389 /* IS_FALSE_PREFIX */
    390 { Idrop, Idrop, Fdrop, Fdrop, Fdrop, Mpass, Strue, Sfalse,Selse, Dendif,
    391   Idrop, Idrop, Fdrop, Fdrop, Fdrop, Mpass, Eioccc,Eioccc,Eioccc,Eioccc,
    392   drop,  Eeof },
    393 /* IS_TRUE_PREFIX */
    394 { Itrue, Ifalse,Fpass, Ftrue, Ffalse,Dfalse,Dfalse,Dfalse,Delse, Dendif,
    395   Oiffy, Oiffy, Fpass, Oif,   Oif,   Eioccc,Eioccc,Eioccc,Eioccc,Eioccc,
    396   print, Eeof },
    397 /* IS_PASS_MIDDLE */
    398 { Itrue, Ifalse,Fpass, Ftrue, Ffalse,Pelif, Mtrue, Delif, Pelse, Pendif,
    399   Oiffy, Oiffy, Fpass, Oif,   Oif,   Pelif, Oelif, Oelif, Pelse, Pendif,
    400   print, Eeof },
    401 /* IS_FALSE_MIDDLE */
    402 { Idrop, Idrop, Fdrop, Fdrop, Fdrop, Pelif, Mtrue, Delif, Pelse, Pendif,
    403   Idrop, Idrop, Fdrop, Fdrop, Fdrop, Eioccc,Eioccc,Eioccc,Eioccc,Eioccc,
    404   drop,  Eeof },
    405 /* IS_TRUE_MIDDLE */
    406 { Itrue, Ifalse,Fpass, Ftrue, Ffalse,Melif, Melif, Melif, Melse, Pendif,
    407   Oiffy, Oiffy, Fpass, Oif,   Oif,   Eioccc,Eioccc,Eioccc,Eioccc,Pendif,
    408   print, Eeof },
    409 /* IS_PASS_ELSE */
    410 { Itrue, Ifalse,Fpass, Ftrue, Ffalse,Eelif, Eelif, Eelif, Eelse, Pendif,
    411   Oiffy, Oiffy, Fpass, Oif,   Oif,   Eelif, Eelif, Eelif, Eelse, Pendif,
    412   print, Eeof },
    413 /* IS_FALSE_ELSE */
    414 { Idrop, Idrop, Fdrop, Fdrop, Fdrop, Eelif, Eelif, Eelif, Eelse, Dendif,
    415   Idrop, Idrop, Fdrop, Fdrop, Fdrop, Eelif, Eelif, Eelif, Eelse, Eioccc,
    416   drop,  Eeof },
    417 /* IS_TRUE_ELSE */
    418 { Itrue, Ifalse,Fpass, Ftrue, Ffalse,Eelif, Eelif, Eelif, Eelse, Dendif,
    419   Oiffy, Oiffy, Fpass, Oif,   Oif,   Eelif, Eelif, Eelif, Eelse, Eioccc,
    420   print, Eeof },
    421 /* IS_FALSE_TRAILER */
    422 { Idrop, Idrop, Fdrop, Fdrop, Fdrop, Dfalse,Dfalse,Dfalse,Delse, Dendif,
    423   Idrop, Idrop, Fdrop, Fdrop, Fdrop, Dfalse,Dfalse,Dfalse,Delse, Eioccc,
    424   drop,  Eeof }
    425 /*TRUEI  FALSEI IF     TRUE   FALSE  ELIF   ELTRUE ELFALSE ELSE  ENDIF
    426   TRUEI  FALSEI IF     TRUE   FALSE  ELIF   ELTRUE ELFALSE ELSE  ENDIF (DODGY)
    427   PLAIN  EOF */
    428 };
    429 
    430 /*
    431  * State machine utility functions
    432  */
    433 static void
    434 done(void)
    435 {
    436 	if (incomment)
    437 		error("EOF in comment");
    438 	exit(exitstat);
    439 }
    440 static void
    441 ignoreoff(void)
    442 {
    443 	ignoring[depth] = ignoring[depth-1];
    444 }
    445 static void
    446 ignoreon(void)
    447 {
    448 	ignoring[depth] = true;
    449 }
    450 static void
    451 keywordedit(const char *replacement)
    452 {
    453 	strlcpy(keyword, replacement, tline + sizeof(tline) - keyword);
    454 	print();
    455 }
    456 static void
    457 nest(void)
    458 {
    459 	depth += 1;
    460 	if (depth >= MAXDEPTH)
    461 		error("Too many levels of nesting");
    462 	stifline[depth] = linenum;
    463 }
    464 static void
    465 state(Ifstate is)
    466 {
    467 	ifstate[depth] = is;
    468 }
    469 
    470 /*
    471  * Write a line to the output or not, according to command line options.
    472  */
    473 static void
    474 flushline(bool keep)
    475 {
    476 	if (symlist)
    477 		return;
    478 	if (keep ^ complement)
    479 		fputs(tline, stdout);
    480 	else {
    481 		if (lnblank)
    482 			putc('\n', stdout);
    483 		exitstat = 1;
    484 	}
    485 }
    486 
    487 /*
    488  * The driver for the state machine.
    489  */
    490 static void
    491 process(void)
    492 {
    493 	Linetype lineval;
    494 
    495 	for (;;) {
    496 		linenum++;
    497 		lineval = getline();
    498 		trans_table[ifstate[depth]][lineval]();
    499 		debug("process %s -> %s depth %d",
    500 		    linetype_name[lineval],
    501 		    ifstate_name[ifstate[depth]], depth);
    502 	}
    503 }
    504 
    505 /*
    506  * Parse a line and determine its type. We keep the preprocessor line
    507  * parser state between calls in the global variable linestate, with
    508  * help from skipcomment().
         *
         * The line is read into the global buffer tline. When the line holds
         * a directive, the global pointer keyword is left pointing at the
         * directive name inside tline so that a transition function can
         * edit it in place.
         *
         * Returns LT_EOF when no more input can be read, LT_PLAIN for a line
         * that is not a conditional directive, and otherwise the directive's
         * type, with LT_DODGY added if the line began or ended inside a
         * comment.
         *
         * NOTE(review): POSIX also declares a getline() in <stdio.h>; on
         * systems that expose it this name may clash - confirm for the
         * target platform.
    509  */
    510 static Linetype
    511 getline(void)
    512 {
    513 	const char *cp;
    514 	int cursym;
    515 	int kwlen;
    516 	Linetype retval;
    517 	Comment_state wascomment;
    518 
        	/* read at most MAXLINE-1 bytes; tline has EDITSLOP bytes spare */
    519 	if (fgets(tline, MAXLINE, input) == NULL)
    520 		return (LT_EOF);
    521 	retval = LT_PLAIN;
        	/* remember whether this line started inside a comment */
    522 	wascomment = incomment;
    523 	cp = skipcomment(tline);
    524 	if (linestate == LS_START) {
    525 		if (*cp == '#') {
    526 			linestate = LS_HASH;
    527 			cp = skipcomment(cp + 1);
    528 		} else if (*cp != '\0')
    529 			linestate = LS_DIRTY;
    530 	}
    531 	if (!incomment && linestate == LS_HASH) {
        		/* same position as cp, but through the writable buffer */
    532 		keyword = tline + (cp - tline);
    533 		cp = skipsym(cp);
    534 		kwlen = cp - keyword;
    535 		/* no way can we deal with a continuation inside a keyword */
    536 		if (strncmp(cp, "\\\n", 2) == 0)
    537 			Eioccc();
    538 		if (strlcmp("ifdef", keyword, kwlen) == 0 ||
    539 		    strlcmp("ifndef", keyword, kwlen) == 0) {
    540 			cp = skipcomment(cp);
    541 			if ((cursym = findsym(cp)) < 0)
    542 				retval = LT_IF;
    543 			else {
        				/* keyword[2] is 'n' only for "ifndef" */
    544 				retval = (keyword[2] == 'n')
    545 				    ? LT_FALSE : LT_TRUE;
        				/*
        				 * A NULL value inverts the result;
        				 * presumably it marks a symbol given
        				 * with -U (addsym is not shown here).
        				 */
    546 				if (value[cursym] == NULL)
    547 					retval = (retval == LT_TRUE)
    548 					    ? LT_FALSE : LT_TRUE;
    549 				if (ignore[cursym])
    550 					retval = (retval == LT_TRUE)
    551 					    ? LT_TRUEI : LT_FALSEI;
    552 			}
    553 			cp = skipsym(cp);
    554 		} else if (strlcmp("if", keyword, kwlen) == 0)
    555 			retval = ifeval(&cp);
    556 		else if (strlcmp("elif", keyword, kwlen) == 0)
        			/*
        			 * LT_IF/LT_TRUE/LT_FALSE and
        			 * LT_ELIF/LT_ELTRUE/LT_ELFALSE are laid out
        			 * in the same order in the enum, so shifting
        			 * by the difference converts one to the other.
        			 */
    557 			retval = ifeval(&cp) - LT_IF + LT_ELIF;
    558 		else if (strlcmp("else", keyword, kwlen) == 0)
    559 			retval = LT_ELSE;
    560 		else if (strlcmp("endif", keyword, kwlen) == 0)
    561 			retval = LT_ENDIF;
    562 		else {
        			/* some other directive: treat it as ordinary text */
    563 			linestate = LS_DIRTY;
    564 			retval = LT_PLAIN;
    565 		}
    566 		cp = skipcomment(cp);
        		/*
        		 * Anything left after the directive means we did not
        		 * understand the whole line, so a known result is
        		 * downgraded to the corresponding "unknown" type.
        		 */
    567 		if (*cp != '\0') {
    568 			linestate = LS_DIRTY;
    569 			if (retval == LT_TRUE || retval == LT_FALSE ||
    570 			    retval == LT_TRUEI || retval == LT_FALSEI)
    571 				retval = LT_IF;
    572 			if (retval == LT_ELTRUE || retval == LT_ELFALSE)
    573 				retval = LT_ELIF;
    574 		}
        		/* a directive that starts or ends inside a comment is dodgy */
    575 		if (retval != LT_PLAIN && (wascomment || incomment)) {
    576 			retval += LT_DODGY;
    577 			if (incomment)
    578 				linestate = LS_DIRTY;
    579 		}
    580 		/* skipcomment should have changed the state */
    581 		if (linestate == LS_HASH)
    582 			abort(); /* bug */
    583 	}
        	/* scan the rest of a non-directive line to track comment state */
    584 	if (linestate == LS_DIRTY) {
    585 		while (*cp != '\0')
    586 			cp = skipcomment(cp + 1);
    587 	}
    588 	debug("parser %s comment %s line",
    589 	    comment_name[incomment], linestate_name[linestate]);
    590 	return (retval);
    591 }
    592 
    593 /*
    594  * These are the binary operators that are supported by the expression
    595  * evaluator. Note that if support for division is added then we also
    596  * need short-circuiting booleans because of divide-by-zero.
    597  */
    598 static int op_lt(int a, int b) { return (a < b); }
    599 static int op_gt(int a, int b) { return (a > b); }
    600 static int op_le(int a, int b) { return (a <= b); }
    601 static int op_ge(int a, int b) { return (a >= b); }
    602 static int op_eq(int a, int b) { return (a == b); }
    603 static int op_ne(int a, int b) { return (a != b); }
    604 static int op_or(int a, int b) { return (a || b); }
    605 static int op_and(int a, int b) { return (a && b); }
    606 
    607 /*
    608  * An evaluation function takes three arguments, as follows: (1) a pointer to
    609  * an element of the precedence table which lists the operators at the current
    610  * level of precedence; (2) a pointer to an integer which will receive the
    611  * value of the expression; and (3) a pointer to a char* that points to the
    612  * expression to be evaluated and that is updated to the end of the expression
    613  * when evaluation is complete. The function returns LT_FALSE if the value of
    614  * the expression is zero, LT_TRUE if it is non-zero, or LT_IF if the
    615  * expression could not be evaluated.
    616  */
    617 struct ops;
    618 
    619 typedef Linetype eval_fn(const struct ops *, int *, const char **);
    620 
    621 static eval_fn eval_table, eval_unary;
    622 
    623 /*
    624  * The precedence table. Expressions involving binary operators are evaluated
    625  * in a table-driven way by eval_table. When it evaluates a subexpression it
    626  * calls the inner function with its first argument pointing to the next
    627  * element of the table. Innermost expressions have special non-table-driven
    628  * handling.
    629  */
    630 static const struct ops {
    631 	eval_fn *inner;
    632 	struct op {
    633 		const char *str;
    634 		int (*fn)(int, int);
    635 	} op[5];
    636 } eval_ops[] = {
    637 	{ eval_table, { { "||", op_or } } },
    638 	{ eval_table, { { "&&", op_and } } },
    639 	{ eval_table, { { "==", op_eq },
    640 			{ "!=", op_ne } } },
    641 	{ eval_unary, { { "<=", op_le },
    642 			{ ">=", op_ge },
    643 			{ "<", op_lt },
    644 			{ ">", op_gt } } }
    645 };
    646 
    647 /*
    648  * Function for evaluating the innermost parts of expressions,
    649  * viz. !expr (expr) defined(symbol) symbol number
    650  * We reset the keepthis flag when we find a non-constant subexpression.
    651  */
    652 static Linetype
    653 eval_unary(const struct ops *ops, int *valp, const char **cpp)
    654 {
    655 	const char *cp;
    656 	char *ep;
    657 	int sym;
    658 
    659 	cp = skipcomment(*cpp);
    660 	if (*cp == '!') {
    661 		debug("eval%d !", ops - eval_ops);
    662 		cp++;
    663 		if (eval_unary(ops, valp, &cp) == LT_IF)
    664 			return (LT_IF);
    665 		*valp = !*valp;
    666 	} else if (*cp == '(') {
    667 		cp++;
    668 		debug("eval%d (", ops - eval_ops);
    669 		if (eval_table(eval_ops, valp, &cp) == LT_IF)
    670 			return (LT_IF);
    671 		cp = skipcomment(cp);
    672 		if (*cp++ != ')')
    673 			return (LT_IF);
    674 	} else if (isdigit((unsigned char)*cp)) {
    675 		debug("eval%d number", ops - eval_ops);
    676 		*valp = strtol(cp, &ep, 0);
    677 		cp = skipsym(cp);
    678 	} else if (strncmp(cp, "defined", 7) == 0 && endsym(cp[7])) {
    679 		cp = skipcomment(cp+7);
    680 		debug("eval%d defined", ops - eval_ops);
    681 		if (*cp++ != '(')
    682 			return (LT_IF);
    683 		cp = skipcomment(cp);
    684 		sym = findsym(cp);
    685 		if (sym < 0 && !symlist)
    686 			return (LT_IF);
    687 		*valp = (value[sym] != NULL);
    688 		cp = skipsym(cp);
    689 		cp = skipcomment(cp);
    690 		if (*cp++ != ')')
    691 			return (LT_IF);
    692 		keepthis = false;
    693 	} else if (!endsym(*cp)) {
    694 		debug("eval%d symbol", ops - eval_ops);
    695 		sym = findsym(cp);
    696 		if (sym < 0 && !symlist)
    697 			return (LT_IF);
    698 		if (value[sym] == NULL)
    699 			*valp = 0;
    700 		else {
    701 			*valp = strtol(value[sym], &ep, 0);
    702 			if (*ep != '\0' || ep == value[sym])
    703 				return (LT_IF);
    704 		}
    705 		cp = skipsym(cp);
    706 		keepthis = false;
    707 	} else {
    708 		debug("eval%d bad expr", ops - eval_ops);
    709 		return (LT_IF);
    710 	}
    711 
    712 	*cpp = cp;
    713 	debug("eval%d = %d", ops - eval_ops, *valp);
    714 	return (*valp ? LT_TRUE : LT_FALSE);
    715 }
    716 
    717 /*
    718  * Table-driven evaluation of binary operators.
    719  */
    720 static Linetype
    721 eval_table(const struct ops *ops, int *valp, const char **cpp)
    722 {
    723 	const struct op *op;
    724 	const char *cp;
    725 	int val;
    726 
    727 	debug("eval%d", ops - eval_ops);
    728 	cp = *cpp;
    729 	if (ops->inner(ops+1, valp, &cp) == LT_IF)
    730 		return (LT_IF);
    731 	for (;;) {
    732 		cp = skipcomment(cp);
    733 		for (op = ops->op; op->str != NULL; op++)
    734 			if (strncmp(cp, op->str, strlen(op->str)) == 0)
    735 				break;
    736 		if (op->str == NULL)
    737 			break;
    738 		cp += strlen(op->str);
    739 		debug("eval%d %s", ops - eval_ops, op->str);
    740 		if (ops->inner(ops+1, &val, &cp) == LT_IF)
    741 			return (LT_IF);
    742 		*valp = op->fn(*valp, val);
    743 	}
    744 
    745 	*cpp = cp;
    746 	debug("eval%d = %d", ops - eval_ops, *valp);
    747 	return (*valp ? LT_TRUE : LT_FALSE);
    748 }
    749 
    750 /*
    751  * Evaluate the expression on a #if or #elif line. If we can work out
    752  * the result we return LT_TRUE or LT_FALSE accordingly, otherwise we
    753  * return just a generic LT_IF.
    754  */
    755 static Linetype
    756 ifeval(const char **cpp)
    757 {
    758 	int ret;
    759 	int val;
    760 
    761 	debug("eval %s", *cpp);
    762 	keepthis = killconsts ? false : true;
    763 	ret = eval_table(eval_ops, &val, cpp);
    764 	debug("eval = %d", val);
    765 	return (keepthis ? LT_IF : ret);
    766 }
    767 
    768 /*
    769  * Skip over comments and stop at the next character position that is
    770  * not whitespace. Between calls we keep the comment state in the
    771  * global variable incomment, and we also adjust the global variable
    772  * linestate when we see a newline.
    773  * XXX: doesn't cope with the buffer splitting inside a state transition.
         *
         * cp: position in a NUL-terminated line to start scanning from.
         *
         * Returns a pointer to the first character that is neither whitespace
         * nor part of a comment, or to the terminating NUL if the rest of the
         * string is only whitespace and comments.
         *
         * In text mode (-t), or while the current nesting level is being
         * ignored, comments are not recognized and only whitespace is skipped.
    774  */
    775 static const char *
    776 skipcomment(const char *cp)
    777 {
    778 	if (text || ignoring[depth]) {
    779 		for (; isspace((unsigned char)*cp); cp++)
    780 			if (*cp == '\n')
    781 				linestate = LS_START;
    782 		return (cp);
    783 	}
        	/*
        	 * The body of this loop is a single if/else statement; every
        	 * "continue" inside the switch continues this while loop.
        	 */
    784 	while (*cp != '\0')
    785 		/* don't reset to LS_START after a line continuation */
    786 		if (strncmp(cp, "\\\n", 2) == 0)
    787 			cp += 2;
    788 		else switch (incomment) {
    789 		case NO_COMMENT:
        			/* a slash split from what follows by a continuation */
    790 			if (strncmp(cp, "/\\\n", 3) == 0) {
    791 				incomment = STARTING_COMMENT;
    792 				cp += 3;
    793 			} else if (strncmp(cp, "/*", 2) == 0) {
    794 				incomment = C_COMMENT;
    795 				cp += 2;
    796 			} else if (strncmp(cp, "//", 2) == 0) {
    797 				incomment = CXX_COMMENT;
    798 				cp += 2;
    799 			} else if (strncmp(cp, "\n", 1) == 0) {
    800 				linestate = LS_START;
    801 				cp += 1;
    802 			} else if (strchr(" \t", *cp) != NULL) {
    803 				cp += 1;
    804 			} else
        				/* first character of real content */
    805 				return (cp);
    806 			continue;
    807 		case CXX_COMMENT:
        			/* a // comment runs to the end of the line */
    808 			if (strncmp(cp, "\n", 1) == 0) {
    809 				incomment = NO_COMMENT;
    810 				linestate = LS_START;
    811 			}
    812 			cp += 1;
    813 			continue;
    814 		case C_COMMENT:
        			/* a star split from what follows by a continuation */
    815 			if (strncmp(cp, "*\\\n", 3) == 0) {
    816 				incomment = FINISHING_COMMENT;
    817 				cp += 3;
    818 			} else if (strncmp(cp, "*/", 2) == 0) {
    819 				incomment = NO_COMMENT;
    820 				cp += 2;
    821 			} else
    822 				cp += 1;
    823 			continue;
    824 		case STARTING_COMMENT:
    825 			if (*cp == '*') {
    826 				incomment = C_COMMENT;
    827 				cp += 1;
    828 			} else if (*cp == '/') {
    829 				incomment = CXX_COMMENT;
    830 				cp += 1;
    831 			} else {
        				/*
        				 * The slash did not start a comment, so
        				 * the line has real content. cp is not
        				 * advanced: the character is looked at
        				 * again in the NO_COMMENT state.
        				 */
    832 				incomment = NO_COMMENT;
    833 				linestate = LS_DIRTY;
    834 			}
    835 			continue;
    836 		case FINISHING_COMMENT:
    837 			if (*cp == '/') {
    838 				incomment = NO_COMMENT;
    839 				cp += 1;
    840 			} else
        				/* still inside; re-examine this character */
    841 				incomment = C_COMMENT;
    842 			continue;
    843 		default:
    844 			abort(); /* bug */
    845 		}
        	/* reached the end of the string without finding content */
    846 	return (cp);
    847 }
    848 
/*
 * Step past an identifier: return a pointer to the first character at
 * or after cp for which endsym() is true.
 */
static const char *
skipsym(const char *cp)
{
	for (; !endsym(*cp); cp++)
		continue;
	return (cp);
}
    859 
    860 /*
    861  * Look for the symbol in the symbol table. If is is found, we return
    862  * the symbol table index, else we return -1.
    863  */
    864 static int
    865 findsym(const char *str)
    866 {
    867 	const char *cp;
    868 	int symind;
    869 
    870 	cp = skipsym(str);
    871 	if (cp == str)
    872 		return (-1);
    873 	if (symlist)
    874 		printf("%.*s\n", (int)(cp-str), str);
    875 	for (symind = 0; symind < nsyms; ++symind) {
    876 		if (strlcmp(symname[symind], str, cp-str) == 0) {
    877 			debug("findsym %s %s", symname[symind],
    878 			    value[symind] ? value[symind] : "");
    879 			return (symind);
    880 		}
    881 	}
    882 	return (-1);
    883 }
    884 
    885 /*
    886  * Add a symbol to the symbol table.
    887  */
    888 static void
    889 addsym(bool ignorethis, bool definethis, char *sym)
    890 {
    891 	int symind;
    892 	char *val;
    893 
    894 	symind = findsym(sym);
    895 	if (symind < 0) {
    896 		if (nsyms >= MAXSYMS)
    897 			errx(2, "too many symbols");
    898 		symind = nsyms++;
    899 	}
    900 	symname[symind] = sym;
    901 	ignore[symind] = ignorethis;
    902 	val = sym + (skipsym(sym) - sym);
    903 	if (definethis) {
    904 		if (*val == '=') {
    905 			value[symind] = val+1;
    906 			*val = '\0';
    907 		} else if (*val == '\0')
    908 			value[symind] = "";
    909 		else
    910 			usage();
    911 	} else {
    912 		if (*val != '\0')
    913 			usage();
    914 		value[symind] = NULL;
    915 	}
    916 }
    917 
/*
 * Compare the string s with the first n characters of t.
 * This behaves like strncmp() with the extra requirement that s ends
 * exactly where the compared prefix of t ends: the result is zero only
 * if s is equal to that prefix, s[n] == '\0' included.
 */
static int
strlcmp(const char *s, const char *t, size_t n)
{
	size_t i;

	for (i = 0; i < n && t[i] != '\0'; i++) {
		if (s[i] != t[i])
			return ((unsigned char)s[i] - (unsigned char)t[i]);
	}
	/* zero only if s stops here too */
	return ((unsigned char)s[i]);
}
    932 
    933 /*
    934  * Diagnostics.
    935  */
    936 static void
    937 debug(const char *msg, ...)
    938 {
    939 	va_list ap;
    940 
    941 	if (debugging) {
    942 		va_start(ap, msg);
    943 		vwarnx(msg, ap);
    944 		va_end(ap);
    945 	}
    946 }
    947 
    948 static void
    949 error(const char *msg)
    950 {
    951 	if (depth == 0)
    952 		warnx("%s: %d: %s", filename, linenum, msg);
    953 	else
    954 		warnx("%s: %d: %s (#if line %d depth %d)",
    955 		    filename, linenum, msg, stifline[depth], depth);
    956 	errx(2, "output may be truncated");
    957 }
    958