Home | History | Annotate | Line # | Download | only in unifdef
unifdef.c revision 1.12
      1 /*	$NetBSD: unifdef.c,v 1.12 2003/08/07 11:16:55 agc Exp $	*/
      2 
      3 /*
      4  * Copyright (c) 1985, 1993
      5  *	The Regents of the University of California.  All rights reserved.
      6  *
      7  * This code is derived from software contributed to Berkeley by
      8  * Dave Yost. It was rewritten to support ANSI C by Tony Finch.
      9  *
     10  * Redistribution and use in source and binary forms, with or without
     11  * modification, are permitted provided that the following conditions
     12  * are met:
     13  * 1. Redistributions of source code must retain the above copyright
     14  *    notice, this list of conditions and the following disclaimer.
     15  * 2. Redistributions in binary form must reproduce the above copyright
     16  *    notice, this list of conditions and the following disclaimer in the
     17  *    documentation and/or other materials provided with the distribution.
     18  * 3. Neither the name of the University nor the names of its contributors
     19  *    may be used to endorse or promote products derived from this software
     20  *    without specific prior written permission.
     21  *
     22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     32  * SUCH DAMAGE.
     33  */
     34 
     35 /*
     36  * Copyright (c) 2002, 2003 Tony Finch <dot (at) dotat.at>
     37  *
     38  * This code is derived from software contributed to Berkeley by
     39  * Dave Yost. It was rewritten to support ANSI C by Tony Finch.
     40  *
     41  * Redistribution and use in source and binary forms, with or without
     42  * modification, are permitted provided that the following conditions
     43  * are met:
     44  * 1. Redistributions of source code must retain the above copyright
     45  *    notice, this list of conditions and the following disclaimer.
     46  * 2. Redistributions in binary form must reproduce the above copyright
     47  *    notice, this list of conditions and the following disclaimer in the
     48  *    documentation and/or other materials provided with the distribution.
     49  * 3. All advertising materials mentioning features or use of this software
     50  *    must display the following acknowledgement:
     51  *	This product includes software developed by the University of
     52  *	California, Berkeley and its contributors.
     53  * 4. Neither the name of the University nor the names of its contributors
     54  *    may be used to endorse or promote products derived from this software
     55  *    without specific prior written permission.
     56  *
     57  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     58  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     59  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     60  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     61  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     62  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     63  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     64  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     65  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     66  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     67  * SUCH DAMAGE.
     68  */
     69 
     70 #include <sys/cdefs.h>
     71 
     72 #ifndef lint
     73 #if 0
     74 static const char copyright[] =
     75 "@(#) Copyright (c) 1985, 1993\n\
     76 	The Regents of the University of California.  All rights reserved.\n";
     77 #endif
     78 #ifdef __IDSTRING
     79 __IDSTRING(Berkeley, "@(#)unifdef.c	8.1 (Berkeley) 6/6/93");
     80 __IDSTRING(NetBSD, "$NetBSD: unifdef.c,v 1.12 2003/08/07 11:16:55 agc Exp $");
     81 __IDSTRING(dotat, "$dotat: things/unifdef.c,v 1.161 2003/07/01 15:32:48 fanf2 Exp $");
     82 #endif
     83 #endif /* not lint */
     84 #ifdef __FBSDID
     85 __FBSDID("$FreeBSD: src/usr.bin/unifdef/unifdef.c,v 1.18 2003/07/01 15:30:43 fanf Exp $");
     86 #endif
     87 
     88 /*
     89  * unifdef - remove ifdef'ed lines
     90  *
     91  *  Wishlist:
     92  *      provide an option which will append the name of the
     93  *        appropriate symbol after #else's and #endif's
     94  *      provide an option which will check symbols after
     95  *        #else's and #endif's to see that they match their
     96  *        corresponding #ifdef or #ifndef
     97  *      generate #line directives in place of deleted code
     98  *
     99  *   The first two items above require better buffer handling, which would
    100  *     also make it possible to handle all "dodgy" directives correctly.
    101  */
    102 
    103 #include <ctype.h>
    104 #include <err.h>
    105 #include <stdarg.h>
    106 #include <stdio.h>
    107 #include <stdlib.h>
    108 #include <string.h>
    109 #include <unistd.h>
    110 
    111 #include "stdbool.h"
    112 
/*
 * Classification of an input line, as reported by the line parser.
 * The first ten values are directive types; adding LT_DODGY to one of
 * them gives the "dodgy" variant (directive not contained in one line).
 */
typedef enum {
	LT_TRUEI = 0,		/* #if known true, with the ignore flag */
	LT_FALSEI,		/* #if known false, with the ignore flag */
	LT_IF,			/* #if of unknown value */
	LT_TRUE,		/* #if known true */
	LT_FALSE,		/* #if known false */
	LT_ELIF,		/* #elif of unknown value */
	LT_ELTRUE,		/* #elif known true */
	LT_ELFALSE,		/* #elif known false */
	LT_ELSE,		/* #else */
	LT_ENDIF,		/* #endif */
	LT_DODGY,		/* offset flag: directive is not on one line */
	LT_DODGY_LAST = LT_DODGY + LT_ENDIF,
	LT_PLAIN,		/* ordinary line */
	LT_EOF,			/* end of file */
	LT_COUNT		/* number of distinct line types */
} Linetype;

/* Names used in debugging output, indexed by Linetype. */
static char const * const linetype_name[] = {
	[LT_TRUEI]		= "TRUEI",
	[LT_FALSEI]		= "FALSEI",
	[LT_IF]			= "IF",
	[LT_TRUE]		= "TRUE",
	[LT_FALSE]		= "FALSE",
	[LT_ELIF]		= "ELIF",
	[LT_ELTRUE]		= "ELTRUE",
	[LT_ELFALSE]		= "ELFALSE",
	[LT_ELSE]		= "ELSE",
	[LT_ENDIF]		= "ENDIF",
	[LT_DODGY + LT_TRUEI]	= "DODGY TRUEI",
	[LT_DODGY + LT_FALSEI]	= "DODGY FALSEI",
	[LT_DODGY + LT_IF]	= "DODGY IF",
	[LT_DODGY + LT_TRUE]	= "DODGY TRUE",
	[LT_DODGY + LT_FALSE]	= "DODGY FALSE",
	[LT_DODGY + LT_ELIF]	= "DODGY ELIF",
	[LT_DODGY + LT_ELTRUE]	= "DODGY ELTRUE",
	[LT_DODGY + LT_ELFALSE]	= "DODGY ELFALSE",
	[LT_DODGY + LT_ELSE]	= "DODGY ELSE",
	[LT_DODGY + LT_ENDIF]	= "DODGY ENDIF",
	[LT_PLAIN]		= "PLAIN",
	[LT_EOF]		= "EOF"
};
    141 
/*
 * State of #if processing for one nesting level; one of these is kept
 * per level on the ifstate[] stack.
 */
typedef enum {
	IS_OUTSIDE = 0,		/* not inside any #if group */
	IS_FALSE_PREFIX,	/* false #if followed by false #elifs */
	IS_TRUE_PREFIX,		/* first non-false #(el)if is true */
	IS_PASS_MIDDLE,		/* first non-false #(el)if is unknown */
	IS_FALSE_MIDDLE,	/* a false #elif after a pass state */
	IS_TRUE_MIDDLE,		/* a true #elif after a pass state */
	IS_PASS_ELSE,		/* an else after a pass state */
	IS_FALSE_ELSE,		/* an else after a true state */
	IS_TRUE_ELSE,		/* an else after only false states */
	IS_FALSE_TRAILER,	/* #elifs after a true are false */
	IS_COUNT		/* number of states */
} Ifstate;

/* Names used in debugging output, indexed by Ifstate. */
static char const * const ifstate_name[] = {
	[IS_OUTSIDE]		= "OUTSIDE",
	[IS_FALSE_PREFIX]	= "FALSE_PREFIX",
	[IS_TRUE_PREFIX]	= "TRUE_PREFIX",
	[IS_PASS_MIDDLE]	= "PASS_MIDDLE",
	[IS_FALSE_MIDDLE]	= "FALSE_MIDDLE",
	[IS_TRUE_MIDDLE]	= "TRUE_MIDDLE",
	[IS_PASS_ELSE]		= "PASS_ELSE",
	[IS_FALSE_ELSE]		= "FALSE_ELSE",
	[IS_TRUE_ELSE]		= "TRUE_ELSE",
	[IS_FALSE_TRAILER]	= "FALSE_TRAILER"
};
    163 
/*
 * State of the comment parser. NO_COMMENT is deliberately zero/false so
 * that the state can be tested directly as a boolean.
 */
typedef enum {
	NO_COMMENT = false,	/* outside a comment */
	C_COMMENT,		/* in a comment like this one */
	CXX_COMMENT,		/* between // and end of line */
	STARTING_COMMENT,	/* just after slash-backslash-newline */
	FINISHING_COMMENT	/* star-backslash-newline in a C comment */
} Comment_state;

/* Names used in debugging output, indexed by Comment_state. */
static char const * const comment_name[] = {
	[NO_COMMENT]		= "NO",
	[C_COMMENT]		= "C",
	[CXX_COMMENT]		= "CXX",
	[STARTING_COMMENT]	= "STARTING",
	[FINISHING_COMMENT]	= "FINISHING"
};
    176 
/*
 * State of the preprocessor line parser: what has been seen so far on
 * the current (logical) line.
 */
typedef enum {
	LS_START = 0,		/* only space and comments on this line */
	LS_HASH,		/* only space, comments, and a hash */
	LS_DIRTY		/* this line can't be a preprocessor line */
} Line_state;

/* Names used in debugging output, indexed by Line_state. */
static char const * const linestate_name[] = {
	[LS_START]	= "START",
	[LS_HASH]	= "HASH",
	[LS_DIRTY]	= "DIRTY"
};
    187 
    188 /*
    189  * Minimum translation limits from ISO/IEC 9899:1999 5.2.4.1
    190  */
    191 #define	MAXDEPTH        64			/* maximum #if nesting */
    192 #define	MAXLINE         4096			/* maximum length of line */
    193 #define	MAXSYMS         4096			/* maximum number of symbols */
    194 
    195 /*
    196  * Sometimes when editing a keyword the replacement text is longer, so
    197  * we leave some space at the end of the tline buffer to accommodate this.
    198  */
    199 #define	EDITSLOP        10
    200 
    201 /*
    202  * Globals.
    203  */
    204 
    205 static bool             complement;		/* -c: do the complement */
    206 static bool             debugging;		/* -d: debugging reports */
    207 static bool             iocccok;		/* -e: fewer IOCCC errors */
    208 static bool             killconsts;		/* -k: eval constant #ifs */
    209 static bool             lnblank;		/* -l: blank deleted lines */
    210 static bool             symlist;		/* -s: output symbol list */
    211 static bool             text;			/* -t: this is a text file */
    212 
    213 static const char      *symname[MAXSYMS];	/* symbol name */
    214 static const char      *value[MAXSYMS];		/* -Dsym=value */
    215 static bool             ignore[MAXSYMS];	/* -iDsym or -iUsym */
    216 static int              nsyms;			/* number of symbols */
    217 
    218 static FILE            *input;			/* input file pointer */
    219 static const char      *filename;		/* input file name */
    220 static int              linenum;		/* current line number */
    221 
    222 static char             tline[MAXLINE+EDITSLOP];/* input buffer plus space */
    223 static char            *keyword;		/* used for editing #elif's */
    224 
    225 static Comment_state    incomment;		/* comment parser state */
    226 static Line_state       linestate;		/* #if line parser state */
    227 static Ifstate          ifstate[MAXDEPTH];	/* #if processor state */
    228 static bool             ignoring[MAXDEPTH];	/* ignore comments state */
    229 static int              stifline[MAXDEPTH];	/* start of current #if */
    230 static int              depth;			/* current #if nesting */
    231 static bool             keepthis;		/* don't delete constant #if */
    232 
    233 static int              exitstat;		/* program exit status */
    234 
    235 static void             addsym(bool, bool, char *);
    236 static void             debug(const char *, ...);
    237 static void             done(void);
    238 static void             error(const char *);
    239 static int              findsym(const char *);
    240 static void             flushline(bool);
    241 static Linetype         getline(void);
    242 static Linetype         ifeval(const char **);
    243 static void             ignoreoff(void);
    244 static void             ignoreon(void);
    245 static void             keywordedit(const char *);
    246 static void             nest(void);
    247 static void             process(void);
    248 static const char      *skipcomment(const char *);
    249 static const char      *skipsym(const char *);
    250 static void             state(Ifstate);
    251 static int              strlcmp(const char *, const char *, size_t);
    252 static void             usage(void);
    253 
/*
 * endsym(c) is non-zero when the character c cannot be part of a symbol,
 * i.e. it is not a letter, a digit or an underscore.
 * The argument is parenthesized so that expression arguments are parsed
 * as intended; it is still evaluated more than once, so it must not have
 * side effects.
 */
#define endsym(c) (!isalpha((unsigned char)(c)) && !isdigit((unsigned char)(c)) && (c) != '_')
    255 
    256 /*
    257  * The main program.
    258  */
    259 int
    260 main(int argc, char *argv[])
    261 {
    262 	int opt;
    263 
    264 	while ((opt = getopt(argc, argv, "i:D:U:I:cdeklst")) != -1)
    265 		switch (opt) {
    266 		case 'i': /* treat stuff controlled by these symbols as text */
    267 			/*
    268 			 * For strict backwards-compatibility the U or D
    269 			 * should be immediately after the -i but it doesn't
    270 			 * matter much if we relax that requirement.
    271 			 */
    272 			opt = *optarg++;
    273 			if (opt == 'D')
    274 				addsym(true, true, optarg);
    275 			else if (opt == 'U')
    276 				addsym(true, false, optarg);
    277 			else
    278 				usage();
    279 			break;
    280 		case 'D': /* define a symbol */
    281 			addsym(false, true, optarg);
    282 			break;
    283 		case 'U': /* undef a symbol */
    284 			addsym(false, false, optarg);
    285 			break;
    286 		case 'I':
    287 			/* no-op for compatibility with cpp */
    288 			break;
    289 		case 'c': /* treat -D as -U and vice versa */
    290 			complement = true;
    291 			break;
    292 		case 'd':
    293 			debugging = true;
    294 			break;
    295 		case 'e': /* fewer errors from dodgy lines */
    296 			iocccok = true;
    297 			break;
    298 		case 'k': /* process constant #ifs */
    299 			killconsts = true;
    300 			break;
    301 		case 'l': /* blank deleted lines instead of omitting them */
    302 			lnblank = true;
    303 			break;
    304 		case 's': /* only output list of symbols that control #ifs */
    305 			symlist = true;
    306 			break;
    307 		case 't': /* don't parse C comments */
    308 			text = true;
    309 			break;
    310 		default:
    311 			usage();
    312 		}
    313 	argc -= optind;
    314 	argv += optind;
    315 	if (nsyms == 0 && !symlist) {
    316 		warnx("must -D or -U at least one symbol");
    317 		usage();
    318 	}
    319 	if (argc > 1) {
    320 		errx(2, "can only do one file");
    321 	} else if (argc == 1 && strcmp(*argv, "-") != 0) {
    322 		filename = *argv;
    323 		input = fopen(filename, "r");
    324 		if (input == NULL)
    325 			err(2, "can't open %s", filename);
    326 	} else {
    327 		filename = "[stdin]";
    328 		input = stdin;
    329 	}
    330 	process();
    331 	abort(); /* bug */
    332 }
    333 
/*
 * Print the usage summary on stderr and exit with status 2.
 */
static void
usage(void)
{
	static const char summary[] =
	    "usage: unifdef [-cdeklst]"
	    " [-Dsym[=val]] [-Usym] [-iDsym[=val]] [-iUsym] ... [file]\n";

	fputs(summary, stderr);
	exit(2);
}
    341 
    342 /*
    343  * A state transition function alters the global #if processing state
    344  * in a particular way. The table below is indexed by the current
    345  * processing state and the type of the current line.
    346  *
    347  * Nesting is handled by keeping a stack of states; some transition
    348  * functions increase or decrease the depth. They also maintain the
    349  * ignore state on a stack. In some complicated cases they have to
    350  * alter the preprocessor directive, as follows.
    351  *
    352  * When we have processed a group that starts off with a known-false
    353  * #if/#elif sequence (which has therefore been deleted) followed by a
    354  * #elif that we don't understand and therefore must keep, we edit the
    355  * latter into a #if to keep the nesting correct.
    356  *
    357  * When we find a true #elif in a group, the following block will
    358  * always be kept and the rest of the sequence after the next #elif or
    359  * #else will be discarded. We edit the #elif into a #else and the
    360  * following directive to #endif since this has the desired behaviour.
    361  *
    362  * "Dodgy" directives are split across multiple lines, the most common
    363  * example being a multi-line comment hanging off the right of the
    364  * directive. We can handle them correctly only if there is no change
    365  * from printing to dropping (or vice versa) caused by that directive.
    366  * If the directive is the first of a group we have a choice between
    367  * failing with an error, or passing it through unchanged instead of
    368  * evaluating it. The latter is not the default to avoid questions from
    369  * users about unifdef unexpectedly leaving behind preprocessor directives.
    370  */
    371 typedef void state_fn(void);
    372 
    373 /* report an error */
    374 static void Eelif (void) { error("Inappropriate #elif"); }
    375 static void Eelse (void) { error("Inappropriate #else"); }
    376 static void Eendif(void) { error("Inappropriate #endif"); }
    377 static void Eeof  (void) { error("Premature EOF"); }
    378 static void Eioccc(void) { error("Obfuscated preprocessor control line"); }
    379 /* plain line handling */
    380 static void print (void) { flushline(true); }
    381 static void drop  (void) { flushline(false); }
    382 /* output lacks group's start line */
    383 static void Strue (void) { drop();  ignoreoff(); state(IS_TRUE_PREFIX); }
    384 static void Sfalse(void) { drop();  ignoreoff(); state(IS_FALSE_PREFIX); }
    385 static void Selse (void) { drop();               state(IS_TRUE_ELSE); }
    386 /* print/pass this block */
    387 static void Pelif (void) { print(); ignoreoff(); state(IS_PASS_MIDDLE); }
    388 static void Pelse (void) { print();              state(IS_PASS_ELSE); }
    389 static void Pendif(void) { print(); --depth; }
    390 /* discard this block */
    391 static void Dfalse(void) { drop();  ignoreoff(); state(IS_FALSE_TRAILER); }
    392 static void Delif (void) { drop();  ignoreoff(); state(IS_FALSE_MIDDLE); }
    393 static void Delse (void) { drop();               state(IS_FALSE_ELSE); }
    394 static void Dendif(void) { drop();  --depth; }
    395 /* first line of group */
    396 static void Fdrop (void) { nest();  Dfalse(); }
    397 static void Fpass (void) { nest();  Pelif(); }
    398 static void Ftrue (void) { nest();  Strue(); }
    399 static void Ffalse(void) { nest();  Sfalse(); }
    400 /* variable pedantry for obfuscated lines */
    401 static void Oiffy (void) { if (iocccok) Fpass(); else Eioccc(); ignoreon(); }
    402 static void Oif   (void) { if (iocccok) Fpass(); else Eioccc(); }
    403 static void Oelif (void) { if (iocccok) Pelif(); else Eioccc(); }
    404 /* ignore comments in this block */
    405 static void Idrop (void) { Fdrop();  ignoreon(); }
    406 static void Itrue (void) { Ftrue();  ignoreon(); }
    407 static void Ifalse(void) { Ffalse(); ignoreon(); }
    408 /* edit this line */
    409 static void Mpass (void) { strncpy(keyword, "if  ", 4); Pelif(); }
    410 static void Mtrue (void) { keywordedit("else\n");  state(IS_TRUE_MIDDLE); }
    411 static void Melif (void) { keywordedit("endif\n"); state(IS_FALSE_TRAILER); }
    412 static void Melse (void) { keywordedit("endif\n"); state(IS_FALSE_ELSE); }
    413 
    414 static state_fn * const trans_table[IS_COUNT][LT_COUNT] = {
    415 /* IS_OUTSIDE */
    416 { Itrue, Ifalse,Fpass, Ftrue, Ffalse,Eelif, Eelif, Eelif, Eelse, Eendif,
    417   Oiffy, Oiffy, Fpass, Oif,   Oif,   Eelif, Eelif, Eelif, Eelse, Eendif,
    418   print, done },
    419 /* IS_FALSE_PREFIX */
    420 { Idrop, Idrop, Fdrop, Fdrop, Fdrop, Mpass, Strue, Sfalse,Selse, Dendif,
    421   Idrop, Idrop, Fdrop, Fdrop, Fdrop, Mpass, Eioccc,Eioccc,Eioccc,Eioccc,
    422   drop,  Eeof },
    423 /* IS_TRUE_PREFIX */
    424 { Itrue, Ifalse,Fpass, Ftrue, Ffalse,Dfalse,Dfalse,Dfalse,Delse, Dendif,
    425   Oiffy, Oiffy, Fpass, Oif,   Oif,   Eioccc,Eioccc,Eioccc,Eioccc,Eioccc,
    426   print, Eeof },
    427 /* IS_PASS_MIDDLE */
    428 { Itrue, Ifalse,Fpass, Ftrue, Ffalse,Pelif, Mtrue, Delif, Pelse, Pendif,
    429   Oiffy, Oiffy, Fpass, Oif,   Oif,   Pelif, Oelif, Oelif, Pelse, Pendif,
    430   print, Eeof },
    431 /* IS_FALSE_MIDDLE */
    432 { Idrop, Idrop, Fdrop, Fdrop, Fdrop, Pelif, Mtrue, Delif, Pelse, Pendif,
    433   Idrop, Idrop, Fdrop, Fdrop, Fdrop, Eioccc,Eioccc,Eioccc,Eioccc,Eioccc,
    434   drop,  Eeof },
    435 /* IS_TRUE_MIDDLE */
    436 { Itrue, Ifalse,Fpass, Ftrue, Ffalse,Melif, Melif, Melif, Melse, Pendif,
    437   Oiffy, Oiffy, Fpass, Oif,   Oif,   Eioccc,Eioccc,Eioccc,Eioccc,Pendif,
    438   print, Eeof },
    439 /* IS_PASS_ELSE */
    440 { Itrue, Ifalse,Fpass, Ftrue, Ffalse,Eelif, Eelif, Eelif, Eelse, Pendif,
    441   Oiffy, Oiffy, Fpass, Oif,   Oif,   Eelif, Eelif, Eelif, Eelse, Pendif,
    442   print, Eeof },
    443 /* IS_FALSE_ELSE */
    444 { Idrop, Idrop, Fdrop, Fdrop, Fdrop, Eelif, Eelif, Eelif, Eelse, Dendif,
    445   Idrop, Idrop, Fdrop, Fdrop, Fdrop, Eelif, Eelif, Eelif, Eelse, Eioccc,
    446   drop,  Eeof },
    447 /* IS_TRUE_ELSE */
    448 { Itrue, Ifalse,Fpass, Ftrue, Ffalse,Eelif, Eelif, Eelif, Eelse, Dendif,
    449   Oiffy, Oiffy, Fpass, Oif,   Oif,   Eelif, Eelif, Eelif, Eelse, Eioccc,
    450   print, Eeof },
    451 /* IS_FALSE_TRAILER */
    452 { Idrop, Idrop, Fdrop, Fdrop, Fdrop, Dfalse,Dfalse,Dfalse,Delse, Dendif,
    453   Idrop, Idrop, Fdrop, Fdrop, Fdrop, Dfalse,Dfalse,Dfalse,Delse, Eioccc,
    454   drop,  Eeof }
    455 /*TRUEI  FALSEI IF     TRUE   FALSE  ELIF   ELTRUE ELFALSE ELSE  ENDIF
    456   TRUEI  FALSEI IF     TRUE   FALSE  ELIF   ELTRUE ELFALSE ELSE  ENDIF (DODGY)
    457   PLAIN  EOF */
    458 };
    459 
    460 /*
    461  * State machine utility functions
    462  */
    463 static void
    464 done(void)
    465 {
    466 	if (incomment)
    467 		error("EOF in comment");
    468 	exit(exitstat);
    469 }
    470 static void
    471 ignoreoff(void)
    472 {
    473 	ignoring[depth] = ignoring[depth-1];
    474 }
    475 static void
    476 ignoreon(void)
    477 {
    478 	ignoring[depth] = true;
    479 }
    480 static void
    481 keywordedit(const char *replacement)
    482 {
    483 	strlcpy(keyword, replacement, tline + sizeof(tline) - keyword);
    484 	print();
    485 }
    486 static void
    487 nest(void)
    488 {
    489 	depth += 1;
    490 	if (depth >= MAXDEPTH)
    491 		error("Too many levels of nesting");
    492 	stifline[depth] = linenum;
    493 }
    494 static void
    495 state(Ifstate is)
    496 {
    497 	ifstate[depth] = is;
    498 }
    499 
    500 /*
    501  * Write a line to the output or not, according to command line options.
    502  */
    503 static void
    504 flushline(bool keep)
    505 {
    506 	if (symlist)
    507 		return;
    508 	if (keep ^ complement)
    509 		fputs(tline, stdout);
    510 	else {
    511 		if (lnblank)
    512 			putc('\n', stdout);
    513 		exitstat = 1;
    514 	}
    515 }
    516 
    517 /*
    518  * The driver for the state machine.
    519  */
    520 static void
    521 process(void)
    522 {
    523 	Linetype lineval;
    524 
    525 	for (;;) {
    526 		linenum++;
    527 		lineval = getline();
    528 		trans_table[ifstate[depth]][lineval]();
    529 		debug("process %s -> %s depth %d",
    530 		    linetype_name[lineval],
    531 		    ifstate_name[ifstate[depth]], depth);
    532 	}
    533 }
    534 
    535 /*
    536  * Parse a line and determine its type. We keep the preprocessor line
    537  * parser state between calls in the global variable linestate, with
    538  * help from skipcomment().
    539  */
    540 static Linetype
    541 getline(void)
    542 {
    543 	const char *cp;
    544 	int cursym;
    545 	int kwlen;
    546 	Linetype retval;
    547 	Comment_state wascomment;
    548 
    549 	if (fgets(tline, MAXLINE, input) == NULL)
    550 		return (LT_EOF);
    551 	retval = LT_PLAIN;
    552 	wascomment = incomment;
    553 	cp = skipcomment(tline);
    554 	if (linestate == LS_START) {
    555 		if (*cp == '#') {
    556 			linestate = LS_HASH;
    557 			cp = skipcomment(cp + 1);
    558 		} else if (*cp != '\0')
    559 			linestate = LS_DIRTY;
    560 	}
    561 	if (!incomment && linestate == LS_HASH) {
    562 		keyword = tline + (cp - tline);
    563 		cp = skipsym(cp);
    564 		kwlen = cp - keyword;
    565 		/* no way can we deal with a continuation inside a keyword */
    566 		if (strncmp(cp, "\\\n", 2) == 0)
    567 			Eioccc();
    568 		if (strlcmp("ifdef", keyword, kwlen) == 0 ||
    569 		    strlcmp("ifndef", keyword, kwlen) == 0) {
    570 			cp = skipcomment(cp);
    571 			if ((cursym = findsym(cp)) < 0)
    572 				retval = LT_IF;
    573 			else {
    574 				retval = (keyword[2] == 'n')
    575 				    ? LT_FALSE : LT_TRUE;
    576 				if (value[cursym] == NULL)
    577 					retval = (retval == LT_TRUE)
    578 					    ? LT_FALSE : LT_TRUE;
    579 				if (ignore[cursym])
    580 					retval = (retval == LT_TRUE)
    581 					    ? LT_TRUEI : LT_FALSEI;
    582 			}
    583 			cp = skipsym(cp);
    584 		} else if (strlcmp("if", keyword, kwlen) == 0)
    585 			retval = ifeval(&cp);
    586 		else if (strlcmp("elif", keyword, kwlen) == 0)
    587 			retval = ifeval(&cp) - LT_IF + LT_ELIF;
    588 		else if (strlcmp("else", keyword, kwlen) == 0)
    589 			retval = LT_ELSE;
    590 		else if (strlcmp("endif", keyword, kwlen) == 0)
    591 			retval = LT_ENDIF;
    592 		else {
    593 			linestate = LS_DIRTY;
    594 			retval = LT_PLAIN;
    595 		}
    596 		cp = skipcomment(cp);
    597 		if (*cp != '\0') {
    598 			linestate = LS_DIRTY;
    599 			if (retval == LT_TRUE || retval == LT_FALSE ||
    600 			    retval == LT_TRUEI || retval == LT_FALSEI)
    601 				retval = LT_IF;
    602 			if (retval == LT_ELTRUE || retval == LT_ELFALSE)
    603 				retval = LT_ELIF;
    604 		}
    605 		if (retval != LT_PLAIN && (wascomment || incomment)) {
    606 			retval += LT_DODGY;
    607 			if (incomment)
    608 				linestate = LS_DIRTY;
    609 		}
    610 		/* skipcomment should have changed the state */
    611 		if (linestate == LS_HASH)
    612 			abort(); /* bug */
    613 	}
    614 	if (linestate == LS_DIRTY) {
    615 		while (*cp != '\0')
    616 			cp = skipcomment(cp + 1);
    617 	}
    618 	debug("parser %s comment %s line",
    619 	    comment_name[incomment], linestate_name[linestate]);
    620 	return (retval);
    621 }
    622 
/*
 * The binary operators understood by the expression evaluator. Each
 * yields 1 or 0. Should division ever be added, the boolean operators
 * would have to short-circuit as well, because of divide-by-zero.
 */
static int
op_lt(int a, int b)
{
	return a < b ? 1 : 0;
}

static int
op_gt(int a, int b)
{
	return a > b ? 1 : 0;
}

static int
op_le(int a, int b)
{
	return a <= b ? 1 : 0;
}

static int
op_ge(int a, int b)
{
	return a >= b ? 1 : 0;
}

static int
op_eq(int a, int b)
{
	return a == b ? 1 : 0;
}

static int
op_ne(int a, int b)
{
	return a != b ? 1 : 0;
}

static int
op_or(int a, int b)
{
	return (a != 0 || b != 0) ? 1 : 0;
}

static int
op_and(int a, int b)
{
	return (a != 0 && b != 0) ? 1 : 0;
}
    636 
    637 /*
    638  * An evaluation function takes three arguments, as follows: (1) a pointer to
    639  * an element of the precedence table which lists the operators at the current
    640  * level of precedence; (2) a pointer to an integer which will receive the
    641  * value of the expression; and (3) a pointer to a char* that points to the
    642  * expression to be evaluated and that is updated to the end of the expression
    643  * when evaluation is complete. The function returns LT_FALSE if the value of
    644  * the expression is zero, LT_TRUE if it is non-zero, or LT_IF if the
    645  * expression could not be evaluated.
    646  */
    647 struct ops;
    648 
    649 typedef Linetype eval_fn(const struct ops *, int *, const char **);
    650 
    651 static eval_fn eval_table, eval_unary;
    652 
    653 /*
    654  * The precedence table. Expressions involving binary operators are evaluated
    655  * in a table-driven way by eval_table. When it evaluates a subexpression it
    656  * calls the inner function with its first argument pointing to the next
    657  * element of the table. Innermost expressions have special non-table-driven
    658  * handling.
    659  */
    660 static const struct ops {
    661 	eval_fn *inner;
    662 	struct op {
    663 		const char *str;
    664 		int (*fn)(int, int);
    665 	} op[5];
    666 } eval_ops[] = {
    667 	{ eval_table, { { "||", op_or } } },
    668 	{ eval_table, { { "&&", op_and } } },
    669 	{ eval_table, { { "==", op_eq },
    670 			{ "!=", op_ne } } },
    671 	{ eval_unary, { { "<=", op_le },
    672 			{ ">=", op_ge },
    673 			{ "<", op_lt },
    674 			{ ">", op_gt } } }
    675 };
    676 
    677 /*
    678  * Function for evaluating the innermost parts of expressions,
    679  * viz. !expr (expr) defined(symbol) symbol number
    680  * We reset the keepthis flag when we find a non-constant subexpression.
    681  */
    682 static Linetype
    683 eval_unary(const struct ops *ops, int *valp, const char **cpp)
    684 {
    685 	const char *cp;
    686 	char *ep;
    687 	int sym;
    688 
    689 	cp = skipcomment(*cpp);
    690 	if (*cp == '!') {
    691 		debug("eval%d !", ops - eval_ops);
    692 		cp++;
    693 		if (eval_unary(ops, valp, &cp) == LT_IF)
    694 			return (LT_IF);
    695 		*valp = !*valp;
    696 	} else if (*cp == '(') {
    697 		cp++;
    698 		debug("eval%d (", ops - eval_ops);
    699 		if (eval_table(eval_ops, valp, &cp) == LT_IF)
    700 			return (LT_IF);
    701 		cp = skipcomment(cp);
    702 		if (*cp++ != ')')
    703 			return (LT_IF);
    704 	} else if (isdigit((unsigned char)*cp)) {
    705 		debug("eval%d number", ops - eval_ops);
    706 		*valp = strtol(cp, &ep, 0);
    707 		cp = skipsym(cp);
    708 	} else if (strncmp(cp, "defined", 7) == 0 && endsym(cp[7])) {
    709 		cp = skipcomment(cp+7);
    710 		debug("eval%d defined", ops - eval_ops);
    711 		if (*cp++ != '(')
    712 			return (LT_IF);
    713 		cp = skipcomment(cp);
    714 		sym = findsym(cp);
    715 		if (sym < 0 && !symlist)
    716 			return (LT_IF);
    717 		*valp = (value[sym] != NULL);
    718 		cp = skipsym(cp);
    719 		cp = skipcomment(cp);
    720 		if (*cp++ != ')')
    721 			return (LT_IF);
    722 		keepthis = false;
    723 	} else if (!endsym(*cp)) {
    724 		debug("eval%d symbol", ops - eval_ops);
    725 		sym = findsym(cp);
    726 		if (sym < 0 && !symlist)
    727 			return (LT_IF);
    728 		if (value[sym] == NULL)
    729 			*valp = 0;
    730 		else {
    731 			*valp = strtol(value[sym], &ep, 0);
    732 			if (*ep != '\0' || ep == value[sym])
    733 				return (LT_IF);
    734 		}
    735 		cp = skipsym(cp);
    736 		keepthis = false;
    737 	} else {
    738 		debug("eval%d bad expr", ops - eval_ops);
    739 		return (LT_IF);
    740 	}
    741 
    742 	*cpp = cp;
    743 	debug("eval%d = %d", ops - eval_ops, *valp);
    744 	return (*valp ? LT_TRUE : LT_FALSE);
    745 }
    746 
    747 /*
    748  * Table-driven evaluation of binary operators.
    749  */
    750 static Linetype
    751 eval_table(const struct ops *ops, int *valp, const char **cpp)
    752 {
    753 	const struct op *op;
    754 	const char *cp;
    755 	int val;
    756 
    757 	debug("eval%d", ops - eval_ops);
    758 	cp = *cpp;
    759 	if (ops->inner(ops+1, valp, &cp) == LT_IF)
    760 		return (LT_IF);
    761 	for (;;) {
    762 		cp = skipcomment(cp);
    763 		for (op = ops->op; op->str != NULL; op++)
    764 			if (strncmp(cp, op->str, strlen(op->str)) == 0)
    765 				break;
    766 		if (op->str == NULL)
    767 			break;
    768 		cp += strlen(op->str);
    769 		debug("eval%d %s", ops - eval_ops, op->str);
    770 		if (ops->inner(ops+1, &val, &cp) == LT_IF)
    771 			return (LT_IF);
    772 		*valp = op->fn(*valp, val);
    773 	}
    774 
    775 	*cpp = cp;
    776 	debug("eval%d = %d", ops - eval_ops, *valp);
    777 	return (*valp ? LT_TRUE : LT_FALSE);
    778 }
    779 
    780 /*
    781  * Evaluate the expression on a #if or #elif line. If we can work out
    782  * the result we return LT_TRUE or LT_FALSE accordingly, otherwise we
    783  * return just a generic LT_IF.
    784  */
    785 static Linetype
    786 ifeval(const char **cpp)
    787 {
    788 	int ret;
    789 	int val;
    790 
    791 	debug("eval %s", *cpp);
    792 	keepthis = killconsts ? false : true;
    793 	ret = eval_table(eval_ops, &val, cpp);
    794 	debug("eval = %d", val);
    795 	return (keepthis ? LT_IF : ret);
    796 }
    797 
    798 /*
    799  * Skip over comments and stop at the next character position that is
    800  * not whitespace. Between calls we keep the comment state in the
    801  * global variable incomment, and we also adjust the global variable
    802  * linestate when we see a newline.
    803  * XXX: doesn't cope with the buffer splitting inside a state transition.
    804  */
    805 static const char *
    806 skipcomment(const char *cp)
    807 {
    808 	if (text || ignoring[depth]) {
    809 		for (; isspace((unsigned char)*cp); cp++)
    810 			if (*cp == '\n')
    811 				linestate = LS_START;
    812 		return (cp);
    813 	}
    814 	while (*cp != '\0')
    815 		/* don't reset to LS_START after a line continuation */
    816 		if (strncmp(cp, "\\\n", 2) == 0)
    817 			cp += 2;
    818 		else switch (incomment) {
    819 		case NO_COMMENT:
    820 			if (strncmp(cp, "/\\\n", 3) == 0) {
    821 				incomment = STARTING_COMMENT;
    822 				cp += 3;
    823 			} else if (strncmp(cp, "/*", 2) == 0) {
    824 				incomment = C_COMMENT;
    825 				cp += 2;
    826 			} else if (strncmp(cp, "//", 2) == 0) {
    827 				incomment = CXX_COMMENT;
    828 				cp += 2;
    829 			} else if (strncmp(cp, "\n", 1) == 0) {
    830 				linestate = LS_START;
    831 				cp += 1;
    832 			} else if (strchr(" \t", *cp) != NULL) {
    833 				cp += 1;
    834 			} else
    835 				return (cp);
    836 			continue;
    837 		case CXX_COMMENT:
    838 			if (strncmp(cp, "\n", 1) == 0) {
    839 				incomment = NO_COMMENT;
    840 				linestate = LS_START;
    841 			}
    842 			cp += 1;
    843 			continue;
    844 		case C_COMMENT:
    845 			if (strncmp(cp, "*\\\n", 3) == 0) {
    846 				incomment = FINISHING_COMMENT;
    847 				cp += 3;
    848 			} else if (strncmp(cp, "*/", 2) == 0) {
    849 				incomment = NO_COMMENT;
    850 				cp += 2;
    851 			} else
    852 				cp += 1;
    853 			continue;
    854 		case STARTING_COMMENT:
    855 			if (*cp == '*') {
    856 				incomment = C_COMMENT;
    857 				cp += 1;
    858 			} else if (*cp == '/') {
    859 				incomment = CXX_COMMENT;
    860 				cp += 1;
    861 			} else {
    862 				incomment = NO_COMMENT;
    863 				linestate = LS_DIRTY;
    864 			}
    865 			continue;
    866 		case FINISHING_COMMENT:
    867 			if (*cp == '/') {
    868 				incomment = NO_COMMENT;
    869 				cp += 1;
    870 			} else
    871 				incomment = C_COMMENT;
    872 			continue;
    873 		default:
    874 			abort(); /* bug */
    875 		}
    876 	return (cp);
    877 }
    878 
/*
 * Step over the identifier starting at cp and return a pointer to the
 * first character for which endsym() is true.  If cp does not point at
 * an identifier character, cp itself is returned.
 */
static const char *
skipsym(const char *cp)
{
	for (; !endsym(*cp); cp++)
		continue;
	return (cp);
}
    889 
    890 /*
    891  * Look for the symbol in the symbol table. If is is found, we return
    892  * the symbol table index, else we return -1.
    893  */
    894 static int
    895 findsym(const char *str)
    896 {
    897 	const char *cp;
    898 	int symind;
    899 
    900 	cp = skipsym(str);
    901 	if (cp == str)
    902 		return (-1);
    903 	if (symlist)
    904 		printf("%.*s\n", (int)(cp-str), str);
    905 	for (symind = 0; symind < nsyms; ++symind) {
    906 		if (strlcmp(symname[symind], str, cp-str) == 0) {
    907 			debug("findsym %s %s", symname[symind],
    908 			    value[symind] ? value[symind] : "");
    909 			return (symind);
    910 		}
    911 	}
    912 	return (-1);
    913 }
    914 
    915 /*
    916  * Add a symbol to the symbol table.
    917  */
    918 static void
    919 addsym(bool ignorethis, bool definethis, char *sym)
    920 {
    921 	int symind;
    922 	char *val;
    923 
    924 	symind = findsym(sym);
    925 	if (symind < 0) {
    926 		if (nsyms >= MAXSYMS)
    927 			errx(2, "too many symbols");
    928 		symind = nsyms++;
    929 	}
    930 	symname[symind] = sym;
    931 	ignore[symind] = ignorethis;
    932 	val = sym + (skipsym(sym) - sym);
    933 	if (definethis) {
    934 		if (*val == '=') {
    935 			value[symind] = val+1;
    936 			*val = '\0';
    937 		} else if (*val == '\0')
    938 			value[symind] = "";
    939 		else
    940 			usage();
    941 	} else {
    942 		if (*val != '\0')
    943 			usage();
    944 		value[symind] = NULL;
    945 	}
    946 }
    947 
/*
 * Compare the string s with the first n characters of t.
 * Behaves like strncmp() except that, when those characters all match,
 * s must also end there: the result is zero only if s[n] == '\0'.
 * If t ends before n characters, the comparison stops at that point
 * and the corresponding character of s is returned.
 */
static int
strlcmp(const char *s, const char *t, size_t n)
{
	size_t i;

	for (i = 0; i < n && t[i] != '\0'; i++) {
		if (s[i] != t[i])
			return ((unsigned char)s[i] - (unsigned char)t[i]);
	}
	return ((unsigned char)s[i]);
}
    962 
    963 /*
    964  * Diagnostics.
    965  */
    966 static void
    967 debug(const char *msg, ...)
    968 {
    969 	va_list ap;
    970 
    971 	if (debugging) {
    972 		va_start(ap, msg);
    973 		vwarnx(msg, ap);
    974 		va_end(ap);
    975 	}
    976 }
    977 
    978 static void
    979 error(const char *msg)
    980 {
    981 	if (depth == 0)
    982 		warnx("%s: %d: %s", filename, linenum, msg);
    983 	else
    984 		warnx("%s: %d: %s (#if line %d depth %d)",
    985 		    filename, linenum, msg, stifline[depth], depth);
    986 	errx(2, "output may be truncated");
    987 }
    988