Home | History | Annotate | Line # | Download | only in dist
optimize.c revision 1.9.12.2
      1 /*	$NetBSD: optimize.c,v 1.9.12.2 2020/04/13 07:46:11 martin Exp $	*/
      2 
      3 /*
      4  * Copyright (c) 1988, 1989, 1990, 1991, 1993, 1994, 1995, 1996
      5  *	The Regents of the University of California.  All rights reserved.
      6  *
      7  * Redistribution and use in source and binary forms, with or without
      8  * modification, are permitted provided that: (1) source code distributions
      9  * retain the above copyright notice and this paragraph in its entirety, (2)
     10  * distributions including binary code include the above copyright notice and
     11  * this paragraph in its entirety in the documentation or other materials
     12  * provided with the distribution, and (3) all advertising materials mentioning
     13  * features or use of this software display the following acknowledgement:
     14  * ``This product includes software developed by the University of California,
     15  * Lawrence Berkeley Laboratory and its contributors.'' Neither the name of
     16  * the University nor the names of its contributors may be used to endorse
     17  * or promote products derived from this software without specific prior
     18  * written permission.
     19  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
     20  * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
     21  * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
     22  *
     23  *  Optimization module for BPF code intermediate representation.
     24  */
     25 
     26 #include <sys/cdefs.h>
     27 __RCSID("$NetBSD: optimize.c,v 1.9.12.2 2020/04/13 07:46:11 martin Exp $");
     28 
     29 #ifdef HAVE_CONFIG_H
     30 #include <config.h>
     31 #endif
     32 
     33 #include <pcap-types.h>
     34 
     35 #include <stdio.h>
     36 #include <stdlib.h>
     37 #include <memory.h>
     38 #include <setjmp.h>
     39 #include <string.h>
     40 
     41 #include <errno.h>
     42 
     43 #include "pcap-int.h"
     44 
     45 #include "gencode.h"
     46 #include "optimize.h"
     47 
     48 #ifdef HAVE_OS_PROTO_H
     49 #include "os-proto.h"
     50 #endif
     51 
     52 #ifdef BDEBUG
     53 /*
     54  * The internal "debug printout" flag for the filter expression optimizer.
     55  * The code to print that stuff is present only if BDEBUG is defined, so
     56  * the flag, and the routine to set it, are defined only if BDEBUG is
     57  * defined.
     58  */
     59 static int pcap_optimizer_debug;
     60 
     61 /*
     62  * Routine to set that flag.
     63  *
     64  * This is intended for libpcap developers, not for general use.
     65  * If you want to set these in a program, you'll have to declare this
     66  * routine yourself, with the appropriate DLL import attribute on Windows;
     67  * it's not declared in any header file, and won't be declared in any
     68  * header file provided by libpcap.
     69  */
     70 PCAP_API void pcap_set_optimizer_debug(int value);
     71 
PCAP_API_DEF void
pcap_set_optimizer_debug(int value)
{
	/* Non-zero enables the optimizer's debug printouts. */
	pcap_optimizer_debug = value;
}
     77 
     78 /*
     79  * The internal "print dot graph" flag for the filter expression optimizer.
     80  * The code to print that stuff is present only if BDEBUG is defined, so
     81  * the flag, and the routine to set it, are defined only if BDEBUG is
     82  * defined.
     83  */
     84 static int pcap_print_dot_graph;
     85 
     86 /*
     87  * Routine to set that flag.
     88  *
     89  * This is intended for libpcap developers, not for general use.
     90  * If you want to set these in a program, you'll have to declare this
     91  * routine yourself, with the appropriate DLL import attribute on Windows;
     92  * it's not declared in any header file, and won't be declared in any
     93  * header file provided by libpcap.
     94  */
     95 PCAP_API void pcap_set_print_dot_graph(int value);
     96 
PCAP_API_DEF void
pcap_set_print_dot_graph(int value)
{
	/* Non-zero makes the optimizer emit its flow graph in dot format. */
	pcap_print_dot_graph = value;
}
    102 
    103 #endif
    104 
    105 /*
    106  * lowest_set_bit().
    107  *
    108  * Takes a 32-bit integer as an argument.
    109  *
    110  * If handed a non-zero value, returns the index of the lowest set bit,
 * counting upwards from zero.
    112  *
    113  * If handed zero, the results are platform- and compiler-dependent.
    114  * Keep it out of the light, don't give it any water, don't feed it
    115  * after midnight, and don't pass zero to it.
    116  *
    117  * This is the same as the count of trailing zeroes in the word.
    118  */
    119 #if PCAP_IS_AT_LEAST_GNUC_VERSION(3,4)
    120   /*
    121    * GCC 3.4 and later; we have __builtin_ctz().
    122    */
    123   #define lowest_set_bit(mask) __builtin_ctz(mask)
    124 #elif defined(_MSC_VER)
    125   /*
    126    * Visual Studio; we support only 2005 and later, so use
    127    * _BitScanForward().
    128    */
    129 #include <intrin.h>
    130 
    131 #ifndef __clang__
    132 #pragma intrinsic(_BitScanForward)
    133 #endif
    134 
/*
 * Return the index of the lowest set bit in 'mask' (i.e., the number of
 * trailing zero bits), or -1 if 'mask' is zero.
 */
static __forceinline int
lowest_set_bit(int mask)
{
	unsigned long bit;

	/*
	 * Don't sign-extend mask if long is longer than int.
	 * (It's currently not, in MSVC, even on 64-bit platforms, but....)
	 */
	if (_BitScanForward(&bit, (unsigned int)mask) == 0)
		return -1;	/* mask is zero */
	return (int)bit;
}
    148 #elif defined(MSDOS) && defined(__DJGPP__)
    149   /*
    150    * MS-DOS with DJGPP, which declares ffs() in <string.h>, which
    151    * we've already included.
    152    */
    153   #define lowest_set_bit(mask)	(ffs((mask)) - 1)
    154 #elif (defined(MSDOS) && defined(__WATCOMC__)) || defined(STRINGS_H_DECLARES_FFS)
    155   /*
    156    * MS-DOS with Watcom C, which has <strings.h> and declares ffs() there,
    157    * or some other platform (UN*X conforming to a sufficient recent version
    158    * of the Single UNIX Specification).
    159    */
    160   #include <strings.h>
    161   #define lowest_set_bit(mask)	(ffs((mask)) - 1)
    162 #else
    163 /*
    164  * None of the above.
    165  * Use a perfect-hash-function-based function.
    166  */
static int
lowest_set_bit(int mask)
{
	/*
	 * Count trailing zeroes with a de Bruijn minimal perfect hash:
	 * isolate the lowest set bit (v & -v), multiply by the de Bruijn
	 * constant, and use the top 5 bits of the product to index a
	 * 32-entry lookup table.
	 *
	 * See:
	 *
	 *	http://7ooo.mooo.com/text/ComputingTrailingZerosHOWTO.pdf
	 *
	 *	http://supertech.csail.mit.edu/papers/debruijn.pdf
	 */
	static const int debruijn_table[32] = {
		0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
		31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9
	};
	unsigned int word = (unsigned int)mask;
	unsigned int isolated = word & (0U - word);	/* lowest set bit only */

	return debruijn_table[(isolated * 0x077CB531U) >> 27];
}
    190 #endif
    191 
    192 /*
    193  * Represents a deleted instruction.
    194  */
    195 #define NOP -1
    196 
    197 /*
    198  * Register numbers for use-def values.
    199  * 0 through BPF_MEMWORDS-1 represent the corresponding scratch memory
    200  * location.  A_ATOM is the accumulator and X_ATOM is the index
    201  * register.
    202  */
    203 #define A_ATOM BPF_MEMWORDS
    204 #define X_ATOM (BPF_MEMWORDS+1)
    205 
    206 /*
    207  * This define is used to represent *both* the accumulator and
    208  * x register in use-def computations.
    209  * Currently, the use-def code assumes only one definition per instruction.
    210  */
    211 #define AX_ATOM N_ATOMS
    212 
    213 /*
 * These data structures are used in a Cocke and Schwartz style
    215  * value numbering scheme.  Since the flowgraph is acyclic,
    216  * exit values can be propagated from a node's predecessors
    217  * provided it is uniquely defined.
    218  */
struct valnode {
	int code;		/* opcode of the defining instruction */
	int v0, v1;		/* value numbers of the operands */
	int val;		/* value number assigned to this node */
	struct valnode *next;	/* next node on the same hash chain */
};

/* Integer constants mapped with the load immediate opcode. */
#define K(i) F(opt_state, BPF_LD|BPF_IMM|BPF_W, i, 0L)

struct vmapinfo {
	int is_const;		/* non-zero if the value is a known constant */
	bpf_int32 const_val;	/* the constant; valid only if is_const is set */
};
    233 
/*
 * Per-call state for the optimizer.
 */
typedef struct {
	/*
	 * Place to longjmp to on an error.
	 */
	jmp_buf top_ctx;

	/*
	 * The buffer into which to put error message.
	 */
	char *errbuf;

	/*
	 * A flag to indicate that further optimization is needed.
	 * Iterative passes are continued until a given pass yields no
	 * branch movement.
	 */
	int done;

	int n_blocks;		/* number of blocks in the flow graph */
	struct block **blocks;	/* the blocks — NOTE(review): indexing scheme not visible in this chunk */
	int n_edges;		/* number of edges in the flow graph */
	struct edge **edges;	/* the edges — NOTE(review): indexing scheme not visible in this chunk */

	/*
	 * A bit vector set representation of the dominators.
	 * We round up the set size to the next power of two.
	 */
	int nodewords;		/* words in each per-node bit vector */
	int edgewords;		/* words in each per-edge bit vector */
	struct block **levels;	/* levels[i]: head of the list of blocks at level i (see find_levels()) */
	bpf_u_int32 *space;	/* presumably backing storage for the bit vectors — allocation not visible here; TODO confirm */

#define BITS_PER_WORD (8*sizeof(bpf_u_int32))
/*
 * True if a is in uset {p}
 */
#define SET_MEMBER(p, a) \
((p)[(unsigned)(a) / BITS_PER_WORD] & ((bpf_u_int32)1 << ((unsigned)(a) % BITS_PER_WORD)))

/*
 * Add 'a' to uset p.
 */
#define SET_INSERT(p, a) \
(p)[(unsigned)(a) / BITS_PER_WORD] |= ((bpf_u_int32)1 << ((unsigned)(a) % BITS_PER_WORD))

/*
 * Delete 'a' from uset p.
 */
#define SET_DELETE(p, a) \
(p)[(unsigned)(a) / BITS_PER_WORD] &= ~((bpf_u_int32)1 << ((unsigned)(a) % BITS_PER_WORD))

/*
 * a := a intersect b
 */
#define SET_INTERSECT(a, b, n)\
{\
	register bpf_u_int32 *_x = a, *_y = b;\
	register int _n = n;\
	while (--_n >= 0) *_x++ &= *_y++;\
}

/*
 * a := a - b
 */
#define SET_SUBTRACT(a, b, n)\
{\
	register bpf_u_int32 *_x = a, *_y = b;\
	register int _n = n;\
	while (--_n >= 0) *_x++ &=~ *_y++;\
}

/*
 * a := a union b
 */
#define SET_UNION(a, b, n)\
{\
	register bpf_u_int32 *_x = a, *_y = b;\
	register int _n = n;\
	while (--_n >= 0) *_x++ |= *_y++;\
}

	uset all_dom_sets;	/* backing store for the per-node dominator sets (see find_dom()) */
	uset all_closure_sets;	/* backing store for the per-node closure sets (see find_closure()) */
	uset all_edge_sets;	/* backing store for the per-edge dominator sets (see find_edom()) */

#define MODULUS 213
	struct valnode *hashtbl[MODULUS];	/* value-numbering hash chains (see F()) */
	int curval;		/* highest value number assigned so far */
	int maxval;		/* number of entries in vmap[] */

	struct vmapinfo *vmap;		/* value number -> constant-ness info */
	struct valnode *vnode_base;	/* pool of valnodes */
	struct valnode *next_vnode;	/* next free valnode in the pool */
} opt_state_t;
    328 
/*
 * State used while flattening the optimized block graph into the
 * linear array of struct bpf_insn that a BPF program consists of.
 */
typedef struct {
	/*
	 * Place to longjmp to on an error.
	 */
	jmp_buf top_ctx;

	/*
	 * The buffer into which to put error message.
	 */
	char *errbuf;

	/*
	 * Some pointers used to convert the basic block form of the code,
	 * into the array form that BPF requires.  'fstart' will point to
	 * the malloc'd array while 'ftail' is used during the recursive
	 * traversal.
	 */
	struct bpf_insn *fstart;
	struct bpf_insn *ftail;
} conv_state_t;
    349 
    350 static void opt_init(opt_state_t *, struct icode *);
    351 static void opt_cleanup(opt_state_t *);
    352 static void PCAP_NORETURN opt_error(opt_state_t *, const char *, ...)
    353     PCAP_PRINTFLIKE(2, 3);
    354 
    355 static void intern_blocks(opt_state_t *, struct icode *);
    356 
    357 static void find_inedges(opt_state_t *, struct block *);
    358 #ifdef BDEBUG
    359 static void opt_dump(opt_state_t *, struct icode *);
    360 #endif
    361 
    362 #ifndef MAX
    363 #define MAX(a,b) ((a)>(b)?(a):(b))
    364 #endif
    365 
    366 static void
    367 find_levels_r(opt_state_t *opt_state, struct icode *ic, struct block *b)
    368 {
    369 	int level;
    370 
    371 	if (isMarked(ic, b))
    372 		return;
    373 
    374 	Mark(ic, b);
    375 	b->link = 0;
    376 
    377 	if (JT(b)) {
    378 		find_levels_r(opt_state, ic, JT(b));
    379 		find_levels_r(opt_state, ic, JF(b));
    380 		level = MAX(JT(b)->level, JF(b)->level) + 1;
    381 	} else
    382 		level = 0;
    383 	b->level = level;
    384 	b->link = opt_state->levels[level];
    385 	opt_state->levels[level] = b;
    386 }
    387 
    388 /*
    389  * Level graph.  The levels go from 0 at the leaves to
    390  * N_LEVELS at the root.  The opt_state->levels[] array points to the
    391  * first node of the level list, whose elements are linked
    392  * with the 'link' field of the struct block.
    393  */
    394 static void
    395 find_levels(opt_state_t *opt_state, struct icode *ic)
    396 {
    397 	memset((char *)opt_state->levels, 0, opt_state->n_blocks * sizeof(*opt_state->levels));
    398 	unMarkAll(ic);
    399 	find_levels_r(opt_state, ic, ic->root);
    400 }
    401 
    402 /*
    403  * Find dominator relationships.
    404  * Assumes graph has been leveled.
    405  */
    406 static void
    407 find_dom(opt_state_t *opt_state, struct block *root)
    408 {
    409 	int i;
    410 	struct block *b;
    411 	bpf_u_int32 *x;
    412 
    413 	/*
    414 	 * Initialize sets to contain all nodes.
    415 	 */
    416 	x = opt_state->all_dom_sets;
    417 	i = opt_state->n_blocks * opt_state->nodewords;
    418 	while (--i >= 0)
    419 		*x++ = 0xFFFFFFFFU;
    420 	/* Root starts off empty. */
    421 	for (i = opt_state->nodewords; --i >= 0;)
    422 		root->dom[i] = 0;
    423 
    424 	/* root->level is the highest level no found. */
    425 	for (i = root->level; i >= 0; --i) {
    426 		for (b = opt_state->levels[i]; b; b = b->link) {
    427 			SET_INSERT(b->dom, b->id);
    428 			if (JT(b) == 0)
    429 				continue;
    430 			SET_INTERSECT(JT(b)->dom, b->dom, opt_state->nodewords);
    431 			SET_INTERSECT(JF(b)->dom, b->dom, opt_state->nodewords);
    432 		}
    433 	}
    434 }
    435 
    436 static void
    437 propedom(opt_state_t *opt_state, struct edge *ep)
    438 {
    439 	SET_INSERT(ep->edom, ep->id);
    440 	if (ep->succ) {
    441 		SET_INTERSECT(ep->succ->et.edom, ep->edom, opt_state->edgewords);
    442 		SET_INTERSECT(ep->succ->ef.edom, ep->edom, opt_state->edgewords);
    443 	}
    444 }
    445 
    446 /*
    447  * Compute edge dominators.
    448  * Assumes graph has been leveled and predecessors established.
    449  */
    450 static void
    451 find_edom(opt_state_t *opt_state, struct block *root)
    452 {
    453 	int i;
    454 	uset x;
    455 	struct block *b;
    456 
    457 	x = opt_state->all_edge_sets;
    458 	for (i = opt_state->n_edges * opt_state->edgewords; --i >= 0; )
    459 		x[i] = 0xFFFFFFFFU;
    460 
    461 	/* root->level is the highest level no found. */
    462 	memset(root->et.edom, 0, opt_state->edgewords * sizeof(*(uset)0));
    463 	memset(root->ef.edom, 0, opt_state->edgewords * sizeof(*(uset)0));
    464 	for (i = root->level; i >= 0; --i) {
    465 		for (b = opt_state->levels[i]; b != 0; b = b->link) {
    466 			propedom(opt_state, &b->et);
    467 			propedom(opt_state, &b->ef);
    468 		}
    469 	}
    470 }
    471 
    472 /*
    473  * Find the backwards transitive closure of the flow graph.  These sets
    474  * are backwards in the sense that we find the set of nodes that reach
    475  * a given node, not the set of nodes that can be reached by a node.
    476  *
    477  * Assumes graph has been leveled.
    478  */
    479 static void
    480 find_closure(opt_state_t *opt_state, struct block *root)
    481 {
    482 	int i;
    483 	struct block *b;
    484 
    485 	/*
    486 	 * Initialize sets to contain no nodes.
    487 	 */
    488 	memset((char *)opt_state->all_closure_sets, 0,
    489 	      opt_state->n_blocks * opt_state->nodewords * sizeof(*opt_state->all_closure_sets));
    490 
    491 	/* root->level is the highest level no found. */
    492 	for (i = root->level; i >= 0; --i) {
    493 		for (b = opt_state->levels[i]; b; b = b->link) {
    494 			SET_INSERT(b->closure, b->id);
    495 			if (JT(b) == 0)
    496 				continue;
    497 			SET_UNION(JT(b)->closure, b->closure, opt_state->nodewords);
    498 			SET_UNION(JF(b)->closure, b->closure, opt_state->nodewords);
    499 		}
    500 	}
    501 }
    502 
    503 /*
    504  * Return the register number that is used by s.  If A and X are both
    505  * used, return AX_ATOM.  If no register is used, return -1.
    506  *
    507  * The implementation should probably change to an array access.
    508  */
    509 static int
    510 atomuse(struct stmt *s)
    511 {
    512 	register int c = s->code;
    513 
    514 	if (c == NOP)
    515 		return -1;
    516 
    517 	switch (BPF_CLASS(c)) {
    518 
    519 	case BPF_RET:
    520 		return (BPF_RVAL(c) == BPF_A) ? A_ATOM :
    521 			(BPF_RVAL(c) == BPF_X) ? X_ATOM : -1;
    522 
    523 	case BPF_LD:
    524 	case BPF_LDX:
    525 		return (BPF_MODE(c) == BPF_IND) ? X_ATOM :
    526 			(BPF_MODE(c) == BPF_MEM) ? s->k : -1;
    527 
    528 	case BPF_ST:
    529 		return A_ATOM;
    530 
    531 	case BPF_STX:
    532 		return X_ATOM;
    533 
    534 	case BPF_JMP:
    535 	case BPF_ALU:
    536 		if (BPF_SRC(c) == BPF_X)
    537 			return AX_ATOM;
    538 		return A_ATOM;
    539 
    540 	case BPF_MISC:
    541 		return BPF_MISCOP(c) == BPF_TXA ? X_ATOM : A_ATOM;
    542 	}
    543 	abort();
    544 	/* NOTREACHED */
    545 }
    546 
    547 /*
    548  * Return the register number that is defined by 's'.  We assume that
    549  * a single stmt cannot define more than one register.  If no register
    550  * is defined, return -1.
    551  *
    552  * The implementation should probably change to an array access.
    553  */
    554 static int
    555 atomdef(struct stmt *s)
    556 {
    557 	if (s->code == NOP)
    558 		return -1;
    559 
    560 	switch (BPF_CLASS(s->code)) {
    561 
    562 	case BPF_LD:
    563 	case BPF_ALU:
    564 		return A_ATOM;
    565 
    566 	case BPF_LDX:
    567 		return X_ATOM;
    568 
    569 	case BPF_ST:
    570 	case BPF_STX:
    571 		return s->k;
    572 
    573 	case BPF_MISC:
    574 		return BPF_MISCOP(s->code) == BPF_TAX ? X_ATOM : A_ATOM;
    575 	}
    576 	return -1;
    577 }
    578 
    579 /*
    580  * Compute the sets of registers used, defined, and killed by 'b'.
    581  *
    582  * "Used" means that a statement in 'b' uses the register before any
    583  * statement in 'b' defines it, i.e. it uses the value left in
    584  * that register by a predecessor block of this block.
    585  * "Defined" means that a statement in 'b' defines it.
    586  * "Killed" means that a statement in 'b' defines it before any
    587  * statement in 'b' uses it, i.e. it kills the value left in that
    588  * register by a predecessor block of this block.
    589  */
    590 static void
    591 compute_local_ud(struct block *b)
    592 {
    593 	struct slist *s;
    594 	atomset def = 0, use = 0, killed = 0;
    595 	int atom;
    596 
    597 	for (s = b->stmts; s; s = s->next) {
    598 		if (s->s.code == NOP)
    599 			continue;
    600 		atom = atomuse(&s->s);
    601 		if (atom >= 0) {
    602 			if (atom == AX_ATOM) {
    603 				if (!ATOMELEM(def, X_ATOM))
    604 					use |= ATOMMASK(X_ATOM);
    605 				if (!ATOMELEM(def, A_ATOM))
    606 					use |= ATOMMASK(A_ATOM);
    607 			}
    608 			else if (atom < N_ATOMS) {
    609 				if (!ATOMELEM(def, atom))
    610 					use |= ATOMMASK(atom);
    611 			}
    612 			else
    613 				abort();
    614 		}
    615 		atom = atomdef(&s->s);
    616 		if (atom >= 0) {
    617 			if (!ATOMELEM(use, atom))
    618 				killed |= ATOMMASK(atom);
    619 			def |= ATOMMASK(atom);
    620 		}
    621 	}
    622 	if (BPF_CLASS(b->s.code) == BPF_JMP) {
    623 		/*
    624 		 * XXX - what about RET?
    625 		 */
    626 		atom = atomuse(&b->s);
    627 		if (atom >= 0) {
    628 			if (atom == AX_ATOM) {
    629 				if (!ATOMELEM(def, X_ATOM))
    630 					use |= ATOMMASK(X_ATOM);
    631 				if (!ATOMELEM(def, A_ATOM))
    632 					use |= ATOMMASK(A_ATOM);
    633 			}
    634 			else if (atom < N_ATOMS) {
    635 				if (!ATOMELEM(def, atom))
    636 					use |= ATOMMASK(atom);
    637 			}
    638 			else
    639 				abort();
    640 		}
    641 	}
    642 
    643 	b->def = def;
    644 	b->kill = killed;
    645 	b->in_use = use;
    646 }
    647 
    648 /*
    649  * Assume graph is already leveled.
    650  */
    651 static void
    652 find_ud(opt_state_t *opt_state, struct block *root)
    653 {
    654 	int i, maxlevel;
    655 	struct block *p;
    656 
    657 	/*
    658 	 * root->level is the highest level no found;
    659 	 * count down from there.
    660 	 */
    661 	maxlevel = root->level;
    662 	for (i = maxlevel; i >= 0; --i)
    663 		for (p = opt_state->levels[i]; p; p = p->link) {
    664 			compute_local_ud(p);
    665 			p->out_use = 0;
    666 		}
    667 
    668 	for (i = 1; i <= maxlevel; ++i) {
    669 		for (p = opt_state->levels[i]; p; p = p->link) {
    670 			p->out_use |= JT(p)->in_use | JF(p)->in_use;
    671 			p->in_use |= p->out_use &~ p->kill;
    672 		}
    673 	}
    674 }
    675 static void
    676 init_val(opt_state_t *opt_state)
    677 {
    678 	opt_state->curval = 0;
    679 	opt_state->next_vnode = opt_state->vnode_base;
    680 	memset((char *)opt_state->vmap, 0, opt_state->maxval * sizeof(*opt_state->vmap));
    681 	memset((char *)opt_state->hashtbl, 0, sizeof opt_state->hashtbl);
    682 }
    683 
    684 /* Because we really don't have an IR, this stuff is a little messy. */
    685 static int
    686 F(opt_state_t *opt_state, int code, int v0, int v1)
    687 {
    688 	u_int hash;
    689 	int val;
    690 	struct valnode *p;
    691 
    692 	hash = (u_int)code ^ ((u_int)v0 << 4) ^ ((u_int)v1 << 8);
    693 	hash %= MODULUS;
    694 
    695 	for (p = opt_state->hashtbl[hash]; p; p = p->next)
    696 		if (p->code == code && p->v0 == v0 && p->v1 == v1)
    697 			return p->val;
    698 
    699 	val = ++opt_state->curval;
    700 	if (BPF_MODE(code) == BPF_IMM &&
    701 	    (BPF_CLASS(code) == BPF_LD || BPF_CLASS(code) == BPF_LDX)) {
    702 		opt_state->vmap[val].const_val = v0;
    703 		opt_state->vmap[val].is_const = 1;
    704 	}
    705 	p = opt_state->next_vnode++;
    706 	p->val = val;
    707 	p->code = code;
    708 	p->v0 = v0;
    709 	p->v1 = v1;
    710 	p->next = opt_state->hashtbl[hash];
    711 	opt_state->hashtbl[hash] = p;
    712 
    713 	return val;
    714 }
    715 
    716 static inline void
    717 vstore(struct stmt *s, int *valp, int newval, int alter)
    718 {
    719 	if (alter && newval != VAL_UNKNOWN && *valp == newval)
    720 		s->code = NOP;
    721 	else
    722 		*valp = newval;
    723 }
    724 
    725 /*
    726  * Do constant-folding on binary operators.
    727  * (Unary operators are handled elsewhere.)
    728  */
/*
 * 'v0' and 'v1' are value numbers whose values are known constants;
 * fold the binary operator in 's' and replace it with a load-immediate
 * of the result.  Longjmps out via opt_error() on division or modulus
 * by a zero constant.
 */
static void
fold_op(opt_state_t *opt_state, struct stmt *s, int v0, int v1)
{
	bpf_u_int32 a, b;

	/* Fetch the two known constant operand values. */
	a = opt_state->vmap[v0].const_val;
	b = opt_state->vmap[v1].const_val;

	switch (BPF_OP(s->code)) {
	case BPF_ADD:
		a += b;
		break;

	case BPF_SUB:
		a -= b;
		break;

	case BPF_MUL:
		a *= b;
		break;

	case BPF_DIV:
		if (b == 0)
			opt_error(opt_state, "division by zero");
		a /= b;
		break;

	case BPF_MOD:
		if (b == 0)
			opt_error(opt_state, "modulus by zero");
		a %= b;
		break;

	case BPF_AND:
		a &= b;
		break;

	case BPF_OR:
		a |= b;
		break;

	case BPF_XOR:
		a ^= b;
		break;

	case BPF_LSH:
		/*
		 * A left shift of more than the width of the type
		 * is undefined in C; we'll just treat it as shifting
		 * all the bits out.
		 *
		 * XXX - the BPF interpreter doesn't check for this,
		 * so its behavior is dependent on the behavior of
		 * the processor on which it's running.  There are
		 * processors on which it shifts all the bits out
		 * and processors on which it does no shift.
		 */
		if (b < 32)
			a <<= b;
		else
			a = 0;
		break;

	case BPF_RSH:
		/*
		 * A right shift of more than the width of the type
		 * is undefined in C; we'll just treat it as shifting
		 * all the bits out.
		 *
		 * XXX - the BPF interpreter doesn't check for this,
		 * so its behavior is dependent on the behavior of
		 * the processor on which it's running.  There are
		 * processors on which it shifts all the bits out
		 * and processors on which it does no shift.
		 */
		if (b < 32)
			a >>= b;
		else
			a = 0;
		break;

	default:
		/* Not a foldable binary operator. */
		abort();
	}
	/*
	 * Replace the operator with a load of the folded constant, and
	 * note that another optimizer pass is warranted.
	 */
	s->k = a;
	s->code = BPF_LD|BPF_IMM;
	opt_state->done = 0;
}
    817 
    818 static inline struct slist *
    819 this_op(struct slist *s)
    820 {
    821 	while (s != 0 && s->s.code == NOP)
    822 		s = s->next;
    823 	return s;
    824 }
    825 
    826 static void
    827 opt_not(struct block *b)
    828 {
    829 	struct block *tmp = JT(b);
    830 
    831 	JT(b) = JF(b);
    832 	JF(b) = tmp;
    833 }
    834 
    835 static void
    836 opt_peep(opt_state_t *opt_state, struct block *b)
    837 {
    838 	struct slist *s;
    839 	struct slist *next, *last;
    840 	int val;
    841 
    842 	s = b->stmts;
    843 	if (s == 0)
    844 		return;
    845 
    846 	last = s;
    847 	for (/*empty*/; /*empty*/; s = next) {
    848 		/*
    849 		 * Skip over nops.
    850 		 */
    851 		s = this_op(s);
    852 		if (s == 0)
    853 			break;	/* nothing left in the block */
    854 
    855 		/*
    856 		 * Find the next real instruction after that one
    857 		 * (skipping nops).
    858 		 */
    859 		next = this_op(s->next);
    860 		if (next == 0)
    861 			break;	/* no next instruction */
    862 		last = next;
    863 
    864 		/*
    865 		 * st  M[k]	-->	st  M[k]
    866 		 * ldx M[k]		tax
    867 		 */
    868 		if (s->s.code == BPF_ST &&
    869 		    next->s.code == (BPF_LDX|BPF_MEM) &&
    870 		    s->s.k == next->s.k) {
    871 			opt_state->done = 0;
    872 			next->s.code = BPF_MISC|BPF_TAX;
    873 		}
    874 		/*
    875 		 * ld  #k	-->	ldx  #k
    876 		 * tax			txa
    877 		 */
    878 		if (s->s.code == (BPF_LD|BPF_IMM) &&
    879 		    next->s.code == (BPF_MISC|BPF_TAX)) {
    880 			s->s.code = BPF_LDX|BPF_IMM;
    881 			next->s.code = BPF_MISC|BPF_TXA;
    882 			opt_state->done = 0;
    883 		}
    884 		/*
    885 		 * This is an ugly special case, but it happens
    886 		 * when you say tcp[k] or udp[k] where k is a constant.
    887 		 */
    888 		if (s->s.code == (BPF_LD|BPF_IMM)) {
    889 			struct slist *add, *tax, *ild;
    890 
    891 			/*
    892 			 * Check that X isn't used on exit from this
    893 			 * block (which the optimizer might cause).
    894 			 * We know the code generator won't generate
    895 			 * any local dependencies.
    896 			 */
    897 			if (ATOMELEM(b->out_use, X_ATOM))
    898 				continue;
    899 
    900 			/*
    901 			 * Check that the instruction following the ldi
    902 			 * is an addx, or it's an ldxms with an addx
    903 			 * following it (with 0 or more nops between the
    904 			 * ldxms and addx).
    905 			 */
    906 			if (next->s.code != (BPF_LDX|BPF_MSH|BPF_B))
    907 				add = next;
    908 			else
    909 				add = this_op(next->next);
    910 			if (add == 0 || add->s.code != (BPF_ALU|BPF_ADD|BPF_X))
    911 				continue;
    912 
    913 			/*
    914 			 * Check that a tax follows that (with 0 or more
    915 			 * nops between them).
    916 			 */
    917 			tax = this_op(add->next);
    918 			if (tax == 0 || tax->s.code != (BPF_MISC|BPF_TAX))
    919 				continue;
    920 
    921 			/*
    922 			 * Check that an ild follows that (with 0 or more
    923 			 * nops between them).
    924 			 */
    925 			ild = this_op(tax->next);
    926 			if (ild == 0 || BPF_CLASS(ild->s.code) != BPF_LD ||
    927 			    BPF_MODE(ild->s.code) != BPF_IND)
    928 				continue;
    929 			/*
    930 			 * We want to turn this sequence:
    931 			 *
    932 			 * (004) ldi     #0x2		{s}
    933 			 * (005) ldxms   [14]		{next}  -- optional
    934 			 * (006) addx			{add}
    935 			 * (007) tax			{tax}
    936 			 * (008) ild     [x+0]		{ild}
    937 			 *
    938 			 * into this sequence:
    939 			 *
    940 			 * (004) nop
    941 			 * (005) ldxms   [14]
    942 			 * (006) nop
    943 			 * (007) nop
    944 			 * (008) ild     [x+2]
    945 			 *
    946 			 * XXX We need to check that X is not
    947 			 * subsequently used, because we want to change
    948 			 * what'll be in it after this sequence.
    949 			 *
    950 			 * We know we can eliminate the accumulator
    951 			 * modifications earlier in the sequence since
    952 			 * it is defined by the last stmt of this sequence
    953 			 * (i.e., the last statement of the sequence loads
    954 			 * a value into the accumulator, so we can eliminate
    955 			 * earlier operations on the accumulator).
    956 			 */
    957 			ild->s.k += s->s.k;
    958 			s->s.code = NOP;
    959 			add->s.code = NOP;
    960 			tax->s.code = NOP;
    961 			opt_state->done = 0;
    962 		}
    963 	}
    964 	/*
    965 	 * If the comparison at the end of a block is an equality
    966 	 * comparison against a constant, and nobody uses the value
    967 	 * we leave in the A register at the end of a block, and
    968 	 * the operation preceding the comparison is an arithmetic
    969 	 * operation, we can sometime optimize it away.
    970 	 */
    971 	if (b->s.code == (BPF_JMP|BPF_JEQ|BPF_K) &&
    972 	    !ATOMELEM(b->out_use, A_ATOM)) {
    973 	    	/*
    974 	    	 * We can optimize away certain subtractions of the
    975 	    	 * X register.
    976 	    	 */
    977 		if (last->s.code == (BPF_ALU|BPF_SUB|BPF_X)) {
    978 			val = b->val[X_ATOM];
    979 			if (opt_state->vmap[val].is_const) {
    980 				/*
    981 				 * If we have a subtract to do a comparison,
    982 				 * and the X register is a known constant,
    983 				 * we can merge this value into the
    984 				 * comparison:
    985 				 *
    986 				 * sub x  ->	nop
    987 				 * jeq #y	jeq #(x+y)
    988 				 */
    989 				b->s.k += opt_state->vmap[val].const_val;
    990 				last->s.code = NOP;
    991 				opt_state->done = 0;
    992 			} else if (b->s.k == 0) {
    993 				/*
    994 				 * If the X register isn't a constant,
    995 				 * and the comparison in the test is
    996 				 * against 0, we can compare with the
    997 				 * X register, instead:
    998 				 *
    999 				 * sub x  ->	nop
   1000 				 * jeq #0	jeq x
   1001 				 */
   1002 				last->s.code = NOP;
   1003 				b->s.code = BPF_JMP|BPF_JEQ|BPF_X;
   1004 				opt_state->done = 0;
   1005 			}
   1006 		}
   1007 		/*
   1008 		 * Likewise, a constant subtract can be simplified:
   1009 		 *
   1010 		 * sub #x ->	nop
   1011 		 * jeq #y ->	jeq #(x+y)
   1012 		 */
   1013 		else if (last->s.code == (BPF_ALU|BPF_SUB|BPF_K)) {
   1014 			last->s.code = NOP;
   1015 			b->s.k += last->s.k;
   1016 			opt_state->done = 0;
   1017 		}
   1018 		/*
   1019 		 * And, similarly, a constant AND can be simplified
   1020 		 * if we're testing against 0, i.e.:
   1021 		 *
   1022 		 * and #k	nop
   1023 		 * jeq #0  ->	jset #k
   1024 		 */
   1025 		else if (last->s.code == (BPF_ALU|BPF_AND|BPF_K) &&
   1026 		    b->s.k == 0) {
   1027 			b->s.k = last->s.k;
   1028 			b->s.code = BPF_JMP|BPF_K|BPF_JSET;
   1029 			last->s.code = NOP;
   1030 			opt_state->done = 0;
   1031 			opt_not(b);
   1032 		}
   1033 	}
   1034 	/*
   1035 	 * jset #0        ->   never
   1036 	 * jset #ffffffff ->   always
   1037 	 */
   1038 	if (b->s.code == (BPF_JMP|BPF_K|BPF_JSET)) {
   1039 		if (b->s.k == 0)
   1040 			JT(b) = JF(b);
   1041 		if ((u_int)b->s.k == 0xffffffffU)
   1042 			JF(b) = JT(b);
   1043 	}
   1044 	/*
   1045 	 * If we're comparing against the index register, and the index
   1046 	 * register is a known constant, we can just compare against that
   1047 	 * constant.
   1048 	 */
   1049 	val = b->val[X_ATOM];
   1050 	if (opt_state->vmap[val].is_const && BPF_SRC(b->s.code) == BPF_X) {
   1051 		bpf_int32 v = opt_state->vmap[val].const_val;
   1052 		b->s.code &= ~BPF_X;
   1053 		b->s.k = v;
   1054 	}
   1055 	/*
   1056 	 * If the accumulator is a known constant, we can compute the
   1057 	 * comparison result.
   1058 	 */
   1059 	val = b->val[A_ATOM];
   1060 	if (opt_state->vmap[val].is_const && BPF_SRC(b->s.code) == BPF_K) {
   1061 		bpf_int32 v = opt_state->vmap[val].const_val;
   1062 		switch (BPF_OP(b->s.code)) {
   1063 
   1064 		case BPF_JEQ:
   1065 			v = v == b->s.k;
   1066 			break;
   1067 
   1068 		case BPF_JGT:
   1069 			v = (unsigned)v > (unsigned)b->s.k;
   1070 			break;
   1071 
   1072 		case BPF_JGE:
   1073 			v = (unsigned)v >= (unsigned)b->s.k;
   1074 			break;
   1075 
   1076 		case BPF_JSET:
   1077 			v &= b->s.k;
   1078 			break;
   1079 
   1080 		default:
   1081 			abort();
   1082 		}
   1083 		if (JF(b) != JT(b))
   1084 			opt_state->done = 0;
   1085 		if (v)
   1086 			JF(b) = JT(b);
   1087 		else
   1088 			JT(b) = JF(b);
   1089 	}
   1090 }
   1091 
   1092 /*
   1093  * Compute the symbolic value of expression of 's', and update
   1094  * anything it defines in the value table 'val'.  If 'alter' is true,
   1095  * do various optimizations.  This code would be cleaner if symbolic
   1096  * evaluation and code transformations weren't folded together.
   1097  */
static void
opt_stmt(opt_state_t *opt_state, struct stmt *s, int val[], int alter)
{
	int op;
	int v;

	switch (s->code) {

	case BPF_LD|BPF_ABS|BPF_W:
	case BPF_LD|BPF_ABS|BPF_H:
	case BPF_LD|BPF_ABS|BPF_B:
		/*
		 * Absolute load: the value is a function of the opcode
		 * and the constant offset only.
		 */
		v = F(opt_state, s->code, s->k, 0L);
		vstore(s, &val[A_ATOM], v, alter);
		break;

	case BPF_LD|BPF_IND|BPF_W:
	case BPF_LD|BPF_IND|BPF_H:
	case BPF_LD|BPF_IND|BPF_B:
		v = val[X_ATOM];
		if (alter && opt_state->vmap[v].is_const) {
			/*
			 * The X register holds a known constant, so fold
			 * it into the offset and turn the indirect load
			 * into an absolute load.
			 */
			s->code = BPF_LD|BPF_ABS|BPF_SIZE(s->code);
			s->k += opt_state->vmap[v].const_val;
			v = F(opt_state, s->code, s->k, 0L);
			opt_state->done = 0;
		}
		else
			v = F(opt_state, s->code, s->k, v);
		vstore(s, &val[A_ATOM], v, alter);
		break;

	case BPF_LD|BPF_LEN:
		v = F(opt_state, s->code, 0L, 0L);
		vstore(s, &val[A_ATOM], v, alter);
		break;

	case BPF_LD|BPF_IMM:
		/* Load immediate: A gets the value number of the constant. */
		v = K(s->k);
		vstore(s, &val[A_ATOM], v, alter);
		break;

	case BPF_LDX|BPF_IMM:
		v = K(s->k);
		vstore(s, &val[X_ATOM], v, alter);
		break;

	case BPF_LDX|BPF_MSH|BPF_B:
		v = F(opt_state, s->code, s->k, 0L);
		vstore(s, &val[X_ATOM], v, alter);
		break;

	case BPF_ALU|BPF_NEG:
		if (alter && opt_state->vmap[val[A_ATOM]].is_const) {
			s->code = BPF_LD|BPF_IMM;
			/*
			 * Do this negation as unsigned arithmetic; that's
			 * what modern BPF engines do, and it guarantees
			 * that all possible values can be negated.  (Yeah,
			 * negating 0x80000000, the minimum signed 32-bit
			 * two's-complement value, results in 0x80000000,
			 * so it's still negative, but we *should* be doing
			 * all unsigned arithmetic here, to match what
			 * modern BPF engines do.)
			 *
			 * Express it as 0U - (unsigned value) so that we
			 * don't get compiler warnings about negating an
			 * unsigned value and don't get UBSan warnings
			 * about the result of negating 0x80000000 being
			 * undefined.
			 */
			s->k = 0U - (bpf_u_int32)(opt_state->vmap[val[A_ATOM]].const_val);
			val[A_ATOM] = K(s->k);
		}
		else
			val[A_ATOM] = F(opt_state, s->code, val[A_ATOM], 0L);
		break;

	case BPF_ALU|BPF_ADD|BPF_K:
	case BPF_ALU|BPF_SUB|BPF_K:
	case BPF_ALU|BPF_MUL|BPF_K:
	case BPF_ALU|BPF_DIV|BPF_K:
	case BPF_ALU|BPF_MOD|BPF_K:
	case BPF_ALU|BPF_AND|BPF_K:
	case BPF_ALU|BPF_OR|BPF_K:
	case BPF_ALU|BPF_XOR|BPF_K:
	case BPF_ALU|BPF_LSH|BPF_K:
	case BPF_ALU|BPF_RSH|BPF_K:
		op = BPF_OP(s->code);
		if (alter) {
			if (s->k == 0) {
				/*
				 * Optimize operations where the constant
				 * is zero.
				 *
				 * Don't optimize away "sub #0"
				 * as it may be needed later to
				 * fixup the generated math code.
				 *
				 * Fail if we're dividing by zero or taking
				 * a modulus by zero.
				 */
				if (op == BPF_ADD ||
				    op == BPF_LSH || op == BPF_RSH ||
				    op == BPF_OR || op == BPF_XOR) {
					s->code = NOP;
					break;
				}
				if (op == BPF_MUL || op == BPF_AND) {
					/* Result is always 0: load #0. */
					s->code = BPF_LD|BPF_IMM;
					val[A_ATOM] = K(s->k);
					break;
				}
				if (op == BPF_DIV)
					opt_error(opt_state,
					    "division by zero");
				if (op == BPF_MOD)
					opt_error(opt_state,
					    "modulus by zero");
			}
			if (opt_state->vmap[val[A_ATOM]].is_const) {
				/* Constant-fold: A op K is computable now. */
				fold_op(opt_state, s, val[A_ATOM], K(s->k));
				val[A_ATOM] = K(s->k);
				break;
			}
		}
		val[A_ATOM] = F(opt_state, s->code, val[A_ATOM], K(s->k));
		break;

	case BPF_ALU|BPF_ADD|BPF_X:
	case BPF_ALU|BPF_SUB|BPF_X:
	case BPF_ALU|BPF_MUL|BPF_X:
	case BPF_ALU|BPF_DIV|BPF_X:
	case BPF_ALU|BPF_MOD|BPF_X:
	case BPF_ALU|BPF_AND|BPF_X:
	case BPF_ALU|BPF_OR|BPF_X:
	case BPF_ALU|BPF_XOR|BPF_X:
	case BPF_ALU|BPF_LSH|BPF_X:
	case BPF_ALU|BPF_RSH|BPF_X:
		op = BPF_OP(s->code);
		if (alter && opt_state->vmap[val[X_ATOM]].is_const) {
			if (opt_state->vmap[val[A_ATOM]].is_const) {
				/* Both operands constant: fold completely. */
				fold_op(opt_state, s, val[A_ATOM], val[X_ATOM]);
				val[A_ATOM] = K(s->k);
			}
			else {
				/* Only X constant: rewrite as a K-form op. */
				s->code = BPF_ALU|BPF_K|op;
				s->k = opt_state->vmap[val[X_ATOM]].const_val;
				/*
				 * XXX - we need to make up our minds
				 * as to what integers are signed and
				 * what integers are unsigned in BPF
				 * programs and in our IR.
				 */
				if ((op == BPF_LSH || op == BPF_RSH) &&
				    (s->k < 0 || s->k > 31))
					opt_error(opt_state,
					    "shift by more than 31 bits");
				opt_state->done = 0;
				val[A_ATOM] =
					F(opt_state, s->code, val[A_ATOM], K(s->k));
			}
			break;
		}
		/*
		 * Check if we're doing something to an accumulator
		 * that is 0, and simplify.  This may not seem like
		 * much of a simplification but it could open up further
		 * optimizations.
		 * XXX We could also check for mul by 1, etc.
		 */
		if (alter && opt_state->vmap[val[A_ATOM]].is_const
		    && opt_state->vmap[val[A_ATOM]].const_val == 0) {
			if (op == BPF_ADD || op == BPF_OR || op == BPF_XOR) {
				/* 0 op X == X: just transfer X to A. */
				s->code = BPF_MISC|BPF_TXA;
				vstore(s, &val[A_ATOM], val[X_ATOM], alter);
				break;
			}
			else if (op == BPF_MUL || op == BPF_DIV || op == BPF_MOD ||
				 op == BPF_AND || op == BPF_LSH || op == BPF_RSH) {
				/* 0 op X == 0: load #0. */
				s->code = BPF_LD|BPF_IMM;
				s->k = 0;
				vstore(s, &val[A_ATOM], K(s->k), alter);
				break;
			}
			else if (op == BPF_NEG) {
				/*
				 * NOTE(review): BPF_ALU|BPF_NEG is handled
				 * in its own case above and is not in this
				 * case list, so this branch looks
				 * unreachable — confirm before removing.
				 */
				s->code = NOP;
				break;
			}
		}
		val[A_ATOM] = F(opt_state, s->code, val[A_ATOM], val[X_ATOM]);
		break;

	case BPF_MISC|BPF_TXA:
		/* A gets X's value number. */
		vstore(s, &val[A_ATOM], val[X_ATOM], alter);
		break;

	case BPF_LD|BPF_MEM:
		v = val[s->k];
		if (alter && opt_state->vmap[v].is_const) {
			/* The scratch slot holds a constant: load it directly. */
			s->code = BPF_LD|BPF_IMM;
			s->k = opt_state->vmap[v].const_val;
			opt_state->done = 0;
		}
		vstore(s, &val[A_ATOM], v, alter);
		break;

	case BPF_MISC|BPF_TAX:
		/* X gets A's value number. */
		vstore(s, &val[X_ATOM], val[A_ATOM], alter);
		break;

	case BPF_LDX|BPF_MEM:
		v = val[s->k];
		if (alter && opt_state->vmap[v].is_const) {
			s->code = BPF_LDX|BPF_IMM;
			s->k = opt_state->vmap[v].const_val;
			opt_state->done = 0;
		}
		vstore(s, &val[X_ATOM], v, alter);
		break;

	case BPF_ST:
		/* Store A into scratch memory slot s->k. */
		vstore(s, &val[s->k], val[A_ATOM], alter);
		break;

	case BPF_STX:
		/* Store X into scratch memory slot s->k. */
		vstore(s, &val[s->k], val[X_ATOM], alter);
		break;
	}
}
   1326 
   1327 static void
   1328 deadstmt(opt_state_t *opt_state, register struct stmt *s, register struct stmt *last[])
   1329 {
   1330 	register int atom;
   1331 
   1332 	atom = atomuse(s);
   1333 	if (atom >= 0) {
   1334 		if (atom == AX_ATOM) {
   1335 			last[X_ATOM] = 0;
   1336 			last[A_ATOM] = 0;
   1337 		}
   1338 		else
   1339 			last[atom] = 0;
   1340 	}
   1341 	atom = atomdef(s);
   1342 	if (atom >= 0) {
   1343 		if (last[atom]) {
   1344 			opt_state->done = 0;
   1345 			last[atom]->code = NOP;
   1346 		}
   1347 		last[atom] = s;
   1348 	}
   1349 }
   1350 
   1351 static void
   1352 opt_deadstores(opt_state_t *opt_state, register struct block *b)
   1353 {
   1354 	register struct slist *s;
   1355 	register int atom;
   1356 	struct stmt *last[N_ATOMS];
   1357 
   1358 	memset((char *)last, 0, sizeof last);
   1359 
   1360 	for (s = b->stmts; s != 0; s = s->next)
   1361 		deadstmt(opt_state, &s->s, last);
   1362 	deadstmt(opt_state, &b->s, last);
   1363 
   1364 	for (atom = 0; atom < N_ATOMS; ++atom)
   1365 		if (last[atom] && !ATOMELEM(b->out_use, atom)) {
   1366 			last[atom]->code = NOP;
   1367 			opt_state->done = 0;
   1368 		}
   1369 }
   1370 
/*
 * Symbolically execute block 'b': inherit value numbers from its
 * predecessors, run opt_stmt() over each statement (optimizing if
 * 'do_stmts' is set), and then apply block-level cleanups.
 */
static void
opt_blk(opt_state_t *opt_state, struct block *b, int do_stmts)
{
	struct slist *s;
	struct edge *p;
	int i;
	bpf_int32 aval, xval;

#if 0
	for (s = b->stmts; s && s->next; s = s->next)
		if (BPF_CLASS(s->s.code) == BPF_JMP) {
			do_stmts = 0;
			break;
		}
#endif

	/*
	 * Initialize the atom values.
	 */
	p = b->in_edges;
	if (p == 0) {
		/*
		 * We have no predecessors, so everything is undefined
		 * upon entry to this block.
		 */
		memset((char *)b->val, 0, sizeof(b->val));
	} else {
		/*
		 * Inherit values from our predecessors.
		 *
		 * First, get the values from the predecessor along the
		 * first edge leading to this node.
		 */
		memcpy((char *)b->val, (char *)p->pred->val, sizeof(b->val));
		/*
		 * Now look at all the other nodes leading to this node.
		 * If, for the predecessor along that edge, a register
		 * has a different value from the one we have (i.e.,
		 * control paths are merging, and the merging paths
		 * assign different values to that register), give the
		 * register the undefined value of 0.
		 */
		while ((p = p->next) != NULL) {
			for (i = 0; i < N_ATOMS; ++i)
				if (b->val[i] != p->pred->val[i])
					b->val[i] = 0;
		}
	}
	/* Remember A and X on entry so we can tell if the block changed them. */
	aval = b->val[A_ATOM];
	xval = b->val[X_ATOM];
	for (s = b->stmts; s; s = s->next)
		opt_stmt(opt_state, &s->s, b->val, do_stmts);

	/*
	 * This is a special case: if we don't use anything from this
	 * block, and we load the accumulator or index register with a
	 * value that is already there, or if this block is a return,
	 * eliminate all the statements.
	 *
	 * XXX - what if it does a store?
	 *
	 * XXX - why does it matter whether we use anything from this
	 * block?  If the accumulator or index register doesn't change
	 * its value, isn't that OK even if we use that value?
	 *
	 * XXX - if we load the accumulator with a different value,
	 * and the block ends with a conditional branch, we obviously
	 * can't eliminate it, as the branch depends on that value.
	 * For the index register, the conditional branch only depends
	 * on the index register value if the test is against the index
	 * register value rather than a constant; if nothing uses the
	 * value we put into the index register, and we're not testing
	 * against the index register's value, and there aren't any
	 * other problems that would keep us from eliminating this
	 * block, can we eliminate it?
	 */
	if (do_stmts &&
	    ((b->out_use == 0 &&
	      aval != VAL_UNKNOWN && b->val[A_ATOM] == aval &&
	      xval != VAL_UNKNOWN && b->val[X_ATOM] == xval) ||
	     BPF_CLASS(b->s.code) == BPF_RET)) {
		if (b->stmts != 0) {
			b->stmts = 0;
			opt_state->done = 0;
		}
	} else {
		/* Otherwise, run peephole and dead-store elimination. */
		opt_peep(opt_state, b);
		opt_deadstores(opt_state, b);
	}
	/*
	 * Set up values for branch optimizer.
	 */
	if (BPF_SRC(b->s.code) == BPF_K)
		b->oval = K(b->s.k);
	else
		b->oval = b->val[X_ATOM];
	/* et/ef carry the branch code; negative marks the false edge. */
	b->et.code = b->s.code;
	b->ef.code = -b->s.code;
}
   1470 
   1471 /*
   1472  * Return true if any register that is used on exit from 'succ', has
   1473  * an exit value that is different from the corresponding exit value
   1474  * from 'b'.
   1475  */
   1476 static int
   1477 use_conflict(struct block *b, struct block *succ)
   1478 {
   1479 	int atom;
   1480 	atomset use = succ->out_use;
   1481 
   1482 	if (use == 0)
   1483 		return 0;
   1484 
   1485 	for (atom = 0; atom < N_ATOMS; ++atom)
   1486 		if (ATOMELEM(use, atom))
   1487 			if (b->val[atom] != succ->val[atom])
   1488 				return 1;
   1489 	return 0;
   1490 }
   1491 
   1492 static struct block *
   1493 fold_edge(struct block *child, struct edge *ep)
   1494 {
   1495 	int sense;
   1496 	int aval0, aval1, oval0, oval1;
   1497 	int code = ep->code;
   1498 
   1499 	if (code < 0) {
   1500 		code = -code;
   1501 		sense = 0;
   1502 	} else
   1503 		sense = 1;
   1504 
   1505 	if (child->s.code != code)
   1506 		return 0;
   1507 
   1508 	aval0 = child->val[A_ATOM];
   1509 	oval0 = child->oval;
   1510 	aval1 = ep->pred->val[A_ATOM];
   1511 	oval1 = ep->pred->oval;
   1512 
   1513 	if (aval0 != aval1)
   1514 		return 0;
   1515 
   1516 	if (oval0 == oval1)
   1517 		/*
   1518 		 * The operands of the branch instructions are
   1519 		 * identical, so the result is true if a true
   1520 		 * branch was taken to get here, otherwise false.
   1521 		 */
   1522 		return sense ? JT(child) : JF(child);
   1523 
   1524 	if (sense && code == (BPF_JMP|BPF_JEQ|BPF_K))
   1525 		/*
   1526 		 * At this point, we only know the comparison if we
   1527 		 * came down the true branch, and it was an equality
   1528 		 * comparison with a constant.
   1529 		 *
   1530 		 * I.e., if we came down the true branch, and the branch
   1531 		 * was an equality comparison with a constant, we know the
   1532 		 * accumulator contains that constant.  If we came down
   1533 		 * the false branch, or the comparison wasn't with a
   1534 		 * constant, we don't know what was in the accumulator.
   1535 		 *
   1536 		 * We rely on the fact that distinct constants have distinct
   1537 		 * value numbers.
   1538 		 */
   1539 		return JF(child);
   1540 
   1541 	return 0;
   1542 }
   1543 
static void
opt_j(opt_state_t *opt_state, struct edge *ep)
{
	register int i, k;
	register struct block *target;

	/* A successor with no branch targets (a leaf) can't be folded. */
	if (JT(ep->succ) == 0)
		return;

	if (JT(ep->succ) == JF(ep->succ)) {
		/*
		 * Common branch targets can be eliminated, provided
		 * there is no data dependency.
		 */
		if (!use_conflict(ep->pred, ep->succ->et.succ)) {
			opt_state->done = 0;
			ep->succ = JT(ep->succ);
		}
	}
	/*
	 * For each edge dominator that matches the successor of this
	 * edge, promote the edge successor to its grandchild.
	 *
	 * XXX We violate the set abstraction here in favor of a reasonably
	 * efficient loop.
	 */
 top:
	for (i = 0; i < opt_state->edgewords; ++i) {
		/* Scan one word of the edge-dominator bit set. */
		register bpf_u_int32 x = ep->edom[i];

		while (x != 0) {
			/* Pull out the next set bit; k is the edge number. */
			k = lowest_set_bit(x);
			x &=~ ((bpf_u_int32)1 << k);
			k += i * BITS_PER_WORD;

			target = fold_edge(ep->succ, opt_state->edges[k]);
			/*
			 * Check that there is no data dependency between
			 * nodes that will be violated if we move the edge.
			 */
			if (target != 0 && !use_conflict(ep->pred, target)) {
				opt_state->done = 0;
				ep->succ = target;
				if (JT(target) != 0)
					/*
					 * Start over unless we hit a leaf.
					 */
					goto top;
				return;
			}
		}
	}
}
   1597 
   1598 
static void
or_pullup(opt_state_t *opt_state, struct block *b)
{
	int val, at_top;
	struct block *pull;
	struct block **diffp, **samep;
	struct edge *ep;

	ep = b->in_edges;
	if (ep == 0)
		return;

	/*
	 * Make sure each predecessor loads the same value.
	 * XXX why?
	 */
	val = ep->pred->val[A_ATOM];
	for (ep = ep->next; ep != 0; ep = ep->next)
		if (val != ep->pred->val[A_ATOM])
			return;

	/* Start from the edge through which the first predecessor enters b. */
	if (JT(b->in_edges->pred) == b)
		diffp = &JT(b->in_edges->pred);
	else
		diffp = &JF(b->in_edges->pred);

	/*
	 * Walk down the false-branch chain of blocks that share b's true
	 * target and are dominated by b, stopping at the first block whose
	 * A value differs from 'val'.  'diffp' always points at the link
	 * we may later have to rewrite.
	 */
	at_top = 1;
	for (;;) {
		if (*diffp == 0)
			return;

		if (JT(*diffp) != JT(b))
			return;

		if (!SET_MEMBER((*diffp)->dom, b->id))
			return;

		if ((*diffp)->val[A_ATOM] != val)
			break;

		diffp = &JF(*diffp);
		at_top = 0;
	}
	/*
	 * Continue down the chain looking for a later block whose A value
	 * matches 'val' again; that is the node to pull up.
	 */
	samep = &JF(*diffp);
	for (;;) {
		if (*samep == 0)
			return;

		if (JT(*samep) != JT(b))
			return;

		if (!SET_MEMBER((*samep)->dom, b->id))
			return;

		if ((*samep)->val[A_ATOM] == val)
			break;

		/* XXX Need to check that there are no data dependencies
		   between dp0 and dp1.  Currently, the code generator
		   will not produce such dependencies. */
		samep = &JF(*samep);
	}
#ifdef notdef
	/* XXX This doesn't cover everything. */
	for (i = 0; i < N_ATOMS; ++i)
		if ((*samep)->val[i] != pred->val[i])
			return;
#endif
	/* Pull up the node: splice it out and re-link it ahead of *diffp. */
	pull = *samep;
	*samep = JF(pull);
	JF(pull) = *diffp;

	/*
	 * At the top of the chain, each predecessor needs to point at the
	 * pulled up node.  Inside the chain, there is only one predecessor
	 * to worry about.
	 */
	if (at_top) {
		for (ep = b->in_edges; ep != 0; ep = ep->next) {
			if (JT(ep->pred) == b)
				JT(ep->pred) = pull;
			else
				JF(ep->pred) = pull;
		}
	}
	else
		*diffp = pull;

	opt_state->done = 0;
}
   1690 
/*
 * Mirror image of or_pullup(): operates on the true-branch (JT) chain
 * of blocks sharing b's false target.
 */
static void
and_pullup(opt_state_t *opt_state, struct block *b)
{
	int val, at_top;
	struct block *pull;
	struct block **diffp, **samep;
	struct edge *ep;

	ep = b->in_edges;
	if (ep == 0)
		return;

	/*
	 * Make sure each predecessor loads the same value.
	 */
	val = ep->pred->val[A_ATOM];
	for (ep = ep->next; ep != 0; ep = ep->next)
		if (val != ep->pred->val[A_ATOM])
			return;

	/* Start from the edge through which the first predecessor enters b. */
	if (JT(b->in_edges->pred) == b)
		diffp = &JT(b->in_edges->pred);
	else
		diffp = &JF(b->in_edges->pred);

	/*
	 * Walk down the true-branch chain of blocks that share b's false
	 * target and are dominated by b, stopping at the first block whose
	 * A value differs from 'val'.
	 */
	at_top = 1;
	for (;;) {
		if (*diffp == 0)
			return;

		if (JF(*diffp) != JF(b))
			return;

		if (!SET_MEMBER((*diffp)->dom, b->id))
			return;

		if ((*diffp)->val[A_ATOM] != val)
			break;

		diffp = &JT(*diffp);
		at_top = 0;
	}
	/*
	 * Continue down the chain looking for a later block whose A value
	 * matches 'val' again; that is the node to pull up.
	 */
	samep = &JT(*diffp);
	for (;;) {
		if (*samep == 0)
			return;

		if (JF(*samep) != JF(b))
			return;

		if (!SET_MEMBER((*samep)->dom, b->id))
			return;

		if ((*samep)->val[A_ATOM] == val)
			break;

		/* XXX Need to check that there are no data dependencies
		   between diffp and samep.  Currently, the code generator
		   will not produce such dependencies. */
		samep = &JT(*samep);
	}
#ifdef notdef
	/* XXX This doesn't cover everything. */
	for (i = 0; i < N_ATOMS; ++i)
		if ((*samep)->val[i] != pred->val[i])
			return;
#endif
	/* Pull up the node: splice it out and re-link it ahead of *diffp. */
	pull = *samep;
	*samep = JT(pull);
	JT(pull) = *diffp;

	/*
	 * At the top of the chain, each predecessor needs to point at the
	 * pulled up node.  Inside the chain, there is only one predecessor
	 * to worry about.
	 */
	if (at_top) {
		for (ep = b->in_edges; ep != 0; ep = ep->next) {
			if (JT(ep->pred) == b)
				JT(ep->pred) = pull;
			else
				JF(ep->pred) = pull;
		}
	}
	else
		*diffp = pull;

	opt_state->done = 0;
}
   1781 
   1782 static void
   1783 opt_blks(opt_state_t *opt_state, struct icode *ic, int do_stmts)
   1784 {
   1785 	int i, maxlevel;
   1786 	struct block *p;
   1787 
   1788 	init_val(opt_state);
   1789 	maxlevel = ic->root->level;
   1790 
   1791 	find_inedges(opt_state, ic->root);
   1792 	for (i = maxlevel; i >= 0; --i)
   1793 		for (p = opt_state->levels[i]; p; p = p->link)
   1794 			opt_blk(opt_state, p, do_stmts);
   1795 
   1796 	if (do_stmts)
   1797 		/*
   1798 		 * No point trying to move branches; it can't possibly
   1799 		 * make a difference at this point.
   1800 		 */
   1801 		return;
   1802 
   1803 	for (i = 1; i <= maxlevel; ++i) {
   1804 		for (p = opt_state->levels[i]; p; p = p->link) {
   1805 			opt_j(opt_state, &p->et);
   1806 			opt_j(opt_state, &p->ef);
   1807 		}
   1808 	}
   1809 
   1810 	find_inedges(opt_state, ic->root);
   1811 	for (i = 1; i <= maxlevel; ++i) {
   1812 		for (p = opt_state->levels[i]; p; p = p->link) {
   1813 			or_pullup(opt_state, p);
   1814 			and_pullup(opt_state, p);
   1815 		}
   1816 	}
   1817 }
   1818 
   1819 static inline void
   1820 link_inedge(struct edge *parent, struct block *child)
   1821 {
   1822 	parent->next = child->in_edges;
   1823 	child->in_edges = parent;
   1824 }
   1825 
   1826 static void
   1827 find_inedges(opt_state_t *opt_state, struct block *root)
   1828 {
   1829 	int i;
   1830 	struct block *b;
   1831 
   1832 	for (i = 0; i < opt_state->n_blocks; ++i)
   1833 		opt_state->blocks[i]->in_edges = 0;
   1834 
   1835 	/*
   1836 	 * Traverse the graph, adding each edge to the predecessor
   1837 	 * list of its successors.  Skip the leaves (i.e. level 0).
   1838 	 */
   1839 	for (i = root->level; i > 0; --i) {
   1840 		for (b = opt_state->levels[i]; b != 0; b = b->link) {
   1841 			link_inedge(&b->et, JT(b));
   1842 			link_inedge(&b->ef, JF(b));
   1843 		}
   1844 	}
   1845 }
   1846 
   1847 static void
   1848 opt_root(struct block **b)
   1849 {
   1850 	struct slist *tmp, *s;
   1851 
   1852 	s = (*b)->stmts;
   1853 	(*b)->stmts = 0;
   1854 	while (BPF_CLASS((*b)->s.code) == BPF_JMP && JT(*b) == JF(*b))
   1855 		*b = JT(*b);
   1856 
   1857 	tmp = (*b)->stmts;
   1858 	if (tmp != 0)
   1859 		sappend(s, tmp);
   1860 	(*b)->stmts = s;
   1861 
   1862 	/*
   1863 	 * If the root node is a return, then there is no
   1864 	 * point executing any statements (since the bpf machine
   1865 	 * has no side effects).
   1866 	 */
   1867 	if (BPF_CLASS((*b)->s.code) == BPF_RET)
   1868 		(*b)->stmts = 0;
   1869 }
   1870 
/*
 * Run the optimization passes to a fixed point: each iteration rebuilds
 * the flow-graph information and re-optimizes every block; any change
 * clears opt_state->done, forcing another iteration.
 */
static void
opt_loop(opt_state_t *opt_state, struct icode *ic, int do_stmts)
{

#ifdef BDEBUG
	if (pcap_optimizer_debug > 1 || pcap_print_dot_graph) {
		printf("opt_loop(root, %d) begin\n", do_stmts);
		opt_dump(opt_state, ic);
	}
#endif
	do {
		opt_state->done = 1;
		/* Recompute metadata invalidated by the previous pass. */
		find_levels(opt_state, ic);
		find_dom(opt_state, ic->root);
		find_closure(opt_state, ic->root);
		find_ud(opt_state, ic->root);
		find_edom(opt_state, ic->root);
		opt_blks(opt_state, ic, do_stmts);
#ifdef BDEBUG
		if (pcap_optimizer_debug > 1 || pcap_print_dot_graph) {
			printf("opt_loop(root, %d) bottom, done=%d\n", do_stmts, opt_state->done);
			opt_dump(opt_state, ic);
		}
#endif
	} while (!opt_state->done);
}
   1897 
   1898 /*
   1899  * Optimize the filter code in its dag representation.
   1900  * Return 0 on success, -1 on error.
   1901  */
int
bpf_optimize(struct icode *ic, char *errbuf)
{
	opt_state_t opt_state;

	memset(&opt_state, 0, sizeof(opt_state));
	opt_state.errbuf = errbuf;
	/*
	 * opt_error() longjmps back to here on any optimizer error;
	 * clean up whatever opt_init() allocated and report failure.
	 */
	if (setjmp(opt_state.top_ctx)) {
		opt_cleanup(&opt_state);
		return -1;
	}
	opt_init(&opt_state, ic);
	/* First a pass without statement elimination, then one with it. */
	opt_loop(&opt_state, ic, 0);
	opt_loop(&opt_state, ic, 1);
	/* Merge equivalent blocks. */
	intern_blocks(&opt_state, ic);
#ifdef BDEBUG
	if (pcap_optimizer_debug > 1 || pcap_print_dot_graph) {
		printf("after intern_blocks()\n");
		opt_dump(&opt_state, ic);
	}
#endif
	/* Finally tidy up the root node. */
	opt_root(&ic->root);
#ifdef BDEBUG
	if (pcap_optimizer_debug > 1 || pcap_print_dot_graph) {
		printf("after opt_root()\n");
		opt_dump(&opt_state, ic);
	}
#endif
	opt_cleanup(&opt_state);
	return 0;
}
   1933 
   1934 static void
   1935 make_marks(struct icode *ic, struct block *p)
   1936 {
   1937 	if (!isMarked(ic, p)) {
   1938 		Mark(ic, p);
   1939 		if (BPF_CLASS(p->s.code) != BPF_RET) {
   1940 			make_marks(ic, JT(p));
   1941 			make_marks(ic, JF(p));
   1942 		}
   1943 	}
   1944 }
   1945 
   1946 /*
   1947  * Mark code array such that isMarked(ic->cur_mark, i) is true
   1948  * only for nodes that are alive.
   1949  */
   1950 static void
   1951 mark_code(struct icode *ic)
   1952 {
   1953 	ic->cur_mark += 1;
   1954 	make_marks(ic, ic->root);
   1955 }
   1956 
   1957 /*
   1958  * True iff the two stmt lists load the same value from the packet into
   1959  * the accumulator.
   1960  */
   1961 static int
   1962 eq_slist(struct slist *x, struct slist *y)
   1963 {
   1964 	for (;;) {
   1965 		while (x && x->s.code == NOP)
   1966 			x = x->next;
   1967 		while (y && y->s.code == NOP)
   1968 			y = y->next;
   1969 		if (x == 0)
   1970 			return y == 0;
   1971 		if (y == 0)
   1972 			return x == 0;
   1973 		if (x->s.code != y->s.code || x->s.k != y->s.k)
   1974 			return 0;
   1975 		x = x->next;
   1976 		y = y->next;
   1977 	}
   1978 }
   1979 
   1980 static inline int
   1981 eq_blk(struct block *b0, struct block *b1)
   1982 {
   1983 	if (b0->s.code == b1->s.code &&
   1984 	    b0->s.k == b1->s.k &&
   1985 	    b0->et.succ == b1->et.succ &&
   1986 	    b0->ef.succ == b1->ef.succ)
   1987 		return eq_slist(b0->stmts, b1->stmts);
   1988 	return 0;
   1989 }
   1990 
/*
 * Merge ("intern") equivalent blocks: equivalent live blocks are linked
 * to a single representative, and all branches are redirected to the
 * representatives.  Repeats until no redirection happens.
 */
static void
intern_blocks(opt_state_t *opt_state, struct icode *ic)
{
	struct block *p;
	int i, j;
	int done1; /* don't shadow global */
 top:
	done1 = 1;
	/* Clear equivalence links left over from the previous round. */
	for (i = 0; i < opt_state->n_blocks; ++i)
		opt_state->blocks[i]->link = 0;

	/* Mark the blocks reachable from the root. */
	mark_code(ic);

	/*
	 * For each live block, look for an equivalent higher-numbered
	 * live block and link to its representative (or to it directly).
	 */
	for (i = opt_state->n_blocks - 1; --i >= 0; ) {
		if (!isMarked(ic, opt_state->blocks[i]))
			continue;
		for (j = i + 1; j < opt_state->n_blocks; ++j) {
			if (!isMarked(ic, opt_state->blocks[j]))
				continue;
			if (eq_blk(opt_state->blocks[i], opt_state->blocks[j])) {
				opt_state->blocks[i]->link = opt_state->blocks[j]->link ?
					opt_state->blocks[j]->link : opt_state->blocks[j];
				break;
			}
		}
	}
	/*
	 * Redirect branch targets to their representatives.  If anything
	 * moved, new equivalences may have been created, so start over.
	 */
	for (i = 0; i < opt_state->n_blocks; ++i) {
		p = opt_state->blocks[i];
		if (JT(p) == 0)
			continue;
		if (JT(p)->link) {
			done1 = 0;
			JT(p) = JT(p)->link;
		}
		if (JF(p)->link) {
			done1 = 0;
			JF(p) = JF(p)->link;
		}
	}
	if (!done1)
		goto top;
}
   2033 
   2034 static void
   2035 opt_cleanup(opt_state_t *opt_state)
   2036 {
   2037 	free((void *)opt_state->vnode_base);
   2038 	free((void *)opt_state->vmap);
   2039 	free((void *)opt_state->edges);
   2040 	free((void *)opt_state->space);
   2041 	free((void *)opt_state->levels);
   2042 	free((void *)opt_state->blocks);
   2043 }
   2044 
   2045 /*
   2046  * For optimizer errors.
   2047  */
static void PCAP_NORETURN
opt_error(opt_state_t *opt_state, const char *fmt, ...)
{
	va_list ap;

	/*
	 * Format the message into the caller-supplied error buffer,
	 * if there is one (errbuf may legitimately be NULL).
	 */
	if (opt_state->errbuf != NULL) {
		va_start(ap, fmt);
		(void)pcap_vsnprintf(opt_state->errbuf,
		    PCAP_ERRBUF_SIZE, fmt, ap);
		va_end(ap);
	}
	/*
	 * Non-local exit back to the setjmp() in the optimizer's entry
	 * point; this function never returns to its caller.
	 */
	longjmp(opt_state->top_ctx, 1);
	/* NOTREACHED */
}
   2062 
   2063 /*
   2064  * Return the number of stmts in 's'.
   2065  */
   2066 static u_int
   2067 slength(struct slist *s)
   2068 {
   2069 	u_int n = 0;
   2070 
   2071 	for (; s; s = s->next)
   2072 		if (s->s.code != NOP)
   2073 			++n;
   2074 	return n;
   2075 }
   2076 
   2077 /*
   2078  * Return the number of nodes reachable by 'p'.
   2079  * All nodes should be initially unmarked.
   2080  */
static int
count_blocks(struct icode *ic, struct block *p)
{
	int subtotal;

	/* A null child, or a block already counted, contributes nothing. */
	if (p == 0 || isMarked(ic, p))
		return 0;
	Mark(ic, p);
	/* Count both subtrees, then add one for this block itself. */
	subtotal = count_blocks(ic, JT(p));
	subtotal += count_blocks(ic, JF(p));
	return subtotal + 1;
}
   2089 
   2090 /*
   2091  * Do a depth first search on the flow graph, numbering the
   2092  * the basic blocks, and entering them into the 'blocks' array.`
   2093  */
   2094 static void
   2095 number_blks_r(opt_state_t *opt_state, struct icode *ic, struct block *p)
   2096 {
   2097 	int n;
   2098 
   2099 	if (p == 0 || isMarked(ic, p))
   2100 		return;
   2101 
   2102 	Mark(ic, p);
   2103 	n = opt_state->n_blocks++;
   2104 	p->id = n;
   2105 	opt_state->blocks[n] = p;
   2106 
   2107 	number_blks_r(opt_state, ic, JT(p));
   2108 	number_blks_r(opt_state, ic, JF(p));
   2109 }
   2110 
   2111 /*
   2112  * Return the number of stmts in the flowgraph reachable by 'p'.
   2113  * The nodes should be unmarked before calling.
   2114  *
   2115  * Note that "stmts" means "instructions", and that this includes
   2116  *
   2117  *	side-effect statements in 'p' (slength(p->stmts));
   2118  *
   2119  *	statements in the true branch from 'p' (count_stmts(JT(p)));
   2120  *
   2121  *	statements in the false branch from 'p' (count_stmts(JF(p)));
   2122  *
   2123  *	the conditional jump itself (1);
   2124  *
   2125  *	an extra long jump if the true branch requires it (p->longjt);
   2126  *
   2127  *	an extra long jump if the false branch requires it (p->longjf).
   2128  */
   2129 static u_int
   2130 count_stmts(struct icode *ic, struct block *p)
   2131 {
   2132 	u_int n;
   2133 
   2134 	if (p == 0 || isMarked(ic, p))
   2135 		return 0;
   2136 	Mark(ic, p);
   2137 	n = count_stmts(ic, JT(p)) + count_stmts(ic, JF(p));
   2138 	return slength(p->stmts) + n + 1 + p->longjt + p->longjf;
   2139 }
   2140 
   2141 /*
   2142  * Allocate memory.  All allocation is done before optimization
   2143  * is begun.  A linear bound on the size of all data structures is computed
   2144  * from the total number of blocks and/or statements.
   2145  */
static void
opt_init(opt_state_t *opt_state, struct icode *ic)
{
	bpf_u_int32 *p;
	int i, n, max_stmts;

	/*
	 * First, count the blocks, so we can malloc an array to map
	 * block number to block.  Then, put the blocks into the array.
	 */
	unMarkAll(ic);
	n = count_blocks(ic, ic->root);
	opt_state->blocks = (struct block **)calloc(n, sizeof(*opt_state->blocks));
	if (opt_state->blocks == NULL)
		opt_error(opt_state, "malloc");
	unMarkAll(ic);
	opt_state->n_blocks = 0;
	number_blks_r(opt_state, ic, ic->root);

	/* Every block has exactly two outgoing edges (true/false). */
	opt_state->n_edges = 2 * opt_state->n_blocks;
	opt_state->edges = (struct edge **)calloc(opt_state->n_edges, sizeof(*opt_state->edges));
	if (opt_state->edges == NULL) {
		opt_error(opt_state, "malloc");
	}

	/*
	 * The number of levels is bounded by the number of nodes.
	 */
	opt_state->levels = (struct block **)calloc(opt_state->n_blocks, sizeof(*opt_state->levels));
	if (opt_state->levels == NULL) {
		opt_error(opt_state, "malloc");
	}

	/* Words of bpf_u_int32 needed for one edge / one node bit set. */
	opt_state->edgewords = opt_state->n_edges / (8 * sizeof(bpf_u_int32)) + 1;
	opt_state->nodewords = opt_state->n_blocks / (8 * sizeof(bpf_u_int32)) + 1;

	/* XXX */
	/*
	 * A single arena holding all the bit sets: a 'dom' set and a
	 * 'closure' set per node, plus an 'edom' set per edge.
	 * NOTE(review): the size expression below has no overflow
	 * check; a pathologically large filter could wrap it - confirm
	 * against upstream hardening if that is a concern.
	 */
	opt_state->space = (bpf_u_int32 *)malloc(2 * opt_state->n_blocks * opt_state->nodewords * sizeof(*opt_state->space)
				 + opt_state->n_edges * opt_state->edgewords * sizeof(*opt_state->space));
	if (opt_state->space == NULL) {
		opt_error(opt_state, "malloc");
	}
	/* Carve the arena up, handing each block and edge its slice. */
	p = opt_state->space;
	opt_state->all_dom_sets = p;
	for (i = 0; i < n; ++i) {
		opt_state->blocks[i]->dom = p;
		p += opt_state->nodewords;
	}
	opt_state->all_closure_sets = p;
	for (i = 0; i < n; ++i) {
		opt_state->blocks[i]->closure = p;
		p += opt_state->nodewords;
	}
	opt_state->all_edge_sets = p;
	for (i = 0; i < n; ++i) {
		register struct block *b = opt_state->blocks[i];

		b->et.edom = p;
		p += opt_state->edgewords;
		b->ef.edom = p;
		p += opt_state->edgewords;
		/*
		 * True edges get ids [0, n_blocks); false edges get
		 * ids [n_blocks, 2*n_blocks).
		 */
		b->et.id = i;
		opt_state->edges[i] = &b->et;
		b->ef.id = opt_state->n_blocks + i;
		opt_state->edges[opt_state->n_blocks + i] = &b->ef;
		b->et.pred = b;
		b->ef.pred = b;
	}
	/* +1 per block for its branch/return instruction. */
	max_stmts = 0;
	for (i = 0; i < n; ++i)
		max_stmts += slength(opt_state->blocks[i]->stmts) + 1;
	/*
	 * We allocate at most 3 value numbers per statement,
	 * so this is an upper bound on the number of valnodes
	 * we'll need.
	 */
	opt_state->maxval = 3 * max_stmts;
	opt_state->vmap = (struct vmapinfo *)calloc(opt_state->maxval, sizeof(*opt_state->vmap));
	if (opt_state->vmap == NULL) {
		opt_error(opt_state, "malloc");
	}
	opt_state->vnode_base = (struct valnode *)calloc(opt_state->maxval, sizeof(*opt_state->vnode_base));
	if (opt_state->vnode_base == NULL) {
		opt_error(opt_state, "malloc");
	}
}
   2232 
   2233 /*
   2234  * This is only used when supporting optimizer debugging.  It is
   2235  * global state, so do *not* do more than one compile in parallel
   2236  * and expect it to provide meaningful information.
   2237  */
   2238 #ifdef BDEBUG
   2239 int bids[NBIDS];
   2240 #endif
   2241 
   2242 static void PCAP_NORETURN conv_error(conv_state_t *, const char *, ...)
   2243     PCAP_PRINTFLIKE(2, 3);
   2244 
   2245 /*
   2246  * Returns true if successful.  Returns false if a branch has
   2247  * an offset that is too large.  If so, we have marked that
   2248  * branch so that on a subsequent iteration, it will be treated
   2249  * properly.
   2250  */
static int
convert_code_r(conv_state_t *conv_state, struct icode *ic, struct block *p)
{
	struct bpf_insn *dst;
	struct slist *src;
	u_int slen;
	u_int off;
	u_int extrajmps;	/* number of extra jumps inserted */
	struct slist **offset = NULL;

	if (p == 0 || isMarked(ic, p))
		return (1);
	Mark(ic, p);

	/* Emit the children first: the array is filled back-to-front. */
	if (convert_code_r(conv_state, ic, JF(p)) == 0)
		return (0);
	if (convert_code_r(conv_state, ic, JT(p)) == 0)
		return (0);

	/*
	 * Reserve room for this block's statements, its branch
	 * instruction, and any long-jump instructions it was marked
	 * (on a previous attempt) as needing.
	 */
	slen = slength(p->stmts);
	dst = conv_state->ftail -= (slen + 1 + p->longjt + p->longjf);
		/* inflate length by any extra jumps */

	p->offset = (int)(dst - conv_state->fstart);

	/* generate offset[] for convenience  */
	if (slen) {
		offset = (struct slist **)calloc(slen, sizeof(struct slist *));
		if (!offset) {
			conv_error(conv_state, "not enough core");
			/*NOTREACHED*/
		}
	}
	/* offset[i] maps instruction slot i back to its stmt node,
	 * so block-local jump targets can be resolved below. */
	src = p->stmts;
	for (off = 0; off < slen && src; off++) {
#if 0
		printf("off=%d src=%x\n", off, src);
#endif
		offset[off] = src;
		src = src->next;
	}

	/* Emit each non-NOP statement, fixing up in-block jumps. */
	off = 0;
	for (src = p->stmts; src; src = src->next) {
		if (src->s.code == NOP)
			continue;
		dst->code = (u_short)src->s.code;
		dst->k = src->s.k;

		/* fill block-local relative jump */
		if (BPF_CLASS(src->s.code) != BPF_JMP || src->s.code == (BPF_JMP|BPF_JA)) {
#if 0
			if (src->s.jt || src->s.jf) {
				free(offset);
				conv_error(conv_state, "illegal jmp destination");
				/*NOTREACHED*/
			}
#endif
			goto filled;
		}
		if (off == slen - 2)	/*???*/
			goto filled;

	    {
		u_int i;
		int jt, jf;
		const char ljerr[] = "%s for block-local relative jump: off=%d";

#if 0
		printf("code=%x off=%d %x %x\n", src->s.code,
			off, src->s.jt, src->s.jf);
#endif

		if (!src->s.jt || !src->s.jf) {
			free(offset);
			conv_error(conv_state, ljerr, "no jmp destination", off);
			/*NOTREACHED*/
		}

		/*
		 * Scan offset[] for the single slot holding each jump
		 * target; jt/jf count matches to catch duplicates.
		 */
		jt = jf = 0;
		for (i = 0; i < slen; i++) {
			if (offset[i] == src->s.jt) {
				if (jt) {
					free(offset);
					conv_error(conv_state, ljerr, "multiple matches", off);
					/*NOTREACHED*/
				}

				/* BPF branch displacements are 8 bits. */
				if (i - off - 1 >= 256) {
					free(offset);
					conv_error(conv_state, ljerr, "out-of-range jump", off);
					/*NOTREACHED*/
				}
				dst->jt = (u_char)(i - off - 1);
				jt++;
			}
			if (offset[i] == src->s.jf) {
				if (jf) {
					free(offset);
					conv_error(conv_state, ljerr, "multiple matches", off);
					/*NOTREACHED*/
				}
				if (i - off - 1 >= 256) {
					free(offset);
					conv_error(conv_state, ljerr, "out-of-range jump", off);
					/*NOTREACHED*/
				}
				dst->jf = (u_char)(i - off - 1);
				jf++;
			}
		}
		if (!jt || !jf) {
			free(offset);
			conv_error(conv_state, ljerr, "no destination found", off);
			/*NOTREACHED*/
		}
	    }
filled:
		++dst;
		++off;
	}
	if (offset)
		free(offset);

#ifdef BDEBUG
	if (dst - conv_state->fstart < NBIDS)
		bids[dst - conv_state->fstart] = p->id + 1;
#endif
	/* Emit the block's closing branch instruction. */
	dst->code = (u_short)p->s.code;
	dst->k = p->s.k;
	if (JT(p)) {
		extrajmps = 0;
		off = JT(p)->offset - (p->offset + slen) - 1;
		if (off >= 256) {
		    /* offset too large for branch, must add a jump */
		    if (p->longjt == 0) {
		    	/* mark this instruction and retry */
			p->longjt++;
			/* Caller restarts conversion with room reserved. */
			return(0);
		    }
		    /* branch if T to following jump */
		    if (extrajmps >= 256) {
			conv_error(conv_state, "too many extra jumps");
			/*NOTREACHED*/
		    }
		    dst->jt = (u_char)extrajmps;
		    extrajmps++;
		    dst[extrajmps].code = BPF_JMP|BPF_JA;
		    dst[extrajmps].k = off - extrajmps;
		}
		else
		    dst->jt = (u_char)off;
		off = JF(p)->offset - (p->offset + slen) - 1;
		if (off >= 256) {
		    /* offset too large for branch, must add a jump */
		    if (p->longjf == 0) {
		    	/* mark this instruction and retry */
			p->longjf++;
			return(0);
		    }
		    /* branch if F to following jump */
		    /* if two jumps are inserted, F goes to second one */
		    if (extrajmps >= 256) {
			conv_error(conv_state, "too many extra jumps");
			/*NOTREACHED*/
		    }
		    dst->jf = (u_char)extrajmps;
		    extrajmps++;
		    dst[extrajmps].code = BPF_JMP|BPF_JA;
		    dst[extrajmps].k = off - extrajmps;
		}
		else
		    dst->jf = (u_char)off;
	}
	return (1);
}
   2427 
   2428 
   2429 /*
   2430  * Convert flowgraph intermediate representation to the
   2431  * BPF array representation.  Set *lenp to the number of instructions.
   2432  *
   2433  * This routine does *NOT* leak the memory pointed to by fp.  It *must
   2434  * not* do free(fp) before returning fp; doing so would make no sense,
   2435  * as the BPF array pointed to by the return value of icode_to_fcode()
   2436  * must be valid - it's being returned for use in a bpf_program structure.
   2437  *
   2438  * If it appears that icode_to_fcode() is leaking, the problem is that
   2439  * the program using pcap_compile() is failing to free the memory in
   2440  * the BPF program when it's done - the leak is in the program, not in
   2441  * the routine that happens to be allocating the memory.  (By analogy, if
   2442  * a program calls fopen() without ever calling fclose() on the FILE *,
   2443  * it will leak the FILE structure; the leak is not in fopen(), it's in
   2444  * the program.)  Change the program to use pcap_freecode() when it's
   2445  * done with the filter program.  See the pcap man page.
   2446  */
   2447 struct bpf_insn *
   2448 icode_to_fcode(struct icode *ic, struct block *root, u_int *lenp,
   2449     char *errbuf)
   2450 {
   2451 	u_int n;
   2452 	struct bpf_insn *fp;
   2453 	conv_state_t conv_state;
   2454 
   2455 	conv_state.fstart = NULL;
   2456 	conv_state.errbuf = errbuf;
   2457 	if (setjmp(conv_state.top_ctx) != 0) {
   2458 		free(conv_state.fstart);
   2459 		return NULL;
   2460 	}
   2461 
   2462 	/*
   2463 	 * Loop doing convert_code_r() until no branches remain
   2464 	 * with too-large offsets.
   2465 	 */
   2466 	for (;;) {
   2467 	    unMarkAll(ic);
   2468 	    n = *lenp = count_stmts(ic, root);
   2469 
   2470 	    fp = (struct bpf_insn *)malloc(sizeof(*fp) * n);
   2471 	    if (fp == NULL) {
   2472 		(void)pcap_snprintf(errbuf, PCAP_ERRBUF_SIZE,
   2473 		    "malloc");
   2474 		free(fp);
   2475 		return NULL;
   2476 	    }
   2477 	    memset((char *)fp, 0, sizeof(*fp) * n);
   2478 	    conv_state.fstart = fp;
   2479 	    conv_state.ftail = fp + n;
   2480 
   2481 	    unMarkAll(ic);
   2482 	    if (convert_code_r(&conv_state, ic, root))
   2483 		break;
   2484 	    free(fp);
   2485 	}
   2486 
   2487 	return fp;
   2488 }
   2489 
   2490 /*
   2491  * For iconv_to_fconv() errors.
   2492  */
static void PCAP_NORETURN
conv_error(conv_state_t *conv_state, const char *fmt, ...)
{
	va_list ap;

	/* Format the message into the caller-supplied error buffer. */
	va_start(ap, fmt);
	(void)pcap_vsnprintf(conv_state->errbuf,
	    PCAP_ERRBUF_SIZE, fmt, ap);
	va_end(ap);
	/*
	 * Non-local exit back to the setjmp() in icode_to_fcode();
	 * this function never returns to its caller.
	 */
	longjmp(conv_state->top_ctx, 1);
	/* NOTREACHED */
}
   2505 
   2506 /*
   2507  * Make a copy of a BPF program and put it in the "fcode" member of
   2508  * a "pcap_t".
   2509  *
   2510  * If we fail to allocate memory for the copy, fill in the "errbuf"
   2511  * member of the "pcap_t" with an error message, and return -1;
   2512  * otherwise, return 0.
   2513  */
   2514 int
   2515 install_bpf_program(pcap_t *p, struct bpf_program *fp)
   2516 {
   2517 	size_t prog_size;
   2518 
   2519 	/*
   2520 	 * Validate the program.
   2521 	 */
   2522 	if (!bpf_validate(fp->bf_insns, fp->bf_len)) {
   2523 		pcap_snprintf(p->errbuf, sizeof(p->errbuf),
   2524 			"BPF program is not valid");
   2525 		return (-1);
   2526 	}
   2527 
   2528 	/*
   2529 	 * Free up any already installed program.
   2530 	 */
   2531 	pcap_freecode(&p->fcode);
   2532 
   2533 	prog_size = sizeof(*fp->bf_insns) * fp->bf_len;
   2534 	p->fcode.bf_len = fp->bf_len;
   2535 	p->fcode.bf_insns = (struct bpf_insn *)malloc(prog_size);
   2536 	if (p->fcode.bf_insns == NULL) {
   2537 		pcap_fmt_errmsg_for_errno(p->errbuf, sizeof(p->errbuf),
   2538 		    errno, "malloc");
   2539 		return (-1);
   2540 	}
   2541 	memcpy(p->fcode.bf_insns, fp->bf_insns, prog_size);
   2542 	return (0);
   2543 }
   2544 
   2545 #ifdef BDEBUG
/*
 * Emit one graphviz node per basic block: an ellipse labeled with the
 * block's disassembled instructions, a tooltip listing the value
 * numbers live at block exit, and double periphery for leaf (return)
 * blocks.  Recurses over both successors; marks guard against cycles.
 */
static void
dot_dump_node(struct icode *ic, struct block *block, struct bpf_program *prog,
    FILE *out)
{
	int icount, noffset;
	int i;

	if (block == NULL || isMarked(ic, block))
		return;
	Mark(ic, block);

	/* Instructions belonging to this block, clamped to program end. */
	icount = slength(block->stmts) + 1 + block->longjt + block->longjf;
	noffset = min(block->offset + icount, (int)prog->bf_len);

	fprintf(out, "\tblock%d [shape=ellipse, id=\"block-%d\" label=\"BLOCK%d\\n", block->id, block->id, block->id);
	for (i = block->offset; i < noffset; i++) {
		fprintf(out, "\\n%s", bpf_image(prog->bf_insns + i, i));
	}
	/* Tooltip: value numbers of memory words, A, and X at exit. */
	fprintf(out, "\" tooltip=\"");
	for (i = 0; i < BPF_MEMWORDS; i++)
		if (block->val[i] != VAL_UNKNOWN)
			fprintf(out, "val[%d]=%d ", i, block->val[i]);
	fprintf(out, "val[A]=%d ", block->val[A_ATOM]);
	fprintf(out, "val[X]=%d", block->val[X_ATOM]);
	fprintf(out, "\"");
	/* Leaf blocks (no true successor) get a double outline. */
	if (JT(block) == NULL)
		fprintf(out, ", peripheries=2");
	fprintf(out, "];\n");

	dot_dump_node(ic, JT(block), prog, out);
	dot_dump_node(ic, JF(block), prog, out);
}
   2578 
   2579 static void
   2580 dot_dump_edge(struct icode *ic, struct block *block, FILE *out)
   2581 {
   2582 	if (block == NULL || isMarked(ic, block))
   2583 		return;
   2584 	Mark(ic, block);
   2585 
   2586 	if (JT(block)) {
   2587 		fprintf(out, "\t\"block%d\":se -> \"block%d\":n [label=\"T\"]; \n",
   2588 				block->id, JT(block)->id);
   2589 		fprintf(out, "\t\"block%d\":sw -> \"block%d\":n [label=\"F\"]; \n",
   2590 			   block->id, JF(block)->id);
   2591 	}
   2592 	dot_dump_edge(ic, JT(block), out);
   2593 	dot_dump_edge(ic, JF(block), out);
   2594 }
   2595 
   2596 /* Output the block CFG using graphviz/DOT language
 * In the CFG, block's code, value index for each register at EXIT,
 * and the jump relationship is shown.
   2599  *
   2600  * example DOT for BPF `ip src host 1.1.1.1' is:
   2601     digraph BPF {
   2602     	block0 [shape=ellipse, id="block-0" label="BLOCK0\n\n(000) ldh      [12]\n(001) jeq      #0x800           jt 2	jf 5" tooltip="val[A]=0 val[X]=0"];
   2603     	block1 [shape=ellipse, id="block-1" label="BLOCK1\n\n(002) ld       [26]\n(003) jeq      #0x1010101       jt 4	jf 5" tooltip="val[A]=0 val[X]=0"];
   2604     	block2 [shape=ellipse, id="block-2" label="BLOCK2\n\n(004) ret      #68" tooltip="val[A]=0 val[X]=0", peripheries=2];
   2605     	block3 [shape=ellipse, id="block-3" label="BLOCK3\n\n(005) ret      #0" tooltip="val[A]=0 val[X]=0", peripheries=2];
   2606     	"block0":se -> "block1":n [label="T"];
   2607     	"block0":sw -> "block3":n [label="F"];
   2608     	"block1":se -> "block2":n [label="T"];
   2609     	"block1":sw -> "block3":n [label="F"];
   2610     }
   2611  *
   2612  *  After install graphviz on http://www.graphviz.org/, save it as bpf.dot
   2613  *  and run `dot -Tpng -O bpf.dot' to draw the graph.
   2614  */
   2615 static int
   2616 dot_dump(struct icode *ic, char *errbuf)
   2617 {
   2618 	struct bpf_program f;
   2619 	FILE *out = stdout;
   2620 
   2621 	memset(bids, 0, sizeof bids);
   2622 	f.bf_insns = icode_to_fcode(ic, ic->root, &f.bf_len, errbuf);
   2623 	if (f.bf_insns == NULL)
   2624 		return -1;
   2625 
   2626 	fprintf(out, "digraph BPF {\n");
   2627 	unMarkAll(ic);
   2628 	dot_dump_node(ic, ic->root, &f, out);
   2629 	unMarkAll(ic);
   2630 	dot_dump_edge(ic, ic->root, out);
   2631 	fprintf(out, "}\n");
   2632 
   2633 	free((char *)f.bf_insns);
   2634 	return 0;
   2635 }
   2636 
   2637 static int
   2638 plain_dump(struct icode *ic, char *errbuf)
   2639 {
   2640 	struct bpf_program f;
   2641 
   2642 	memset(bids, 0, sizeof bids);
   2643 	f.bf_insns = icode_to_fcode(ic, ic->root, &f.bf_len, errbuf);
   2644 	if (f.bf_insns == NULL)
   2645 		return -1;
   2646 	bpf_dump(&f, 1);
   2647 	putchar('\n');
   2648 	free((char *)f.bf_insns);
   2649 	return 0;
   2650 }
   2651 
   2652 static void
   2653 opt_dump(opt_state_t *opt_state, struct icode *ic)
   2654 {
   2655 	int status;
   2656 	char errbuf[PCAP_ERRBUF_SIZE];
   2657 
   2658 	/*
   2659 	 * If the CFG, in DOT format, is requested, output it rather than
   2660 	 * the code that would be generated from that graph.
   2661 	 */
   2662 	if (pcap_print_dot_graph)
   2663 		status = dot_dump(ic, errbuf);
   2664 	else
   2665 		status = plain_dump(ic, errbuf);
   2666 	if (status == -1)
   2667 		opt_error(opt_state, "opt_dump: icode_to_fcode failed: %s", errbuf);
   2668 }
   2669 #endif
   2670