libpcap/dist/optimize.c

1.9  christos /*	$NetBSD: optimize.c,v 1.9 2017/01/24 22:29:28 christos Exp $	*/
1.5  christos
1.1  christos /*
1.1  christos  * Copyright (c) 1988, 1989, 1990, 1991, 1993, 1994, 1995, 1996
1.1  christos  *	The Regents of the University of California.  All rights reserved.
1.1  christos  *
1.1  christos  * Redistribution and use in source and binary forms, with or without
1.1  christos  * modification, are permitted provided that: (1) source code distributions
1.1  christos  * retain the above copyright notice and this paragraph in its entirety, (2)
1.1  christos  * distributions including binary code include the above copyright notice and
1.1  christos  * this paragraph in its entirety in the documentation or other materials
1.1  christos  * provided with the distribution, and (3) all advertising materials mentioning
1.1  christos  * features or use of this software display the following acknowledgement:
1.1  christos  * ``This product includes software developed by the University of California,
1.1  christos  * Lawrence Berkeley Laboratory and its contributors.'' Neither the name of
1.1  christos  * the University nor the names of its contributors may be used to endorse
1.1  christos  * or promote products derived from this software without specific prior
1.1  christos  * written permission.
1.1  christos  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
1.1  christos  * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
1.1  christos  * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
1.1  christos  *
1.1  christos  *  Optimization module for tcpdump intermediate representation.
1.1  christos  */
1.7  christos
1.7  christos #include <sys/cdefs.h>
1.9  christos __RCSID("$NetBSD: optimize.c,v 1.9 2017/01/24 22:29:28 christos Exp $");
1.1  christos
1.1  christos #ifdef HAVE_CONFIG_H
1.1  christos #include "config.h"
1.1  christos #endif
1.1  christos
1.9  christos #ifdef _WIN32
1.1  christos #include <pcap-stdinc.h>
1.9  christos #else /* _WIN32 */
1.1  christos #if HAVE_INTTYPES_H
1.1  christos #include <inttypes.h>
1.1  christos #elif HAVE_STDINT_H
1.1  christos #include <stdint.h>
1.1  christos #endif
1.1  christos #ifdef HAVE_SYS_BITYPES_H
1.1  christos #include <sys/bitypes.h>
1.1  christos #endif
1.1  christos #include <sys/types.h>
1.9  christos #endif /* _WIN32 */
1.1  christos
1.1  christos #include <stdio.h>
1.1  christos #include <stdlib.h>
1.1  christos #include <memory.h>
1.1  christos #include <string.h>
1.1  christos
1.1  christos #include <errno.h>
1.1  christos
1.1  christos #include "pcap-int.h"
1.1  christos
1.1  christos #include "gencode.h"
1.1  christos
1.1  christos #ifdef HAVE_OS_PROTO_H
1.1  christos #include "os-proto.h"
1.1  christos #endif
1.1  christos
1.1  christos #ifdef BDEBUG
1.9  christos int pcap_optimizer_debug;
1.1  christos #endif
1.1  christos
1.1  christos #if defined(MSDOS) && !defined(__DJGPP__)
1.1  christos extern int _w32_ffs (int mask);
1.1  christos #define ffs _w32_ffs
1.1  christos #endif
1.1  christos
1.9  christos /*
1.9  christos  * So is the check for _MSC_VER done because MinGW has this?
1.9  christos  */
1.9  christos #if defined(_WIN32) && defined (_MSC_VER)
/*
 * ffs -- find first set bit (modelled on the VAX ffs instruction).
 *
 * Returns the 1-based position of the least significant set bit of
 * 'mask', or 0 if 'mask' is 0.
 *
 * The scan is done on an unsigned copy of the argument: right-shifting
 * a negative signed value is implementation-defined in C, whereas an
 * unsigned shift is fully defined.
 *
 * XXX - with versions of VS that have it, use _BitScanForward()?
 */
static int
ffs(int mask)
{
	unsigned int bits = (unsigned int)mask;
	int bit;

	if (bits == 0)
		return (0);
	for (bit = 1; (bits & 1U) == 0; bit++)
		bits >>= 1;
	return (bit);
}
1.1  christos #endif
1.1  christos
/*
 * Opcode value stored in a statement's 'code' field to mark the
 * instruction as deleted.
 */
#define NOP (-1)
1.1  christos
/*
 * Atom (register) numbers used by the use-def analysis.
 * Atoms 0 .. BPF_MEMWORDS-1 stand for the scratch memory locations
 * with the same index; the accumulator (A_ATOM) and the index
 * register (X_ATOM) are numbered immediately after them.
 */
#define A_ATOM (BPF_MEMWORDS)
#define X_ATOM (BPF_MEMWORDS + 1)
1.1  christos
/*
 * Pseudo-atom meaning "both the accumulator and the X register"; it
 * is only used when reporting which registers a statement uses.
 * Currently, the use-def code assumes only one definition per
 * instruction.
 */
#define AX_ATOM (N_ATOMS)
1.1  christos
1.1  christos /*
1.9  christos  * These data structures are used in a Cocke and Schwartz style
1.9  christos  * value numbering scheme.  Since the flowgraph is acyclic,
1.9  christos  * exit values can be propagated from a node's predecessors
1.9  christos  * provided it is uniquely defined.
1.1  christos  */
/*
 * One entry of the value-numbering hash table: the triple
 * (code, v0, v1) has been assigned value number 'val'.
 */
struct valnode {
	int code;		/* opcode of the expression */
	int v0;			/* first operand */
	int v1;			/* second operand */
	int val;		/* value number assigned to the triple */
	struct valnode *next;	/* next entry in the same hash bucket */
};
1.1  christos
/*
 * Value number of the integer constant 'i', obtained by looking it up
 * as a "load immediate" expression.  Expects a variable named
 * 'opt_state' to be in scope at the point of use.
 */
#define K(i) F(opt_state, BPF_LD|BPF_IMM|BPF_W, (i), 0L)
1.1  christos
1.9  christos struct vmapinfo {
1.9  christos 	int is_const;
1.9  christos 	bpf_int32 const_val;
1.9  christos };
1.1  christos
1.9  christos struct _opt_state {
1.9  christos 	/*
1.9  christos 	 * A flag to indicate that further optimization is needed.
1.9  christos 	 * Iterative passes are continued until a given pass yields no
1.9  christos 	 * branch movement.
1.9  christos 	 */
1.9  christos 	int done;
1.1  christos
1.9  christos 	int n_blocks;
1.9  christos 	struct block **blocks;
1.9  christos 	int n_edges;
1.9  christos 	struct edge **edges;
1.1  christos
1.9  christos 	/*
1.9  christos 	 * A bit vector set representation of the dominators.
1.9  christos 	 * We round up the set size to the next power of two.
1.9  christos 	 */
1.9  christos 	int nodewords;
1.9  christos 	int edgewords;
1.9  christos 	struct block **levels;
1.9  christos 	bpf_u_int32 *space;
1.1  christos
1.1  christos #define BITS_PER_WORD (8*sizeof(bpf_u_int32))
1.1  christos /*
1.1  christos  * True if a is in uset {p}
1.1  christos  */
1.1  christos #define SET_MEMBER(p, a) \
1.1  christos ((p)[(unsigned)(a) / BITS_PER_WORD] & (1 << ((unsigned)(a) % BITS_PER_WORD)))
1.1  christos
1.1  christos /*
1.1  christos  * Add 'a' to uset p.
1.1  christos  */
1.1  christos #define SET_INSERT(p, a) \
1.1  christos (p)[(unsigned)(a) / BITS_PER_WORD] |= (1 << ((unsigned)(a) % BITS_PER_WORD))
1.1  christos
1.1  christos /*
1.1  christos  * Delete 'a' from uset p.
1.1  christos  */
1.1  christos #define SET_DELETE(p, a) \
1.1  christos (p)[(unsigned)(a) / BITS_PER_WORD] &= ~(1 << ((unsigned)(a) % BITS_PER_WORD))
1.1  christos
1.1  christos /*
1.1  christos  * a := a intersect b
1.1  christos  */
1.1  christos #define SET_INTERSECT(a, b, n)\
1.1  christos {\
1.1  christos 	register bpf_u_int32 *_x = a, *_y = b;\
1.1  christos 	register int _n = n;\
1.1  christos 	while (--_n >= 0) *_x++ &= *_y++;\
1.1  christos }
1.1  christos
1.1  christos /*
1.1  christos  * a := a - b
1.1  christos  */
1.1  christos #define SET_SUBTRACT(a, b, n)\
1.1  christos {\
1.1  christos 	register bpf_u_int32 *_x = a, *_y = b;\
1.1  christos 	register int _n = n;\
1.1  christos 	while (--_n >= 0) *_x++ &=~ *_y++;\
1.1  christos }
1.1  christos
1.1  christos /*
1.1  christos  * a := a union b
1.1  christos  */
1.1  christos #define SET_UNION(a, b, n)\
1.1  christos {\
1.1  christos 	register bpf_u_int32 *_x = a, *_y = b;\
1.1  christos 	register int _n = n;\
1.1  christos 	while (--_n >= 0) *_x++ |= *_y++;\
1.1  christos }
1.1  christos
1.9  christos 	uset all_dom_sets;
1.9  christos 	uset all_closure_sets;
1.9  christos 	uset all_edge_sets;
1.9  christos
1.9  christos #define MODULUS 213
1.9  christos 	struct valnode *hashtbl[MODULUS];
1.9  christos 	int curval;
1.9  christos 	int maxval;
1.9  christos
1.9  christos 	struct vmapinfo *vmap;
1.9  christos 	struct valnode *vnode_base;
1.9  christos 	struct valnode *next_vnode;
1.9  christos };
1.9  christos
/*
 * State used while converting the basic-block form of the code into
 * the array form that BPF requires.
 */
typedef struct {
	/* Start of the malloc'd instruction array. */
	struct bpf_insn *fstart;
	/* Cursor into that array, used during the recursive traversal. */
	struct bpf_insn *ftail;
} conv_state_t;
1.9  christos
1.9  christos static void opt_init(compiler_state_t *, opt_state_t *, struct icode *);
1.9  christos static void opt_cleanup(opt_state_t *);
1.9  christos
1.9  christos static void intern_blocks(opt_state_t *, struct icode *);
1.9  christos
1.9  christos static void find_inedges(opt_state_t *, struct block *);
1.9  christos #ifdef BDEBUG
1.9  christos static void opt_dump(compiler_state_t *, struct icode *);
1.9  christos #endif
1.1  christos
/*
 * Larger of two values.  Each argument may be evaluated twice, so
 * avoid arguments with side effects.
 */
#ifndef MAX
#define MAX(x, y) ((x) > (y) ? (x) : (y))
#endif
1.1  christos
1.1  christos static void
1.9  christos find_levels_r(opt_state_t *opt_state, struct icode *ic, struct block *b)
1.1  christos {
1.1  christos 	int level;
1.1  christos
1.9  christos 	if (isMarked(ic, b))
1.1  christos 		return;
1.1  christos
1.9  christos 	Mark(ic, b);
1.1  christos 	b->link = 0;
1.1  christos
1.1  christos 	if (JT(b)) {
1.9  christos 		find_levels_r(opt_state, ic, JT(b));
1.9  christos 		find_levels_r(opt_state, ic, JF(b));
1.1  christos 		level = MAX(JT(b)->level, JF(b)->level) + 1;
1.1  christos 	} else
1.1  christos 		level = 0;
1.1  christos 	b->level = level;
1.9  christos 	b->link = opt_state->levels[level];
1.9  christos 	opt_state->levels[level] = b;
1.1  christos }
1.1  christos
1.1  christos /*
1.1  christos  * Level graph.  The levels go from 0 at the leaves to
1.9  christos  * N_LEVELS at the root.  The opt_state->levels[] array points to the
1.1  christos  * first node of the level list, whose elements are linked
1.1  christos  * with the 'link' field of the struct block.
1.1  christos  */
1.1  christos static void
1.9  christos find_levels(opt_state_t *opt_state, struct icode *ic)
1.1  christos {
1.9  christos 	memset((char *)opt_state->levels, 0, opt_state->n_blocks * sizeof(*opt_state->levels));
1.9  christos 	unMarkAll(ic);
1.9  christos 	find_levels_r(opt_state, ic, ic->root);
1.1  christos }
1.1  christos
1.1  christos /*
1.1  christos  * Find dominator relationships.
1.1  christos  * Assumes graph has been leveled.
1.1  christos  */
1.1  christos static void
1.9  christos find_dom(opt_state_t *opt_state, struct block *root)
1.1  christos {
1.1  christos 	int i;
1.1  christos 	struct block *b;
1.1  christos 	bpf_u_int32 *x;
1.1  christos
1.1  christos 	/*
1.1  christos 	 * Initialize sets to contain all nodes.
1.1  christos 	 */
1.9  christos 	x = opt_state->all_dom_sets;
1.9  christos 	i = opt_state->n_blocks * opt_state->nodewords;
1.1  christos 	while (--i >= 0)
1.1  christos 		*x++ = ~0;
1.1  christos 	/* Root starts off empty. */
1.9  christos 	for (i = opt_state->nodewords; --i >= 0;)
1.1  christos 		root->dom[i] = 0;
1.1  christos
1.1  christos 	/* root->level is the highest level no found. */
1.1  christos 	for (i = root->level; i >= 0; --i) {
1.9  christos 		for (b = opt_state->levels[i]; b; b = b->link) {
1.1  christos 			SET_INSERT(b->dom, b->id);
1.1  christos 			if (JT(b) == 0)
1.1  christos 				continue;
1.9  christos 			SET_INTERSECT(JT(b)->dom, b->dom, opt_state->nodewords);
1.9  christos 			SET_INTERSECT(JF(b)->dom, b->dom, opt_state->nodewords);
1.1  christos 		}
1.1  christos 	}
1.1  christos }
1.1  christos
1.1  christos static void
1.9  christos propedom(opt_state_t *opt_state, struct edge *ep)
1.1  christos {
1.1  christos 	SET_INSERT(ep->edom, ep->id);
1.1  christos 	if (ep->succ) {
1.9  christos 		SET_INTERSECT(ep->succ->et.edom, ep->edom, opt_state->edgewords);
1.9  christos 		SET_INTERSECT(ep->succ->ef.edom, ep->edom, opt_state->edgewords);
1.1  christos 	}
1.1  christos }
1.1  christos
1.1  christos /*
1.1  christos  * Compute edge dominators.
1.1  christos  * Assumes graph has been leveled and predecessors established.
1.1  christos  */
1.1  christos static void
1.9  christos find_edom(opt_state_t *opt_state, struct block *root)
1.1  christos {
1.1  christos 	int i;
1.1  christos 	uset x;
1.1  christos 	struct block *b;
1.1  christos
1.9  christos 	x = opt_state->all_edge_sets;
1.9  christos 	for (i = opt_state->n_edges * opt_state->edgewords; --i >= 0; )
1.1  christos 		x[i] = ~0;
1.1  christos
1.1  christos 	/* root->level is the highest level no found. */
1.9  christos 	memset(root->et.edom, 0, opt_state->edgewords * sizeof(*(uset)0));
1.9  christos 	memset(root->ef.edom, 0, opt_state->edgewords * sizeof(*(uset)0));
1.1  christos 	for (i = root->level; i >= 0; --i) {
1.9  christos 		for (b = opt_state->levels[i]; b != 0; b = b->link) {
1.9  christos 			propedom(opt_state, &b->et);
1.9  christos 			propedom(opt_state, &b->ef);
1.1  christos 		}
1.1  christos 	}
1.1  christos }
1.1  christos
1.1  christos /*
1.1  christos  * Find the backwards transitive closure of the flow graph.  These sets
1.1  christos  * are backwards in the sense that we find the set of nodes that reach
1.1  christos  * a given node, not the set of nodes that can be reached by a node.
1.1  christos  *
1.1  christos  * Assumes graph has been leveled.
1.1  christos  */
1.1  christos static void
1.9  christos find_closure(opt_state_t *opt_state, struct block *root)
1.1  christos {
1.1  christos 	int i;
1.1  christos 	struct block *b;
1.1  christos
1.1  christos 	/*
1.1  christos 	 * Initialize sets to contain no nodes.
1.1  christos 	 */
1.9  christos 	memset((char *)opt_state->all_closure_sets, 0,
1.9  christos 	      opt_state->n_blocks * opt_state->nodewords * sizeof(*opt_state->all_closure_sets));
1.1  christos
1.1  christos 	/* root->level is the highest level no found. */
1.1  christos 	for (i = root->level; i >= 0; --i) {
1.9  christos 		for (b = opt_state->levels[i]; b; b = b->link) {
1.1  christos 			SET_INSERT(b->closure, b->id);
1.1  christos 			if (JT(b) == 0)
1.1  christos 				continue;
1.9  christos 			SET_UNION(JT(b)->closure, b->closure, opt_state->nodewords);
1.9  christos 			SET_UNION(JF(b)->closure, b->closure, opt_state->nodewords);
1.1  christos 		}
1.1  christos 	}
1.1  christos }
1.1  christos
1.1  christos /*
1.1  christos  * Return the register number that is used by s.  If A and X are both
1.1  christos  * used, return AX_ATOM.  If no register is used, return -1.
1.1  christos  *
1.1  christos  * The implementation should probably change to an array access.
1.1  christos  */
1.1  christos static int
1.6  christos atomuse(struct stmt *s)
1.1  christos {
1.1  christos 	register int c = s->code;
1.1  christos
1.1  christos 	if (c == NOP)
1.1  christos 		return -1;
1.1  christos
1.1  christos 	switch (BPF_CLASS(c)) {
1.1  christos
1.1  christos 	case BPF_RET:
1.1  christos 		return (BPF_RVAL(c) == BPF_A) ? A_ATOM :
1.1  christos 			(BPF_RVAL(c) == BPF_X) ? X_ATOM : -1;
1.1  christos
1.1  christos 	case BPF_LD:
1.1  christos 	case BPF_LDX:
1.1  christos 		return (BPF_MODE(c) == BPF_IND) ? X_ATOM :
1.1  christos 			(BPF_MODE(c) == BPF_MEM) ? s->k : -1;
1.1  christos
1.1  christos 	case BPF_ST:
1.1  christos 		return A_ATOM;
1.1  christos
1.1  christos 	case BPF_STX:
1.1  christos 		return X_ATOM;
1.1  christos
1.1  christos 	case BPF_JMP:
1.1  christos 	case BPF_ALU:
1.1  christos 		if (BPF_SRC(c) == BPF_X)
1.1  christos 			return AX_ATOM;
1.1  christos 		return A_ATOM;
1.1  christos
1.1  christos 	case BPF_MISC:
1.1  christos 		return BPF_MISCOP(c) == BPF_TXA ? X_ATOM : A_ATOM;
1.1  christos 	}
1.1  christos 	abort();
1.1  christos 	/* NOTREACHED */
1.1  christos }
1.1  christos
1.1  christos /*
1.1  christos  * Return the register number that is defined by 's'.  We assume that
1.1  christos  * a single stmt cannot define more than one register.  If no register
1.1  christos  * is defined, return -1.
1.1  christos  *
1.1  christos  * The implementation should probably change to an array access.
1.1  christos  */
1.1  christos static int
1.6  christos atomdef(struct stmt *s)
1.1  christos {
1.1  christos 	if (s->code == NOP)
1.1  christos 		return -1;
1.1  christos
1.1  christos 	switch (BPF_CLASS(s->code)) {
1.1  christos
1.1  christos 	case BPF_LD:
1.1  christos 	case BPF_ALU:
1.1  christos 		return A_ATOM;
1.1  christos
1.1  christos 	case BPF_LDX:
1.1  christos 		return X_ATOM;
1.1  christos
1.1  christos 	case BPF_ST:
1.1  christos 	case BPF_STX:
1.1  christos 		return s->k;
1.1  christos
1.1  christos 	case BPF_MISC:
1.1  christos 		return BPF_MISCOP(s->code) == BPF_TAX ? X_ATOM : A_ATOM;
1.1  christos 	}
1.1  christos 	return -1;
1.1  christos }
1.1  christos
1.1  christos /*
1.1  christos  * Compute the sets of registers used, defined, and killed by 'b'.
1.1  christos  *
1.1  christos  * "Used" means that a statement in 'b' uses the register before any
1.1  christos  * statement in 'b' defines it, i.e. it uses the value left in
1.1  christos  * that register by a predecessor block of this block.
1.1  christos  * "Defined" means that a statement in 'b' defines it.
1.1  christos  * "Killed" means that a statement in 'b' defines it before any
1.1  christos  * statement in 'b' uses it, i.e. it kills the value left in that
1.1  christos  * register by a predecessor block of this block.
1.1  christos  */
1.1  christos static void
1.6  christos compute_local_ud(struct block *b)
1.1  christos {
1.1  christos 	struct slist *s;
1.9  christos 	atomset def = 0, use = 0, killed = 0;
1.1  christos 	int atom;
1.1  christos
1.1  christos 	for (s = b->stmts; s; s = s->next) {
1.1  christos 		if (s->s.code == NOP)
1.1  christos 			continue;
1.1  christos 		atom = atomuse(&s->s);
1.1  christos 		if (atom >= 0) {
1.1  christos 			if (atom == AX_ATOM) {
1.1  christos 				if (!ATOMELEM(def, X_ATOM))
1.1  christos 					use |= ATOMMASK(X_ATOM);
1.1  christos 				if (!ATOMELEM(def, A_ATOM))
1.1  christos 					use |= ATOMMASK(A_ATOM);
1.1  christos 			}
1.1  christos 			else if (atom < N_ATOMS) {
1.1  christos 				if (!ATOMELEM(def, atom))
1.1  christos 					use |= ATOMMASK(atom);
1.1  christos 			}
1.1  christos 			else
1.1  christos 				abort();
1.1  christos 		}
1.1  christos 		atom = atomdef(&s->s);
1.1  christos 		if (atom >= 0) {
1.1  christos 			if (!ATOMELEM(use, atom))
1.9  christos 				killed |= ATOMMASK(atom);
1.1  christos 			def |= ATOMMASK(atom);
1.1  christos 		}
1.1  christos 	}
1.1  christos 	if (BPF_CLASS(b->s.code) == BPF_JMP) {
1.1  christos 		/*
1.1  christos 		 * XXX - what about RET?
1.1  christos 		 */
1.1  christos 		atom = atomuse(&b->s);
1.1  christos 		if (atom >= 0) {
1.1  christos 			if (atom == AX_ATOM) {
1.1  christos 				if (!ATOMELEM(def, X_ATOM))
1.1  christos 					use |= ATOMMASK(X_ATOM);
1.1  christos 				if (!ATOMELEM(def, A_ATOM))
1.1  christos 					use |= ATOMMASK(A_ATOM);
1.1  christos 			}
1.1  christos 			else if (atom < N_ATOMS) {
1.1  christos 				if (!ATOMELEM(def, atom))
1.1  christos 					use |= ATOMMASK(atom);
1.1  christos 			}
1.1  christos 			else
1.1  christos 				abort();
1.1  christos 		}
1.1  christos 	}
1.1  christos
1.1  christos 	b->def = def;
1.9  christos 	b->kill = killed;
1.1  christos 	b->in_use = use;
1.1  christos }
1.1  christos
1.1  christos /*
1.1  christos  * Assume graph is already leveled.
1.1  christos  */
1.1  christos static void
1.9  christos find_ud(opt_state_t *opt_state, struct block *root)
1.1  christos {
1.1  christos 	int i, maxlevel;
1.1  christos 	struct block *p;
1.1  christos
1.1  christos 	/*
1.1  christos 	 * root->level is the highest level no found;
1.1  christos 	 * count down from there.
1.1  christos 	 */
1.1  christos 	maxlevel = root->level;
1.1  christos 	for (i = maxlevel; i >= 0; --i)
1.9  christos 		for (p = opt_state->levels[i]; p; p = p->link) {
1.1  christos 			compute_local_ud(p);
1.1  christos 			p->out_use = 0;
1.1  christos 		}
1.1  christos
1.1  christos 	for (i = 1; i <= maxlevel; ++i) {
1.9  christos 		for (p = opt_state->levels[i]; p; p = p->link) {
1.1  christos 			p->out_use |= JT(p)->in_use | JF(p)->in_use;
1.1  christos 			p->in_use |= p->out_use &~ p->kill;
1.1  christos 		}
1.1  christos 	}
1.1  christos }
1.1  christos static void
1.9  christos init_val(opt_state_t *opt_state)
1.1  christos {
1.9  christos 	opt_state->curval = 0;
1.9  christos 	opt_state->next_vnode = opt_state->vnode_base;
1.9  christos 	memset((char *)opt_state->vmap, 0, opt_state->maxval * sizeof(*opt_state->vmap));
1.9  christos 	memset((char *)opt_state->hashtbl, 0, sizeof opt_state->hashtbl);
1.1  christos }
1.1  christos
1.1  christos /* Because we really don't have an IR, this stuff is a little messy. */
1.1  christos static int
1.9  christos F(opt_state_t *opt_state, int code, int v0, int v1)
1.1  christos {
1.1  christos 	u_int hash;
1.1  christos 	int val;
1.1  christos 	struct valnode *p;
1.1  christos
1.1  christos 	hash = (u_int)code ^ (v0 << 4) ^ (v1 << 8);
1.1  christos 	hash %= MODULUS;
1.1  christos
1.9  christos 	for (p = opt_state->hashtbl[hash]; p; p = p->next)
1.1  christos 		if (p->code == code && p->v0 == v0 && p->v1 == v1)
1.1  christos 			return p->val;
1.1  christos
1.9  christos 	val = ++opt_state->curval;
1.1  christos 	if (BPF_MODE(code) == BPF_IMM &&
1.1  christos 	    (BPF_CLASS(code) == BPF_LD || BPF_CLASS(code) == BPF_LDX)) {
1.9  christos 		opt_state->vmap[val].const_val = v0;
1.9  christos 		opt_state->vmap[val].is_const = 1;
1.1  christos 	}
1.9  christos 	p = opt_state->next_vnode++;
1.1  christos 	p->val = val;
1.1  christos 	p->code = code;
1.1  christos 	p->v0 = v0;
1.1  christos 	p->v1 = v1;
1.9  christos 	p->next = opt_state->hashtbl[hash];
1.9  christos 	opt_state->hashtbl[hash] = p;
1.1  christos
1.1  christos 	return val;
1.1  christos }
1.1  christos
1.1  christos static inline void
1.6  christos vstore(struct stmt *s, int *valp, int newval, int alter)
1.1  christos {
1.1  christos 	if (alter && *valp == newval)
1.1  christos 		s->code = NOP;
1.1  christos 	else
1.1  christos 		*valp = newval;
1.1  christos }
1.1  christos
1.6  christos /*
1.6  christos  * Do constant-folding on binary operators.
1.6  christos  * (Unary operators are handled elsewhere.)
1.6  christos  */
1.1  christos static void
1.9  christos fold_op(compiler_state_t *cstate, struct icode *ic, opt_state_t *opt_state,
1.9  christos     struct stmt *s, int v0, int v1)
1.1  christos {
1.1  christos 	bpf_u_int32 a, b;
1.1  christos
1.9  christos 	a = opt_state->vmap[v0].const_val;
1.9  christos 	b = opt_state->vmap[v1].const_val;
1.1  christos
1.1  christos 	switch (BPF_OP(s->code)) {
1.1  christos 	case BPF_ADD:
1.1  christos 		a += b;
1.1  christos 		break;
1.1  christos
1.1  christos 	case BPF_SUB:
1.1  christos 		a -= b;
1.1  christos 		break;
1.1  christos
1.1  christos 	case BPF_MUL:
1.1  christos 		a *= b;
1.1  christos 		break;
1.1  christos
1.1  christos 	case BPF_DIV:
1.1  christos 		if (b == 0)
1.9  christos 			bpf_error(cstate, "division by zero");
1.1  christos 		a /= b;
1.1  christos 		break;
1.1  christos
1.7  christos 	case BPF_MOD:
1.7  christos 		if (b == 0)
1.9  christos 			bpf_error(cstate, "modulus by zero");
1.7  christos 		a %= b;
1.7  christos 		break;
1.7  christos
1.1  christos 	case BPF_AND:
1.1  christos 		a &= b;
1.1  christos 		break;
1.1  christos
1.1  christos 	case BPF_OR:
1.1  christos 		a |= b;
1.1  christos 		break;
1.1  christos
1.7  christos 	case BPF_XOR:
1.7  christos 		a ^= b;
1.7  christos 		break;
1.7  christos
1.1  christos 	case BPF_LSH:
1.1  christos 		a <<= b;
1.1  christos 		break;
1.1  christos
1.1  christos 	case BPF_RSH:
1.1  christos 		a >>= b;
1.1  christos 		break;
1.1  christos
1.1  christos 	default:
1.1  christos 		abort();
1.1  christos 	}
1.1  christos 	s->k = a;
1.1  christos 	s->code = BPF_LD|BPF_IMM;
1.9  christos 	opt_state->done = 0;
1.1  christos }
1.1  christos
1.1  christos static inline struct slist *
1.6  christos this_op(struct slist *s)
1.1  christos {
1.1  christos 	while (s != 0 && s->s.code == NOP)
1.1  christos 		s = s->next;
1.1  christos 	return s;
1.1  christos }
1.1  christos
1.1  christos static void
1.6  christos opt_not(struct block *b)
1.1  christos {
1.1  christos 	struct block *tmp = JT(b);
1.1  christos
1.1  christos 	JT(b) = JF(b);
1.1  christos 	JF(b) = tmp;
1.1  christos }
1.1  christos
1.1  christos static void
1.9  christos opt_peep(opt_state_t *opt_state, struct block *b)
1.1  christos {
1.1  christos 	struct slist *s;
1.1  christos 	struct slist *next, *last;
1.1  christos 	int val;
1.1  christos
1.1  christos 	s = b->stmts;
1.1  christos 	if (s == 0)
1.1  christos 		return;
1.1  christos
1.1  christos 	last = s;
1.1  christos 	for (/*empty*/; /*empty*/; s = next) {
1.1  christos 		/*
1.1  christos 		 * Skip over nops.
1.1  christos 		 */
1.1  christos 		s = this_op(s);
1.1  christos 		if (s == 0)
1.1  christos 			break;	/* nothing left in the block */
1.1  christos
1.1  christos 		/*
1.1  christos 		 * Find the next real instruction after that one
1.1  christos 		 * (skipping nops).
1.1  christos 		 */
1.1  christos 		next = this_op(s->next);
1.1  christos 		if (next == 0)
1.1  christos 			break;	/* no next instruction */
1.1  christos 		last = next;
1.1  christos
1.1  christos 		/*
1.1  christos 		 * st  M[k]	-->	st  M[k]
1.1  christos 		 * ldx M[k]		tax
1.1  christos 		 */
1.1  christos 		if (s->s.code == BPF_ST &&
1.1  christos 		    next->s.code == (BPF_LDX|BPF_MEM) &&
1.1  christos 		    s->s.k == next->s.k) {
1.9  christos 			opt_state->done = 0;
1.1  christos 			next->s.code = BPF_MISC|BPF_TAX;
1.1  christos 		}
1.1  christos 		/*
1.1  christos 		 * ld  #k	-->	ldx  #k
1.1  christos 		 * tax			txa
1.1  christos 		 */
1.1  christos 		if (s->s.code == (BPF_LD|BPF_IMM) &&
1.1  christos 		    next->s.code == (BPF_MISC|BPF_TAX)) {
1.1  christos 			s->s.code = BPF_LDX|BPF_IMM;
1.1  christos 			next->s.code = BPF_MISC|BPF_TXA;
1.9  christos 			opt_state->done = 0;
1.1  christos 		}
1.1  christos 		/*
1.1  christos 		 * This is an ugly special case, but it happens
1.1  christos 		 * when you say tcp[k] or udp[k] where k is a constant.
1.1  christos 		 */
1.1  christos 		if (s->s.code == (BPF_LD|BPF_IMM)) {
1.1  christos 			struct slist *add, *tax, *ild;
1.1  christos
1.1  christos 			/*
1.1  christos 			 * Check that X isn't used on exit from this
1.1  christos 			 * block (which the optimizer might cause).
1.1  christos 			 * We know the code generator won't generate
1.1  christos 			 * any local dependencies.
1.1  christos 			 */
1.1  christos 			if (ATOMELEM(b->out_use, X_ATOM))
1.1  christos 				continue;
1.1  christos
1.1  christos 			/*
1.1  christos 			 * Check that the instruction following the ldi
1.1  christos 			 * is an addx, or it's an ldxms with an addx
1.1  christos 			 * following it (with 0 or more nops between the
1.1  christos 			 * ldxms and addx).
1.1  christos 			 */
1.1  christos 			if (next->s.code != (BPF_LDX|BPF_MSH|BPF_B))
1.1  christos 				add = next;
1.1  christos 			else
1.1  christos 				add = this_op(next->next);
1.1  christos 			if (add == 0 || add->s.code != (BPF_ALU|BPF_ADD|BPF_X))
1.1  christos 				continue;
1.1  christos
1.1  christos 			/*
1.1  christos 			 * Check that a tax follows that (with 0 or more
1.1  christos 			 * nops between them).
1.1  christos 			 */
1.1  christos 			tax = this_op(add->next);
1.1  christos 			if (tax == 0 || tax->s.code != (BPF_MISC|BPF_TAX))
1.1  christos 				continue;
1.1  christos
1.1  christos 			/*
1.1  christos 			 * Check that an ild follows that (with 0 or more
1.1  christos 			 * nops between them).
1.1  christos 			 */
1.1  christos 			ild = this_op(tax->next);
1.1  christos 			if (ild == 0 || BPF_CLASS(ild->s.code) != BPF_LD ||
1.1  christos 			    BPF_MODE(ild->s.code) != BPF_IND)
1.1  christos 				continue;
1.1  christos 			/*
1.1  christos 			 * We want to turn this sequence:
1.1  christos 			 *
1.1  christos 			 * (004) ldi     #0x2		{s}
1.1  christos 			 * (005) ldxms   [14]		{next}  -- optional
1.1  christos 			 * (006) addx			{add}
1.1  christos 			 * (007) tax			{tax}
1.1  christos 			 * (008) ild     [x+0]		{ild}
1.1  christos 			 *
1.1  christos 			 * into this sequence:
1.1  christos 			 *
1.1  christos 			 * (004) nop
1.1  christos 			 * (005) ldxms   [14]
1.1  christos 			 * (006) nop
1.1  christos 			 * (007) nop
1.1  christos 			 * (008) ild     [x+2]
1.1  christos 			 *
1.1  christos 			 * XXX We need to check that X is not
1.1  christos 			 * subsequently used, because we want to change
1.1  christos 			 * what'll be in it after this sequence.
1.1  christos 			 *
1.1  christos 			 * We know we can eliminate the accumulator
1.1  christos 			 * modifications earlier in the sequence since
1.1  christos 			 * it is defined by the last stmt of this sequence
1.1  christos 			 * (i.e., the last statement of the sequence loads
1.1  christos 			 * a value into the accumulator, so we can eliminate
1.1  christos 			 * earlier operations on the accumulator).
1.1  christos 			 */
1.1  christos 			ild->s.k += s->s.k;
1.1  christos 			s->s.code = NOP;
1.1  christos 			add->s.code = NOP;
1.1  christos 			tax->s.code = NOP;
1.9  christos 			opt_state->done = 0;
1.1  christos 		}
1.1  christos 	}
1.1  christos 	/*
1.1  christos 	 * If the comparison at the end of a block is an equality
1.1  christos 	 * comparison against a constant, and nobody uses the value
1.1  christos 	 * we leave in the A register at the end of a block, and
1.1  christos 	 * the operation preceding the comparison is an arithmetic
1.1  christos 	 * operation, we can sometime optimize it away.
1.1  christos 	 */
1.1  christos 	if (b->s.code == (BPF_JMP|BPF_JEQ|BPF_K) &&
1.1  christos 	    !ATOMELEM(b->out_use, A_ATOM)) {
1.1  christos 	    	/*
1.1  christos 	    	 * We can optimize away certain subtractions of the
1.1  christos 	    	 * X register.
1.1  christos 	    	 */
1.1  christos 		if (last->s.code == (BPF_ALU|BPF_SUB|BPF_X)) {
1.1  christos 			val = b->val[X_ATOM];
1.9  christos 			if (opt_state->vmap[val].is_const) {
1.1  christos 				/*
1.1  christos 				 * If we have a subtract to do a comparison,
1.1  christos 				 * and the X register is a known constant,
1.1  christos 				 * we can merge this value into the
1.1  christos 				 * comparison:
1.1  christos 				 *
1.1  christos 				 * sub x  ->	nop
1.1  christos 				 * jeq #y	jeq #(x+y)
1.1  christos 				 */
1.9  christos 				b->s.k += opt_state->vmap[val].const_val;
1.1  christos 				last->s.code = NOP;
1.9  christos 				opt_state->done = 0;
1.1  christos 			} else if (b->s.k == 0) {
1.1  christos 				/*
1.1  christos 				 * If the X register isn't a constant,
1.1  christos 				 * and the comparison in the test is
1.1  christos 				 * against 0, we can compare with the
1.1  christos 				 * X register, instead:
1.1  christos 				 *
1.1  christos 				 * sub x  ->	nop
1.1  christos 				 * jeq #0	jeq x
1.1  christos 				 */
1.1  christos 				last->s.code = NOP;
1.1  christos 				b->s.code = BPF_JMP|BPF_JEQ|BPF_X;
1.9  christos 				opt_state->done = 0;
1.1  christos 			}
1.1  christos 		}
1.1  christos 		/*
1.1  christos 		 * Likewise, a constant subtract can be simplified:
1.1  christos 		 *
1.1  christos 		 * sub #x ->	nop
1.1  christos 		 * jeq #y ->	jeq #(x+y)
1.1  christos 		 */
1.1  christos 		else if (last->s.code == (BPF_ALU|BPF_SUB|BPF_K)) {
1.1  christos 			last->s.code = NOP;
1.1  christos 			b->s.k += last->s.k;
1.9  christos 			opt_state->done = 0;
1.1  christos 		}
1.1  christos 		/*
1.1  christos 		 * And, similarly, a constant AND can be simplified
1.1  christos 		 * if we're testing against 0, i.e.:
1.1  christos 		 *
1.1  christos 		 * and #k	nop
1.1  christos 		 * jeq #0  ->	jset #k
1.1  christos 		 */
1.1  christos 		else if (last->s.code == (BPF_ALU|BPF_AND|BPF_K) &&
1.1  christos 		    b->s.k == 0) {
1.1  christos 			b->s.k = last->s.k;
1.1  christos 			b->s.code = BPF_JMP|BPF_K|BPF_JSET;
1.1  christos 			last->s.code = NOP;
1.9  christos 			opt_state->done = 0;
1.1  christos 			opt_not(b);
1.1  christos 		}
1.1  christos 	}
1.1  christos 	/*
1.1  christos 	 * jset #0        ->   never
1.1  christos 	 * jset #ffffffff ->   always
1.1  christos 	 */
1.1  christos 	if (b->s.code == (BPF_JMP|BPF_K|BPF_JSET)) {
1.1  christos 		if (b->s.k == 0)
1.1  christos 			JT(b) = JF(b);
1.9  christos 		if ((u_int)b->s.k == 0xffffffffU)
1.1  christos 			JF(b) = JT(b);
1.1  christos 	}
1.1  christos 	/*
1.1  christos 	 * If we're comparing against the index register, and the index
1.1  christos 	 * register is a known constant, we can just compare against that
1.1  christos 	 * constant.
1.1  christos 	 */
1.1  christos 	val = b->val[X_ATOM];
1.9  christos 	if (opt_state->vmap[val].is_const && BPF_SRC(b->s.code) == BPF_X) {
1.9  christos 		bpf_int32 v = opt_state->vmap[val].const_val;
1.1  christos 		b->s.code &= ~BPF_X;
1.1  christos 		b->s.k = v;
1.1  christos 	}
1.1  christos 	/*
1.1  christos 	 * If the accumulator is a known constant, we can compute the
1.1  christos 	 * comparison result.
1.1  christos 	 */
1.1  christos 	val = b->val[A_ATOM];
1.9  christos 	if (opt_state->vmap[val].is_const && BPF_SRC(b->s.code) == BPF_K) {
1.9  christos 		bpf_int32 v = opt_state->vmap[val].const_val;
1.1  christos 		switch (BPF_OP(b->s.code)) {
1.1  christos
1.1  christos 		case BPF_JEQ:
1.1  christos 			v = v == b->s.k;
1.1  christos 			break;
1.1  christos
1.1  christos 		case BPF_JGT:
1.2  christos 			v = (unsigned)v > (unsigned)b->s.k;
1.1  christos 			break;
1.1  christos
1.1  christos 		case BPF_JGE:
1.2  christos 			v = (unsigned)v >= (unsigned)b->s.k;
1.1  christos 			break;
1.1  christos
1.1  christos 		case BPF_JSET:
1.1  christos 			v &= b->s.k;
1.1  christos 			break;
1.1  christos
1.1  christos 		default:
1.1  christos 			abort();
1.1  christos 		}
1.1  christos 		if (JF(b) != JT(b))
1.9  christos 			opt_state->done = 0;
1.1  christos 		if (v)
1.1  christos 			JF(b) = JT(b);
1.1  christos 		else
1.1  christos 			JT(b) = JF(b);
1.1  christos 	}
1.1  christos }
1.1  christos
1.1  christos /*
1.1  christos  * Compute the symbolic value of expression of 's', and update
1.1  christos  * anything it defines in the value table 'val'.  If 'alter' is true,
1.1  christos  * do various optimizations.  This code would be cleaner if symbolic
1.1  christos  * evaluation and code transformations weren't folded together.
1.1  christos  */
1.1  christos static void
1.9  christos opt_stmt(compiler_state_t *cstate, struct icode *ic, opt_state_t *opt_state,
1.9  christos     struct stmt *s, int val[], int alter)
1.1  christos {
1.1  christos 	int op;
1.1  christos 	int v;
1.1  christos
1.1  christos 	switch (s->code) {
1.1  christos
1.1  christos 	case BPF_LD|BPF_ABS|BPF_W:
1.1  christos 	case BPF_LD|BPF_ABS|BPF_H:
1.1  christos 	case BPF_LD|BPF_ABS|BPF_B:
1.9  christos 		v = F(opt_state, s->code, s->k, 0L);
1.1  christos 		vstore(s, &val[A_ATOM], v, alter);
1.1  christos 		break;
1.1  christos
1.1  christos 	case BPF_LD|BPF_IND|BPF_W:
1.1  christos 	case BPF_LD|BPF_IND|BPF_H:
1.1  christos 	case BPF_LD|BPF_IND|BPF_B:
1.1  christos 		v = val[X_ATOM];
1.9  christos 		if (alter && opt_state->vmap[v].is_const) {
1.1  christos 			s->code = BPF_LD|BPF_ABS|BPF_SIZE(s->code);
1.9  christos 			s->k += opt_state->vmap[v].const_val;
1.9  christos 			v = F(opt_state, s->code, s->k, 0L);
1.9  christos 			opt_state->done = 0;
1.1  christos 		}
1.1  christos 		else
1.9  christos 			v = F(opt_state, s->code, s->k, v);
1.1  christos 		vstore(s, &val[A_ATOM], v, alter);
1.1  christos 		break;
1.1  christos
1.1  christos 	case BPF_LD|BPF_LEN:
1.9  christos 		v = F(opt_state, s->code, 0L, 0L);
1.1  christos 		vstore(s, &val[A_ATOM], v, alter);
1.1  christos 		break;
1.1  christos
1.1  christos 	case BPF_LD|BPF_IMM:
1.1  christos 		v = K(s->k);
1.1  christos 		vstore(s, &val[A_ATOM], v, alter);
1.1  christos 		break;
1.1  christos
1.1  christos 	case BPF_LDX|BPF_IMM:
1.1  christos 		v = K(s->k);
1.1  christos 		vstore(s, &val[X_ATOM], v, alter);
1.1  christos 		break;
1.1  christos
1.1  christos 	case BPF_LDX|BPF_MSH|BPF_B:
1.9  christos 		v = F(opt_state, s->code, s->k, 0L);
1.1  christos 		vstore(s, &val[X_ATOM], v, alter);
1.1  christos 		break;
1.1  christos
1.1  christos 	case BPF_ALU|BPF_NEG:
1.9  christos 		if (alter && opt_state->vmap[val[A_ATOM]].is_const) {
1.1  christos 			s->code = BPF_LD|BPF_IMM;
1.9  christos 			s->k = -opt_state->vmap[val[A_ATOM]].const_val;
1.1  christos 			val[A_ATOM] = K(s->k);
1.1  christos 		}
1.1  christos 		else
1.9  christos 			val[A_ATOM] = F(opt_state, s->code, val[A_ATOM], 0L);
1.1  christos 		break;
1.1  christos
1.1  christos 	case BPF_ALU|BPF_ADD|BPF_K:
1.1  christos 	case BPF_ALU|BPF_SUB|BPF_K:
1.1  christos 	case BPF_ALU|BPF_MUL|BPF_K:
1.1  christos 	case BPF_ALU|BPF_DIV|BPF_K:
1.7  christos 	case BPF_ALU|BPF_MOD|BPF_K:
1.1  christos 	case BPF_ALU|BPF_AND|BPF_K:
1.1  christos 	case BPF_ALU|BPF_OR|BPF_K:
1.7  christos 	case BPF_ALU|BPF_XOR|BPF_K:
1.1  christos 	case BPF_ALU|BPF_LSH|BPF_K:
1.1  christos 	case BPF_ALU|BPF_RSH|BPF_K:
1.1  christos 		op = BPF_OP(s->code);
1.1  christos 		if (alter) {
1.1  christos 			if (s->k == 0) {
1.1  christos 				/* don't optimize away "sub #0"
1.1  christos 				 * as it may be needed later to
1.1  christos 				 * fixup the generated math code */
1.1  christos 				if (op == BPF_ADD ||
1.1  christos 				    op == BPF_LSH || op == BPF_RSH ||
1.7  christos 				    op == BPF_OR || op == BPF_XOR) {
1.1  christos 					s->code = NOP;
1.1  christos 					break;
1.1  christos 				}
1.1  christos 				if (op == BPF_MUL || op == BPF_AND) {
1.1  christos 					s->code = BPF_LD|BPF_IMM;
1.1  christos 					val[A_ATOM] = K(s->k);
1.1  christos 					break;
1.1  christos 				}
1.1  christos 			}
1.9  christos 			if (opt_state->vmap[val[A_ATOM]].is_const) {
1.9  christos 				fold_op(cstate, ic, opt_state, s, val[A_ATOM], K(s->k));
1.1  christos 				val[A_ATOM] = K(s->k);
1.1  christos 				break;
1.1  christos 			}
1.1  christos 		}
1.9  christos 		val[A_ATOM] = F(opt_state, s->code, val[A_ATOM], K(s->k));
1.1  christos 		break;
1.1  christos
1.1  christos 	case BPF_ALU|BPF_ADD|BPF_X:
1.1  christos 	case BPF_ALU|BPF_SUB|BPF_X:
1.1  christos 	case BPF_ALU|BPF_MUL|BPF_X:
1.1  christos 	case BPF_ALU|BPF_DIV|BPF_X:
1.7  christos 	case BPF_ALU|BPF_MOD|BPF_X:
1.1  christos 	case BPF_ALU|BPF_AND|BPF_X:
1.1  christos 	case BPF_ALU|BPF_OR|BPF_X:
1.7  christos 	case BPF_ALU|BPF_XOR|BPF_X:
1.1  christos 	case BPF_ALU|BPF_LSH|BPF_X:
1.1  christos 	case BPF_ALU|BPF_RSH|BPF_X:
1.1  christos 		op = BPF_OP(s->code);
1.9  christos 		if (alter && opt_state->vmap[val[X_ATOM]].is_const) {
1.9  christos 			if (opt_state->vmap[val[A_ATOM]].is_const) {
1.9  christos 				fold_op(cstate, ic, opt_state, s, val[A_ATOM], val[X_ATOM]);
1.1  christos 				val[A_ATOM] = K(s->k);
1.1  christos 			}
1.1  christos 			else {
1.1  christos 				s->code = BPF_ALU|BPF_K|op;
1.9  christos 				s->k = opt_state->vmap[val[X_ATOM]].const_val;
1.9  christos 				opt_state->done = 0;
1.1  christos 				val[A_ATOM] =
1.9  christos 					F(opt_state, s->code, val[A_ATOM], K(s->k));
1.1  christos 			}
1.1  christos 			break;
1.1  christos 		}
1.1  christos 		/*
1.1  christos 		 * Check if we're doing something to an accumulator
1.1  christos 		 * that is 0, and simplify.  This may not seem like
1.1  christos 		 * much of a simplification but it could open up further
1.1  christos 		 * optimizations.
1.1  christos 		 * XXX We could also check for mul by 1, etc.
1.1  christos 		 */
1.9  christos 		if (alter && opt_state->vmap[val[A_ATOM]].is_const
1.9  christos 		    && opt_state->vmap[val[A_ATOM]].const_val == 0) {
1.7  christos 			if (op == BPF_ADD || op == BPF_OR || op == BPF_XOR) {
1.1  christos 				s->code = BPF_MISC|BPF_TXA;
1.1  christos 				vstore(s, &val[A_ATOM], val[X_ATOM], alter);
1.1  christos 				break;
1.1  christos 			}
1.7  christos 			else if (op == BPF_MUL || op == BPF_DIV || op == BPF_MOD ||
1.1  christos 				 op == BPF_AND || op == BPF_LSH || op == BPF_RSH) {
1.1  christos 				s->code = BPF_LD|BPF_IMM;
1.1  christos 				s->k = 0;
1.1  christos 				vstore(s, &val[A_ATOM], K(s->k), alter);
1.1  christos 				break;
1.1  christos 			}
1.1  christos 			else if (op == BPF_NEG) {
1.1  christos 				s->code = NOP;
1.1  christos 				break;
1.1  christos 			}
1.1  christos 		}
1.9  christos 		val[A_ATOM] = F(opt_state, s->code, val[A_ATOM], val[X_ATOM]);
1.1  christos 		break;
1.1  christos
1.1  christos 	case BPF_MISC|BPF_TXA:
1.1  christos 		vstore(s, &val[A_ATOM], val[X_ATOM], alter);
1.1  christos 		break;
1.1  christos
1.1  christos 	case BPF_LD|BPF_MEM:
1.1  christos 		v = val[s->k];
1.9  christos 		if (alter && opt_state->vmap[v].is_const) {
1.1  christos 			s->code = BPF_LD|BPF_IMM;
1.9  christos 			s->k = opt_state->vmap[v].const_val;
1.9  christos 			opt_state->done = 0;
1.1  christos 		}
1.1  christos 		vstore(s, &val[A_ATOM], v, alter);
1.1  christos 		break;
1.1  christos
1.1  christos 	case BPF_MISC|BPF_TAX:
1.1  christos 		vstore(s, &val[X_ATOM], val[A_ATOM], alter);
1.1  christos 		break;
1.1  christos
1.1  christos 	case BPF_LDX|BPF_MEM:
1.1  christos 		v = val[s->k];
1.9  christos 		if (alter && opt_state->vmap[v].is_const) {
1.1  christos 			s->code = BPF_LDX|BPF_IMM;
1.9  christos 			s->k = opt_state->vmap[v].const_val;
1.9  christos 			opt_state->done = 0;
1.1  christos 		}
1.1  christos 		vstore(s, &val[X_ATOM], v, alter);
1.1  christos 		break;
1.1  christos
1.1  christos 	case BPF_ST:
1.1  christos 		vstore(s, &val[s->k], val[A_ATOM], alter);
1.1  christos 		break;
1.1  christos
1.1  christos 	case BPF_STX:
1.1  christos 		vstore(s, &val[s->k], val[X_ATOM], alter);
1.1  christos 		break;
1.1  christos 	}
1.1  christos }
1.1  christos
1.1  christos static void
1.9  christos deadstmt(opt_state_t *opt_state, register struct stmt *s, register struct stmt *last[])
1.1  christos {
1.1  christos 	register int atom;
1.1  christos
1.1  christos 	atom = atomuse(s);
1.1  christos 	if (atom >= 0) {
1.1  christos 		if (atom == AX_ATOM) {
1.1  christos 			last[X_ATOM] = 0;
1.1  christos 			last[A_ATOM] = 0;
1.1  christos 		}
1.1  christos 		else
1.1  christos 			last[atom] = 0;
1.1  christos 	}
1.1  christos 	atom = atomdef(s);
1.1  christos 	if (atom >= 0) {
1.1  christos 		if (last[atom]) {
1.9  christos 			opt_state->done = 0;
1.1  christos 			last[atom]->code = NOP;
1.1  christos 		}
1.1  christos 		last[atom] = s;
1.1  christos 	}
1.1  christos }
1.1  christos
1.1  christos static void
1.9  christos opt_deadstores(opt_state_t *opt_state, register struct block *b)
1.1  christos {
1.1  christos 	register struct slist *s;
1.1  christos 	register int atom;
1.1  christos 	struct stmt *last[N_ATOMS];
1.1  christos
1.1  christos 	memset((char *)last, 0, sizeof last);
1.1  christos
1.1  christos 	for (s = b->stmts; s != 0; s = s->next)
1.9  christos 		deadstmt(opt_state, &s->s, last);
1.9  christos 	deadstmt(opt_state, &b->s, last);
1.1  christos
1.1  christos 	for (atom = 0; atom < N_ATOMS; ++atom)
1.1  christos 		if (last[atom] && !ATOMELEM(b->out_use, atom)) {
1.1  christos 			last[atom]->code = NOP;
1.9  christos 			opt_state->done = 0;
1.1  christos 		}
1.1  christos }
1.1  christos
1.1  christos static void
1.9  christos opt_blk(compiler_state_t *cstate, struct icode *ic, opt_state_t *opt_state,
1.9  christos     struct block *b, int do_stmts)
1.1  christos {
1.1  christos 	struct slist *s;
1.1  christos 	struct edge *p;
1.1  christos 	int i;
1.1  christos 	bpf_int32 aval, xval;
1.1  christos
1.1  christos #if 0
1.1  christos 	for (s = b->stmts; s && s->next; s = s->next)
1.1  christos 		if (BPF_CLASS(s->s.code) == BPF_JMP) {
1.1  christos 			do_stmts = 0;
1.1  christos 			break;
1.1  christos 		}
1.1  christos #endif
1.1  christos
1.1  christos 	/*
1.1  christos 	 * Initialize the atom values.
1.1  christos 	 */
1.1  christos 	p = b->in_edges;
1.1  christos 	if (p == 0) {
1.1  christos 		/*
1.1  christos 		 * We have no predecessors, so everything is undefined
1.1  christos 		 * upon entry to this block.
1.1  christos 		 */
1.1  christos 		memset((char *)b->val, 0, sizeof(b->val));
1.1  christos 	} else {
1.1  christos 		/*
1.1  christos 		 * Inherit values from our predecessors.
1.1  christos 		 *
1.1  christos 		 * First, get the values from the predecessor along the
1.1  christos 		 * first edge leading to this node.
1.1  christos 		 */
1.1  christos 		memcpy((char *)b->val, (char *)p->pred->val, sizeof(b->val));
1.1  christos 		/*
1.1  christos 		 * Now look at all the other nodes leading to this node.
1.1  christos 		 * If, for the predecessor along that edge, a register
1.1  christos 		 * has a different value from the one we have (i.e.,
1.1  christos 		 * control paths are merging, and the merging paths
1.1  christos 		 * assign different values to that register), give the
1.1  christos 		 * register the undefined value of 0.
1.1  christos 		 */
1.1  christos 		while ((p = p->next) != NULL) {
1.1  christos 			for (i = 0; i < N_ATOMS; ++i)
1.1  christos 				if (b->val[i] != p->pred->val[i])
1.1  christos 					b->val[i] = 0;
1.1  christos 		}
1.1  christos 	}
1.1  christos 	aval = b->val[A_ATOM];
1.1  christos 	xval = b->val[X_ATOM];
1.1  christos 	for (s = b->stmts; s; s = s->next)
1.9  christos 		opt_stmt(cstate, ic, opt_state, &s->s, b->val, do_stmts);
1.1  christos
1.1  christos 	/*
1.1  christos 	 * This is a special case: if we don't use anything from this
1.1  christos 	 * block, and we load the accumulator or index register with a
1.1  christos 	 * value that is already there, or if this block is a return,
1.1  christos 	 * eliminate all the statements.
1.1  christos 	 *
1.1  christos 	 * XXX - what if it does a store?
1.1  christos 	 *
1.1  christos 	 * XXX - why does it matter whether we use anything from this
1.1  christos 	 * block?  If the accumulator or index register doesn't change
1.1  christos 	 * its value, isn't that OK even if we use that value?
1.1  christos 	 *
1.1  christos 	 * XXX - if we load the accumulator with a different value,
1.1  christos 	 * and the block ends with a conditional branch, we obviously
1.1  christos 	 * can't eliminate it, as the branch depends on that value.
1.1  christos 	 * For the index register, the conditional branch only depends
1.1  christos 	 * on the index register value if the test is against the index
1.1  christos 	 * register value rather than a constant; if nothing uses the
1.1  christos 	 * value we put into the index register, and we're not testing
1.1  christos 	 * against the index register's value, and there aren't any
1.1  christos 	 * other problems that would keep us from eliminating this
1.1  christos 	 * block, can we eliminate it?
1.1  christos 	 */
1.1  christos 	if (do_stmts &&
1.1  christos 	    ((b->out_use == 0 && aval != 0 && b->val[A_ATOM] == aval &&
1.1  christos 	      xval != 0 && b->val[X_ATOM] == xval) ||
1.1  christos 	     BPF_CLASS(b->s.code) == BPF_RET)) {
1.1  christos 		if (b->stmts != 0) {
1.1  christos 			b->stmts = 0;
1.9  christos 			opt_state->done = 0;
1.1  christos 		}
1.1  christos 	} else {
1.9  christos 		opt_peep(opt_state, b);
1.9  christos 		opt_deadstores(opt_state, b);
1.1  christos 	}
1.1  christos 	/*
1.1  christos 	 * Set up values for branch optimizer.
1.1  christos 	 */
1.1  christos 	if (BPF_SRC(b->s.code) == BPF_K)
1.1  christos 		b->oval = K(b->s.k);
1.1  christos 	else
1.1  christos 		b->oval = b->val[X_ATOM];
1.1  christos 	b->et.code = b->s.code;
1.1  christos 	b->ef.code = -b->s.code;
1.1  christos }
1.1  christos
1.1  christos /*
1.1  christos  * Return true if any register that is used on exit from 'succ', has
1.1  christos  * an exit value that is different from the corresponding exit value
1.1  christos  * from 'b'.
1.1  christos  */
1.1  christos static int
1.6  christos use_conflict(struct block *b, struct block *succ)
1.1  christos {
1.1  christos 	int atom;
1.1  christos 	atomset use = succ->out_use;
1.1  christos
1.1  christos 	if (use == 0)
1.1  christos 		return 0;
1.1  christos
1.1  christos 	for (atom = 0; atom < N_ATOMS; ++atom)
1.1  christos 		if (ATOMELEM(use, atom))
1.1  christos 			if (b->val[atom] != succ->val[atom])
1.1  christos 				return 1;
1.1  christos 	return 0;
1.1  christos }
1.1  christos
1.1  christos static struct block *
1.6  christos fold_edge(struct block *child, struct edge *ep)
1.1  christos {
1.1  christos 	int sense;
1.1  christos 	int aval0, aval1, oval0, oval1;
1.1  christos 	int code = ep->code;
1.1  christos
1.1  christos 	if (code < 0) {
1.1  christos 		code = -code;
1.1  christos 		sense = 0;
1.1  christos 	} else
1.1  christos 		sense = 1;
1.1  christos
1.1  christos 	if (child->s.code != code)
1.1  christos 		return 0;
1.1  christos
1.1  christos 	aval0 = child->val[A_ATOM];
1.1  christos 	oval0 = child->oval;
1.1  christos 	aval1 = ep->pred->val[A_ATOM];
1.1  christos 	oval1 = ep->pred->oval;
1.1  christos
1.1  christos 	if (aval0 != aval1)
1.1  christos 		return 0;
1.1  christos
1.1  christos 	if (oval0 == oval1)
1.1  christos 		/*
1.1  christos 		 * The operands of the branch instructions are
1.1  christos 		 * identical, so the result is true if a true
1.1  christos 		 * branch was taken to get here, otherwise false.
1.1  christos 		 */
1.1  christos 		return sense ? JT(child) : JF(child);
1.1  christos
1.1  christos 	if (sense && code == (BPF_JMP|BPF_JEQ|BPF_K))
1.1  christos 		/*
1.1  christos 		 * At this point, we only know the comparison if we
1.1  christos 		 * came down the true branch, and it was an equality
1.1  christos 		 * comparison with a constant.
1.1  christos 		 *
1.1  christos 		 * I.e., if we came down the true branch, and the branch
1.1  christos 		 * was an equality comparison with a constant, we know the
1.1  christos 		 * accumulator contains that constant.  If we came down
1.1  christos 		 * the false branch, or the comparison wasn't with a
1.1  christos 		 * constant, we don't know what was in the accumulator.
1.1  christos 		 *
1.1  christos 		 * We rely on the fact that distinct constants have distinct
1.1  christos 		 * value numbers.
1.1  christos 		 */
1.1  christos 		return JF(child);
1.1  christos
1.1  christos 	return 0;
1.1  christos }
1.1  christos
1.1  christos static void
1.9  christos opt_j(opt_state_t *opt_state, struct edge *ep)
1.1  christos {
1.1  christos 	register int i, k;
1.1  christos 	register struct block *target;
1.1  christos
1.1  christos 	if (JT(ep->succ) == 0)
1.1  christos 		return;
1.1  christos
1.1  christos 	if (JT(ep->succ) == JF(ep->succ)) {
1.1  christos 		/*
1.1  christos 		 * Common branch targets can be eliminated, provided
1.1  christos 		 * there is no data dependency.
1.1  christos 		 */
1.1  christos 		if (!use_conflict(ep->pred, ep->succ->et.succ)) {
1.9  christos 			opt_state->done = 0;
1.1  christos 			ep->succ = JT(ep->succ);
1.1  christos 		}
1.1  christos 	}
1.1  christos 	/*
1.1  christos 	 * For each edge dominator that matches the successor of this
1.1  christos 	 * edge, promote the edge successor to the its grandchild.
1.1  christos 	 *
1.1  christos 	 * XXX We violate the set abstraction here in favor a reasonably
1.1  christos 	 * efficient loop.
1.1  christos 	 */
1.1  christos  top:
1.9  christos 	for (i = 0; i < opt_state->edgewords; ++i) {
1.1  christos 		register bpf_u_int32 x = ep->edom[i];
1.1  christos
1.1  christos 		while (x != 0) {
1.1  christos 			k = ffs(x) - 1;
1.1  christos 			x &=~ (1 << k);
1.1  christos 			k += i * BITS_PER_WORD;
1.1  christos
1.9  christos 			target = fold_edge(ep->succ, opt_state->edges[k]);
1.1  christos 			/*
1.1  christos 			 * Check that there is no data dependency between
1.1  christos 			 * nodes that will be violated if we move the edge.
1.1  christos 			 */
1.1  christos 			if (target != 0 && !use_conflict(ep->pred, target)) {
1.9  christos 				opt_state->done = 0;
1.1  christos 				ep->succ = target;
1.1  christos 				if (JT(target) != 0)
1.1  christos 					/*
1.1  christos 					 * Start over unless we hit a leaf.
1.1  christos 					 */
1.1  christos 					goto top;
1.1  christos 				return;
1.1  christos 			}
1.1  christos 		}
1.1  christos 	}
1.1  christos }
1.1  christos
1.1  christos
1.1  christos static void
1.9  christos or_pullup(opt_state_t *opt_state, struct block *b)
1.1  christos {
1.1  christos 	int val, at_top;
1.1  christos 	struct block *pull;
1.1  christos 	struct block **diffp, **samep;
1.1  christos 	struct edge *ep;
1.1  christos
1.1  christos 	ep = b->in_edges;
1.1  christos 	if (ep == 0)
1.1  christos 		return;
1.1  christos
1.1  christos 	/*
1.1  christos 	 * Make sure each predecessor loads the same value.
1.1  christos 	 * XXX why?
1.1  christos 	 */
1.1  christos 	val = ep->pred->val[A_ATOM];
1.1  christos 	for (ep = ep->next; ep != 0; ep = ep->next)
1.1  christos 		if (val != ep->pred->val[A_ATOM])
1.1  christos 			return;
1.1  christos
1.1  christos 	if (JT(b->in_edges->pred) == b)
1.1  christos 		diffp = &JT(b->in_edges->pred);
1.1  christos 	else
1.1  christos 		diffp = &JF(b->in_edges->pred);
1.1  christos
1.1  christos 	at_top = 1;
1.1  christos 	while (1) {
1.1  christos 		if (*diffp == 0)
1.1  christos 			return;
1.1  christos
1.1  christos 		if (JT(*diffp) != JT(b))
1.1  christos 			return;
1.1  christos
1.1  christos 		if (!SET_MEMBER((*diffp)->dom, b->id))
1.1  christos 			return;
1.1  christos
1.1  christos 		if ((*diffp)->val[A_ATOM] != val)
1.1  christos 			break;
1.1  christos
1.1  christos 		diffp = &JF(*diffp);
1.1  christos 		at_top = 0;
1.1  christos 	}
1.1  christos 	samep = &JF(*diffp);
1.1  christos 	while (1) {
1.1  christos 		if (*samep == 0)
1.1  christos 			return;
1.1  christos
1.1  christos 		if (JT(*samep) != JT(b))
1.1  christos 			return;
1.1  christos
1.1  christos 		if (!SET_MEMBER((*samep)->dom, b->id))
1.1  christos 			return;
1.1  christos
1.1  christos 		if ((*samep)->val[A_ATOM] == val)
1.1  christos 			break;
1.1  christos
1.1  christos 		/* XXX Need to check that there are no data dependencies
1.1  christos 		   between dp0 and dp1.  Currently, the code generator
1.1  christos 		   will not produce such dependencies. */
1.1  christos 		samep = &JF(*samep);
1.1  christos 	}
1.1  christos #ifdef notdef
1.1  christos 	/* XXX This doesn't cover everything. */
1.1  christos 	for (i = 0; i < N_ATOMS; ++i)
1.1  christos 		if ((*samep)->val[i] != pred->val[i])
1.1  christos 			return;
1.1  christos #endif
1.1  christos 	/* Pull up the node. */
1.1  christos 	pull = *samep;
1.1  christos 	*samep = JF(pull);
1.1  christos 	JF(pull) = *diffp;
1.1  christos
1.1  christos 	/*
1.1  christos 	 * At the top of the chain, each predecessor needs to point at the
1.1  christos 	 * pulled up node.  Inside the chain, there is only one predecessor
1.1  christos 	 * to worry about.
1.1  christos 	 */
1.1  christos 	if (at_top) {
1.1  christos 		for (ep = b->in_edges; ep != 0; ep = ep->next) {
1.1  christos 			if (JT(ep->pred) == b)
1.1  christos 				JT(ep->pred) = pull;
1.1  christos 			else
1.1  christos 				JF(ep->pred) = pull;
1.1  christos 		}
1.1  christos 	}
1.1  christos 	else
1.1  christos 		*diffp = pull;
1.1  christos
1.9  christos 	opt_state->done = 0;
1.1  christos }
1.1  christos
1.1  christos static void
1.9  christos and_pullup(opt_state_t *opt_state, struct block *b)
1.1  christos {
1.1  christos 	int val, at_top;
1.1  christos 	struct block *pull;
1.1  christos 	struct block **diffp, **samep;
1.1  christos 	struct edge *ep;
1.1  christos
1.1  christos 	ep = b->in_edges;
1.1  christos 	if (ep == 0)
1.1  christos 		return;
1.1  christos
1.1  christos 	/*
1.1  christos 	 * Make sure each predecessor loads the same value.
1.1  christos 	 */
1.1  christos 	val = ep->pred->val[A_ATOM];
1.1  christos 	for (ep = ep->next; ep != 0; ep = ep->next)
1.1  christos 		if (val != ep->pred->val[A_ATOM])
1.1  christos 			return;
1.1  christos
1.1  christos 	if (JT(b->in_edges->pred) == b)
1.1  christos 		diffp = &JT(b->in_edges->pred);
1.1  christos 	else
1.1  christos 		diffp = &JF(b->in_edges->pred);
1.1  christos
1.1  christos 	at_top = 1;
1.1  christos 	while (1) {
1.1  christos 		if (*diffp == 0)
1.1  christos 			return;
1.1  christos
1.1  christos 		if (JF(*diffp) != JF(b))
1.1  christos 			return;
1.1  christos
1.1  christos 		if (!SET_MEMBER((*diffp)->dom, b->id))
1.1  christos 			return;
1.1  christos
1.1  christos 		if ((*diffp)->val[A_ATOM] != val)
1.1  christos 			break;
1.1  christos
1.1  christos 		diffp = &JT(*diffp);
1.1  christos 		at_top = 0;
1.1  christos 	}
1.1  christos 	samep = &JT(*diffp);
1.1  christos 	while (1) {
1.1  christos 		if (*samep == 0)
1.1  christos 			return;
1.1  christos
1.1  christos 		if (JF(*samep) != JF(b))
1.1  christos 			return;
1.1  christos
1.1  christos 		if (!SET_MEMBER((*samep)->dom, b->id))
1.1  christos 			return;
1.1  christos
1.1  christos 		if ((*samep)->val[A_ATOM] == val)
1.1  christos 			break;
1.1  christos
1.1  christos 		/* XXX Need to check that there are no data dependencies
1.1  christos 		   between diffp and samep.  Currently, the code generator
1.1  christos 		   will not produce such dependencies. */
1.1  christos 		samep = &JT(*samep);
1.1  christos 	}
1.1  christos #ifdef notdef
1.1  christos 	/* XXX This doesn't cover everything. */
1.1  christos 	for (i = 0; i < N_ATOMS; ++i)
1.1  christos 		if ((*samep)->val[i] != pred->val[i])
1.1  christos 			return;
1.1  christos #endif
1.1  christos 	/* Pull up the node. */
1.1  christos 	pull = *samep;
1.1  christos 	*samep = JT(pull);
1.1  christos 	JT(pull) = *diffp;
1.1  christos
1.1  christos 	/*
1.1  christos 	 * At the top of the chain, each predecessor needs to point at the
1.1  christos 	 * pulled up node.  Inside the chain, there is only one predecessor
1.1  christos 	 * to worry about.
1.1  christos 	 */
1.1  christos 	if (at_top) {
1.1  christos 		for (ep = b->in_edges; ep != 0; ep = ep->next) {
1.1  christos 			if (JT(ep->pred) == b)
1.1  christos 				JT(ep->pred) = pull;
1.1  christos 			else
1.1  christos 				JF(ep->pred) = pull;
1.1  christos 		}
1.1  christos 	}
1.1  christos 	else
1.1  christos 		*diffp = pull;
1.1  christos
1.9  christos 	opt_state->done = 0;
1.1  christos }
1.1  christos
1.1  christos static void
1.9  christos opt_blks(compiler_state_t *cstate, opt_state_t *opt_state, struct icode *ic,
1.9  christos     int do_stmts)
1.1  christos {
1.1  christos 	int i, maxlevel;
1.1  christos 	struct block *p;
1.1  christos
1.9  christos 	init_val(opt_state);
1.9  christos 	maxlevel = ic->root->level;
1.1  christos
1.9  christos 	find_inedges(opt_state, ic->root);
1.1  christos 	for (i = maxlevel; i >= 0; --i)
1.9  christos 		for (p = opt_state->levels[i]; p; p = p->link)
1.9  christos 			opt_blk(cstate, ic, opt_state, p, do_stmts);
1.1  christos
1.1  christos 	if (do_stmts)
1.1  christos 		/*
1.1  christos 		 * No point trying to move branches; it can't possibly
1.1  christos 		 * make a difference at this point.
1.1  christos 		 */
1.1  christos 		return;
1.1  christos
1.1  christos 	for (i = 1; i <= maxlevel; ++i) {
1.9  christos 		for (p = opt_state->levels[i]; p; p = p->link) {
1.9  christos 			opt_j(opt_state, &p->et);
1.9  christos 			opt_j(opt_state, &p->ef);
1.1  christos 		}
1.1  christos 	}
1.1  christos
1.9  christos 	find_inedges(opt_state, ic->root);
1.1  christos 	for (i = 1; i <= maxlevel; ++i) {
1.9  christos 		for (p = opt_state->levels[i]; p; p = p->link) {
1.9  christos 			or_pullup(opt_state, p);
1.9  christos 			and_pullup(opt_state, p);
1.1  christos 		}
1.1  christos 	}
1.1  christos }
1.1  christos
1.1  christos static inline void
1.6  christos link_inedge(struct edge *parent, struct block *child)
1.1  christos {
1.1  christos 	parent->next = child->in_edges;
1.1  christos 	child->in_edges = parent;
1.1  christos }
1.1  christos
1.1  christos static void
1.9  christos find_inedges(opt_state_t *opt_state, struct block *root)
1.1  christos {
1.1  christos 	int i;
1.1  christos 	struct block *b;
1.1  christos
1.9  christos 	for (i = 0; i < opt_state->n_blocks; ++i)
1.9  christos 		opt_state->blocks[i]->in_edges = 0;
1.1  christos
1.1  christos 	/*
1.1  christos 	 * Traverse the graph, adding each edge to the predecessor
1.1  christos 	 * list of its successors.  Skip the leaves (i.e. level 0).
1.1  christos 	 */
1.1  christos 	for (i = root->level; i > 0; --i) {
1.9  christos 		for (b = opt_state->levels[i]; b != 0; b = b->link) {
1.1  christos 			link_inedge(&b->et, JT(b));
1.1  christos 			link_inedge(&b->ef, JF(b));
1.1  christos 		}
1.1  christos 	}
1.1  christos }
1.1  christos
1.1  christos static void
1.6  christos opt_root(struct block **b)
1.1  christos {
1.1  christos 	struct slist *tmp, *s;
1.1  christos
1.1  christos 	s = (*b)->stmts;
1.1  christos 	(*b)->stmts = 0;
1.1  christos 	while (BPF_CLASS((*b)->s.code) == BPF_JMP && JT(*b) == JF(*b))
1.1  christos 		*b = JT(*b);
1.1  christos
1.1  christos 	tmp = (*b)->stmts;
1.1  christos 	if (tmp != 0)
1.1  christos 		sappend(s, tmp);
1.1  christos 	(*b)->stmts = s;
1.1  christos
1.1  christos 	/*
1.1  christos 	 * If the root node is a return, then there is no
1.1  christos 	 * point executing any statements (since the bpf machine
1.1  christos 	 * has no side effects).
1.1  christos 	 */
1.1  christos 	if (BPF_CLASS((*b)->s.code) == BPF_RET)
1.1  christos 		(*b)->stmts = 0;
1.1  christos }
1.1  christos
1.1  christos static void
1.9  christos opt_loop(compiler_state_t *cstate, opt_state_t *opt_state, struct icode *ic,
1.9  christos     int do_stmts)
1.1  christos {
1.1  christos
1.1  christos #ifdef BDEBUG
1.9  christos 	if (pcap_optimizer_debug > 1) {
1.1  christos 		printf("opt_loop(root, %d) begin\n", do_stmts);
1.9  christos 		opt_dump(cstate, ic);
1.1  christos 	}
1.1  christos #endif
1.1  christos 	do {
1.9  christos 		opt_state->done = 1;
1.9  christos 		find_levels(opt_state, ic);
1.9  christos 		find_dom(opt_state, ic->root);
1.9  christos 		find_closure(opt_state, ic->root);
1.9  christos 		find_ud(opt_state, ic->root);
1.9  christos 		find_edom(opt_state, ic->root);
1.9  christos 		opt_blks(cstate, opt_state, ic, do_stmts);
1.1  christos #ifdef BDEBUG
1.9  christos 		if (pcap_optimizer_debug > 1) {
1.9  christos 			printf("opt_loop(root, %d) bottom, done=%d\n", do_stmts, opt_state->done);
1.9  christos 			opt_dump(cstate, ic);
1.1  christos 		}
1.1  christos #endif
1.9  christos 	} while (!opt_state->done);
1.1  christos }
1.1  christos
1.1  christos /*
1.1  christos  * Optimize the filter code in its dag representation.
1.1  christos  */
1.1  christos void
1.9  christos bpf_optimize(compiler_state_t *cstate, struct icode *ic)
1.1  christos {
1.9  christos 	opt_state_t opt_state;
1.1  christos
1.9  christos 	opt_init(cstate, &opt_state, ic);
1.9  christos 	opt_loop(cstate, &opt_state, ic, 0);
1.9  christos 	opt_loop(cstate, &opt_state, ic, 1);
1.9  christos 	intern_blocks(&opt_state, ic);
1.1  christos #ifdef BDEBUG
1.9  christos 	if (pcap_optimizer_debug > 1) {
1.1  christos 		printf("after intern_blocks()\n");
1.9  christos 		opt_dump(cstate, ic);
1.1  christos 	}
1.1  christos #endif
1.9  christos 	opt_root(&ic->root);
1.1  christos #ifdef BDEBUG
1.9  christos 	if (pcap_optimizer_debug > 1) {
1.1  christos 		printf("after opt_root()\n");
1.9  christos 		opt_dump(cstate, ic);
1.1  christos 	}
1.1  christos #endif
1.9  christos 	opt_cleanup(&opt_state);
1.1  christos }
1.1  christos
1.1  christos static void
1.9  christos make_marks(struct icode *ic, struct block *p)
1.1  christos {
1.9  christos 	if (!isMarked(ic, p)) {
1.9  christos 		Mark(ic, p);
1.1  christos 		if (BPF_CLASS(p->s.code) != BPF_RET) {
1.9  christos 			make_marks(ic, JT(p));
1.9  christos 			make_marks(ic, JF(p));
1.1  christos 		}
1.1  christos 	}
1.1  christos }
1.1  christos
1.1  christos /*
1.9  christos  * Mark code array such that isMarked(ic->cur_mark, i) is true
1.1  christos  * only for nodes that are alive.
1.1  christos  */
1.1  christos static void
1.9  christos mark_code(struct icode *ic)
1.1  christos {
1.9  christos 	ic->cur_mark += 1;
1.9  christos 	make_marks(ic, ic->root);
1.1  christos }
1.1  christos
1.1  christos /*
1.1  christos  * True iff the two stmt lists load the same value from the packet into
1.1  christos  * the accumulator.
1.1  christos  */
1.1  christos static int
1.6  christos eq_slist(struct slist *x, struct slist *y)
1.1  christos {
1.1  christos 	while (1) {
1.1  christos 		while (x && x->s.code == NOP)
1.1  christos 			x = x->next;
1.1  christos 		while (y && y->s.code == NOP)
1.1  christos 			y = y->next;
1.1  christos 		if (x == 0)
1.1  christos 			return y == 0;
1.1  christos 		if (y == 0)
1.1  christos 			return x == 0;
1.1  christos 		if (x->s.code != y->s.code || x->s.k != y->s.k)
1.1  christos 			return 0;
1.1  christos 		x = x->next;
1.1  christos 		y = y->next;
1.1  christos 	}
1.1  christos }
1.1  christos
1.1  christos static inline int
1.6  christos eq_blk(struct block *b0, struct block *b1)
1.1  christos {
1.1  christos 	if (b0->s.code == b1->s.code &&
1.1  christos 	    b0->s.k == b1->s.k &&
1.1  christos 	    b0->et.succ == b1->et.succ &&
1.1  christos 	    b0->ef.succ == b1->ef.succ)
1.1  christos 		return eq_slist(b0->stmts, b1->stmts);
1.1  christos 	return 0;
1.1  christos }
1.1  christos
1.1  christos static void
1.9  christos intern_blocks(opt_state_t *opt_state, struct icode *ic)
1.1  christos {
1.1  christos 	struct block *p;
1.1  christos 	int i, j;
1.1  christos 	int done1; /* don't shadow global */
1.1  christos  top:
1.1  christos 	done1 = 1;
1.9  christos 	for (i = 0; i < opt_state->n_blocks; ++i)
1.9  christos 		opt_state->blocks[i]->link = 0;
1.1  christos
1.9  christos 	mark_code(ic);
1.1  christos
1.9  christos 	for (i = opt_state->n_blocks - 1; --i >= 0; ) {
1.9  christos 		if (!isMarked(ic, opt_state->blocks[i]))
1.1  christos 			continue;
1.9  christos 		for (j = i + 1; j < opt_state->n_blocks; ++j) {
1.9  christos 			if (!isMarked(ic, opt_state->blocks[j]))
1.1  christos 				continue;
1.9  christos 			if (eq_blk(opt_state->blocks[i], opt_state->blocks[j])) {
1.9  christos 				opt_state->blocks[i]->link = opt_state->blocks[j]->link ?
1.9  christos 					opt_state->blocks[j]->link : opt_state->blocks[j];
1.1  christos 				break;
1.1  christos 			}
1.1  christos 		}
1.1  christos 	}
1.9  christos 	for (i = 0; i < opt_state->n_blocks; ++i) {
1.9  christos 		p = opt_state->blocks[i];
1.1  christos 		if (JT(p) == 0)
1.1  christos 			continue;
1.1  christos 		if (JT(p)->link) {
1.1  christos 			done1 = 0;
1.1  christos 			JT(p) = JT(p)->link;
1.1  christos 		}
1.1  christos 		if (JF(p)->link) {
1.1  christos 			done1 = 0;
1.1  christos 			JF(p) = JF(p)->link;
1.1  christos 		}
1.1  christos 	}
1.1  christos 	if (!done1)
1.1  christos 		goto top;
1.1  christos }
1.1  christos
1.1  christos static void
1.9  christos opt_cleanup(opt_state_t *opt_state)
1.1  christos {
1.9  christos 	free((void *)opt_state->vnode_base);
1.9  christos 	free((void *)opt_state->vmap);
1.9  christos 	free((void *)opt_state->edges);
1.9  christos 	free((void *)opt_state->space);
1.9  christos 	free((void *)opt_state->levels);
1.9  christos 	free((void *)opt_state->blocks);
1.1  christos }
1.1  christos
1.1  christos /*
1.1  christos  * Return the number of stmts in 's'.
1.1  christos  */
1.5  christos static u_int
1.6  christos slength(struct slist *s)
1.1  christos {
1.5  christos 	u_int n = 0;
1.1  christos
1.1  christos 	for (; s; s = s->next)
1.1  christos 		if (s->s.code != NOP)
1.1  christos 			++n;
1.1  christos 	return n;
1.1  christos }
1.1  christos
/*
 * Count the blocks reachable from 'p', marking each one as it is
 * counted so that shared blocks are counted once.  The caller is
 * expected to have unmarked all blocks beforehand.
 */
static int
count_blocks(struct icode *ic, struct block *p)
{
	int total;

	if (p == NULL)
		return 0;
	if (isMarked(ic, p))
		return 0;

	Mark(ic, p);

	total = 1;				/* this block */
	total += count_blocks(ic, JT(p));	/* true branch */
	total += count_blocks(ic, JF(p));	/* false branch */
	return total;
}
1.1  christos
1.1  christos /*
1.1  christos  * Do a depth first search on the flow graph, numbering the
1.1  christos  * the basic blocks, and entering them into the 'blocks' array.`
1.1  christos  */
1.1  christos static void
1.9  christos number_blks_r(opt_state_t *opt_state, struct icode *ic, struct block *p)
1.1  christos {
1.1  christos 	int n;
1.1  christos
1.9  christos 	if (p == 0 || isMarked(ic, p))
1.1  christos 		return;
1.1  christos
1.9  christos 	Mark(ic, p);
1.9  christos 	n = opt_state->n_blocks++;
1.1  christos 	p->id = n;
1.9  christos 	opt_state->blocks[n] = p;
1.1  christos
1.9  christos 	number_blks_r(opt_state, ic, JT(p));
1.9  christos 	number_blks_r(opt_state, ic, JF(p));
1.1  christos }
1.1  christos
1.1  christos /*
1.1  christos  * Return the number of stmts in the flowgraph reachable by 'p'.
1.1  christos  * The nodes should be unmarked before calling.
1.1  christos  *
1.1  christos  * Note that "stmts" means "instructions", and that this includes
1.1  christos  *
1.1  christos  *	side-effect statements in 'p' (slength(p->stmts));
1.1  christos  *
1.1  christos  *	statements in the true branch from 'p' (count_stmts(JT(p)));
1.1  christos  *
1.1  christos  *	statements in the false branch from 'p' (count_stmts(JF(p)));
1.1  christos  *
1.1  christos  *	the conditional jump itself (1);
1.1  christos  *
1.1  christos  *	an extra long jump if the true branch requires it (p->longjt);
1.1  christos  *
1.1  christos  *	an extra long jump if the false branch requires it (p->longjf).
1.1  christos  */
1.5  christos static u_int
1.9  christos count_stmts(struct icode *ic, struct block *p)
1.1  christos {
1.5  christos 	u_int n;
1.1  christos
1.9  christos 	if (p == 0 || isMarked(ic, p))
1.1  christos 		return 0;
1.9  christos 	Mark(ic, p);
1.9  christos 	n = count_stmts(ic, JT(p)) + count_stmts(ic, JF(p));
1.1  christos 	return slength(p->stmts) + n + 1 + p->longjt + p->longjf;
1.1  christos }
1.1  christos
1.1  christos /*
1.1  christos  * Allocate memory.  All allocation is done before optimization
1.1  christos  * is begun.  A linear bound on the size of all data structures is computed
1.1  christos  * from the total number of blocks and/or statements.
1.1  christos  */
1.1  christos static void
1.9  christos opt_init(compiler_state_t *cstate, opt_state_t *opt_state, struct icode *ic)
1.1  christos {
1.1  christos 	bpf_u_int32 *p;
1.1  christos 	int i, n, max_stmts;
1.1  christos
1.1  christos 	/*
1.1  christos 	 * First, count the blocks, so we can malloc an array to map
1.1  christos 	 * block number to block.  Then, put the blocks into the array.
1.1  christos 	 */
1.9  christos 	unMarkAll(ic);
1.9  christos 	n = count_blocks(ic, ic->root);
1.9  christos 	opt_state->blocks = (struct block **)calloc(n, sizeof(*opt_state->blocks));
1.9  christos 	if (opt_state->blocks == NULL)
1.9  christos 		bpf_error(cstate, "malloc");
1.9  christos 	unMarkAll(ic);
1.9  christos 	opt_state->n_blocks = 0;
1.9  christos 	number_blks_r(opt_state, ic, ic->root);
1.9  christos
1.9  christos 	opt_state->n_edges = 2 * opt_state->n_blocks;
1.9  christos 	opt_state->edges = (struct edge **)calloc(opt_state->n_edges, sizeof(*opt_state->edges));
1.9  christos 	if (opt_state->edges == NULL)
1.9  christos 		bpf_error(cstate, "malloc");
1.1  christos
1.1  christos 	/*
1.1  christos 	 * The number of levels is bounded by the number of nodes.
1.1  christos 	 */
1.9  christos 	opt_state->levels = (struct block **)calloc(opt_state->n_blocks, sizeof(*opt_state->levels));
1.9  christos 	if (opt_state->levels == NULL)
1.9  christos 		bpf_error(cstate, "malloc");
1.1  christos
1.9  christos 	opt_state->edgewords = opt_state->n_edges / (8 * sizeof(bpf_u_int32)) + 1;
1.9  christos 	opt_state->nodewords = opt_state->n_blocks / (8 * sizeof(bpf_u_int32)) + 1;
1.1  christos
1.1  christos 	/* XXX */
1.9  christos 	opt_state->space = (bpf_u_int32 *)malloc(2 * opt_state->n_blocks * opt_state->nodewords * sizeof(*opt_state->space)
1.9  christos 				 + opt_state->n_edges * opt_state->edgewords * sizeof(*opt_state->space));
1.9  christos 	if (opt_state->space == NULL)
1.9  christos 		bpf_error(cstate, "malloc");
1.9  christos 	p = opt_state->space;
1.9  christos 	opt_state->all_dom_sets = p;
1.1  christos 	for (i = 0; i < n; ++i) {
1.9  christos 		opt_state->blocks[i]->dom = p;
1.9  christos 		p += opt_state->nodewords;
1.1  christos 	}
1.9  christos 	opt_state->all_closure_sets = p;
1.1  christos 	for (i = 0; i < n; ++i) {
1.9  christos 		opt_state->blocks[i]->closure = p;
1.9  christos 		p += opt_state->nodewords;
1.1  christos 	}
1.9  christos 	opt_state->all_edge_sets = p;
1.1  christos 	for (i = 0; i < n; ++i) {
1.9  christos 		register struct block *b = opt_state->blocks[i];
1.1  christos
1.1  christos 		b->et.edom = p;
1.9  christos 		p += opt_state->edgewords;
1.1  christos 		b->ef.edom = p;
1.9  christos 		p += opt_state->edgewords;
1.1  christos 		b->et.id = i;
1.9  christos 		opt_state->edges[i] = &b->et;
1.9  christos 		b->ef.id = opt_state->n_blocks + i;
1.9  christos 		opt_state->edges[opt_state->n_blocks + i] = &b->ef;
1.1  christos 		b->et.pred = b;
1.1  christos 		b->ef.pred = b;
1.1  christos 	}
1.1  christos 	max_stmts = 0;
1.1  christos 	for (i = 0; i < n; ++i)
1.9  christos 		max_stmts += slength(opt_state->blocks[i]->stmts) + 1;
1.1  christos 	/*
1.1  christos 	 * We allocate at most 3 value numbers per statement,
1.1  christos 	 * so this is an upper bound on the number of valnodes
1.1  christos 	 * we'll need.
1.1  christos 	 */
1.9  christos 	opt_state->maxval = 3 * max_stmts;
1.9  christos 	opt_state->vmap = (struct vmapinfo *)calloc(opt_state->maxval, sizeof(*opt_state->vmap));
1.9  christos 	opt_state->vnode_base = (struct valnode *)calloc(opt_state->maxval, sizeof(*opt_state->vnode_base));
1.9  christos 	if (opt_state->vmap == NULL || opt_state->vnode_base == NULL)
1.9  christos 		bpf_error(cstate, "malloc");
1.1  christos }
1.1  christos
1.1  christos /*
1.9  christos  * This is only used when supporting optimizer debugging.  It is
1.9  christos  * global state, so do *not* do more than one compile in parallel
1.9  christos  * and expect it to provide meaningful information.
1.1  christos  */
1.1  christos #ifdef BDEBUG
1.1  christos int bids[1000];
1.1  christos #endif
1.1  christos
1.1  christos /*
1.1  christos  * Returns true if successful.  Returns false if a branch has
1.1  christos  * an offset that is too large.  If so, we have marked that
1.1  christos  * branch so that on a subsequent iteration, it will be treated
1.1  christos  * properly.
1.1  christos  */
1.1  christos static int
1.9  christos convert_code_r(compiler_state_t *cstate, conv_state_t *conv_state,
1.9  christos     struct icode *ic, struct block *p)
1.1  christos {
1.1  christos 	struct bpf_insn *dst;
1.1  christos 	struct slist *src;
1.2  christos 	u_int slen;
1.1  christos 	u_int off;
1.1  christos 	int extrajmps;		/* number of extra jumps inserted */
1.1  christos 	struct slist **offset = NULL;
1.1  christos
1.9  christos 	if (p == 0 || isMarked(ic, p))
1.1  christos 		return (1);
1.9  christos 	Mark(ic, p);
1.1  christos
1.9  christos 	if (convert_code_r(cstate, conv_state, ic, JF(p)) == 0)
1.1  christos 		return (0);
1.9  christos 	if (convert_code_r(cstate, conv_state, ic, JT(p)) == 0)
1.1  christos 		return (0);
1.1  christos
1.1  christos 	slen = slength(p->stmts);
1.9  christos 	dst = conv_state->ftail -= (slen + 1 + p->longjt + p->longjf);
1.1  christos 		/* inflate length by any extra jumps */
1.1  christos
1.9  christos 	p->offset = (int)(dst - conv_state->fstart);
1.1  christos
1.1  christos 	/* generate offset[] for convenience  */
1.1  christos 	if (slen) {
1.1  christos 		offset = (struct slist **)calloc(slen, sizeof(struct slist *));
1.1  christos 		if (!offset) {
1.9  christos 			bpf_error(cstate, "not enough core");
1.1  christos 			/*NOTREACHED*/
1.1  christos 		}
1.1  christos 	}
1.1  christos 	src = p->stmts;
1.1  christos 	for (off = 0; off < slen && src; off++) {
1.1  christos #if 0
1.1  christos 		printf("off=%d src=%x\n", off, src);
1.1  christos #endif
1.1  christos 		offset[off] = src;
1.1  christos 		src = src->next;
1.1  christos 	}
1.1  christos
1.1  christos 	off = 0;
1.1  christos 	for (src = p->stmts; src; src = src->next) {
1.1  christos 		if (src->s.code == NOP)
1.1  christos 			continue;
1.1  christos 		dst->code = (u_short)src->s.code;
1.1  christos 		dst->k = src->s.k;
1.1  christos
1.1  christos 		/* fill block-local relative jump */
1.1  christos 		if (BPF_CLASS(src->s.code) != BPF_JMP || src->s.code == (BPF_JMP|BPF_JA)) {
1.1  christos #if 0
1.1  christos 			if (src->s.jt || src->s.jf) {
1.9  christos 				bpf_error(cstate, "illegal jmp destination");
1.1  christos 				/*NOTREACHED*/
1.1  christos 			}
1.1  christos #endif
1.1  christos 			goto filled;
1.1  christos 		}
1.1  christos 		if (off == slen - 2)	/*???*/
1.1  christos 			goto filled;
1.1  christos
1.1  christos 	    {
1.2  christos 		u_int i;
1.1  christos 		int jt, jf;
1.4  christos static const char ljerr[] = "%s for block-local relative jump: off=%d";
1.1  christos
1.1  christos #if 0
1.1  christos 		printf("code=%x off=%d %x %x\n", src->s.code,
1.1  christos 			off, src->s.jt, src->s.jf);
1.1  christos #endif
1.1  christos
1.1  christos 		if (!src->s.jt || !src->s.jf) {
1.9  christos 			bpf_error(cstate, ljerr, "no jmp destination", off);
1.1  christos 			/*NOTREACHED*/
1.1  christos 		}
1.1  christos
1.1  christos 		jt = jf = 0;
1.1  christos 		for (i = 0; i < slen; i++) {
1.1  christos 			if (offset[i] == src->s.jt) {
1.1  christos 				if (jt) {
1.9  christos 					bpf_error(cstate, ljerr, "multiple matches", off);
1.1  christos 					/*NOTREACHED*/
1.1  christos 				}
1.1  christos
1.1  christos 				dst->jt = i - off - 1;
1.1  christos 				jt++;
1.1  christos 			}
1.1  christos 			if (offset[i] == src->s.jf) {
1.1  christos 				if (jf) {
1.9  christos 					bpf_error(cstate, ljerr, "multiple matches", off);
1.1  christos 					/*NOTREACHED*/
1.1  christos 				}
1.1  christos 				dst->jf = i - off - 1;
1.1  christos 				jf++;
1.1  christos 			}
1.1  christos 		}
1.1  christos 		if (!jt || !jf) {
1.9  christos 			bpf_error(cstate, ljerr, "no destination found", off);
1.1  christos 			/*NOTREACHED*/
1.1  christos 		}
1.1  christos 	    }
1.1  christos filled:
1.1  christos 		++dst;
1.1  christos 		++off;
1.1  christos 	}
1.1  christos 	if (offset)
1.1  christos 		free(offset);
1.1  christos
1.1  christos #ifdef BDEBUG
1.9  christos 	bids[dst - conv_state->fstart] = p->id + 1;
1.1  christos #endif
1.1  christos 	dst->code = (u_short)p->s.code;
1.1  christos 	dst->k = p->s.k;
1.1  christos 	if (JT(p)) {
1.1  christos 		extrajmps = 0;
1.1  christos 		off = JT(p)->offset - (p->offset + slen) - 1;
1.1  christos 		if (off >= 256) {
1.1  christos 		    /* offset too large for branch, must add a jump */
1.1  christos 		    if (p->longjt == 0) {
1.1  christos 		    	/* mark this instruction and retry */
1.1  christos 			p->longjt++;
1.1  christos 			return(0);
1.1  christos 		    }
1.1  christos 		    /* branch if T to following jump */
1.1  christos 		    dst->jt = extrajmps;
1.1  christos 		    extrajmps++;
1.1  christos 		    dst[extrajmps].code = BPF_JMP|BPF_JA;
1.1  christos 		    dst[extrajmps].k = off - extrajmps;
1.1  christos 		}
1.1  christos 		else
1.1  christos 		    dst->jt = off;
1.1  christos 		off = JF(p)->offset - (p->offset + slen) - 1;
1.1  christos 		if (off >= 256) {
1.1  christos 		    /* offset too large for branch, must add a jump */
1.1  christos 		    if (p->longjf == 0) {
1.1  christos 		    	/* mark this instruction and retry */
1.1  christos 			p->longjf++;
1.1  christos 			return(0);
1.1  christos 		    }
1.1  christos 		    /* branch if F to following jump */
1.1  christos 		    /* if two jumps are inserted, F goes to second one */
1.1  christos 		    dst->jf = extrajmps;
1.1  christos 		    extrajmps++;
1.1  christos 		    dst[extrajmps].code = BPF_JMP|BPF_JA;
1.1  christos 		    dst[extrajmps].k = off - extrajmps;
1.1  christos 		}
1.1  christos 		else
1.1  christos 		    dst->jf = off;
1.1  christos 	}
1.1  christos 	return (1);
1.1  christos }
1.1  christos
1.1  christos
1.1  christos /*
1.1  christos  * Convert flowgraph intermediate representation to the
1.1  christos  * BPF array representation.  Set *lenp to the number of instructions.
1.1  christos  *
1.1  christos  * This routine does *NOT* leak the memory pointed to by fp.  It *must
1.1  christos  * not* do free(fp) before returning fp; doing so would make no sense,
1.1  christos  * as the BPF array pointed to by the return value of icode_to_fcode()
1.1  christos  * must be valid - it's being returned for use in a bpf_program structure.
1.1  christos  *
1.1  christos  * If it appears that icode_to_fcode() is leaking, the problem is that
1.1  christos  * the program using pcap_compile() is failing to free the memory in
1.1  christos  * the BPF program when it's done - the leak is in the program, not in
1.1  christos  * the routine that happens to be allocating the memory.  (By analogy, if
1.1  christos  * a program calls fopen() without ever calling fclose() on the FILE *,
1.1  christos  * it will leak the FILE structure; the leak is not in fopen(), it's in
1.1  christos  * the program.)  Change the program to use pcap_freecode() when it's
1.1  christos  * done with the filter program.  See the pcap man page.
1.1  christos  */
1.1  christos struct bpf_insn *
1.9  christos icode_to_fcode(compiler_state_t *cstate, struct icode *ic,
1.9  christos     struct block *root, u_int *lenp)
1.1  christos {
1.5  christos 	u_int n;
1.1  christos 	struct bpf_insn *fp;
1.9  christos 	conv_state_t conv_state;
1.1  christos
1.1  christos 	/*
1.1  christos 	 * Loop doing convert_code_r() until no branches remain
1.1  christos 	 * with too-large offsets.
1.1  christos 	 */
1.1  christos 	while (1) {
1.9  christos 	    unMarkAll(ic);
1.9  christos 	    n = *lenp = count_stmts(ic, root);
1.1  christos
1.1  christos 	    fp = (struct bpf_insn *)malloc(sizeof(*fp) * n);
1.1  christos 	    if (fp == NULL)
1.9  christos 		    bpf_error(cstate, "malloc");
1.1  christos 	    memset((char *)fp, 0, sizeof(*fp) * n);
1.9  christos 	    conv_state.fstart = fp;
1.9  christos 	    conv_state.ftail = fp + n;
1.1  christos
1.9  christos 	    unMarkAll(ic);
1.9  christos 	    if (convert_code_r(cstate, &conv_state, ic, root))
1.1  christos 		break;
1.1  christos 	    free(fp);
1.1  christos 	}
1.1  christos
1.1  christos 	return fp;
1.1  christos }
1.1  christos
1.1  christos /*
1.1  christos  * Make a copy of a BPF program and put it in the "fcode" member of
1.1  christos  * a "pcap_t".
1.1  christos  *
1.1  christos  * If we fail to allocate memory for the copy, fill in the "errbuf"
1.1  christos  * member of the "pcap_t" with an error message, and return -1;
1.1  christos  * otherwise, return 0.
1.1  christos  */
1.1  christos int
1.1  christos install_bpf_program(pcap_t *p, struct bpf_program *fp)
1.1  christos {
1.1  christos 	size_t prog_size;
1.1  christos
1.1  christos 	/*
1.1  christos 	 * Validate the program.
1.1  christos 	 */
1.1  christos 	if (!bpf_validate(fp->bf_insns, fp->bf_len)) {
1.9  christos 		pcap_snprintf(p->errbuf, sizeof(p->errbuf),
1.1  christos 			"BPF program is not valid");
1.1  christos 		return (-1);
1.1  christos 	}
1.1  christos
1.1  christos 	/*
1.1  christos 	 * Free up any already installed program.
1.1  christos 	 */
1.1  christos 	pcap_freecode(&p->fcode);
1.1  christos
1.1  christos 	prog_size = sizeof(*fp->bf_insns) * fp->bf_len;
1.1  christos 	p->fcode.bf_len = fp->bf_len;
1.1  christos 	p->fcode.bf_insns = (struct bpf_insn *)malloc(prog_size);
1.1  christos 	if (p->fcode.bf_insns == NULL) {
1.9  christos 		pcap_snprintf(p->errbuf, sizeof(p->errbuf),
1.1  christos 			 "malloc: %s", pcap_strerror(errno));
1.1  christos 		return (-1);
1.1  christos 	}
1.1  christos 	memcpy(p->fcode.bf_insns, fp->bf_insns, prog_size);
1.1  christos 	return (0);
1.1  christos }
1.1  christos
1.1  christos #ifdef BDEBUG
1.1  christos static void
1.9  christos dot_dump_node(struct icode *ic, struct block *block, struct bpf_program *prog,
1.9  christos     FILE *out)
1.8  christos {
1.8  christos 	int icount, noffset;
1.8  christos 	int i;
1.8  christos
1.9  christos 	if (block == NULL || isMarked(ic, block))
1.8  christos 		return;
1.9  christos 	Mark(ic, block);
1.8  christos
1.8  christos 	icount = slength(block->stmts) + 1 + block->longjt + block->longjf;
1.8  christos 	noffset = min(block->offset + icount, (int)prog->bf_len);
1.8  christos
1.8  christos 	fprintf(out, "\tblock%d [shape=ellipse, id=\"block-%d\" label=\"BLOCK%d\\n", block->id, block->id, block->id);
1.8  christos 	for (i = block->offset; i < noffset; i++) {
1.8  christos 		fprintf(out, "\\n%s", bpf_image(prog->bf_insns + i, i));
1.8  christos 	}
1.8  christos 	fprintf(out, "\" tooltip=\"");
1.8  christos 	for (i = 0; i < BPF_MEMWORDS; i++)
1.8  christos 		if (block->val[i] != 0)
1.8  christos 			fprintf(out, "val[%d]=%d ", i, block->val[i]);
1.8  christos 	fprintf(out, "val[A]=%d ", block->val[A_ATOM]);
1.8  christos 	fprintf(out, "val[X]=%d", block->val[X_ATOM]);
1.8  christos 	fprintf(out, "\"");
1.8  christos 	if (JT(block) == NULL)
1.8  christos 		fprintf(out, ", peripheries=2");
1.8  christos 	fprintf(out, "];\n");
1.8  christos
1.9  christos 	dot_dump_node(ic, JT(block), prog, out);
1.9  christos 	dot_dump_node(ic, JF(block), prog, out);
1.8  christos }
1.9  christos
1.8  christos static void
1.9  christos dot_dump_edge(struct icode *ic, struct block *block, FILE *out)
1.8  christos {
1.9  christos 	if (block == NULL || isMarked(ic, block))
1.8  christos 		return;
1.9  christos 	Mark(ic, block);
1.8  christos
1.8  christos 	if (JT(block)) {
1.8  christos 		fprintf(out, "\t\"block%d\":se -> \"block%d\":n [label=\"T\"]; \n",
1.8  christos 				block->id, JT(block)->id);
1.8  christos 		fprintf(out, "\t\"block%d\":sw -> \"block%d\":n [label=\"F\"]; \n",
1.8  christos 			   block->id, JF(block)->id);
1.8  christos 	}
1.9  christos 	dot_dump_edge(ic, JT(block), out);
1.9  christos 	dot_dump_edge(ic, JF(block), out);
1.8  christos }
1.9  christos
1.8  christos /* Output the block CFG using graphviz/DOT language
1.8  christos  * In the CFG, block's code, value index for each registers at EXIT,
1.8  christos  * and the jump relationship is show.
1.8  christos  *
1.8  christos  * example DOT for BPF `ip src host 1.1.1.1' is:
1.8  christos     digraph BPF {
1.8  christos     	block0 [shape=ellipse, id="block-0" label="BLOCK0\n\n(000) ldh      [12]\n(001) jeq      #0x800           jt 2	jf 5" tooltip="val[A]=0 val[X]=0"];
1.8  christos     	block1 [shape=ellipse, id="block-1" label="BLOCK1\n\n(002) ld       [26]\n(003) jeq      #0x1010101       jt 4	jf 5" tooltip="val[A]=0 val[X]=0"];
1.8  christos     	block2 [shape=ellipse, id="block-2" label="BLOCK2\n\n(004) ret      #68" tooltip="val[A]=0 val[X]=0", peripheries=2];
1.8  christos     	block3 [shape=ellipse, id="block-3" label="BLOCK3\n\n(005) ret      #0" tooltip="val[A]=0 val[X]=0", peripheries=2];
1.8  christos     	"block0":se -> "block1":n [label="T"];
1.8  christos     	"block0":sw -> "block3":n [label="F"];
1.8  christos     	"block1":se -> "block2":n [label="T"];
1.8  christos     	"block1":sw -> "block3":n [label="F"];
1.8  christos     }
1.8  christos  *
1.8  christos  *  After install graphviz on http://www.graphviz.org/, save it as bpf.dot
1.8  christos  *  and run `dot -Tpng -O bpf.dot' to draw the graph.
1.8  christos  */
1.8  christos static void
1.9  christos dot_dump(compiler_state_t *cstate, struct icode *ic)
1.8  christos {
1.8  christos 	struct bpf_program f;
1.8  christos 	FILE *out = stdout;
1.8  christos
1.8  christos 	memset(bids, 0, sizeof bids);
1.9  christos 	f.bf_insns = icode_to_fcode(cstate, ic, ic->root, &f.bf_len);
1.8  christos
1.8  christos 	fprintf(out, "digraph BPF {\n");
1.9  christos 	ic->cur_mark = 0;
1.9  christos 	unMarkAll(ic);
1.9  christos 	dot_dump_node(ic, ic->root, &f, out);
1.9  christos 	ic->cur_mark = 0;
1.9  christos 	unMarkAll(ic);
1.9  christos 	dot_dump_edge(ic, ic->root, out);
1.8  christos 	fprintf(out, "}\n");
1.8  christos
1.8  christos 	free((char *)f.bf_insns);
1.8  christos }
1.8  christos
1.8  christos static void
1.9  christos plain_dump(compiler_state_t *cstate, struct icode *ic)
1.1  christos {
1.1  christos 	struct bpf_program f;
1.1  christos
1.1  christos 	memset(bids, 0, sizeof bids);
1.9  christos 	f.bf_insns = icode_to_fcode(cstate, ic, ic->root, &f.bf_len);
1.1  christos 	bpf_dump(&f, 1);
1.1  christos 	putchar('\n');
1.1  christos 	free((char *)f.bf_insns);
1.1  christos }
1.9  christos
1.8  christos static void
1.9  christos opt_dump(compiler_state_t *cstate, struct icode *ic)
1.8  christos {
1.8  christos 	/* if optimizer debugging is enabled, output DOT graph
1.9  christos 	 * `pcap_optimizer_debug=4' is equivalent to -dddd to follow -d/-dd/-ddd
1.9  christos 	 * convention in tcpdump command line
1.8  christos 	 */
1.9  christos 	if (pcap_optimizer_debug > 3)
1.9  christos 		dot_dump(cstate, ic);
1.8  christos 	else
1.9  christos 		plain_dump(cstate, ic);
1.8  christos }
1.1  christos #endif