Home | History | Annotate | Line # | Download | only in dist
optimize.c revision 1.1.1.3
      1  1.1.1.3  christos /*	$NetBSD: optimize.c,v 1.1.1.3 2013/04/06 15:57:45 christos Exp $	*/
      2  1.1.1.3  christos 
      3      1.1  christos /*
      4      1.1  christos  * Copyright (c) 1988, 1989, 1990, 1991, 1993, 1994, 1995, 1996
      5      1.1  christos  *	The Regents of the University of California.  All rights reserved.
      6      1.1  christos  *
      7      1.1  christos  * Redistribution and use in source and binary forms, with or without
      8      1.1  christos  * modification, are permitted provided that: (1) source code distributions
      9      1.1  christos  * retain the above copyright notice and this paragraph in its entirety, (2)
     10      1.1  christos  * distributions including binary code include the above copyright notice and
     11      1.1  christos  * this paragraph in its entirety in the documentation or other materials
     12      1.1  christos  * provided with the distribution, and (3) all advertising materials mentioning
     13      1.1  christos  * features or use of this software display the following acknowledgement:
     14      1.1  christos  * ``This product includes software developed by the University of California,
     15      1.1  christos  * Lawrence Berkeley Laboratory and its contributors.'' Neither the name of
     16      1.1  christos  * the University nor the names of its contributors may be used to endorse
     17      1.1  christos  * or promote products derived from this software without specific prior
     18      1.1  christos  * written permission.
     19      1.1  christos  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
     20      1.1  christos  * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
     21      1.1  christos  * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
     22      1.1  christos  *
     23      1.1  christos  *  Optimization module for tcpdump intermediate representation.
     24      1.1  christos  */
     25      1.1  christos #ifndef lint
     26      1.1  christos static const char rcsid[] _U_ =
     27  1.1.1.3  christos     "@(#) Header: /tcpdump/master/libpcap/optimize.c,v 1.91 2008-01-02 04:16:46 guy Exp  (LBL)";
     28      1.1  christos #endif
     29      1.1  christos 
     30      1.1  christos #ifdef HAVE_CONFIG_H
     31      1.1  christos #include "config.h"
     32      1.1  christos #endif
     33      1.1  christos 
     34      1.1  christos #ifdef WIN32
     35      1.1  christos #include <pcap-stdinc.h>
     36      1.1  christos #else /* WIN32 */
     37      1.1  christos #if HAVE_INTTYPES_H
     38      1.1  christos #include <inttypes.h>
     39      1.1  christos #elif HAVE_STDINT_H
     40      1.1  christos #include <stdint.h>
     41      1.1  christos #endif
     42      1.1  christos #ifdef HAVE_SYS_BITYPES_H
     43      1.1  christos #include <sys/bitypes.h>
     44      1.1  christos #endif
     45      1.1  christos #include <sys/types.h>
     46      1.1  christos #endif /* WIN32 */
     47      1.1  christos 
     48      1.1  christos #include <stdio.h>
     49      1.1  christos #include <stdlib.h>
     50      1.1  christos #include <memory.h>
     51      1.1  christos #include <string.h>
     52      1.1  christos 
     53      1.1  christos #include <errno.h>
     54      1.1  christos 
     55      1.1  christos #include "pcap-int.h"
     56      1.1  christos 
     57      1.1  christos #include "gencode.h"
     58      1.1  christos 
     59      1.1  christos #ifdef HAVE_OS_PROTO_H
     60      1.1  christos #include "os-proto.h"
     61      1.1  christos #endif
     62      1.1  christos 
     63      1.1  christos #ifdef BDEBUG
     64      1.1  christos extern int dflag;
     65      1.1  christos #endif
     66      1.1  christos 
     67      1.1  christos #if defined(MSDOS) && !defined(__DJGPP__)
     68      1.1  christos extern int _w32_ffs (int mask);
     69      1.1  christos #define ffs _w32_ffs
     70      1.1  christos #endif
     71      1.1  christos 
     72      1.1  christos #if defined(WIN32) && defined (_MSC_VER)
     73      1.1  christos int ffs(int mask);
     74      1.1  christos #endif
     75      1.1  christos 
     76      1.1  christos /*
     77      1.1  christos  * Represents a deleted instruction.
     78      1.1  christos  */
     79      1.1  christos #define NOP -1
     80      1.1  christos 
     81      1.1  christos /*
     82      1.1  christos  * Register numbers for use-def values.
     83      1.1  christos  * 0 through BPF_MEMWORDS-1 represent the corresponding scratch memory
     84      1.1  christos  * location.  A_ATOM is the accumulator and X_ATOM is the index
     85      1.1  christos  * register.
     86      1.1  christos  */
     87      1.1  christos #define A_ATOM BPF_MEMWORDS
     88      1.1  christos #define X_ATOM (BPF_MEMWORDS+1)
     89      1.1  christos 
     90      1.1  christos /*
     91      1.1  christos  * This define is used to represent *both* the accumulator and
     92      1.1  christos  * x register in use-def computations.
     93      1.1  christos  * Currently, the use-def code assumes only one definition per instruction.
     94      1.1  christos  */
     95      1.1  christos #define AX_ATOM N_ATOMS
     96      1.1  christos 
     97      1.1  christos /*
     98      1.1  christos  * A flag to indicate that further optimization is needed.
     99      1.1  christos  * Iterative passes are continued until a given pass yields no
    100      1.1  christos  * branch movement.
    101      1.1  christos  */
    102      1.1  christos static int done;
    103      1.1  christos 
/*
 * A block is marked if and only if its mark equals the current mark.
 * Rather than traverse the code array, marking each item, 'cur_mark' is
 * incremented.  This automatically makes each element unmarked.
 *
 * isMarked(p)	- nonzero when p->mark equals cur_mark.
 * unMarkAll()	- bumps cur_mark; the expansion is fully parenthesized so
 *		  the macro behaves as a single expression wherever it is
 *		  used.
 * Mark(p)	- stamps p->mark with cur_mark.
 */
static int cur_mark;
#define isMarked(p) ((p)->mark == cur_mark)
#define unMarkAll() (cur_mark += 1)
#define Mark(p) ((p)->mark = cur_mark)
    113      1.1  christos 
    114      1.1  christos static void opt_init(struct block *);
    115      1.1  christos static void opt_cleanup(void);
    116      1.1  christos 
    117      1.1  christos static void make_marks(struct block *);
    118      1.1  christos static void mark_code(struct block *);
    119      1.1  christos 
    120      1.1  christos static void intern_blocks(struct block *);
    121      1.1  christos 
    122      1.1  christos static int eq_slist(struct slist *, struct slist *);
    123      1.1  christos 
    124      1.1  christos static void find_levels_r(struct block *);
    125      1.1  christos 
    126      1.1  christos static void find_levels(struct block *);
    127      1.1  christos static void find_dom(struct block *);
    128      1.1  christos static void propedom(struct edge *);
    129      1.1  christos static void find_edom(struct block *);
    130      1.1  christos static void find_closure(struct block *);
    131      1.1  christos static int atomuse(struct stmt *);
    132      1.1  christos static int atomdef(struct stmt *);
    133      1.1  christos static void compute_local_ud(struct block *);
    134      1.1  christos static void find_ud(struct block *);
    135      1.1  christos static void init_val(void);
    136      1.1  christos static int F(int, int, int);
    137      1.1  christos static inline void vstore(struct stmt *, int *, int, int);
    138      1.1  christos static void opt_blk(struct block *, int);
    139      1.1  christos static int use_conflict(struct block *, struct block *);
    140      1.1  christos static void opt_j(struct edge *);
    141      1.1  christos static void or_pullup(struct block *);
    142      1.1  christos static void and_pullup(struct block *);
    143      1.1  christos static void opt_blks(struct block *, int);
    144      1.1  christos static inline void link_inedge(struct edge *, struct block *);
    145      1.1  christos static void find_inedges(struct block *);
    146      1.1  christos static void opt_root(struct block **);
    147      1.1  christos static void opt_loop(struct block *, int);
    148      1.1  christos static void fold_op(struct stmt *, int, int);
    149      1.1  christos static inline struct slist *this_op(struct slist *);
    150      1.1  christos static void opt_not(struct block *);
    151      1.1  christos static void opt_peep(struct block *);
    152      1.1  christos static void opt_stmt(struct stmt *, int[], int);
    153      1.1  christos static void deadstmt(struct stmt *, struct stmt *[]);
    154      1.1  christos static void opt_deadstores(struct block *);
    155      1.1  christos static struct block *fold_edge(struct block *, struct edge *);
    156      1.1  christos static inline int eq_blk(struct block *, struct block *);
    157  1.1.1.3  christos static u_int slength(struct slist *);
    158      1.1  christos static int count_blocks(struct block *);
    159      1.1  christos static void number_blks_r(struct block *);
    160  1.1.1.3  christos static u_int count_stmts(struct block *);
    161      1.1  christos static int convert_code_r(struct block *);
    162      1.1  christos #ifdef BDEBUG
    163      1.1  christos static void opt_dump(struct block *);
    164      1.1  christos #endif
    165      1.1  christos 
    166      1.1  christos static int n_blocks;
    167      1.1  christos struct block **blocks;
    168      1.1  christos static int n_edges;
    169      1.1  christos struct edge **edges;
    170      1.1  christos 
    171      1.1  christos /*
    172      1.1  christos  * A bit vector set representation of the dominators.
    173      1.1  christos  * We round up the set size to the next power of two.
    174      1.1  christos  */
    175      1.1  christos static int nodewords;
    176      1.1  christos static int edgewords;
    177      1.1  christos struct block **levels;
    178      1.1  christos bpf_u_int32 *space;
#define BITS_PER_WORD (8*sizeof(bpf_u_int32))

/*
 * The single-bit masks below are built from an unsigned bpf_u_int32 one
 * rather than a plain (signed) int 1: shifting a signed 1 into the top
 * bit of the word is undefined behavior, and element numbers whose
 * remainder is BITS_PER_WORD-1 need exactly that bit.
 */

/*
 * True if a is in uset {p}
 */
#define SET_MEMBER(p, a) \
((p)[(unsigned)(a) / BITS_PER_WORD] & \
 ((bpf_u_int32)1 << ((unsigned)(a) % BITS_PER_WORD)))

/*
 * Add 'a' to uset p.
 */
#define SET_INSERT(p, a) \
((p)[(unsigned)(a) / BITS_PER_WORD] |= \
 ((bpf_u_int32)1 << ((unsigned)(a) % BITS_PER_WORD)))

/*
 * Delete 'a' from uset p.
 */
#define SET_DELETE(p, a) \
((p)[(unsigned)(a) / BITS_PER_WORD] &= \
 ~((bpf_u_int32)1 << ((unsigned)(a) % BITS_PER_WORD)))
    197      1.1  christos 
/*
 * Word-wise set operations on two bit vectors of 'n' words each.
 * Each macro expands to a brace-enclosed block that walks both vectors
 * in step, updating 'a' in place; nothing happens when 'n' is not
 * positive.
 */

/*
 * a := a intersect b
 */
#define SET_INTERSECT(a, b, n)\
{\
	bpf_u_int32 *_dst = (a), *_src = (b);\
	int _words = (n);\
	for (; _words > 0; --_words) {\
		*_dst &= *_src;\
		++_dst; ++_src;\
	}\
}

/*
 * a := a - b
 */
#define SET_SUBTRACT(a, b, n)\
{\
	bpf_u_int32 *_dst = (a), *_src = (b);\
	int _words = (n);\
	for (; _words > 0; --_words) {\
		*_dst &= ~*_src;\
		++_dst; ++_src;\
	}\
}

/*
 * a := a union b
 */
#define SET_UNION(a, b, n)\
{\
	bpf_u_int32 *_dst = (a), *_src = (b);\
	int _words = (n);\
	for (; _words > 0; --_words) {\
		*_dst |= *_src;\
		++_dst; ++_src;\
	}\
}
    227      1.1  christos 
    228      1.1  christos static uset all_dom_sets;
    229      1.1  christos static uset all_closure_sets;
    230      1.1  christos static uset all_edge_sets;
    231      1.1  christos 
    232      1.1  christos #ifndef MAX
    233      1.1  christos #define MAX(a,b) ((a)>(b)?(a):(b))
    234      1.1  christos #endif
    235      1.1  christos 
    236      1.1  christos static void
    237      1.1  christos find_levels_r(b)
    238      1.1  christos 	struct block *b;
    239      1.1  christos {
    240      1.1  christos 	int level;
    241      1.1  christos 
    242      1.1  christos 	if (isMarked(b))
    243      1.1  christos 		return;
    244      1.1  christos 
    245      1.1  christos 	Mark(b);
    246      1.1  christos 	b->link = 0;
    247      1.1  christos 
    248      1.1  christos 	if (JT(b)) {
    249      1.1  christos 		find_levels_r(JT(b));
    250      1.1  christos 		find_levels_r(JF(b));
    251      1.1  christos 		level = MAX(JT(b)->level, JF(b)->level) + 1;
    252      1.1  christos 	} else
    253      1.1  christos 		level = 0;
    254      1.1  christos 	b->level = level;
    255      1.1  christos 	b->link = levels[level];
    256      1.1  christos 	levels[level] = b;
    257      1.1  christos }
    258      1.1  christos 
    259      1.1  christos /*
    260      1.1  christos  * Level graph.  The levels go from 0 at the leaves to
    261      1.1  christos  * N_LEVELS at the root.  The levels[] array points to the
    262      1.1  christos  * first node of the level list, whose elements are linked
    263      1.1  christos  * with the 'link' field of the struct block.
    264      1.1  christos  */
    265      1.1  christos static void
    266      1.1  christos find_levels(root)
    267      1.1  christos 	struct block *root;
    268      1.1  christos {
    269      1.1  christos 	memset((char *)levels, 0, n_blocks * sizeof(*levels));
    270      1.1  christos 	unMarkAll();
    271      1.1  christos 	find_levels_r(root);
    272      1.1  christos }
    273      1.1  christos 
    274      1.1  christos /*
    275      1.1  christos  * Find dominator relationships.
    276      1.1  christos  * Assumes graph has been leveled.
    277      1.1  christos  */
    278      1.1  christos static void
    279      1.1  christos find_dom(root)
    280      1.1  christos 	struct block *root;
    281      1.1  christos {
    282      1.1  christos 	int i;
    283      1.1  christos 	struct block *b;
    284      1.1  christos 	bpf_u_int32 *x;
    285      1.1  christos 
    286      1.1  christos 	/*
    287      1.1  christos 	 * Initialize sets to contain all nodes.
    288      1.1  christos 	 */
    289      1.1  christos 	x = all_dom_sets;
    290      1.1  christos 	i = n_blocks * nodewords;
    291      1.1  christos 	while (--i >= 0)
    292      1.1  christos 		*x++ = ~0;
    293      1.1  christos 	/* Root starts off empty. */
    294      1.1  christos 	for (i = nodewords; --i >= 0;)
    295      1.1  christos 		root->dom[i] = 0;
    296      1.1  christos 
    297      1.1  christos 	/* root->level is the highest level no found. */
    298      1.1  christos 	for (i = root->level; i >= 0; --i) {
    299      1.1  christos 		for (b = levels[i]; b; b = b->link) {
    300      1.1  christos 			SET_INSERT(b->dom, b->id);
    301      1.1  christos 			if (JT(b) == 0)
    302      1.1  christos 				continue;
    303      1.1  christos 			SET_INTERSECT(JT(b)->dom, b->dom, nodewords);
    304      1.1  christos 			SET_INTERSECT(JF(b)->dom, b->dom, nodewords);
    305      1.1  christos 		}
    306      1.1  christos 	}
    307      1.1  christos }
    308      1.1  christos 
    309      1.1  christos static void
    310      1.1  christos propedom(ep)
    311      1.1  christos 	struct edge *ep;
    312      1.1  christos {
    313      1.1  christos 	SET_INSERT(ep->edom, ep->id);
    314      1.1  christos 	if (ep->succ) {
    315      1.1  christos 		SET_INTERSECT(ep->succ->et.edom, ep->edom, edgewords);
    316      1.1  christos 		SET_INTERSECT(ep->succ->ef.edom, ep->edom, edgewords);
    317      1.1  christos 	}
    318      1.1  christos }
    319      1.1  christos 
    320      1.1  christos /*
    321      1.1  christos  * Compute edge dominators.
    322      1.1  christos  * Assumes graph has been leveled and predecessors established.
    323      1.1  christos  */
    324      1.1  christos static void
    325      1.1  christos find_edom(root)
    326      1.1  christos 	struct block *root;
    327      1.1  christos {
    328      1.1  christos 	int i;
    329      1.1  christos 	uset x;
    330      1.1  christos 	struct block *b;
    331      1.1  christos 
    332      1.1  christos 	x = all_edge_sets;
    333      1.1  christos 	for (i = n_edges * edgewords; --i >= 0; )
    334      1.1  christos 		x[i] = ~0;
    335      1.1  christos 
    336      1.1  christos 	/* root->level is the highest level no found. */
    337      1.1  christos 	memset(root->et.edom, 0, edgewords * sizeof(*(uset)0));
    338      1.1  christos 	memset(root->ef.edom, 0, edgewords * sizeof(*(uset)0));
    339      1.1  christos 	for (i = root->level; i >= 0; --i) {
    340      1.1  christos 		for (b = levels[i]; b != 0; b = b->link) {
    341      1.1  christos 			propedom(&b->et);
    342      1.1  christos 			propedom(&b->ef);
    343      1.1  christos 		}
    344      1.1  christos 	}
    345      1.1  christos }
    346      1.1  christos 
    347      1.1  christos /*
    348      1.1  christos  * Find the backwards transitive closure of the flow graph.  These sets
    349      1.1  christos  * are backwards in the sense that we find the set of nodes that reach
    350      1.1  christos  * a given node, not the set of nodes that can be reached by a node.
    351      1.1  christos  *
    352      1.1  christos  * Assumes graph has been leveled.
    353      1.1  christos  */
    354      1.1  christos static void
    355      1.1  christos find_closure(root)
    356      1.1  christos 	struct block *root;
    357      1.1  christos {
    358      1.1  christos 	int i;
    359      1.1  christos 	struct block *b;
    360      1.1  christos 
    361      1.1  christos 	/*
    362      1.1  christos 	 * Initialize sets to contain no nodes.
    363      1.1  christos 	 */
    364      1.1  christos 	memset((char *)all_closure_sets, 0,
    365      1.1  christos 	      n_blocks * nodewords * sizeof(*all_closure_sets));
    366      1.1  christos 
    367      1.1  christos 	/* root->level is the highest level no found. */
    368      1.1  christos 	for (i = root->level; i >= 0; --i) {
    369      1.1  christos 		for (b = levels[i]; b; b = b->link) {
    370      1.1  christos 			SET_INSERT(b->closure, b->id);
    371      1.1  christos 			if (JT(b) == 0)
    372      1.1  christos 				continue;
    373      1.1  christos 			SET_UNION(JT(b)->closure, b->closure, nodewords);
    374      1.1  christos 			SET_UNION(JF(b)->closure, b->closure, nodewords);
    375      1.1  christos 		}
    376      1.1  christos 	}
    377      1.1  christos }
    378      1.1  christos 
    379      1.1  christos /*
    380      1.1  christos  * Return the register number that is used by s.  If A and X are both
    381      1.1  christos  * used, return AX_ATOM.  If no register is used, return -1.
    382      1.1  christos  *
    383      1.1  christos  * The implementation should probably change to an array access.
    384      1.1  christos  */
    385      1.1  christos static int
    386      1.1  christos atomuse(s)
    387      1.1  christos 	struct stmt *s;
    388      1.1  christos {
    389      1.1  christos 	register int c = s->code;
    390      1.1  christos 
    391      1.1  christos 	if (c == NOP)
    392      1.1  christos 		return -1;
    393      1.1  christos 
    394      1.1  christos 	switch (BPF_CLASS(c)) {
    395      1.1  christos 
    396      1.1  christos 	case BPF_RET:
    397      1.1  christos 		return (BPF_RVAL(c) == BPF_A) ? A_ATOM :
    398      1.1  christos 			(BPF_RVAL(c) == BPF_X) ? X_ATOM : -1;
    399      1.1  christos 
    400      1.1  christos 	case BPF_LD:
    401      1.1  christos 	case BPF_LDX:
    402      1.1  christos 		return (BPF_MODE(c) == BPF_IND) ? X_ATOM :
    403      1.1  christos 			(BPF_MODE(c) == BPF_MEM) ? s->k : -1;
    404      1.1  christos 
    405      1.1  christos 	case BPF_ST:
    406      1.1  christos 		return A_ATOM;
    407      1.1  christos 
    408      1.1  christos 	case BPF_STX:
    409      1.1  christos 		return X_ATOM;
    410      1.1  christos 
    411      1.1  christos 	case BPF_JMP:
    412      1.1  christos 	case BPF_ALU:
    413      1.1  christos 		if (BPF_SRC(c) == BPF_X)
    414      1.1  christos 			return AX_ATOM;
    415      1.1  christos 		return A_ATOM;
    416      1.1  christos 
    417      1.1  christos 	case BPF_MISC:
    418      1.1  christos 		return BPF_MISCOP(c) == BPF_TXA ? X_ATOM : A_ATOM;
    419      1.1  christos 	}
    420      1.1  christos 	abort();
    421      1.1  christos 	/* NOTREACHED */
    422      1.1  christos }
    423      1.1  christos 
    424      1.1  christos /*
    425      1.1  christos  * Return the register number that is defined by 's'.  We assume that
    426      1.1  christos  * a single stmt cannot define more than one register.  If no register
    427      1.1  christos  * is defined, return -1.
    428      1.1  christos  *
    429      1.1  christos  * The implementation should probably change to an array access.
    430      1.1  christos  */
    431      1.1  christos static int
    432      1.1  christos atomdef(s)
    433      1.1  christos 	struct stmt *s;
    434      1.1  christos {
    435      1.1  christos 	if (s->code == NOP)
    436      1.1  christos 		return -1;
    437      1.1  christos 
    438      1.1  christos 	switch (BPF_CLASS(s->code)) {
    439      1.1  christos 
    440      1.1  christos 	case BPF_LD:
    441      1.1  christos 	case BPF_ALU:
    442      1.1  christos 		return A_ATOM;
    443      1.1  christos 
    444      1.1  christos 	case BPF_LDX:
    445      1.1  christos 		return X_ATOM;
    446      1.1  christos 
    447      1.1  christos 	case BPF_ST:
    448      1.1  christos 	case BPF_STX:
    449      1.1  christos 		return s->k;
    450      1.1  christos 
    451      1.1  christos 	case BPF_MISC:
    452      1.1  christos 		return BPF_MISCOP(s->code) == BPF_TAX ? X_ATOM : A_ATOM;
    453      1.1  christos 	}
    454      1.1  christos 	return -1;
    455      1.1  christos }
    456      1.1  christos 
    457      1.1  christos /*
    458      1.1  christos  * Compute the sets of registers used, defined, and killed by 'b'.
    459      1.1  christos  *
    460      1.1  christos  * "Used" means that a statement in 'b' uses the register before any
    461      1.1  christos  * statement in 'b' defines it, i.e. it uses the value left in
    462      1.1  christos  * that register by a predecessor block of this block.
    463      1.1  christos  * "Defined" means that a statement in 'b' defines it.
    464      1.1  christos  * "Killed" means that a statement in 'b' defines it before any
    465      1.1  christos  * statement in 'b' uses it, i.e. it kills the value left in that
    466      1.1  christos  * register by a predecessor block of this block.
    467      1.1  christos  */
    468      1.1  christos static void
    469      1.1  christos compute_local_ud(b)
    470      1.1  christos 	struct block *b;
    471      1.1  christos {
    472      1.1  christos 	struct slist *s;
    473      1.1  christos 	atomset def = 0, use = 0, kill = 0;
    474      1.1  christos 	int atom;
    475      1.1  christos 
    476      1.1  christos 	for (s = b->stmts; s; s = s->next) {
    477      1.1  christos 		if (s->s.code == NOP)
    478      1.1  christos 			continue;
    479      1.1  christos 		atom = atomuse(&s->s);
    480      1.1  christos 		if (atom >= 0) {
    481      1.1  christos 			if (atom == AX_ATOM) {
    482      1.1  christos 				if (!ATOMELEM(def, X_ATOM))
    483      1.1  christos 					use |= ATOMMASK(X_ATOM);
    484      1.1  christos 				if (!ATOMELEM(def, A_ATOM))
    485      1.1  christos 					use |= ATOMMASK(A_ATOM);
    486      1.1  christos 			}
    487      1.1  christos 			else if (atom < N_ATOMS) {
    488      1.1  christos 				if (!ATOMELEM(def, atom))
    489      1.1  christos 					use |= ATOMMASK(atom);
    490      1.1  christos 			}
    491      1.1  christos 			else
    492      1.1  christos 				abort();
    493      1.1  christos 		}
    494      1.1  christos 		atom = atomdef(&s->s);
    495      1.1  christos 		if (atom >= 0) {
    496      1.1  christos 			if (!ATOMELEM(use, atom))
    497      1.1  christos 				kill |= ATOMMASK(atom);
    498      1.1  christos 			def |= ATOMMASK(atom);
    499      1.1  christos 		}
    500      1.1  christos 	}
    501      1.1  christos 	if (BPF_CLASS(b->s.code) == BPF_JMP) {
    502      1.1  christos 		/*
    503      1.1  christos 		 * XXX - what about RET?
    504      1.1  christos 		 */
    505      1.1  christos 		atom = atomuse(&b->s);
    506      1.1  christos 		if (atom >= 0) {
    507      1.1  christos 			if (atom == AX_ATOM) {
    508      1.1  christos 				if (!ATOMELEM(def, X_ATOM))
    509      1.1  christos 					use |= ATOMMASK(X_ATOM);
    510      1.1  christos 				if (!ATOMELEM(def, A_ATOM))
    511      1.1  christos 					use |= ATOMMASK(A_ATOM);
    512      1.1  christos 			}
    513      1.1  christos 			else if (atom < N_ATOMS) {
    514      1.1  christos 				if (!ATOMELEM(def, atom))
    515      1.1  christos 					use |= ATOMMASK(atom);
    516      1.1  christos 			}
    517      1.1  christos 			else
    518      1.1  christos 				abort();
    519      1.1  christos 		}
    520      1.1  christos 	}
    521      1.1  christos 
    522      1.1  christos 	b->def = def;
    523      1.1  christos 	b->kill = kill;
    524      1.1  christos 	b->in_use = use;
    525      1.1  christos }
    526      1.1  christos 
    527      1.1  christos /*
    528      1.1  christos  * Assume graph is already leveled.
    529      1.1  christos  */
    530      1.1  christos static void
    531      1.1  christos find_ud(root)
    532      1.1  christos 	struct block *root;
    533      1.1  christos {
    534      1.1  christos 	int i, maxlevel;
    535      1.1  christos 	struct block *p;
    536      1.1  christos 
    537      1.1  christos 	/*
    538      1.1  christos 	 * root->level is the highest level no found;
    539      1.1  christos 	 * count down from there.
    540      1.1  christos 	 */
    541      1.1  christos 	maxlevel = root->level;
    542      1.1  christos 	for (i = maxlevel; i >= 0; --i)
    543      1.1  christos 		for (p = levels[i]; p; p = p->link) {
    544      1.1  christos 			compute_local_ud(p);
    545      1.1  christos 			p->out_use = 0;
    546      1.1  christos 		}
    547      1.1  christos 
    548      1.1  christos 	for (i = 1; i <= maxlevel; ++i) {
    549      1.1  christos 		for (p = levels[i]; p; p = p->link) {
    550      1.1  christos 			p->out_use |= JT(p)->in_use | JF(p)->in_use;
    551      1.1  christos 			p->in_use |= p->out_use &~ p->kill;
    552      1.1  christos 		}
    553      1.1  christos 	}
    554      1.1  christos }
    555      1.1  christos 
    556      1.1  christos /*
    557      1.1  christos  * These data structures are used in a Cocke and Schwartz style
    558      1.1  christos  * value numbering scheme.  Since the flowgraph is acyclic,
    559      1.1  christos  * exit values can be propagated from a node's predecessors
    560      1.1  christos  * provided it is uniquely defined.
    561      1.1  christos  */
/*
 * One entry in the value-numbering hash table: the (code, v0, v1) triple
 * that was looked up and the value number assigned to it.  Entries that
 * hash to the same bucket are chained through 'next'.
 */
struct valnode {
	int code;		/* opcode part of the key */
	int v0;			/* first operand part of the key */
	int v1;			/* second operand part of the key */
	int val;		/* value number assigned to this key */
	struct valnode *next;	/* next entry in the same hash bucket */
};
    568      1.1  christos 
    569      1.1  christos #define MODULUS 213
    570      1.1  christos static struct valnode *hashtbl[MODULUS];
    571      1.1  christos static int curval;
    572      1.1  christos static int maxval;
    573      1.1  christos 
    574      1.1  christos /* Integer constants mapped with the load immediate opcode. */
    575      1.1  christos #define K(i) F(BPF_LD|BPF_IMM|BPF_W, i, 0L)
    576      1.1  christos 
    577      1.1  christos struct vmapinfo {
    578      1.1  christos 	int is_const;
    579      1.1  christos 	bpf_int32 const_val;
    580      1.1  christos };
    581      1.1  christos 
    582      1.1  christos struct vmapinfo *vmap;
    583      1.1  christos struct valnode *vnode_base;
    584      1.1  christos struct valnode *next_vnode;
    585      1.1  christos 
    586      1.1  christos static void
    587      1.1  christos init_val()
    588      1.1  christos {
    589      1.1  christos 	curval = 0;
    590      1.1  christos 	next_vnode = vnode_base;
    591      1.1  christos 	memset((char *)vmap, 0, maxval * sizeof(*vmap));
    592      1.1  christos 	memset((char *)hashtbl, 0, sizeof hashtbl);
    593      1.1  christos }
    594      1.1  christos 
    595      1.1  christos /* Because we really don't have an IR, this stuff is a little messy. */
    596      1.1  christos static int
    597      1.1  christos F(code, v0, v1)
    598      1.1  christos 	int code;
    599      1.1  christos 	int v0, v1;
    600      1.1  christos {
    601      1.1  christos 	u_int hash;
    602      1.1  christos 	int val;
    603      1.1  christos 	struct valnode *p;
    604      1.1  christos 
    605      1.1  christos 	hash = (u_int)code ^ (v0 << 4) ^ (v1 << 8);
    606      1.1  christos 	hash %= MODULUS;
    607      1.1  christos 
    608      1.1  christos 	for (p = hashtbl[hash]; p; p = p->next)
    609      1.1  christos 		if (p->code == code && p->v0 == v0 && p->v1 == v1)
    610      1.1  christos 			return p->val;
    611      1.1  christos 
    612      1.1  christos 	val = ++curval;
    613      1.1  christos 	if (BPF_MODE(code) == BPF_IMM &&
    614      1.1  christos 	    (BPF_CLASS(code) == BPF_LD || BPF_CLASS(code) == BPF_LDX)) {
    615      1.1  christos 		vmap[val].const_val = v0;
    616      1.1  christos 		vmap[val].is_const = 1;
    617      1.1  christos 	}
    618      1.1  christos 	p = next_vnode++;
    619      1.1  christos 	p->val = val;
    620      1.1  christos 	p->code = code;
    621      1.1  christos 	p->v0 = v0;
    622      1.1  christos 	p->v1 = v1;
    623      1.1  christos 	p->next = hashtbl[hash];
    624      1.1  christos 	hashtbl[hash] = p;
    625      1.1  christos 
    626      1.1  christos 	return val;
    627      1.1  christos }
    628      1.1  christos 
    629      1.1  christos static inline void
    630      1.1  christos vstore(s, valp, newval, alter)
    631      1.1  christos 	struct stmt *s;
    632      1.1  christos 	int *valp;
    633      1.1  christos 	int newval;
    634      1.1  christos 	int alter;
    635      1.1  christos {
    636      1.1  christos 	if (alter && *valp == newval)
    637      1.1  christos 		s->code = NOP;
    638      1.1  christos 	else
    639      1.1  christos 		*valp = newval;
    640      1.1  christos }
    641      1.1  christos 
    642      1.1  christos static void
    643      1.1  christos fold_op(s, v0, v1)
    644      1.1  christos 	struct stmt *s;
    645      1.1  christos 	int v0, v1;
    646      1.1  christos {
    647      1.1  christos 	bpf_u_int32 a, b;
    648      1.1  christos 
    649      1.1  christos 	a = vmap[v0].const_val;
    650      1.1  christos 	b = vmap[v1].const_val;
    651      1.1  christos 
    652      1.1  christos 	switch (BPF_OP(s->code)) {
    653      1.1  christos 	case BPF_ADD:
    654      1.1  christos 		a += b;
    655      1.1  christos 		break;
    656      1.1  christos 
    657      1.1  christos 	case BPF_SUB:
    658      1.1  christos 		a -= b;
    659      1.1  christos 		break;
    660      1.1  christos 
    661      1.1  christos 	case BPF_MUL:
    662      1.1  christos 		a *= b;
    663      1.1  christos 		break;
    664      1.1  christos 
    665      1.1  christos 	case BPF_DIV:
    666      1.1  christos 		if (b == 0)
    667      1.1  christos 			bpf_error("division by zero");
    668      1.1  christos 		a /= b;
    669      1.1  christos 		break;
    670      1.1  christos 
    671      1.1  christos 	case BPF_AND:
    672      1.1  christos 		a &= b;
    673      1.1  christos 		break;
    674      1.1  christos 
    675      1.1  christos 	case BPF_OR:
    676      1.1  christos 		a |= b;
    677      1.1  christos 		break;
    678      1.1  christos 
    679      1.1  christos 	case BPF_LSH:
    680      1.1  christos 		a <<= b;
    681      1.1  christos 		break;
    682      1.1  christos 
    683      1.1  christos 	case BPF_RSH:
    684      1.1  christos 		a >>= b;
    685      1.1  christos 		break;
    686      1.1  christos 
    687      1.1  christos 	case BPF_NEG:
    688      1.1  christos 		a = -a;
    689      1.1  christos 		break;
    690      1.1  christos 
    691      1.1  christos 	default:
    692      1.1  christos 		abort();
    693      1.1  christos 	}
    694      1.1  christos 	s->k = a;
    695      1.1  christos 	s->code = BPF_LD|BPF_IMM;
    696      1.1  christos 	done = 0;
    697      1.1  christos }
    698      1.1  christos 
    699      1.1  christos static inline struct slist *
    700      1.1  christos this_op(s)
    701      1.1  christos 	struct slist *s;
    702      1.1  christos {
    703      1.1  christos 	while (s != 0 && s->s.code == NOP)
    704      1.1  christos 		s = s->next;
    705      1.1  christos 	return s;
    706      1.1  christos }
    707      1.1  christos 
    708      1.1  christos static void
    709      1.1  christos opt_not(b)
    710      1.1  christos 	struct block *b;
    711      1.1  christos {
    712      1.1  christos 	struct block *tmp = JT(b);
    713      1.1  christos 
    714      1.1  christos 	JT(b) = JF(b);
    715      1.1  christos 	JF(b) = tmp;
    716      1.1  christos }
    717      1.1  christos 
    718      1.1  christos static void
    719      1.1  christos opt_peep(b)
    720      1.1  christos 	struct block *b;
    721      1.1  christos {
    722      1.1  christos 	struct slist *s;
    723      1.1  christos 	struct slist *next, *last;
    724      1.1  christos 	int val;
    725      1.1  christos 
    726      1.1  christos 	s = b->stmts;
    727      1.1  christos 	if (s == 0)
    728      1.1  christos 		return;
    729      1.1  christos 
    730      1.1  christos 	last = s;
    731      1.1  christos 	for (/*empty*/; /*empty*/; s = next) {
    732      1.1  christos 		/*
    733      1.1  christos 		 * Skip over nops.
    734      1.1  christos 		 */
    735      1.1  christos 		s = this_op(s);
    736      1.1  christos 		if (s == 0)
    737      1.1  christos 			break;	/* nothing left in the block */
    738      1.1  christos 
    739      1.1  christos 		/*
    740      1.1  christos 		 * Find the next real instruction after that one
    741      1.1  christos 		 * (skipping nops).
    742      1.1  christos 		 */
    743      1.1  christos 		next = this_op(s->next);
    744      1.1  christos 		if (next == 0)
    745      1.1  christos 			break;	/* no next instruction */
    746      1.1  christos 		last = next;
    747      1.1  christos 
    748      1.1  christos 		/*
    749      1.1  christos 		 * st  M[k]	-->	st  M[k]
    750      1.1  christos 		 * ldx M[k]		tax
    751      1.1  christos 		 */
    752      1.1  christos 		if (s->s.code == BPF_ST &&
    753      1.1  christos 		    next->s.code == (BPF_LDX|BPF_MEM) &&
    754      1.1  christos 		    s->s.k == next->s.k) {
    755      1.1  christos 			done = 0;
    756      1.1  christos 			next->s.code = BPF_MISC|BPF_TAX;
    757      1.1  christos 		}
    758      1.1  christos 		/*
    759      1.1  christos 		 * ld  #k	-->	ldx  #k
    760      1.1  christos 		 * tax			txa
    761      1.1  christos 		 */
    762      1.1  christos 		if (s->s.code == (BPF_LD|BPF_IMM) &&
    763      1.1  christos 		    next->s.code == (BPF_MISC|BPF_TAX)) {
    764      1.1  christos 			s->s.code = BPF_LDX|BPF_IMM;
    765      1.1  christos 			next->s.code = BPF_MISC|BPF_TXA;
    766      1.1  christos 			done = 0;
    767      1.1  christos 		}
    768      1.1  christos 		/*
    769      1.1  christos 		 * This is an ugly special case, but it happens
    770      1.1  christos 		 * when you say tcp[k] or udp[k] where k is a constant.
    771      1.1  christos 		 */
    772      1.1  christos 		if (s->s.code == (BPF_LD|BPF_IMM)) {
    773      1.1  christos 			struct slist *add, *tax, *ild;
    774      1.1  christos 
    775      1.1  christos 			/*
    776      1.1  christos 			 * Check that X isn't used on exit from this
    777      1.1  christos 			 * block (which the optimizer might cause).
    778      1.1  christos 			 * We know the code generator won't generate
    779      1.1  christos 			 * any local dependencies.
    780      1.1  christos 			 */
    781      1.1  christos 			if (ATOMELEM(b->out_use, X_ATOM))
    782      1.1  christos 				continue;
    783      1.1  christos 
    784      1.1  christos 			/*
    785      1.1  christos 			 * Check that the instruction following the ldi
    786      1.1  christos 			 * is an addx, or it's an ldxms with an addx
    787      1.1  christos 			 * following it (with 0 or more nops between the
    788      1.1  christos 			 * ldxms and addx).
    789      1.1  christos 			 */
    790      1.1  christos 			if (next->s.code != (BPF_LDX|BPF_MSH|BPF_B))
    791      1.1  christos 				add = next;
    792      1.1  christos 			else
    793      1.1  christos 				add = this_op(next->next);
    794      1.1  christos 			if (add == 0 || add->s.code != (BPF_ALU|BPF_ADD|BPF_X))
    795      1.1  christos 				continue;
    796      1.1  christos 
    797      1.1  christos 			/*
    798      1.1  christos 			 * Check that a tax follows that (with 0 or more
    799      1.1  christos 			 * nops between them).
    800      1.1  christos 			 */
    801      1.1  christos 			tax = this_op(add->next);
    802      1.1  christos 			if (tax == 0 || tax->s.code != (BPF_MISC|BPF_TAX))
    803      1.1  christos 				continue;
    804      1.1  christos 
    805      1.1  christos 			/*
    806      1.1  christos 			 * Check that an ild follows that (with 0 or more
    807      1.1  christos 			 * nops between them).
    808      1.1  christos 			 */
    809      1.1  christos 			ild = this_op(tax->next);
    810      1.1  christos 			if (ild == 0 || BPF_CLASS(ild->s.code) != BPF_LD ||
    811      1.1  christos 			    BPF_MODE(ild->s.code) != BPF_IND)
    812      1.1  christos 				continue;
    813      1.1  christos 			/*
    814      1.1  christos 			 * We want to turn this sequence:
    815      1.1  christos 			 *
    816      1.1  christos 			 * (004) ldi     #0x2		{s}
    817      1.1  christos 			 * (005) ldxms   [14]		{next}  -- optional
    818      1.1  christos 			 * (006) addx			{add}
    819      1.1  christos 			 * (007) tax			{tax}
    820      1.1  christos 			 * (008) ild     [x+0]		{ild}
    821      1.1  christos 			 *
    822      1.1  christos 			 * into this sequence:
    823      1.1  christos 			 *
    824      1.1  christos 			 * (004) nop
    825      1.1  christos 			 * (005) ldxms   [14]
    826      1.1  christos 			 * (006) nop
    827      1.1  christos 			 * (007) nop
    828      1.1  christos 			 * (008) ild     [x+2]
    829      1.1  christos 			 *
    830      1.1  christos 			 * XXX We need to check that X is not
    831      1.1  christos 			 * subsequently used, because we want to change
    832      1.1  christos 			 * what'll be in it after this sequence.
    833      1.1  christos 			 *
    834      1.1  christos 			 * We know we can eliminate the accumulator
    835      1.1  christos 			 * modifications earlier in the sequence since
    836      1.1  christos 			 * it is defined by the last stmt of this sequence
    837      1.1  christos 			 * (i.e., the last statement of the sequence loads
    838      1.1  christos 			 * a value into the accumulator, so we can eliminate
    839      1.1  christos 			 * earlier operations on the accumulator).
    840      1.1  christos 			 */
    841      1.1  christos 			ild->s.k += s->s.k;
    842      1.1  christos 			s->s.code = NOP;
    843      1.1  christos 			add->s.code = NOP;
    844      1.1  christos 			tax->s.code = NOP;
    845      1.1  christos 			done = 0;
    846      1.1  christos 		}
    847      1.1  christos 	}
    848      1.1  christos 	/*
    849      1.1  christos 	 * If the comparison at the end of a block is an equality
    850      1.1  christos 	 * comparison against a constant, and nobody uses the value
    851      1.1  christos 	 * we leave in the A register at the end of a block, and
    852      1.1  christos 	 * the operation preceding the comparison is an arithmetic
    853      1.1  christos 	 * operation, we can sometime optimize it away.
    854      1.1  christos 	 */
    855      1.1  christos 	if (b->s.code == (BPF_JMP|BPF_JEQ|BPF_K) &&
    856      1.1  christos 	    !ATOMELEM(b->out_use, A_ATOM)) {
    857      1.1  christos 	    	/*
    858      1.1  christos 	    	 * We can optimize away certain subtractions of the
    859      1.1  christos 	    	 * X register.
    860      1.1  christos 	    	 */
    861      1.1  christos 		if (last->s.code == (BPF_ALU|BPF_SUB|BPF_X)) {
    862      1.1  christos 			val = b->val[X_ATOM];
    863      1.1  christos 			if (vmap[val].is_const) {
    864      1.1  christos 				/*
    865      1.1  christos 				 * If we have a subtract to do a comparison,
    866      1.1  christos 				 * and the X register is a known constant,
    867      1.1  christos 				 * we can merge this value into the
    868      1.1  christos 				 * comparison:
    869      1.1  christos 				 *
    870      1.1  christos 				 * sub x  ->	nop
    871      1.1  christos 				 * jeq #y	jeq #(x+y)
    872      1.1  christos 				 */
    873      1.1  christos 				b->s.k += vmap[val].const_val;
    874      1.1  christos 				last->s.code = NOP;
    875      1.1  christos 				done = 0;
    876      1.1  christos 			} else if (b->s.k == 0) {
    877      1.1  christos 				/*
    878      1.1  christos 				 * If the X register isn't a constant,
    879      1.1  christos 				 * and the comparison in the test is
    880      1.1  christos 				 * against 0, we can compare with the
    881      1.1  christos 				 * X register, instead:
    882      1.1  christos 				 *
    883      1.1  christos 				 * sub x  ->	nop
    884      1.1  christos 				 * jeq #0	jeq x
    885      1.1  christos 				 */
    886      1.1  christos 				last->s.code = NOP;
    887      1.1  christos 				b->s.code = BPF_JMP|BPF_JEQ|BPF_X;
    888      1.1  christos 				done = 0;
    889      1.1  christos 			}
    890      1.1  christos 		}
    891      1.1  christos 		/*
    892      1.1  christos 		 * Likewise, a constant subtract can be simplified:
    893      1.1  christos 		 *
    894      1.1  christos 		 * sub #x ->	nop
    895      1.1  christos 		 * jeq #y ->	jeq #(x+y)
    896      1.1  christos 		 */
    897      1.1  christos 		else if (last->s.code == (BPF_ALU|BPF_SUB|BPF_K)) {
    898      1.1  christos 			last->s.code = NOP;
    899      1.1  christos 			b->s.k += last->s.k;
    900      1.1  christos 			done = 0;
    901      1.1  christos 		}
    902      1.1  christos 		/*
    903      1.1  christos 		 * And, similarly, a constant AND can be simplified
    904      1.1  christos 		 * if we're testing against 0, i.e.:
    905      1.1  christos 		 *
    906      1.1  christos 		 * and #k	nop
    907      1.1  christos 		 * jeq #0  ->	jset #k
    908      1.1  christos 		 */
    909      1.1  christos 		else if (last->s.code == (BPF_ALU|BPF_AND|BPF_K) &&
    910      1.1  christos 		    b->s.k == 0) {
    911      1.1  christos 			b->s.k = last->s.k;
    912      1.1  christos 			b->s.code = BPF_JMP|BPF_K|BPF_JSET;
    913      1.1  christos 			last->s.code = NOP;
    914      1.1  christos 			done = 0;
    915      1.1  christos 			opt_not(b);
    916      1.1  christos 		}
    917      1.1  christos 	}
    918      1.1  christos 	/*
    919      1.1  christos 	 * jset #0        ->   never
    920      1.1  christos 	 * jset #ffffffff ->   always
    921      1.1  christos 	 */
    922      1.1  christos 	if (b->s.code == (BPF_JMP|BPF_K|BPF_JSET)) {
    923      1.1  christos 		if (b->s.k == 0)
    924      1.1  christos 			JT(b) = JF(b);
    925      1.1  christos 		if (b->s.k == 0xffffffff)
    926      1.1  christos 			JF(b) = JT(b);
    927      1.1  christos 	}
    928      1.1  christos 	/*
    929      1.1  christos 	 * If we're comparing against the index register, and the index
    930      1.1  christos 	 * register is a known constant, we can just compare against that
    931      1.1  christos 	 * constant.
    932      1.1  christos 	 */
    933      1.1  christos 	val = b->val[X_ATOM];
    934      1.1  christos 	if (vmap[val].is_const && BPF_SRC(b->s.code) == BPF_X) {
    935      1.1  christos 		bpf_int32 v = vmap[val].const_val;
    936      1.1  christos 		b->s.code &= ~BPF_X;
    937      1.1  christos 		b->s.k = v;
    938      1.1  christos 	}
    939      1.1  christos 	/*
    940      1.1  christos 	 * If the accumulator is a known constant, we can compute the
    941      1.1  christos 	 * comparison result.
    942      1.1  christos 	 */
    943      1.1  christos 	val = b->val[A_ATOM];
    944      1.1  christos 	if (vmap[val].is_const && BPF_SRC(b->s.code) == BPF_K) {
    945      1.1  christos 		bpf_int32 v = vmap[val].const_val;
    946      1.1  christos 		switch (BPF_OP(b->s.code)) {
    947      1.1  christos 
    948      1.1  christos 		case BPF_JEQ:
    949      1.1  christos 			v = v == b->s.k;
    950      1.1  christos 			break;
    951      1.1  christos 
    952      1.1  christos 		case BPF_JGT:
    953      1.1  christos 			v = (unsigned)v > b->s.k;
    954      1.1  christos 			break;
    955      1.1  christos 
    956      1.1  christos 		case BPF_JGE:
    957      1.1  christos 			v = (unsigned)v >= b->s.k;
    958      1.1  christos 			break;
    959      1.1  christos 
    960      1.1  christos 		case BPF_JSET:
    961      1.1  christos 			v &= b->s.k;
    962      1.1  christos 			break;
    963      1.1  christos 
    964      1.1  christos 		default:
    965      1.1  christos 			abort();
    966      1.1  christos 		}
    967      1.1  christos 		if (JF(b) != JT(b))
    968      1.1  christos 			done = 0;
    969      1.1  christos 		if (v)
    970      1.1  christos 			JF(b) = JT(b);
    971      1.1  christos 		else
    972      1.1  christos 			JT(b) = JF(b);
    973      1.1  christos 	}
    974      1.1  christos }
    975      1.1  christos 
    976      1.1  christos /*
    977      1.1  christos  * Compute the symbolic value of expression of 's', and update
    978      1.1  christos  * anything it defines in the value table 'val'.  If 'alter' is true,
    979      1.1  christos  * do various optimizations.  This code would be cleaner if symbolic
    980      1.1  christos  * evaluation and code transformations weren't folded together.
    981      1.1  christos  */
    982      1.1  christos static void
    983      1.1  christos opt_stmt(s, val, alter)
    984      1.1  christos 	struct stmt *s;
    985      1.1  christos 	int val[];
    986      1.1  christos 	int alter;
    987      1.1  christos {
    988      1.1  christos 	int op;
    989      1.1  christos 	int v;
    990      1.1  christos 
    991      1.1  christos 	switch (s->code) {
    992      1.1  christos 
    993      1.1  christos 	case BPF_LD|BPF_ABS|BPF_W:
    994      1.1  christos 	case BPF_LD|BPF_ABS|BPF_H:
    995      1.1  christos 	case BPF_LD|BPF_ABS|BPF_B:
    996      1.1  christos 		v = F(s->code, s->k, 0L);
    997      1.1  christos 		vstore(s, &val[A_ATOM], v, alter);
    998      1.1  christos 		break;
    999      1.1  christos 
   1000      1.1  christos 	case BPF_LD|BPF_IND|BPF_W:
   1001      1.1  christos 	case BPF_LD|BPF_IND|BPF_H:
   1002      1.1  christos 	case BPF_LD|BPF_IND|BPF_B:
   1003      1.1  christos 		v = val[X_ATOM];
   1004      1.1  christos 		if (alter && vmap[v].is_const) {
   1005      1.1  christos 			s->code = BPF_LD|BPF_ABS|BPF_SIZE(s->code);
   1006      1.1  christos 			s->k += vmap[v].const_val;
   1007      1.1  christos 			v = F(s->code, s->k, 0L);
   1008      1.1  christos 			done = 0;
   1009      1.1  christos 		}
   1010      1.1  christos 		else
   1011      1.1  christos 			v = F(s->code, s->k, v);
   1012      1.1  christos 		vstore(s, &val[A_ATOM], v, alter);
   1013      1.1  christos 		break;
   1014      1.1  christos 
   1015      1.1  christos 	case BPF_LD|BPF_LEN:
   1016      1.1  christos 		v = F(s->code, 0L, 0L);
   1017      1.1  christos 		vstore(s, &val[A_ATOM], v, alter);
   1018      1.1  christos 		break;
   1019      1.1  christos 
   1020      1.1  christos 	case BPF_LD|BPF_IMM:
   1021      1.1  christos 		v = K(s->k);
   1022      1.1  christos 		vstore(s, &val[A_ATOM], v, alter);
   1023      1.1  christos 		break;
   1024      1.1  christos 
   1025      1.1  christos 	case BPF_LDX|BPF_IMM:
   1026      1.1  christos 		v = K(s->k);
   1027      1.1  christos 		vstore(s, &val[X_ATOM], v, alter);
   1028      1.1  christos 		break;
   1029      1.1  christos 
   1030      1.1  christos 	case BPF_LDX|BPF_MSH|BPF_B:
   1031      1.1  christos 		v = F(s->code, s->k, 0L);
   1032      1.1  christos 		vstore(s, &val[X_ATOM], v, alter);
   1033      1.1  christos 		break;
   1034      1.1  christos 
   1035      1.1  christos 	case BPF_ALU|BPF_NEG:
   1036      1.1  christos 		if (alter && vmap[val[A_ATOM]].is_const) {
   1037      1.1  christos 			s->code = BPF_LD|BPF_IMM;
   1038      1.1  christos 			s->k = -vmap[val[A_ATOM]].const_val;
   1039      1.1  christos 			val[A_ATOM] = K(s->k);
   1040      1.1  christos 		}
   1041      1.1  christos 		else
   1042      1.1  christos 			val[A_ATOM] = F(s->code, val[A_ATOM], 0L);
   1043      1.1  christos 		break;
   1044      1.1  christos 
   1045      1.1  christos 	case BPF_ALU|BPF_ADD|BPF_K:
   1046      1.1  christos 	case BPF_ALU|BPF_SUB|BPF_K:
   1047      1.1  christos 	case BPF_ALU|BPF_MUL|BPF_K:
   1048      1.1  christos 	case BPF_ALU|BPF_DIV|BPF_K:
   1049      1.1  christos 	case BPF_ALU|BPF_AND|BPF_K:
   1050      1.1  christos 	case BPF_ALU|BPF_OR|BPF_K:
   1051      1.1  christos 	case BPF_ALU|BPF_LSH|BPF_K:
   1052      1.1  christos 	case BPF_ALU|BPF_RSH|BPF_K:
   1053      1.1  christos 		op = BPF_OP(s->code);
   1054      1.1  christos 		if (alter) {
   1055      1.1  christos 			if (s->k == 0) {
   1056      1.1  christos 				/* don't optimize away "sub #0"
   1057      1.1  christos 				 * as it may be needed later to
   1058      1.1  christos 				 * fixup the generated math code */
   1059      1.1  christos 				if (op == BPF_ADD ||
   1060      1.1  christos 				    op == BPF_LSH || op == BPF_RSH ||
   1061      1.1  christos 				    op == BPF_OR) {
   1062      1.1  christos 					s->code = NOP;
   1063      1.1  christos 					break;
   1064      1.1  christos 				}
   1065      1.1  christos 				if (op == BPF_MUL || op == BPF_AND) {
   1066      1.1  christos 					s->code = BPF_LD|BPF_IMM;
   1067      1.1  christos 					val[A_ATOM] = K(s->k);
   1068      1.1  christos 					break;
   1069      1.1  christos 				}
   1070      1.1  christos 			}
   1071      1.1  christos 			if (vmap[val[A_ATOM]].is_const) {
   1072      1.1  christos 				fold_op(s, val[A_ATOM], K(s->k));
   1073      1.1  christos 				val[A_ATOM] = K(s->k);
   1074      1.1  christos 				break;
   1075      1.1  christos 			}
   1076      1.1  christos 		}
   1077      1.1  christos 		val[A_ATOM] = F(s->code, val[A_ATOM], K(s->k));
   1078      1.1  christos 		break;
   1079      1.1  christos 
   1080      1.1  christos 	case BPF_ALU|BPF_ADD|BPF_X:
   1081      1.1  christos 	case BPF_ALU|BPF_SUB|BPF_X:
   1082      1.1  christos 	case BPF_ALU|BPF_MUL|BPF_X:
   1083      1.1  christos 	case BPF_ALU|BPF_DIV|BPF_X:
   1084      1.1  christos 	case BPF_ALU|BPF_AND|BPF_X:
   1085      1.1  christos 	case BPF_ALU|BPF_OR|BPF_X:
   1086      1.1  christos 	case BPF_ALU|BPF_LSH|BPF_X:
   1087      1.1  christos 	case BPF_ALU|BPF_RSH|BPF_X:
   1088      1.1  christos 		op = BPF_OP(s->code);
   1089      1.1  christos 		if (alter && vmap[val[X_ATOM]].is_const) {
   1090      1.1  christos 			if (vmap[val[A_ATOM]].is_const) {
   1091      1.1  christos 				fold_op(s, val[A_ATOM], val[X_ATOM]);
   1092      1.1  christos 				val[A_ATOM] = K(s->k);
   1093      1.1  christos 			}
   1094      1.1  christos 			else {
   1095      1.1  christos 				s->code = BPF_ALU|BPF_K|op;
   1096      1.1  christos 				s->k = vmap[val[X_ATOM]].const_val;
   1097      1.1  christos 				done = 0;
   1098      1.1  christos 				val[A_ATOM] =
   1099      1.1  christos 					F(s->code, val[A_ATOM], K(s->k));
   1100      1.1  christos 			}
   1101      1.1  christos 			break;
   1102      1.1  christos 		}
   1103      1.1  christos 		/*
   1104      1.1  christos 		 * Check if we're doing something to an accumulator
   1105      1.1  christos 		 * that is 0, and simplify.  This may not seem like
   1106      1.1  christos 		 * much of a simplification but it could open up further
   1107      1.1  christos 		 * optimizations.
   1108      1.1  christos 		 * XXX We could also check for mul by 1, etc.
   1109      1.1  christos 		 */
   1110      1.1  christos 		if (alter && vmap[val[A_ATOM]].is_const
   1111      1.1  christos 		    && vmap[val[A_ATOM]].const_val == 0) {
   1112      1.1  christos 			if (op == BPF_ADD || op == BPF_OR) {
   1113      1.1  christos 				s->code = BPF_MISC|BPF_TXA;
   1114      1.1  christos 				vstore(s, &val[A_ATOM], val[X_ATOM], alter);
   1115      1.1  christos 				break;
   1116      1.1  christos 			}
   1117      1.1  christos 			else if (op == BPF_MUL || op == BPF_DIV ||
   1118      1.1  christos 				 op == BPF_AND || op == BPF_LSH || op == BPF_RSH) {
   1119      1.1  christos 				s->code = BPF_LD|BPF_IMM;
   1120      1.1  christos 				s->k = 0;
   1121      1.1  christos 				vstore(s, &val[A_ATOM], K(s->k), alter);
   1122      1.1  christos 				break;
   1123      1.1  christos 			}
   1124      1.1  christos 			else if (op == BPF_NEG) {
   1125      1.1  christos 				s->code = NOP;
   1126      1.1  christos 				break;
   1127      1.1  christos 			}
   1128      1.1  christos 		}
   1129      1.1  christos 		val[A_ATOM] = F(s->code, val[A_ATOM], val[X_ATOM]);
   1130      1.1  christos 		break;
   1131      1.1  christos 
   1132      1.1  christos 	case BPF_MISC|BPF_TXA:
   1133      1.1  christos 		vstore(s, &val[A_ATOM], val[X_ATOM], alter);
   1134      1.1  christos 		break;
   1135      1.1  christos 
   1136      1.1  christos 	case BPF_LD|BPF_MEM:
   1137      1.1  christos 		v = val[s->k];
   1138      1.1  christos 		if (alter && vmap[v].is_const) {
   1139      1.1  christos 			s->code = BPF_LD|BPF_IMM;
   1140      1.1  christos 			s->k = vmap[v].const_val;
   1141      1.1  christos 			done = 0;
   1142      1.1  christos 		}
   1143      1.1  christos 		vstore(s, &val[A_ATOM], v, alter);
   1144      1.1  christos 		break;
   1145      1.1  christos 
   1146      1.1  christos 	case BPF_MISC|BPF_TAX:
   1147      1.1  christos 		vstore(s, &val[X_ATOM], val[A_ATOM], alter);
   1148      1.1  christos 		break;
   1149      1.1  christos 
   1150      1.1  christos 	case BPF_LDX|BPF_MEM:
   1151      1.1  christos 		v = val[s->k];
   1152      1.1  christos 		if (alter && vmap[v].is_const) {
   1153      1.1  christos 			s->code = BPF_LDX|BPF_IMM;
   1154      1.1  christos 			s->k = vmap[v].const_val;
   1155      1.1  christos 			done = 0;
   1156      1.1  christos 		}
   1157      1.1  christos 		vstore(s, &val[X_ATOM], v, alter);
   1158      1.1  christos 		break;
   1159      1.1  christos 
   1160      1.1  christos 	case BPF_ST:
   1161      1.1  christos 		vstore(s, &val[s->k], val[A_ATOM], alter);
   1162      1.1  christos 		break;
   1163      1.1  christos 
   1164      1.1  christos 	case BPF_STX:
   1165      1.1  christos 		vstore(s, &val[s->k], val[X_ATOM], alter);
   1166      1.1  christos 		break;
   1167      1.1  christos 	}
   1168      1.1  christos }
   1169      1.1  christos 
   1170      1.1  christos static void
   1171      1.1  christos deadstmt(s, last)
   1172      1.1  christos 	register struct stmt *s;
   1173      1.1  christos 	register struct stmt *last[];
   1174      1.1  christos {
   1175      1.1  christos 	register int atom;
   1176      1.1  christos 
   1177      1.1  christos 	atom = atomuse(s);
   1178      1.1  christos 	if (atom >= 0) {
   1179      1.1  christos 		if (atom == AX_ATOM) {
   1180      1.1  christos 			last[X_ATOM] = 0;
   1181      1.1  christos 			last[A_ATOM] = 0;
   1182      1.1  christos 		}
   1183      1.1  christos 		else
   1184      1.1  christos 			last[atom] = 0;
   1185      1.1  christos 	}
   1186      1.1  christos 	atom = atomdef(s);
   1187      1.1  christos 	if (atom >= 0) {
   1188      1.1  christos 		if (last[atom]) {
   1189      1.1  christos 			done = 0;
   1190      1.1  christos 			last[atom]->code = NOP;
   1191      1.1  christos 		}
   1192      1.1  christos 		last[atom] = s;
   1193      1.1  christos 	}
   1194      1.1  christos }
   1195      1.1  christos 
   1196      1.1  christos static void
   1197      1.1  christos opt_deadstores(b)
   1198      1.1  christos 	register struct block *b;
   1199      1.1  christos {
   1200      1.1  christos 	register struct slist *s;
   1201      1.1  christos 	register int atom;
   1202      1.1  christos 	struct stmt *last[N_ATOMS];
   1203      1.1  christos 
   1204      1.1  christos 	memset((char *)last, 0, sizeof last);
   1205      1.1  christos 
   1206      1.1  christos 	for (s = b->stmts; s != 0; s = s->next)
   1207      1.1  christos 		deadstmt(&s->s, last);
   1208      1.1  christos 	deadstmt(&b->s, last);
   1209      1.1  christos 
   1210      1.1  christos 	for (atom = 0; atom < N_ATOMS; ++atom)
   1211      1.1  christos 		if (last[atom] && !ATOMELEM(b->out_use, atom)) {
   1212      1.1  christos 			last[atom]->code = NOP;
   1213      1.1  christos 			done = 0;
   1214      1.1  christos 		}
   1215      1.1  christos }
   1216      1.1  christos 
   1217      1.1  christos static void
   1218      1.1  christos opt_blk(b, do_stmts)
   1219      1.1  christos 	struct block *b;
   1220      1.1  christos 	int do_stmts;
   1221      1.1  christos {
   1222      1.1  christos 	struct slist *s;
   1223      1.1  christos 	struct edge *p;
   1224      1.1  christos 	int i;
   1225      1.1  christos 	bpf_int32 aval, xval;
   1226      1.1  christos 
   1227      1.1  christos #if 0
   1228      1.1  christos 	for (s = b->stmts; s && s->next; s = s->next)
   1229      1.1  christos 		if (BPF_CLASS(s->s.code) == BPF_JMP) {
   1230      1.1  christos 			do_stmts = 0;
   1231      1.1  christos 			break;
   1232      1.1  christos 		}
   1233      1.1  christos #endif
   1234      1.1  christos 
   1235      1.1  christos 	/*
   1236      1.1  christos 	 * Initialize the atom values.
   1237      1.1  christos 	 */
   1238      1.1  christos 	p = b->in_edges;
   1239      1.1  christos 	if (p == 0) {
   1240      1.1  christos 		/*
   1241      1.1  christos 		 * We have no predecessors, so everything is undefined
   1242      1.1  christos 		 * upon entry to this block.
   1243      1.1  christos 		 */
   1244      1.1  christos 		memset((char *)b->val, 0, sizeof(b->val));
   1245      1.1  christos 	} else {
   1246      1.1  christos 		/*
   1247      1.1  christos 		 * Inherit values from our predecessors.
   1248      1.1  christos 		 *
   1249      1.1  christos 		 * First, get the values from the predecessor along the
   1250      1.1  christos 		 * first edge leading to this node.
   1251      1.1  christos 		 */
   1252      1.1  christos 		memcpy((char *)b->val, (char *)p->pred->val, sizeof(b->val));
   1253      1.1  christos 		/*
   1254      1.1  christos 		 * Now look at all the other nodes leading to this node.
   1255      1.1  christos 		 * If, for the predecessor along that edge, a register
   1256      1.1  christos 		 * has a different value from the one we have (i.e.,
   1257      1.1  christos 		 * control paths are merging, and the merging paths
   1258      1.1  christos 		 * assign different values to that register), give the
   1259      1.1  christos 		 * register the undefined value of 0.
   1260      1.1  christos 		 */
   1261      1.1  christos 		while ((p = p->next) != NULL) {
   1262      1.1  christos 			for (i = 0; i < N_ATOMS; ++i)
   1263      1.1  christos 				if (b->val[i] != p->pred->val[i])
   1264      1.1  christos 					b->val[i] = 0;
   1265      1.1  christos 		}
   1266      1.1  christos 	}
   1267      1.1  christos 	aval = b->val[A_ATOM];
   1268      1.1  christos 	xval = b->val[X_ATOM];
   1269      1.1  christos 	for (s = b->stmts; s; s = s->next)
   1270      1.1  christos 		opt_stmt(&s->s, b->val, do_stmts);
   1271      1.1  christos 
   1272      1.1  christos 	/*
   1273      1.1  christos 	 * This is a special case: if we don't use anything from this
   1274      1.1  christos 	 * block, and we load the accumulator or index register with a
   1275      1.1  christos 	 * value that is already there, or if this block is a return,
   1276      1.1  christos 	 * eliminate all the statements.
   1277      1.1  christos 	 *
   1278      1.1  christos 	 * XXX - what if it does a store?
   1279      1.1  christos 	 *
   1280      1.1  christos 	 * XXX - why does it matter whether we use anything from this
   1281      1.1  christos 	 * block?  If the accumulator or index register doesn't change
   1282      1.1  christos 	 * its value, isn't that OK even if we use that value?
   1283      1.1  christos 	 *
   1284      1.1  christos 	 * XXX - if we load the accumulator with a different value,
   1285      1.1  christos 	 * and the block ends with a conditional branch, we obviously
   1286      1.1  christos 	 * can't eliminate it, as the branch depends on that value.
   1287      1.1  christos 	 * For the index register, the conditional branch only depends
   1288      1.1  christos 	 * on the index register value if the test is against the index
   1289      1.1  christos 	 * register value rather than a constant; if nothing uses the
   1290      1.1  christos 	 * value we put into the index register, and we're not testing
   1291      1.1  christos 	 * against the index register's value, and there aren't any
   1292      1.1  christos 	 * other problems that would keep us from eliminating this
   1293      1.1  christos 	 * block, can we eliminate it?
   1294      1.1  christos 	 */
   1295      1.1  christos 	if (do_stmts &&
   1296      1.1  christos 	    ((b->out_use == 0 && aval != 0 && b->val[A_ATOM] == aval &&
   1297      1.1  christos 	      xval != 0 && b->val[X_ATOM] == xval) ||
   1298      1.1  christos 	     BPF_CLASS(b->s.code) == BPF_RET)) {
   1299      1.1  christos 		if (b->stmts != 0) {
   1300      1.1  christos 			b->stmts = 0;
   1301      1.1  christos 			done = 0;
   1302      1.1  christos 		}
   1303      1.1  christos 	} else {
   1304      1.1  christos 		opt_peep(b);
   1305      1.1  christos 		opt_deadstores(b);
   1306      1.1  christos 	}
   1307      1.1  christos 	/*
   1308      1.1  christos 	 * Set up values for branch optimizer.
   1309      1.1  christos 	 */
   1310      1.1  christos 	if (BPF_SRC(b->s.code) == BPF_K)
   1311      1.1  christos 		b->oval = K(b->s.k);
   1312      1.1  christos 	else
   1313      1.1  christos 		b->oval = b->val[X_ATOM];
   1314      1.1  christos 	b->et.code = b->s.code;
   1315      1.1  christos 	b->ef.code = -b->s.code;
   1316      1.1  christos }
   1317      1.1  christos 
   1318      1.1  christos /*
   1319      1.1  christos  * Return true if any register that is used on exit from 'succ', has
   1320      1.1  christos  * an exit value that is different from the corresponding exit value
   1321      1.1  christos  * from 'b'.
   1322      1.1  christos  */
   1323      1.1  christos static int
   1324      1.1  christos use_conflict(b, succ)
   1325      1.1  christos 	struct block *b, *succ;
   1326      1.1  christos {
   1327      1.1  christos 	int atom;
   1328      1.1  christos 	atomset use = succ->out_use;
   1329      1.1  christos 
   1330      1.1  christos 	if (use == 0)
   1331      1.1  christos 		return 0;
   1332      1.1  christos 
   1333      1.1  christos 	for (atom = 0; atom < N_ATOMS; ++atom)
   1334      1.1  christos 		if (ATOMELEM(use, atom))
   1335      1.1  christos 			if (b->val[atom] != succ->val[atom])
   1336      1.1  christos 				return 1;
   1337      1.1  christos 	return 0;
   1338      1.1  christos }
   1339      1.1  christos 
   1340      1.1  christos static struct block *
   1341      1.1  christos fold_edge(child, ep)
   1342      1.1  christos 	struct block *child;
   1343      1.1  christos 	struct edge *ep;
   1344      1.1  christos {
   1345      1.1  christos 	int sense;
   1346      1.1  christos 	int aval0, aval1, oval0, oval1;
   1347      1.1  christos 	int code = ep->code;
   1348      1.1  christos 
   1349      1.1  christos 	if (code < 0) {
   1350      1.1  christos 		code = -code;
   1351      1.1  christos 		sense = 0;
   1352      1.1  christos 	} else
   1353      1.1  christos 		sense = 1;
   1354      1.1  christos 
   1355      1.1  christos 	if (child->s.code != code)
   1356      1.1  christos 		return 0;
   1357      1.1  christos 
   1358      1.1  christos 	aval0 = child->val[A_ATOM];
   1359      1.1  christos 	oval0 = child->oval;
   1360      1.1  christos 	aval1 = ep->pred->val[A_ATOM];
   1361      1.1  christos 	oval1 = ep->pred->oval;
   1362      1.1  christos 
   1363      1.1  christos 	if (aval0 != aval1)
   1364      1.1  christos 		return 0;
   1365      1.1  christos 
   1366      1.1  christos 	if (oval0 == oval1)
   1367      1.1  christos 		/*
   1368      1.1  christos 		 * The operands of the branch instructions are
   1369      1.1  christos 		 * identical, so the result is true if a true
   1370      1.1  christos 		 * branch was taken to get here, otherwise false.
   1371      1.1  christos 		 */
   1372      1.1  christos 		return sense ? JT(child) : JF(child);
   1373      1.1  christos 
   1374      1.1  christos 	if (sense && code == (BPF_JMP|BPF_JEQ|BPF_K))
   1375      1.1  christos 		/*
   1376      1.1  christos 		 * At this point, we only know the comparison if we
   1377      1.1  christos 		 * came down the true branch, and it was an equality
   1378      1.1  christos 		 * comparison with a constant.
   1379      1.1  christos 		 *
   1380      1.1  christos 		 * I.e., if we came down the true branch, and the branch
   1381      1.1  christos 		 * was an equality comparison with a constant, we know the
   1382      1.1  christos 		 * accumulator contains that constant.  If we came down
   1383      1.1  christos 		 * the false branch, or the comparison wasn't with a
   1384      1.1  christos 		 * constant, we don't know what was in the accumulator.
   1385      1.1  christos 		 *
   1386      1.1  christos 		 * We rely on the fact that distinct constants have distinct
   1387      1.1  christos 		 * value numbers.
   1388      1.1  christos 		 */
   1389      1.1  christos 		return JF(child);
   1390      1.1  christos 
   1391      1.1  christos 	return 0;
   1392      1.1  christos }
   1393      1.1  christos 
   1394      1.1  christos static void
   1395      1.1  christos opt_j(ep)
   1396      1.1  christos 	struct edge *ep;
   1397      1.1  christos {
   1398      1.1  christos 	register int i, k;
   1399      1.1  christos 	register struct block *target;
   1400      1.1  christos 
   1401      1.1  christos 	if (JT(ep->succ) == 0)
   1402      1.1  christos 		return;
   1403      1.1  christos 
   1404      1.1  christos 	if (JT(ep->succ) == JF(ep->succ)) {
   1405      1.1  christos 		/*
   1406      1.1  christos 		 * Common branch targets can be eliminated, provided
   1407      1.1  christos 		 * there is no data dependency.
   1408      1.1  christos 		 */
   1409      1.1  christos 		if (!use_conflict(ep->pred, ep->succ->et.succ)) {
   1410      1.1  christos 			done = 0;
   1411      1.1  christos 			ep->succ = JT(ep->succ);
   1412      1.1  christos 		}
   1413      1.1  christos 	}
   1414      1.1  christos 	/*
   1415      1.1  christos 	 * For each edge dominator that matches the successor of this
   1416      1.1  christos 	 * edge, promote the edge successor to the its grandchild.
   1417      1.1  christos 	 *
   1418      1.1  christos 	 * XXX We violate the set abstraction here in favor a reasonably
   1419      1.1  christos 	 * efficient loop.
   1420      1.1  christos 	 */
   1421      1.1  christos  top:
   1422      1.1  christos 	for (i = 0; i < edgewords; ++i) {
   1423      1.1  christos 		register bpf_u_int32 x = ep->edom[i];
   1424      1.1  christos 
   1425      1.1  christos 		while (x != 0) {
   1426      1.1  christos 			k = ffs(x) - 1;
   1427      1.1  christos 			x &=~ (1 << k);
   1428      1.1  christos 			k += i * BITS_PER_WORD;
   1429      1.1  christos 
   1430      1.1  christos 			target = fold_edge(ep->succ, edges[k]);
   1431      1.1  christos 			/*
   1432      1.1  christos 			 * Check that there is no data dependency between
   1433      1.1  christos 			 * nodes that will be violated if we move the edge.
   1434      1.1  christos 			 */
   1435      1.1  christos 			if (target != 0 && !use_conflict(ep->pred, target)) {
   1436      1.1  christos 				done = 0;
   1437      1.1  christos 				ep->succ = target;
   1438      1.1  christos 				if (JT(target) != 0)
   1439      1.1  christos 					/*
   1440      1.1  christos 					 * Start over unless we hit a leaf.
   1441      1.1  christos 					 */
   1442      1.1  christos 					goto top;
   1443      1.1  christos 				return;
   1444      1.1  christos 			}
   1445      1.1  christos 		}
   1446      1.1  christos 	}
   1447      1.1  christos }
   1448      1.1  christos 
   1449      1.1  christos 
   1450      1.1  christos static void
   1451      1.1  christos or_pullup(b)
   1452      1.1  christos 	struct block *b;
   1453      1.1  christos {
   1454      1.1  christos 	int val, at_top;
   1455      1.1  christos 	struct block *pull;
   1456      1.1  christos 	struct block **diffp, **samep;
   1457      1.1  christos 	struct edge *ep;
   1458      1.1  christos 
   1459      1.1  christos 	ep = b->in_edges;
   1460      1.1  christos 	if (ep == 0)
   1461      1.1  christos 		return;
   1462      1.1  christos 
   1463      1.1  christos 	/*
   1464      1.1  christos 	 * Make sure each predecessor loads the same value.
   1465      1.1  christos 	 * XXX why?
   1466      1.1  christos 	 */
   1467      1.1  christos 	val = ep->pred->val[A_ATOM];
   1468      1.1  christos 	for (ep = ep->next; ep != 0; ep = ep->next)
   1469      1.1  christos 		if (val != ep->pred->val[A_ATOM])
   1470      1.1  christos 			return;
   1471      1.1  christos 
   1472      1.1  christos 	if (JT(b->in_edges->pred) == b)
   1473      1.1  christos 		diffp = &JT(b->in_edges->pred);
   1474      1.1  christos 	else
   1475      1.1  christos 		diffp = &JF(b->in_edges->pred);
   1476      1.1  christos 
   1477      1.1  christos 	at_top = 1;
   1478      1.1  christos 	while (1) {
   1479      1.1  christos 		if (*diffp == 0)
   1480      1.1  christos 			return;
   1481      1.1  christos 
   1482      1.1  christos 		if (JT(*diffp) != JT(b))
   1483      1.1  christos 			return;
   1484      1.1  christos 
   1485      1.1  christos 		if (!SET_MEMBER((*diffp)->dom, b->id))
   1486      1.1  christos 			return;
   1487      1.1  christos 
   1488      1.1  christos 		if ((*diffp)->val[A_ATOM] != val)
   1489      1.1  christos 			break;
   1490      1.1  christos 
   1491      1.1  christos 		diffp = &JF(*diffp);
   1492      1.1  christos 		at_top = 0;
   1493      1.1  christos 	}
   1494      1.1  christos 	samep = &JF(*diffp);
   1495      1.1  christos 	while (1) {
   1496      1.1  christos 		if (*samep == 0)
   1497      1.1  christos 			return;
   1498      1.1  christos 
   1499      1.1  christos 		if (JT(*samep) != JT(b))
   1500      1.1  christos 			return;
   1501      1.1  christos 
   1502      1.1  christos 		if (!SET_MEMBER((*samep)->dom, b->id))
   1503      1.1  christos 			return;
   1504      1.1  christos 
   1505      1.1  christos 		if ((*samep)->val[A_ATOM] == val)
   1506      1.1  christos 			break;
   1507      1.1  christos 
   1508      1.1  christos 		/* XXX Need to check that there are no data dependencies
   1509      1.1  christos 		   between dp0 and dp1.  Currently, the code generator
   1510      1.1  christos 		   will not produce such dependencies. */
   1511      1.1  christos 		samep = &JF(*samep);
   1512      1.1  christos 	}
   1513      1.1  christos #ifdef notdef
   1514      1.1  christos 	/* XXX This doesn't cover everything. */
   1515      1.1  christos 	for (i = 0; i < N_ATOMS; ++i)
   1516      1.1  christos 		if ((*samep)->val[i] != pred->val[i])
   1517      1.1  christos 			return;
   1518      1.1  christos #endif
   1519      1.1  christos 	/* Pull up the node. */
   1520      1.1  christos 	pull = *samep;
   1521      1.1  christos 	*samep = JF(pull);
   1522      1.1  christos 	JF(pull) = *diffp;
   1523      1.1  christos 
   1524      1.1  christos 	/*
   1525      1.1  christos 	 * At the top of the chain, each predecessor needs to point at the
   1526      1.1  christos 	 * pulled up node.  Inside the chain, there is only one predecessor
   1527      1.1  christos 	 * to worry about.
   1528      1.1  christos 	 */
   1529      1.1  christos 	if (at_top) {
   1530      1.1  christos 		for (ep = b->in_edges; ep != 0; ep = ep->next) {
   1531      1.1  christos 			if (JT(ep->pred) == b)
   1532      1.1  christos 				JT(ep->pred) = pull;
   1533      1.1  christos 			else
   1534      1.1  christos 				JF(ep->pred) = pull;
   1535      1.1  christos 		}
   1536      1.1  christos 	}
   1537      1.1  christos 	else
   1538      1.1  christos 		*diffp = pull;
   1539      1.1  christos 
   1540      1.1  christos 	done = 0;
   1541      1.1  christos }
   1542      1.1  christos 
   1543      1.1  christos static void
   1544      1.1  christos and_pullup(b)
   1545      1.1  christos 	struct block *b;
   1546      1.1  christos {
   1547      1.1  christos 	int val, at_top;
   1548      1.1  christos 	struct block *pull;
   1549      1.1  christos 	struct block **diffp, **samep;
   1550      1.1  christos 	struct edge *ep;
   1551      1.1  christos 
   1552      1.1  christos 	ep = b->in_edges;
   1553      1.1  christos 	if (ep == 0)
   1554      1.1  christos 		return;
   1555      1.1  christos 
   1556      1.1  christos 	/*
   1557      1.1  christos 	 * Make sure each predecessor loads the same value.
   1558      1.1  christos 	 */
   1559      1.1  christos 	val = ep->pred->val[A_ATOM];
   1560      1.1  christos 	for (ep = ep->next; ep != 0; ep = ep->next)
   1561      1.1  christos 		if (val != ep->pred->val[A_ATOM])
   1562      1.1  christos 			return;
   1563      1.1  christos 
   1564      1.1  christos 	if (JT(b->in_edges->pred) == b)
   1565      1.1  christos 		diffp = &JT(b->in_edges->pred);
   1566      1.1  christos 	else
   1567      1.1  christos 		diffp = &JF(b->in_edges->pred);
   1568      1.1  christos 
   1569      1.1  christos 	at_top = 1;
   1570      1.1  christos 	while (1) {
   1571      1.1  christos 		if (*diffp == 0)
   1572      1.1  christos 			return;
   1573      1.1  christos 
   1574      1.1  christos 		if (JF(*diffp) != JF(b))
   1575      1.1  christos 			return;
   1576      1.1  christos 
   1577      1.1  christos 		if (!SET_MEMBER((*diffp)->dom, b->id))
   1578      1.1  christos 			return;
   1579      1.1  christos 
   1580      1.1  christos 		if ((*diffp)->val[A_ATOM] != val)
   1581      1.1  christos 			break;
   1582      1.1  christos 
   1583      1.1  christos 		diffp = &JT(*diffp);
   1584      1.1  christos 		at_top = 0;
   1585      1.1  christos 	}
   1586      1.1  christos 	samep = &JT(*diffp);
   1587      1.1  christos 	while (1) {
   1588      1.1  christos 		if (*samep == 0)
   1589      1.1  christos 			return;
   1590      1.1  christos 
   1591      1.1  christos 		if (JF(*samep) != JF(b))
   1592      1.1  christos 			return;
   1593      1.1  christos 
   1594      1.1  christos 		if (!SET_MEMBER((*samep)->dom, b->id))
   1595      1.1  christos 			return;
   1596      1.1  christos 
   1597      1.1  christos 		if ((*samep)->val[A_ATOM] == val)
   1598      1.1  christos 			break;
   1599      1.1  christos 
   1600      1.1  christos 		/* XXX Need to check that there are no data dependencies
   1601      1.1  christos 		   between diffp and samep.  Currently, the code generator
   1602      1.1  christos 		   will not produce such dependencies. */
   1603      1.1  christos 		samep = &JT(*samep);
   1604      1.1  christos 	}
   1605      1.1  christos #ifdef notdef
   1606      1.1  christos 	/* XXX This doesn't cover everything. */
   1607      1.1  christos 	for (i = 0; i < N_ATOMS; ++i)
   1608      1.1  christos 		if ((*samep)->val[i] != pred->val[i])
   1609      1.1  christos 			return;
   1610      1.1  christos #endif
   1611      1.1  christos 	/* Pull up the node. */
   1612      1.1  christos 	pull = *samep;
   1613      1.1  christos 	*samep = JT(pull);
   1614      1.1  christos 	JT(pull) = *diffp;
   1615      1.1  christos 
   1616      1.1  christos 	/*
   1617      1.1  christos 	 * At the top of the chain, each predecessor needs to point at the
   1618      1.1  christos 	 * pulled up node.  Inside the chain, there is only one predecessor
   1619      1.1  christos 	 * to worry about.
   1620      1.1  christos 	 */
   1621      1.1  christos 	if (at_top) {
   1622      1.1  christos 		for (ep = b->in_edges; ep != 0; ep = ep->next) {
   1623      1.1  christos 			if (JT(ep->pred) == b)
   1624      1.1  christos 				JT(ep->pred) = pull;
   1625      1.1  christos 			else
   1626      1.1  christos 				JF(ep->pred) = pull;
   1627      1.1  christos 		}
   1628      1.1  christos 	}
   1629      1.1  christos 	else
   1630      1.1  christos 		*diffp = pull;
   1631      1.1  christos 
   1632      1.1  christos 	done = 0;
   1633      1.1  christos }
   1634      1.1  christos 
   1635      1.1  christos static void
   1636      1.1  christos opt_blks(root, do_stmts)
   1637      1.1  christos 	struct block *root;
   1638      1.1  christos 	int do_stmts;
   1639      1.1  christos {
   1640      1.1  christos 	int i, maxlevel;
   1641      1.1  christos 	struct block *p;
   1642      1.1  christos 
   1643      1.1  christos 	init_val();
   1644      1.1  christos 	maxlevel = root->level;
   1645      1.1  christos 
   1646      1.1  christos 	find_inedges(root);
   1647      1.1  christos 	for (i = maxlevel; i >= 0; --i)
   1648      1.1  christos 		for (p = levels[i]; p; p = p->link)
   1649      1.1  christos 			opt_blk(p, do_stmts);
   1650      1.1  christos 
   1651      1.1  christos 	if (do_stmts)
   1652      1.1  christos 		/*
   1653      1.1  christos 		 * No point trying to move branches; it can't possibly
   1654      1.1  christos 		 * make a difference at this point.
   1655      1.1  christos 		 */
   1656      1.1  christos 		return;
   1657      1.1  christos 
   1658      1.1  christos 	for (i = 1; i <= maxlevel; ++i) {
   1659      1.1  christos 		for (p = levels[i]; p; p = p->link) {
   1660      1.1  christos 			opt_j(&p->et);
   1661      1.1  christos 			opt_j(&p->ef);
   1662      1.1  christos 		}
   1663      1.1  christos 	}
   1664      1.1  christos 
   1665      1.1  christos 	find_inedges(root);
   1666      1.1  christos 	for (i = 1; i <= maxlevel; ++i) {
   1667      1.1  christos 		for (p = levels[i]; p; p = p->link) {
   1668      1.1  christos 			or_pullup(p);
   1669      1.1  christos 			and_pullup(p);
   1670      1.1  christos 		}
   1671      1.1  christos 	}
   1672      1.1  christos }
   1673      1.1  christos 
   1674      1.1  christos static inline void
   1675      1.1  christos link_inedge(parent, child)
   1676      1.1  christos 	struct edge *parent;
   1677      1.1  christos 	struct block *child;
   1678      1.1  christos {
   1679      1.1  christos 	parent->next = child->in_edges;
   1680      1.1  christos 	child->in_edges = parent;
   1681      1.1  christos }
   1682      1.1  christos 
   1683      1.1  christos static void
   1684      1.1  christos find_inedges(root)
   1685      1.1  christos 	struct block *root;
   1686      1.1  christos {
   1687      1.1  christos 	int i;
   1688      1.1  christos 	struct block *b;
   1689      1.1  christos 
   1690      1.1  christos 	for (i = 0; i < n_blocks; ++i)
   1691      1.1  christos 		blocks[i]->in_edges = 0;
   1692      1.1  christos 
   1693      1.1  christos 	/*
   1694      1.1  christos 	 * Traverse the graph, adding each edge to the predecessor
   1695      1.1  christos 	 * list of its successors.  Skip the leaves (i.e. level 0).
   1696      1.1  christos 	 */
   1697      1.1  christos 	for (i = root->level; i > 0; --i) {
   1698      1.1  christos 		for (b = levels[i]; b != 0; b = b->link) {
   1699      1.1  christos 			link_inedge(&b->et, JT(b));
   1700      1.1  christos 			link_inedge(&b->ef, JF(b));
   1701      1.1  christos 		}
   1702      1.1  christos 	}
   1703      1.1  christos }
   1704      1.1  christos 
   1705      1.1  christos static void
   1706      1.1  christos opt_root(b)
   1707      1.1  christos 	struct block **b;
   1708      1.1  christos {
   1709      1.1  christos 	struct slist *tmp, *s;
   1710      1.1  christos 
   1711      1.1  christos 	s = (*b)->stmts;
   1712      1.1  christos 	(*b)->stmts = 0;
   1713      1.1  christos 	while (BPF_CLASS((*b)->s.code) == BPF_JMP && JT(*b) == JF(*b))
   1714      1.1  christos 		*b = JT(*b);
   1715      1.1  christos 
   1716      1.1  christos 	tmp = (*b)->stmts;
   1717      1.1  christos 	if (tmp != 0)
   1718      1.1  christos 		sappend(s, tmp);
   1719      1.1  christos 	(*b)->stmts = s;
   1720      1.1  christos 
   1721      1.1  christos 	/*
   1722      1.1  christos 	 * If the root node is a return, then there is no
   1723      1.1  christos 	 * point executing any statements (since the bpf machine
   1724      1.1  christos 	 * has no side effects).
   1725      1.1  christos 	 */
   1726      1.1  christos 	if (BPF_CLASS((*b)->s.code) == BPF_RET)
   1727      1.1  christos 		(*b)->stmts = 0;
   1728      1.1  christos }
   1729      1.1  christos 
   1730      1.1  christos static void
   1731      1.1  christos opt_loop(root, do_stmts)
   1732      1.1  christos 	struct block *root;
   1733      1.1  christos 	int do_stmts;
   1734      1.1  christos {
   1735      1.1  christos 
   1736      1.1  christos #ifdef BDEBUG
   1737      1.1  christos 	if (dflag > 1) {
   1738      1.1  christos 		printf("opt_loop(root, %d) begin\n", do_stmts);
   1739      1.1  christos 		opt_dump(root);
   1740      1.1  christos 	}
   1741      1.1  christos #endif
   1742      1.1  christos 	do {
   1743      1.1  christos 		done = 1;
   1744      1.1  christos 		find_levels(root);
   1745      1.1  christos 		find_dom(root);
   1746      1.1  christos 		find_closure(root);
   1747      1.1  christos 		find_ud(root);
   1748      1.1  christos 		find_edom(root);
   1749      1.1  christos 		opt_blks(root, do_stmts);
   1750      1.1  christos #ifdef BDEBUG
   1751      1.1  christos 		if (dflag > 1) {
   1752      1.1  christos 			printf("opt_loop(root, %d) bottom, done=%d\n", do_stmts, done);
   1753      1.1  christos 			opt_dump(root);
   1754      1.1  christos 		}
   1755      1.1  christos #endif
   1756      1.1  christos 	} while (!done);
   1757      1.1  christos }
   1758      1.1  christos 
/*
 * Optimize the filter code in its dag representation.
 *
 * 'rootp' points at the root block pointer.  opt_root() may replace the
 * root, so on return '*rootp' can designate a different block than on
 * entry.
 *
 * The work is done in this order: allocate the optimizer's tables
 * (opt_init), run the optimization loop without and then with statement
 * optimization, merge equivalent blocks (intern_blocks), tidy the root
 * (opt_root) and release the tables (opt_cleanup).
 */
void
bpf_optimize(struct block **rootp)
{
	struct block *root;

	root = *rootp;

	opt_init(root);
	opt_loop(root, 0);
	opt_loop(root, 1);
	intern_blocks(root);
#ifdef BDEBUG
	if (dflag > 1) {
		printf("after intern_blocks()\n");
		opt_dump(root);
	}
#endif
	opt_root(rootp);
#ifdef BDEBUG
	if (dflag > 1) {
		printf("after opt_root()\n");
		/*
		 * opt_root() may have moved the root, so dump the graph
		 * from the current root rather than from the one saved
		 * on entry.
		 */
		opt_dump(*rootp);
	}
#endif
	opt_cleanup();
}
   1789      1.1  christos 
   1790      1.1  christos static void
   1791      1.1  christos make_marks(p)
   1792      1.1  christos 	struct block *p;
   1793      1.1  christos {
   1794      1.1  christos 	if (!isMarked(p)) {
   1795      1.1  christos 		Mark(p);
   1796      1.1  christos 		if (BPF_CLASS(p->s.code) != BPF_RET) {
   1797      1.1  christos 			make_marks(JT(p));
   1798      1.1  christos 			make_marks(JF(p));
   1799      1.1  christos 		}
   1800      1.1  christos 	}
   1801      1.1  christos }
   1802      1.1  christos 
   1803      1.1  christos /*
   1804      1.1  christos  * Mark code array such that isMarked(i) is true
   1805      1.1  christos  * only for nodes that are alive.
   1806      1.1  christos  */
   1807      1.1  christos static void
   1808      1.1  christos mark_code(p)
   1809      1.1  christos 	struct block *p;
   1810      1.1  christos {
   1811      1.1  christos 	cur_mark += 1;
   1812      1.1  christos 	make_marks(p);
   1813      1.1  christos }
   1814      1.1  christos 
   1815      1.1  christos /*
   1816      1.1  christos  * True iff the two stmt lists load the same value from the packet into
   1817      1.1  christos  * the accumulator.
   1818      1.1  christos  */
   1819      1.1  christos static int
   1820      1.1  christos eq_slist(x, y)
   1821      1.1  christos 	struct slist *x, *y;
   1822      1.1  christos {
   1823      1.1  christos 	while (1) {
   1824      1.1  christos 		while (x && x->s.code == NOP)
   1825      1.1  christos 			x = x->next;
   1826      1.1  christos 		while (y && y->s.code == NOP)
   1827      1.1  christos 			y = y->next;
   1828      1.1  christos 		if (x == 0)
   1829      1.1  christos 			return y == 0;
   1830      1.1  christos 		if (y == 0)
   1831      1.1  christos 			return x == 0;
   1832      1.1  christos 		if (x->s.code != y->s.code || x->s.k != y->s.k)
   1833      1.1  christos 			return 0;
   1834      1.1  christos 		x = x->next;
   1835      1.1  christos 		y = y->next;
   1836      1.1  christos 	}
   1837      1.1  christos }
   1838      1.1  christos 
   1839      1.1  christos static inline int
   1840      1.1  christos eq_blk(b0, b1)
   1841      1.1  christos 	struct block *b0, *b1;
   1842      1.1  christos {
   1843      1.1  christos 	if (b0->s.code == b1->s.code &&
   1844      1.1  christos 	    b0->s.k == b1->s.k &&
   1845      1.1  christos 	    b0->et.succ == b1->et.succ &&
   1846      1.1  christos 	    b0->ef.succ == b1->ef.succ)
   1847      1.1  christos 		return eq_slist(b0->stmts, b1->stmts);
   1848      1.1  christos 	return 0;
   1849      1.1  christos }
   1850      1.1  christos 
   1851      1.1  christos static void
   1852      1.1  christos intern_blocks(root)
   1853      1.1  christos 	struct block *root;
   1854      1.1  christos {
   1855      1.1  christos 	struct block *p;
   1856      1.1  christos 	int i, j;
   1857      1.1  christos 	int done1; /* don't shadow global */
   1858      1.1  christos  top:
   1859      1.1  christos 	done1 = 1;
   1860      1.1  christos 	for (i = 0; i < n_blocks; ++i)
   1861      1.1  christos 		blocks[i]->link = 0;
   1862      1.1  christos 
   1863      1.1  christos 	mark_code(root);
   1864      1.1  christos 
   1865      1.1  christos 	for (i = n_blocks - 1; --i >= 0; ) {
   1866      1.1  christos 		if (!isMarked(blocks[i]))
   1867      1.1  christos 			continue;
   1868      1.1  christos 		for (j = i + 1; j < n_blocks; ++j) {
   1869      1.1  christos 			if (!isMarked(blocks[j]))
   1870      1.1  christos 				continue;
   1871      1.1  christos 			if (eq_blk(blocks[i], blocks[j])) {
   1872      1.1  christos 				blocks[i]->link = blocks[j]->link ?
   1873      1.1  christos 					blocks[j]->link : blocks[j];
   1874      1.1  christos 				break;
   1875      1.1  christos 			}
   1876      1.1  christos 		}
   1877      1.1  christos 	}
   1878      1.1  christos 	for (i = 0; i < n_blocks; ++i) {
   1879      1.1  christos 		p = blocks[i];
   1880      1.1  christos 		if (JT(p) == 0)
   1881      1.1  christos 			continue;
   1882      1.1  christos 		if (JT(p)->link) {
   1883      1.1  christos 			done1 = 0;
   1884      1.1  christos 			JT(p) = JT(p)->link;
   1885      1.1  christos 		}
   1886      1.1  christos 		if (JF(p)->link) {
   1887      1.1  christos 			done1 = 0;
   1888      1.1  christos 			JF(p) = JF(p)->link;
   1889      1.1  christos 		}
   1890      1.1  christos 	}
   1891      1.1  christos 	if (!done1)
   1892      1.1  christos 		goto top;
   1893      1.1  christos }
   1894      1.1  christos 
   1895      1.1  christos static void
   1896      1.1  christos opt_cleanup()
   1897      1.1  christos {
   1898      1.1  christos 	free((void *)vnode_base);
   1899      1.1  christos 	free((void *)vmap);
   1900      1.1  christos 	free((void *)edges);
   1901      1.1  christos 	free((void *)space);
   1902      1.1  christos 	free((void *)levels);
   1903      1.1  christos 	free((void *)blocks);
   1904      1.1  christos }
   1905      1.1  christos 
   1906      1.1  christos /*
   1907      1.1  christos  * Return the number of stmts in 's'.
   1908      1.1  christos  */
   1909  1.1.1.3  christos static u_int
   1910      1.1  christos slength(s)
   1911      1.1  christos 	struct slist *s;
   1912      1.1  christos {
   1913  1.1.1.3  christos 	u_int n = 0;
   1914      1.1  christos 
   1915      1.1  christos 	for (; s; s = s->next)
   1916      1.1  christos 		if (s->s.code != NOP)
   1917      1.1  christos 			++n;
   1918      1.1  christos 	return n;
   1919      1.1  christos }
   1920      1.1  christos 
/*
 * Count the nodes reachable from 'p', marking each one as it is
 * counted.  All nodes should be unmarked when this is first called.
 */
static int
count_blocks(struct block *p)
{
	int total;

	if (p == 0 || isMarked(p))
		return 0;

	Mark(p);
	total = 1;
	total += count_blocks(JT(p));
	total += count_blocks(JF(p));
	return total;
}
   1934      1.1  christos 
   1935      1.1  christos /*
   1936      1.1  christos  * Do a depth first search on the flow graph, numbering the
   1937      1.1  christos  * the basic blocks, and entering them into the 'blocks' array.`
   1938      1.1  christos  */
   1939      1.1  christos static void
   1940      1.1  christos number_blks_r(p)
   1941      1.1  christos 	struct block *p;
   1942      1.1  christos {
   1943      1.1  christos 	int n;
   1944      1.1  christos 
   1945      1.1  christos 	if (p == 0 || isMarked(p))
   1946      1.1  christos 		return;
   1947      1.1  christos 
   1948      1.1  christos 	Mark(p);
   1949      1.1  christos 	n = n_blocks++;
   1950      1.1  christos 	p->id = n;
   1951      1.1  christos 	blocks[n] = p;
   1952      1.1  christos 
   1953      1.1  christos 	number_blks_r(JT(p));
   1954      1.1  christos 	number_blks_r(JF(p));
   1955      1.1  christos }
   1956      1.1  christos 
   1957      1.1  christos /*
   1958      1.1  christos  * Return the number of stmts in the flowgraph reachable by 'p'.
   1959      1.1  christos  * The nodes should be unmarked before calling.
   1960      1.1  christos  *
   1961      1.1  christos  * Note that "stmts" means "instructions", and that this includes
   1962      1.1  christos  *
   1963      1.1  christos  *	side-effect statements in 'p' (slength(p->stmts));
   1964      1.1  christos  *
   1965      1.1  christos  *	statements in the true branch from 'p' (count_stmts(JT(p)));
   1966      1.1  christos  *
   1967      1.1  christos  *	statements in the false branch from 'p' (count_stmts(JF(p)));
   1968      1.1  christos  *
   1969      1.1  christos  *	the conditional jump itself (1);
   1970      1.1  christos  *
   1971      1.1  christos  *	an extra long jump if the true branch requires it (p->longjt);
   1972      1.1  christos  *
   1973      1.1  christos  *	an extra long jump if the false branch requires it (p->longjf).
   1974      1.1  christos  */
   1975  1.1.1.3  christos static u_int
   1976      1.1  christos count_stmts(p)
   1977      1.1  christos 	struct block *p;
   1978      1.1  christos {
   1979  1.1.1.3  christos 	u_int n;
   1980      1.1  christos 
   1981      1.1  christos 	if (p == 0 || isMarked(p))
   1982      1.1  christos 		return 0;
   1983      1.1  christos 	Mark(p);
   1984      1.1  christos 	n = count_stmts(JT(p)) + count_stmts(JF(p));
   1985      1.1  christos 	return slength(p->stmts) + n + 1 + p->longjt + p->longjf;
   1986      1.1  christos }
   1987      1.1  christos 
   1988      1.1  christos /*
   1989      1.1  christos  * Allocate memory.  All allocation is done before optimization
   1990      1.1  christos  * is begun.  A linear bound on the size of all data structures is computed
   1991      1.1  christos  * from the total number of blocks and/or statements.
   1992      1.1  christos  */
   1993      1.1  christos static void
   1994      1.1  christos opt_init(root)
   1995      1.1  christos 	struct block *root;
   1996      1.1  christos {
   1997      1.1  christos 	bpf_u_int32 *p;
   1998      1.1  christos 	int i, n, max_stmts;
   1999      1.1  christos 
   2000      1.1  christos 	/*
   2001      1.1  christos 	 * First, count the blocks, so we can malloc an array to map
   2002      1.1  christos 	 * block number to block.  Then, put the blocks into the array.
   2003      1.1  christos 	 */
   2004      1.1  christos 	unMarkAll();
   2005      1.1  christos 	n = count_blocks(root);
   2006      1.1  christos 	blocks = (struct block **)calloc(n, sizeof(*blocks));
   2007      1.1  christos 	if (blocks == NULL)
   2008      1.1  christos 		bpf_error("malloc");
   2009      1.1  christos 	unMarkAll();
   2010      1.1  christos 	n_blocks = 0;
   2011      1.1  christos 	number_blks_r(root);
   2012      1.1  christos 
   2013      1.1  christos 	n_edges = 2 * n_blocks;
   2014      1.1  christos 	edges = (struct edge **)calloc(n_edges, sizeof(*edges));
   2015      1.1  christos 	if (edges == NULL)
   2016      1.1  christos 		bpf_error("malloc");
   2017      1.1  christos 
   2018      1.1  christos 	/*
   2019      1.1  christos 	 * The number of levels is bounded by the number of nodes.
   2020      1.1  christos 	 */
   2021      1.1  christos 	levels = (struct block **)calloc(n_blocks, sizeof(*levels));
   2022      1.1  christos 	if (levels == NULL)
   2023      1.1  christos 		bpf_error("malloc");
   2024      1.1  christos 
   2025      1.1  christos 	edgewords = n_edges / (8 * sizeof(bpf_u_int32)) + 1;
   2026      1.1  christos 	nodewords = n_blocks / (8 * sizeof(bpf_u_int32)) + 1;
   2027      1.1  christos 
   2028      1.1  christos 	/* XXX */
   2029      1.1  christos 	space = (bpf_u_int32 *)malloc(2 * n_blocks * nodewords * sizeof(*space)
   2030      1.1  christos 				 + n_edges * edgewords * sizeof(*space));
   2031      1.1  christos 	if (space == NULL)
   2032      1.1  christos 		bpf_error("malloc");
   2033      1.1  christos 	p = space;
   2034      1.1  christos 	all_dom_sets = p;
   2035      1.1  christos 	for (i = 0; i < n; ++i) {
   2036      1.1  christos 		blocks[i]->dom = p;
   2037      1.1  christos 		p += nodewords;
   2038      1.1  christos 	}
   2039      1.1  christos 	all_closure_sets = p;
   2040      1.1  christos 	for (i = 0; i < n; ++i) {
   2041      1.1  christos 		blocks[i]->closure = p;
   2042      1.1  christos 		p += nodewords;
   2043      1.1  christos 	}
   2044      1.1  christos 	all_edge_sets = p;
   2045      1.1  christos 	for (i = 0; i < n; ++i) {
   2046      1.1  christos 		register struct block *b = blocks[i];
   2047      1.1  christos 
   2048      1.1  christos 		b->et.edom = p;
   2049      1.1  christos 		p += edgewords;
   2050      1.1  christos 		b->ef.edom = p;
   2051      1.1  christos 		p += edgewords;
   2052      1.1  christos 		b->et.id = i;
   2053      1.1  christos 		edges[i] = &b->et;
   2054      1.1  christos 		b->ef.id = n_blocks + i;
   2055      1.1  christos 		edges[n_blocks + i] = &b->ef;
   2056      1.1  christos 		b->et.pred = b;
   2057      1.1  christos 		b->ef.pred = b;
   2058      1.1  christos 	}
   2059      1.1  christos 	max_stmts = 0;
   2060      1.1  christos 	for (i = 0; i < n; ++i)
   2061      1.1  christos 		max_stmts += slength(blocks[i]->stmts) + 1;
   2062      1.1  christos 	/*
   2063      1.1  christos 	 * We allocate at most 3 value numbers per statement,
   2064      1.1  christos 	 * so this is an upper bound on the number of valnodes
   2065      1.1  christos 	 * we'll need.
   2066      1.1  christos 	 */
   2067      1.1  christos 	maxval = 3 * max_stmts;
   2068      1.1  christos 	vmap = (struct vmapinfo *)calloc(maxval, sizeof(*vmap));
   2069      1.1  christos 	vnode_base = (struct valnode *)calloc(maxval, sizeof(*vnode_base));
   2070      1.1  christos 	if (vmap == NULL || vnode_base == NULL)
   2071      1.1  christos 		bpf_error("malloc");
   2072      1.1  christos }
   2073      1.1  christos 
/*
 * Cursor state used while flattening the basic-block form of the code
 * into the instruction array that BPF requires: 'fstart' is the base of
 * the malloc'd array, and 'ftail' is moved downwards by convert_code_r()
 * as each block claims its slots during the recursive traversal.
 */
static struct bpf_insn *fstart, *ftail;

#ifdef BDEBUG
/*
 * Debugging aid, indexed by instruction offset: convert_code_r() records
 * the id (plus one) of the block whose branch instruction is at that offset.
 */
int bids[1000];
#endif
   2085      1.1  christos 
   2086      1.1  christos /*
   2087      1.1  christos  * Returns true if successful.  Returns false if a branch has
   2088      1.1  christos  * an offset that is too large.  If so, we have marked that
   2089      1.1  christos  * branch so that on a subsequent iteration, it will be treated
   2090      1.1  christos  * properly.
   2091      1.1  christos  */
   2092      1.1  christos static int
   2093      1.1  christos convert_code_r(p)
   2094      1.1  christos 	struct block *p;
   2095      1.1  christos {
   2096      1.1  christos 	struct bpf_insn *dst;
   2097      1.1  christos 	struct slist *src;
   2098      1.1  christos 	int slen;
   2099      1.1  christos 	u_int off;
   2100      1.1  christos 	int extrajmps;		/* number of extra jumps inserted */
   2101      1.1  christos 	struct slist **offset = NULL;
   2102      1.1  christos 
   2103      1.1  christos 	if (p == 0 || isMarked(p))
   2104      1.1  christos 		return (1);
   2105      1.1  christos 	Mark(p);
   2106      1.1  christos 
   2107      1.1  christos 	if (convert_code_r(JF(p)) == 0)
   2108      1.1  christos 		return (0);
   2109      1.1  christos 	if (convert_code_r(JT(p)) == 0)
   2110      1.1  christos 		return (0);
   2111      1.1  christos 
   2112      1.1  christos 	slen = slength(p->stmts);
   2113      1.1  christos 	dst = ftail -= (slen + 1 + p->longjt + p->longjf);
   2114      1.1  christos 		/* inflate length by any extra jumps */
   2115      1.1  christos 
   2116      1.1  christos 	p->offset = dst - fstart;
   2117      1.1  christos 
   2118      1.1  christos 	/* generate offset[] for convenience  */
   2119      1.1  christos 	if (slen) {
   2120      1.1  christos 		offset = (struct slist **)calloc(slen, sizeof(struct slist *));
   2121      1.1  christos 		if (!offset) {
   2122      1.1  christos 			bpf_error("not enough core");
   2123      1.1  christos 			/*NOTREACHED*/
   2124      1.1  christos 		}
   2125      1.1  christos 	}
   2126      1.1  christos 	src = p->stmts;
   2127      1.1  christos 	for (off = 0; off < slen && src; off++) {
   2128      1.1  christos #if 0
   2129      1.1  christos 		printf("off=%d src=%x\n", off, src);
   2130      1.1  christos #endif
   2131      1.1  christos 		offset[off] = src;
   2132      1.1  christos 		src = src->next;
   2133      1.1  christos 	}
   2134      1.1  christos 
   2135      1.1  christos 	off = 0;
   2136      1.1  christos 	for (src = p->stmts; src; src = src->next) {
   2137      1.1  christos 		if (src->s.code == NOP)
   2138      1.1  christos 			continue;
   2139      1.1  christos 		dst->code = (u_short)src->s.code;
   2140      1.1  christos 		dst->k = src->s.k;
   2141      1.1  christos 
   2142      1.1  christos 		/* fill block-local relative jump */
   2143      1.1  christos 		if (BPF_CLASS(src->s.code) != BPF_JMP || src->s.code == (BPF_JMP|BPF_JA)) {
   2144      1.1  christos #if 0
   2145      1.1  christos 			if (src->s.jt || src->s.jf) {
   2146      1.1  christos 				bpf_error("illegal jmp destination");
   2147      1.1  christos 				/*NOTREACHED*/
   2148      1.1  christos 			}
   2149      1.1  christos #endif
   2150      1.1  christos 			goto filled;
   2151      1.1  christos 		}
   2152      1.1  christos 		if (off == slen - 2)	/*???*/
   2153      1.1  christos 			goto filled;
   2154      1.1  christos 
   2155      1.1  christos 	    {
   2156      1.1  christos 		int i;
   2157      1.1  christos 		int jt, jf;
   2158      1.1  christos 		const char *ljerr = "%s for block-local relative jump: off=%d";
   2159      1.1  christos 
   2160      1.1  christos #if 0
   2161      1.1  christos 		printf("code=%x off=%d %x %x\n", src->s.code,
   2162      1.1  christos 			off, src->s.jt, src->s.jf);
   2163      1.1  christos #endif
   2164      1.1  christos 
   2165      1.1  christos 		if (!src->s.jt || !src->s.jf) {
   2166      1.1  christos 			bpf_error(ljerr, "no jmp destination", off);
   2167      1.1  christos 			/*NOTREACHED*/
   2168      1.1  christos 		}
   2169      1.1  christos 
   2170      1.1  christos 		jt = jf = 0;
   2171      1.1  christos 		for (i = 0; i < slen; i++) {
   2172      1.1  christos 			if (offset[i] == src->s.jt) {
   2173      1.1  christos 				if (jt) {
   2174      1.1  christos 					bpf_error(ljerr, "multiple matches", off);
   2175      1.1  christos 					/*NOTREACHED*/
   2176      1.1  christos 				}
   2177      1.1  christos 
   2178      1.1  christos 				dst->jt = i - off - 1;
   2179      1.1  christos 				jt++;
   2180      1.1  christos 			}
   2181      1.1  christos 			if (offset[i] == src->s.jf) {
   2182      1.1  christos 				if (jf) {
   2183      1.1  christos 					bpf_error(ljerr, "multiple matches", off);
   2184      1.1  christos 					/*NOTREACHED*/
   2185      1.1  christos 				}
   2186      1.1  christos 				dst->jf = i - off - 1;
   2187      1.1  christos 				jf++;
   2188      1.1  christos 			}
   2189      1.1  christos 		}
   2190      1.1  christos 		if (!jt || !jf) {
   2191      1.1  christos 			bpf_error(ljerr, "no destination found", off);
   2192      1.1  christos 			/*NOTREACHED*/
   2193      1.1  christos 		}
   2194      1.1  christos 	    }
   2195      1.1  christos filled:
   2196      1.1  christos 		++dst;
   2197      1.1  christos 		++off;
   2198      1.1  christos 	}
   2199      1.1  christos 	if (offset)
   2200      1.1  christos 		free(offset);
   2201      1.1  christos 
   2202      1.1  christos #ifdef BDEBUG
   2203      1.1  christos 	bids[dst - fstart] = p->id + 1;
   2204      1.1  christos #endif
   2205      1.1  christos 	dst->code = (u_short)p->s.code;
   2206      1.1  christos 	dst->k = p->s.k;
   2207      1.1  christos 	if (JT(p)) {
   2208      1.1  christos 		extrajmps = 0;
   2209      1.1  christos 		off = JT(p)->offset - (p->offset + slen) - 1;
   2210      1.1  christos 		if (off >= 256) {
   2211      1.1  christos 		    /* offset too large for branch, must add a jump */
   2212      1.1  christos 		    if (p->longjt == 0) {
   2213      1.1  christos 		    	/* mark this instruction and retry */
   2214      1.1  christos 			p->longjt++;
   2215      1.1  christos 			return(0);
   2216      1.1  christos 		    }
   2217      1.1  christos 		    /* branch if T to following jump */
   2218      1.1  christos 		    dst->jt = extrajmps;
   2219      1.1  christos 		    extrajmps++;
   2220      1.1  christos 		    dst[extrajmps].code = BPF_JMP|BPF_JA;
   2221      1.1  christos 		    dst[extrajmps].k = off - extrajmps;
   2222      1.1  christos 		}
   2223      1.1  christos 		else
   2224      1.1  christos 		    dst->jt = off;
   2225      1.1  christos 		off = JF(p)->offset - (p->offset + slen) - 1;
   2226      1.1  christos 		if (off >= 256) {
   2227      1.1  christos 		    /* offset too large for branch, must add a jump */
   2228      1.1  christos 		    if (p->longjf == 0) {
   2229      1.1  christos 		    	/* mark this instruction and retry */
   2230      1.1  christos 			p->longjf++;
   2231      1.1  christos 			return(0);
   2232      1.1  christos 		    }
   2233      1.1  christos 		    /* branch if F to following jump */
   2234      1.1  christos 		    /* if two jumps are inserted, F goes to second one */
   2235      1.1  christos 		    dst->jf = extrajmps;
   2236      1.1  christos 		    extrajmps++;
   2237      1.1  christos 		    dst[extrajmps].code = BPF_JMP|BPF_JA;
   2238      1.1  christos 		    dst[extrajmps].k = off - extrajmps;
   2239      1.1  christos 		}
   2240      1.1  christos 		else
   2241      1.1  christos 		    dst->jf = off;
   2242      1.1  christos 	}
   2243      1.1  christos 	return (1);
   2244      1.1  christos }
   2245      1.1  christos 
   2246      1.1  christos 
   2247      1.1  christos /*
   2248      1.1  christos  * Convert flowgraph intermediate representation to the
   2249      1.1  christos  * BPF array representation.  Set *lenp to the number of instructions.
   2250      1.1  christos  *
   2251      1.1  christos  * This routine does *NOT* leak the memory pointed to by fp.  It *must
   2252      1.1  christos  * not* do free(fp) before returning fp; doing so would make no sense,
   2253      1.1  christos  * as the BPF array pointed to by the return value of icode_to_fcode()
   2254      1.1  christos  * must be valid - it's being returned for use in a bpf_program structure.
   2255      1.1  christos  *
   2256      1.1  christos  * If it appears that icode_to_fcode() is leaking, the problem is that
   2257      1.1  christos  * the program using pcap_compile() is failing to free the memory in
   2258      1.1  christos  * the BPF program when it's done - the leak is in the program, not in
   2259      1.1  christos  * the routine that happens to be allocating the memory.  (By analogy, if
   2260      1.1  christos  * a program calls fopen() without ever calling fclose() on the FILE *,
   2261      1.1  christos  * it will leak the FILE structure; the leak is not in fopen(), it's in
   2262      1.1  christos  * the program.)  Change the program to use pcap_freecode() when it's
   2263      1.1  christos  * done with the filter program.  See the pcap man page.
   2264      1.1  christos  */
   2265      1.1  christos struct bpf_insn *
   2266      1.1  christos icode_to_fcode(root, lenp)
   2267      1.1  christos 	struct block *root;
   2268  1.1.1.3  christos 	u_int *lenp;
   2269      1.1  christos {
   2270  1.1.1.3  christos 	u_int n;
   2271      1.1  christos 	struct bpf_insn *fp;
   2272      1.1  christos 
   2273      1.1  christos 	/*
   2274      1.1  christos 	 * Loop doing convert_code_r() until no branches remain
   2275      1.1  christos 	 * with too-large offsets.
   2276      1.1  christos 	 */
   2277      1.1  christos 	while (1) {
   2278      1.1  christos 	    unMarkAll();
   2279      1.1  christos 	    n = *lenp = count_stmts(root);
   2280      1.1  christos 
   2281      1.1  christos 	    fp = (struct bpf_insn *)malloc(sizeof(*fp) * n);
   2282      1.1  christos 	    if (fp == NULL)
   2283      1.1  christos 		    bpf_error("malloc");
   2284      1.1  christos 	    memset((char *)fp, 0, sizeof(*fp) * n);
   2285      1.1  christos 	    fstart = fp;
   2286      1.1  christos 	    ftail = fp + n;
   2287      1.1  christos 
   2288      1.1  christos 	    unMarkAll();
   2289      1.1  christos 	    if (convert_code_r(root))
   2290      1.1  christos 		break;
   2291      1.1  christos 	    free(fp);
   2292      1.1  christos 	}
   2293      1.1  christos 
   2294      1.1  christos 	return fp;
   2295      1.1  christos }
   2296      1.1  christos 
   2297      1.1  christos /*
   2298      1.1  christos  * Make a copy of a BPF program and put it in the "fcode" member of
   2299      1.1  christos  * a "pcap_t".
   2300      1.1  christos  *
   2301      1.1  christos  * If we fail to allocate memory for the copy, fill in the "errbuf"
   2302      1.1  christos  * member of the "pcap_t" with an error message, and return -1;
   2303      1.1  christos  * otherwise, return 0.
   2304      1.1  christos  */
   2305      1.1  christos int
   2306      1.1  christos install_bpf_program(pcap_t *p, struct bpf_program *fp)
   2307      1.1  christos {
   2308      1.1  christos 	size_t prog_size;
   2309      1.1  christos 
   2310      1.1  christos 	/*
   2311      1.1  christos 	 * Validate the program.
   2312      1.1  christos 	 */
   2313      1.1  christos 	if (!bpf_validate(fp->bf_insns, fp->bf_len)) {
   2314      1.1  christos 		snprintf(p->errbuf, sizeof(p->errbuf),
   2315      1.1  christos 			"BPF program is not valid");
   2316      1.1  christos 		return (-1);
   2317      1.1  christos 	}
   2318      1.1  christos 
   2319      1.1  christos 	/*
   2320      1.1  christos 	 * Free up any already installed program.
   2321      1.1  christos 	 */
   2322      1.1  christos 	pcap_freecode(&p->fcode);
   2323      1.1  christos 
   2324      1.1  christos 	prog_size = sizeof(*fp->bf_insns) * fp->bf_len;
   2325      1.1  christos 	p->fcode.bf_len = fp->bf_len;
   2326      1.1  christos 	p->fcode.bf_insns = (struct bpf_insn *)malloc(prog_size);
   2327      1.1  christos 	if (p->fcode.bf_insns == NULL) {
   2328      1.1  christos 		snprintf(p->errbuf, sizeof(p->errbuf),
   2329      1.1  christos 			 "malloc: %s", pcap_strerror(errno));
   2330      1.1  christos 		return (-1);
   2331      1.1  christos 	}
   2332      1.1  christos 	memcpy(p->fcode.bf_insns, fp->bf_insns, prog_size);
   2333      1.1  christos 	return (0);
   2334      1.1  christos }
   2335      1.1  christos 
#ifdef BDEBUG
/*
 * Debugging aid: flatten the flowgraph rooted at 'root' into a
 * temporary BPF program, print it with bpf_dump() followed by a
 * newline, and release the temporary program again.  'bids' is
 * cleared first, so it only reflects this conversion.
 */
static void
opt_dump(root)
	struct block *root;
{
	struct bpf_program prog;

	memset(bids, 0, sizeof(bids));

	prog.bf_insns = icode_to_fcode(root, &prog.bf_len);
	bpf_dump(&prog, 1);
	putchar('\n');

	free(prog.bf_insns);
}
#endif
   2350