Home | History | Annotate | Line # | Download | only in dist
optimize.c revision 1.13
      1  1.13  christos /*	$NetBSD: optimize.c,v 1.13 2024/09/02 15:33:37 christos Exp $	*/
      2   1.5  christos 
      3   1.1  christos /*
      4   1.1  christos  * Copyright (c) 1988, 1989, 1990, 1991, 1993, 1994, 1995, 1996
      5   1.1  christos  *	The Regents of the University of California.  All rights reserved.
      6   1.1  christos  *
      7   1.1  christos  * Redistribution and use in source and binary forms, with or without
      8   1.1  christos  * modification, are permitted provided that: (1) source code distributions
      9   1.1  christos  * retain the above copyright notice and this paragraph in its entirety, (2)
     10   1.1  christos  * distributions including binary code include the above copyright notice and
     11   1.1  christos  * this paragraph in its entirety in the documentation or other materials
     12   1.1  christos  * provided with the distribution, and (3) all advertising materials mentioning
     13   1.1  christos  * features or use of this software display the following acknowledgement:
     14   1.1  christos  * ``This product includes software developed by the University of California,
     15   1.1  christos  * Lawrence Berkeley Laboratory and its contributors.'' Neither the name of
     16   1.1  christos  * the University nor the names of its contributors may be used to endorse
     17   1.1  christos  * or promote products derived from this software without specific prior
     18   1.1  christos  * written permission.
     19   1.1  christos  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
     20   1.1  christos  * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
     21   1.1  christos  * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
     22   1.1  christos  *
     23  1.10  christos  *  Optimization module for BPF code intermediate representation.
     24   1.1  christos  */
     25   1.7  christos 
     26   1.7  christos #include <sys/cdefs.h>
     27  1.13  christos __RCSID("$NetBSD: optimize.c,v 1.13 2024/09/02 15:33:37 christos Exp $");
     28   1.1  christos 
     29  1.10  christos #include <config.h>
     30   1.1  christos 
     31  1.10  christos #include <pcap-types.h>
     32   1.1  christos 
     33   1.1  christos #include <stdio.h>
     34   1.1  christos #include <stdlib.h>
     35   1.1  christos #include <memory.h>
     36  1.11  christos #include <setjmp.h>
     37   1.1  christos #include <string.h>
     38  1.12  christos #include <limits.h> /* for SIZE_MAX */
     39   1.1  christos #include <errno.h>
     40   1.1  christos 
     41   1.1  christos #include "pcap-int.h"
     42   1.1  christos 
     43   1.1  christos #include "gencode.h"
     44  1.10  christos #include "optimize.h"
     45  1.12  christos #include "diag-control.h"
     46   1.1  christos 
     47   1.1  christos #ifdef HAVE_OS_PROTO_H
     48   1.1  christos #include "os-proto.h"
     49   1.1  christos #endif
     50   1.1  christos 
     51   1.1  christos #ifdef BDEBUG
     52  1.10  christos /*
     53  1.10  christos  * The internal "debug printout" flag for the filter expression optimizer.
     54  1.10  christos  * The code to print that stuff is present only if BDEBUG is defined, so
     55  1.10  christos  * the flag, and the routine to set it, are defined only if BDEBUG is
     56  1.10  christos  * defined.
     57  1.10  christos  */
     58  1.10  christos static int pcap_optimizer_debug;
     59  1.10  christos 
     60  1.10  christos /*
     61  1.10  christos  * Routine to set that flag.
     62  1.10  christos  *
     63  1.10  christos  * This is intended for libpcap developers, not for general use.
     64  1.10  christos  * If you want to set these in a program, you'll have to declare this
     65  1.10  christos  * routine yourself, with the appropriate DLL import attribute on Windows;
     66  1.10  christos  * it's not declared in any header file, and won't be declared in any
     67  1.10  christos  * header file provided by libpcap.
     68  1.10  christos  */
     69  1.10  christos PCAP_API void pcap_set_optimizer_debug(int value);
     70  1.10  christos 
     71  1.10  christos PCAP_API_DEF void
     72  1.10  christos pcap_set_optimizer_debug(int value)
     73  1.10  christos {
     74  1.10  christos 	pcap_optimizer_debug = value;
     75  1.10  christos }
     76  1.10  christos 
     77  1.10  christos /*
     78  1.10  christos  * The internal "print dot graph" flag for the filter expression optimizer.
     79  1.10  christos  * The code to print that stuff is present only if BDEBUG is defined, so
     80  1.10  christos  * the flag, and the routine to set it, are defined only if BDEBUG is
     81  1.10  christos  * defined.
     82  1.10  christos  */
     83  1.10  christos static int pcap_print_dot_graph;
     84  1.10  christos 
     85  1.10  christos /*
     86  1.10  christos  * Routine to set that flag.
     87  1.10  christos  *
     88  1.10  christos  * This is intended for libpcap developers, not for general use.
     89  1.10  christos  * If you want to set these in a program, you'll have to declare this
     90  1.10  christos  * routine yourself, with the appropriate DLL import attribute on Windows;
     91  1.10  christos  * it's not declared in any header file, and won't be declared in any
     92  1.10  christos  * header file provided by libpcap.
     93  1.10  christos  */
     94  1.10  christos PCAP_API void pcap_set_print_dot_graph(int value);
     95  1.10  christos 
     96  1.10  christos PCAP_API_DEF void
     97  1.10  christos pcap_set_print_dot_graph(int value)
     98  1.10  christos {
     99  1.10  christos 	pcap_print_dot_graph = value;
    100  1.10  christos }
    101   1.1  christos 
    102   1.1  christos #endif
    103   1.1  christos 
    104   1.9  christos /*
    105  1.10  christos  * lowest_set_bit().
    106  1.10  christos  *
    107  1.10  christos  * Takes a 32-bit integer as an argument.
    108  1.10  christos  *
    109  1.10  christos  * If handed a non-zero value, returns the index of the lowest set bit,
    110  1.12  christos  * counting upwards from zero.
    111  1.10  christos  *
    112  1.10  christos  * If handed zero, the results are platform- and compiler-dependent.
    113  1.10  christos  * Keep it out of the light, don't give it any water, don't feed it
    114  1.10  christos  * after midnight, and don't pass zero to it.
    115  1.10  christos  *
    116  1.10  christos  * This is the same as the count of trailing zeroes in the word.
    117   1.9  christos  */
    118  1.10  christos #if PCAP_IS_AT_LEAST_GNUC_VERSION(3,4)
    119  1.10  christos   /*
    120  1.10  christos    * GCC 3.4 and later; we have __builtin_ctz().
    121  1.10  christos    */
    122  1.12  christos   #define lowest_set_bit(mask) ((u_int)__builtin_ctz(mask))
    123  1.10  christos #elif defined(_MSC_VER)
    124  1.10  christos   /*
    125  1.10  christos    * Visual Studio; we support only 2005 and later, so use
    126  1.10  christos    * _BitScanForward().
    127  1.10  christos    */
    128  1.10  christos #include <intrin.h>
    129  1.10  christos 
    130  1.10  christos #ifndef __clang__
    131  1.10  christos #pragma intrinsic(_BitScanForward)
    132  1.10  christos #endif
    133  1.10  christos 
    134  1.12  christos static __forceinline u_int
    135  1.10  christos lowest_set_bit(int mask)
    136  1.10  christos {
    137  1.10  christos 	unsigned long bit;
    138  1.10  christos 
    139  1.10  christos 	/*
    140  1.10  christos 	 * Don't sign-extend mask if long is longer than int.
    141  1.10  christos 	 * (It's currently not, in MSVC, even on 64-bit platforms, but....)
    142  1.10  christos 	 */
    143  1.10  christos 	if (_BitScanForward(&bit, (unsigned int)mask) == 0)
    144  1.12  christos 		abort();	/* mask is zero */
    145  1.12  christos 	return (u_int)bit;
    146  1.10  christos }
    147  1.10  christos #elif defined(MSDOS) && defined(__DJGPP__)
    148  1.10  christos   /*
    149  1.10  christos    * MS-DOS with DJGPP, which declares ffs() in <string.h>, which
    150  1.10  christos    * we've already included.
    151  1.10  christos    */
    152  1.12  christos   #define lowest_set_bit(mask)	((u_int)(ffs((mask)) - 1))
    153  1.10  christos #elif (defined(MSDOS) && defined(__WATCOMC__)) || defined(STRINGS_H_DECLARES_FFS)
    154  1.10  christos   /*
    155  1.10  christos    * MS-DOS with Watcom C, which has <strings.h> and declares ffs() there,
    156  1.10  christos    * or some other platform (UN*X conforming to a sufficient recent version
    157  1.10  christos    * of the Single UNIX Specification).
    158  1.10  christos    */
    159  1.10  christos   #include <strings.h>
    160  1.12  christos   #define lowest_set_bit(mask)	(u_int)((ffs((mask)) - 1))
    161  1.10  christos #else
    162   1.9  christos /*
    163  1.10  christos  * None of the above.
    164  1.10  christos  * Use a perfect-hash-function-based function.
    165   1.9  christos  */
    166  1.12  christos static u_int
    167  1.10  christos lowest_set_bit(int mask)
    168   1.9  christos {
    169  1.10  christos 	unsigned int v = (unsigned int)mask;
    170  1.10  christos 
    171  1.12  christos 	static const u_int MultiplyDeBruijnBitPosition[32] = {
    172  1.10  christos 		0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
    173  1.10  christos 		31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9
    174  1.10  christos 	};
    175   1.9  christos 
    176  1.10  christos 	/*
    177  1.10  christos 	 * We strip off all but the lowermost set bit (v & ~v),
    178  1.10  christos 	 * and perform a minimal perfect hash on it to look up the
    179  1.10  christos 	 * number of low-order zero bits in a table.
    180  1.10  christos 	 *
    181  1.10  christos 	 * See:
    182  1.10  christos 	 *
    183  1.10  christos 	 *	http://7ooo.mooo.com/text/ComputingTrailingZerosHOWTO.pdf
    184  1.10  christos 	 *
    185  1.10  christos 	 *	http://supertech.csail.mit.edu/papers/debruijn.pdf
    186  1.10  christos 	 */
    187  1.10  christos 	return (MultiplyDeBruijnBitPosition[((v & -v) * 0x077CB531U) >> 27]);
    188   1.9  christos }
    189   1.1  christos #endif
    190   1.1  christos 
    191   1.1  christos /*
    192   1.1  christos  * Represents a deleted instruction.
    193   1.1  christos  */
    194   1.1  christos #define NOP -1
    195   1.1  christos 
    196   1.1  christos /*
    197   1.1  christos  * Register numbers for use-def values.
    198   1.1  christos  * 0 through BPF_MEMWORDS-1 represent the corresponding scratch memory
    199   1.1  christos  * location.  A_ATOM is the accumulator and X_ATOM is the index
    200   1.1  christos  * register.
    201   1.1  christos  */
    202   1.1  christos #define A_ATOM BPF_MEMWORDS
    203   1.1  christos #define X_ATOM (BPF_MEMWORDS+1)
    204   1.1  christos 
    205   1.1  christos /*
    206   1.1  christos  * This define is used to represent *both* the accumulator and
    207   1.1  christos  * x register in use-def computations.
    208   1.1  christos  * Currently, the use-def code assumes only one definition per instruction.
    209   1.1  christos  */
    210   1.1  christos #define AX_ATOM N_ATOMS
    211   1.1  christos 
    212   1.1  christos /*
    213  1.13  christos  * These data structures are used in a Cocke and Schwartz style
    214   1.9  christos  * value numbering scheme.  Since the flowgraph is acyclic,
    215   1.9  christos  * exit values can be propagated from a node's predecessors
    216   1.9  christos  * provided it is uniquely defined.
    217   1.1  christos  */
    218   1.9  christos struct valnode {
    219   1.9  christos 	int code;
    220  1.12  christos 	bpf_u_int32 v0, v1;
    221  1.12  christos 	int val;		/* the value number */
    222   1.9  christos 	struct valnode *next;
    223   1.9  christos };
    224   1.1  christos 
    225   1.9  christos /* Integer constants mapped with the load immediate opcode. */
    226  1.12  christos #define K(i) F(opt_state, BPF_LD|BPF_IMM|BPF_W, i, 0U)
    227   1.1  christos 
    228   1.9  christos struct vmapinfo {
    229   1.9  christos 	int is_const;
    230  1.12  christos 	bpf_u_int32 const_val;
    231   1.9  christos };
    232   1.1  christos 
    233  1.10  christos typedef struct {
    234   1.9  christos 	/*
    235  1.11  christos 	 * Place to longjmp to on an error.
    236  1.11  christos 	 */
    237  1.11  christos 	jmp_buf top_ctx;
    238  1.11  christos 
    239  1.11  christos 	/*
    240  1.11  christos 	 * The buffer into which to put error message.
    241  1.11  christos 	 */
    242  1.11  christos 	char *errbuf;
    243  1.11  christos 
    244  1.11  christos 	/*
    245   1.9  christos 	 * A flag to indicate that further optimization is needed.
    246   1.9  christos 	 * Iterative passes are continued until a given pass yields no
    247  1.12  christos 	 * code simplification or branch movement.
    248   1.9  christos 	 */
    249   1.9  christos 	int done;
    250   1.1  christos 
    251  1.12  christos 	/*
    252  1.12  christos 	 * XXX - detect loops that do nothing but repeated AND/OR pullups
    253  1.12  christos 	 * and edge moves.
    254  1.12  christos 	 * If 100 passes in a row do nothing but that, treat that as a
    255  1.12  christos 	 * sign that we're in a loop that just shuffles in a cycle in
    256  1.12  christos 	 * which each pass just shuffles the code and we eventually
    257  1.12  christos 	 * get back to the original configuration.
    258  1.12  christos 	 *
    259  1.12  christos 	 * XXX - we need a non-heuristic way of detecting, or preventing,
    260  1.12  christos 	 * such a cycle.
    261  1.12  christos 	 */
    262  1.12  christos 	int non_branch_movement_performed;
    263  1.12  christos 
    264  1.12  christos 	u_int n_blocks;		/* number of blocks in the CFG; guaranteed to be > 0, as it's a RET instruction at a minimum */
    265   1.9  christos 	struct block **blocks;
    266  1.12  christos 	u_int n_edges;		/* twice n_blocks, so guaranteed to be > 0 */
    267   1.9  christos 	struct edge **edges;
    268   1.1  christos 
    269   1.9  christos 	/*
    270   1.9  christos 	 * A bit vector set representation of the dominators.
    271   1.9  christos 	 * We round up the set size to the next power of two.
    272   1.9  christos 	 */
    273  1.12  christos 	u_int nodewords;	/* number of 32-bit words for a bit vector of "number of nodes" bits; guaranteed to be > 0 */
    274  1.12  christos 	u_int edgewords;	/* number of 32-bit words for a bit vector of "number of edges" bits; guaranteed to be > 0 */
    275   1.9  christos 	struct block **levels;
    276   1.9  christos 	bpf_u_int32 *space;
    277   1.1  christos 
    278   1.1  christos #define BITS_PER_WORD (8*sizeof(bpf_u_int32))
    279   1.1  christos /*
    280   1.1  christos  * True if a is in uset {p}
    281   1.1  christos  */
    282   1.1  christos #define SET_MEMBER(p, a) \
    283  1.11  christos ((p)[(unsigned)(a) / BITS_PER_WORD] & ((bpf_u_int32)1 << ((unsigned)(a) % BITS_PER_WORD)))
    284   1.1  christos 
    285   1.1  christos /*
    286   1.1  christos  * Add 'a' to uset p.
    287   1.1  christos  */
    288   1.1  christos #define SET_INSERT(p, a) \
    289  1.11  christos (p)[(unsigned)(a) / BITS_PER_WORD] |= ((bpf_u_int32)1 << ((unsigned)(a) % BITS_PER_WORD))
    290   1.1  christos 
    291   1.1  christos /*
    292   1.1  christos  * Delete 'a' from uset p.
    293   1.1  christos  */
    294   1.1  christos #define SET_DELETE(p, a) \
    295  1.11  christos (p)[(unsigned)(a) / BITS_PER_WORD] &= ~((bpf_u_int32)1 << ((unsigned)(a) % BITS_PER_WORD))
    296   1.1  christos 
    297   1.1  christos /*
    298   1.1  christos  * a := a intersect b
    299  1.12  christos  * n must be guaranteed to be > 0
    300   1.1  christos  */
    301   1.1  christos #define SET_INTERSECT(a, b, n)\
    302   1.1  christos {\
    303   1.1  christos 	register bpf_u_int32 *_x = a, *_y = b;\
    304  1.12  christos 	register u_int _n = n;\
    305  1.12  christos 	do *_x++ &= *_y++; while (--_n != 0);\
    306   1.1  christos }
    307   1.1  christos 
    308   1.1  christos /*
    309   1.1  christos  * a := a - b
    310  1.12  christos  * n must be guaranteed to be > 0
    311   1.1  christos  */
    312   1.1  christos #define SET_SUBTRACT(a, b, n)\
    313   1.1  christos {\
    314   1.1  christos 	register bpf_u_int32 *_x = a, *_y = b;\
    315  1.12  christos 	register u_int _n = n;\
    316  1.12  christos 	do *_x++ &=~ *_y++; while (--_n != 0);\
    317   1.1  christos }
    318   1.1  christos 
    319   1.1  christos /*
    320   1.1  christos  * a := a union b
    321  1.12  christos  * n must be guaranteed to be > 0
    322   1.1  christos  */
    323   1.1  christos #define SET_UNION(a, b, n)\
    324   1.1  christos {\
    325   1.1  christos 	register bpf_u_int32 *_x = a, *_y = b;\
    326  1.12  christos 	register u_int _n = n;\
    327  1.12  christos 	do *_x++ |= *_y++; while (--_n != 0);\
    328   1.1  christos }
    329   1.1  christos 
    330   1.9  christos 	uset all_dom_sets;
    331   1.9  christos 	uset all_closure_sets;
    332   1.9  christos 	uset all_edge_sets;
    333   1.9  christos 
    334   1.9  christos #define MODULUS 213
    335   1.9  christos 	struct valnode *hashtbl[MODULUS];
    336  1.12  christos 	bpf_u_int32 curval;
    337  1.12  christos 	bpf_u_int32 maxval;
    338   1.9  christos 
    339   1.9  christos 	struct vmapinfo *vmap;
    340   1.9  christos 	struct valnode *vnode_base;
    341   1.9  christos 	struct valnode *next_vnode;
    342  1.10  christos } opt_state_t;
    343   1.9  christos 
    344   1.9  christos typedef struct {
    345   1.9  christos 	/*
    346  1.11  christos 	 * Place to longjmp to on an error.
    347  1.11  christos 	 */
    348  1.11  christos 	jmp_buf top_ctx;
    349  1.11  christos 
    350  1.11  christos 	/*
    351  1.11  christos 	 * The buffer into which to put error message.
    352  1.11  christos 	 */
    353  1.11  christos 	char *errbuf;
    354  1.11  christos 
    355  1.11  christos 	/*
    356   1.9  christos 	 * Some pointers used to convert the basic block form of the code,
    357   1.9  christos 	 * into the array form that BPF requires.  'fstart' will point to
    358   1.9  christos 	 * the malloc'd array while 'ftail' is used during the recursive
    359   1.9  christos 	 * traversal.
    360   1.9  christos 	 */
    361   1.9  christos 	struct bpf_insn *fstart;
    362   1.9  christos 	struct bpf_insn *ftail;
    363   1.9  christos } conv_state_t;
    364   1.9  christos 
    365  1.11  christos static void opt_init(opt_state_t *, struct icode *);
    366   1.9  christos static void opt_cleanup(opt_state_t *);
    367  1.11  christos static void PCAP_NORETURN opt_error(opt_state_t *, const char *, ...)
    368  1.11  christos     PCAP_PRINTFLIKE(2, 3);
    369   1.9  christos 
    370   1.9  christos static void intern_blocks(opt_state_t *, struct icode *);
    371   1.9  christos 
    372   1.9  christos static void find_inedges(opt_state_t *, struct block *);
    373   1.9  christos #ifdef BDEBUG
    374  1.11  christos static void opt_dump(opt_state_t *, struct icode *);
    375   1.9  christos #endif
    376   1.1  christos 
    377   1.1  christos #ifndef MAX
    378   1.1  christos #define MAX(a,b) ((a)>(b)?(a):(b))
    379   1.1  christos #endif
    380   1.1  christos 
    381   1.1  christos static void
    382   1.9  christos find_levels_r(opt_state_t *opt_state, struct icode *ic, struct block *b)
    383   1.1  christos {
    384   1.1  christos 	int level;
    385   1.1  christos 
    386   1.9  christos 	if (isMarked(ic, b))
    387   1.1  christos 		return;
    388   1.1  christos 
    389   1.9  christos 	Mark(ic, b);
    390   1.1  christos 	b->link = 0;
    391   1.1  christos 
    392   1.1  christos 	if (JT(b)) {
    393   1.9  christos 		find_levels_r(opt_state, ic, JT(b));
    394   1.9  christos 		find_levels_r(opt_state, ic, JF(b));
    395   1.1  christos 		level = MAX(JT(b)->level, JF(b)->level) + 1;
    396   1.1  christos 	} else
    397   1.1  christos 		level = 0;
    398   1.1  christos 	b->level = level;
    399   1.9  christos 	b->link = opt_state->levels[level];
    400   1.9  christos 	opt_state->levels[level] = b;
    401   1.1  christos }
    402   1.1  christos 
    403   1.1  christos /*
    404   1.1  christos  * Level graph.  The levels go from 0 at the leaves to
    405   1.9  christos  * N_LEVELS at the root.  The opt_state->levels[] array points to the
    406   1.1  christos  * first node of the level list, whose elements are linked
    407   1.1  christos  * with the 'link' field of the struct block.
    408   1.1  christos  */
    409   1.1  christos static void
    410   1.9  christos find_levels(opt_state_t *opt_state, struct icode *ic)
    411   1.1  christos {
    412   1.9  christos 	memset((char *)opt_state->levels, 0, opt_state->n_blocks * sizeof(*opt_state->levels));
    413   1.9  christos 	unMarkAll(ic);
    414   1.9  christos 	find_levels_r(opt_state, ic, ic->root);
    415   1.1  christos }
    416   1.1  christos 
    417   1.1  christos /*
    418   1.1  christos  * Find dominator relationships.
    419   1.1  christos  * Assumes graph has been leveled.
    420   1.1  christos  */
    421   1.1  christos static void
    422   1.9  christos find_dom(opt_state_t *opt_state, struct block *root)
    423   1.1  christos {
    424  1.12  christos 	u_int i;
    425  1.12  christos 	int level;
    426   1.1  christos 	struct block *b;
    427   1.1  christos 	bpf_u_int32 *x;
    428   1.1  christos 
    429   1.1  christos 	/*
    430   1.1  christos 	 * Initialize sets to contain all nodes.
    431   1.1  christos 	 */
    432   1.9  christos 	x = opt_state->all_dom_sets;
    433  1.12  christos 	/*
    434  1.12  christos 	 * In opt_init(), we've made sure the product doesn't overflow.
    435  1.12  christos 	 */
    436   1.9  christos 	i = opt_state->n_blocks * opt_state->nodewords;
    437  1.12  christos 	while (i != 0) {
    438  1.12  christos 		--i;
    439  1.10  christos 		*x++ = 0xFFFFFFFFU;
    440  1.12  christos 	}
    441   1.1  christos 	/* Root starts off empty. */
    442  1.12  christos 	for (i = opt_state->nodewords; i != 0;) {
    443  1.12  christos 		--i;
    444   1.1  christos 		root->dom[i] = 0;
    445  1.12  christos 	}
    446   1.1  christos 
    447   1.1  christos 	/* root->level is the highest level no found. */
    448  1.12  christos 	for (level = root->level; level >= 0; --level) {
    449  1.12  christos 		for (b = opt_state->levels[level]; b; b = b->link) {
    450   1.1  christos 			SET_INSERT(b->dom, b->id);
    451   1.1  christos 			if (JT(b) == 0)
    452   1.1  christos 				continue;
    453   1.9  christos 			SET_INTERSECT(JT(b)->dom, b->dom, opt_state->nodewords);
    454   1.9  christos 			SET_INTERSECT(JF(b)->dom, b->dom, opt_state->nodewords);
    455   1.1  christos 		}
    456   1.1  christos 	}
    457   1.1  christos }
    458   1.1  christos 
    459   1.1  christos static void
    460   1.9  christos propedom(opt_state_t *opt_state, struct edge *ep)
    461   1.1  christos {
    462   1.1  christos 	SET_INSERT(ep->edom, ep->id);
    463   1.1  christos 	if (ep->succ) {
    464   1.9  christos 		SET_INTERSECT(ep->succ->et.edom, ep->edom, opt_state->edgewords);
    465   1.9  christos 		SET_INTERSECT(ep->succ->ef.edom, ep->edom, opt_state->edgewords);
    466   1.1  christos 	}
    467   1.1  christos }
    468   1.1  christos 
    469   1.1  christos /*
    470   1.1  christos  * Compute edge dominators.
    471   1.1  christos  * Assumes graph has been leveled and predecessors established.
    472   1.1  christos  */
    473   1.1  christos static void
    474   1.9  christos find_edom(opt_state_t *opt_state, struct block *root)
    475   1.1  christos {
    476  1.12  christos 	u_int i;
    477   1.1  christos 	uset x;
    478  1.12  christos 	int level;
    479   1.1  christos 	struct block *b;
    480   1.1  christos 
    481   1.9  christos 	x = opt_state->all_edge_sets;
    482  1.12  christos 	/*
    483  1.12  christos 	 * In opt_init(), we've made sure the product doesn't overflow.
    484  1.12  christos 	 */
    485  1.12  christos 	for (i = opt_state->n_edges * opt_state->edgewords; i != 0; ) {
    486  1.12  christos 		--i;
    487  1.10  christos 		x[i] = 0xFFFFFFFFU;
    488  1.12  christos 	}
    489   1.1  christos 
    490   1.1  christos 	/* root->level is the highest level no found. */
    491   1.9  christos 	memset(root->et.edom, 0, opt_state->edgewords * sizeof(*(uset)0));
    492   1.9  christos 	memset(root->ef.edom, 0, opt_state->edgewords * sizeof(*(uset)0));
    493  1.12  christos 	for (level = root->level; level >= 0; --level) {
    494  1.12  christos 		for (b = opt_state->levels[level]; b != 0; b = b->link) {
    495   1.9  christos 			propedom(opt_state, &b->et);
    496   1.9  christos 			propedom(opt_state, &b->ef);
    497   1.1  christos 		}
    498   1.1  christos 	}
    499   1.1  christos }
    500   1.1  christos 
    501   1.1  christos /*
    502   1.1  christos  * Find the backwards transitive closure of the flow graph.  These sets
    503   1.1  christos  * are backwards in the sense that we find the set of nodes that reach
    504   1.1  christos  * a given node, not the set of nodes that can be reached by a node.
    505   1.1  christos  *
    506   1.1  christos  * Assumes graph has been leveled.
    507   1.1  christos  */
    508   1.1  christos static void
    509   1.9  christos find_closure(opt_state_t *opt_state, struct block *root)
    510   1.1  christos {
    511  1.12  christos 	int level;
    512   1.1  christos 	struct block *b;
    513   1.1  christos 
    514   1.1  christos 	/*
    515   1.1  christos 	 * Initialize sets to contain no nodes.
    516   1.1  christos 	 */
    517   1.9  christos 	memset((char *)opt_state->all_closure_sets, 0,
    518   1.9  christos 	      opt_state->n_blocks * opt_state->nodewords * sizeof(*opt_state->all_closure_sets));
    519   1.1  christos 
    520   1.1  christos 	/* root->level is the highest level no found. */
    521  1.12  christos 	for (level = root->level; level >= 0; --level) {
    522  1.12  christos 		for (b = opt_state->levels[level]; b; b = b->link) {
    523   1.1  christos 			SET_INSERT(b->closure, b->id);
    524   1.1  christos 			if (JT(b) == 0)
    525   1.1  christos 				continue;
    526   1.9  christos 			SET_UNION(JT(b)->closure, b->closure, opt_state->nodewords);
    527   1.9  christos 			SET_UNION(JF(b)->closure, b->closure, opt_state->nodewords);
    528   1.1  christos 		}
    529   1.1  christos 	}
    530   1.1  christos }
    531   1.1  christos 
    532   1.1  christos /*
    533  1.12  christos  * Return the register number that is used by s.
    534  1.12  christos  *
    535  1.12  christos  * Returns ATOM_A if A is used, ATOM_X if X is used, AX_ATOM if both A and X
    536  1.12  christos  * are used, the scratch memory location's number if a scratch memory
    537  1.12  christos  * location is used (e.g., 0 for M[0]), or -1 if none of those are used.
    538   1.1  christos  *
    539   1.1  christos  * The implementation should probably change to an array access.
    540   1.1  christos  */
    541   1.1  christos static int
    542   1.6  christos atomuse(struct stmt *s)
    543   1.1  christos {
    544   1.1  christos 	register int c = s->code;
    545   1.1  christos 
    546   1.1  christos 	if (c == NOP)
    547   1.1  christos 		return -1;
    548   1.1  christos 
    549   1.1  christos 	switch (BPF_CLASS(c)) {
    550   1.1  christos 
    551   1.1  christos 	case BPF_RET:
    552   1.1  christos 		return (BPF_RVAL(c) == BPF_A) ? A_ATOM :
    553   1.1  christos 			(BPF_RVAL(c) == BPF_X) ? X_ATOM : -1;
    554   1.1  christos 
    555   1.1  christos 	case BPF_LD:
    556   1.1  christos 	case BPF_LDX:
    557  1.12  christos 		/*
    558  1.12  christos 		 * As there are fewer than 2^31 memory locations,
    559  1.12  christos 		 * s->k should be convertible to int without problems.
    560  1.12  christos 		 */
    561   1.1  christos 		return (BPF_MODE(c) == BPF_IND) ? X_ATOM :
    562  1.12  christos 			(BPF_MODE(c) == BPF_MEM) ? (int)s->k : -1;
    563   1.1  christos 
    564   1.1  christos 	case BPF_ST:
    565   1.1  christos 		return A_ATOM;
    566   1.1  christos 
    567   1.1  christos 	case BPF_STX:
    568   1.1  christos 		return X_ATOM;
    569   1.1  christos 
    570   1.1  christos 	case BPF_JMP:
    571   1.1  christos 	case BPF_ALU:
    572   1.1  christos 		if (BPF_SRC(c) == BPF_X)
    573   1.1  christos 			return AX_ATOM;
    574   1.1  christos 		return A_ATOM;
    575   1.1  christos 
    576   1.1  christos 	case BPF_MISC:
    577   1.1  christos 		return BPF_MISCOP(c) == BPF_TXA ? X_ATOM : A_ATOM;
    578   1.1  christos 	}
    579   1.1  christos 	abort();
    580   1.1  christos 	/* NOTREACHED */
    581   1.1  christos }
    582   1.1  christos 
    583   1.1  christos /*
    584   1.1  christos  * Return the register number that is defined by 's'.  We assume that
    585   1.1  christos  * a single stmt cannot define more than one register.  If no register
    586   1.1  christos  * is defined, return -1.
    587   1.1  christos  *
    588   1.1  christos  * The implementation should probably change to an array access.
    589   1.1  christos  */
    590   1.1  christos static int
    591   1.6  christos atomdef(struct stmt *s)
    592   1.1  christos {
    593   1.1  christos 	if (s->code == NOP)
    594   1.1  christos 		return -1;
    595   1.1  christos 
    596   1.1  christos 	switch (BPF_CLASS(s->code)) {
    597   1.1  christos 
    598   1.1  christos 	case BPF_LD:
    599   1.1  christos 	case BPF_ALU:
    600   1.1  christos 		return A_ATOM;
    601   1.1  christos 
    602   1.1  christos 	case BPF_LDX:
    603   1.1  christos 		return X_ATOM;
    604   1.1  christos 
    605   1.1  christos 	case BPF_ST:
    606   1.1  christos 	case BPF_STX:
    607   1.1  christos 		return s->k;
    608   1.1  christos 
    609   1.1  christos 	case BPF_MISC:
    610   1.1  christos 		return BPF_MISCOP(s->code) == BPF_TAX ? X_ATOM : A_ATOM;
    611   1.1  christos 	}
    612   1.1  christos 	return -1;
    613   1.1  christos }
    614   1.1  christos 
    615   1.1  christos /*
    616   1.1  christos  * Compute the sets of registers used, defined, and killed by 'b'.
    617   1.1  christos  *
    618   1.1  christos  * "Used" means that a statement in 'b' uses the register before any
    619   1.1  christos  * statement in 'b' defines it, i.e. it uses the value left in
    620   1.1  christos  * that register by a predecessor block of this block.
    621   1.1  christos  * "Defined" means that a statement in 'b' defines it.
    622   1.1  christos  * "Killed" means that a statement in 'b' defines it before any
    623   1.1  christos  * statement in 'b' uses it, i.e. it kills the value left in that
    624   1.1  christos  * register by a predecessor block of this block.
    625   1.1  christos  */
    626   1.1  christos static void
    627   1.6  christos compute_local_ud(struct block *b)
    628   1.1  christos {
    629   1.1  christos 	struct slist *s;
    630   1.9  christos 	atomset def = 0, use = 0, killed = 0;
    631   1.1  christos 	int atom;
    632   1.1  christos 
    633   1.1  christos 	for (s = b->stmts; s; s = s->next) {
    634   1.1  christos 		if (s->s.code == NOP)
    635   1.1  christos 			continue;
    636   1.1  christos 		atom = atomuse(&s->s);
    637   1.1  christos 		if (atom >= 0) {
    638   1.1  christos 			if (atom == AX_ATOM) {
    639   1.1  christos 				if (!ATOMELEM(def, X_ATOM))
    640   1.1  christos 					use |= ATOMMASK(X_ATOM);
    641   1.1  christos 				if (!ATOMELEM(def, A_ATOM))
    642   1.1  christos 					use |= ATOMMASK(A_ATOM);
    643   1.1  christos 			}
    644   1.1  christos 			else if (atom < N_ATOMS) {
    645   1.1  christos 				if (!ATOMELEM(def, atom))
    646   1.1  christos 					use |= ATOMMASK(atom);
    647   1.1  christos 			}
    648   1.1  christos 			else
    649   1.1  christos 				abort();
    650   1.1  christos 		}
    651   1.1  christos 		atom = atomdef(&s->s);
    652   1.1  christos 		if (atom >= 0) {
    653   1.1  christos 			if (!ATOMELEM(use, atom))
    654   1.9  christos 				killed |= ATOMMASK(atom);
    655   1.1  christos 			def |= ATOMMASK(atom);
    656   1.1  christos 		}
    657   1.1  christos 	}
    658   1.1  christos 	if (BPF_CLASS(b->s.code) == BPF_JMP) {
    659   1.1  christos 		/*
    660   1.1  christos 		 * XXX - what about RET?
    661   1.1  christos 		 */
    662   1.1  christos 		atom = atomuse(&b->s);
    663   1.1  christos 		if (atom >= 0) {
    664   1.1  christos 			if (atom == AX_ATOM) {
    665   1.1  christos 				if (!ATOMELEM(def, X_ATOM))
    666   1.1  christos 					use |= ATOMMASK(X_ATOM);
    667   1.1  christos 				if (!ATOMELEM(def, A_ATOM))
    668   1.1  christos 					use |= ATOMMASK(A_ATOM);
    669   1.1  christos 			}
    670   1.1  christos 			else if (atom < N_ATOMS) {
    671   1.1  christos 				if (!ATOMELEM(def, atom))
    672   1.1  christos 					use |= ATOMMASK(atom);
    673   1.1  christos 			}
    674   1.1  christos 			else
    675   1.1  christos 				abort();
    676   1.1  christos 		}
    677   1.1  christos 	}
    678   1.1  christos 
    679   1.1  christos 	b->def = def;
    680   1.9  christos 	b->kill = killed;
    681   1.1  christos 	b->in_use = use;
    682   1.1  christos }
    683   1.1  christos 
    684   1.1  christos /*
    685   1.1  christos  * Assume graph is already leveled.
    686   1.1  christos  */
    687   1.1  christos static void
    688   1.9  christos find_ud(opt_state_t *opt_state, struct block *root)
    689   1.1  christos {
    690   1.1  christos 	int i, maxlevel;
    691   1.1  christos 	struct block *p;
    692   1.1  christos 
    693   1.1  christos 	/*
    694   1.1  christos 	 * root->level is the highest level no found;
    695   1.1  christos 	 * count down from there.
    696   1.1  christos 	 */
    697   1.1  christos 	maxlevel = root->level;
    698   1.1  christos 	for (i = maxlevel; i >= 0; --i)
    699   1.9  christos 		for (p = opt_state->levels[i]; p; p = p->link) {
    700   1.1  christos 			compute_local_ud(p);
    701   1.1  christos 			p->out_use = 0;
    702   1.1  christos 		}
    703   1.1  christos 
    704   1.1  christos 	for (i = 1; i <= maxlevel; ++i) {
    705   1.9  christos 		for (p = opt_state->levels[i]; p; p = p->link) {
    706   1.1  christos 			p->out_use |= JT(p)->in_use | JF(p)->in_use;
    707   1.1  christos 			p->in_use |= p->out_use &~ p->kill;
    708   1.1  christos 		}
    709   1.1  christos 	}
    710   1.1  christos }
    711   1.1  christos static void
    712   1.9  christos init_val(opt_state_t *opt_state)
    713   1.1  christos {
    714   1.9  christos 	opt_state->curval = 0;
    715   1.9  christos 	opt_state->next_vnode = opt_state->vnode_base;
    716   1.9  christos 	memset((char *)opt_state->vmap, 0, opt_state->maxval * sizeof(*opt_state->vmap));
    717   1.9  christos 	memset((char *)opt_state->hashtbl, 0, sizeof opt_state->hashtbl);
    718   1.1  christos }
    719   1.1  christos 
    720  1.12  christos /*
    721  1.12  christos  * Because we really don't have an IR, this stuff is a little messy.
    722  1.12  christos  *
    723  1.12  christos  * This routine looks in the table of existing value number for a value
    724  1.12  christos  * with generated from an operation with the specified opcode and
    725  1.12  christos  * the specified values.  If it finds it, it returns its value number,
    726  1.12  christos  * otherwise it makes a new entry in the table and returns the
    727  1.12  christos  * value number of that entry.
    728  1.12  christos  */
    729  1.12  christos static bpf_u_int32
    730  1.12  christos F(opt_state_t *opt_state, int code, bpf_u_int32 v0, bpf_u_int32 v1)
    731   1.1  christos {
    732   1.1  christos 	u_int hash;
    733  1.12  christos 	bpf_u_int32 val;
    734   1.1  christos 	struct valnode *p;
    735   1.1  christos 
    736  1.12  christos 	hash = (u_int)code ^ (v0 << 4) ^ (v1 << 8);
    737   1.1  christos 	hash %= MODULUS;
    738   1.1  christos 
    739   1.9  christos 	for (p = opt_state->hashtbl[hash]; p; p = p->next)
    740   1.1  christos 		if (p->code == code && p->v0 == v0 && p->v1 == v1)
    741   1.1  christos 			return p->val;
    742   1.1  christos 
    743  1.12  christos 	/*
    744  1.12  christos 	 * Not found.  Allocate a new value, and assign it a new
    745  1.12  christos 	 * value number.
    746  1.12  christos 	 *
    747  1.12  christos 	 * opt_state->curval starts out as 0, which means VAL_UNKNOWN; we
    748  1.12  christos 	 * increment it before using it as the new value number, which
    749  1.12  christos 	 * means we never assign VAL_UNKNOWN.
    750  1.12  christos 	 *
    751  1.12  christos 	 * XXX - unless we overflow, but we probably won't have 2^32-1
    752  1.12  christos 	 * values; we treat 32 bits as effectively infinite.
    753  1.12  christos 	 */
    754   1.9  christos 	val = ++opt_state->curval;
    755   1.1  christos 	if (BPF_MODE(code) == BPF_IMM &&
    756   1.1  christos 	    (BPF_CLASS(code) == BPF_LD || BPF_CLASS(code) == BPF_LDX)) {
    757   1.9  christos 		opt_state->vmap[val].const_val = v0;
    758   1.9  christos 		opt_state->vmap[val].is_const = 1;
    759   1.1  christos 	}
    760   1.9  christos 	p = opt_state->next_vnode++;
    761   1.1  christos 	p->val = val;
    762   1.1  christos 	p->code = code;
    763   1.1  christos 	p->v0 = v0;
    764   1.1  christos 	p->v1 = v1;
    765   1.9  christos 	p->next = opt_state->hashtbl[hash];
    766   1.9  christos 	opt_state->hashtbl[hash] = p;
    767   1.1  christos 
    768   1.1  christos 	return val;
    769   1.1  christos }
    770   1.1  christos 
    771   1.1  christos static inline void
    772  1.12  christos vstore(struct stmt *s, bpf_u_int32 *valp, bpf_u_int32 newval, int alter)
    773   1.1  christos {
    774  1.10  christos 	if (alter && newval != VAL_UNKNOWN && *valp == newval)
    775   1.1  christos 		s->code = NOP;
    776   1.1  christos 	else
    777   1.1  christos 		*valp = newval;
    778   1.1  christos }
    779   1.1  christos 
    780   1.6  christos /*
    781   1.6  christos  * Do constant-folding on binary operators.
    782   1.6  christos  * (Unary operators are handled elsewhere.)
    783   1.6  christos  */
    784   1.1  christos static void
    785  1.12  christos fold_op(opt_state_t *opt_state, struct stmt *s, bpf_u_int32 v0, bpf_u_int32 v1)
    786   1.1  christos {
    787   1.1  christos 	bpf_u_int32 a, b;
    788   1.1  christos 
    789   1.9  christos 	a = opt_state->vmap[v0].const_val;
    790   1.9  christos 	b = opt_state->vmap[v1].const_val;
    791   1.1  christos 
    792   1.1  christos 	switch (BPF_OP(s->code)) {
    793   1.1  christos 	case BPF_ADD:
    794   1.1  christos 		a += b;
    795   1.1  christos 		break;
    796   1.1  christos 
    797   1.1  christos 	case BPF_SUB:
    798   1.1  christos 		a -= b;
    799   1.1  christos 		break;
    800   1.1  christos 
    801   1.1  christos 	case BPF_MUL:
    802   1.1  christos 		a *= b;
    803   1.1  christos 		break;
    804   1.1  christos 
    805   1.1  christos 	case BPF_DIV:
    806   1.1  christos 		if (b == 0)
    807  1.11  christos 			opt_error(opt_state, "division by zero");
    808   1.1  christos 		a /= b;
    809   1.1  christos 		break;
    810   1.1  christos 
    811   1.7  christos 	case BPF_MOD:
    812   1.7  christos 		if (b == 0)
    813  1.11  christos 			opt_error(opt_state, "modulus by zero");
    814   1.7  christos 		a %= b;
    815   1.7  christos 		break;
    816   1.7  christos 
    817   1.1  christos 	case BPF_AND:
    818   1.1  christos 		a &= b;
    819   1.1  christos 		break;
    820   1.1  christos 
    821   1.1  christos 	case BPF_OR:
    822   1.1  christos 		a |= b;
    823   1.1  christos 		break;
    824   1.1  christos 
    825   1.7  christos 	case BPF_XOR:
    826   1.7  christos 		a ^= b;
    827   1.7  christos 		break;
    828   1.7  christos 
    829   1.1  christos 	case BPF_LSH:
    830  1.11  christos 		/*
    831  1.11  christos 		 * A left shift of more than the width of the type
    832  1.11  christos 		 * is undefined in C; we'll just treat it as shifting
    833  1.11  christos 		 * all the bits out.
    834  1.11  christos 		 *
    835  1.11  christos 		 * XXX - the BPF interpreter doesn't check for this,
    836  1.11  christos 		 * so its behavior is dependent on the behavior of
    837  1.11  christos 		 * the processor on which it's running.  There are
    838  1.11  christos 		 * processors on which it shifts all the bits out
    839  1.11  christos 		 * and processors on which it does no shift.
    840  1.11  christos 		 */
    841  1.11  christos 		if (b < 32)
    842  1.11  christos 			a <<= b;
    843  1.11  christos 		else
    844  1.11  christos 			a = 0;
    845   1.1  christos 		break;
    846   1.1  christos 
    847   1.1  christos 	case BPF_RSH:
    848  1.11  christos 		/*
    849  1.11  christos 		 * A right shift of more than the width of the type
    850  1.11  christos 		 * is undefined in C; we'll just treat it as shifting
    851  1.11  christos 		 * all the bits out.
    852  1.11  christos 		 *
    853  1.11  christos 		 * XXX - the BPF interpreter doesn't check for this,
    854  1.11  christos 		 * so its behavior is dependent on the behavior of
    855  1.11  christos 		 * the processor on which it's running.  There are
    856  1.11  christos 		 * processors on which it shifts all the bits out
    857  1.11  christos 		 * and processors on which it does no shift.
    858  1.11  christos 		 */
    859  1.11  christos 		if (b < 32)
    860  1.11  christos 			a >>= b;
    861  1.11  christos 		else
    862  1.11  christos 			a = 0;
    863   1.1  christos 		break;
    864   1.1  christos 
    865   1.1  christos 	default:
    866   1.1  christos 		abort();
    867   1.1  christos 	}
    868   1.1  christos 	s->k = a;
    869   1.1  christos 	s->code = BPF_LD|BPF_IMM;
    870  1.12  christos 	/*
    871  1.12  christos 	 * XXX - optimizer loop detection.
    872  1.12  christos 	 */
    873  1.12  christos 	opt_state->non_branch_movement_performed = 1;
    874   1.9  christos 	opt_state->done = 0;
    875   1.1  christos }
    876   1.1  christos 
    877   1.1  christos static inline struct slist *
    878   1.6  christos this_op(struct slist *s)
    879   1.1  christos {
    880   1.1  christos 	while (s != 0 && s->s.code == NOP)
    881   1.1  christos 		s = s->next;
    882   1.1  christos 	return s;
    883   1.1  christos }
    884   1.1  christos 
    885   1.1  christos static void
    886   1.6  christos opt_not(struct block *b)
    887   1.1  christos {
    888   1.1  christos 	struct block *tmp = JT(b);
    889   1.1  christos 
    890   1.1  christos 	JT(b) = JF(b);
    891   1.1  christos 	JF(b) = tmp;
    892   1.1  christos }
    893   1.1  christos 
    894   1.1  christos static void
    895   1.9  christos opt_peep(opt_state_t *opt_state, struct block *b)
    896   1.1  christos {
    897   1.1  christos 	struct slist *s;
    898   1.1  christos 	struct slist *next, *last;
    899  1.12  christos 	bpf_u_int32 val;
    900   1.1  christos 
    901   1.1  christos 	s = b->stmts;
    902   1.1  christos 	if (s == 0)
    903   1.1  christos 		return;
    904   1.1  christos 
    905   1.1  christos 	last = s;
    906   1.1  christos 	for (/*empty*/; /*empty*/; s = next) {
    907   1.1  christos 		/*
    908   1.1  christos 		 * Skip over nops.
    909   1.1  christos 		 */
    910   1.1  christos 		s = this_op(s);
    911   1.1  christos 		if (s == 0)
    912   1.1  christos 			break;	/* nothing left in the block */
    913   1.1  christos 
    914   1.1  christos 		/*
    915   1.1  christos 		 * Find the next real instruction after that one
    916   1.1  christos 		 * (skipping nops).
    917   1.1  christos 		 */
    918   1.1  christos 		next = this_op(s->next);
    919   1.1  christos 		if (next == 0)
    920   1.1  christos 			break;	/* no next instruction */
    921   1.1  christos 		last = next;
    922   1.1  christos 
    923   1.1  christos 		/*
    924   1.1  christos 		 * st  M[k]	-->	st  M[k]
    925   1.1  christos 		 * ldx M[k]		tax
    926   1.1  christos 		 */
    927   1.1  christos 		if (s->s.code == BPF_ST &&
    928   1.1  christos 		    next->s.code == (BPF_LDX|BPF_MEM) &&
    929   1.1  christos 		    s->s.k == next->s.k) {
    930  1.12  christos 			/*
    931  1.12  christos 			 * XXX - optimizer loop detection.
    932  1.12  christos 			 */
    933  1.12  christos 			opt_state->non_branch_movement_performed = 1;
    934   1.9  christos 			opt_state->done = 0;
    935   1.1  christos 			next->s.code = BPF_MISC|BPF_TAX;
    936   1.1  christos 		}
    937   1.1  christos 		/*
    938   1.1  christos 		 * ld  #k	-->	ldx  #k
    939   1.1  christos 		 * tax			txa
    940   1.1  christos 		 */
    941   1.1  christos 		if (s->s.code == (BPF_LD|BPF_IMM) &&
    942   1.1  christos 		    next->s.code == (BPF_MISC|BPF_TAX)) {
    943   1.1  christos 			s->s.code = BPF_LDX|BPF_IMM;
    944   1.1  christos 			next->s.code = BPF_MISC|BPF_TXA;
    945  1.12  christos 			/*
    946  1.12  christos 			 * XXX - optimizer loop detection.
    947  1.12  christos 			 */
    948  1.12  christos 			opt_state->non_branch_movement_performed = 1;
    949   1.9  christos 			opt_state->done = 0;
    950   1.1  christos 		}
    951   1.1  christos 		/*
    952   1.1  christos 		 * This is an ugly special case, but it happens
    953   1.1  christos 		 * when you say tcp[k] or udp[k] where k is a constant.
    954   1.1  christos 		 */
    955   1.1  christos 		if (s->s.code == (BPF_LD|BPF_IMM)) {
    956   1.1  christos 			struct slist *add, *tax, *ild;
    957   1.1  christos 
    958   1.1  christos 			/*
    959   1.1  christos 			 * Check that X isn't used on exit from this
    960   1.1  christos 			 * block (which the optimizer might cause).
    961   1.1  christos 			 * We know the code generator won't generate
    962   1.1  christos 			 * any local dependencies.
    963   1.1  christos 			 */
    964   1.1  christos 			if (ATOMELEM(b->out_use, X_ATOM))
    965   1.1  christos 				continue;
    966   1.1  christos 
    967   1.1  christos 			/*
    968   1.1  christos 			 * Check that the instruction following the ldi
    969   1.1  christos 			 * is an addx, or it's an ldxms with an addx
    970   1.1  christos 			 * following it (with 0 or more nops between the
    971   1.1  christos 			 * ldxms and addx).
    972   1.1  christos 			 */
    973   1.1  christos 			if (next->s.code != (BPF_LDX|BPF_MSH|BPF_B))
    974   1.1  christos 				add = next;
    975   1.1  christos 			else
    976   1.1  christos 				add = this_op(next->next);
    977   1.1  christos 			if (add == 0 || add->s.code != (BPF_ALU|BPF_ADD|BPF_X))
    978   1.1  christos 				continue;
    979   1.1  christos 
    980   1.1  christos 			/*
    981   1.1  christos 			 * Check that a tax follows that (with 0 or more
    982   1.1  christos 			 * nops between them).
    983   1.1  christos 			 */
    984   1.1  christos 			tax = this_op(add->next);
    985   1.1  christos 			if (tax == 0 || tax->s.code != (BPF_MISC|BPF_TAX))
    986   1.1  christos 				continue;
    987   1.1  christos 
    988   1.1  christos 			/*
    989   1.1  christos 			 * Check that an ild follows that (with 0 or more
    990   1.1  christos 			 * nops between them).
    991   1.1  christos 			 */
    992   1.1  christos 			ild = this_op(tax->next);
    993   1.1  christos 			if (ild == 0 || BPF_CLASS(ild->s.code) != BPF_LD ||
    994   1.1  christos 			    BPF_MODE(ild->s.code) != BPF_IND)
    995   1.1  christos 				continue;
    996   1.1  christos 			/*
    997   1.1  christos 			 * We want to turn this sequence:
    998   1.1  christos 			 *
    999   1.1  christos 			 * (004) ldi     #0x2		{s}
   1000   1.1  christos 			 * (005) ldxms   [14]		{next}  -- optional
   1001   1.1  christos 			 * (006) addx			{add}
   1002   1.1  christos 			 * (007) tax			{tax}
   1003   1.1  christos 			 * (008) ild     [x+0]		{ild}
   1004   1.1  christos 			 *
   1005   1.1  christos 			 * into this sequence:
   1006   1.1  christos 			 *
   1007   1.1  christos 			 * (004) nop
   1008   1.1  christos 			 * (005) ldxms   [14]
   1009   1.1  christos 			 * (006) nop
   1010   1.1  christos 			 * (007) nop
   1011   1.1  christos 			 * (008) ild     [x+2]
   1012   1.1  christos 			 *
   1013   1.1  christos 			 * XXX We need to check that X is not
   1014   1.1  christos 			 * subsequently used, because we want to change
   1015   1.1  christos 			 * what'll be in it after this sequence.
   1016   1.1  christos 			 *
   1017   1.1  christos 			 * We know we can eliminate the accumulator
   1018   1.1  christos 			 * modifications earlier in the sequence since
   1019   1.1  christos 			 * it is defined by the last stmt of this sequence
   1020   1.1  christos 			 * (i.e., the last statement of the sequence loads
   1021   1.1  christos 			 * a value into the accumulator, so we can eliminate
   1022   1.1  christos 			 * earlier operations on the accumulator).
   1023   1.1  christos 			 */
   1024   1.1  christos 			ild->s.k += s->s.k;
   1025   1.1  christos 			s->s.code = NOP;
   1026   1.1  christos 			add->s.code = NOP;
   1027   1.1  christos 			tax->s.code = NOP;
   1028  1.12  christos 			/*
   1029  1.12  christos 			 * XXX - optimizer loop detection.
   1030  1.12  christos 			 */
   1031  1.12  christos 			opt_state->non_branch_movement_performed = 1;
   1032   1.9  christos 			opt_state->done = 0;
   1033   1.1  christos 		}
   1034   1.1  christos 	}
   1035   1.1  christos 	/*
   1036   1.1  christos 	 * If the comparison at the end of a block is an equality
   1037   1.1  christos 	 * comparison against a constant, and nobody uses the value
   1038   1.1  christos 	 * we leave in the A register at the end of a block, and
   1039   1.1  christos 	 * the operation preceding the comparison is an arithmetic
   1040   1.1  christos 	 * operation, we can sometime optimize it away.
   1041   1.1  christos 	 */
   1042   1.1  christos 	if (b->s.code == (BPF_JMP|BPF_JEQ|BPF_K) &&
   1043   1.1  christos 	    !ATOMELEM(b->out_use, A_ATOM)) {
   1044  1.12  christos 		/*
   1045  1.12  christos 		 * We can optimize away certain subtractions of the
   1046  1.12  christos 		 * X register.
   1047  1.12  christos 		 */
   1048   1.1  christos 		if (last->s.code == (BPF_ALU|BPF_SUB|BPF_X)) {
   1049   1.1  christos 			val = b->val[X_ATOM];
   1050   1.9  christos 			if (opt_state->vmap[val].is_const) {
   1051   1.1  christos 				/*
   1052   1.1  christos 				 * If we have a subtract to do a comparison,
   1053   1.1  christos 				 * and the X register is a known constant,
   1054   1.1  christos 				 * we can merge this value into the
   1055   1.1  christos 				 * comparison:
   1056   1.1  christos 				 *
   1057   1.1  christos 				 * sub x  ->	nop
   1058   1.1  christos 				 * jeq #y	jeq #(x+y)
   1059   1.1  christos 				 */
   1060   1.9  christos 				b->s.k += opt_state->vmap[val].const_val;
   1061   1.1  christos 				last->s.code = NOP;
   1062  1.12  christos 				/*
   1063  1.12  christos 				 * XXX - optimizer loop detection.
   1064  1.12  christos 				 */
   1065  1.12  christos 				opt_state->non_branch_movement_performed = 1;
   1066   1.9  christos 				opt_state->done = 0;
   1067   1.1  christos 			} else if (b->s.k == 0) {
   1068   1.1  christos 				/*
   1069   1.1  christos 				 * If the X register isn't a constant,
   1070   1.1  christos 				 * and the comparison in the test is
   1071   1.1  christos 				 * against 0, we can compare with the
   1072   1.1  christos 				 * X register, instead:
   1073   1.1  christos 				 *
   1074   1.1  christos 				 * sub x  ->	nop
   1075   1.1  christos 				 * jeq #0	jeq x
   1076   1.1  christos 				 */
   1077   1.1  christos 				last->s.code = NOP;
   1078   1.1  christos 				b->s.code = BPF_JMP|BPF_JEQ|BPF_X;
   1079  1.12  christos 				/*
   1080  1.12  christos 				 * XXX - optimizer loop detection.
   1081  1.12  christos 				 */
   1082  1.12  christos 				opt_state->non_branch_movement_performed = 1;
   1083   1.9  christos 				opt_state->done = 0;
   1084   1.1  christos 			}
   1085   1.1  christos 		}
   1086   1.1  christos 		/*
   1087   1.1  christos 		 * Likewise, a constant subtract can be simplified:
   1088   1.1  christos 		 *
   1089   1.1  christos 		 * sub #x ->	nop
   1090   1.1  christos 		 * jeq #y ->	jeq #(x+y)
   1091   1.1  christos 		 */
   1092   1.1  christos 		else if (last->s.code == (BPF_ALU|BPF_SUB|BPF_K)) {
   1093   1.1  christos 			last->s.code = NOP;
   1094   1.1  christos 			b->s.k += last->s.k;
   1095  1.12  christos 			/*
   1096  1.12  christos 			 * XXX - optimizer loop detection.
   1097  1.12  christos 			 */
   1098  1.12  christos 			opt_state->non_branch_movement_performed = 1;
   1099   1.9  christos 			opt_state->done = 0;
   1100   1.1  christos 		}
   1101   1.1  christos 		/*
   1102   1.1  christos 		 * And, similarly, a constant AND can be simplified
   1103   1.1  christos 		 * if we're testing against 0, i.e.:
   1104   1.1  christos 		 *
   1105   1.1  christos 		 * and #k	nop
   1106   1.1  christos 		 * jeq #0  ->	jset #k
   1107   1.1  christos 		 */
   1108   1.1  christos 		else if (last->s.code == (BPF_ALU|BPF_AND|BPF_K) &&
   1109   1.1  christos 		    b->s.k == 0) {
   1110   1.1  christos 			b->s.k = last->s.k;
   1111   1.1  christos 			b->s.code = BPF_JMP|BPF_K|BPF_JSET;
   1112   1.1  christos 			last->s.code = NOP;
   1113  1.12  christos 			/*
   1114  1.12  christos 			 * XXX - optimizer loop detection.
   1115  1.12  christos 			 */
   1116  1.12  christos 			opt_state->non_branch_movement_performed = 1;
   1117   1.9  christos 			opt_state->done = 0;
   1118   1.1  christos 			opt_not(b);
   1119   1.1  christos 		}
   1120   1.1  christos 	}
   1121   1.1  christos 	/*
   1122   1.1  christos 	 * jset #0        ->   never
   1123   1.1  christos 	 * jset #ffffffff ->   always
   1124   1.1  christos 	 */
   1125   1.1  christos 	if (b->s.code == (BPF_JMP|BPF_K|BPF_JSET)) {
   1126   1.1  christos 		if (b->s.k == 0)
   1127   1.1  christos 			JT(b) = JF(b);
   1128  1.12  christos 		if (b->s.k == 0xffffffffU)
   1129   1.1  christos 			JF(b) = JT(b);
   1130   1.1  christos 	}
   1131   1.1  christos 	/*
   1132   1.1  christos 	 * If we're comparing against the index register, and the index
   1133   1.1  christos 	 * register is a known constant, we can just compare against that
   1134   1.1  christos 	 * constant.
   1135   1.1  christos 	 */
   1136   1.1  christos 	val = b->val[X_ATOM];
   1137   1.9  christos 	if (opt_state->vmap[val].is_const && BPF_SRC(b->s.code) == BPF_X) {
   1138  1.12  christos 		bpf_u_int32 v = opt_state->vmap[val].const_val;
   1139   1.1  christos 		b->s.code &= ~BPF_X;
   1140   1.1  christos 		b->s.k = v;
   1141   1.1  christos 	}
   1142   1.1  christos 	/*
   1143   1.1  christos 	 * If the accumulator is a known constant, we can compute the
   1144   1.1  christos 	 * comparison result.
   1145   1.1  christos 	 */
   1146   1.1  christos 	val = b->val[A_ATOM];
   1147   1.9  christos 	if (opt_state->vmap[val].is_const && BPF_SRC(b->s.code) == BPF_K) {
   1148  1.12  christos 		bpf_u_int32 v = opt_state->vmap[val].const_val;
   1149   1.1  christos 		switch (BPF_OP(b->s.code)) {
   1150   1.1  christos 
   1151   1.1  christos 		case BPF_JEQ:
   1152   1.1  christos 			v = v == b->s.k;
   1153   1.1  christos 			break;
   1154   1.1  christos 
   1155   1.1  christos 		case BPF_JGT:
   1156  1.12  christos 			v = v > b->s.k;
   1157   1.1  christos 			break;
   1158   1.1  christos 
   1159   1.1  christos 		case BPF_JGE:
   1160  1.12  christos 			v = v >= b->s.k;
   1161   1.1  christos 			break;
   1162   1.1  christos 
   1163   1.1  christos 		case BPF_JSET:
   1164   1.1  christos 			v &= b->s.k;
   1165   1.1  christos 			break;
   1166   1.1  christos 
   1167   1.1  christos 		default:
   1168   1.1  christos 			abort();
   1169   1.1  christos 		}
   1170  1.12  christos 		if (JF(b) != JT(b)) {
   1171  1.12  christos 			/*
   1172  1.12  christos 			 * XXX - optimizer loop detection.
   1173  1.12  christos 			 */
   1174  1.12  christos 			opt_state->non_branch_movement_performed = 1;
   1175   1.9  christos 			opt_state->done = 0;
   1176  1.12  christos 		}
   1177   1.1  christos 		if (v)
   1178   1.1  christos 			JF(b) = JT(b);
   1179   1.1  christos 		else
   1180   1.1  christos 			JT(b) = JF(b);
   1181   1.1  christos 	}
   1182   1.1  christos }
   1183   1.1  christos 
   1184   1.1  christos /*
   1185   1.1  christos  * Compute the symbolic value of expression of 's', and update
   1186   1.1  christos  * anything it defines in the value table 'val'.  If 'alter' is true,
   1187   1.1  christos  * do various optimizations.  This code would be cleaner if symbolic
   1188   1.1  christos  * evaluation and code transformations weren't folded together.
   1189   1.1  christos  */
   1190   1.1  christos static void
   1191  1.12  christos opt_stmt(opt_state_t *opt_state, struct stmt *s, bpf_u_int32 val[], int alter)
   1192   1.1  christos {
   1193   1.1  christos 	int op;
   1194  1.12  christos 	bpf_u_int32 v;
   1195   1.1  christos 
   1196   1.1  christos 	switch (s->code) {
   1197   1.1  christos 
   1198   1.1  christos 	case BPF_LD|BPF_ABS|BPF_W:
   1199   1.1  christos 	case BPF_LD|BPF_ABS|BPF_H:
   1200   1.1  christos 	case BPF_LD|BPF_ABS|BPF_B:
   1201   1.9  christos 		v = F(opt_state, s->code, s->k, 0L);
   1202   1.1  christos 		vstore(s, &val[A_ATOM], v, alter);
   1203   1.1  christos 		break;
   1204   1.1  christos 
   1205   1.1  christos 	case BPF_LD|BPF_IND|BPF_W:
   1206   1.1  christos 	case BPF_LD|BPF_IND|BPF_H:
   1207   1.1  christos 	case BPF_LD|BPF_IND|BPF_B:
   1208   1.1  christos 		v = val[X_ATOM];
   1209   1.9  christos 		if (alter && opt_state->vmap[v].is_const) {
   1210   1.1  christos 			s->code = BPF_LD|BPF_ABS|BPF_SIZE(s->code);
   1211   1.9  christos 			s->k += opt_state->vmap[v].const_val;
   1212   1.9  christos 			v = F(opt_state, s->code, s->k, 0L);
   1213  1.12  christos 			/*
   1214  1.12  christos 			 * XXX - optimizer loop detection.
   1215  1.12  christos 			 */
   1216  1.12  christos 			opt_state->non_branch_movement_performed = 1;
   1217   1.9  christos 			opt_state->done = 0;
   1218   1.1  christos 		}
   1219   1.1  christos 		else
   1220   1.9  christos 			v = F(opt_state, s->code, s->k, v);
   1221   1.1  christos 		vstore(s, &val[A_ATOM], v, alter);
   1222   1.1  christos 		break;
   1223   1.1  christos 
   1224   1.1  christos 	case BPF_LD|BPF_LEN:
   1225   1.9  christos 		v = F(opt_state, s->code, 0L, 0L);
   1226   1.1  christos 		vstore(s, &val[A_ATOM], v, alter);
   1227   1.1  christos 		break;
   1228   1.1  christos 
   1229   1.1  christos 	case BPF_LD|BPF_IMM:
   1230   1.1  christos 		v = K(s->k);
   1231   1.1  christos 		vstore(s, &val[A_ATOM], v, alter);
   1232   1.1  christos 		break;
   1233   1.1  christos 
   1234   1.1  christos 	case BPF_LDX|BPF_IMM:
   1235   1.1  christos 		v = K(s->k);
   1236   1.1  christos 		vstore(s, &val[X_ATOM], v, alter);
   1237   1.1  christos 		break;
   1238   1.1  christos 
   1239   1.1  christos 	case BPF_LDX|BPF_MSH|BPF_B:
   1240   1.9  christos 		v = F(opt_state, s->code, s->k, 0L);
   1241   1.1  christos 		vstore(s, &val[X_ATOM], v, alter);
   1242   1.1  christos 		break;
   1243   1.1  christos 
   1244   1.1  christos 	case BPF_ALU|BPF_NEG:
   1245   1.9  christos 		if (alter && opt_state->vmap[val[A_ATOM]].is_const) {
   1246   1.1  christos 			s->code = BPF_LD|BPF_IMM;
   1247  1.11  christos 			/*
   1248  1.11  christos 			 * Do this negation as unsigned arithmetic; that's
   1249  1.11  christos 			 * what modern BPF engines do, and it guarantees
   1250  1.11  christos 			 * that all possible values can be negated.  (Yeah,
   1251  1.11  christos 			 * negating 0x80000000, the minimum signed 32-bit
   1252  1.11  christos 			 * two's-complement value, results in 0x80000000,
   1253  1.11  christos 			 * so it's still negative, but we *should* be doing
   1254  1.11  christos 			 * all unsigned arithmetic here, to match what
   1255  1.11  christos 			 * modern BPF engines do.)
   1256  1.11  christos 			 *
   1257  1.11  christos 			 * Express it as 0U - (unsigned value) so that we
   1258  1.11  christos 			 * don't get compiler warnings about negating an
   1259  1.11  christos 			 * unsigned value and don't get UBSan warnings
   1260  1.11  christos 			 * about the result of negating 0x80000000 being
   1261  1.11  christos 			 * undefined.
   1262  1.11  christos 			 */
   1263  1.12  christos 			s->k = 0U - opt_state->vmap[val[A_ATOM]].const_val;
   1264   1.1  christos 			val[A_ATOM] = K(s->k);
   1265   1.1  christos 		}
   1266   1.1  christos 		else
   1267   1.9  christos 			val[A_ATOM] = F(opt_state, s->code, val[A_ATOM], 0L);
   1268   1.1  christos 		break;
   1269   1.1  christos 
   1270   1.1  christos 	case BPF_ALU|BPF_ADD|BPF_K:
   1271   1.1  christos 	case BPF_ALU|BPF_SUB|BPF_K:
   1272   1.1  christos 	case BPF_ALU|BPF_MUL|BPF_K:
   1273   1.1  christos 	case BPF_ALU|BPF_DIV|BPF_K:
   1274   1.7  christos 	case BPF_ALU|BPF_MOD|BPF_K:
   1275   1.1  christos 	case BPF_ALU|BPF_AND|BPF_K:
   1276   1.1  christos 	case BPF_ALU|BPF_OR|BPF_K:
   1277   1.7  christos 	case BPF_ALU|BPF_XOR|BPF_K:
   1278   1.1  christos 	case BPF_ALU|BPF_LSH|BPF_K:
   1279   1.1  christos 	case BPF_ALU|BPF_RSH|BPF_K:
   1280   1.1  christos 		op = BPF_OP(s->code);
   1281   1.1  christos 		if (alter) {
   1282   1.1  christos 			if (s->k == 0) {
   1283  1.11  christos 				/*
   1284  1.11  christos 				 * Optimize operations where the constant
   1285  1.11  christos 				 * is zero.
   1286  1.11  christos 				 *
   1287  1.11  christos 				 * Don't optimize away "sub #0"
   1288   1.1  christos 				 * as it may be needed later to
   1289  1.11  christos 				 * fixup the generated math code.
   1290  1.11  christos 				 *
   1291  1.11  christos 				 * Fail if we're dividing by zero or taking
   1292  1.11  christos 				 * a modulus by zero.
   1293  1.11  christos 				 */
   1294   1.1  christos 				if (op == BPF_ADD ||
   1295   1.1  christos 				    op == BPF_LSH || op == BPF_RSH ||
   1296   1.7  christos 				    op == BPF_OR || op == BPF_XOR) {
   1297   1.1  christos 					s->code = NOP;
   1298   1.1  christos 					break;
   1299   1.1  christos 				}
   1300   1.1  christos 				if (op == BPF_MUL || op == BPF_AND) {
   1301   1.1  christos 					s->code = BPF_LD|BPF_IMM;
   1302   1.1  christos 					val[A_ATOM] = K(s->k);
   1303   1.1  christos 					break;
   1304   1.1  christos 				}
   1305  1.11  christos 				if (op == BPF_DIV)
   1306  1.11  christos 					opt_error(opt_state,
   1307  1.11  christos 					    "division by zero");
   1308  1.11  christos 				if (op == BPF_MOD)
   1309  1.11  christos 					opt_error(opt_state,
   1310  1.11  christos 					    "modulus by zero");
   1311   1.1  christos 			}
   1312   1.9  christos 			if (opt_state->vmap[val[A_ATOM]].is_const) {
   1313  1.11  christos 				fold_op(opt_state, s, val[A_ATOM], K(s->k));
   1314   1.1  christos 				val[A_ATOM] = K(s->k);
   1315   1.1  christos 				break;
   1316   1.1  christos 			}
   1317   1.1  christos 		}
   1318   1.9  christos 		val[A_ATOM] = F(opt_state, s->code, val[A_ATOM], K(s->k));
   1319   1.1  christos 		break;
   1320   1.1  christos 
   1321   1.1  christos 	case BPF_ALU|BPF_ADD|BPF_X:
   1322   1.1  christos 	case BPF_ALU|BPF_SUB|BPF_X:
   1323   1.1  christos 	case BPF_ALU|BPF_MUL|BPF_X:
   1324   1.1  christos 	case BPF_ALU|BPF_DIV|BPF_X:
   1325   1.7  christos 	case BPF_ALU|BPF_MOD|BPF_X:
   1326   1.1  christos 	case BPF_ALU|BPF_AND|BPF_X:
   1327   1.1  christos 	case BPF_ALU|BPF_OR|BPF_X:
   1328   1.7  christos 	case BPF_ALU|BPF_XOR|BPF_X:
   1329   1.1  christos 	case BPF_ALU|BPF_LSH|BPF_X:
   1330   1.1  christos 	case BPF_ALU|BPF_RSH|BPF_X:
   1331   1.1  christos 		op = BPF_OP(s->code);
   1332   1.9  christos 		if (alter && opt_state->vmap[val[X_ATOM]].is_const) {
   1333   1.9  christos 			if (opt_state->vmap[val[A_ATOM]].is_const) {
   1334  1.11  christos 				fold_op(opt_state, s, val[A_ATOM], val[X_ATOM]);
   1335   1.1  christos 				val[A_ATOM] = K(s->k);
   1336   1.1  christos 			}
   1337   1.1  christos 			else {
   1338   1.1  christos 				s->code = BPF_ALU|BPF_K|op;
   1339   1.9  christos 				s->k = opt_state->vmap[val[X_ATOM]].const_val;
   1340  1.11  christos 				if ((op == BPF_LSH || op == BPF_RSH) &&
   1341  1.12  christos 				    s->k > 31)
   1342  1.11  christos 					opt_error(opt_state,
   1343  1.11  christos 					    "shift by more than 31 bits");
   1344  1.12  christos 				/*
   1345  1.12  christos 				 * XXX - optimizer loop detection.
   1346  1.12  christos 				 */
   1347  1.12  christos 				opt_state->non_branch_movement_performed = 1;
   1348   1.9  christos 				opt_state->done = 0;
   1349   1.1  christos 				val[A_ATOM] =
   1350   1.9  christos 					F(opt_state, s->code, val[A_ATOM], K(s->k));
   1351   1.1  christos 			}
   1352   1.1  christos 			break;
   1353   1.1  christos 		}
   1354   1.1  christos 		/*
   1355   1.1  christos 		 * Check if we're doing something to an accumulator
   1356   1.1  christos 		 * that is 0, and simplify.  This may not seem like
   1357   1.1  christos 		 * much of a simplification but it could open up further
   1358   1.1  christos 		 * optimizations.
   1359   1.1  christos 		 * XXX We could also check for mul by 1, etc.
   1360   1.1  christos 		 */
   1361   1.9  christos 		if (alter && opt_state->vmap[val[A_ATOM]].is_const
   1362   1.9  christos 		    && opt_state->vmap[val[A_ATOM]].const_val == 0) {
   1363   1.7  christos 			if (op == BPF_ADD || op == BPF_OR || op == BPF_XOR) {
   1364   1.1  christos 				s->code = BPF_MISC|BPF_TXA;
   1365   1.1  christos 				vstore(s, &val[A_ATOM], val[X_ATOM], alter);
   1366   1.1  christos 				break;
   1367   1.1  christos 			}
   1368   1.7  christos 			else if (op == BPF_MUL || op == BPF_DIV || op == BPF_MOD ||
   1369   1.1  christos 				 op == BPF_AND || op == BPF_LSH || op == BPF_RSH) {
   1370   1.1  christos 				s->code = BPF_LD|BPF_IMM;
   1371   1.1  christos 				s->k = 0;
   1372   1.1  christos 				vstore(s, &val[A_ATOM], K(s->k), alter);
   1373   1.1  christos 				break;
   1374   1.1  christos 			}
   1375   1.1  christos 			else if (op == BPF_NEG) {
   1376   1.1  christos 				s->code = NOP;
   1377   1.1  christos 				break;
   1378   1.1  christos 			}
   1379   1.1  christos 		}
   1380   1.9  christos 		val[A_ATOM] = F(opt_state, s->code, val[A_ATOM], val[X_ATOM]);
   1381   1.1  christos 		break;
   1382   1.1  christos 
   1383   1.1  christos 	case BPF_MISC|BPF_TXA:
   1384   1.1  christos 		vstore(s, &val[A_ATOM], val[X_ATOM], alter);
   1385   1.1  christos 		break;
   1386   1.1  christos 
   1387   1.1  christos 	case BPF_LD|BPF_MEM:
   1388   1.1  christos 		v = val[s->k];
   1389   1.9  christos 		if (alter && opt_state->vmap[v].is_const) {
   1390   1.1  christos 			s->code = BPF_LD|BPF_IMM;
   1391   1.9  christos 			s->k = opt_state->vmap[v].const_val;
   1392  1.12  christos 			/*
   1393  1.12  christos 			 * XXX - optimizer loop detection.
   1394  1.12  christos 			 */
   1395  1.12  christos 			opt_state->non_branch_movement_performed = 1;
   1396   1.9  christos 			opt_state->done = 0;
   1397   1.1  christos 		}
   1398   1.1  christos 		vstore(s, &val[A_ATOM], v, alter);
   1399   1.1  christos 		break;
   1400   1.1  christos 
   1401   1.1  christos 	case BPF_MISC|BPF_TAX:
   1402   1.1  christos 		vstore(s, &val[X_ATOM], val[A_ATOM], alter);
   1403   1.1  christos 		break;
   1404   1.1  christos 
   1405   1.1  christos 	case BPF_LDX|BPF_MEM:
   1406   1.1  christos 		v = val[s->k];
   1407   1.9  christos 		if (alter && opt_state->vmap[v].is_const) {
   1408   1.1  christos 			s->code = BPF_LDX|BPF_IMM;
   1409   1.9  christos 			s->k = opt_state->vmap[v].const_val;
   1410  1.12  christos 			/*
   1411  1.12  christos 			 * XXX - optimizer loop detection.
   1412  1.12  christos 			 */
   1413  1.12  christos 			opt_state->non_branch_movement_performed = 1;
   1414   1.9  christos 			opt_state->done = 0;
   1415   1.1  christos 		}
   1416   1.1  christos 		vstore(s, &val[X_ATOM], v, alter);
   1417   1.1  christos 		break;
   1418   1.1  christos 
   1419   1.1  christos 	case BPF_ST:
   1420   1.1  christos 		vstore(s, &val[s->k], val[A_ATOM], alter);
   1421   1.1  christos 		break;
   1422   1.1  christos 
   1423   1.1  christos 	case BPF_STX:
   1424   1.1  christos 		vstore(s, &val[s->k], val[X_ATOM], alter);
   1425   1.1  christos 		break;
   1426   1.1  christos 	}
   1427   1.1  christos }
   1428   1.1  christos 
   1429   1.1  christos static void
   1430   1.9  christos deadstmt(opt_state_t *opt_state, register struct stmt *s, register struct stmt *last[])
   1431   1.1  christos {
   1432   1.1  christos 	register int atom;
   1433   1.1  christos 
   1434   1.1  christos 	atom = atomuse(s);
   1435   1.1  christos 	if (atom >= 0) {
   1436   1.1  christos 		if (atom == AX_ATOM) {
   1437   1.1  christos 			last[X_ATOM] = 0;
   1438   1.1  christos 			last[A_ATOM] = 0;
   1439   1.1  christos 		}
   1440   1.1  christos 		else
   1441   1.1  christos 			last[atom] = 0;
   1442   1.1  christos 	}
   1443   1.1  christos 	atom = atomdef(s);
   1444   1.1  christos 	if (atom >= 0) {
   1445   1.1  christos 		if (last[atom]) {
   1446  1.12  christos 			/*
   1447  1.12  christos 			 * XXX - optimizer loop detection.
   1448  1.12  christos 			 */
   1449  1.12  christos 			opt_state->non_branch_movement_performed = 1;
   1450   1.9  christos 			opt_state->done = 0;
   1451   1.1  christos 			last[atom]->code = NOP;
   1452   1.1  christos 		}
   1453   1.1  christos 		last[atom] = s;
   1454   1.1  christos 	}
   1455   1.1  christos }
   1456   1.1  christos 
   1457   1.1  christos static void
   1458   1.9  christos opt_deadstores(opt_state_t *opt_state, register struct block *b)
   1459   1.1  christos {
   1460   1.1  christos 	register struct slist *s;
   1461   1.1  christos 	register int atom;
   1462   1.1  christos 	struct stmt *last[N_ATOMS];
   1463   1.1  christos 
   1464   1.1  christos 	memset((char *)last, 0, sizeof last);
   1465   1.1  christos 
   1466   1.1  christos 	for (s = b->stmts; s != 0; s = s->next)
   1467   1.9  christos 		deadstmt(opt_state, &s->s, last);
   1468   1.9  christos 	deadstmt(opt_state, &b->s, last);
   1469   1.1  christos 
   1470   1.1  christos 	for (atom = 0; atom < N_ATOMS; ++atom)
   1471   1.1  christos 		if (last[atom] && !ATOMELEM(b->out_use, atom)) {
   1472   1.1  christos 			last[atom]->code = NOP;
   1473  1.12  christos 			/*
   1474  1.13  christos 			 * The store was removed as it's dead,
   1475  1.13  christos 			 * so the value stored into now has
   1476  1.13  christos 			 * an unknown value.
   1477  1.13  christos 			 */
   1478  1.13  christos 			vstore(0, &b->val[atom], VAL_UNKNOWN, 0);
   1479  1.13  christos 			/*
   1480  1.12  christos 			 * XXX - optimizer loop detection.
   1481  1.12  christos 			 */
   1482  1.12  christos 			opt_state->non_branch_movement_performed = 1;
   1483   1.9  christos 			opt_state->done = 0;
   1484   1.1  christos 		}
   1485   1.1  christos }
   1486   1.1  christos 
   1487   1.1  christos static void
   1488  1.11  christos opt_blk(opt_state_t *opt_state, struct block *b, int do_stmts)
   1489   1.1  christos {
   1490   1.1  christos 	struct slist *s;
   1491   1.1  christos 	struct edge *p;
   1492   1.1  christos 	int i;
   1493  1.12  christos 	bpf_u_int32 aval, xval;
   1494   1.1  christos 
   1495   1.1  christos #if 0
   1496   1.1  christos 	for (s = b->stmts; s && s->next; s = s->next)
   1497   1.1  christos 		if (BPF_CLASS(s->s.code) == BPF_JMP) {
   1498   1.1  christos 			do_stmts = 0;
   1499   1.1  christos 			break;
   1500   1.1  christos 		}
   1501   1.1  christos #endif
   1502   1.1  christos 
   1503   1.1  christos 	/*
   1504   1.1  christos 	 * Initialize the atom values.
   1505   1.1  christos 	 */
   1506   1.1  christos 	p = b->in_edges;
   1507   1.1  christos 	if (p == 0) {
   1508   1.1  christos 		/*
   1509   1.1  christos 		 * We have no predecessors, so everything is undefined
   1510   1.1  christos 		 * upon entry to this block.
   1511   1.1  christos 		 */
   1512   1.1  christos 		memset((char *)b->val, 0, sizeof(b->val));
   1513   1.1  christos 	} else {
   1514   1.1  christos 		/*
   1515   1.1  christos 		 * Inherit values from our predecessors.
   1516   1.1  christos 		 *
   1517   1.1  christos 		 * First, get the values from the predecessor along the
   1518   1.1  christos 		 * first edge leading to this node.
   1519   1.1  christos 		 */
   1520   1.1  christos 		memcpy((char *)b->val, (char *)p->pred->val, sizeof(b->val));
   1521   1.1  christos 		/*
   1522   1.1  christos 		 * Now look at all the other nodes leading to this node.
   1523   1.1  christos 		 * If, for the predecessor along that edge, a register
   1524   1.1  christos 		 * has a different value from the one we have (i.e.,
   1525   1.1  christos 		 * control paths are merging, and the merging paths
   1526   1.1  christos 		 * assign different values to that register), give the
   1527   1.1  christos 		 * register the undefined value of 0.
   1528   1.1  christos 		 */
   1529   1.1  christos 		while ((p = p->next) != NULL) {
   1530   1.1  christos 			for (i = 0; i < N_ATOMS; ++i)
   1531   1.1  christos 				if (b->val[i] != p->pred->val[i])
   1532   1.1  christos 					b->val[i] = 0;
   1533   1.1  christos 		}
   1534   1.1  christos 	}
   1535   1.1  christos 	aval = b->val[A_ATOM];
   1536   1.1  christos 	xval = b->val[X_ATOM];
   1537   1.1  christos 	for (s = b->stmts; s; s = s->next)
   1538  1.11  christos 		opt_stmt(opt_state, &s->s, b->val, do_stmts);
   1539   1.1  christos 
   1540   1.1  christos 	/*
   1541   1.1  christos 	 * This is a special case: if we don't use anything from this
   1542   1.1  christos 	 * block, and we load the accumulator or index register with a
   1543   1.1  christos 	 * value that is already there, or if this block is a return,
   1544   1.1  christos 	 * eliminate all the statements.
   1545   1.1  christos 	 *
   1546  1.12  christos 	 * XXX - what if it does a store?  Presumably that falls under
   1547  1.12  christos 	 * the heading of "if we don't use anything from this block",
   1548  1.12  christos 	 * i.e., if we use any memory location set to a different
   1549  1.12  christos 	 * value by this block, then we use something from this block.
   1550   1.1  christos 	 *
   1551   1.1  christos 	 * XXX - why does it matter whether we use anything from this
   1552   1.1  christos 	 * block?  If the accumulator or index register doesn't change
   1553   1.1  christos 	 * its value, isn't that OK even if we use that value?
   1554   1.1  christos 	 *
   1555   1.1  christos 	 * XXX - if we load the accumulator with a different value,
   1556   1.1  christos 	 * and the block ends with a conditional branch, we obviously
   1557   1.1  christos 	 * can't eliminate it, as the branch depends on that value.
   1558   1.1  christos 	 * For the index register, the conditional branch only depends
   1559   1.1  christos 	 * on the index register value if the test is against the index
   1560   1.1  christos 	 * register value rather than a constant; if nothing uses the
   1561   1.1  christos 	 * value we put into the index register, and we're not testing
   1562   1.1  christos 	 * against the index register's value, and there aren't any
   1563   1.1  christos 	 * other problems that would keep us from eliminating this
   1564   1.1  christos 	 * block, can we eliminate it?
   1565   1.1  christos 	 */
   1566   1.1  christos 	if (do_stmts &&
   1567  1.10  christos 	    ((b->out_use == 0 &&
   1568  1.10  christos 	      aval != VAL_UNKNOWN && b->val[A_ATOM] == aval &&
   1569  1.10  christos 	      xval != VAL_UNKNOWN && b->val[X_ATOM] == xval) ||
   1570   1.1  christos 	     BPF_CLASS(b->s.code) == BPF_RET)) {
   1571   1.1  christos 		if (b->stmts != 0) {
   1572   1.1  christos 			b->stmts = 0;
   1573  1.12  christos 			/*
   1574  1.12  christos 			 * XXX - optimizer loop detection.
   1575  1.12  christos 			 */
   1576  1.12  christos 			opt_state->non_branch_movement_performed = 1;
   1577   1.9  christos 			opt_state->done = 0;
   1578   1.1  christos 		}
   1579   1.1  christos 	} else {
   1580   1.9  christos 		opt_peep(opt_state, b);
   1581   1.9  christos 		opt_deadstores(opt_state, b);
   1582   1.1  christos 	}
   1583   1.1  christos 	/*
   1584   1.1  christos 	 * Set up values for branch optimizer.
   1585   1.1  christos 	 */
   1586   1.1  christos 	if (BPF_SRC(b->s.code) == BPF_K)
   1587   1.1  christos 		b->oval = K(b->s.k);
   1588   1.1  christos 	else
   1589   1.1  christos 		b->oval = b->val[X_ATOM];
   1590   1.1  christos 	b->et.code = b->s.code;
   1591   1.1  christos 	b->ef.code = -b->s.code;
   1592   1.1  christos }
   1593   1.1  christos 
   1594   1.1  christos /*
   1595   1.1  christos  * Return true if any register that is used on exit from 'succ', has
   1596   1.1  christos  * an exit value that is different from the corresponding exit value
   1597   1.1  christos  * from 'b'.
   1598   1.1  christos  */
   1599   1.1  christos static int
   1600   1.6  christos use_conflict(struct block *b, struct block *succ)
   1601   1.1  christos {
   1602   1.1  christos 	int atom;
   1603   1.1  christos 	atomset use = succ->out_use;
   1604   1.1  christos 
   1605   1.1  christos 	if (use == 0)
   1606   1.1  christos 		return 0;
   1607   1.1  christos 
   1608   1.1  christos 	for (atom = 0; atom < N_ATOMS; ++atom)
   1609   1.1  christos 		if (ATOMELEM(use, atom))
   1610   1.1  christos 			if (b->val[atom] != succ->val[atom])
   1611   1.1  christos 				return 1;
   1612   1.1  christos 	return 0;
   1613   1.1  christos }
   1614   1.1  christos 
   1615  1.12  christos /*
   1616  1.12  christos  * Given a block that is the successor of an edge, and an edge that
   1617  1.12  christos  * dominates that edge, return either a pointer to a child of that
   1618  1.12  christos  * block (a block to which that block jumps) if that block is a
   1619  1.12  christos  * candidate to replace the successor of the latter edge or NULL
   1620  1.12  christos  * if neither of the children of the first block are candidates.
   1621  1.12  christos  */
   1622   1.1  christos static struct block *
   1623   1.6  christos fold_edge(struct block *child, struct edge *ep)
   1624   1.1  christos {
   1625   1.1  christos 	int sense;
   1626  1.12  christos 	bpf_u_int32 aval0, aval1, oval0, oval1;
   1627   1.1  christos 	int code = ep->code;
   1628   1.1  christos 
   1629   1.1  christos 	if (code < 0) {
   1630  1.12  christos 		/*
   1631  1.12  christos 		 * This edge is a "branch if false" edge.
   1632  1.12  christos 		 */
   1633   1.1  christos 		code = -code;
   1634   1.1  christos 		sense = 0;
   1635  1.12  christos 	} else {
   1636  1.12  christos 		/*
   1637  1.12  christos 		 * This edge is a "branch if true" edge.
   1638  1.12  christos 		 */
   1639   1.1  christos 		sense = 1;
   1640  1.12  christos 	}
   1641   1.1  christos 
   1642  1.12  christos 	/*
   1643  1.12  christos 	 * If the opcode for the branch at the end of the block we
   1644  1.12  christos 	 * were handed isn't the same as the opcode for the branch
   1645  1.12  christos 	 * to which the edge we were handed corresponds, the tests
   1646  1.12  christos 	 * for those branches aren't testing the same conditions,
   1647  1.12  christos 	 * so the blocks to which the first block branches aren't
   1648  1.12  christos 	 * candidates to replace the successor of the edge.
   1649  1.12  christos 	 */
   1650   1.1  christos 	if (child->s.code != code)
   1651   1.1  christos 		return 0;
   1652   1.1  christos 
   1653   1.1  christos 	aval0 = child->val[A_ATOM];
   1654   1.1  christos 	oval0 = child->oval;
   1655   1.1  christos 	aval1 = ep->pred->val[A_ATOM];
   1656   1.1  christos 	oval1 = ep->pred->oval;
   1657   1.1  christos 
   1658  1.12  christos 	/*
   1659  1.12  christos 	 * If the A register value on exit from the successor block
   1660  1.12  christos 	 * isn't the same as the A register value on exit from the
   1661  1.12  christos 	 * predecessor of the edge, the blocks to which the first
   1662  1.12  christos 	 * block branches aren't candidates to replace the successor
   1663  1.12  christos 	 * of the edge.
   1664  1.12  christos 	 */
   1665   1.1  christos 	if (aval0 != aval1)
   1666   1.1  christos 		return 0;
   1667   1.1  christos 
   1668   1.1  christos 	if (oval0 == oval1)
   1669   1.1  christos 		/*
   1670   1.1  christos 		 * The operands of the branch instructions are
   1671  1.12  christos 		 * identical, so the branches are testing the
   1672  1.12  christos 		 * same condition, and the result is true if a true
   1673   1.1  christos 		 * branch was taken to get here, otherwise false.
   1674   1.1  christos 		 */
   1675   1.1  christos 		return sense ? JT(child) : JF(child);
   1676   1.1  christos 
   1677   1.1  christos 	if (sense && code == (BPF_JMP|BPF_JEQ|BPF_K))
   1678   1.1  christos 		/*
   1679   1.1  christos 		 * At this point, we only know the comparison if we
   1680   1.1  christos 		 * came down the true branch, and it was an equality
   1681   1.1  christos 		 * comparison with a constant.
   1682   1.1  christos 		 *
   1683   1.1  christos 		 * I.e., if we came down the true branch, and the branch
   1684   1.1  christos 		 * was an equality comparison with a constant, we know the
   1685   1.1  christos 		 * accumulator contains that constant.  If we came down
   1686   1.1  christos 		 * the false branch, or the comparison wasn't with a
   1687   1.1  christos 		 * constant, we don't know what was in the accumulator.
   1688   1.1  christos 		 *
   1689   1.1  christos 		 * We rely on the fact that distinct constants have distinct
   1690   1.1  christos 		 * value numbers.
   1691   1.1  christos 		 */
   1692   1.1  christos 		return JF(child);
   1693   1.1  christos 
   1694   1.1  christos 	return 0;
   1695   1.1  christos }
   1696   1.1  christos 
   1697  1.12  christos /*
   1698  1.12  christos  * If we can make this edge go directly to a child of the edge's current
   1699  1.12  christos  * successor, do so.
   1700  1.12  christos  */
   1701   1.1  christos static void
   1702   1.9  christos opt_j(opt_state_t *opt_state, struct edge *ep)
   1703   1.1  christos {
   1704  1.12  christos 	register u_int i, k;
   1705   1.1  christos 	register struct block *target;
   1706   1.1  christos 
   1707  1.12  christos 	/*
   1708  1.12  christos 	 * Does this edge go to a block where, if the test
   1709  1.12  christos 	 * at the end of it succeeds, it goes to a block
   1710  1.12  christos 	 * that's a leaf node of the DAG, i.e. a return
   1711  1.12  christos 	 * statement?
   1712  1.12  christos 	 * If so, there's nothing to optimize.
   1713  1.12  christos 	 */
   1714   1.1  christos 	if (JT(ep->succ) == 0)
   1715   1.1  christos 		return;
   1716   1.1  christos 
   1717  1.12  christos 	/*
   1718  1.12  christos 	 * Does this edge go to a block that goes, in turn, to
   1719  1.12  christos 	 * the same block regardless of whether the test at the
   1720  1.12  christos 	 * end succeeds or fails?
   1721  1.12  christos 	 */
   1722   1.1  christos 	if (JT(ep->succ) == JF(ep->succ)) {
   1723   1.1  christos 		/*
   1724   1.1  christos 		 * Common branch targets can be eliminated, provided
   1725   1.1  christos 		 * there is no data dependency.
   1726  1.12  christos 		 *
   1727  1.12  christos 		 * Check whether any register used on exit from the
   1728  1.12  christos 		 * block to which the successor of this edge goes
   1729  1.12  christos 		 * has a value at that point that's different from
   1730  1.12  christos 		 * the value it has on exit from the predecessor of
   1731  1.12  christos 		 * this edge.  If not, the predecessor of this edge
   1732  1.12  christos 		 * can just go to the block to which the successor
   1733  1.12  christos 		 * of this edge goes, bypassing the successor of this
   1734  1.12  christos 		 * edge, as the successor of this edge isn't doing
   1735  1.12  christos 		 * any calculations whose results are different
   1736  1.12  christos 		 * from what the blocks before it did and isn't
   1737  1.12  christos 		 * doing any tests the results of which matter.
   1738   1.1  christos 		 */
   1739  1.12  christos 		if (!use_conflict(ep->pred, JT(ep->succ))) {
   1740  1.12  christos 			/*
   1741  1.12  christos 			 * No, there isn't.
   1742  1.12  christos 			 * Make this edge go to the block to
   1743  1.12  christos 			 * which the successor of that edge
   1744  1.12  christos 			 * goes.
   1745  1.12  christos 			 *
   1746  1.12  christos 			 * XXX - optimizer loop detection.
   1747  1.12  christos 			 */
   1748  1.12  christos 			opt_state->non_branch_movement_performed = 1;
   1749   1.9  christos 			opt_state->done = 0;
   1750   1.1  christos 			ep->succ = JT(ep->succ);
   1751   1.1  christos 		}
   1752   1.1  christos 	}
   1753   1.1  christos 	/*
   1754   1.1  christos 	 * For each edge dominator that matches the successor of this
   1755   1.1  christos 	 * edge, promote the edge successor to the its grandchild.
   1756   1.1  christos 	 *
   1757   1.1  christos 	 * XXX We violate the set abstraction here in favor a reasonably
   1758   1.1  christos 	 * efficient loop.
   1759   1.1  christos 	 */
   1760   1.1  christos  top:
   1761   1.9  christos 	for (i = 0; i < opt_state->edgewords; ++i) {
   1762  1.12  christos 		/* i'th word in the bitset of dominators */
   1763   1.1  christos 		register bpf_u_int32 x = ep->edom[i];
   1764   1.1  christos 
   1765   1.1  christos 		while (x != 0) {
   1766  1.12  christos 			/* Find the next dominator in that word and mark it as found */
   1767  1.10  christos 			k = lowest_set_bit(x);
   1768  1.11  christos 			x &=~ ((bpf_u_int32)1 << k);
   1769   1.1  christos 			k += i * BITS_PER_WORD;
   1770   1.1  christos 
   1771   1.9  christos 			target = fold_edge(ep->succ, opt_state->edges[k]);
   1772   1.1  christos 			/*
   1773  1.12  christos 			 * We have a candidate to replace the successor
   1774  1.12  christos 			 * of ep.
   1775  1.12  christos 			 *
   1776   1.1  christos 			 * Check that there is no data dependency between
   1777  1.12  christos 			 * nodes that will be violated if we move the edge;
   1778  1.12  christos 			 * i.e., if any register used on exit from the
   1779  1.12  christos 			 * candidate has a value at that point different
   1780  1.12  christos 			 * from the value it has when we exit the
   1781  1.12  christos 			 * predecessor of that edge, there's a data
   1782  1.12  christos 			 * dependency that will be violated.
   1783   1.1  christos 			 */
   1784   1.1  christos 			if (target != 0 && !use_conflict(ep->pred, target)) {
   1785  1.12  christos 				/*
   1786  1.12  christos 				 * It's safe to replace the successor of
   1787  1.12  christos 				 * ep; do so, and note that we've made
   1788  1.12  christos 				 * at least one change.
   1789  1.12  christos 				 *
   1790  1.12  christos 				 * XXX - this is one of the operations that
   1791  1.12  christos 				 * happens when the optimizer gets into
   1792  1.12  christos 				 * one of those infinite loops.
   1793  1.12  christos 				 */
   1794   1.9  christos 				opt_state->done = 0;
   1795   1.1  christos 				ep->succ = target;
   1796   1.1  christos 				if (JT(target) != 0)
   1797   1.1  christos 					/*
   1798   1.1  christos 					 * Start over unless we hit a leaf.
   1799   1.1  christos 					 */
   1800   1.1  christos 					goto top;
   1801   1.1  christos 				return;
   1802   1.1  christos 			}
   1803   1.1  christos 		}
   1804   1.1  christos 	}
   1805   1.1  christos }
   1806   1.1  christos 
   1807  1.12  christos /*
   1808  1.12  christos  * XXX - is this, and and_pullup(), what's described in section 6.1.2
   1809  1.12  christos  * "Predicate Assertion Propagation" in the BPF+ paper?
   1810  1.12  christos  *
   1811  1.12  christos  * Note that this looks at block dominators, not edge dominators.
   1812  1.12  christos  * Don't think so.
   1813  1.12  christos  *
   1814  1.12  christos  * "A or B" compiles into
   1815  1.12  christos  *
   1816  1.12  christos  *          A
   1817  1.12  christos  *       t / \ f
   1818  1.12  christos  *        /   B
   1819  1.12  christos  *       / t / \ f
   1820  1.12  christos  *      \   /
   1821  1.12  christos  *       \ /
   1822  1.12  christos  *        X
   1823  1.12  christos  *
   1824  1.12  christos  *
   1825  1.12  christos  */
   1826   1.1  christos static void
   1827  1.13  christos or_pullup(opt_state_t *opt_state, struct block *b, struct block *root)
   1828   1.1  christos {
   1829  1.12  christos 	bpf_u_int32 val;
   1830  1.12  christos 	int at_top;
   1831   1.1  christos 	struct block *pull;
   1832   1.1  christos 	struct block **diffp, **samep;
   1833   1.1  christos 	struct edge *ep;
   1834   1.1  christos 
   1835   1.1  christos 	ep = b->in_edges;
   1836   1.1  christos 	if (ep == 0)
   1837   1.1  christos 		return;
   1838   1.1  christos 
   1839   1.1  christos 	/*
   1840   1.1  christos 	 * Make sure each predecessor loads the same value.
   1841   1.1  christos 	 * XXX why?
   1842   1.1  christos 	 */
   1843   1.1  christos 	val = ep->pred->val[A_ATOM];
   1844   1.1  christos 	for (ep = ep->next; ep != 0; ep = ep->next)
   1845   1.1  christos 		if (val != ep->pred->val[A_ATOM])
   1846   1.1  christos 			return;
   1847   1.1  christos 
   1848  1.12  christos 	/*
   1849  1.12  christos 	 * For the first edge in the list of edges coming into this block,
   1850  1.12  christos 	 * see whether the predecessor of that edge comes here via a true
   1851  1.12  christos 	 * branch or a false branch.
   1852  1.12  christos 	 */
   1853   1.1  christos 	if (JT(b->in_edges->pred) == b)
   1854  1.12  christos 		diffp = &JT(b->in_edges->pred);	/* jt */
   1855   1.1  christos 	else
   1856  1.12  christos 		diffp = &JF(b->in_edges->pred);	/* jf */
   1857   1.1  christos 
   1858  1.12  christos 	/*
   1859  1.12  christos 	 * diffp is a pointer to a pointer to the block.
   1860  1.12  christos 	 *
   1861  1.12  christos 	 * Go down the false chain looking as far as you can,
   1862  1.12  christos 	 * making sure that each jump-compare is doing the
   1863  1.12  christos 	 * same as the original block.
   1864  1.12  christos 	 *
   1865  1.12  christos 	 * If you reach the bottom before you reach a
   1866  1.12  christos 	 * different jump-compare, just exit.  There's nothing
   1867  1.12  christos 	 * to do here.  XXX - no, this version is checking for
   1868  1.12  christos 	 * the value leaving the block; that's from the BPF+
   1869  1.12  christos 	 * pullup routine.
   1870  1.12  christos 	 */
   1871   1.1  christos 	at_top = 1;
   1872  1.10  christos 	for (;;) {
   1873  1.12  christos 		/*
   1874  1.12  christos 		 * Done if that's not going anywhere XXX
   1875  1.12  christos 		 */
   1876   1.1  christos 		if (*diffp == 0)
   1877   1.1  christos 			return;
   1878   1.1  christos 
   1879  1.12  christos 		/*
   1880  1.12  christos 		 * Done if that predecessor blah blah blah isn't
   1881  1.12  christos 		 * going the same place we're going XXX
   1882  1.12  christos 		 *
   1883  1.12  christos 		 * Does the true edge of this block point to the same
   1884  1.12  christos 		 * location as the true edge of b?
   1885  1.12  christos 		 */
   1886   1.1  christos 		if (JT(*diffp) != JT(b))
   1887   1.1  christos 			return;
   1888   1.1  christos 
   1889  1.12  christos 		/*
   1890  1.12  christos 		 * Done if this node isn't a dominator of that
   1891  1.12  christos 		 * node blah blah blah XXX
   1892  1.12  christos 		 *
   1893  1.12  christos 		 * Does b dominate diffp?
   1894  1.12  christos 		 */
   1895   1.1  christos 		if (!SET_MEMBER((*diffp)->dom, b->id))
   1896   1.1  christos 			return;
   1897   1.1  christos 
   1898  1.12  christos 		/*
   1899  1.12  christos 		 * Break out of the loop if that node's value of A
   1900  1.12  christos 		 * isn't the value of A above XXX
   1901  1.12  christos 		 */
   1902   1.1  christos 		if ((*diffp)->val[A_ATOM] != val)
   1903   1.1  christos 			break;
   1904   1.1  christos 
   1905  1.12  christos 		/*
   1906  1.12  christos 		 * Get the JF for that node XXX
   1907  1.12  christos 		 * Go down the false path.
   1908  1.12  christos 		 */
   1909   1.1  christos 		diffp = &JF(*diffp);
   1910   1.1  christos 		at_top = 0;
   1911   1.1  christos 	}
   1912  1.12  christos 
   1913  1.12  christos 	/*
   1914  1.12  christos 	 * Now that we've found a different jump-compare in a chain
   1915  1.12  christos 	 * below b, search further down until we find another
   1916  1.12  christos 	 * jump-compare that looks at the original value.  This
   1917  1.12  christos 	 * jump-compare should get pulled up.  XXX again we're
   1918  1.12  christos 	 * comparing values not jump-compares.
   1919  1.12  christos 	 */
   1920   1.1  christos 	samep = &JF(*diffp);
   1921  1.10  christos 	for (;;) {
   1922  1.12  christos 		/*
   1923  1.12  christos 		 * Done if that's not going anywhere XXX
   1924  1.12  christos 		 */
   1925   1.1  christos 		if (*samep == 0)
   1926   1.1  christos 			return;
   1927   1.1  christos 
   1928  1.12  christos 		/*
   1929  1.12  christos 		 * Done if that predecessor blah blah blah isn't
   1930  1.12  christos 		 * going the same place we're going XXX
   1931  1.12  christos 		 */
   1932   1.1  christos 		if (JT(*samep) != JT(b))
   1933   1.1  christos 			return;
   1934   1.1  christos 
   1935  1.12  christos 		/*
   1936  1.12  christos 		 * Done if this node isn't a dominator of that
   1937  1.12  christos 		 * node blah blah blah XXX
   1938  1.12  christos 		 *
   1939  1.12  christos 		 * Does b dominate samep?
   1940  1.12  christos 		 */
   1941   1.1  christos 		if (!SET_MEMBER((*samep)->dom, b->id))
   1942   1.1  christos 			return;
   1943   1.1  christos 
   1944  1.12  christos 		/*
   1945  1.12  christos 		 * Break out of the loop if that node's value of A
   1946  1.12  christos 		 * is the value of A above XXX
   1947  1.12  christos 		 */
   1948   1.1  christos 		if ((*samep)->val[A_ATOM] == val)
   1949   1.1  christos 			break;
   1950   1.1  christos 
   1951   1.1  christos 		/* XXX Need to check that there are no data dependencies
   1952   1.1  christos 		   between dp0 and dp1.  Currently, the code generator
   1953   1.1  christos 		   will not produce such dependencies. */
   1954   1.1  christos 		samep = &JF(*samep);
   1955   1.1  christos 	}
   1956   1.1  christos #ifdef notdef
   1957   1.1  christos 	/* XXX This doesn't cover everything. */
   1958   1.1  christos 	for (i = 0; i < N_ATOMS; ++i)
   1959   1.1  christos 		if ((*samep)->val[i] != pred->val[i])
   1960   1.1  christos 			return;
   1961   1.1  christos #endif
   1962   1.1  christos 	/* Pull up the node. */
   1963   1.1  christos 	pull = *samep;
   1964   1.1  christos 	*samep = JF(pull);
   1965   1.1  christos 	JF(pull) = *diffp;
   1966   1.1  christos 
   1967   1.1  christos 	/*
   1968   1.1  christos 	 * At the top of the chain, each predecessor needs to point at the
   1969   1.1  christos 	 * pulled up node.  Inside the chain, there is only one predecessor
   1970   1.1  christos 	 * to worry about.
   1971   1.1  christos 	 */
   1972   1.1  christos 	if (at_top) {
   1973   1.1  christos 		for (ep = b->in_edges; ep != 0; ep = ep->next) {
   1974   1.1  christos 			if (JT(ep->pred) == b)
   1975   1.1  christos 				JT(ep->pred) = pull;
   1976   1.1  christos 			else
   1977   1.1  christos 				JF(ep->pred) = pull;
   1978   1.1  christos 		}
   1979   1.1  christos 	}
   1980   1.1  christos 	else
   1981   1.1  christos 		*diffp = pull;
   1982   1.1  christos 
   1983  1.12  christos 	/*
   1984  1.12  christos 	 * XXX - this is one of the operations that happens when the
   1985  1.12  christos 	 * optimizer gets into one of those infinite loops.
   1986  1.12  christos 	 */
   1987   1.9  christos 	opt_state->done = 0;
   1988  1.13  christos 
   1989  1.13  christos 	/*
   1990  1.13  christos 	 * Recompute dominator sets as control flow graph has changed.
   1991  1.13  christos 	 */
   1992  1.13  christos 	find_dom(opt_state, root);
   1993   1.1  christos }
   1994   1.1  christos 
   1995   1.1  christos static void
   1996  1.13  christos and_pullup(opt_state_t *opt_state, struct block *b, struct block *root)
   1997   1.1  christos {
   1998  1.12  christos 	bpf_u_int32 val;
   1999  1.12  christos 	int at_top;
   2000   1.1  christos 	struct block *pull;
   2001   1.1  christos 	struct block **diffp, **samep;
   2002   1.1  christos 	struct edge *ep;
   2003   1.1  christos 
   2004   1.1  christos 	ep = b->in_edges;
   2005   1.1  christos 	if (ep == 0)
   2006   1.1  christos 		return;
   2007   1.1  christos 
   2008   1.1  christos 	/*
   2009   1.1  christos 	 * Make sure each predecessor loads the same value.
   2010   1.1  christos 	 */
   2011   1.1  christos 	val = ep->pred->val[A_ATOM];
   2012   1.1  christos 	for (ep = ep->next; ep != 0; ep = ep->next)
   2013   1.1  christos 		if (val != ep->pred->val[A_ATOM])
   2014   1.1  christos 			return;
   2015   1.1  christos 
   2016   1.1  christos 	if (JT(b->in_edges->pred) == b)
   2017   1.1  christos 		diffp = &JT(b->in_edges->pred);
   2018   1.1  christos 	else
   2019   1.1  christos 		diffp = &JF(b->in_edges->pred);
   2020   1.1  christos 
   2021   1.1  christos 	at_top = 1;
   2022  1.10  christos 	for (;;) {
   2023   1.1  christos 		if (*diffp == 0)
   2024   1.1  christos 			return;
   2025   1.1  christos 
   2026   1.1  christos 		if (JF(*diffp) != JF(b))
   2027   1.1  christos 			return;
   2028   1.1  christos 
   2029   1.1  christos 		if (!SET_MEMBER((*diffp)->dom, b->id))
   2030   1.1  christos 			return;
   2031   1.1  christos 
   2032   1.1  christos 		if ((*diffp)->val[A_ATOM] != val)
   2033   1.1  christos 			break;
   2034   1.1  christos 
   2035   1.1  christos 		diffp = &JT(*diffp);
   2036   1.1  christos 		at_top = 0;
   2037   1.1  christos 	}
   2038   1.1  christos 	samep = &JT(*diffp);
   2039  1.10  christos 	for (;;) {
   2040   1.1  christos 		if (*samep == 0)
   2041   1.1  christos 			return;
   2042   1.1  christos 
   2043   1.1  christos 		if (JF(*samep) != JF(b))
   2044   1.1  christos 			return;
   2045   1.1  christos 
   2046   1.1  christos 		if (!SET_MEMBER((*samep)->dom, b->id))
   2047   1.1  christos 			return;
   2048   1.1  christos 
   2049   1.1  christos 		if ((*samep)->val[A_ATOM] == val)
   2050   1.1  christos 			break;
   2051   1.1  christos 
   2052   1.1  christos 		/* XXX Need to check that there are no data dependencies
   2053   1.1  christos 		   between diffp and samep.  Currently, the code generator
   2054   1.1  christos 		   will not produce such dependencies. */
   2055   1.1  christos 		samep = &JT(*samep);
   2056   1.1  christos 	}
   2057   1.1  christos #ifdef notdef
   2058   1.1  christos 	/* XXX This doesn't cover everything. */
   2059   1.1  christos 	for (i = 0; i < N_ATOMS; ++i)
   2060   1.1  christos 		if ((*samep)->val[i] != pred->val[i])
   2061   1.1  christos 			return;
   2062   1.1  christos #endif
   2063   1.1  christos 	/* Pull up the node. */
   2064   1.1  christos 	pull = *samep;
   2065   1.1  christos 	*samep = JT(pull);
   2066   1.1  christos 	JT(pull) = *diffp;
   2067   1.1  christos 
   2068   1.1  christos 	/*
   2069   1.1  christos 	 * At the top of the chain, each predecessor needs to point at the
   2070   1.1  christos 	 * pulled up node.  Inside the chain, there is only one predecessor
   2071   1.1  christos 	 * to worry about.
   2072   1.1  christos 	 */
   2073   1.1  christos 	if (at_top) {
   2074   1.1  christos 		for (ep = b->in_edges; ep != 0; ep = ep->next) {
   2075   1.1  christos 			if (JT(ep->pred) == b)
   2076   1.1  christos 				JT(ep->pred) = pull;
   2077   1.1  christos 			else
   2078   1.1  christos 				JF(ep->pred) = pull;
   2079   1.1  christos 		}
   2080   1.1  christos 	}
   2081   1.1  christos 	else
   2082   1.1  christos 		*diffp = pull;
   2083   1.1  christos 
   2084  1.12  christos 	/*
   2085  1.12  christos 	 * XXX - this is one of the operations that happens when the
   2086  1.12  christos 	 * optimizer gets into one of those infinite loops.
   2087  1.12  christos 	 */
   2088   1.9  christos 	opt_state->done = 0;
   2089  1.13  christos 
   2090  1.13  christos 	/*
   2091  1.13  christos 	 * Recompute dominator sets as control flow graph has changed.
   2092  1.13  christos 	 */
   2093  1.13  christos 	find_dom(opt_state, root);
   2094   1.1  christos }
   2095   1.1  christos 
   2096   1.1  christos static void
   2097  1.11  christos opt_blks(opt_state_t *opt_state, struct icode *ic, int do_stmts)
   2098   1.1  christos {
   2099   1.1  christos 	int i, maxlevel;
   2100   1.1  christos 	struct block *p;
   2101   1.1  christos 
   2102   1.9  christos 	init_val(opt_state);
   2103   1.9  christos 	maxlevel = ic->root->level;
   2104   1.1  christos 
   2105   1.9  christos 	find_inedges(opt_state, ic->root);
   2106   1.1  christos 	for (i = maxlevel; i >= 0; --i)
   2107   1.9  christos 		for (p = opt_state->levels[i]; p; p = p->link)
   2108  1.11  christos 			opt_blk(opt_state, p, do_stmts);
   2109   1.1  christos 
   2110   1.1  christos 	if (do_stmts)
   2111   1.1  christos 		/*
   2112   1.1  christos 		 * No point trying to move branches; it can't possibly
   2113   1.1  christos 		 * make a difference at this point.
   2114  1.12  christos 		 *
   2115  1.12  christos 		 * XXX - this might be after we detect a loop where
   2116  1.12  christos 		 * we were just looping infinitely moving branches
   2117  1.12  christos 		 * in such a fashion that we went through two or more
   2118  1.12  christos 		 * versions of the machine code, eventually returning
   2119  1.12  christos 		 * to the first version.  (We're really not doing a
   2120  1.12  christos 		 * full loop detection, we're just testing for two
   2121  1.12  christos 		 * passes in a row where we do nothing but
   2122  1.12  christos 		 * move branches.)
   2123   1.1  christos 		 */
   2124   1.1  christos 		return;
   2125   1.1  christos 
   2126  1.12  christos 	/*
   2127  1.12  christos 	 * Is this what the BPF+ paper describes in sections 6.1.1,
   2128  1.12  christos 	 * 6.1.2, and 6.1.3?
   2129  1.12  christos 	 */
   2130   1.1  christos 	for (i = 1; i <= maxlevel; ++i) {
   2131   1.9  christos 		for (p = opt_state->levels[i]; p; p = p->link) {
   2132   1.9  christos 			opt_j(opt_state, &p->et);
   2133   1.9  christos 			opt_j(opt_state, &p->ef);
   2134   1.1  christos 		}
   2135   1.1  christos 	}
   2136   1.1  christos 
   2137   1.9  christos 	find_inedges(opt_state, ic->root);
   2138   1.1  christos 	for (i = 1; i <= maxlevel; ++i) {
   2139   1.9  christos 		for (p = opt_state->levels[i]; p; p = p->link) {
   2140  1.13  christos 			or_pullup(opt_state, p, ic->root);
   2141  1.13  christos 			and_pullup(opt_state, p, ic->root);
   2142   1.1  christos 		}
   2143   1.1  christos 	}
   2144   1.1  christos }
   2145   1.1  christos 
   2146   1.1  christos static inline void
   2147   1.6  christos link_inedge(struct edge *parent, struct block *child)
   2148   1.1  christos {
   2149   1.1  christos 	parent->next = child->in_edges;
   2150   1.1  christos 	child->in_edges = parent;
   2151   1.1  christos }
   2152   1.1  christos 
   2153   1.1  christos static void
   2154   1.9  christos find_inedges(opt_state_t *opt_state, struct block *root)
   2155   1.1  christos {
   2156  1.12  christos 	u_int i;
   2157  1.12  christos 	int level;
   2158   1.1  christos 	struct block *b;
   2159   1.1  christos 
   2160   1.9  christos 	for (i = 0; i < opt_state->n_blocks; ++i)
   2161   1.9  christos 		opt_state->blocks[i]->in_edges = 0;
   2162   1.1  christos 
   2163   1.1  christos 	/*
   2164   1.1  christos 	 * Traverse the graph, adding each edge to the predecessor
   2165   1.1  christos 	 * list of its successors.  Skip the leaves (i.e. level 0).
   2166   1.1  christos 	 */
   2167  1.12  christos 	for (level = root->level; level > 0; --level) {
   2168  1.12  christos 		for (b = opt_state->levels[level]; b != 0; b = b->link) {
   2169   1.1  christos 			link_inedge(&b->et, JT(b));
   2170   1.1  christos 			link_inedge(&b->ef, JF(b));
   2171   1.1  christos 		}
   2172   1.1  christos 	}
   2173   1.1  christos }
   2174   1.1  christos 
   2175   1.1  christos static void
   2176   1.6  christos opt_root(struct block **b)
   2177   1.1  christos {
   2178   1.1  christos 	struct slist *tmp, *s;
   2179   1.1  christos 
   2180   1.1  christos 	s = (*b)->stmts;
   2181   1.1  christos 	(*b)->stmts = 0;
   2182   1.1  christos 	while (BPF_CLASS((*b)->s.code) == BPF_JMP && JT(*b) == JF(*b))
   2183   1.1  christos 		*b = JT(*b);
   2184   1.1  christos 
   2185   1.1  christos 	tmp = (*b)->stmts;
   2186   1.1  christos 	if (tmp != 0)
   2187   1.1  christos 		sappend(s, tmp);
   2188   1.1  christos 	(*b)->stmts = s;
   2189   1.1  christos 
   2190   1.1  christos 	/*
   2191   1.1  christos 	 * If the root node is a return, then there is no
   2192   1.1  christos 	 * point executing any statements (since the bpf machine
   2193   1.1  christos 	 * has no side effects).
   2194   1.1  christos 	 */
   2195   1.1  christos 	if (BPF_CLASS((*b)->s.code) == BPF_RET)
   2196   1.1  christos 		(*b)->stmts = 0;
   2197   1.1  christos }
   2198   1.1  christos 
   2199   1.1  christos static void
   2200  1.11  christos opt_loop(opt_state_t *opt_state, struct icode *ic, int do_stmts)
   2201   1.1  christos {
   2202   1.1  christos 
   2203   1.1  christos #ifdef BDEBUG
   2204  1.10  christos 	if (pcap_optimizer_debug > 1 || pcap_print_dot_graph) {
   2205   1.1  christos 		printf("opt_loop(root, %d) begin\n", do_stmts);
   2206  1.11  christos 		opt_dump(opt_state, ic);
   2207   1.1  christos 	}
   2208   1.1  christos #endif
   2209  1.12  christos 
   2210  1.12  christos 	/*
   2211  1.12  christos 	 * XXX - optimizer loop detection.
   2212  1.12  christos 	 */
   2213  1.12  christos 	int loop_count = 0;
   2214  1.12  christos 	for (;;) {
   2215   1.9  christos 		opt_state->done = 1;
   2216  1.12  christos 		/*
   2217  1.12  christos 		 * XXX - optimizer loop detection.
   2218  1.12  christos 		 */
   2219  1.12  christos 		opt_state->non_branch_movement_performed = 0;
   2220   1.9  christos 		find_levels(opt_state, ic);
   2221   1.9  christos 		find_dom(opt_state, ic->root);
   2222   1.9  christos 		find_closure(opt_state, ic->root);
   2223   1.9  christos 		find_ud(opt_state, ic->root);
   2224   1.9  christos 		find_edom(opt_state, ic->root);
   2225  1.11  christos 		opt_blks(opt_state, ic, do_stmts);
   2226   1.1  christos #ifdef BDEBUG
   2227  1.10  christos 		if (pcap_optimizer_debug > 1 || pcap_print_dot_graph) {
   2228   1.9  christos 			printf("opt_loop(root, %d) bottom, done=%d\n", do_stmts, opt_state->done);
   2229  1.11  christos 			opt_dump(opt_state, ic);
   2230   1.1  christos 		}
   2231   1.1  christos #endif
   2232  1.12  christos 
   2233  1.12  christos 		/*
   2234  1.12  christos 		 * Was anything done in this optimizer pass?
   2235  1.12  christos 		 */
   2236  1.12  christos 		if (opt_state->done) {
   2237  1.12  christos 			/*
   2238  1.12  christos 			 * No, so we've reached a fixed point.
   2239  1.12  christos 			 * We're done.
   2240  1.12  christos 			 */
   2241  1.12  christos 			break;
   2242  1.12  christos 		}
   2243  1.12  christos 
   2244  1.12  christos 		/*
   2245  1.12  christos 		 * XXX - was anything done other than branch movement
   2246  1.12  christos 		 * in this pass?
   2247  1.12  christos 		 */
   2248  1.12  christos 		if (opt_state->non_branch_movement_performed) {
   2249  1.12  christos 			/*
   2250  1.12  christos 			 * Yes.  Clear any loop-detection counter;
   2251  1.12  christos 			 * we're making some form of progress (assuming
   2252  1.12  christos 			 * we can't get into a cycle doing *other*
   2253  1.12  christos 			 * optimizations...).
   2254  1.12  christos 			 */
   2255  1.12  christos 			loop_count = 0;
   2256  1.12  christos 		} else {
   2257  1.12  christos 			/*
   2258  1.12  christos 			 * No - increment the counter, and quit if
   2259  1.12  christos 			 * it's up to 100.
   2260  1.12  christos 			 */
   2261  1.12  christos 			loop_count++;
   2262  1.12  christos 			if (loop_count >= 100) {
   2263  1.12  christos 				/*
   2264  1.12  christos 				 * We've done nothing but branch movement
   2265  1.12  christos 				 * for 100 passes; we're probably
   2266  1.12  christos 				 * in a cycle and will never reach a
   2267  1.12  christos 				 * fixed point.
   2268  1.12  christos 				 *
   2269  1.12  christos 				 * XXX - yes, we really need a non-
   2270  1.12  christos 				 * heuristic way of detecting a cycle.
   2271  1.12  christos 				 */
   2272  1.12  christos 				opt_state->done = 1;
   2273  1.12  christos 				break;
   2274  1.12  christos 			}
   2275  1.12  christos 		}
   2276  1.12  christos 	}
   2277   1.1  christos }
   2278   1.1  christos 
   2279   1.1  christos /*
   2280   1.1  christos  * Optimize the filter code in its dag representation.
   2281  1.11  christos  * Return 0 on success, -1 on error.
   2282   1.1  christos  */
   2283  1.11  christos int
   2284  1.11  christos bpf_optimize(struct icode *ic, char *errbuf)
   2285   1.1  christos {
   2286   1.9  christos 	opt_state_t opt_state;
   2287   1.1  christos 
   2288  1.11  christos 	memset(&opt_state, 0, sizeof(opt_state));
   2289  1.11  christos 	opt_state.errbuf = errbuf;
   2290  1.12  christos 	opt_state.non_branch_movement_performed = 0;
   2291  1.11  christos 	if (setjmp(opt_state.top_ctx)) {
   2292  1.11  christos 		opt_cleanup(&opt_state);
   2293  1.11  christos 		return -1;
   2294  1.11  christos 	}
   2295  1.11  christos 	opt_init(&opt_state, ic);
   2296  1.11  christos 	opt_loop(&opt_state, ic, 0);
   2297  1.11  christos 	opt_loop(&opt_state, ic, 1);
   2298   1.9  christos 	intern_blocks(&opt_state, ic);
   2299   1.1  christos #ifdef BDEBUG
   2300  1.10  christos 	if (pcap_optimizer_debug > 1 || pcap_print_dot_graph) {
   2301   1.1  christos 		printf("after intern_blocks()\n");
   2302  1.11  christos 		opt_dump(&opt_state, ic);
   2303   1.1  christos 	}
   2304   1.1  christos #endif
   2305   1.9  christos 	opt_root(&ic->root);
   2306   1.1  christos #ifdef BDEBUG
   2307  1.10  christos 	if (pcap_optimizer_debug > 1 || pcap_print_dot_graph) {
   2308   1.1  christos 		printf("after opt_root()\n");
   2309  1.11  christos 		opt_dump(&opt_state, ic);
   2310   1.1  christos 	}
   2311   1.1  christos #endif
   2312   1.9  christos 	opt_cleanup(&opt_state);
   2313  1.11  christos 	return 0;
   2314   1.1  christos }
   2315   1.1  christos 
   2316   1.1  christos static void
   2317   1.9  christos make_marks(struct icode *ic, struct block *p)
   2318   1.1  christos {
   2319   1.9  christos 	if (!isMarked(ic, p)) {
   2320   1.9  christos 		Mark(ic, p);
   2321   1.1  christos 		if (BPF_CLASS(p->s.code) != BPF_RET) {
   2322   1.9  christos 			make_marks(ic, JT(p));
   2323   1.9  christos 			make_marks(ic, JF(p));
   2324   1.1  christos 		}
   2325   1.1  christos 	}
   2326   1.1  christos }
   2327   1.1  christos 
   2328   1.1  christos /*
   2329   1.9  christos  * Mark code array such that isMarked(ic->cur_mark, i) is true
   2330   1.1  christos  * only for nodes that are alive.
   2331   1.1  christos  */
   2332   1.1  christos static void
   2333   1.9  christos mark_code(struct icode *ic)
   2334   1.1  christos {
   2335   1.9  christos 	ic->cur_mark += 1;
   2336   1.9  christos 	make_marks(ic, ic->root);
   2337   1.1  christos }
   2338   1.1  christos 
   2339   1.1  christos /*
   2340   1.1  christos  * True iff the two stmt lists load the same value from the packet into
   2341   1.1  christos  * the accumulator.
   2342   1.1  christos  */
   2343   1.1  christos static int
   2344   1.6  christos eq_slist(struct slist *x, struct slist *y)
   2345   1.1  christos {
   2346  1.10  christos 	for (;;) {
   2347   1.1  christos 		while (x && x->s.code == NOP)
   2348   1.1  christos 			x = x->next;
   2349   1.1  christos 		while (y && y->s.code == NOP)
   2350   1.1  christos 			y = y->next;
   2351   1.1  christos 		if (x == 0)
   2352   1.1  christos 			return y == 0;
   2353   1.1  christos 		if (y == 0)
   2354   1.1  christos 			return x == 0;
   2355   1.1  christos 		if (x->s.code != y->s.code || x->s.k != y->s.k)
   2356   1.1  christos 			return 0;
   2357   1.1  christos 		x = x->next;
   2358   1.1  christos 		y = y->next;
   2359   1.1  christos 	}
   2360   1.1  christos }
   2361   1.1  christos 
   2362   1.1  christos static inline int
   2363   1.6  christos eq_blk(struct block *b0, struct block *b1)
   2364   1.1  christos {
   2365   1.1  christos 	if (b0->s.code == b1->s.code &&
   2366   1.1  christos 	    b0->s.k == b1->s.k &&
   2367   1.1  christos 	    b0->et.succ == b1->et.succ &&
   2368   1.1  christos 	    b0->ef.succ == b1->ef.succ)
   2369   1.1  christos 		return eq_slist(b0->stmts, b1->stmts);
   2370   1.1  christos 	return 0;
   2371   1.1  christos }
   2372   1.1  christos 
   2373   1.1  christos static void
   2374   1.9  christos intern_blocks(opt_state_t *opt_state, struct icode *ic)
   2375   1.1  christos {
   2376   1.1  christos 	struct block *p;
   2377  1.12  christos 	u_int i, j;
   2378   1.1  christos 	int done1; /* don't shadow global */
   2379   1.1  christos  top:
   2380   1.1  christos 	done1 = 1;
   2381   1.9  christos 	for (i = 0; i < opt_state->n_blocks; ++i)
   2382   1.9  christos 		opt_state->blocks[i]->link = 0;
   2383   1.1  christos 
   2384   1.9  christos 	mark_code(ic);
   2385   1.1  christos 
   2386  1.12  christos 	for (i = opt_state->n_blocks - 1; i != 0; ) {
   2387  1.12  christos 		--i;
   2388   1.9  christos 		if (!isMarked(ic, opt_state->blocks[i]))
   2389   1.1  christos 			continue;
   2390   1.9  christos 		for (j = i + 1; j < opt_state->n_blocks; ++j) {
   2391   1.9  christos 			if (!isMarked(ic, opt_state->blocks[j]))
   2392   1.1  christos 				continue;
   2393   1.9  christos 			if (eq_blk(opt_state->blocks[i], opt_state->blocks[j])) {
   2394   1.9  christos 				opt_state->blocks[i]->link = opt_state->blocks[j]->link ?
   2395   1.9  christos 					opt_state->blocks[j]->link : opt_state->blocks[j];
   2396   1.1  christos 				break;
   2397   1.1  christos 			}
   2398   1.1  christos 		}
   2399   1.1  christos 	}
   2400   1.9  christos 	for (i = 0; i < opt_state->n_blocks; ++i) {
   2401   1.9  christos 		p = opt_state->blocks[i];
   2402   1.1  christos 		if (JT(p) == 0)
   2403   1.1  christos 			continue;
   2404   1.1  christos 		if (JT(p)->link) {
   2405   1.1  christos 			done1 = 0;
   2406   1.1  christos 			JT(p) = JT(p)->link;
   2407   1.1  christos 		}
   2408   1.1  christos 		if (JF(p)->link) {
   2409   1.1  christos 			done1 = 0;
   2410   1.1  christos 			JF(p) = JF(p)->link;
   2411   1.1  christos 		}
   2412   1.1  christos 	}
   2413   1.1  christos 	if (!done1)
   2414   1.1  christos 		goto top;
   2415   1.1  christos }
   2416   1.1  christos 
   2417   1.1  christos static void
   2418   1.9  christos opt_cleanup(opt_state_t *opt_state)
   2419   1.1  christos {
   2420   1.9  christos 	free((void *)opt_state->vnode_base);
   2421   1.9  christos 	free((void *)opt_state->vmap);
   2422   1.9  christos 	free((void *)opt_state->edges);
   2423   1.9  christos 	free((void *)opt_state->space);
   2424   1.9  christos 	free((void *)opt_state->levels);
   2425   1.9  christos 	free((void *)opt_state->blocks);
   2426   1.1  christos }
   2427   1.1  christos 
   2428   1.1  christos /*
   2429  1.11  christos  * For optimizer errors.
   2430  1.11  christos  */
   2431  1.11  christos static void PCAP_NORETURN
   2432  1.11  christos opt_error(opt_state_t *opt_state, const char *fmt, ...)
   2433  1.11  christos {
   2434  1.11  christos 	va_list ap;
   2435  1.11  christos 
   2436  1.11  christos 	if (opt_state->errbuf != NULL) {
   2437  1.11  christos 		va_start(ap, fmt);
   2438  1.12  christos 		(void)vsnprintf(opt_state->errbuf,
   2439  1.11  christos 		    PCAP_ERRBUF_SIZE, fmt, ap);
   2440  1.11  christos 		va_end(ap);
   2441  1.11  christos 	}
   2442  1.11  christos 	longjmp(opt_state->top_ctx, 1);
   2443  1.11  christos 	/* NOTREACHED */
   2444  1.12  christos #ifdef _AIX
   2445  1.12  christos 	PCAP_UNREACHABLE
   2446  1.12  christos #endif /* _AIX */
   2447  1.11  christos }
   2448  1.11  christos 
   2449  1.11  christos /*
   2450   1.1  christos  * Return the number of stmts in 's'.
   2451   1.1  christos  */
   2452   1.5  christos static u_int
   2453   1.6  christos slength(struct slist *s)
   2454   1.1  christos {
   2455   1.5  christos 	u_int n = 0;
   2456   1.1  christos 
   2457   1.1  christos 	for (; s; s = s->next)
   2458   1.1  christos 		if (s->s.code != NOP)
   2459   1.1  christos 			++n;
   2460   1.1  christos 	return n;
   2461   1.1  christos }
   2462   1.1  christos 
   2463   1.1  christos /*
   2464   1.1  christos  * Return the number of nodes reachable by 'p'.
   2465   1.1  christos  * All nodes should be initially unmarked.
   2466   1.1  christos  */
   2467   1.1  christos static int
   2468   1.9  christos count_blocks(struct icode *ic, struct block *p)
   2469   1.1  christos {
   2470   1.9  christos 	if (p == 0 || isMarked(ic, p))
   2471   1.1  christos 		return 0;
   2472   1.9  christos 	Mark(ic, p);
   2473   1.9  christos 	return count_blocks(ic, JT(p)) + count_blocks(ic, JF(p)) + 1;
   2474   1.1  christos }
   2475   1.1  christos 
   2476   1.1  christos /*
   2477   1.1  christos  * Do a depth first search on the flow graph, numbering the
   2478   1.1  christos  * the basic blocks, and entering them into the 'blocks' array.`
   2479   1.1  christos  */
   2480   1.1  christos static void
   2481   1.9  christos number_blks_r(opt_state_t *opt_state, struct icode *ic, struct block *p)
   2482   1.1  christos {
   2483  1.12  christos 	u_int n;
   2484   1.1  christos 
   2485   1.9  christos 	if (p == 0 || isMarked(ic, p))
   2486   1.1  christos 		return;
   2487   1.1  christos 
   2488   1.9  christos 	Mark(ic, p);
   2489   1.9  christos 	n = opt_state->n_blocks++;
   2490  1.12  christos 	if (opt_state->n_blocks == 0) {
   2491  1.12  christos 		/*
   2492  1.12  christos 		 * Overflow.
   2493  1.12  christos 		 */
   2494  1.12  christos 		opt_error(opt_state, "filter is too complex to optimize");
   2495  1.12  christos 	}
   2496   1.1  christos 	p->id = n;
   2497   1.9  christos 	opt_state->blocks[n] = p;
   2498   1.1  christos 
   2499   1.9  christos 	number_blks_r(opt_state, ic, JT(p));
   2500   1.9  christos 	number_blks_r(opt_state, ic, JF(p));
   2501   1.1  christos }
   2502   1.1  christos 
   2503   1.1  christos /*
   2504   1.1  christos  * Return the number of stmts in the flowgraph reachable by 'p'.
   2505   1.1  christos  * The nodes should be unmarked before calling.
   2506   1.1  christos  *
   2507   1.1  christos  * Note that "stmts" means "instructions", and that this includes
   2508   1.1  christos  *
   2509   1.1  christos  *	side-effect statements in 'p' (slength(p->stmts));
   2510   1.1  christos  *
   2511   1.1  christos  *	statements in the true branch from 'p' (count_stmts(JT(p)));
   2512   1.1  christos  *
   2513   1.1  christos  *	statements in the false branch from 'p' (count_stmts(JF(p)));
   2514   1.1  christos  *
   2515   1.1  christos  *	the conditional jump itself (1);
   2516   1.1  christos  *
   2517   1.1  christos  *	an extra long jump if the true branch requires it (p->longjt);
   2518   1.1  christos  *
   2519   1.1  christos  *	an extra long jump if the false branch requires it (p->longjf).
   2520   1.1  christos  */
   2521   1.5  christos static u_int
   2522   1.9  christos count_stmts(struct icode *ic, struct block *p)
   2523   1.1  christos {
   2524   1.5  christos 	u_int n;
   2525   1.1  christos 
   2526   1.9  christos 	if (p == 0 || isMarked(ic, p))
   2527   1.1  christos 		return 0;
   2528   1.9  christos 	Mark(ic, p);
   2529   1.9  christos 	n = count_stmts(ic, JT(p)) + count_stmts(ic, JF(p));
   2530   1.1  christos 	return slength(p->stmts) + n + 1 + p->longjt + p->longjf;
   2531   1.1  christos }
   2532   1.1  christos 
   2533   1.1  christos /*
   2534   1.1  christos  * Allocate memory.  All allocation is done before optimization
   2535   1.1  christos  * is begun.  A linear bound on the size of all data structures is computed
   2536   1.1  christos  * from the total number of blocks and/or statements.
   2537   1.1  christos  */
   2538   1.1  christos static void
   2539  1.11  christos opt_init(opt_state_t *opt_state, struct icode *ic)
   2540   1.1  christos {
   2541   1.1  christos 	bpf_u_int32 *p;
   2542   1.1  christos 	int i, n, max_stmts;
   2543  1.12  christos 	u_int product;
   2544  1.12  christos 	size_t block_memsize, edge_memsize;
   2545   1.1  christos 
   2546   1.1  christos 	/*
   2547   1.1  christos 	 * First, count the blocks, so we can malloc an array to map
   2548   1.1  christos 	 * block number to block.  Then, put the blocks into the array.
   2549   1.1  christos 	 */
   2550   1.9  christos 	unMarkAll(ic);
   2551   1.9  christos 	n = count_blocks(ic, ic->root);
   2552   1.9  christos 	opt_state->blocks = (struct block **)calloc(n, sizeof(*opt_state->blocks));
   2553   1.9  christos 	if (opt_state->blocks == NULL)
   2554  1.11  christos 		opt_error(opt_state, "malloc");
   2555   1.9  christos 	unMarkAll(ic);
   2556   1.9  christos 	opt_state->n_blocks = 0;
   2557   1.9  christos 	number_blks_r(opt_state, ic, ic->root);
   2558   1.9  christos 
   2559  1.12  christos 	/*
   2560  1.12  christos 	 * This "should not happen".
   2561  1.12  christos 	 */
   2562  1.12  christos 	if (opt_state->n_blocks == 0)
   2563  1.12  christos 		opt_error(opt_state, "filter has no instructions; please report this as a libpcap issue");
   2564  1.12  christos 
   2565   1.9  christos 	opt_state->n_edges = 2 * opt_state->n_blocks;
   2566  1.12  christos 	if ((opt_state->n_edges / 2) != opt_state->n_blocks) {
   2567  1.12  christos 		/*
   2568  1.12  christos 		 * Overflow.
   2569  1.12  christos 		 */
   2570  1.12  christos 		opt_error(opt_state, "filter is too complex to optimize");
   2571  1.12  christos 	}
   2572   1.9  christos 	opt_state->edges = (struct edge **)calloc(opt_state->n_edges, sizeof(*opt_state->edges));
   2573  1.11  christos 	if (opt_state->edges == NULL) {
   2574  1.11  christos 		opt_error(opt_state, "malloc");
   2575  1.11  christos 	}
   2576   1.1  christos 
   2577   1.1  christos 	/*
   2578   1.1  christos 	 * The number of levels is bounded by the number of nodes.
   2579   1.1  christos 	 */
   2580   1.9  christos 	opt_state->levels = (struct block **)calloc(opt_state->n_blocks, sizeof(*opt_state->levels));
   2581  1.11  christos 	if (opt_state->levels == NULL) {
   2582  1.11  christos 		opt_error(opt_state, "malloc");
   2583  1.11  christos 	}
   2584   1.1  christos 
   2585  1.12  christos 	opt_state->edgewords = opt_state->n_edges / BITS_PER_WORD + 1;
   2586  1.12  christos 	opt_state->nodewords = opt_state->n_blocks / BITS_PER_WORD + 1;
   2587  1.12  christos 
   2588  1.12  christos 	/*
   2589  1.12  christos 	 * Make sure opt_state->n_blocks * opt_state->nodewords fits
   2590  1.12  christos 	 * in a u_int; we use it as a u_int number-of-iterations
   2591  1.12  christos 	 * value.
   2592  1.12  christos 	 */
   2593  1.12  christos 	product = opt_state->n_blocks * opt_state->nodewords;
   2594  1.12  christos 	if ((product / opt_state->n_blocks) != opt_state->nodewords) {
   2595  1.12  christos 		/*
   2596  1.12  christos 		 * XXX - just punt and don't try to optimize?
   2597  1.12  christos 		 * In practice, this is unlikely to happen with
   2598  1.12  christos 		 * a normal filter.
   2599  1.12  christos 		 */
   2600  1.12  christos 		opt_error(opt_state, "filter is too complex to optimize");
   2601  1.12  christos 	}
   2602  1.12  christos 
   2603  1.12  christos 	/*
   2604  1.12  christos 	 * Make sure the total memory required for that doesn't
   2605  1.12  christos 	 * overflow.
   2606  1.12  christos 	 */
   2607  1.12  christos 	block_memsize = (size_t)2 * product * sizeof(*opt_state->space);
   2608  1.12  christos 	if ((block_memsize / product) != 2 * sizeof(*opt_state->space)) {
   2609  1.12  christos 		opt_error(opt_state, "filter is too complex to optimize");
   2610  1.12  christos 	}
   2611  1.12  christos 
   2612  1.12  christos 	/*
   2613  1.12  christos 	 * Make sure opt_state->n_edges * opt_state->edgewords fits
   2614  1.12  christos 	 * in a u_int; we use it as a u_int number-of-iterations
   2615  1.12  christos 	 * value.
   2616  1.12  christos 	 */
   2617  1.12  christos 	product = opt_state->n_edges * opt_state->edgewords;
   2618  1.12  christos 	if ((product / opt_state->n_edges) != opt_state->edgewords) {
   2619  1.12  christos 		opt_error(opt_state, "filter is too complex to optimize");
   2620  1.12  christos 	}
   2621  1.12  christos 
   2622  1.12  christos 	/*
   2623  1.12  christos 	 * Make sure the total memory required for that doesn't
   2624  1.12  christos 	 * overflow.
   2625  1.12  christos 	 */
   2626  1.12  christos 	edge_memsize = (size_t)product * sizeof(*opt_state->space);
   2627  1.12  christos 	if (edge_memsize / product != sizeof(*opt_state->space)) {
   2628  1.12  christos 		opt_error(opt_state, "filter is too complex to optimize");
   2629  1.12  christos 	}
   2630  1.12  christos 
   2631  1.12  christos 	/*
   2632  1.12  christos 	 * Make sure the total memory required for both of them doesn't
   2633  1.12  christos 	 * overflow.
   2634  1.12  christos 	 */
   2635  1.12  christos 	if (block_memsize > SIZE_MAX - edge_memsize) {
   2636  1.12  christos 		opt_error(opt_state, "filter is too complex to optimize");
   2637  1.12  christos 	}
   2638   1.1  christos 
   2639   1.1  christos 	/* XXX */
   2640  1.12  christos 	opt_state->space = (bpf_u_int32 *)malloc(block_memsize + edge_memsize);
   2641  1.11  christos 	if (opt_state->space == NULL) {
   2642  1.11  christos 		opt_error(opt_state, "malloc");
   2643  1.11  christos 	}
   2644   1.9  christos 	p = opt_state->space;
   2645   1.9  christos 	opt_state->all_dom_sets = p;
   2646   1.1  christos 	for (i = 0; i < n; ++i) {
   2647   1.9  christos 		opt_state->blocks[i]->dom = p;
   2648   1.9  christos 		p += opt_state->nodewords;
   2649   1.1  christos 	}
   2650   1.9  christos 	opt_state->all_closure_sets = p;
   2651   1.1  christos 	for (i = 0; i < n; ++i) {
   2652   1.9  christos 		opt_state->blocks[i]->closure = p;
   2653   1.9  christos 		p += opt_state->nodewords;
   2654   1.1  christos 	}
   2655   1.9  christos 	opt_state->all_edge_sets = p;
   2656   1.1  christos 	for (i = 0; i < n; ++i) {
   2657   1.9  christos 		register struct block *b = opt_state->blocks[i];
   2658   1.1  christos 
   2659   1.1  christos 		b->et.edom = p;
   2660   1.9  christos 		p += opt_state->edgewords;
   2661   1.1  christos 		b->ef.edom = p;
   2662   1.9  christos 		p += opt_state->edgewords;
   2663   1.1  christos 		b->et.id = i;
   2664   1.9  christos 		opt_state->edges[i] = &b->et;
   2665   1.9  christos 		b->ef.id = opt_state->n_blocks + i;
   2666   1.9  christos 		opt_state->edges[opt_state->n_blocks + i] = &b->ef;
   2667   1.1  christos 		b->et.pred = b;
   2668   1.1  christos 		b->ef.pred = b;
   2669   1.1  christos 	}
   2670   1.1  christos 	max_stmts = 0;
   2671   1.1  christos 	for (i = 0; i < n; ++i)
   2672   1.9  christos 		max_stmts += slength(opt_state->blocks[i]->stmts) + 1;
   2673   1.1  christos 	/*
   2674   1.1  christos 	 * We allocate at most 3 value numbers per statement,
   2675   1.1  christos 	 * so this is an upper bound on the number of valnodes
   2676   1.1  christos 	 * we'll need.
   2677   1.1  christos 	 */
   2678   1.9  christos 	opt_state->maxval = 3 * max_stmts;
   2679   1.9  christos 	opt_state->vmap = (struct vmapinfo *)calloc(opt_state->maxval, sizeof(*opt_state->vmap));
   2680  1.11  christos 	if (opt_state->vmap == NULL) {
   2681  1.11  christos 		opt_error(opt_state, "malloc");
   2682  1.11  christos 	}
   2683   1.9  christos 	opt_state->vnode_base = (struct valnode *)calloc(opt_state->maxval, sizeof(*opt_state->vnode_base));
   2684  1.11  christos 	if (opt_state->vnode_base == NULL) {
   2685  1.11  christos 		opt_error(opt_state, "malloc");
   2686  1.11  christos 	}
   2687   1.1  christos }
   2688   1.1  christos 
   2689   1.1  christos /*
   2690   1.9  christos  * This is only used when supporting optimizer debugging.  It is
   2691   1.9  christos  * global state, so do *not* do more than one compile in parallel
   2692   1.9  christos  * and expect it to provide meaningful information.
   2693   1.1  christos  */
   2694   1.1  christos #ifdef BDEBUG
   2695  1.10  christos int bids[NBIDS];
   2696   1.1  christos #endif
   2697   1.1  christos 
   2698  1.11  christos static void PCAP_NORETURN conv_error(conv_state_t *, const char *, ...)
   2699  1.11  christos     PCAP_PRINTFLIKE(2, 3);
   2700  1.11  christos 
   2701   1.1  christos /*
   2702   1.1  christos  * Returns true if successful.  Returns false if a branch has
   2703   1.1  christos  * an offset that is too large.  If so, we have marked that
   2704   1.1  christos  * branch so that on a subsequent iteration, it will be treated
   2705   1.1  christos  * properly.
   2706   1.1  christos  */
   2707   1.1  christos static int
   2708  1.11  christos convert_code_r(conv_state_t *conv_state, struct icode *ic, struct block *p)
   2709   1.1  christos {
   2710   1.1  christos 	struct bpf_insn *dst;
   2711   1.1  christos 	struct slist *src;
   2712   1.2  christos 	u_int slen;
   2713   1.1  christos 	u_int off;
   2714   1.1  christos 	struct slist **offset = NULL;
   2715   1.1  christos 
   2716   1.9  christos 	if (p == 0 || isMarked(ic, p))
   2717   1.1  christos 		return (1);
   2718   1.9  christos 	Mark(ic, p);
   2719   1.1  christos 
   2720  1.11  christos 	if (convert_code_r(conv_state, ic, JF(p)) == 0)
   2721   1.1  christos 		return (0);
   2722  1.11  christos 	if (convert_code_r(conv_state, ic, JT(p)) == 0)
   2723   1.1  christos 		return (0);
   2724   1.1  christos 
   2725   1.1  christos 	slen = slength(p->stmts);
   2726   1.9  christos 	dst = conv_state->ftail -= (slen + 1 + p->longjt + p->longjf);
   2727   1.1  christos 		/* inflate length by any extra jumps */
   2728   1.1  christos 
   2729   1.9  christos 	p->offset = (int)(dst - conv_state->fstart);
   2730   1.1  christos 
   2731   1.1  christos 	/* generate offset[] for convenience  */
   2732   1.1  christos 	if (slen) {
   2733   1.1  christos 		offset = (struct slist **)calloc(slen, sizeof(struct slist *));
   2734   1.1  christos 		if (!offset) {
   2735  1.11  christos 			conv_error(conv_state, "not enough core");
   2736   1.1  christos 			/*NOTREACHED*/
   2737   1.1  christos 		}
   2738   1.1  christos 	}
   2739   1.1  christos 	src = p->stmts;
   2740   1.1  christos 	for (off = 0; off < slen && src; off++) {
   2741   1.1  christos #if 0
   2742   1.1  christos 		printf("off=%d src=%x\n", off, src);
   2743   1.1  christos #endif
   2744   1.1  christos 		offset[off] = src;
   2745   1.1  christos 		src = src->next;
   2746   1.1  christos 	}
   2747   1.1  christos 
   2748   1.1  christos 	off = 0;
   2749   1.1  christos 	for (src = p->stmts; src; src = src->next) {
   2750   1.1  christos 		if (src->s.code == NOP)
   2751   1.1  christos 			continue;
   2752   1.1  christos 		dst->code = (u_short)src->s.code;
   2753   1.1  christos 		dst->k = src->s.k;
   2754   1.1  christos 
   2755   1.1  christos 		/* fill block-local relative jump */
   2756   1.1  christos 		if (BPF_CLASS(src->s.code) != BPF_JMP || src->s.code == (BPF_JMP|BPF_JA)) {
   2757   1.1  christos #if 0
   2758   1.1  christos 			if (src->s.jt || src->s.jf) {
   2759  1.11  christos 				free(offset);
   2760  1.11  christos 				conv_error(conv_state, "illegal jmp destination");
   2761   1.1  christos 				/*NOTREACHED*/
   2762   1.1  christos 			}
   2763   1.1  christos #endif
   2764   1.1  christos 			goto filled;
   2765   1.1  christos 		}
   2766   1.1  christos 		if (off == slen - 2)	/*???*/
   2767   1.1  christos 			goto filled;
   2768   1.1  christos 
   2769   1.1  christos 	    {
   2770   1.2  christos 		u_int i;
   2771   1.1  christos 		int jt, jf;
   2772  1.10  christos 		const char ljerr[] = "%s for block-local relative jump: off=%d";
   2773   1.1  christos 
   2774   1.1  christos #if 0
   2775   1.1  christos 		printf("code=%x off=%d %x %x\n", src->s.code,
   2776   1.1  christos 			off, src->s.jt, src->s.jf);
   2777   1.1  christos #endif
   2778   1.1  christos 
   2779   1.1  christos 		if (!src->s.jt || !src->s.jf) {
   2780  1.11  christos 			free(offset);
   2781  1.11  christos 			conv_error(conv_state, ljerr, "no jmp destination", off);
   2782   1.1  christos 			/*NOTREACHED*/
   2783   1.1  christos 		}
   2784   1.1  christos 
   2785   1.1  christos 		jt = jf = 0;
   2786   1.1  christos 		for (i = 0; i < slen; i++) {
   2787   1.1  christos 			if (offset[i] == src->s.jt) {
   2788   1.1  christos 				if (jt) {
   2789  1.11  christos 					free(offset);
   2790  1.11  christos 					conv_error(conv_state, ljerr, "multiple matches", off);
   2791   1.1  christos 					/*NOTREACHED*/
   2792   1.1  christos 				}
   2793   1.1  christos 
   2794  1.10  christos 				if (i - off - 1 >= 256) {
   2795  1.11  christos 					free(offset);
   2796  1.11  christos 					conv_error(conv_state, ljerr, "out-of-range jump", off);
   2797  1.10  christos 					/*NOTREACHED*/
   2798  1.10  christos 				}
   2799  1.10  christos 				dst->jt = (u_char)(i - off - 1);
   2800   1.1  christos 				jt++;
   2801   1.1  christos 			}
   2802   1.1  christos 			if (offset[i] == src->s.jf) {
   2803   1.1  christos 				if (jf) {
   2804  1.11  christos 					free(offset);
   2805  1.11  christos 					conv_error(conv_state, ljerr, "multiple matches", off);
   2806   1.1  christos 					/*NOTREACHED*/
   2807   1.1  christos 				}
   2808  1.10  christos 				if (i - off - 1 >= 256) {
   2809  1.11  christos 					free(offset);
   2810  1.11  christos 					conv_error(conv_state, ljerr, "out-of-range jump", off);
   2811  1.10  christos 					/*NOTREACHED*/
   2812  1.10  christos 				}
   2813  1.10  christos 				dst->jf = (u_char)(i - off - 1);
   2814   1.1  christos 				jf++;
   2815   1.1  christos 			}
   2816   1.1  christos 		}
   2817   1.1  christos 		if (!jt || !jf) {
   2818  1.11  christos 			free(offset);
   2819  1.11  christos 			conv_error(conv_state, ljerr, "no destination found", off);
   2820   1.1  christos 			/*NOTREACHED*/
   2821   1.1  christos 		}
   2822   1.1  christos 	    }
   2823   1.1  christos filled:
   2824   1.1  christos 		++dst;
   2825   1.1  christos 		++off;
   2826   1.1  christos 	}
   2827   1.1  christos 	if (offset)
   2828   1.1  christos 		free(offset);
   2829   1.1  christos 
   2830   1.1  christos #ifdef BDEBUG
   2831  1.10  christos 	if (dst - conv_state->fstart < NBIDS)
   2832  1.10  christos 		bids[dst - conv_state->fstart] = p->id + 1;
   2833   1.1  christos #endif
   2834   1.1  christos 	dst->code = (u_short)p->s.code;
   2835   1.1  christos 	dst->k = p->s.k;
   2836   1.1  christos 	if (JT(p)) {
   2837  1.12  christos 		/* number of extra jumps inserted */
   2838  1.12  christos 		u_char extrajmps = 0;
   2839   1.1  christos 		off = JT(p)->offset - (p->offset + slen) - 1;
   2840   1.1  christos 		if (off >= 256) {
   2841   1.1  christos 		    /* offset too large for branch, must add a jump */
   2842   1.1  christos 		    if (p->longjt == 0) {
   2843  1.12  christos 			/* mark this instruction and retry */
   2844   1.1  christos 			p->longjt++;
   2845   1.1  christos 			return(0);
   2846   1.1  christos 		    }
   2847  1.12  christos 		    dst->jt = extrajmps;
   2848   1.1  christos 		    extrajmps++;
   2849   1.1  christos 		    dst[extrajmps].code = BPF_JMP|BPF_JA;
   2850   1.1  christos 		    dst[extrajmps].k = off - extrajmps;
   2851   1.1  christos 		}
   2852   1.1  christos 		else
   2853  1.10  christos 		    dst->jt = (u_char)off;
   2854   1.1  christos 		off = JF(p)->offset - (p->offset + slen) - 1;
   2855   1.1  christos 		if (off >= 256) {
   2856   1.1  christos 		    /* offset too large for branch, must add a jump */
   2857   1.1  christos 		    if (p->longjf == 0) {
   2858  1.12  christos 			/* mark this instruction and retry */
   2859   1.1  christos 			p->longjf++;
   2860   1.1  christos 			return(0);
   2861   1.1  christos 		    }
   2862   1.1  christos 		    /* branch if F to following jump */
   2863   1.1  christos 		    /* if two jumps are inserted, F goes to second one */
   2864  1.12  christos 		    dst->jf = extrajmps;
   2865   1.1  christos 		    extrajmps++;
   2866   1.1  christos 		    dst[extrajmps].code = BPF_JMP|BPF_JA;
   2867   1.1  christos 		    dst[extrajmps].k = off - extrajmps;
   2868   1.1  christos 		}
   2869   1.1  christos 		else
   2870  1.10  christos 		    dst->jf = (u_char)off;
   2871   1.1  christos 	}
   2872   1.1  christos 	return (1);
   2873   1.1  christos }
   2874   1.1  christos 
   2875   1.1  christos 
   2876   1.1  christos /*
   2877   1.1  christos  * Convert flowgraph intermediate representation to the
   2878   1.1  christos  * BPF array representation.  Set *lenp to the number of instructions.
   2879   1.1  christos  *
   2880   1.1  christos  * This routine does *NOT* leak the memory pointed to by fp.  It *must
   2881   1.1  christos  * not* do free(fp) before returning fp; doing so would make no sense,
   2882   1.1  christos  * as the BPF array pointed to by the return value of icode_to_fcode()
   2883   1.1  christos  * must be valid - it's being returned for use in a bpf_program structure.
   2884   1.1  christos  *
   2885   1.1  christos  * If it appears that icode_to_fcode() is leaking, the problem is that
   2886   1.1  christos  * the program using pcap_compile() is failing to free the memory in
   2887   1.1  christos  * the BPF program when it's done - the leak is in the program, not in
   2888   1.1  christos  * the routine that happens to be allocating the memory.  (By analogy, if
   2889   1.1  christos  * a program calls fopen() without ever calling fclose() on the FILE *,
   2890   1.1  christos  * it will leak the FILE structure; the leak is not in fopen(), it's in
   2891   1.1  christos  * the program.)  Change the program to use pcap_freecode() when it's
   2892   1.1  christos  * done with the filter program.  See the pcap man page.
   2893   1.1  christos  */
   2894   1.1  christos struct bpf_insn *
   2895  1.12  christos icode_to_fcode(struct icode *ic, struct block *root, u_int *lenp,
   2896  1.11  christos     char *errbuf)
   2897   1.1  christos {
   2898   1.5  christos 	u_int n;
   2899   1.1  christos 	struct bpf_insn *fp;
   2900   1.9  christos 	conv_state_t conv_state;
   2901   1.1  christos 
   2902  1.11  christos 	conv_state.fstart = NULL;
   2903  1.11  christos 	conv_state.errbuf = errbuf;
   2904  1.11  christos 	if (setjmp(conv_state.top_ctx) != 0) {
   2905  1.11  christos 		free(conv_state.fstart);
   2906  1.11  christos 		return NULL;
   2907  1.11  christos 	}
   2908  1.11  christos 
   2909   1.1  christos 	/*
   2910   1.1  christos 	 * Loop doing convert_code_r() until no branches remain
   2911   1.1  christos 	 * with too-large offsets.
   2912   1.1  christos 	 */
   2913  1.10  christos 	for (;;) {
   2914   1.9  christos 	    unMarkAll(ic);
   2915   1.9  christos 	    n = *lenp = count_stmts(ic, root);
   2916   1.1  christos 
   2917   1.1  christos 	    fp = (struct bpf_insn *)malloc(sizeof(*fp) * n);
   2918  1.11  christos 	    if (fp == NULL) {
   2919  1.12  christos 		(void)snprintf(errbuf, PCAP_ERRBUF_SIZE,
   2920  1.11  christos 		    "malloc");
   2921  1.11  christos 		return NULL;
   2922  1.11  christos 	    }
   2923   1.1  christos 	    memset((char *)fp, 0, sizeof(*fp) * n);
   2924   1.9  christos 	    conv_state.fstart = fp;
   2925   1.9  christos 	    conv_state.ftail = fp + n;
   2926   1.1  christos 
   2927   1.9  christos 	    unMarkAll(ic);
   2928  1.11  christos 	    if (convert_code_r(&conv_state, ic, root))
   2929   1.1  christos 		break;
   2930   1.1  christos 	    free(fp);
   2931   1.1  christos 	}
   2932   1.1  christos 
   2933   1.1  christos 	return fp;
   2934   1.1  christos }
   2935   1.1  christos 
   2936   1.1  christos /*
   2937  1.11  christos  * For iconv_to_fconv() errors.
   2938  1.11  christos  */
   2939  1.11  christos static void PCAP_NORETURN
   2940  1.11  christos conv_error(conv_state_t *conv_state, const char *fmt, ...)
   2941  1.11  christos {
   2942  1.11  christos 	va_list ap;
   2943  1.11  christos 
   2944  1.11  christos 	va_start(ap, fmt);
   2945  1.12  christos 	(void)vsnprintf(conv_state->errbuf,
   2946  1.11  christos 	    PCAP_ERRBUF_SIZE, fmt, ap);
   2947  1.11  christos 	va_end(ap);
   2948  1.11  christos 	longjmp(conv_state->top_ctx, 1);
   2949  1.11  christos 	/* NOTREACHED */
   2950  1.12  christos #ifdef _AIX
   2951  1.12  christos 	PCAP_UNREACHABLE
   2952  1.12  christos #endif /* _AIX */
   2953  1.11  christos }
   2954  1.11  christos 
   2955  1.11  christos /*
   2956   1.1  christos  * Make a copy of a BPF program and put it in the "fcode" member of
   2957   1.1  christos  * a "pcap_t".
   2958   1.1  christos  *
   2959   1.1  christos  * If we fail to allocate memory for the copy, fill in the "errbuf"
   2960   1.1  christos  * member of the "pcap_t" with an error message, and return -1;
   2961   1.1  christos  * otherwise, return 0.
   2962   1.1  christos  */
   2963   1.1  christos int
   2964  1.13  christos pcapint_install_bpf_program(pcap_t *p, struct bpf_program *fp)
   2965   1.1  christos {
   2966   1.1  christos 	size_t prog_size;
   2967   1.1  christos 
   2968   1.1  christos 	/*
   2969   1.1  christos 	 * Validate the program.
   2970   1.1  christos 	 */
   2971  1.13  christos 	if (!pcapint_validate_filter(fp->bf_insns, fp->bf_len)) {
   2972  1.12  christos 		snprintf(p->errbuf, sizeof(p->errbuf),
   2973   1.1  christos 			"BPF program is not valid");
   2974   1.1  christos 		return (-1);
   2975   1.1  christos 	}
   2976   1.1  christos 
   2977   1.1  christos 	/*
   2978   1.1  christos 	 * Free up any already installed program.
   2979   1.1  christos 	 */
   2980   1.1  christos 	pcap_freecode(&p->fcode);
   2981   1.1  christos 
   2982   1.1  christos 	prog_size = sizeof(*fp->bf_insns) * fp->bf_len;
   2983   1.1  christos 	p->fcode.bf_len = fp->bf_len;
   2984   1.1  christos 	p->fcode.bf_insns = (struct bpf_insn *)malloc(prog_size);
   2985   1.1  christos 	if (p->fcode.bf_insns == NULL) {
   2986  1.13  christos 		pcapint_fmt_errmsg_for_errno(p->errbuf, sizeof(p->errbuf),
   2987  1.10  christos 		    errno, "malloc");
   2988   1.1  christos 		return (-1);
   2989   1.1  christos 	}
   2990   1.1  christos 	memcpy(p->fcode.bf_insns, fp->bf_insns, prog_size);
   2991   1.1  christos 	return (0);
   2992   1.1  christos }
   2993   1.1  christos 
   2994   1.1  christos #ifdef BDEBUG
   2995   1.1  christos static void
   2996   1.9  christos dot_dump_node(struct icode *ic, struct block *block, struct bpf_program *prog,
   2997   1.9  christos     FILE *out)
   2998   1.8  christos {
   2999   1.8  christos 	int icount, noffset;
   3000   1.8  christos 	int i;
   3001   1.8  christos 
   3002   1.9  christos 	if (block == NULL || isMarked(ic, block))
   3003   1.8  christos 		return;
   3004   1.9  christos 	Mark(ic, block);
   3005   1.8  christos 
   3006   1.8  christos 	icount = slength(block->stmts) + 1 + block->longjt + block->longjf;
   3007   1.8  christos 	noffset = min(block->offset + icount, (int)prog->bf_len);
   3008   1.8  christos 
   3009  1.12  christos 	fprintf(out, "\tblock%u [shape=ellipse, id=\"block-%u\" label=\"BLOCK%u\\n", block->id, block->id, block->id);
   3010   1.8  christos 	for (i = block->offset; i < noffset; i++) {
   3011   1.8  christos 		fprintf(out, "\\n%s", bpf_image(prog->bf_insns + i, i));
   3012   1.8  christos 	}
   3013   1.8  christos 	fprintf(out, "\" tooltip=\"");
   3014   1.8  christos 	for (i = 0; i < BPF_MEMWORDS; i++)
   3015  1.10  christos 		if (block->val[i] != VAL_UNKNOWN)
   3016   1.8  christos 			fprintf(out, "val[%d]=%d ", i, block->val[i]);
   3017   1.8  christos 	fprintf(out, "val[A]=%d ", block->val[A_ATOM]);
   3018   1.8  christos 	fprintf(out, "val[X]=%d", block->val[X_ATOM]);
   3019   1.8  christos 	fprintf(out, "\"");
   3020   1.8  christos 	if (JT(block) == NULL)
   3021   1.8  christos 		fprintf(out, ", peripheries=2");
   3022   1.8  christos 	fprintf(out, "];\n");
   3023   1.8  christos 
   3024   1.9  christos 	dot_dump_node(ic, JT(block), prog, out);
   3025   1.9  christos 	dot_dump_node(ic, JF(block), prog, out);
   3026   1.8  christos }
   3027   1.9  christos 
   3028   1.8  christos static void
   3029   1.9  christos dot_dump_edge(struct icode *ic, struct block *block, FILE *out)
   3030   1.8  christos {
   3031   1.9  christos 	if (block == NULL || isMarked(ic, block))
   3032   1.8  christos 		return;
   3033   1.9  christos 	Mark(ic, block);
   3034   1.8  christos 
   3035   1.8  christos 	if (JT(block)) {
   3036  1.12  christos 		fprintf(out, "\t\"block%u\":se -> \"block%u\":n [label=\"T\"]; \n",
   3037   1.8  christos 				block->id, JT(block)->id);
   3038  1.12  christos 		fprintf(out, "\t\"block%u\":sw -> \"block%u\":n [label=\"F\"]; \n",
   3039   1.8  christos 			   block->id, JF(block)->id);
   3040   1.8  christos 	}
   3041   1.9  christos 	dot_dump_edge(ic, JT(block), out);
   3042   1.9  christos 	dot_dump_edge(ic, JF(block), out);
   3043   1.8  christos }
   3044   1.9  christos 
   3045   1.8  christos /* Output the block CFG using graphviz/DOT language
   3046   1.8  christos  * In the CFG, block's code, value index for each registers at EXIT,
   3047   1.8  christos  * and the jump relationship is show.
   3048   1.8  christos  *
   3049   1.8  christos  * example DOT for BPF `ip src host 1.1.1.1' is:
   3050   1.8  christos     digraph BPF {
   3051  1.12  christos 	block0 [shape=ellipse, id="block-0" label="BLOCK0\n\n(000) ldh      [12]\n(001) jeq      #0x800           jt 2	jf 5" tooltip="val[A]=0 val[X]=0"];
   3052  1.12  christos 	block1 [shape=ellipse, id="block-1" label="BLOCK1\n\n(002) ld       [26]\n(003) jeq      #0x1010101       jt 4	jf 5" tooltip="val[A]=0 val[X]=0"];
   3053  1.12  christos 	block2 [shape=ellipse, id="block-2" label="BLOCK2\n\n(004) ret      #68" tooltip="val[A]=0 val[X]=0", peripheries=2];
   3054  1.12  christos 	block3 [shape=ellipse, id="block-3" label="BLOCK3\n\n(005) ret      #0" tooltip="val[A]=0 val[X]=0", peripheries=2];
   3055  1.12  christos 	"block0":se -> "block1":n [label="T"];
   3056  1.12  christos 	"block0":sw -> "block3":n [label="F"];
   3057  1.12  christos 	"block1":se -> "block2":n [label="T"];
   3058  1.12  christos 	"block1":sw -> "block3":n [label="F"];
   3059   1.8  christos     }
   3060   1.8  christos  *
   3061  1.12  christos  *  After install graphviz on https://www.graphviz.org/, save it as bpf.dot
   3062   1.8  christos  *  and run `dot -Tpng -O bpf.dot' to draw the graph.
   3063   1.8  christos  */
   3064  1.11  christos static int
   3065  1.11  christos dot_dump(struct icode *ic, char *errbuf)
   3066   1.8  christos {
   3067   1.8  christos 	struct bpf_program f;
   3068   1.8  christos 	FILE *out = stdout;
   3069   1.8  christos 
   3070   1.8  christos 	memset(bids, 0, sizeof bids);
   3071  1.11  christos 	f.bf_insns = icode_to_fcode(ic, ic->root, &f.bf_len, errbuf);
   3072  1.11  christos 	if (f.bf_insns == NULL)
   3073  1.11  christos 		return -1;
   3074   1.8  christos 
   3075   1.8  christos 	fprintf(out, "digraph BPF {\n");
   3076   1.9  christos 	unMarkAll(ic);
   3077   1.9  christos 	dot_dump_node(ic, ic->root, &f, out);
   3078   1.9  christos 	unMarkAll(ic);
   3079   1.9  christos 	dot_dump_edge(ic, ic->root, out);
   3080   1.8  christos 	fprintf(out, "}\n");
   3081   1.8  christos 
   3082   1.8  christos 	free((char *)f.bf_insns);
   3083  1.11  christos 	return 0;
   3084   1.8  christos }
   3085   1.8  christos 
   3086  1.11  christos static int
   3087  1.11  christos plain_dump(struct icode *ic, char *errbuf)
   3088   1.1  christos {
   3089   1.1  christos 	struct bpf_program f;
   3090   1.1  christos 
   3091   1.1  christos 	memset(bids, 0, sizeof bids);
   3092  1.11  christos 	f.bf_insns = icode_to_fcode(ic, ic->root, &f.bf_len, errbuf);
   3093  1.11  christos 	if (f.bf_insns == NULL)
   3094  1.11  christos 		return -1;
   3095   1.1  christos 	bpf_dump(&f, 1);
   3096   1.1  christos 	putchar('\n');
   3097   1.1  christos 	free((char *)f.bf_insns);
   3098  1.11  christos 	return 0;
   3099   1.1  christos }
   3100   1.9  christos 
   3101   1.8  christos static void
   3102  1.11  christos opt_dump(opt_state_t *opt_state, struct icode *ic)
   3103   1.8  christos {
   3104  1.11  christos 	int status;
   3105  1.11  christos 	char errbuf[PCAP_ERRBUF_SIZE];
   3106  1.11  christos 
   3107  1.10  christos 	/*
   3108  1.10  christos 	 * If the CFG, in DOT format, is requested, output it rather than
   3109  1.10  christos 	 * the code that would be generated from that graph.
   3110   1.8  christos 	 */
   3111  1.10  christos 	if (pcap_print_dot_graph)
   3112  1.11  christos 		status = dot_dump(ic, errbuf);
   3113   1.8  christos 	else
   3114  1.11  christos 		status = plain_dump(ic, errbuf);
   3115  1.11  christos 	if (status == -1)
   3116  1.11  christos 		opt_error(opt_state, "opt_dump: icode_to_fcode failed: %s", errbuf);
   3117   1.8  christos }
   3118   1.1  christos #endif
   3119