optimize.c revision 1.13 1 1.13 christos /* $NetBSD: optimize.c,v 1.13 2024/09/02 15:33:37 christos Exp $ */
2 1.5 christos
3 1.1 christos /*
4 1.1 christos * Copyright (c) 1988, 1989, 1990, 1991, 1993, 1994, 1995, 1996
5 1.1 christos * The Regents of the University of California. All rights reserved.
6 1.1 christos *
7 1.1 christos * Redistribution and use in source and binary forms, with or without
8 1.1 christos * modification, are permitted provided that: (1) source code distributions
9 1.1 christos * retain the above copyright notice and this paragraph in its entirety, (2)
10 1.1 christos * distributions including binary code include the above copyright notice and
11 1.1 christos * this paragraph in its entirety in the documentation or other materials
12 1.1 christos * provided with the distribution, and (3) all advertising materials mentioning
13 1.1 christos * features or use of this software display the following acknowledgement:
14 1.1 christos * ``This product includes software developed by the University of California,
15 1.1 christos * Lawrence Berkeley Laboratory and its contributors.'' Neither the name of
16 1.1 christos * the University nor the names of its contributors may be used to endorse
17 1.1 christos * or promote products derived from this software without specific prior
18 1.1 christos * written permission.
19 1.1 christos * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
20 1.1 christos * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
21 1.1 christos * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
22 1.1 christos *
23 1.10 christos * Optimization module for BPF code intermediate representation.
24 1.1 christos */
25 1.7 christos
26 1.7 christos #include <sys/cdefs.h>
27 1.13 christos __RCSID("$NetBSD: optimize.c,v 1.13 2024/09/02 15:33:37 christos Exp $");
28 1.1 christos
29 1.10 christos #include <config.h>
30 1.1 christos
31 1.10 christos #include <pcap-types.h>
32 1.1 christos
33 1.1 christos #include <stdio.h>
34 1.1 christos #include <stdlib.h>
35 1.1 christos #include <memory.h>
36 1.11 christos #include <setjmp.h>
37 1.1 christos #include <string.h>
38 1.12 christos #include <limits.h> /* for SIZE_MAX */
39 1.1 christos #include <errno.h>
40 1.1 christos
41 1.1 christos #include "pcap-int.h"
42 1.1 christos
43 1.1 christos #include "gencode.h"
44 1.10 christos #include "optimize.h"
45 1.12 christos #include "diag-control.h"
46 1.1 christos
47 1.1 christos #ifdef HAVE_OS_PROTO_H
48 1.1 christos #include "os-proto.h"
49 1.1 christos #endif
50 1.1 christos
51 1.1 christos #ifdef BDEBUG
52 1.10 christos /*
53 1.10 christos * The internal "debug printout" flag for the filter expression optimizer.
54 1.10 christos * The code to print that stuff is present only if BDEBUG is defined, so
55 1.10 christos * the flag, and the routine to set it, are defined only if BDEBUG is
56 1.10 christos * defined.
57 1.10 christos */
58 1.10 christos static int pcap_optimizer_debug;
59 1.10 christos
60 1.10 christos /*
61 1.10 christos * Routine to set that flag.
62 1.10 christos *
63 1.10 christos * This is intended for libpcap developers, not for general use.
64 1.10 christos * If you want to set these in a program, you'll have to declare this
65 1.10 christos * routine yourself, with the appropriate DLL import attribute on Windows;
66 1.10 christos * it's not declared in any header file, and won't be declared in any
67 1.10 christos * header file provided by libpcap.
68 1.10 christos */
69 1.10 christos PCAP_API void pcap_set_optimizer_debug(int value);
70 1.10 christos
71 1.10 christos PCAP_API_DEF void
72 1.10 christos pcap_set_optimizer_debug(int value)
73 1.10 christos {
74 1.10 christos pcap_optimizer_debug = value;
75 1.10 christos }
76 1.10 christos
77 1.10 christos /*
78 1.10 christos * The internal "print dot graph" flag for the filter expression optimizer.
79 1.10 christos * The code to print that stuff is present only if BDEBUG is defined, so
80 1.10 christos * the flag, and the routine to set it, are defined only if BDEBUG is
81 1.10 christos * defined.
82 1.10 christos */
83 1.10 christos static int pcap_print_dot_graph;
84 1.10 christos
85 1.10 christos /*
86 1.10 christos * Routine to set that flag.
87 1.10 christos *
88 1.10 christos * This is intended for libpcap developers, not for general use.
89 1.10 christos * If you want to set these in a program, you'll have to declare this
90 1.10 christos * routine yourself, with the appropriate DLL import attribute on Windows;
91 1.10 christos * it's not declared in any header file, and won't be declared in any
92 1.10 christos * header file provided by libpcap.
93 1.10 christos */
94 1.10 christos PCAP_API void pcap_set_print_dot_graph(int value);
95 1.10 christos
96 1.10 christos PCAP_API_DEF void
97 1.10 christos pcap_set_print_dot_graph(int value)
98 1.10 christos {
99 1.10 christos pcap_print_dot_graph = value;
100 1.10 christos }
101 1.1 christos
102 1.1 christos #endif
103 1.1 christos
104 1.9 christos /*
105 1.10 christos * lowest_set_bit().
106 1.10 christos *
107 1.10 christos * Takes a 32-bit integer as an argument.
108 1.10 christos *
109 1.10 christos * If handed a non-zero value, returns the index of the lowest set bit,
110 1.12 christos * counting upwards from zero.
111 1.10 christos *
112 1.10 christos * If handed zero, the results are platform- and compiler-dependent.
113 1.10 christos * Keep it out of the light, don't give it any water, don't feed it
114 1.10 christos * after midnight, and don't pass zero to it.
115 1.10 christos *
116 1.10 christos * This is the same as the count of trailing zeroes in the word.
117 1.9 christos */
118 1.10 christos #if PCAP_IS_AT_LEAST_GNUC_VERSION(3,4)
119 1.10 christos /*
120 1.10 christos * GCC 3.4 and later; we have __builtin_ctz().
121 1.10 christos */
122 1.12 christos #define lowest_set_bit(mask) ((u_int)__builtin_ctz(mask))
123 1.10 christos #elif defined(_MSC_VER)
124 1.10 christos /*
125 1.10 christos * Visual Studio; we support only 2005 and later, so use
126 1.10 christos * _BitScanForward().
127 1.10 christos */
128 1.10 christos #include <intrin.h>
129 1.10 christos
130 1.10 christos #ifndef __clang__
131 1.10 christos #pragma intrinsic(_BitScanForward)
132 1.10 christos #endif
133 1.10 christos
134 1.12 christos static __forceinline u_int
135 1.10 christos lowest_set_bit(int mask)
136 1.10 christos {
137 1.10 christos unsigned long bit;
138 1.10 christos
139 1.10 christos /*
140 1.10 christos * Don't sign-extend mask if long is longer than int.
141 1.10 christos * (It's currently not, in MSVC, even on 64-bit platforms, but....)
142 1.10 christos */
143 1.10 christos if (_BitScanForward(&bit, (unsigned int)mask) == 0)
144 1.12 christos abort(); /* mask is zero */
145 1.12 christos return (u_int)bit;
146 1.10 christos }
147 1.10 christos #elif defined(MSDOS) && defined(__DJGPP__)
148 1.10 christos /*
149 1.10 christos * MS-DOS with DJGPP, which declares ffs() in <string.h>, which
150 1.10 christos * we've already included.
151 1.10 christos */
152 1.12 christos #define lowest_set_bit(mask) ((u_int)(ffs((mask)) - 1))
153 1.10 christos #elif (defined(MSDOS) && defined(__WATCOMC__)) || defined(STRINGS_H_DECLARES_FFS)
154 1.10 christos /*
155 1.10 christos * MS-DOS with Watcom C, which has <strings.h> and declares ffs() there,
156 1.10 christos * or some other platform (UN*X conforming to a sufficient recent version
157 1.10 christos * of the Single UNIX Specification).
158 1.10 christos */
159 1.10 christos #include <strings.h>
160 1.12 christos #define lowest_set_bit(mask) (u_int)((ffs((mask)) - 1))
161 1.10 christos #else
162 1.9 christos /*
163 1.10 christos * None of the above.
164 1.10 christos * Use a perfect-hash-function-based function.
165 1.9 christos */
166 1.12 christos static u_int
167 1.10 christos lowest_set_bit(int mask)
168 1.9 christos {
169 1.10 christos unsigned int v = (unsigned int)mask;
170 1.10 christos
171 1.12 christos static const u_int MultiplyDeBruijnBitPosition[32] = {
172 1.10 christos 0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
173 1.10 christos 31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9
174 1.10 christos };
175 1.9 christos
176 1.10 christos /*
177 1.10 christos * We strip off all but the lowermost set bit (v & ~v),
178 1.10 christos * and perform a minimal perfect hash on it to look up the
179 1.10 christos * number of low-order zero bits in a table.
180 1.10 christos *
181 1.10 christos * See:
182 1.10 christos *
183 1.10 christos * http://7ooo.mooo.com/text/ComputingTrailingZerosHOWTO.pdf
184 1.10 christos *
185 1.10 christos * http://supertech.csail.mit.edu/papers/debruijn.pdf
186 1.10 christos */
187 1.10 christos return (MultiplyDeBruijnBitPosition[((v & -v) * 0x077CB531U) >> 27]);
188 1.9 christos }
189 1.1 christos #endif
190 1.1 christos
191 1.1 christos /*
192 1.1 christos * Represents a deleted instruction.
193 1.1 christos */
194 1.1 christos #define NOP -1
195 1.1 christos
196 1.1 christos /*
197 1.1 christos * Register numbers for use-def values.
198 1.1 christos * 0 through BPF_MEMWORDS-1 represent the corresponding scratch memory
199 1.1 christos * location. A_ATOM is the accumulator and X_ATOM is the index
200 1.1 christos * register.
201 1.1 christos */
202 1.1 christos #define A_ATOM BPF_MEMWORDS
203 1.1 christos #define X_ATOM (BPF_MEMWORDS+1)
204 1.1 christos
205 1.1 christos /*
206 1.1 christos * This define is used to represent *both* the accumulator and
207 1.1 christos * x register in use-def computations.
208 1.1 christos * Currently, the use-def code assumes only one definition per instruction.
209 1.1 christos */
210 1.1 christos #define AX_ATOM N_ATOMS
211 1.1 christos
212 1.1 christos /*
213 1.13 christos * These data structures are used in a Cocke and Schwartz style
214 1.9 christos * value numbering scheme. Since the flowgraph is acyclic,
215 1.9 christos * exit values can be propagated from a node's predecessors
216 1.9 christos * provided it is uniquely defined.
217 1.1 christos */
218 1.9 christos struct valnode {
219 1.9 christos int code;
220 1.12 christos bpf_u_int32 v0, v1;
221 1.12 christos int val; /* the value number */
222 1.9 christos struct valnode *next;
223 1.9 christos };
224 1.1 christos
225 1.9 christos /* Integer constants mapped with the load immediate opcode. */
226 1.12 christos #define K(i) F(opt_state, BPF_LD|BPF_IMM|BPF_W, i, 0U)
227 1.1 christos
228 1.9 christos struct vmapinfo {
229 1.9 christos int is_const;
230 1.12 christos bpf_u_int32 const_val;
231 1.9 christos };
232 1.1 christos
233 1.10 christos typedef struct {
234 1.9 christos /*
235 1.11 christos * Place to longjmp to on an error.
236 1.11 christos */
237 1.11 christos jmp_buf top_ctx;
238 1.11 christos
239 1.11 christos /*
240 1.11 christos * The buffer into which to put error message.
241 1.11 christos */
242 1.11 christos char *errbuf;
243 1.11 christos
244 1.11 christos /*
245 1.9 christos * A flag to indicate that further optimization is needed.
246 1.9 christos * Iterative passes are continued until a given pass yields no
247 1.12 christos * code simplification or branch movement.
248 1.9 christos */
249 1.9 christos int done;
250 1.1 christos
251 1.12 christos /*
252 1.12 christos * XXX - detect loops that do nothing but repeated AND/OR pullups
253 1.12 christos * and edge moves.
254 1.12 christos * If 100 passes in a row do nothing but that, treat that as a
255 1.12 christos * sign that we're in a loop that just shuffles in a cycle in
256 1.12 christos * which each pass just shuffles the code and we eventually
257 1.12 christos * get back to the original configuration.
258 1.12 christos *
259 1.12 christos * XXX - we need a non-heuristic way of detecting, or preventing,
260 1.12 christos * such a cycle.
261 1.12 christos */
262 1.12 christos int non_branch_movement_performed;
263 1.12 christos
264 1.12 christos u_int n_blocks; /* number of blocks in the CFG; guaranteed to be > 0, as it's a RET instruction at a minimum */
265 1.9 christos struct block **blocks;
266 1.12 christos u_int n_edges; /* twice n_blocks, so guaranteed to be > 0 */
267 1.9 christos struct edge **edges;
268 1.1 christos
269 1.9 christos /*
270 1.9 christos * A bit vector set representation of the dominators.
271 1.9 christos * We round up the set size to the next power of two.
272 1.9 christos */
273 1.12 christos u_int nodewords; /* number of 32-bit words for a bit vector of "number of nodes" bits; guaranteed to be > 0 */
274 1.12 christos u_int edgewords; /* number of 32-bit words for a bit vector of "number of edges" bits; guaranteed to be > 0 */
275 1.9 christos struct block **levels;
276 1.9 christos bpf_u_int32 *space;
277 1.1 christos
278 1.1 christos #define BITS_PER_WORD (8*sizeof(bpf_u_int32))
279 1.1 christos /*
280 1.1 christos * True if a is in uset {p}
281 1.1 christos */
282 1.1 christos #define SET_MEMBER(p, a) \
283 1.11 christos ((p)[(unsigned)(a) / BITS_PER_WORD] & ((bpf_u_int32)1 << ((unsigned)(a) % BITS_PER_WORD)))
284 1.1 christos
285 1.1 christos /*
286 1.1 christos * Add 'a' to uset p.
287 1.1 christos */
288 1.1 christos #define SET_INSERT(p, a) \
289 1.11 christos (p)[(unsigned)(a) / BITS_PER_WORD] |= ((bpf_u_int32)1 << ((unsigned)(a) % BITS_PER_WORD))
290 1.1 christos
291 1.1 christos /*
292 1.1 christos * Delete 'a' from uset p.
293 1.1 christos */
294 1.1 christos #define SET_DELETE(p, a) \
295 1.11 christos (p)[(unsigned)(a) / BITS_PER_WORD] &= ~((bpf_u_int32)1 << ((unsigned)(a) % BITS_PER_WORD))
296 1.1 christos
297 1.1 christos /*
298 1.1 christos * a := a intersect b
299 1.12 christos * n must be guaranteed to be > 0
300 1.1 christos */
301 1.1 christos #define SET_INTERSECT(a, b, n)\
302 1.1 christos {\
303 1.1 christos register bpf_u_int32 *_x = a, *_y = b;\
304 1.12 christos register u_int _n = n;\
305 1.12 christos do *_x++ &= *_y++; while (--_n != 0);\
306 1.1 christos }
307 1.1 christos
308 1.1 christos /*
309 1.1 christos * a := a - b
310 1.12 christos * n must be guaranteed to be > 0
311 1.1 christos */
312 1.1 christos #define SET_SUBTRACT(a, b, n)\
313 1.1 christos {\
314 1.1 christos register bpf_u_int32 *_x = a, *_y = b;\
315 1.12 christos register u_int _n = n;\
316 1.12 christos do *_x++ &=~ *_y++; while (--_n != 0);\
317 1.1 christos }
318 1.1 christos
319 1.1 christos /*
320 1.1 christos * a := a union b
321 1.12 christos * n must be guaranteed to be > 0
322 1.1 christos */
323 1.1 christos #define SET_UNION(a, b, n)\
324 1.1 christos {\
325 1.1 christos register bpf_u_int32 *_x = a, *_y = b;\
326 1.12 christos register u_int _n = n;\
327 1.12 christos do *_x++ |= *_y++; while (--_n != 0);\
328 1.1 christos }
329 1.1 christos
330 1.9 christos uset all_dom_sets;
331 1.9 christos uset all_closure_sets;
332 1.9 christos uset all_edge_sets;
333 1.9 christos
334 1.9 christos #define MODULUS 213
335 1.9 christos struct valnode *hashtbl[MODULUS];
336 1.12 christos bpf_u_int32 curval;
337 1.12 christos bpf_u_int32 maxval;
338 1.9 christos
339 1.9 christos struct vmapinfo *vmap;
340 1.9 christos struct valnode *vnode_base;
341 1.9 christos struct valnode *next_vnode;
342 1.10 christos } opt_state_t;
343 1.9 christos
344 1.9 christos typedef struct {
345 1.9 christos /*
346 1.11 christos * Place to longjmp to on an error.
347 1.11 christos */
348 1.11 christos jmp_buf top_ctx;
349 1.11 christos
350 1.11 christos /*
351 1.11 christos * The buffer into which to put error message.
352 1.11 christos */
353 1.11 christos char *errbuf;
354 1.11 christos
355 1.11 christos /*
356 1.9 christos * Some pointers used to convert the basic block form of the code,
357 1.9 christos * into the array form that BPF requires. 'fstart' will point to
358 1.9 christos * the malloc'd array while 'ftail' is used during the recursive
359 1.9 christos * traversal.
360 1.9 christos */
361 1.9 christos struct bpf_insn *fstart;
362 1.9 christos struct bpf_insn *ftail;
363 1.9 christos } conv_state_t;
364 1.9 christos
365 1.11 christos static void opt_init(opt_state_t *, struct icode *);
366 1.9 christos static void opt_cleanup(opt_state_t *);
367 1.11 christos static void PCAP_NORETURN opt_error(opt_state_t *, const char *, ...)
368 1.11 christos PCAP_PRINTFLIKE(2, 3);
369 1.9 christos
370 1.9 christos static void intern_blocks(opt_state_t *, struct icode *);
371 1.9 christos
372 1.9 christos static void find_inedges(opt_state_t *, struct block *);
373 1.9 christos #ifdef BDEBUG
374 1.11 christos static void opt_dump(opt_state_t *, struct icode *);
375 1.9 christos #endif
376 1.1 christos
377 1.1 christos #ifndef MAX
378 1.1 christos #define MAX(a,b) ((a)>(b)?(a):(b))
379 1.1 christos #endif
380 1.1 christos
381 1.1 christos static void
382 1.9 christos find_levels_r(opt_state_t *opt_state, struct icode *ic, struct block *b)
383 1.1 christos {
384 1.1 christos int level;
385 1.1 christos
386 1.9 christos if (isMarked(ic, b))
387 1.1 christos return;
388 1.1 christos
389 1.9 christos Mark(ic, b);
390 1.1 christos b->link = 0;
391 1.1 christos
392 1.1 christos if (JT(b)) {
393 1.9 christos find_levels_r(opt_state, ic, JT(b));
394 1.9 christos find_levels_r(opt_state, ic, JF(b));
395 1.1 christos level = MAX(JT(b)->level, JF(b)->level) + 1;
396 1.1 christos } else
397 1.1 christos level = 0;
398 1.1 christos b->level = level;
399 1.9 christos b->link = opt_state->levels[level];
400 1.9 christos opt_state->levels[level] = b;
401 1.1 christos }
402 1.1 christos
403 1.1 christos /*
404 1.1 christos * Level graph. The levels go from 0 at the leaves to
405 1.9 christos * N_LEVELS at the root. The opt_state->levels[] array points to the
406 1.1 christos * first node of the level list, whose elements are linked
407 1.1 christos * with the 'link' field of the struct block.
408 1.1 christos */
409 1.1 christos static void
410 1.9 christos find_levels(opt_state_t *opt_state, struct icode *ic)
411 1.1 christos {
412 1.9 christos memset((char *)opt_state->levels, 0, opt_state->n_blocks * sizeof(*opt_state->levels));
413 1.9 christos unMarkAll(ic);
414 1.9 christos find_levels_r(opt_state, ic, ic->root);
415 1.1 christos }
416 1.1 christos
417 1.1 christos /*
418 1.1 christos * Find dominator relationships.
419 1.1 christos * Assumes graph has been leveled.
420 1.1 christos */
421 1.1 christos static void
422 1.9 christos find_dom(opt_state_t *opt_state, struct block *root)
423 1.1 christos {
424 1.12 christos u_int i;
425 1.12 christos int level;
426 1.1 christos struct block *b;
427 1.1 christos bpf_u_int32 *x;
428 1.1 christos
429 1.1 christos /*
430 1.1 christos * Initialize sets to contain all nodes.
431 1.1 christos */
432 1.9 christos x = opt_state->all_dom_sets;
433 1.12 christos /*
434 1.12 christos * In opt_init(), we've made sure the product doesn't overflow.
435 1.12 christos */
436 1.9 christos i = opt_state->n_blocks * opt_state->nodewords;
437 1.12 christos while (i != 0) {
438 1.12 christos --i;
439 1.10 christos *x++ = 0xFFFFFFFFU;
440 1.12 christos }
441 1.1 christos /* Root starts off empty. */
442 1.12 christos for (i = opt_state->nodewords; i != 0;) {
443 1.12 christos --i;
444 1.1 christos root->dom[i] = 0;
445 1.12 christos }
446 1.1 christos
447 1.1 christos /* root->level is the highest level no found. */
448 1.12 christos for (level = root->level; level >= 0; --level) {
449 1.12 christos for (b = opt_state->levels[level]; b; b = b->link) {
450 1.1 christos SET_INSERT(b->dom, b->id);
451 1.1 christos if (JT(b) == 0)
452 1.1 christos continue;
453 1.9 christos SET_INTERSECT(JT(b)->dom, b->dom, opt_state->nodewords);
454 1.9 christos SET_INTERSECT(JF(b)->dom, b->dom, opt_state->nodewords);
455 1.1 christos }
456 1.1 christos }
457 1.1 christos }
458 1.1 christos
459 1.1 christos static void
460 1.9 christos propedom(opt_state_t *opt_state, struct edge *ep)
461 1.1 christos {
462 1.1 christos SET_INSERT(ep->edom, ep->id);
463 1.1 christos if (ep->succ) {
464 1.9 christos SET_INTERSECT(ep->succ->et.edom, ep->edom, opt_state->edgewords);
465 1.9 christos SET_INTERSECT(ep->succ->ef.edom, ep->edom, opt_state->edgewords);
466 1.1 christos }
467 1.1 christos }
468 1.1 christos
469 1.1 christos /*
470 1.1 christos * Compute edge dominators.
471 1.1 christos * Assumes graph has been leveled and predecessors established.
472 1.1 christos */
473 1.1 christos static void
474 1.9 christos find_edom(opt_state_t *opt_state, struct block *root)
475 1.1 christos {
476 1.12 christos u_int i;
477 1.1 christos uset x;
478 1.12 christos int level;
479 1.1 christos struct block *b;
480 1.1 christos
481 1.9 christos x = opt_state->all_edge_sets;
482 1.12 christos /*
483 1.12 christos * In opt_init(), we've made sure the product doesn't overflow.
484 1.12 christos */
485 1.12 christos for (i = opt_state->n_edges * opt_state->edgewords; i != 0; ) {
486 1.12 christos --i;
487 1.10 christos x[i] = 0xFFFFFFFFU;
488 1.12 christos }
489 1.1 christos
490 1.1 christos /* root->level is the highest level no found. */
491 1.9 christos memset(root->et.edom, 0, opt_state->edgewords * sizeof(*(uset)0));
492 1.9 christos memset(root->ef.edom, 0, opt_state->edgewords * sizeof(*(uset)0));
493 1.12 christos for (level = root->level; level >= 0; --level) {
494 1.12 christos for (b = opt_state->levels[level]; b != 0; b = b->link) {
495 1.9 christos propedom(opt_state, &b->et);
496 1.9 christos propedom(opt_state, &b->ef);
497 1.1 christos }
498 1.1 christos }
499 1.1 christos }
500 1.1 christos
501 1.1 christos /*
502 1.1 christos * Find the backwards transitive closure of the flow graph. These sets
503 1.1 christos * are backwards in the sense that we find the set of nodes that reach
504 1.1 christos * a given node, not the set of nodes that can be reached by a node.
505 1.1 christos *
506 1.1 christos * Assumes graph has been leveled.
507 1.1 christos */
508 1.1 christos static void
509 1.9 christos find_closure(opt_state_t *opt_state, struct block *root)
510 1.1 christos {
511 1.12 christos int level;
512 1.1 christos struct block *b;
513 1.1 christos
514 1.1 christos /*
515 1.1 christos * Initialize sets to contain no nodes.
516 1.1 christos */
517 1.9 christos memset((char *)opt_state->all_closure_sets, 0,
518 1.9 christos opt_state->n_blocks * opt_state->nodewords * sizeof(*opt_state->all_closure_sets));
519 1.1 christos
520 1.1 christos /* root->level is the highest level no found. */
521 1.12 christos for (level = root->level; level >= 0; --level) {
522 1.12 christos for (b = opt_state->levels[level]; b; b = b->link) {
523 1.1 christos SET_INSERT(b->closure, b->id);
524 1.1 christos if (JT(b) == 0)
525 1.1 christos continue;
526 1.9 christos SET_UNION(JT(b)->closure, b->closure, opt_state->nodewords);
527 1.9 christos SET_UNION(JF(b)->closure, b->closure, opt_state->nodewords);
528 1.1 christos }
529 1.1 christos }
530 1.1 christos }
531 1.1 christos
532 1.1 christos /*
533 1.12 christos * Return the register number that is used by s.
534 1.12 christos *
535 1.12 christos * Returns ATOM_A if A is used, ATOM_X if X is used, AX_ATOM if both A and X
536 1.12 christos * are used, the scratch memory location's number if a scratch memory
537 1.12 christos * location is used (e.g., 0 for M[0]), or -1 if none of those are used.
538 1.1 christos *
539 1.1 christos * The implementation should probably change to an array access.
540 1.1 christos */
541 1.1 christos static int
542 1.6 christos atomuse(struct stmt *s)
543 1.1 christos {
544 1.1 christos register int c = s->code;
545 1.1 christos
546 1.1 christos if (c == NOP)
547 1.1 christos return -1;
548 1.1 christos
549 1.1 christos switch (BPF_CLASS(c)) {
550 1.1 christos
551 1.1 christos case BPF_RET:
552 1.1 christos return (BPF_RVAL(c) == BPF_A) ? A_ATOM :
553 1.1 christos (BPF_RVAL(c) == BPF_X) ? X_ATOM : -1;
554 1.1 christos
555 1.1 christos case BPF_LD:
556 1.1 christos case BPF_LDX:
557 1.12 christos /*
558 1.12 christos * As there are fewer than 2^31 memory locations,
559 1.12 christos * s->k should be convertible to int without problems.
560 1.12 christos */
561 1.1 christos return (BPF_MODE(c) == BPF_IND) ? X_ATOM :
562 1.12 christos (BPF_MODE(c) == BPF_MEM) ? (int)s->k : -1;
563 1.1 christos
564 1.1 christos case BPF_ST:
565 1.1 christos return A_ATOM;
566 1.1 christos
567 1.1 christos case BPF_STX:
568 1.1 christos return X_ATOM;
569 1.1 christos
570 1.1 christos case BPF_JMP:
571 1.1 christos case BPF_ALU:
572 1.1 christos if (BPF_SRC(c) == BPF_X)
573 1.1 christos return AX_ATOM;
574 1.1 christos return A_ATOM;
575 1.1 christos
576 1.1 christos case BPF_MISC:
577 1.1 christos return BPF_MISCOP(c) == BPF_TXA ? X_ATOM : A_ATOM;
578 1.1 christos }
579 1.1 christos abort();
580 1.1 christos /* NOTREACHED */
581 1.1 christos }
582 1.1 christos
583 1.1 christos /*
584 1.1 christos * Return the register number that is defined by 's'. We assume that
585 1.1 christos * a single stmt cannot define more than one register. If no register
586 1.1 christos * is defined, return -1.
587 1.1 christos *
588 1.1 christos * The implementation should probably change to an array access.
589 1.1 christos */
590 1.1 christos static int
591 1.6 christos atomdef(struct stmt *s)
592 1.1 christos {
593 1.1 christos if (s->code == NOP)
594 1.1 christos return -1;
595 1.1 christos
596 1.1 christos switch (BPF_CLASS(s->code)) {
597 1.1 christos
598 1.1 christos case BPF_LD:
599 1.1 christos case BPF_ALU:
600 1.1 christos return A_ATOM;
601 1.1 christos
602 1.1 christos case BPF_LDX:
603 1.1 christos return X_ATOM;
604 1.1 christos
605 1.1 christos case BPF_ST:
606 1.1 christos case BPF_STX:
607 1.1 christos return s->k;
608 1.1 christos
609 1.1 christos case BPF_MISC:
610 1.1 christos return BPF_MISCOP(s->code) == BPF_TAX ? X_ATOM : A_ATOM;
611 1.1 christos }
612 1.1 christos return -1;
613 1.1 christos }
614 1.1 christos
615 1.1 christos /*
616 1.1 christos * Compute the sets of registers used, defined, and killed by 'b'.
617 1.1 christos *
618 1.1 christos * "Used" means that a statement in 'b' uses the register before any
619 1.1 christos * statement in 'b' defines it, i.e. it uses the value left in
620 1.1 christos * that register by a predecessor block of this block.
621 1.1 christos * "Defined" means that a statement in 'b' defines it.
622 1.1 christos * "Killed" means that a statement in 'b' defines it before any
623 1.1 christos * statement in 'b' uses it, i.e. it kills the value left in that
624 1.1 christos * register by a predecessor block of this block.
625 1.1 christos */
626 1.1 christos static void
627 1.6 christos compute_local_ud(struct block *b)
628 1.1 christos {
629 1.1 christos struct slist *s;
630 1.9 christos atomset def = 0, use = 0, killed = 0;
631 1.1 christos int atom;
632 1.1 christos
633 1.1 christos for (s = b->stmts; s; s = s->next) {
634 1.1 christos if (s->s.code == NOP)
635 1.1 christos continue;
636 1.1 christos atom = atomuse(&s->s);
637 1.1 christos if (atom >= 0) {
638 1.1 christos if (atom == AX_ATOM) {
639 1.1 christos if (!ATOMELEM(def, X_ATOM))
640 1.1 christos use |= ATOMMASK(X_ATOM);
641 1.1 christos if (!ATOMELEM(def, A_ATOM))
642 1.1 christos use |= ATOMMASK(A_ATOM);
643 1.1 christos }
644 1.1 christos else if (atom < N_ATOMS) {
645 1.1 christos if (!ATOMELEM(def, atom))
646 1.1 christos use |= ATOMMASK(atom);
647 1.1 christos }
648 1.1 christos else
649 1.1 christos abort();
650 1.1 christos }
651 1.1 christos atom = atomdef(&s->s);
652 1.1 christos if (atom >= 0) {
653 1.1 christos if (!ATOMELEM(use, atom))
654 1.9 christos killed |= ATOMMASK(atom);
655 1.1 christos def |= ATOMMASK(atom);
656 1.1 christos }
657 1.1 christos }
658 1.1 christos if (BPF_CLASS(b->s.code) == BPF_JMP) {
659 1.1 christos /*
660 1.1 christos * XXX - what about RET?
661 1.1 christos */
662 1.1 christos atom = atomuse(&b->s);
663 1.1 christos if (atom >= 0) {
664 1.1 christos if (atom == AX_ATOM) {
665 1.1 christos if (!ATOMELEM(def, X_ATOM))
666 1.1 christos use |= ATOMMASK(X_ATOM);
667 1.1 christos if (!ATOMELEM(def, A_ATOM))
668 1.1 christos use |= ATOMMASK(A_ATOM);
669 1.1 christos }
670 1.1 christos else if (atom < N_ATOMS) {
671 1.1 christos if (!ATOMELEM(def, atom))
672 1.1 christos use |= ATOMMASK(atom);
673 1.1 christos }
674 1.1 christos else
675 1.1 christos abort();
676 1.1 christos }
677 1.1 christos }
678 1.1 christos
679 1.1 christos b->def = def;
680 1.9 christos b->kill = killed;
681 1.1 christos b->in_use = use;
682 1.1 christos }
683 1.1 christos
684 1.1 christos /*
685 1.1 christos * Assume graph is already leveled.
686 1.1 christos */
687 1.1 christos static void
688 1.9 christos find_ud(opt_state_t *opt_state, struct block *root)
689 1.1 christos {
690 1.1 christos int i, maxlevel;
691 1.1 christos struct block *p;
692 1.1 christos
693 1.1 christos /*
694 1.1 christos * root->level is the highest level no found;
695 1.1 christos * count down from there.
696 1.1 christos */
697 1.1 christos maxlevel = root->level;
698 1.1 christos for (i = maxlevel; i >= 0; --i)
699 1.9 christos for (p = opt_state->levels[i]; p; p = p->link) {
700 1.1 christos compute_local_ud(p);
701 1.1 christos p->out_use = 0;
702 1.1 christos }
703 1.1 christos
704 1.1 christos for (i = 1; i <= maxlevel; ++i) {
705 1.9 christos for (p = opt_state->levels[i]; p; p = p->link) {
706 1.1 christos p->out_use |= JT(p)->in_use | JF(p)->in_use;
707 1.1 christos p->in_use |= p->out_use &~ p->kill;
708 1.1 christos }
709 1.1 christos }
710 1.1 christos }
711 1.1 christos static void
712 1.9 christos init_val(opt_state_t *opt_state)
713 1.1 christos {
714 1.9 christos opt_state->curval = 0;
715 1.9 christos opt_state->next_vnode = opt_state->vnode_base;
716 1.9 christos memset((char *)opt_state->vmap, 0, opt_state->maxval * sizeof(*opt_state->vmap));
717 1.9 christos memset((char *)opt_state->hashtbl, 0, sizeof opt_state->hashtbl);
718 1.1 christos }
719 1.1 christos
720 1.12 christos /*
721 1.12 christos * Because we really don't have an IR, this stuff is a little messy.
722 1.12 christos *
723 1.12 christos * This routine looks in the table of existing value number for a value
724 1.12 christos * with generated from an operation with the specified opcode and
725 1.12 christos * the specified values. If it finds it, it returns its value number,
726 1.12 christos * otherwise it makes a new entry in the table and returns the
727 1.12 christos * value number of that entry.
728 1.12 christos */
729 1.12 christos static bpf_u_int32
730 1.12 christos F(opt_state_t *opt_state, int code, bpf_u_int32 v0, bpf_u_int32 v1)
731 1.1 christos {
732 1.1 christos u_int hash;
733 1.12 christos bpf_u_int32 val;
734 1.1 christos struct valnode *p;
735 1.1 christos
736 1.12 christos hash = (u_int)code ^ (v0 << 4) ^ (v1 << 8);
737 1.1 christos hash %= MODULUS;
738 1.1 christos
739 1.9 christos for (p = opt_state->hashtbl[hash]; p; p = p->next)
740 1.1 christos if (p->code == code && p->v0 == v0 && p->v1 == v1)
741 1.1 christos return p->val;
742 1.1 christos
743 1.12 christos /*
744 1.12 christos * Not found. Allocate a new value, and assign it a new
745 1.12 christos * value number.
746 1.12 christos *
747 1.12 christos * opt_state->curval starts out as 0, which means VAL_UNKNOWN; we
748 1.12 christos * increment it before using it as the new value number, which
749 1.12 christos * means we never assign VAL_UNKNOWN.
750 1.12 christos *
751 1.12 christos * XXX - unless we overflow, but we probably won't have 2^32-1
752 1.12 christos * values; we treat 32 bits as effectively infinite.
753 1.12 christos */
754 1.9 christos val = ++opt_state->curval;
755 1.1 christos if (BPF_MODE(code) == BPF_IMM &&
756 1.1 christos (BPF_CLASS(code) == BPF_LD || BPF_CLASS(code) == BPF_LDX)) {
757 1.9 christos opt_state->vmap[val].const_val = v0;
758 1.9 christos opt_state->vmap[val].is_const = 1;
759 1.1 christos }
760 1.9 christos p = opt_state->next_vnode++;
761 1.1 christos p->val = val;
762 1.1 christos p->code = code;
763 1.1 christos p->v0 = v0;
764 1.1 christos p->v1 = v1;
765 1.9 christos p->next = opt_state->hashtbl[hash];
766 1.9 christos opt_state->hashtbl[hash] = p;
767 1.1 christos
768 1.1 christos return val;
769 1.1 christos }
770 1.1 christos
771 1.1 christos static inline void
772 1.12 christos vstore(struct stmt *s, bpf_u_int32 *valp, bpf_u_int32 newval, int alter)
773 1.1 christos {
774 1.10 christos if (alter && newval != VAL_UNKNOWN && *valp == newval)
775 1.1 christos s->code = NOP;
776 1.1 christos else
777 1.1 christos *valp = newval;
778 1.1 christos }
779 1.1 christos
780 1.6 christos /*
781 1.6 christos * Do constant-folding on binary operators.
782 1.6 christos * (Unary operators are handled elsewhere.)
783 1.6 christos */
784 1.1 christos static void
785 1.12 christos fold_op(opt_state_t *opt_state, struct stmt *s, bpf_u_int32 v0, bpf_u_int32 v1)
786 1.1 christos {
787 1.1 christos bpf_u_int32 a, b;
788 1.1 christos
789 1.9 christos a = opt_state->vmap[v0].const_val;
790 1.9 christos b = opt_state->vmap[v1].const_val;
791 1.1 christos
792 1.1 christos switch (BPF_OP(s->code)) {
793 1.1 christos case BPF_ADD:
794 1.1 christos a += b;
795 1.1 christos break;
796 1.1 christos
797 1.1 christos case BPF_SUB:
798 1.1 christos a -= b;
799 1.1 christos break;
800 1.1 christos
801 1.1 christos case BPF_MUL:
802 1.1 christos a *= b;
803 1.1 christos break;
804 1.1 christos
805 1.1 christos case BPF_DIV:
806 1.1 christos if (b == 0)
807 1.11 christos opt_error(opt_state, "division by zero");
808 1.1 christos a /= b;
809 1.1 christos break;
810 1.1 christos
811 1.7 christos case BPF_MOD:
812 1.7 christos if (b == 0)
813 1.11 christos opt_error(opt_state, "modulus by zero");
814 1.7 christos a %= b;
815 1.7 christos break;
816 1.7 christos
817 1.1 christos case BPF_AND:
818 1.1 christos a &= b;
819 1.1 christos break;
820 1.1 christos
821 1.1 christos case BPF_OR:
822 1.1 christos a |= b;
823 1.1 christos break;
824 1.1 christos
825 1.7 christos case BPF_XOR:
826 1.7 christos a ^= b;
827 1.7 christos break;
828 1.7 christos
829 1.1 christos case BPF_LSH:
830 1.11 christos /*
831 1.11 christos * A left shift of more than the width of the type
832 1.11 christos * is undefined in C; we'll just treat it as shifting
833 1.11 christos * all the bits out.
834 1.11 christos *
835 1.11 christos * XXX - the BPF interpreter doesn't check for this,
836 1.11 christos * so its behavior is dependent on the behavior of
837 1.11 christos * the processor on which it's running. There are
838 1.11 christos * processors on which it shifts all the bits out
839 1.11 christos * and processors on which it does no shift.
840 1.11 christos */
841 1.11 christos if (b < 32)
842 1.11 christos a <<= b;
843 1.11 christos else
844 1.11 christos a = 0;
845 1.1 christos break;
846 1.1 christos
847 1.1 christos case BPF_RSH:
848 1.11 christos /*
849 1.11 christos * A right shift of more than the width of the type
850 1.11 christos * is undefined in C; we'll just treat it as shifting
851 1.11 christos * all the bits out.
852 1.11 christos *
853 1.11 christos * XXX - the BPF interpreter doesn't check for this,
854 1.11 christos * so its behavior is dependent on the behavior of
855 1.11 christos * the processor on which it's running. There are
856 1.11 christos * processors on which it shifts all the bits out
857 1.11 christos * and processors on which it does no shift.
858 1.11 christos */
859 1.11 christos if (b < 32)
860 1.11 christos a >>= b;
861 1.11 christos else
862 1.11 christos a = 0;
863 1.1 christos break;
864 1.1 christos
865 1.1 christos default:
866 1.1 christos abort();
867 1.1 christos }
868 1.1 christos s->k = a;
869 1.1 christos s->code = BPF_LD|BPF_IMM;
870 1.12 christos /*
871 1.12 christos * XXX - optimizer loop detection.
872 1.12 christos */
873 1.12 christos opt_state->non_branch_movement_performed = 1;
874 1.9 christos opt_state->done = 0;
875 1.1 christos }
876 1.1 christos
877 1.1 christos static inline struct slist *
878 1.6 christos this_op(struct slist *s)
879 1.1 christos {
880 1.1 christos while (s != 0 && s->s.code == NOP)
881 1.1 christos s = s->next;
882 1.1 christos return s;
883 1.1 christos }
884 1.1 christos
885 1.1 christos static void
886 1.6 christos opt_not(struct block *b)
887 1.1 christos {
888 1.1 christos struct block *tmp = JT(b);
889 1.1 christos
890 1.1 christos JT(b) = JF(b);
891 1.1 christos JF(b) = tmp;
892 1.1 christos }
893 1.1 christos
894 1.1 christos static void
895 1.9 christos opt_peep(opt_state_t *opt_state, struct block *b)
896 1.1 christos {
897 1.1 christos struct slist *s;
898 1.1 christos struct slist *next, *last;
899 1.12 christos bpf_u_int32 val;
900 1.1 christos
901 1.1 christos s = b->stmts;
902 1.1 christos if (s == 0)
903 1.1 christos return;
904 1.1 christos
905 1.1 christos last = s;
906 1.1 christos for (/*empty*/; /*empty*/; s = next) {
907 1.1 christos /*
908 1.1 christos * Skip over nops.
909 1.1 christos */
910 1.1 christos s = this_op(s);
911 1.1 christos if (s == 0)
912 1.1 christos break; /* nothing left in the block */
913 1.1 christos
914 1.1 christos /*
915 1.1 christos * Find the next real instruction after that one
916 1.1 christos * (skipping nops).
917 1.1 christos */
918 1.1 christos next = this_op(s->next);
919 1.1 christos if (next == 0)
920 1.1 christos break; /* no next instruction */
921 1.1 christos last = next;
922 1.1 christos
923 1.1 christos /*
924 1.1 christos * st M[k] --> st M[k]
925 1.1 christos * ldx M[k] tax
926 1.1 christos */
927 1.1 christos if (s->s.code == BPF_ST &&
928 1.1 christos next->s.code == (BPF_LDX|BPF_MEM) &&
929 1.1 christos s->s.k == next->s.k) {
930 1.12 christos /*
931 1.12 christos * XXX - optimizer loop detection.
932 1.12 christos */
933 1.12 christos opt_state->non_branch_movement_performed = 1;
934 1.9 christos opt_state->done = 0;
935 1.1 christos next->s.code = BPF_MISC|BPF_TAX;
936 1.1 christos }
937 1.1 christos /*
938 1.1 christos * ld #k --> ldx #k
939 1.1 christos * tax txa
940 1.1 christos */
941 1.1 christos if (s->s.code == (BPF_LD|BPF_IMM) &&
942 1.1 christos next->s.code == (BPF_MISC|BPF_TAX)) {
943 1.1 christos s->s.code = BPF_LDX|BPF_IMM;
944 1.1 christos next->s.code = BPF_MISC|BPF_TXA;
945 1.12 christos /*
946 1.12 christos * XXX - optimizer loop detection.
947 1.12 christos */
948 1.12 christos opt_state->non_branch_movement_performed = 1;
949 1.9 christos opt_state->done = 0;
950 1.1 christos }
951 1.1 christos /*
952 1.1 christos * This is an ugly special case, but it happens
953 1.1 christos * when you say tcp[k] or udp[k] where k is a constant.
954 1.1 christos */
955 1.1 christos if (s->s.code == (BPF_LD|BPF_IMM)) {
956 1.1 christos struct slist *add, *tax, *ild;
957 1.1 christos
958 1.1 christos /*
959 1.1 christos * Check that X isn't used on exit from this
960 1.1 christos * block (which the optimizer might cause).
961 1.1 christos * We know the code generator won't generate
962 1.1 christos * any local dependencies.
963 1.1 christos */
964 1.1 christos if (ATOMELEM(b->out_use, X_ATOM))
965 1.1 christos continue;
966 1.1 christos
967 1.1 christos /*
968 1.1 christos * Check that the instruction following the ldi
969 1.1 christos * is an addx, or it's an ldxms with an addx
970 1.1 christos * following it (with 0 or more nops between the
971 1.1 christos * ldxms and addx).
972 1.1 christos */
973 1.1 christos if (next->s.code != (BPF_LDX|BPF_MSH|BPF_B))
974 1.1 christos add = next;
975 1.1 christos else
976 1.1 christos add = this_op(next->next);
977 1.1 christos if (add == 0 || add->s.code != (BPF_ALU|BPF_ADD|BPF_X))
978 1.1 christos continue;
979 1.1 christos
980 1.1 christos /*
981 1.1 christos * Check that a tax follows that (with 0 or more
982 1.1 christos * nops between them).
983 1.1 christos */
984 1.1 christos tax = this_op(add->next);
985 1.1 christos if (tax == 0 || tax->s.code != (BPF_MISC|BPF_TAX))
986 1.1 christos continue;
987 1.1 christos
988 1.1 christos /*
989 1.1 christos * Check that an ild follows that (with 0 or more
990 1.1 christos * nops between them).
991 1.1 christos */
992 1.1 christos ild = this_op(tax->next);
993 1.1 christos if (ild == 0 || BPF_CLASS(ild->s.code) != BPF_LD ||
994 1.1 christos BPF_MODE(ild->s.code) != BPF_IND)
995 1.1 christos continue;
996 1.1 christos /*
997 1.1 christos * We want to turn this sequence:
998 1.1 christos *
999 1.1 christos * (004) ldi #0x2 {s}
1000 1.1 christos * (005) ldxms [14] {next} -- optional
1001 1.1 christos * (006) addx {add}
1002 1.1 christos * (007) tax {tax}
1003 1.1 christos * (008) ild [x+0] {ild}
1004 1.1 christos *
1005 1.1 christos * into this sequence:
1006 1.1 christos *
1007 1.1 christos * (004) nop
1008 1.1 christos * (005) ldxms [14]
1009 1.1 christos * (006) nop
1010 1.1 christos * (007) nop
1011 1.1 christos * (008) ild [x+2]
1012 1.1 christos *
1013 1.1 christos * XXX We need to check that X is not
1014 1.1 christos * subsequently used, because we want to change
1015 1.1 christos * what'll be in it after this sequence.
1016 1.1 christos *
1017 1.1 christos * We know we can eliminate the accumulator
1018 1.1 christos * modifications earlier in the sequence since
1019 1.1 christos * it is defined by the last stmt of this sequence
1020 1.1 christos * (i.e., the last statement of the sequence loads
1021 1.1 christos * a value into the accumulator, so we can eliminate
1022 1.1 christos * earlier operations on the accumulator).
1023 1.1 christos */
1024 1.1 christos ild->s.k += s->s.k;
1025 1.1 christos s->s.code = NOP;
1026 1.1 christos add->s.code = NOP;
1027 1.1 christos tax->s.code = NOP;
1028 1.12 christos /*
1029 1.12 christos * XXX - optimizer loop detection.
1030 1.12 christos */
1031 1.12 christos opt_state->non_branch_movement_performed = 1;
1032 1.9 christos opt_state->done = 0;
1033 1.1 christos }
1034 1.1 christos }
1035 1.1 christos /*
1036 1.1 christos * If the comparison at the end of a block is an equality
1037 1.1 christos * comparison against a constant, and nobody uses the value
1038 1.1 christos * we leave in the A register at the end of a block, and
1039 1.1 christos * the operation preceding the comparison is an arithmetic
1040 1.1 christos * operation, we can sometime optimize it away.
1041 1.1 christos */
1042 1.1 christos if (b->s.code == (BPF_JMP|BPF_JEQ|BPF_K) &&
1043 1.1 christos !ATOMELEM(b->out_use, A_ATOM)) {
1044 1.12 christos /*
1045 1.12 christos * We can optimize away certain subtractions of the
1046 1.12 christos * X register.
1047 1.12 christos */
1048 1.1 christos if (last->s.code == (BPF_ALU|BPF_SUB|BPF_X)) {
1049 1.1 christos val = b->val[X_ATOM];
1050 1.9 christos if (opt_state->vmap[val].is_const) {
1051 1.1 christos /*
1052 1.1 christos * If we have a subtract to do a comparison,
1053 1.1 christos * and the X register is a known constant,
1054 1.1 christos * we can merge this value into the
1055 1.1 christos * comparison:
1056 1.1 christos *
1057 1.1 christos * sub x -> nop
1058 1.1 christos * jeq #y jeq #(x+y)
1059 1.1 christos */
1060 1.9 christos b->s.k += opt_state->vmap[val].const_val;
1061 1.1 christos last->s.code = NOP;
1062 1.12 christos /*
1063 1.12 christos * XXX - optimizer loop detection.
1064 1.12 christos */
1065 1.12 christos opt_state->non_branch_movement_performed = 1;
1066 1.9 christos opt_state->done = 0;
1067 1.1 christos } else if (b->s.k == 0) {
1068 1.1 christos /*
1069 1.1 christos * If the X register isn't a constant,
1070 1.1 christos * and the comparison in the test is
1071 1.1 christos * against 0, we can compare with the
1072 1.1 christos * X register, instead:
1073 1.1 christos *
1074 1.1 christos * sub x -> nop
1075 1.1 christos * jeq #0 jeq x
1076 1.1 christos */
1077 1.1 christos last->s.code = NOP;
1078 1.1 christos b->s.code = BPF_JMP|BPF_JEQ|BPF_X;
1079 1.12 christos /*
1080 1.12 christos * XXX - optimizer loop detection.
1081 1.12 christos */
1082 1.12 christos opt_state->non_branch_movement_performed = 1;
1083 1.9 christos opt_state->done = 0;
1084 1.1 christos }
1085 1.1 christos }
1086 1.1 christos /*
1087 1.1 christos * Likewise, a constant subtract can be simplified:
1088 1.1 christos *
1089 1.1 christos * sub #x -> nop
1090 1.1 christos * jeq #y -> jeq #(x+y)
1091 1.1 christos */
1092 1.1 christos else if (last->s.code == (BPF_ALU|BPF_SUB|BPF_K)) {
1093 1.1 christos last->s.code = NOP;
1094 1.1 christos b->s.k += last->s.k;
1095 1.12 christos /*
1096 1.12 christos * XXX - optimizer loop detection.
1097 1.12 christos */
1098 1.12 christos opt_state->non_branch_movement_performed = 1;
1099 1.9 christos opt_state->done = 0;
1100 1.1 christos }
1101 1.1 christos /*
1102 1.1 christos * And, similarly, a constant AND can be simplified
1103 1.1 christos * if we're testing against 0, i.e.:
1104 1.1 christos *
1105 1.1 christos * and #k nop
1106 1.1 christos * jeq #0 -> jset #k
1107 1.1 christos */
1108 1.1 christos else if (last->s.code == (BPF_ALU|BPF_AND|BPF_K) &&
1109 1.1 christos b->s.k == 0) {
1110 1.1 christos b->s.k = last->s.k;
1111 1.1 christos b->s.code = BPF_JMP|BPF_K|BPF_JSET;
1112 1.1 christos last->s.code = NOP;
1113 1.12 christos /*
1114 1.12 christos * XXX - optimizer loop detection.
1115 1.12 christos */
1116 1.12 christos opt_state->non_branch_movement_performed = 1;
1117 1.9 christos opt_state->done = 0;
1118 1.1 christos opt_not(b);
1119 1.1 christos }
1120 1.1 christos }
1121 1.1 christos /*
1122 1.1 christos * jset #0 -> never
1123 1.1 christos * jset #ffffffff -> always
1124 1.1 christos */
1125 1.1 christos if (b->s.code == (BPF_JMP|BPF_K|BPF_JSET)) {
1126 1.1 christos if (b->s.k == 0)
1127 1.1 christos JT(b) = JF(b);
1128 1.12 christos if (b->s.k == 0xffffffffU)
1129 1.1 christos JF(b) = JT(b);
1130 1.1 christos }
1131 1.1 christos /*
1132 1.1 christos * If we're comparing against the index register, and the index
1133 1.1 christos * register is a known constant, we can just compare against that
1134 1.1 christos * constant.
1135 1.1 christos */
1136 1.1 christos val = b->val[X_ATOM];
1137 1.9 christos if (opt_state->vmap[val].is_const && BPF_SRC(b->s.code) == BPF_X) {
1138 1.12 christos bpf_u_int32 v = opt_state->vmap[val].const_val;
1139 1.1 christos b->s.code &= ~BPF_X;
1140 1.1 christos b->s.k = v;
1141 1.1 christos }
1142 1.1 christos /*
1143 1.1 christos * If the accumulator is a known constant, we can compute the
1144 1.1 christos * comparison result.
1145 1.1 christos */
1146 1.1 christos val = b->val[A_ATOM];
1147 1.9 christos if (opt_state->vmap[val].is_const && BPF_SRC(b->s.code) == BPF_K) {
1148 1.12 christos bpf_u_int32 v = opt_state->vmap[val].const_val;
1149 1.1 christos switch (BPF_OP(b->s.code)) {
1150 1.1 christos
1151 1.1 christos case BPF_JEQ:
1152 1.1 christos v = v == b->s.k;
1153 1.1 christos break;
1154 1.1 christos
1155 1.1 christos case BPF_JGT:
1156 1.12 christos v = v > b->s.k;
1157 1.1 christos break;
1158 1.1 christos
1159 1.1 christos case BPF_JGE:
1160 1.12 christos v = v >= b->s.k;
1161 1.1 christos break;
1162 1.1 christos
1163 1.1 christos case BPF_JSET:
1164 1.1 christos v &= b->s.k;
1165 1.1 christos break;
1166 1.1 christos
1167 1.1 christos default:
1168 1.1 christos abort();
1169 1.1 christos }
1170 1.12 christos if (JF(b) != JT(b)) {
1171 1.12 christos /*
1172 1.12 christos * XXX - optimizer loop detection.
1173 1.12 christos */
1174 1.12 christos opt_state->non_branch_movement_performed = 1;
1175 1.9 christos opt_state->done = 0;
1176 1.12 christos }
1177 1.1 christos if (v)
1178 1.1 christos JF(b) = JT(b);
1179 1.1 christos else
1180 1.1 christos JT(b) = JF(b);
1181 1.1 christos }
1182 1.1 christos }
1183 1.1 christos
1184 1.1 christos /*
1185 1.1 christos * Compute the symbolic value of expression of 's', and update
1186 1.1 christos * anything it defines in the value table 'val'. If 'alter' is true,
1187 1.1 christos * do various optimizations. This code would be cleaner if symbolic
1188 1.1 christos * evaluation and code transformations weren't folded together.
1189 1.1 christos */
1190 1.1 christos static void
1191 1.12 christos opt_stmt(opt_state_t *opt_state, struct stmt *s, bpf_u_int32 val[], int alter)
1192 1.1 christos {
1193 1.1 christos int op;
1194 1.12 christos bpf_u_int32 v;
1195 1.1 christos
1196 1.1 christos switch (s->code) {
1197 1.1 christos
1198 1.1 christos case BPF_LD|BPF_ABS|BPF_W:
1199 1.1 christos case BPF_LD|BPF_ABS|BPF_H:
1200 1.1 christos case BPF_LD|BPF_ABS|BPF_B:
1201 1.9 christos v = F(opt_state, s->code, s->k, 0L);
1202 1.1 christos vstore(s, &val[A_ATOM], v, alter);
1203 1.1 christos break;
1204 1.1 christos
1205 1.1 christos case BPF_LD|BPF_IND|BPF_W:
1206 1.1 christos case BPF_LD|BPF_IND|BPF_H:
1207 1.1 christos case BPF_LD|BPF_IND|BPF_B:
1208 1.1 christos v = val[X_ATOM];
1209 1.9 christos if (alter && opt_state->vmap[v].is_const) {
1210 1.1 christos s->code = BPF_LD|BPF_ABS|BPF_SIZE(s->code);
1211 1.9 christos s->k += opt_state->vmap[v].const_val;
1212 1.9 christos v = F(opt_state, s->code, s->k, 0L);
1213 1.12 christos /*
1214 1.12 christos * XXX - optimizer loop detection.
1215 1.12 christos */
1216 1.12 christos opt_state->non_branch_movement_performed = 1;
1217 1.9 christos opt_state->done = 0;
1218 1.1 christos }
1219 1.1 christos else
1220 1.9 christos v = F(opt_state, s->code, s->k, v);
1221 1.1 christos vstore(s, &val[A_ATOM], v, alter);
1222 1.1 christos break;
1223 1.1 christos
1224 1.1 christos case BPF_LD|BPF_LEN:
1225 1.9 christos v = F(opt_state, s->code, 0L, 0L);
1226 1.1 christos vstore(s, &val[A_ATOM], v, alter);
1227 1.1 christos break;
1228 1.1 christos
1229 1.1 christos case BPF_LD|BPF_IMM:
1230 1.1 christos v = K(s->k);
1231 1.1 christos vstore(s, &val[A_ATOM], v, alter);
1232 1.1 christos break;
1233 1.1 christos
1234 1.1 christos case BPF_LDX|BPF_IMM:
1235 1.1 christos v = K(s->k);
1236 1.1 christos vstore(s, &val[X_ATOM], v, alter);
1237 1.1 christos break;
1238 1.1 christos
1239 1.1 christos case BPF_LDX|BPF_MSH|BPF_B:
1240 1.9 christos v = F(opt_state, s->code, s->k, 0L);
1241 1.1 christos vstore(s, &val[X_ATOM], v, alter);
1242 1.1 christos break;
1243 1.1 christos
1244 1.1 christos case BPF_ALU|BPF_NEG:
1245 1.9 christos if (alter && opt_state->vmap[val[A_ATOM]].is_const) {
1246 1.1 christos s->code = BPF_LD|BPF_IMM;
1247 1.11 christos /*
1248 1.11 christos * Do this negation as unsigned arithmetic; that's
1249 1.11 christos * what modern BPF engines do, and it guarantees
1250 1.11 christos * that all possible values can be negated. (Yeah,
1251 1.11 christos * negating 0x80000000, the minimum signed 32-bit
1252 1.11 christos * two's-complement value, results in 0x80000000,
1253 1.11 christos * so it's still negative, but we *should* be doing
1254 1.11 christos * all unsigned arithmetic here, to match what
1255 1.11 christos * modern BPF engines do.)
1256 1.11 christos *
1257 1.11 christos * Express it as 0U - (unsigned value) so that we
1258 1.11 christos * don't get compiler warnings about negating an
1259 1.11 christos * unsigned value and don't get UBSan warnings
1260 1.11 christos * about the result of negating 0x80000000 being
1261 1.11 christos * undefined.
1262 1.11 christos */
1263 1.12 christos s->k = 0U - opt_state->vmap[val[A_ATOM]].const_val;
1264 1.1 christos val[A_ATOM] = K(s->k);
1265 1.1 christos }
1266 1.1 christos else
1267 1.9 christos val[A_ATOM] = F(opt_state, s->code, val[A_ATOM], 0L);
1268 1.1 christos break;
1269 1.1 christos
1270 1.1 christos case BPF_ALU|BPF_ADD|BPF_K:
1271 1.1 christos case BPF_ALU|BPF_SUB|BPF_K:
1272 1.1 christos case BPF_ALU|BPF_MUL|BPF_K:
1273 1.1 christos case BPF_ALU|BPF_DIV|BPF_K:
1274 1.7 christos case BPF_ALU|BPF_MOD|BPF_K:
1275 1.1 christos case BPF_ALU|BPF_AND|BPF_K:
1276 1.1 christos case BPF_ALU|BPF_OR|BPF_K:
1277 1.7 christos case BPF_ALU|BPF_XOR|BPF_K:
1278 1.1 christos case BPF_ALU|BPF_LSH|BPF_K:
1279 1.1 christos case BPF_ALU|BPF_RSH|BPF_K:
1280 1.1 christos op = BPF_OP(s->code);
1281 1.1 christos if (alter) {
1282 1.1 christos if (s->k == 0) {
1283 1.11 christos /*
1284 1.11 christos * Optimize operations where the constant
1285 1.11 christos * is zero.
1286 1.11 christos *
1287 1.11 christos * Don't optimize away "sub #0"
1288 1.1 christos * as it may be needed later to
1289 1.11 christos * fixup the generated math code.
1290 1.11 christos *
1291 1.11 christos * Fail if we're dividing by zero or taking
1292 1.11 christos * a modulus by zero.
1293 1.11 christos */
1294 1.1 christos if (op == BPF_ADD ||
1295 1.1 christos op == BPF_LSH || op == BPF_RSH ||
1296 1.7 christos op == BPF_OR || op == BPF_XOR) {
1297 1.1 christos s->code = NOP;
1298 1.1 christos break;
1299 1.1 christos }
1300 1.1 christos if (op == BPF_MUL || op == BPF_AND) {
1301 1.1 christos s->code = BPF_LD|BPF_IMM;
1302 1.1 christos val[A_ATOM] = K(s->k);
1303 1.1 christos break;
1304 1.1 christos }
1305 1.11 christos if (op == BPF_DIV)
1306 1.11 christos opt_error(opt_state,
1307 1.11 christos "division by zero");
1308 1.11 christos if (op == BPF_MOD)
1309 1.11 christos opt_error(opt_state,
1310 1.11 christos "modulus by zero");
1311 1.1 christos }
1312 1.9 christos if (opt_state->vmap[val[A_ATOM]].is_const) {
1313 1.11 christos fold_op(opt_state, s, val[A_ATOM], K(s->k));
1314 1.1 christos val[A_ATOM] = K(s->k);
1315 1.1 christos break;
1316 1.1 christos }
1317 1.1 christos }
1318 1.9 christos val[A_ATOM] = F(opt_state, s->code, val[A_ATOM], K(s->k));
1319 1.1 christos break;
1320 1.1 christos
1321 1.1 christos case BPF_ALU|BPF_ADD|BPF_X:
1322 1.1 christos case BPF_ALU|BPF_SUB|BPF_X:
1323 1.1 christos case BPF_ALU|BPF_MUL|BPF_X:
1324 1.1 christos case BPF_ALU|BPF_DIV|BPF_X:
1325 1.7 christos case BPF_ALU|BPF_MOD|BPF_X:
1326 1.1 christos case BPF_ALU|BPF_AND|BPF_X:
1327 1.1 christos case BPF_ALU|BPF_OR|BPF_X:
1328 1.7 christos case BPF_ALU|BPF_XOR|BPF_X:
1329 1.1 christos case BPF_ALU|BPF_LSH|BPF_X:
1330 1.1 christos case BPF_ALU|BPF_RSH|BPF_X:
1331 1.1 christos op = BPF_OP(s->code);
1332 1.9 christos if (alter && opt_state->vmap[val[X_ATOM]].is_const) {
1333 1.9 christos if (opt_state->vmap[val[A_ATOM]].is_const) {
1334 1.11 christos fold_op(opt_state, s, val[A_ATOM], val[X_ATOM]);
1335 1.1 christos val[A_ATOM] = K(s->k);
1336 1.1 christos }
1337 1.1 christos else {
1338 1.1 christos s->code = BPF_ALU|BPF_K|op;
1339 1.9 christos s->k = opt_state->vmap[val[X_ATOM]].const_val;
1340 1.11 christos if ((op == BPF_LSH || op == BPF_RSH) &&
1341 1.12 christos s->k > 31)
1342 1.11 christos opt_error(opt_state,
1343 1.11 christos "shift by more than 31 bits");
1344 1.12 christos /*
1345 1.12 christos * XXX - optimizer loop detection.
1346 1.12 christos */
1347 1.12 christos opt_state->non_branch_movement_performed = 1;
1348 1.9 christos opt_state->done = 0;
1349 1.1 christos val[A_ATOM] =
1350 1.9 christos F(opt_state, s->code, val[A_ATOM], K(s->k));
1351 1.1 christos }
1352 1.1 christos break;
1353 1.1 christos }
1354 1.1 christos /*
1355 1.1 christos * Check if we're doing something to an accumulator
1356 1.1 christos * that is 0, and simplify. This may not seem like
1357 1.1 christos * much of a simplification but it could open up further
1358 1.1 christos * optimizations.
1359 1.1 christos * XXX We could also check for mul by 1, etc.
1360 1.1 christos */
1361 1.9 christos if (alter && opt_state->vmap[val[A_ATOM]].is_const
1362 1.9 christos && opt_state->vmap[val[A_ATOM]].const_val == 0) {
1363 1.7 christos if (op == BPF_ADD || op == BPF_OR || op == BPF_XOR) {
1364 1.1 christos s->code = BPF_MISC|BPF_TXA;
1365 1.1 christos vstore(s, &val[A_ATOM], val[X_ATOM], alter);
1366 1.1 christos break;
1367 1.1 christos }
1368 1.7 christos else if (op == BPF_MUL || op == BPF_DIV || op == BPF_MOD ||
1369 1.1 christos op == BPF_AND || op == BPF_LSH || op == BPF_RSH) {
1370 1.1 christos s->code = BPF_LD|BPF_IMM;
1371 1.1 christos s->k = 0;
1372 1.1 christos vstore(s, &val[A_ATOM], K(s->k), alter);
1373 1.1 christos break;
1374 1.1 christos }
1375 1.1 christos else if (op == BPF_NEG) {
1376 1.1 christos s->code = NOP;
1377 1.1 christos break;
1378 1.1 christos }
1379 1.1 christos }
1380 1.9 christos val[A_ATOM] = F(opt_state, s->code, val[A_ATOM], val[X_ATOM]);
1381 1.1 christos break;
1382 1.1 christos
1383 1.1 christos case BPF_MISC|BPF_TXA:
1384 1.1 christos vstore(s, &val[A_ATOM], val[X_ATOM], alter);
1385 1.1 christos break;
1386 1.1 christos
1387 1.1 christos case BPF_LD|BPF_MEM:
1388 1.1 christos v = val[s->k];
1389 1.9 christos if (alter && opt_state->vmap[v].is_const) {
1390 1.1 christos s->code = BPF_LD|BPF_IMM;
1391 1.9 christos s->k = opt_state->vmap[v].const_val;
1392 1.12 christos /*
1393 1.12 christos * XXX - optimizer loop detection.
1394 1.12 christos */
1395 1.12 christos opt_state->non_branch_movement_performed = 1;
1396 1.9 christos opt_state->done = 0;
1397 1.1 christos }
1398 1.1 christos vstore(s, &val[A_ATOM], v, alter);
1399 1.1 christos break;
1400 1.1 christos
1401 1.1 christos case BPF_MISC|BPF_TAX:
1402 1.1 christos vstore(s, &val[X_ATOM], val[A_ATOM], alter);
1403 1.1 christos break;
1404 1.1 christos
1405 1.1 christos case BPF_LDX|BPF_MEM:
1406 1.1 christos v = val[s->k];
1407 1.9 christos if (alter && opt_state->vmap[v].is_const) {
1408 1.1 christos s->code = BPF_LDX|BPF_IMM;
1409 1.9 christos s->k = opt_state->vmap[v].const_val;
1410 1.12 christos /*
1411 1.12 christos * XXX - optimizer loop detection.
1412 1.12 christos */
1413 1.12 christos opt_state->non_branch_movement_performed = 1;
1414 1.9 christos opt_state->done = 0;
1415 1.1 christos }
1416 1.1 christos vstore(s, &val[X_ATOM], v, alter);
1417 1.1 christos break;
1418 1.1 christos
1419 1.1 christos case BPF_ST:
1420 1.1 christos vstore(s, &val[s->k], val[A_ATOM], alter);
1421 1.1 christos break;
1422 1.1 christos
1423 1.1 christos case BPF_STX:
1424 1.1 christos vstore(s, &val[s->k], val[X_ATOM], alter);
1425 1.1 christos break;
1426 1.1 christos }
1427 1.1 christos }
1428 1.1 christos
1429 1.1 christos static void
1430 1.9 christos deadstmt(opt_state_t *opt_state, register struct stmt *s, register struct stmt *last[])
1431 1.1 christos {
1432 1.1 christos register int atom;
1433 1.1 christos
1434 1.1 christos atom = atomuse(s);
1435 1.1 christos if (atom >= 0) {
1436 1.1 christos if (atom == AX_ATOM) {
1437 1.1 christos last[X_ATOM] = 0;
1438 1.1 christos last[A_ATOM] = 0;
1439 1.1 christos }
1440 1.1 christos else
1441 1.1 christos last[atom] = 0;
1442 1.1 christos }
1443 1.1 christos atom = atomdef(s);
1444 1.1 christos if (atom >= 0) {
1445 1.1 christos if (last[atom]) {
1446 1.12 christos /*
1447 1.12 christos * XXX - optimizer loop detection.
1448 1.12 christos */
1449 1.12 christos opt_state->non_branch_movement_performed = 1;
1450 1.9 christos opt_state->done = 0;
1451 1.1 christos last[atom]->code = NOP;
1452 1.1 christos }
1453 1.1 christos last[atom] = s;
1454 1.1 christos }
1455 1.1 christos }
1456 1.1 christos
1457 1.1 christos static void
1458 1.9 christos opt_deadstores(opt_state_t *opt_state, register struct block *b)
1459 1.1 christos {
1460 1.1 christos register struct slist *s;
1461 1.1 christos register int atom;
1462 1.1 christos struct stmt *last[N_ATOMS];
1463 1.1 christos
1464 1.1 christos memset((char *)last, 0, sizeof last);
1465 1.1 christos
1466 1.1 christos for (s = b->stmts; s != 0; s = s->next)
1467 1.9 christos deadstmt(opt_state, &s->s, last);
1468 1.9 christos deadstmt(opt_state, &b->s, last);
1469 1.1 christos
1470 1.1 christos for (atom = 0; atom < N_ATOMS; ++atom)
1471 1.1 christos if (last[atom] && !ATOMELEM(b->out_use, atom)) {
1472 1.1 christos last[atom]->code = NOP;
1473 1.12 christos /*
1474 1.13 christos * The store was removed as it's dead,
1475 1.13 christos * so the value stored into now has
1476 1.13 christos * an unknown value.
1477 1.13 christos */
1478 1.13 christos vstore(0, &b->val[atom], VAL_UNKNOWN, 0);
1479 1.13 christos /*
1480 1.12 christos * XXX - optimizer loop detection.
1481 1.12 christos */
1482 1.12 christos opt_state->non_branch_movement_performed = 1;
1483 1.9 christos opt_state->done = 0;
1484 1.1 christos }
1485 1.1 christos }
1486 1.1 christos
1487 1.1 christos static void
1488 1.11 christos opt_blk(opt_state_t *opt_state, struct block *b, int do_stmts)
1489 1.1 christos {
1490 1.1 christos struct slist *s;
1491 1.1 christos struct edge *p;
1492 1.1 christos int i;
1493 1.12 christos bpf_u_int32 aval, xval;
1494 1.1 christos
1495 1.1 christos #if 0
1496 1.1 christos for (s = b->stmts; s && s->next; s = s->next)
1497 1.1 christos if (BPF_CLASS(s->s.code) == BPF_JMP) {
1498 1.1 christos do_stmts = 0;
1499 1.1 christos break;
1500 1.1 christos }
1501 1.1 christos #endif
1502 1.1 christos
1503 1.1 christos /*
1504 1.1 christos * Initialize the atom values.
1505 1.1 christos */
1506 1.1 christos p = b->in_edges;
1507 1.1 christos if (p == 0) {
1508 1.1 christos /*
1509 1.1 christos * We have no predecessors, so everything is undefined
1510 1.1 christos * upon entry to this block.
1511 1.1 christos */
1512 1.1 christos memset((char *)b->val, 0, sizeof(b->val));
1513 1.1 christos } else {
1514 1.1 christos /*
1515 1.1 christos * Inherit values from our predecessors.
1516 1.1 christos *
1517 1.1 christos * First, get the values from the predecessor along the
1518 1.1 christos * first edge leading to this node.
1519 1.1 christos */
1520 1.1 christos memcpy((char *)b->val, (char *)p->pred->val, sizeof(b->val));
1521 1.1 christos /*
1522 1.1 christos * Now look at all the other nodes leading to this node.
1523 1.1 christos * If, for the predecessor along that edge, a register
1524 1.1 christos * has a different value from the one we have (i.e.,
1525 1.1 christos * control paths are merging, and the merging paths
1526 1.1 christos * assign different values to that register), give the
1527 1.1 christos * register the undefined value of 0.
1528 1.1 christos */
1529 1.1 christos while ((p = p->next) != NULL) {
1530 1.1 christos for (i = 0; i < N_ATOMS; ++i)
1531 1.1 christos if (b->val[i] != p->pred->val[i])
1532 1.1 christos b->val[i] = 0;
1533 1.1 christos }
1534 1.1 christos }
1535 1.1 christos aval = b->val[A_ATOM];
1536 1.1 christos xval = b->val[X_ATOM];
1537 1.1 christos for (s = b->stmts; s; s = s->next)
1538 1.11 christos opt_stmt(opt_state, &s->s, b->val, do_stmts);
1539 1.1 christos
1540 1.1 christos /*
1541 1.1 christos * This is a special case: if we don't use anything from this
1542 1.1 christos * block, and we load the accumulator or index register with a
1543 1.1 christos * value that is already there, or if this block is a return,
1544 1.1 christos * eliminate all the statements.
1545 1.1 christos *
1546 1.12 christos * XXX - what if it does a store? Presumably that falls under
1547 1.12 christos * the heading of "if we don't use anything from this block",
1548 1.12 christos * i.e., if we use any memory location set to a different
1549 1.12 christos * value by this block, then we use something from this block.
1550 1.1 christos *
1551 1.1 christos * XXX - why does it matter whether we use anything from this
1552 1.1 christos * block? If the accumulator or index register doesn't change
1553 1.1 christos * its value, isn't that OK even if we use that value?
1554 1.1 christos *
1555 1.1 christos * XXX - if we load the accumulator with a different value,
1556 1.1 christos * and the block ends with a conditional branch, we obviously
1557 1.1 christos * can't eliminate it, as the branch depends on that value.
1558 1.1 christos * For the index register, the conditional branch only depends
1559 1.1 christos * on the index register value if the test is against the index
1560 1.1 christos * register value rather than a constant; if nothing uses the
1561 1.1 christos * value we put into the index register, and we're not testing
1562 1.1 christos * against the index register's value, and there aren't any
1563 1.1 christos * other problems that would keep us from eliminating this
1564 1.1 christos * block, can we eliminate it?
1565 1.1 christos */
1566 1.1 christos if (do_stmts &&
1567 1.10 christos ((b->out_use == 0 &&
1568 1.10 christos aval != VAL_UNKNOWN && b->val[A_ATOM] == aval &&
1569 1.10 christos xval != VAL_UNKNOWN && b->val[X_ATOM] == xval) ||
1570 1.1 christos BPF_CLASS(b->s.code) == BPF_RET)) {
1571 1.1 christos if (b->stmts != 0) {
1572 1.1 christos b->stmts = 0;
1573 1.12 christos /*
1574 1.12 christos * XXX - optimizer loop detection.
1575 1.12 christos */
1576 1.12 christos opt_state->non_branch_movement_performed = 1;
1577 1.9 christos opt_state->done = 0;
1578 1.1 christos }
1579 1.1 christos } else {
1580 1.9 christos opt_peep(opt_state, b);
1581 1.9 christos opt_deadstores(opt_state, b);
1582 1.1 christos }
1583 1.1 christos /*
1584 1.1 christos * Set up values for branch optimizer.
1585 1.1 christos */
1586 1.1 christos if (BPF_SRC(b->s.code) == BPF_K)
1587 1.1 christos b->oval = K(b->s.k);
1588 1.1 christos else
1589 1.1 christos b->oval = b->val[X_ATOM];
1590 1.1 christos b->et.code = b->s.code;
1591 1.1 christos b->ef.code = -b->s.code;
1592 1.1 christos }
1593 1.1 christos
1594 1.1 christos /*
1595 1.1 christos * Return true if any register that is used on exit from 'succ', has
1596 1.1 christos * an exit value that is different from the corresponding exit value
1597 1.1 christos * from 'b'.
1598 1.1 christos */
1599 1.1 christos static int
1600 1.6 christos use_conflict(struct block *b, struct block *succ)
1601 1.1 christos {
1602 1.1 christos int atom;
1603 1.1 christos atomset use = succ->out_use;
1604 1.1 christos
1605 1.1 christos if (use == 0)
1606 1.1 christos return 0;
1607 1.1 christos
1608 1.1 christos for (atom = 0; atom < N_ATOMS; ++atom)
1609 1.1 christos if (ATOMELEM(use, atom))
1610 1.1 christos if (b->val[atom] != succ->val[atom])
1611 1.1 christos return 1;
1612 1.1 christos return 0;
1613 1.1 christos }
1614 1.1 christos
1615 1.12 christos /*
1616 1.12 christos * Given a block that is the successor of an edge, and an edge that
1617 1.12 christos * dominates that edge, return either a pointer to a child of that
1618 1.12 christos * block (a block to which that block jumps) if that block is a
1619 1.12 christos * candidate to replace the successor of the latter edge or NULL
1620 1.12 christos * if neither of the children of the first block are candidates.
1621 1.12 christos */
1622 1.1 christos static struct block *
1623 1.6 christos fold_edge(struct block *child, struct edge *ep)
1624 1.1 christos {
1625 1.1 christos int sense;
1626 1.12 christos bpf_u_int32 aval0, aval1, oval0, oval1;
1627 1.1 christos int code = ep->code;
1628 1.1 christos
1629 1.1 christos if (code < 0) {
1630 1.12 christos /*
1631 1.12 christos * This edge is a "branch if false" edge.
1632 1.12 christos */
1633 1.1 christos code = -code;
1634 1.1 christos sense = 0;
1635 1.12 christos } else {
1636 1.12 christos /*
1637 1.12 christos * This edge is a "branch if true" edge.
1638 1.12 christos */
1639 1.1 christos sense = 1;
1640 1.12 christos }
1641 1.1 christos
1642 1.12 christos /*
1643 1.12 christos * If the opcode for the branch at the end of the block we
1644 1.12 christos * were handed isn't the same as the opcode for the branch
1645 1.12 christos * to which the edge we were handed corresponds, the tests
1646 1.12 christos * for those branches aren't testing the same conditions,
1647 1.12 christos * so the blocks to which the first block branches aren't
1648 1.12 christos * candidates to replace the successor of the edge.
1649 1.12 christos */
1650 1.1 christos if (child->s.code != code)
1651 1.1 christos return 0;
1652 1.1 christos
1653 1.1 christos aval0 = child->val[A_ATOM];
1654 1.1 christos oval0 = child->oval;
1655 1.1 christos aval1 = ep->pred->val[A_ATOM];
1656 1.1 christos oval1 = ep->pred->oval;
1657 1.1 christos
1658 1.12 christos /*
1659 1.12 christos * If the A register value on exit from the successor block
1660 1.12 christos * isn't the same as the A register value on exit from the
1661 1.12 christos * predecessor of the edge, the blocks to which the first
1662 1.12 christos * block branches aren't candidates to replace the successor
1663 1.12 christos * of the edge.
1664 1.12 christos */
1665 1.1 christos if (aval0 != aval1)
1666 1.1 christos return 0;
1667 1.1 christos
1668 1.1 christos if (oval0 == oval1)
1669 1.1 christos /*
1670 1.1 christos * The operands of the branch instructions are
1671 1.12 christos * identical, so the branches are testing the
1672 1.12 christos * same condition, and the result is true if a true
1673 1.1 christos * branch was taken to get here, otherwise false.
1674 1.1 christos */
1675 1.1 christos return sense ? JT(child) : JF(child);
1676 1.1 christos
1677 1.1 christos if (sense && code == (BPF_JMP|BPF_JEQ|BPF_K))
1678 1.1 christos /*
1679 1.1 christos * At this point, we only know the comparison if we
1680 1.1 christos * came down the true branch, and it was an equality
1681 1.1 christos * comparison with a constant.
1682 1.1 christos *
1683 1.1 christos * I.e., if we came down the true branch, and the branch
1684 1.1 christos * was an equality comparison with a constant, we know the
1685 1.1 christos * accumulator contains that constant. If we came down
1686 1.1 christos * the false branch, or the comparison wasn't with a
1687 1.1 christos * constant, we don't know what was in the accumulator.
1688 1.1 christos *
1689 1.1 christos * We rely on the fact that distinct constants have distinct
1690 1.1 christos * value numbers.
1691 1.1 christos */
1692 1.1 christos return JF(child);
1693 1.1 christos
1694 1.1 christos return 0;
1695 1.1 christos }
1696 1.1 christos
1697 1.12 christos /*
1698 1.12 christos * If we can make this edge go directly to a child of the edge's current
1699 1.12 christos * successor, do so.
1700 1.12 christos */
1701 1.1 christos static void
1702 1.9 christos opt_j(opt_state_t *opt_state, struct edge *ep)
1703 1.1 christos {
1704 1.12 christos register u_int i, k;
1705 1.1 christos register struct block *target;
1706 1.1 christos
1707 1.12 christos /*
1708 1.12 christos * Does this edge go to a block where, if the test
1709 1.12 christos * at the end of it succeeds, it goes to a block
1710 1.12 christos * that's a leaf node of the DAG, i.e. a return
1711 1.12 christos * statement?
1712 1.12 christos * If so, there's nothing to optimize.
1713 1.12 christos */
1714 1.1 christos if (JT(ep->succ) == 0)
1715 1.1 christos return;
1716 1.1 christos
1717 1.12 christos /*
1718 1.12 christos * Does this edge go to a block that goes, in turn, to
1719 1.12 christos * the same block regardless of whether the test at the
1720 1.12 christos * end succeeds or fails?
1721 1.12 christos */
1722 1.1 christos if (JT(ep->succ) == JF(ep->succ)) {
1723 1.1 christos /*
1724 1.1 christos * Common branch targets can be eliminated, provided
1725 1.1 christos * there is no data dependency.
1726 1.12 christos *
1727 1.12 christos * Check whether any register used on exit from the
1728 1.12 christos * block to which the successor of this edge goes
1729 1.12 christos * has a value at that point that's different from
1730 1.12 christos * the value it has on exit from the predecessor of
1731 1.12 christos * this edge. If not, the predecessor of this edge
1732 1.12 christos * can just go to the block to which the successor
1733 1.12 christos * of this edge goes, bypassing the successor of this
1734 1.12 christos * edge, as the successor of this edge isn't doing
1735 1.12 christos * any calculations whose results are different
1736 1.12 christos * from what the blocks before it did and isn't
1737 1.12 christos * doing any tests the results of which matter.
1738 1.1 christos */
1739 1.12 christos if (!use_conflict(ep->pred, JT(ep->succ))) {
1740 1.12 christos /*
1741 1.12 christos * No, there isn't.
1742 1.12 christos * Make this edge go to the block to
1743 1.12 christos * which the successor of that edge
1744 1.12 christos * goes.
1745 1.12 christos *
1746 1.12 christos * XXX - optimizer loop detection.
1747 1.12 christos */
1748 1.12 christos opt_state->non_branch_movement_performed = 1;
1749 1.9 christos opt_state->done = 0;
1750 1.1 christos ep->succ = JT(ep->succ);
1751 1.1 christos }
1752 1.1 christos }
1753 1.1 christos /*
1754 1.1 christos * For each edge dominator that matches the successor of this
1755 1.1 christos * edge, promote the edge successor to the its grandchild.
1756 1.1 christos *
1757 1.1 christos * XXX We violate the set abstraction here in favor a reasonably
1758 1.1 christos * efficient loop.
1759 1.1 christos */
1760 1.1 christos top:
1761 1.9 christos for (i = 0; i < opt_state->edgewords; ++i) {
1762 1.12 christos /* i'th word in the bitset of dominators */
1763 1.1 christos register bpf_u_int32 x = ep->edom[i];
1764 1.1 christos
1765 1.1 christos while (x != 0) {
1766 1.12 christos /* Find the next dominator in that word and mark it as found */
1767 1.10 christos k = lowest_set_bit(x);
1768 1.11 christos x &=~ ((bpf_u_int32)1 << k);
1769 1.1 christos k += i * BITS_PER_WORD;
1770 1.1 christos
1771 1.9 christos target = fold_edge(ep->succ, opt_state->edges[k]);
1772 1.1 christos /*
1773 1.12 christos * We have a candidate to replace the successor
1774 1.12 christos * of ep.
1775 1.12 christos *
1776 1.1 christos * Check that there is no data dependency between
1777 1.12 christos * nodes that will be violated if we move the edge;
1778 1.12 christos * i.e., if any register used on exit from the
1779 1.12 christos * candidate has a value at that point different
1780 1.12 christos * from the value it has when we exit the
1781 1.12 christos * predecessor of that edge, there's a data
1782 1.12 christos * dependency that will be violated.
1783 1.1 christos */
1784 1.1 christos if (target != 0 && !use_conflict(ep->pred, target)) {
1785 1.12 christos /*
1786 1.12 christos * It's safe to replace the successor of
1787 1.12 christos * ep; do so, and note that we've made
1788 1.12 christos * at least one change.
1789 1.12 christos *
1790 1.12 christos * XXX - this is one of the operations that
1791 1.12 christos * happens when the optimizer gets into
1792 1.12 christos * one of those infinite loops.
1793 1.12 christos */
1794 1.9 christos opt_state->done = 0;
1795 1.1 christos ep->succ = target;
1796 1.1 christos if (JT(target) != 0)
1797 1.1 christos /*
1798 1.1 christos * Start over unless we hit a leaf.
1799 1.1 christos */
1800 1.1 christos goto top;
1801 1.1 christos return;
1802 1.1 christos }
1803 1.1 christos }
1804 1.1 christos }
1805 1.1 christos }
1806 1.1 christos
1807 1.12 christos /*
1808 1.12 christos * XXX - is this, and and_pullup(), what's described in section 6.1.2
1809 1.12 christos * "Predicate Assertion Propagation" in the BPF+ paper?
1810 1.12 christos *
1811 1.12 christos * Note that this looks at block dominators, not edge dominators.
1812 1.12 christos * Don't think so.
1813 1.12 christos *
1814 1.12 christos * "A or B" compiles into
1815 1.12 christos *
1816 1.12 christos * A
1817 1.12 christos * t / \ f
1818 1.12 christos * / B
1819 1.12 christos * / t / \ f
1820 1.12 christos * \ /
1821 1.12 christos * \ /
1822 1.12 christos * X
1823 1.12 christos *
1824 1.12 christos *
1825 1.12 christos */
1826 1.1 christos static void
1827 1.13 christos or_pullup(opt_state_t *opt_state, struct block *b, struct block *root)
1828 1.1 christos {
1829 1.12 christos bpf_u_int32 val;
1830 1.12 christos int at_top;
1831 1.1 christos struct block *pull;
1832 1.1 christos struct block **diffp, **samep;
1833 1.1 christos struct edge *ep;
1834 1.1 christos
1835 1.1 christos ep = b->in_edges;
1836 1.1 christos if (ep == 0)
1837 1.1 christos return;
1838 1.1 christos
1839 1.1 christos /*
1840 1.1 christos * Make sure each predecessor loads the same value.
1841 1.1 christos * XXX why?
1842 1.1 christos */
1843 1.1 christos val = ep->pred->val[A_ATOM];
1844 1.1 christos for (ep = ep->next; ep != 0; ep = ep->next)
1845 1.1 christos if (val != ep->pred->val[A_ATOM])
1846 1.1 christos return;
1847 1.1 christos
1848 1.12 christos /*
1849 1.12 christos * For the first edge in the list of edges coming into this block,
1850 1.12 christos * see whether the predecessor of that edge comes here via a true
1851 1.12 christos * branch or a false branch.
1852 1.12 christos */
1853 1.1 christos if (JT(b->in_edges->pred) == b)
1854 1.12 christos diffp = &JT(b->in_edges->pred); /* jt */
1855 1.1 christos else
1856 1.12 christos diffp = &JF(b->in_edges->pred); /* jf */
1857 1.1 christos
1858 1.12 christos /*
1859 1.12 christos * diffp is a pointer to a pointer to the block.
1860 1.12 christos *
1861 1.12 christos * Go down the false chain looking as far as you can,
1862 1.12 christos * making sure that each jump-compare is doing the
1863 1.12 christos * same as the original block.
1864 1.12 christos *
1865 1.12 christos * If you reach the bottom before you reach a
1866 1.12 christos * different jump-compare, just exit. There's nothing
1867 1.12 christos * to do here. XXX - no, this version is checking for
1868 1.12 christos * the value leaving the block; that's from the BPF+
1869 1.12 christos * pullup routine.
1870 1.12 christos */
1871 1.1 christos at_top = 1;
1872 1.10 christos for (;;) {
1873 1.12 christos /*
1874 1.12 christos * Done if that's not going anywhere XXX
1875 1.12 christos */
1876 1.1 christos if (*diffp == 0)
1877 1.1 christos return;
1878 1.1 christos
1879 1.12 christos /*
1880 1.12 christos * Done if that predecessor blah blah blah isn't
1881 1.12 christos * going the same place we're going XXX
1882 1.12 christos *
1883 1.12 christos * Does the true edge of this block point to the same
1884 1.12 christos * location as the true edge of b?
1885 1.12 christos */
1886 1.1 christos if (JT(*diffp) != JT(b))
1887 1.1 christos return;
1888 1.1 christos
1889 1.12 christos /*
1890 1.12 christos * Done if this node isn't a dominator of that
1891 1.12 christos * node blah blah blah XXX
1892 1.12 christos *
1893 1.12 christos * Does b dominate diffp?
1894 1.12 christos */
1895 1.1 christos if (!SET_MEMBER((*diffp)->dom, b->id))
1896 1.1 christos return;
1897 1.1 christos
1898 1.12 christos /*
1899 1.12 christos * Break out of the loop if that node's value of A
1900 1.12 christos * isn't the value of A above XXX
1901 1.12 christos */
1902 1.1 christos if ((*diffp)->val[A_ATOM] != val)
1903 1.1 christos break;
1904 1.1 christos
1905 1.12 christos /*
1906 1.12 christos * Get the JF for that node XXX
1907 1.12 christos * Go down the false path.
1908 1.12 christos */
1909 1.1 christos diffp = &JF(*diffp);
1910 1.1 christos at_top = 0;
1911 1.1 christos }
1912 1.12 christos
1913 1.12 christos /*
1914 1.12 christos * Now that we've found a different jump-compare in a chain
1915 1.12 christos * below b, search further down until we find another
1916 1.12 christos * jump-compare that looks at the original value. This
1917 1.12 christos * jump-compare should get pulled up. XXX again we're
1918 1.12 christos * comparing values not jump-compares.
1919 1.12 christos */
1920 1.1 christos samep = &JF(*diffp);
1921 1.10 christos for (;;) {
1922 1.12 christos /*
1923 1.12 christos * Done if that's not going anywhere XXX
1924 1.12 christos */
1925 1.1 christos if (*samep == 0)
1926 1.1 christos return;
1927 1.1 christos
1928 1.12 christos /*
1929 1.12 christos * Done if that predecessor blah blah blah isn't
1930 1.12 christos * going the same place we're going XXX
1931 1.12 christos */
1932 1.1 christos if (JT(*samep) != JT(b))
1933 1.1 christos return;
1934 1.1 christos
1935 1.12 christos /*
1936 1.12 christos * Done if this node isn't a dominator of that
1937 1.12 christos * node blah blah blah XXX
1938 1.12 christos *
1939 1.12 christos * Does b dominate samep?
1940 1.12 christos */
1941 1.1 christos if (!SET_MEMBER((*samep)->dom, b->id))
1942 1.1 christos return;
1943 1.1 christos
1944 1.12 christos /*
1945 1.12 christos * Break out of the loop if that node's value of A
1946 1.12 christos * is the value of A above XXX
1947 1.12 christos */
1948 1.1 christos if ((*samep)->val[A_ATOM] == val)
1949 1.1 christos break;
1950 1.1 christos
1951 1.1 christos /* XXX Need to check that there are no data dependencies
1952 1.1 christos between dp0 and dp1. Currently, the code generator
1953 1.1 christos will not produce such dependencies. */
1954 1.1 christos samep = &JF(*samep);
1955 1.1 christos }
1956 1.1 christos #ifdef notdef
1957 1.1 christos /* XXX This doesn't cover everything. */
1958 1.1 christos for (i = 0; i < N_ATOMS; ++i)
1959 1.1 christos if ((*samep)->val[i] != pred->val[i])
1960 1.1 christos return;
1961 1.1 christos #endif
1962 1.1 christos /* Pull up the node. */
1963 1.1 christos pull = *samep;
1964 1.1 christos *samep = JF(pull);
1965 1.1 christos JF(pull) = *diffp;
1966 1.1 christos
1967 1.1 christos /*
1968 1.1 christos * At the top of the chain, each predecessor needs to point at the
1969 1.1 christos * pulled up node. Inside the chain, there is only one predecessor
1970 1.1 christos * to worry about.
1971 1.1 christos */
1972 1.1 christos if (at_top) {
1973 1.1 christos for (ep = b->in_edges; ep != 0; ep = ep->next) {
1974 1.1 christos if (JT(ep->pred) == b)
1975 1.1 christos JT(ep->pred) = pull;
1976 1.1 christos else
1977 1.1 christos JF(ep->pred) = pull;
1978 1.1 christos }
1979 1.1 christos }
1980 1.1 christos else
1981 1.1 christos *diffp = pull;
1982 1.1 christos
1983 1.12 christos /*
1984 1.12 christos * XXX - this is one of the operations that happens when the
1985 1.12 christos * optimizer gets into one of those infinite loops.
1986 1.12 christos */
1987 1.9 christos opt_state->done = 0;
1988 1.13 christos
1989 1.13 christos /*
1990 1.13 christos * Recompute dominator sets as control flow graph has changed.
1991 1.13 christos */
1992 1.13 christos find_dom(opt_state, root);
1993 1.1 christos }
1994 1.1 christos
1995 1.1 christos static void
1996 1.13 christos and_pullup(opt_state_t *opt_state, struct block *b, struct block *root)
1997 1.1 christos {
1998 1.12 christos bpf_u_int32 val;
1999 1.12 christos int at_top;
2000 1.1 christos struct block *pull;
2001 1.1 christos struct block **diffp, **samep;
2002 1.1 christos struct edge *ep;
2003 1.1 christos
2004 1.1 christos ep = b->in_edges;
2005 1.1 christos if (ep == 0)
2006 1.1 christos return;
2007 1.1 christos
2008 1.1 christos /*
2009 1.1 christos * Make sure each predecessor loads the same value.
2010 1.1 christos */
2011 1.1 christos val = ep->pred->val[A_ATOM];
2012 1.1 christos for (ep = ep->next; ep != 0; ep = ep->next)
2013 1.1 christos if (val != ep->pred->val[A_ATOM])
2014 1.1 christos return;
2015 1.1 christos
2016 1.1 christos if (JT(b->in_edges->pred) == b)
2017 1.1 christos diffp = &JT(b->in_edges->pred);
2018 1.1 christos else
2019 1.1 christos diffp = &JF(b->in_edges->pred);
2020 1.1 christos
2021 1.1 christos at_top = 1;
2022 1.10 christos for (;;) {
2023 1.1 christos if (*diffp == 0)
2024 1.1 christos return;
2025 1.1 christos
2026 1.1 christos if (JF(*diffp) != JF(b))
2027 1.1 christos return;
2028 1.1 christos
2029 1.1 christos if (!SET_MEMBER((*diffp)->dom, b->id))
2030 1.1 christos return;
2031 1.1 christos
2032 1.1 christos if ((*diffp)->val[A_ATOM] != val)
2033 1.1 christos break;
2034 1.1 christos
2035 1.1 christos diffp = &JT(*diffp);
2036 1.1 christos at_top = 0;
2037 1.1 christos }
2038 1.1 christos samep = &JT(*diffp);
2039 1.10 christos for (;;) {
2040 1.1 christos if (*samep == 0)
2041 1.1 christos return;
2042 1.1 christos
2043 1.1 christos if (JF(*samep) != JF(b))
2044 1.1 christos return;
2045 1.1 christos
2046 1.1 christos if (!SET_MEMBER((*samep)->dom, b->id))
2047 1.1 christos return;
2048 1.1 christos
2049 1.1 christos if ((*samep)->val[A_ATOM] == val)
2050 1.1 christos break;
2051 1.1 christos
2052 1.1 christos /* XXX Need to check that there are no data dependencies
2053 1.1 christos between diffp and samep. Currently, the code generator
2054 1.1 christos will not produce such dependencies. */
2055 1.1 christos samep = &JT(*samep);
2056 1.1 christos }
2057 1.1 christos #ifdef notdef
2058 1.1 christos /* XXX This doesn't cover everything. */
2059 1.1 christos for (i = 0; i < N_ATOMS; ++i)
2060 1.1 christos if ((*samep)->val[i] != pred->val[i])
2061 1.1 christos return;
2062 1.1 christos #endif
2063 1.1 christos /* Pull up the node. */
2064 1.1 christos pull = *samep;
2065 1.1 christos *samep = JT(pull);
2066 1.1 christos JT(pull) = *diffp;
2067 1.1 christos
2068 1.1 christos /*
2069 1.1 christos * At the top of the chain, each predecessor needs to point at the
2070 1.1 christos * pulled up node. Inside the chain, there is only one predecessor
2071 1.1 christos * to worry about.
2072 1.1 christos */
2073 1.1 christos if (at_top) {
2074 1.1 christos for (ep = b->in_edges; ep != 0; ep = ep->next) {
2075 1.1 christos if (JT(ep->pred) == b)
2076 1.1 christos JT(ep->pred) = pull;
2077 1.1 christos else
2078 1.1 christos JF(ep->pred) = pull;
2079 1.1 christos }
2080 1.1 christos }
2081 1.1 christos else
2082 1.1 christos *diffp = pull;
2083 1.1 christos
2084 1.12 christos /*
2085 1.12 christos * XXX - this is one of the operations that happens when the
2086 1.12 christos * optimizer gets into one of those infinite loops.
2087 1.12 christos */
2088 1.9 christos opt_state->done = 0;
2089 1.13 christos
2090 1.13 christos /*
2091 1.13 christos * Recompute dominator sets as control flow graph has changed.
2092 1.13 christos */
2093 1.13 christos find_dom(opt_state, root);
2094 1.1 christos }
2095 1.1 christos
2096 1.1 christos static void
2097 1.11 christos opt_blks(opt_state_t *opt_state, struct icode *ic, int do_stmts)
2098 1.1 christos {
2099 1.1 christos int i, maxlevel;
2100 1.1 christos struct block *p;
2101 1.1 christos
2102 1.9 christos init_val(opt_state);
2103 1.9 christos maxlevel = ic->root->level;
2104 1.1 christos
2105 1.9 christos find_inedges(opt_state, ic->root);
2106 1.1 christos for (i = maxlevel; i >= 0; --i)
2107 1.9 christos for (p = opt_state->levels[i]; p; p = p->link)
2108 1.11 christos opt_blk(opt_state, p, do_stmts);
2109 1.1 christos
2110 1.1 christos if (do_stmts)
2111 1.1 christos /*
2112 1.1 christos * No point trying to move branches; it can't possibly
2113 1.1 christos * make a difference at this point.
2114 1.12 christos *
2115 1.12 christos * XXX - this might be after we detect a loop where
2116 1.12 christos * we were just looping infinitely moving branches
2117 1.12 christos * in such a fashion that we went through two or more
2118 1.12 christos * versions of the machine code, eventually returning
2119 1.12 christos * to the first version. (We're really not doing a
2120 1.12 christos * full loop detection, we're just testing for two
2121 1.12 christos * passes in a row where we do nothing but
2122 1.12 christos * move branches.)
2123 1.1 christos */
2124 1.1 christos return;
2125 1.1 christos
2126 1.12 christos /*
2127 1.12 christos * Is this what the BPF+ paper describes in sections 6.1.1,
2128 1.12 christos * 6.1.2, and 6.1.3?
2129 1.12 christos */
2130 1.1 christos for (i = 1; i <= maxlevel; ++i) {
2131 1.9 christos for (p = opt_state->levels[i]; p; p = p->link) {
2132 1.9 christos opt_j(opt_state, &p->et);
2133 1.9 christos opt_j(opt_state, &p->ef);
2134 1.1 christos }
2135 1.1 christos }
2136 1.1 christos
2137 1.9 christos find_inedges(opt_state, ic->root);
2138 1.1 christos for (i = 1; i <= maxlevel; ++i) {
2139 1.9 christos for (p = opt_state->levels[i]; p; p = p->link) {
2140 1.13 christos or_pullup(opt_state, p, ic->root);
2141 1.13 christos and_pullup(opt_state, p, ic->root);
2142 1.1 christos }
2143 1.1 christos }
2144 1.1 christos }
2145 1.1 christos
2146 1.1 christos static inline void
2147 1.6 christos link_inedge(struct edge *parent, struct block *child)
2148 1.1 christos {
2149 1.1 christos parent->next = child->in_edges;
2150 1.1 christos child->in_edges = parent;
2151 1.1 christos }
2152 1.1 christos
2153 1.1 christos static void
2154 1.9 christos find_inedges(opt_state_t *opt_state, struct block *root)
2155 1.1 christos {
2156 1.12 christos u_int i;
2157 1.12 christos int level;
2158 1.1 christos struct block *b;
2159 1.1 christos
2160 1.9 christos for (i = 0; i < opt_state->n_blocks; ++i)
2161 1.9 christos opt_state->blocks[i]->in_edges = 0;
2162 1.1 christos
2163 1.1 christos /*
2164 1.1 christos * Traverse the graph, adding each edge to the predecessor
2165 1.1 christos * list of its successors. Skip the leaves (i.e. level 0).
2166 1.1 christos */
2167 1.12 christos for (level = root->level; level > 0; --level) {
2168 1.12 christos for (b = opt_state->levels[level]; b != 0; b = b->link) {
2169 1.1 christos link_inedge(&b->et, JT(b));
2170 1.1 christos link_inedge(&b->ef, JF(b));
2171 1.1 christos }
2172 1.1 christos }
2173 1.1 christos }
2174 1.1 christos
2175 1.1 christos static void
2176 1.6 christos opt_root(struct block **b)
2177 1.1 christos {
2178 1.1 christos struct slist *tmp, *s;
2179 1.1 christos
2180 1.1 christos s = (*b)->stmts;
2181 1.1 christos (*b)->stmts = 0;
2182 1.1 christos while (BPF_CLASS((*b)->s.code) == BPF_JMP && JT(*b) == JF(*b))
2183 1.1 christos *b = JT(*b);
2184 1.1 christos
2185 1.1 christos tmp = (*b)->stmts;
2186 1.1 christos if (tmp != 0)
2187 1.1 christos sappend(s, tmp);
2188 1.1 christos (*b)->stmts = s;
2189 1.1 christos
2190 1.1 christos /*
2191 1.1 christos * If the root node is a return, then there is no
2192 1.1 christos * point executing any statements (since the bpf machine
2193 1.1 christos * has no side effects).
2194 1.1 christos */
2195 1.1 christos if (BPF_CLASS((*b)->s.code) == BPF_RET)
2196 1.1 christos (*b)->stmts = 0;
2197 1.1 christos }
2198 1.1 christos
2199 1.1 christos static void
2200 1.11 christos opt_loop(opt_state_t *opt_state, struct icode *ic, int do_stmts)
2201 1.1 christos {
2202 1.1 christos
2203 1.1 christos #ifdef BDEBUG
2204 1.10 christos if (pcap_optimizer_debug > 1 || pcap_print_dot_graph) {
2205 1.1 christos printf("opt_loop(root, %d) begin\n", do_stmts);
2206 1.11 christos opt_dump(opt_state, ic);
2207 1.1 christos }
2208 1.1 christos #endif
2209 1.12 christos
2210 1.12 christos /*
2211 1.12 christos * XXX - optimizer loop detection.
2212 1.12 christos */
2213 1.12 christos int loop_count = 0;
2214 1.12 christos for (;;) {
2215 1.9 christos opt_state->done = 1;
2216 1.12 christos /*
2217 1.12 christos * XXX - optimizer loop detection.
2218 1.12 christos */
2219 1.12 christos opt_state->non_branch_movement_performed = 0;
2220 1.9 christos find_levels(opt_state, ic);
2221 1.9 christos find_dom(opt_state, ic->root);
2222 1.9 christos find_closure(opt_state, ic->root);
2223 1.9 christos find_ud(opt_state, ic->root);
2224 1.9 christos find_edom(opt_state, ic->root);
2225 1.11 christos opt_blks(opt_state, ic, do_stmts);
2226 1.1 christos #ifdef BDEBUG
2227 1.10 christos if (pcap_optimizer_debug > 1 || pcap_print_dot_graph) {
2228 1.9 christos printf("opt_loop(root, %d) bottom, done=%d\n", do_stmts, opt_state->done);
2229 1.11 christos opt_dump(opt_state, ic);
2230 1.1 christos }
2231 1.1 christos #endif
2232 1.12 christos
2233 1.12 christos /*
2234 1.12 christos * Was anything done in this optimizer pass?
2235 1.12 christos */
2236 1.12 christos if (opt_state->done) {
2237 1.12 christos /*
2238 1.12 christos * No, so we've reached a fixed point.
2239 1.12 christos * We're done.
2240 1.12 christos */
2241 1.12 christos break;
2242 1.12 christos }
2243 1.12 christos
2244 1.12 christos /*
2245 1.12 christos * XXX - was anything done other than branch movement
2246 1.12 christos * in this pass?
2247 1.12 christos */
2248 1.12 christos if (opt_state->non_branch_movement_performed) {
2249 1.12 christos /*
2250 1.12 christos * Yes. Clear any loop-detection counter;
2251 1.12 christos * we're making some form of progress (assuming
2252 1.12 christos * we can't get into a cycle doing *other*
2253 1.12 christos * optimizations...).
2254 1.12 christos */
2255 1.12 christos loop_count = 0;
2256 1.12 christos } else {
2257 1.12 christos /*
2258 1.12 christos * No - increment the counter, and quit if
2259 1.12 christos * it's up to 100.
2260 1.12 christos */
2261 1.12 christos loop_count++;
2262 1.12 christos if (loop_count >= 100) {
2263 1.12 christos /*
2264 1.12 christos * We've done nothing but branch movement
2265 1.12 christos * for 100 passes; we're probably
2266 1.12 christos * in a cycle and will never reach a
2267 1.12 christos * fixed point.
2268 1.12 christos *
2269 1.12 christos * XXX - yes, we really need a non-
2270 1.12 christos * heuristic way of detecting a cycle.
2271 1.12 christos */
2272 1.12 christos opt_state->done = 1;
2273 1.12 christos break;
2274 1.12 christos }
2275 1.12 christos }
2276 1.12 christos }
2277 1.1 christos }
2278 1.1 christos
2279 1.1 christos /*
2280 1.1 christos * Optimize the filter code in its dag representation.
2281 1.11 christos * Return 0 on success, -1 on error.
2282 1.1 christos */
2283 1.11 christos int
2284 1.11 christos bpf_optimize(struct icode *ic, char *errbuf)
2285 1.1 christos {
2286 1.9 christos opt_state_t opt_state;
2287 1.1 christos
2288 1.11 christos memset(&opt_state, 0, sizeof(opt_state));
2289 1.11 christos opt_state.errbuf = errbuf;
2290 1.12 christos opt_state.non_branch_movement_performed = 0;
2291 1.11 christos if (setjmp(opt_state.top_ctx)) {
2292 1.11 christos opt_cleanup(&opt_state);
2293 1.11 christos return -1;
2294 1.11 christos }
2295 1.11 christos opt_init(&opt_state, ic);
2296 1.11 christos opt_loop(&opt_state, ic, 0);
2297 1.11 christos opt_loop(&opt_state, ic, 1);
2298 1.9 christos intern_blocks(&opt_state, ic);
2299 1.1 christos #ifdef BDEBUG
2300 1.10 christos if (pcap_optimizer_debug > 1 || pcap_print_dot_graph) {
2301 1.1 christos printf("after intern_blocks()\n");
2302 1.11 christos opt_dump(&opt_state, ic);
2303 1.1 christos }
2304 1.1 christos #endif
2305 1.9 christos opt_root(&ic->root);
2306 1.1 christos #ifdef BDEBUG
2307 1.10 christos if (pcap_optimizer_debug > 1 || pcap_print_dot_graph) {
2308 1.1 christos printf("after opt_root()\n");
2309 1.11 christos opt_dump(&opt_state, ic);
2310 1.1 christos }
2311 1.1 christos #endif
2312 1.9 christos opt_cleanup(&opt_state);
2313 1.11 christos return 0;
2314 1.1 christos }
2315 1.1 christos
2316 1.1 christos static void
2317 1.9 christos make_marks(struct icode *ic, struct block *p)
2318 1.1 christos {
2319 1.9 christos if (!isMarked(ic, p)) {
2320 1.9 christos Mark(ic, p);
2321 1.1 christos if (BPF_CLASS(p->s.code) != BPF_RET) {
2322 1.9 christos make_marks(ic, JT(p));
2323 1.9 christos make_marks(ic, JF(p));
2324 1.1 christos }
2325 1.1 christos }
2326 1.1 christos }
2327 1.1 christos
2328 1.1 christos /*
2329 1.9 christos * Mark code array such that isMarked(ic->cur_mark, i) is true
2330 1.1 christos * only for nodes that are alive.
2331 1.1 christos */
2332 1.1 christos static void
2333 1.9 christos mark_code(struct icode *ic)
2334 1.1 christos {
2335 1.9 christos ic->cur_mark += 1;
2336 1.9 christos make_marks(ic, ic->root);
2337 1.1 christos }
2338 1.1 christos
2339 1.1 christos /*
2340 1.1 christos * True iff the two stmt lists load the same value from the packet into
2341 1.1 christos * the accumulator.
2342 1.1 christos */
2343 1.1 christos static int
2344 1.6 christos eq_slist(struct slist *x, struct slist *y)
2345 1.1 christos {
2346 1.10 christos for (;;) {
2347 1.1 christos while (x && x->s.code == NOP)
2348 1.1 christos x = x->next;
2349 1.1 christos while (y && y->s.code == NOP)
2350 1.1 christos y = y->next;
2351 1.1 christos if (x == 0)
2352 1.1 christos return y == 0;
2353 1.1 christos if (y == 0)
2354 1.1 christos return x == 0;
2355 1.1 christos if (x->s.code != y->s.code || x->s.k != y->s.k)
2356 1.1 christos return 0;
2357 1.1 christos x = x->next;
2358 1.1 christos y = y->next;
2359 1.1 christos }
2360 1.1 christos }
2361 1.1 christos
2362 1.1 christos static inline int
2363 1.6 christos eq_blk(struct block *b0, struct block *b1)
2364 1.1 christos {
2365 1.1 christos if (b0->s.code == b1->s.code &&
2366 1.1 christos b0->s.k == b1->s.k &&
2367 1.1 christos b0->et.succ == b1->et.succ &&
2368 1.1 christos b0->ef.succ == b1->ef.succ)
2369 1.1 christos return eq_slist(b0->stmts, b1->stmts);
2370 1.1 christos return 0;
2371 1.1 christos }
2372 1.1 christos
2373 1.1 christos static void
2374 1.9 christos intern_blocks(opt_state_t *opt_state, struct icode *ic)
2375 1.1 christos {
2376 1.1 christos struct block *p;
2377 1.12 christos u_int i, j;
2378 1.1 christos int done1; /* don't shadow global */
2379 1.1 christos top:
2380 1.1 christos done1 = 1;
2381 1.9 christos for (i = 0; i < opt_state->n_blocks; ++i)
2382 1.9 christos opt_state->blocks[i]->link = 0;
2383 1.1 christos
2384 1.9 christos mark_code(ic);
2385 1.1 christos
2386 1.12 christos for (i = opt_state->n_blocks - 1; i != 0; ) {
2387 1.12 christos --i;
2388 1.9 christos if (!isMarked(ic, opt_state->blocks[i]))
2389 1.1 christos continue;
2390 1.9 christos for (j = i + 1; j < opt_state->n_blocks; ++j) {
2391 1.9 christos if (!isMarked(ic, opt_state->blocks[j]))
2392 1.1 christos continue;
2393 1.9 christos if (eq_blk(opt_state->blocks[i], opt_state->blocks[j])) {
2394 1.9 christos opt_state->blocks[i]->link = opt_state->blocks[j]->link ?
2395 1.9 christos opt_state->blocks[j]->link : opt_state->blocks[j];
2396 1.1 christos break;
2397 1.1 christos }
2398 1.1 christos }
2399 1.1 christos }
2400 1.9 christos for (i = 0; i < opt_state->n_blocks; ++i) {
2401 1.9 christos p = opt_state->blocks[i];
2402 1.1 christos if (JT(p) == 0)
2403 1.1 christos continue;
2404 1.1 christos if (JT(p)->link) {
2405 1.1 christos done1 = 0;
2406 1.1 christos JT(p) = JT(p)->link;
2407 1.1 christos }
2408 1.1 christos if (JF(p)->link) {
2409 1.1 christos done1 = 0;
2410 1.1 christos JF(p) = JF(p)->link;
2411 1.1 christos }
2412 1.1 christos }
2413 1.1 christos if (!done1)
2414 1.1 christos goto top;
2415 1.1 christos }
2416 1.1 christos
2417 1.1 christos static void
2418 1.9 christos opt_cleanup(opt_state_t *opt_state)
2419 1.1 christos {
2420 1.9 christos free((void *)opt_state->vnode_base);
2421 1.9 christos free((void *)opt_state->vmap);
2422 1.9 christos free((void *)opt_state->edges);
2423 1.9 christos free((void *)opt_state->space);
2424 1.9 christos free((void *)opt_state->levels);
2425 1.9 christos free((void *)opt_state->blocks);
2426 1.1 christos }
2427 1.1 christos
2428 1.1 christos /*
2429 1.11 christos * For optimizer errors.
2430 1.11 christos */
2431 1.11 christos static void PCAP_NORETURN
2432 1.11 christos opt_error(opt_state_t *opt_state, const char *fmt, ...)
2433 1.11 christos {
2434 1.11 christos va_list ap;
2435 1.11 christos
2436 1.11 christos if (opt_state->errbuf != NULL) {
2437 1.11 christos va_start(ap, fmt);
2438 1.12 christos (void)vsnprintf(opt_state->errbuf,
2439 1.11 christos PCAP_ERRBUF_SIZE, fmt, ap);
2440 1.11 christos va_end(ap);
2441 1.11 christos }
2442 1.11 christos longjmp(opt_state->top_ctx, 1);
2443 1.11 christos /* NOTREACHED */
2444 1.12 christos #ifdef _AIX
2445 1.12 christos PCAP_UNREACHABLE
2446 1.12 christos #endif /* _AIX */
2447 1.11 christos }
2448 1.11 christos
2449 1.11 christos /*
2450 1.1 christos * Return the number of stmts in 's'.
2451 1.1 christos */
2452 1.5 christos static u_int
2453 1.6 christos slength(struct slist *s)
2454 1.1 christos {
2455 1.5 christos u_int n = 0;
2456 1.1 christos
2457 1.1 christos for (; s; s = s->next)
2458 1.1 christos if (s->s.code != NOP)
2459 1.1 christos ++n;
2460 1.1 christos return n;
2461 1.1 christos }
2462 1.1 christos
2463 1.1 christos /*
2464 1.1 christos * Return the number of nodes reachable by 'p'.
2465 1.1 christos * All nodes should be initially unmarked.
2466 1.1 christos */
2467 1.1 christos static int
2468 1.9 christos count_blocks(struct icode *ic, struct block *p)
2469 1.1 christos {
2470 1.9 christos if (p == 0 || isMarked(ic, p))
2471 1.1 christos return 0;
2472 1.9 christos Mark(ic, p);
2473 1.9 christos return count_blocks(ic, JT(p)) + count_blocks(ic, JF(p)) + 1;
2474 1.1 christos }
2475 1.1 christos
2476 1.1 christos /*
2477 1.1 christos * Do a depth first search on the flow graph, numbering the
2478 1.1 christos * the basic blocks, and entering them into the 'blocks' array.`
2479 1.1 christos */
2480 1.1 christos static void
2481 1.9 christos number_blks_r(opt_state_t *opt_state, struct icode *ic, struct block *p)
2482 1.1 christos {
2483 1.12 christos u_int n;
2484 1.1 christos
2485 1.9 christos if (p == 0 || isMarked(ic, p))
2486 1.1 christos return;
2487 1.1 christos
2488 1.9 christos Mark(ic, p);
2489 1.9 christos n = opt_state->n_blocks++;
2490 1.12 christos if (opt_state->n_blocks == 0) {
2491 1.12 christos /*
2492 1.12 christos * Overflow.
2493 1.12 christos */
2494 1.12 christos opt_error(opt_state, "filter is too complex to optimize");
2495 1.12 christos }
2496 1.1 christos p->id = n;
2497 1.9 christos opt_state->blocks[n] = p;
2498 1.1 christos
2499 1.9 christos number_blks_r(opt_state, ic, JT(p));
2500 1.9 christos number_blks_r(opt_state, ic, JF(p));
2501 1.1 christos }
2502 1.1 christos
2503 1.1 christos /*
2504 1.1 christos * Return the number of stmts in the flowgraph reachable by 'p'.
2505 1.1 christos * The nodes should be unmarked before calling.
2506 1.1 christos *
2507 1.1 christos * Note that "stmts" means "instructions", and that this includes
2508 1.1 christos *
2509 1.1 christos * side-effect statements in 'p' (slength(p->stmts));
2510 1.1 christos *
2511 1.1 christos * statements in the true branch from 'p' (count_stmts(JT(p)));
2512 1.1 christos *
2513 1.1 christos * statements in the false branch from 'p' (count_stmts(JF(p)));
2514 1.1 christos *
2515 1.1 christos * the conditional jump itself (1);
2516 1.1 christos *
2517 1.1 christos * an extra long jump if the true branch requires it (p->longjt);
2518 1.1 christos *
2519 1.1 christos * an extra long jump if the false branch requires it (p->longjf).
2520 1.1 christos */
2521 1.5 christos static u_int
2522 1.9 christos count_stmts(struct icode *ic, struct block *p)
2523 1.1 christos {
2524 1.5 christos u_int n;
2525 1.1 christos
2526 1.9 christos if (p == 0 || isMarked(ic, p))
2527 1.1 christos return 0;
2528 1.9 christos Mark(ic, p);
2529 1.9 christos n = count_stmts(ic, JT(p)) + count_stmts(ic, JF(p));
2530 1.1 christos return slength(p->stmts) + n + 1 + p->longjt + p->longjf;
2531 1.1 christos }
2532 1.1 christos
2533 1.1 christos /*
2534 1.1 christos * Allocate memory. All allocation is done before optimization
2535 1.1 christos * is begun. A linear bound on the size of all data structures is computed
2536 1.1 christos * from the total number of blocks and/or statements.
2537 1.1 christos */
2538 1.1 christos static void
2539 1.11 christos opt_init(opt_state_t *opt_state, struct icode *ic)
2540 1.1 christos {
2541 1.1 christos bpf_u_int32 *p;
2542 1.1 christos int i, n, max_stmts;
2543 1.12 christos u_int product;
2544 1.12 christos size_t block_memsize, edge_memsize;
2545 1.1 christos
2546 1.1 christos /*
2547 1.1 christos * First, count the blocks, so we can malloc an array to map
2548 1.1 christos * block number to block. Then, put the blocks into the array.
2549 1.1 christos */
2550 1.9 christos unMarkAll(ic);
2551 1.9 christos n = count_blocks(ic, ic->root);
2552 1.9 christos opt_state->blocks = (struct block **)calloc(n, sizeof(*opt_state->blocks));
2553 1.9 christos if (opt_state->blocks == NULL)
2554 1.11 christos opt_error(opt_state, "malloc");
2555 1.9 christos unMarkAll(ic);
2556 1.9 christos opt_state->n_blocks = 0;
2557 1.9 christos number_blks_r(opt_state, ic, ic->root);
2558 1.9 christos
2559 1.12 christos /*
2560 1.12 christos * This "should not happen".
2561 1.12 christos */
2562 1.12 christos if (opt_state->n_blocks == 0)
2563 1.12 christos opt_error(opt_state, "filter has no instructions; please report this as a libpcap issue");
2564 1.12 christos
2565 1.9 christos opt_state->n_edges = 2 * opt_state->n_blocks;
2566 1.12 christos if ((opt_state->n_edges / 2) != opt_state->n_blocks) {
2567 1.12 christos /*
2568 1.12 christos * Overflow.
2569 1.12 christos */
2570 1.12 christos opt_error(opt_state, "filter is too complex to optimize");
2571 1.12 christos }
2572 1.9 christos opt_state->edges = (struct edge **)calloc(opt_state->n_edges, sizeof(*opt_state->edges));
2573 1.11 christos if (opt_state->edges == NULL) {
2574 1.11 christos opt_error(opt_state, "malloc");
2575 1.11 christos }
2576 1.1 christos
2577 1.1 christos /*
2578 1.1 christos * The number of levels is bounded by the number of nodes.
2579 1.1 christos */
2580 1.9 christos opt_state->levels = (struct block **)calloc(opt_state->n_blocks, sizeof(*opt_state->levels));
2581 1.11 christos if (opt_state->levels == NULL) {
2582 1.11 christos opt_error(opt_state, "malloc");
2583 1.11 christos }
2584 1.1 christos
2585 1.12 christos opt_state->edgewords = opt_state->n_edges / BITS_PER_WORD + 1;
2586 1.12 christos opt_state->nodewords = opt_state->n_blocks / BITS_PER_WORD + 1;
2587 1.12 christos
2588 1.12 christos /*
2589 1.12 christos * Make sure opt_state->n_blocks * opt_state->nodewords fits
2590 1.12 christos * in a u_int; we use it as a u_int number-of-iterations
2591 1.12 christos * value.
2592 1.12 christos */
2593 1.12 christos product = opt_state->n_blocks * opt_state->nodewords;
2594 1.12 christos if ((product / opt_state->n_blocks) != opt_state->nodewords) {
2595 1.12 christos /*
2596 1.12 christos * XXX - just punt and don't try to optimize?
2597 1.12 christos * In practice, this is unlikely to happen with
2598 1.12 christos * a normal filter.
2599 1.12 christos */
2600 1.12 christos opt_error(opt_state, "filter is too complex to optimize");
2601 1.12 christos }
2602 1.12 christos
2603 1.12 christos /*
2604 1.12 christos * Make sure the total memory required for that doesn't
2605 1.12 christos * overflow.
2606 1.12 christos */
2607 1.12 christos block_memsize = (size_t)2 * product * sizeof(*opt_state->space);
2608 1.12 christos if ((block_memsize / product) != 2 * sizeof(*opt_state->space)) {
2609 1.12 christos opt_error(opt_state, "filter is too complex to optimize");
2610 1.12 christos }
2611 1.12 christos
2612 1.12 christos /*
2613 1.12 christos * Make sure opt_state->n_edges * opt_state->edgewords fits
2614 1.12 christos * in a u_int; we use it as a u_int number-of-iterations
2615 1.12 christos * value.
2616 1.12 christos */
2617 1.12 christos product = opt_state->n_edges * opt_state->edgewords;
2618 1.12 christos if ((product / opt_state->n_edges) != opt_state->edgewords) {
2619 1.12 christos opt_error(opt_state, "filter is too complex to optimize");
2620 1.12 christos }
2621 1.12 christos
2622 1.12 christos /*
2623 1.12 christos * Make sure the total memory required for that doesn't
2624 1.12 christos * overflow.
2625 1.12 christos */
2626 1.12 christos edge_memsize = (size_t)product * sizeof(*opt_state->space);
2627 1.12 christos if (edge_memsize / product != sizeof(*opt_state->space)) {
2628 1.12 christos opt_error(opt_state, "filter is too complex to optimize");
2629 1.12 christos }
2630 1.12 christos
2631 1.12 christos /*
2632 1.12 christos * Make sure the total memory required for both of them doesn't
2633 1.12 christos * overflow.
2634 1.12 christos */
2635 1.12 christos if (block_memsize > SIZE_MAX - edge_memsize) {
2636 1.12 christos opt_error(opt_state, "filter is too complex to optimize");
2637 1.12 christos }
2638 1.1 christos
2639 1.1 christos /* XXX */
2640 1.12 christos opt_state->space = (bpf_u_int32 *)malloc(block_memsize + edge_memsize);
2641 1.11 christos if (opt_state->space == NULL) {
2642 1.11 christos opt_error(opt_state, "malloc");
2643 1.11 christos }
2644 1.9 christos p = opt_state->space;
2645 1.9 christos opt_state->all_dom_sets = p;
2646 1.1 christos for (i = 0; i < n; ++i) {
2647 1.9 christos opt_state->blocks[i]->dom = p;
2648 1.9 christos p += opt_state->nodewords;
2649 1.1 christos }
2650 1.9 christos opt_state->all_closure_sets = p;
2651 1.1 christos for (i = 0; i < n; ++i) {
2652 1.9 christos opt_state->blocks[i]->closure = p;
2653 1.9 christos p += opt_state->nodewords;
2654 1.1 christos }
2655 1.9 christos opt_state->all_edge_sets = p;
2656 1.1 christos for (i = 0; i < n; ++i) {
2657 1.9 christos register struct block *b = opt_state->blocks[i];
2658 1.1 christos
2659 1.1 christos b->et.edom = p;
2660 1.9 christos p += opt_state->edgewords;
2661 1.1 christos b->ef.edom = p;
2662 1.9 christos p += opt_state->edgewords;
2663 1.1 christos b->et.id = i;
2664 1.9 christos opt_state->edges[i] = &b->et;
2665 1.9 christos b->ef.id = opt_state->n_blocks + i;
2666 1.9 christos opt_state->edges[opt_state->n_blocks + i] = &b->ef;
2667 1.1 christos b->et.pred = b;
2668 1.1 christos b->ef.pred = b;
2669 1.1 christos }
2670 1.1 christos max_stmts = 0;
2671 1.1 christos for (i = 0; i < n; ++i)
2672 1.9 christos max_stmts += slength(opt_state->blocks[i]->stmts) + 1;
2673 1.1 christos /*
2674 1.1 christos * We allocate at most 3 value numbers per statement,
2675 1.1 christos * so this is an upper bound on the number of valnodes
2676 1.1 christos * we'll need.
2677 1.1 christos */
2678 1.9 christos opt_state->maxval = 3 * max_stmts;
2679 1.9 christos opt_state->vmap = (struct vmapinfo *)calloc(opt_state->maxval, sizeof(*opt_state->vmap));
2680 1.11 christos if (opt_state->vmap == NULL) {
2681 1.11 christos opt_error(opt_state, "malloc");
2682 1.11 christos }
2683 1.9 christos opt_state->vnode_base = (struct valnode *)calloc(opt_state->maxval, sizeof(*opt_state->vnode_base));
2684 1.11 christos if (opt_state->vnode_base == NULL) {
2685 1.11 christos opt_error(opt_state, "malloc");
2686 1.11 christos }
2687 1.1 christos }
2688 1.1 christos
2689 1.1 christos /*
2690 1.9 christos * This is only used when supporting optimizer debugging. It is
2691 1.9 christos * global state, so do *not* do more than one compile in parallel
2692 1.9 christos * and expect it to provide meaningful information.
2693 1.1 christos */
2694 1.1 christos #ifdef BDEBUG
2695 1.10 christos int bids[NBIDS];
2696 1.1 christos #endif
2697 1.1 christos
2698 1.11 christos static void PCAP_NORETURN conv_error(conv_state_t *, const char *, ...)
2699 1.11 christos PCAP_PRINTFLIKE(2, 3);
2700 1.11 christos
2701 1.1 christos /*
2702 1.1 christos * Returns true if successful. Returns false if a branch has
2703 1.1 christos * an offset that is too large. If so, we have marked that
2704 1.1 christos * branch so that on a subsequent iteration, it will be treated
2705 1.1 christos * properly.
2706 1.1 christos */
2707 1.1 christos static int
2708 1.11 christos convert_code_r(conv_state_t *conv_state, struct icode *ic, struct block *p)
2709 1.1 christos {
2710 1.1 christos struct bpf_insn *dst;
2711 1.1 christos struct slist *src;
2712 1.2 christos u_int slen;
2713 1.1 christos u_int off;
2714 1.1 christos struct slist **offset = NULL;
2715 1.1 christos
2716 1.9 christos if (p == 0 || isMarked(ic, p))
2717 1.1 christos return (1);
2718 1.9 christos Mark(ic, p);
2719 1.1 christos
2720 1.11 christos if (convert_code_r(conv_state, ic, JF(p)) == 0)
2721 1.1 christos return (0);
2722 1.11 christos if (convert_code_r(conv_state, ic, JT(p)) == 0)
2723 1.1 christos return (0);
2724 1.1 christos
2725 1.1 christos slen = slength(p->stmts);
2726 1.9 christos dst = conv_state->ftail -= (slen + 1 + p->longjt + p->longjf);
2727 1.1 christos /* inflate length by any extra jumps */
2728 1.1 christos
2729 1.9 christos p->offset = (int)(dst - conv_state->fstart);
2730 1.1 christos
2731 1.1 christos /* generate offset[] for convenience */
2732 1.1 christos if (slen) {
2733 1.1 christos offset = (struct slist **)calloc(slen, sizeof(struct slist *));
2734 1.1 christos if (!offset) {
2735 1.11 christos conv_error(conv_state, "not enough core");
2736 1.1 christos /*NOTREACHED*/
2737 1.1 christos }
2738 1.1 christos }
2739 1.1 christos src = p->stmts;
2740 1.1 christos for (off = 0; off < slen && src; off++) {
2741 1.1 christos #if 0
2742 1.1 christos printf("off=%d src=%x\n", off, src);
2743 1.1 christos #endif
2744 1.1 christos offset[off] = src;
2745 1.1 christos src = src->next;
2746 1.1 christos }
2747 1.1 christos
2748 1.1 christos off = 0;
2749 1.1 christos for (src = p->stmts; src; src = src->next) {
2750 1.1 christos if (src->s.code == NOP)
2751 1.1 christos continue;
2752 1.1 christos dst->code = (u_short)src->s.code;
2753 1.1 christos dst->k = src->s.k;
2754 1.1 christos
2755 1.1 christos /* fill block-local relative jump */
2756 1.1 christos if (BPF_CLASS(src->s.code) != BPF_JMP || src->s.code == (BPF_JMP|BPF_JA)) {
2757 1.1 christos #if 0
2758 1.1 christos if (src->s.jt || src->s.jf) {
2759 1.11 christos free(offset);
2760 1.11 christos conv_error(conv_state, "illegal jmp destination");
2761 1.1 christos /*NOTREACHED*/
2762 1.1 christos }
2763 1.1 christos #endif
2764 1.1 christos goto filled;
2765 1.1 christos }
2766 1.1 christos if (off == slen - 2) /*???*/
2767 1.1 christos goto filled;
2768 1.1 christos
2769 1.1 christos {
2770 1.2 christos u_int i;
2771 1.1 christos int jt, jf;
2772 1.10 christos const char ljerr[] = "%s for block-local relative jump: off=%d";
2773 1.1 christos
2774 1.1 christos #if 0
2775 1.1 christos printf("code=%x off=%d %x %x\n", src->s.code,
2776 1.1 christos off, src->s.jt, src->s.jf);
2777 1.1 christos #endif
2778 1.1 christos
2779 1.1 christos if (!src->s.jt || !src->s.jf) {
2780 1.11 christos free(offset);
2781 1.11 christos conv_error(conv_state, ljerr, "no jmp destination", off);
2782 1.1 christos /*NOTREACHED*/
2783 1.1 christos }
2784 1.1 christos
2785 1.1 christos jt = jf = 0;
2786 1.1 christos for (i = 0; i < slen; i++) {
2787 1.1 christos if (offset[i] == src->s.jt) {
2788 1.1 christos if (jt) {
2789 1.11 christos free(offset);
2790 1.11 christos conv_error(conv_state, ljerr, "multiple matches", off);
2791 1.1 christos /*NOTREACHED*/
2792 1.1 christos }
2793 1.1 christos
2794 1.10 christos if (i - off - 1 >= 256) {
2795 1.11 christos free(offset);
2796 1.11 christos conv_error(conv_state, ljerr, "out-of-range jump", off);
2797 1.10 christos /*NOTREACHED*/
2798 1.10 christos }
2799 1.10 christos dst->jt = (u_char)(i - off - 1);
2800 1.1 christos jt++;
2801 1.1 christos }
2802 1.1 christos if (offset[i] == src->s.jf) {
2803 1.1 christos if (jf) {
2804 1.11 christos free(offset);
2805 1.11 christos conv_error(conv_state, ljerr, "multiple matches", off);
2806 1.1 christos /*NOTREACHED*/
2807 1.1 christos }
2808 1.10 christos if (i - off - 1 >= 256) {
2809 1.11 christos free(offset);
2810 1.11 christos conv_error(conv_state, ljerr, "out-of-range jump", off);
2811 1.10 christos /*NOTREACHED*/
2812 1.10 christos }
2813 1.10 christos dst->jf = (u_char)(i - off - 1);
2814 1.1 christos jf++;
2815 1.1 christos }
2816 1.1 christos }
2817 1.1 christos if (!jt || !jf) {
2818 1.11 christos free(offset);
2819 1.11 christos conv_error(conv_state, ljerr, "no destination found", off);
2820 1.1 christos /*NOTREACHED*/
2821 1.1 christos }
2822 1.1 christos }
2823 1.1 christos filled:
2824 1.1 christos ++dst;
2825 1.1 christos ++off;
2826 1.1 christos }
2827 1.1 christos if (offset)
2828 1.1 christos free(offset);
2829 1.1 christos
2830 1.1 christos #ifdef BDEBUG
2831 1.10 christos if (dst - conv_state->fstart < NBIDS)
2832 1.10 christos bids[dst - conv_state->fstart] = p->id + 1;
2833 1.1 christos #endif
2834 1.1 christos dst->code = (u_short)p->s.code;
2835 1.1 christos dst->k = p->s.k;
2836 1.1 christos if (JT(p)) {
2837 1.12 christos /* number of extra jumps inserted */
2838 1.12 christos u_char extrajmps = 0;
2839 1.1 christos off = JT(p)->offset - (p->offset + slen) - 1;
2840 1.1 christos if (off >= 256) {
2841 1.1 christos /* offset too large for branch, must add a jump */
2842 1.1 christos if (p->longjt == 0) {
2843 1.12 christos /* mark this instruction and retry */
2844 1.1 christos p->longjt++;
2845 1.1 christos return(0);
2846 1.1 christos }
2847 1.12 christos dst->jt = extrajmps;
2848 1.1 christos extrajmps++;
2849 1.1 christos dst[extrajmps].code = BPF_JMP|BPF_JA;
2850 1.1 christos dst[extrajmps].k = off - extrajmps;
2851 1.1 christos }
2852 1.1 christos else
2853 1.10 christos dst->jt = (u_char)off;
2854 1.1 christos off = JF(p)->offset - (p->offset + slen) - 1;
2855 1.1 christos if (off >= 256) {
2856 1.1 christos /* offset too large for branch, must add a jump */
2857 1.1 christos if (p->longjf == 0) {
2858 1.12 christos /* mark this instruction and retry */
2859 1.1 christos p->longjf++;
2860 1.1 christos return(0);
2861 1.1 christos }
2862 1.1 christos /* branch if F to following jump */
2863 1.1 christos /* if two jumps are inserted, F goes to second one */
2864 1.12 christos dst->jf = extrajmps;
2865 1.1 christos extrajmps++;
2866 1.1 christos dst[extrajmps].code = BPF_JMP|BPF_JA;
2867 1.1 christos dst[extrajmps].k = off - extrajmps;
2868 1.1 christos }
2869 1.1 christos else
2870 1.10 christos dst->jf = (u_char)off;
2871 1.1 christos }
2872 1.1 christos return (1);
2873 1.1 christos }
2874 1.1 christos
2875 1.1 christos
2876 1.1 christos /*
2877 1.1 christos * Convert flowgraph intermediate representation to the
2878 1.1 christos * BPF array representation. Set *lenp to the number of instructions.
2879 1.1 christos *
2880 1.1 christos * This routine does *NOT* leak the memory pointed to by fp. It *must
2881 1.1 christos * not* do free(fp) before returning fp; doing so would make no sense,
2882 1.1 christos * as the BPF array pointed to by the return value of icode_to_fcode()
2883 1.1 christos * must be valid - it's being returned for use in a bpf_program structure.
2884 1.1 christos *
2885 1.1 christos * If it appears that icode_to_fcode() is leaking, the problem is that
2886 1.1 christos * the program using pcap_compile() is failing to free the memory in
2887 1.1 christos * the BPF program when it's done - the leak is in the program, not in
2888 1.1 christos * the routine that happens to be allocating the memory. (By analogy, if
2889 1.1 christos * a program calls fopen() without ever calling fclose() on the FILE *,
2890 1.1 christos * it will leak the FILE structure; the leak is not in fopen(), it's in
2891 1.1 christos * the program.) Change the program to use pcap_freecode() when it's
2892 1.1 christos * done with the filter program. See the pcap man page.
2893 1.1 christos */
2894 1.1 christos struct bpf_insn *
2895 1.12 christos icode_to_fcode(struct icode *ic, struct block *root, u_int *lenp,
2896 1.11 christos char *errbuf)
2897 1.1 christos {
2898 1.5 christos u_int n;
2899 1.1 christos struct bpf_insn *fp;
2900 1.9 christos conv_state_t conv_state;
2901 1.1 christos
2902 1.11 christos conv_state.fstart = NULL;
2903 1.11 christos conv_state.errbuf = errbuf;
2904 1.11 christos if (setjmp(conv_state.top_ctx) != 0) {
2905 1.11 christos free(conv_state.fstart);
2906 1.11 christos return NULL;
2907 1.11 christos }
2908 1.11 christos
2909 1.1 christos /*
2910 1.1 christos * Loop doing convert_code_r() until no branches remain
2911 1.1 christos * with too-large offsets.
2912 1.1 christos */
2913 1.10 christos for (;;) {
2914 1.9 christos unMarkAll(ic);
2915 1.9 christos n = *lenp = count_stmts(ic, root);
2916 1.1 christos
2917 1.1 christos fp = (struct bpf_insn *)malloc(sizeof(*fp) * n);
2918 1.11 christos if (fp == NULL) {
2919 1.12 christos (void)snprintf(errbuf, PCAP_ERRBUF_SIZE,
2920 1.11 christos "malloc");
2921 1.11 christos return NULL;
2922 1.11 christos }
2923 1.1 christos memset((char *)fp, 0, sizeof(*fp) * n);
2924 1.9 christos conv_state.fstart = fp;
2925 1.9 christos conv_state.ftail = fp + n;
2926 1.1 christos
2927 1.9 christos unMarkAll(ic);
2928 1.11 christos if (convert_code_r(&conv_state, ic, root))
2929 1.1 christos break;
2930 1.1 christos free(fp);
2931 1.1 christos }
2932 1.1 christos
2933 1.1 christos return fp;
2934 1.1 christos }
2935 1.1 christos
2936 1.1 christos /*
2937 1.11 christos * For iconv_to_fconv() errors.
2938 1.11 christos */
2939 1.11 christos static void PCAP_NORETURN
2940 1.11 christos conv_error(conv_state_t *conv_state, const char *fmt, ...)
2941 1.11 christos {
2942 1.11 christos va_list ap;
2943 1.11 christos
2944 1.11 christos va_start(ap, fmt);
2945 1.12 christos (void)vsnprintf(conv_state->errbuf,
2946 1.11 christos PCAP_ERRBUF_SIZE, fmt, ap);
2947 1.11 christos va_end(ap);
2948 1.11 christos longjmp(conv_state->top_ctx, 1);
2949 1.11 christos /* NOTREACHED */
2950 1.12 christos #ifdef _AIX
2951 1.12 christos PCAP_UNREACHABLE
2952 1.12 christos #endif /* _AIX */
2953 1.11 christos }
2954 1.11 christos
2955 1.11 christos /*
2956 1.1 christos * Make a copy of a BPF program and put it in the "fcode" member of
2957 1.1 christos * a "pcap_t".
2958 1.1 christos *
2959 1.1 christos * If we fail to allocate memory for the copy, fill in the "errbuf"
2960 1.1 christos * member of the "pcap_t" with an error message, and return -1;
2961 1.1 christos * otherwise, return 0.
2962 1.1 christos */
2963 1.1 christos int
2964 1.13 christos pcapint_install_bpf_program(pcap_t *p, struct bpf_program *fp)
2965 1.1 christos {
2966 1.1 christos size_t prog_size;
2967 1.1 christos
2968 1.1 christos /*
2969 1.1 christos * Validate the program.
2970 1.1 christos */
2971 1.13 christos if (!pcapint_validate_filter(fp->bf_insns, fp->bf_len)) {
2972 1.12 christos snprintf(p->errbuf, sizeof(p->errbuf),
2973 1.1 christos "BPF program is not valid");
2974 1.1 christos return (-1);
2975 1.1 christos }
2976 1.1 christos
2977 1.1 christos /*
2978 1.1 christos * Free up any already installed program.
2979 1.1 christos */
2980 1.1 christos pcap_freecode(&p->fcode);
2981 1.1 christos
2982 1.1 christos prog_size = sizeof(*fp->bf_insns) * fp->bf_len;
2983 1.1 christos p->fcode.bf_len = fp->bf_len;
2984 1.1 christos p->fcode.bf_insns = (struct bpf_insn *)malloc(prog_size);
2985 1.1 christos if (p->fcode.bf_insns == NULL) {
2986 1.13 christos pcapint_fmt_errmsg_for_errno(p->errbuf, sizeof(p->errbuf),
2987 1.10 christos errno, "malloc");
2988 1.1 christos return (-1);
2989 1.1 christos }
2990 1.1 christos memcpy(p->fcode.bf_insns, fp->bf_insns, prog_size);
2991 1.1 christos return (0);
2992 1.1 christos }
2993 1.1 christos
2994 1.1 christos #ifdef BDEBUG
2995 1.1 christos static void
2996 1.9 christos dot_dump_node(struct icode *ic, struct block *block, struct bpf_program *prog,
2997 1.9 christos FILE *out)
2998 1.8 christos {
2999 1.8 christos int icount, noffset;
3000 1.8 christos int i;
3001 1.8 christos
3002 1.9 christos if (block == NULL || isMarked(ic, block))
3003 1.8 christos return;
3004 1.9 christos Mark(ic, block);
3005 1.8 christos
3006 1.8 christos icount = slength(block->stmts) + 1 + block->longjt + block->longjf;
3007 1.8 christos noffset = min(block->offset + icount, (int)prog->bf_len);
3008 1.8 christos
3009 1.12 christos fprintf(out, "\tblock%u [shape=ellipse, id=\"block-%u\" label=\"BLOCK%u\\n", block->id, block->id, block->id);
3010 1.8 christos for (i = block->offset; i < noffset; i++) {
3011 1.8 christos fprintf(out, "\\n%s", bpf_image(prog->bf_insns + i, i));
3012 1.8 christos }
3013 1.8 christos fprintf(out, "\" tooltip=\"");
3014 1.8 christos for (i = 0; i < BPF_MEMWORDS; i++)
3015 1.10 christos if (block->val[i] != VAL_UNKNOWN)
3016 1.8 christos fprintf(out, "val[%d]=%d ", i, block->val[i]);
3017 1.8 christos fprintf(out, "val[A]=%d ", block->val[A_ATOM]);
3018 1.8 christos fprintf(out, "val[X]=%d", block->val[X_ATOM]);
3019 1.8 christos fprintf(out, "\"");
3020 1.8 christos if (JT(block) == NULL)
3021 1.8 christos fprintf(out, ", peripheries=2");
3022 1.8 christos fprintf(out, "];\n");
3023 1.8 christos
3024 1.9 christos dot_dump_node(ic, JT(block), prog, out);
3025 1.9 christos dot_dump_node(ic, JF(block), prog, out);
3026 1.8 christos }
3027 1.9 christos
3028 1.8 christos static void
3029 1.9 christos dot_dump_edge(struct icode *ic, struct block *block, FILE *out)
3030 1.8 christos {
3031 1.9 christos if (block == NULL || isMarked(ic, block))
3032 1.8 christos return;
3033 1.9 christos Mark(ic, block);
3034 1.8 christos
3035 1.8 christos if (JT(block)) {
3036 1.12 christos fprintf(out, "\t\"block%u\":se -> \"block%u\":n [label=\"T\"]; \n",
3037 1.8 christos block->id, JT(block)->id);
3038 1.12 christos fprintf(out, "\t\"block%u\":sw -> \"block%u\":n [label=\"F\"]; \n",
3039 1.8 christos block->id, JF(block)->id);
3040 1.8 christos }
3041 1.9 christos dot_dump_edge(ic, JT(block), out);
3042 1.9 christos dot_dump_edge(ic, JF(block), out);
3043 1.8 christos }
3044 1.9 christos
3045 1.8 christos /* Output the block CFG using graphviz/DOT language
3046 1.8 christos * In the CFG, block's code, value index for each registers at EXIT,
3047 1.8 christos * and the jump relationship is show.
3048 1.8 christos *
3049 1.8 christos * example DOT for BPF `ip src host 1.1.1.1' is:
3050 1.8 christos digraph BPF {
3051 1.12 christos block0 [shape=ellipse, id="block-0" label="BLOCK0\n\n(000) ldh [12]\n(001) jeq #0x800 jt 2 jf 5" tooltip="val[A]=0 val[X]=0"];
3052 1.12 christos block1 [shape=ellipse, id="block-1" label="BLOCK1\n\n(002) ld [26]\n(003) jeq #0x1010101 jt 4 jf 5" tooltip="val[A]=0 val[X]=0"];
3053 1.12 christos block2 [shape=ellipse, id="block-2" label="BLOCK2\n\n(004) ret #68" tooltip="val[A]=0 val[X]=0", peripheries=2];
3054 1.12 christos block3 [shape=ellipse, id="block-3" label="BLOCK3\n\n(005) ret #0" tooltip="val[A]=0 val[X]=0", peripheries=2];
3055 1.12 christos "block0":se -> "block1":n [label="T"];
3056 1.12 christos "block0":sw -> "block3":n [label="F"];
3057 1.12 christos "block1":se -> "block2":n [label="T"];
3058 1.12 christos "block1":sw -> "block3":n [label="F"];
3059 1.8 christos }
3060 1.8 christos *
3061 1.12 christos * After install graphviz on https://www.graphviz.org/, save it as bpf.dot
3062 1.8 christos * and run `dot -Tpng -O bpf.dot' to draw the graph.
3063 1.8 christos */
3064 1.11 christos static int
3065 1.11 christos dot_dump(struct icode *ic, char *errbuf)
3066 1.8 christos {
3067 1.8 christos struct bpf_program f;
3068 1.8 christos FILE *out = stdout;
3069 1.8 christos
3070 1.8 christos memset(bids, 0, sizeof bids);
3071 1.11 christos f.bf_insns = icode_to_fcode(ic, ic->root, &f.bf_len, errbuf);
3072 1.11 christos if (f.bf_insns == NULL)
3073 1.11 christos return -1;
3074 1.8 christos
3075 1.8 christos fprintf(out, "digraph BPF {\n");
3076 1.9 christos unMarkAll(ic);
3077 1.9 christos dot_dump_node(ic, ic->root, &f, out);
3078 1.9 christos unMarkAll(ic);
3079 1.9 christos dot_dump_edge(ic, ic->root, out);
3080 1.8 christos fprintf(out, "}\n");
3081 1.8 christos
3082 1.8 christos free((char *)f.bf_insns);
3083 1.11 christos return 0;
3084 1.8 christos }
3085 1.8 christos
3086 1.11 christos static int
3087 1.11 christos plain_dump(struct icode *ic, char *errbuf)
3088 1.1 christos {
3089 1.1 christos struct bpf_program f;
3090 1.1 christos
3091 1.1 christos memset(bids, 0, sizeof bids);
3092 1.11 christos f.bf_insns = icode_to_fcode(ic, ic->root, &f.bf_len, errbuf);
3093 1.11 christos if (f.bf_insns == NULL)
3094 1.11 christos return -1;
3095 1.1 christos bpf_dump(&f, 1);
3096 1.1 christos putchar('\n');
3097 1.1 christos free((char *)f.bf_insns);
3098 1.11 christos return 0;
3099 1.1 christos }
3100 1.9 christos
3101 1.8 christos static void
3102 1.11 christos opt_dump(opt_state_t *opt_state, struct icode *ic)
3103 1.8 christos {
3104 1.11 christos int status;
3105 1.11 christos char errbuf[PCAP_ERRBUF_SIZE];
3106 1.11 christos
3107 1.10 christos /*
3108 1.10 christos * If the CFG, in DOT format, is requested, output it rather than
3109 1.10 christos * the code that would be generated from that graph.
3110 1.8 christos */
3111 1.10 christos if (pcap_print_dot_graph)
3112 1.11 christos status = dot_dump(ic, errbuf);
3113 1.8 christos else
3114 1.11 christos status = plain_dump(ic, errbuf);
3115 1.11 christos if (status == -1)
3116 1.11 christos opt_error(opt_state, "opt_dump: icode_to_fcode failed: %s", errbuf);
3117 1.8 christos }
3118 1.1 christos #endif
3119