optimize.c revision 1.12 1 1.12 christos /* $NetBSD: optimize.c,v 1.12 2023/08/17 15:18:12 christos Exp $ */
2 1.5 christos
3 1.1 christos /*
4 1.1 christos * Copyright (c) 1988, 1989, 1990, 1991, 1993, 1994, 1995, 1996
5 1.1 christos * The Regents of the University of California. All rights reserved.
6 1.1 christos *
7 1.1 christos * Redistribution and use in source and binary forms, with or without
8 1.1 christos * modification, are permitted provided that: (1) source code distributions
9 1.1 christos * retain the above copyright notice and this paragraph in its entirety, (2)
10 1.1 christos * distributions including binary code include the above copyright notice and
11 1.1 christos * this paragraph in its entirety in the documentation or other materials
12 1.1 christos * provided with the distribution, and (3) all advertising materials mentioning
13 1.1 christos * features or use of this software display the following acknowledgement:
14 1.1 christos * ``This product includes software developed by the University of California,
15 1.1 christos * Lawrence Berkeley Laboratory and its contributors.'' Neither the name of
16 1.1 christos * the University nor the names of its contributors may be used to endorse
17 1.1 christos * or promote products derived from this software without specific prior
18 1.1 christos * written permission.
19 1.1 christos * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
20 1.1 christos * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
21 1.1 christos * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
22 1.1 christos *
23 1.10 christos * Optimization module for BPF code intermediate representation.
24 1.1 christos */
25 1.7 christos
26 1.7 christos #include <sys/cdefs.h>
27 1.12 christos __RCSID("$NetBSD: optimize.c,v 1.12 2023/08/17 15:18:12 christos Exp $");
28 1.1 christos
29 1.1 christos #ifdef HAVE_CONFIG_H
30 1.10 christos #include <config.h>
31 1.1 christos #endif
32 1.1 christos
33 1.10 christos #include <pcap-types.h>
34 1.1 christos
35 1.1 christos #include <stdio.h>
36 1.1 christos #include <stdlib.h>
37 1.1 christos #include <memory.h>
38 1.11 christos #include <setjmp.h>
39 1.1 christos #include <string.h>
40 1.12 christos #include <limits.h> /* for SIZE_MAX */
41 1.1 christos #include <errno.h>
42 1.1 christos
43 1.1 christos #include "pcap-int.h"
44 1.1 christos
45 1.1 christos #include "gencode.h"
46 1.10 christos #include "optimize.h"
47 1.12 christos #include "diag-control.h"
48 1.1 christos
49 1.1 christos #ifdef HAVE_OS_PROTO_H
50 1.1 christos #include "os-proto.h"
51 1.1 christos #endif
52 1.1 christos
53 1.1 christos #ifdef BDEBUG
54 1.10 christos /*
55 1.10 christos * The internal "debug printout" flag for the filter expression optimizer.
56 1.10 christos * The code to print that stuff is present only if BDEBUG is defined, so
57 1.10 christos * the flag, and the routine to set it, are defined only if BDEBUG is
58 1.10 christos * defined.
59 1.10 christos */
60 1.10 christos static int pcap_optimizer_debug;
61 1.10 christos
62 1.10 christos /*
63 1.10 christos * Routine to set that flag.
64 1.10 christos *
65 1.10 christos * This is intended for libpcap developers, not for general use.
66 1.10 christos * If you want to set these in a program, you'll have to declare this
67 1.10 christos * routine yourself, with the appropriate DLL import attribute on Windows;
68 1.10 christos * it's not declared in any header file, and won't be declared in any
69 1.10 christos * header file provided by libpcap.
70 1.10 christos */
71 1.10 christos PCAP_API void pcap_set_optimizer_debug(int value);
72 1.10 christos
73 1.10 christos PCAP_API_DEF void
74 1.10 christos pcap_set_optimizer_debug(int value)
75 1.10 christos {
76 1.10 christos pcap_optimizer_debug = value;
77 1.10 christos }
78 1.10 christos
79 1.10 christos /*
80 1.10 christos * The internal "print dot graph" flag for the filter expression optimizer.
81 1.10 christos * The code to print that stuff is present only if BDEBUG is defined, so
82 1.10 christos * the flag, and the routine to set it, are defined only if BDEBUG is
83 1.10 christos * defined.
84 1.10 christos */
85 1.10 christos static int pcap_print_dot_graph;
86 1.10 christos
87 1.10 christos /*
88 1.10 christos * Routine to set that flag.
89 1.10 christos *
90 1.10 christos * This is intended for libpcap developers, not for general use.
91 1.10 christos * If you want to set these in a program, you'll have to declare this
92 1.10 christos * routine yourself, with the appropriate DLL import attribute on Windows;
93 1.10 christos * it's not declared in any header file, and won't be declared in any
94 1.10 christos * header file provided by libpcap.
95 1.10 christos */
96 1.10 christos PCAP_API void pcap_set_print_dot_graph(int value);
97 1.10 christos
98 1.10 christos PCAP_API_DEF void
99 1.10 christos pcap_set_print_dot_graph(int value)
100 1.10 christos {
101 1.10 christos pcap_print_dot_graph = value;
102 1.10 christos }
103 1.1 christos
104 1.1 christos #endif
105 1.1 christos
106 1.9 christos /*
107 1.10 christos * lowest_set_bit().
108 1.10 christos *
109 1.10 christos * Takes a 32-bit integer as an argument.
110 1.10 christos *
111 1.10 christos * If handed a non-zero value, returns the index of the lowest set bit,
112 1.12 christos * counting upwards from zero.
113 1.10 christos *
114 1.10 christos * If handed zero, the results are platform- and compiler-dependent.
115 1.10 christos * Keep it out of the light, don't give it any water, don't feed it
116 1.10 christos * after midnight, and don't pass zero to it.
117 1.10 christos *
118 1.10 christos * This is the same as the count of trailing zeroes in the word.
119 1.9 christos */
120 1.10 christos #if PCAP_IS_AT_LEAST_GNUC_VERSION(3,4)
121 1.10 christos /*
122 1.10 christos * GCC 3.4 and later; we have __builtin_ctz().
123 1.10 christos */
124 1.12 christos #define lowest_set_bit(mask) ((u_int)__builtin_ctz(mask))
125 1.10 christos #elif defined(_MSC_VER)
126 1.10 christos /*
127 1.10 christos * Visual Studio; we support only 2005 and later, so use
128 1.10 christos * _BitScanForward().
129 1.10 christos */
130 1.10 christos #include <intrin.h>
131 1.10 christos
132 1.10 christos #ifndef __clang__
133 1.10 christos #pragma intrinsic(_BitScanForward)
134 1.10 christos #endif
135 1.10 christos
136 1.12 christos static __forceinline u_int
137 1.10 christos lowest_set_bit(int mask)
138 1.10 christos {
139 1.10 christos unsigned long bit;
140 1.10 christos
141 1.10 christos /*
142 1.10 christos * Don't sign-extend mask if long is longer than int.
143 1.10 christos * (It's currently not, in MSVC, even on 64-bit platforms, but....)
144 1.10 christos */
145 1.10 christos if (_BitScanForward(&bit, (unsigned int)mask) == 0)
146 1.12 christos abort(); /* mask is zero */
147 1.12 christos return (u_int)bit;
148 1.10 christos }
149 1.10 christos #elif defined(MSDOS) && defined(__DJGPP__)
150 1.10 christos /*
151 1.10 christos * MS-DOS with DJGPP, which declares ffs() in <string.h>, which
152 1.10 christos * we've already included.
153 1.10 christos */
154 1.12 christos #define lowest_set_bit(mask) ((u_int)(ffs((mask)) - 1))
155 1.10 christos #elif (defined(MSDOS) && defined(__WATCOMC__)) || defined(STRINGS_H_DECLARES_FFS)
156 1.10 christos /*
157 1.10 christos * MS-DOS with Watcom C, which has <strings.h> and declares ffs() there,
158 1.10 christos * or some other platform (UN*X conforming to a sufficient recent version
159 1.10 christos * of the Single UNIX Specification).
160 1.10 christos */
161 1.10 christos #include <strings.h>
162 1.12 christos #define lowest_set_bit(mask) (u_int)((ffs((mask)) - 1))
163 1.10 christos #else
164 1.9 christos /*
165 1.10 christos * None of the above.
166 1.10 christos * Use a perfect-hash-function-based function.
167 1.9 christos */
168 1.12 christos static u_int
169 1.10 christos lowest_set_bit(int mask)
170 1.9 christos {
171 1.10 christos unsigned int v = (unsigned int)mask;
172 1.10 christos
173 1.12 christos static const u_int MultiplyDeBruijnBitPosition[32] = {
174 1.10 christos 0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
175 1.10 christos 31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9
176 1.10 christos };
177 1.9 christos
178 1.10 christos /*
179 1.10 christos * We strip off all but the lowermost set bit (v & ~v),
180 1.10 christos * and perform a minimal perfect hash on it to look up the
181 1.10 christos * number of low-order zero bits in a table.
182 1.10 christos *
183 1.10 christos * See:
184 1.10 christos *
185 1.10 christos * http://7ooo.mooo.com/text/ComputingTrailingZerosHOWTO.pdf
186 1.10 christos *
187 1.10 christos * http://supertech.csail.mit.edu/papers/debruijn.pdf
188 1.10 christos */
189 1.10 christos return (MultiplyDeBruijnBitPosition[((v & -v) * 0x077CB531U) >> 27]);
190 1.9 christos }
191 1.1 christos #endif
192 1.1 christos
193 1.1 christos /*
194 1.1 christos * Represents a deleted instruction.
195 1.1 christos */
196 1.1 christos #define NOP -1
197 1.1 christos
198 1.1 christos /*
199 1.1 christos * Register numbers for use-def values.
200 1.1 christos * 0 through BPF_MEMWORDS-1 represent the corresponding scratch memory
201 1.1 christos * location. A_ATOM is the accumulator and X_ATOM is the index
202 1.1 christos * register.
203 1.1 christos */
204 1.1 christos #define A_ATOM BPF_MEMWORDS
205 1.1 christos #define X_ATOM (BPF_MEMWORDS+1)
206 1.1 christos
207 1.1 christos /*
208 1.1 christos * This define is used to represent *both* the accumulator and
209 1.1 christos * x register in use-def computations.
210 1.1 christos * Currently, the use-def code assumes only one definition per instruction.
211 1.1 christos */
212 1.1 christos #define AX_ATOM N_ATOMS
213 1.1 christos
214 1.1 christos /*
215 1.9 christos * These data structures are used in a Cocke and Shwarz style
216 1.9 christos * value numbering scheme. Since the flowgraph is acyclic,
217 1.9 christos * exit values can be propagated from a node's predecessors
218 1.9 christos * provided it is uniquely defined.
219 1.1 christos */
220 1.9 christos struct valnode {
221 1.9 christos int code;
222 1.12 christos bpf_u_int32 v0, v1;
223 1.12 christos int val; /* the value number */
224 1.9 christos struct valnode *next;
225 1.9 christos };
226 1.1 christos
227 1.9 christos /* Integer constants mapped with the load immediate opcode. */
228 1.12 christos #define K(i) F(opt_state, BPF_LD|BPF_IMM|BPF_W, i, 0U)
229 1.1 christos
230 1.9 christos struct vmapinfo {
231 1.9 christos int is_const;
232 1.12 christos bpf_u_int32 const_val;
233 1.9 christos };
234 1.1 christos
235 1.10 christos typedef struct {
236 1.9 christos /*
237 1.11 christos * Place to longjmp to on an error.
238 1.11 christos */
239 1.11 christos jmp_buf top_ctx;
240 1.11 christos
241 1.11 christos /*
242 1.11 christos * The buffer into which to put error message.
243 1.11 christos */
244 1.11 christos char *errbuf;
245 1.11 christos
246 1.11 christos /*
247 1.9 christos * A flag to indicate that further optimization is needed.
248 1.9 christos * Iterative passes are continued until a given pass yields no
249 1.12 christos * code simplification or branch movement.
250 1.9 christos */
251 1.9 christos int done;
252 1.1 christos
253 1.12 christos /*
254 1.12 christos * XXX - detect loops that do nothing but repeated AND/OR pullups
255 1.12 christos * and edge moves.
256 1.12 christos * If 100 passes in a row do nothing but that, treat that as a
257 1.12 christos * sign that we're in a loop that just shuffles in a cycle in
258 1.12 christos * which each pass just shuffles the code and we eventually
259 1.12 christos * get back to the original configuration.
260 1.12 christos *
261 1.12 christos * XXX - we need a non-heuristic way of detecting, or preventing,
262 1.12 christos * such a cycle.
263 1.12 christos */
264 1.12 christos int non_branch_movement_performed;
265 1.12 christos
266 1.12 christos u_int n_blocks; /* number of blocks in the CFG; guaranteed to be > 0, as it's a RET instruction at a minimum */
267 1.9 christos struct block **blocks;
268 1.12 christos u_int n_edges; /* twice n_blocks, so guaranteed to be > 0 */
269 1.9 christos struct edge **edges;
270 1.1 christos
271 1.9 christos /*
272 1.9 christos * A bit vector set representation of the dominators.
273 1.9 christos * We round up the set size to the next power of two.
274 1.9 christos */
275 1.12 christos u_int nodewords; /* number of 32-bit words for a bit vector of "number of nodes" bits; guaranteed to be > 0 */
276 1.12 christos u_int edgewords; /* number of 32-bit words for a bit vector of "number of edges" bits; guaranteed to be > 0 */
277 1.9 christos struct block **levels;
278 1.9 christos bpf_u_int32 *space;
279 1.1 christos
280 1.1 christos #define BITS_PER_WORD (8*sizeof(bpf_u_int32))
281 1.1 christos /*
282 1.1 christos * True if a is in uset {p}
283 1.1 christos */
284 1.1 christos #define SET_MEMBER(p, a) \
285 1.11 christos ((p)[(unsigned)(a) / BITS_PER_WORD] & ((bpf_u_int32)1 << ((unsigned)(a) % BITS_PER_WORD)))
286 1.1 christos
287 1.1 christos /*
288 1.1 christos * Add 'a' to uset p.
289 1.1 christos */
290 1.1 christos #define SET_INSERT(p, a) \
291 1.11 christos (p)[(unsigned)(a) / BITS_PER_WORD] |= ((bpf_u_int32)1 << ((unsigned)(a) % BITS_PER_WORD))
292 1.1 christos
293 1.1 christos /*
294 1.1 christos * Delete 'a' from uset p.
295 1.1 christos */
296 1.1 christos #define SET_DELETE(p, a) \
297 1.11 christos (p)[(unsigned)(a) / BITS_PER_WORD] &= ~((bpf_u_int32)1 << ((unsigned)(a) % BITS_PER_WORD))
298 1.1 christos
299 1.1 christos /*
300 1.1 christos * a := a intersect b
301 1.12 christos * n must be guaranteed to be > 0
302 1.1 christos */
303 1.1 christos #define SET_INTERSECT(a, b, n)\
304 1.1 christos {\
305 1.1 christos register bpf_u_int32 *_x = a, *_y = b;\
306 1.12 christos register u_int _n = n;\
307 1.12 christos do *_x++ &= *_y++; while (--_n != 0);\
308 1.1 christos }
309 1.1 christos
310 1.1 christos /*
311 1.1 christos * a := a - b
312 1.12 christos * n must be guaranteed to be > 0
313 1.1 christos */
314 1.1 christos #define SET_SUBTRACT(a, b, n)\
315 1.1 christos {\
316 1.1 christos register bpf_u_int32 *_x = a, *_y = b;\
317 1.12 christos register u_int _n = n;\
318 1.12 christos do *_x++ &=~ *_y++; while (--_n != 0);\
319 1.1 christos }
320 1.1 christos
321 1.1 christos /*
322 1.1 christos * a := a union b
323 1.12 christos * n must be guaranteed to be > 0
324 1.1 christos */
325 1.1 christos #define SET_UNION(a, b, n)\
326 1.1 christos {\
327 1.1 christos register bpf_u_int32 *_x = a, *_y = b;\
328 1.12 christos register u_int _n = n;\
329 1.12 christos do *_x++ |= *_y++; while (--_n != 0);\
330 1.1 christos }
331 1.1 christos
332 1.9 christos uset all_dom_sets;
333 1.9 christos uset all_closure_sets;
334 1.9 christos uset all_edge_sets;
335 1.9 christos
336 1.9 christos #define MODULUS 213
337 1.9 christos struct valnode *hashtbl[MODULUS];
338 1.12 christos bpf_u_int32 curval;
339 1.12 christos bpf_u_int32 maxval;
340 1.9 christos
341 1.9 christos struct vmapinfo *vmap;
342 1.9 christos struct valnode *vnode_base;
343 1.9 christos struct valnode *next_vnode;
344 1.10 christos } opt_state_t;
345 1.9 christos
346 1.9 christos typedef struct {
347 1.9 christos /*
348 1.11 christos * Place to longjmp to on an error.
349 1.11 christos */
350 1.11 christos jmp_buf top_ctx;
351 1.11 christos
352 1.11 christos /*
353 1.11 christos * The buffer into which to put error message.
354 1.11 christos */
355 1.11 christos char *errbuf;
356 1.11 christos
357 1.11 christos /*
358 1.9 christos * Some pointers used to convert the basic block form of the code,
359 1.9 christos * into the array form that BPF requires. 'fstart' will point to
360 1.9 christos * the malloc'd array while 'ftail' is used during the recursive
361 1.9 christos * traversal.
362 1.9 christos */
363 1.9 christos struct bpf_insn *fstart;
364 1.9 christos struct bpf_insn *ftail;
365 1.9 christos } conv_state_t;
366 1.9 christos
367 1.11 christos static void opt_init(opt_state_t *, struct icode *);
368 1.9 christos static void opt_cleanup(opt_state_t *);
369 1.11 christos static void PCAP_NORETURN opt_error(opt_state_t *, const char *, ...)
370 1.11 christos PCAP_PRINTFLIKE(2, 3);
371 1.9 christos
372 1.9 christos static void intern_blocks(opt_state_t *, struct icode *);
373 1.9 christos
374 1.9 christos static void find_inedges(opt_state_t *, struct block *);
375 1.9 christos #ifdef BDEBUG
376 1.11 christos static void opt_dump(opt_state_t *, struct icode *);
377 1.9 christos #endif
378 1.1 christos
379 1.1 christos #ifndef MAX
380 1.1 christos #define MAX(a,b) ((a)>(b)?(a):(b))
381 1.1 christos #endif
382 1.1 christos
383 1.1 christos static void
384 1.9 christos find_levels_r(opt_state_t *opt_state, struct icode *ic, struct block *b)
385 1.1 christos {
386 1.1 christos int level;
387 1.1 christos
388 1.9 christos if (isMarked(ic, b))
389 1.1 christos return;
390 1.1 christos
391 1.9 christos Mark(ic, b);
392 1.1 christos b->link = 0;
393 1.1 christos
394 1.1 christos if (JT(b)) {
395 1.9 christos find_levels_r(opt_state, ic, JT(b));
396 1.9 christos find_levels_r(opt_state, ic, JF(b));
397 1.1 christos level = MAX(JT(b)->level, JF(b)->level) + 1;
398 1.1 christos } else
399 1.1 christos level = 0;
400 1.1 christos b->level = level;
401 1.9 christos b->link = opt_state->levels[level];
402 1.9 christos opt_state->levels[level] = b;
403 1.1 christos }
404 1.1 christos
405 1.1 christos /*
406 1.1 christos * Level graph. The levels go from 0 at the leaves to
407 1.9 christos * N_LEVELS at the root. The opt_state->levels[] array points to the
408 1.1 christos * first node of the level list, whose elements are linked
409 1.1 christos * with the 'link' field of the struct block.
410 1.1 christos */
411 1.1 christos static void
412 1.9 christos find_levels(opt_state_t *opt_state, struct icode *ic)
413 1.1 christos {
414 1.9 christos memset((char *)opt_state->levels, 0, opt_state->n_blocks * sizeof(*opt_state->levels));
415 1.9 christos unMarkAll(ic);
416 1.9 christos find_levels_r(opt_state, ic, ic->root);
417 1.1 christos }
418 1.1 christos
419 1.1 christos /*
420 1.1 christos * Find dominator relationships.
421 1.1 christos * Assumes graph has been leveled.
422 1.1 christos */
423 1.1 christos static void
424 1.9 christos find_dom(opt_state_t *opt_state, struct block *root)
425 1.1 christos {
426 1.12 christos u_int i;
427 1.12 christos int level;
428 1.1 christos struct block *b;
429 1.1 christos bpf_u_int32 *x;
430 1.1 christos
431 1.1 christos /*
432 1.1 christos * Initialize sets to contain all nodes.
433 1.1 christos */
434 1.9 christos x = opt_state->all_dom_sets;
435 1.12 christos /*
436 1.12 christos * In opt_init(), we've made sure the product doesn't overflow.
437 1.12 christos */
438 1.9 christos i = opt_state->n_blocks * opt_state->nodewords;
439 1.12 christos while (i != 0) {
440 1.12 christos --i;
441 1.10 christos *x++ = 0xFFFFFFFFU;
442 1.12 christos }
443 1.1 christos /* Root starts off empty. */
444 1.12 christos for (i = opt_state->nodewords; i != 0;) {
445 1.12 christos --i;
446 1.1 christos root->dom[i] = 0;
447 1.12 christos }
448 1.1 christos
449 1.1 christos /* root->level is the highest level no found. */
450 1.12 christos for (level = root->level; level >= 0; --level) {
451 1.12 christos for (b = opt_state->levels[level]; b; b = b->link) {
452 1.1 christos SET_INSERT(b->dom, b->id);
453 1.1 christos if (JT(b) == 0)
454 1.1 christos continue;
455 1.9 christos SET_INTERSECT(JT(b)->dom, b->dom, opt_state->nodewords);
456 1.9 christos SET_INTERSECT(JF(b)->dom, b->dom, opt_state->nodewords);
457 1.1 christos }
458 1.1 christos }
459 1.1 christos }
460 1.1 christos
461 1.1 christos static void
462 1.9 christos propedom(opt_state_t *opt_state, struct edge *ep)
463 1.1 christos {
464 1.1 christos SET_INSERT(ep->edom, ep->id);
465 1.1 christos if (ep->succ) {
466 1.9 christos SET_INTERSECT(ep->succ->et.edom, ep->edom, opt_state->edgewords);
467 1.9 christos SET_INTERSECT(ep->succ->ef.edom, ep->edom, opt_state->edgewords);
468 1.1 christos }
469 1.1 christos }
470 1.1 christos
471 1.1 christos /*
472 1.1 christos * Compute edge dominators.
473 1.1 christos * Assumes graph has been leveled and predecessors established.
474 1.1 christos */
475 1.1 christos static void
476 1.9 christos find_edom(opt_state_t *opt_state, struct block *root)
477 1.1 christos {
478 1.12 christos u_int i;
479 1.1 christos uset x;
480 1.12 christos int level;
481 1.1 christos struct block *b;
482 1.1 christos
483 1.9 christos x = opt_state->all_edge_sets;
484 1.12 christos /*
485 1.12 christos * In opt_init(), we've made sure the product doesn't overflow.
486 1.12 christos */
487 1.12 christos for (i = opt_state->n_edges * opt_state->edgewords; i != 0; ) {
488 1.12 christos --i;
489 1.10 christos x[i] = 0xFFFFFFFFU;
490 1.12 christos }
491 1.1 christos
492 1.1 christos /* root->level is the highest level no found. */
493 1.9 christos memset(root->et.edom, 0, opt_state->edgewords * sizeof(*(uset)0));
494 1.9 christos memset(root->ef.edom, 0, opt_state->edgewords * sizeof(*(uset)0));
495 1.12 christos for (level = root->level; level >= 0; --level) {
496 1.12 christos for (b = opt_state->levels[level]; b != 0; b = b->link) {
497 1.9 christos propedom(opt_state, &b->et);
498 1.9 christos propedom(opt_state, &b->ef);
499 1.1 christos }
500 1.1 christos }
501 1.1 christos }
502 1.1 christos
503 1.1 christos /*
504 1.1 christos * Find the backwards transitive closure of the flow graph. These sets
505 1.1 christos * are backwards in the sense that we find the set of nodes that reach
506 1.1 christos * a given node, not the set of nodes that can be reached by a node.
507 1.1 christos *
508 1.1 christos * Assumes graph has been leveled.
509 1.1 christos */
510 1.1 christos static void
511 1.9 christos find_closure(opt_state_t *opt_state, struct block *root)
512 1.1 christos {
513 1.12 christos int level;
514 1.1 christos struct block *b;
515 1.1 christos
516 1.1 christos /*
517 1.1 christos * Initialize sets to contain no nodes.
518 1.1 christos */
519 1.9 christos memset((char *)opt_state->all_closure_sets, 0,
520 1.9 christos opt_state->n_blocks * opt_state->nodewords * sizeof(*opt_state->all_closure_sets));
521 1.1 christos
522 1.1 christos /* root->level is the highest level no found. */
523 1.12 christos for (level = root->level; level >= 0; --level) {
524 1.12 christos for (b = opt_state->levels[level]; b; b = b->link) {
525 1.1 christos SET_INSERT(b->closure, b->id);
526 1.1 christos if (JT(b) == 0)
527 1.1 christos continue;
528 1.9 christos SET_UNION(JT(b)->closure, b->closure, opt_state->nodewords);
529 1.9 christos SET_UNION(JF(b)->closure, b->closure, opt_state->nodewords);
530 1.1 christos }
531 1.1 christos }
532 1.1 christos }
533 1.1 christos
534 1.1 christos /*
535 1.12 christos * Return the register number that is used by s.
536 1.12 christos *
537 1.12 christos * Returns ATOM_A if A is used, ATOM_X if X is used, AX_ATOM if both A and X
538 1.12 christos * are used, the scratch memory location's number if a scratch memory
539 1.12 christos * location is used (e.g., 0 for M[0]), or -1 if none of those are used.
540 1.1 christos *
541 1.1 christos * The implementation should probably change to an array access.
542 1.1 christos */
543 1.1 christos static int
544 1.6 christos atomuse(struct stmt *s)
545 1.1 christos {
546 1.1 christos register int c = s->code;
547 1.1 christos
548 1.1 christos if (c == NOP)
549 1.1 christos return -1;
550 1.1 christos
551 1.1 christos switch (BPF_CLASS(c)) {
552 1.1 christos
553 1.1 christos case BPF_RET:
554 1.1 christos return (BPF_RVAL(c) == BPF_A) ? A_ATOM :
555 1.1 christos (BPF_RVAL(c) == BPF_X) ? X_ATOM : -1;
556 1.1 christos
557 1.1 christos case BPF_LD:
558 1.1 christos case BPF_LDX:
559 1.12 christos /*
560 1.12 christos * As there are fewer than 2^31 memory locations,
561 1.12 christos * s->k should be convertible to int without problems.
562 1.12 christos */
563 1.1 christos return (BPF_MODE(c) == BPF_IND) ? X_ATOM :
564 1.12 christos (BPF_MODE(c) == BPF_MEM) ? (int)s->k : -1;
565 1.1 christos
566 1.1 christos case BPF_ST:
567 1.1 christos return A_ATOM;
568 1.1 christos
569 1.1 christos case BPF_STX:
570 1.1 christos return X_ATOM;
571 1.1 christos
572 1.1 christos case BPF_JMP:
573 1.1 christos case BPF_ALU:
574 1.1 christos if (BPF_SRC(c) == BPF_X)
575 1.1 christos return AX_ATOM;
576 1.1 christos return A_ATOM;
577 1.1 christos
578 1.1 christos case BPF_MISC:
579 1.1 christos return BPF_MISCOP(c) == BPF_TXA ? X_ATOM : A_ATOM;
580 1.1 christos }
581 1.1 christos abort();
582 1.1 christos /* NOTREACHED */
583 1.1 christos }
584 1.1 christos
585 1.1 christos /*
586 1.1 christos * Return the register number that is defined by 's'. We assume that
587 1.1 christos * a single stmt cannot define more than one register. If no register
588 1.1 christos * is defined, return -1.
589 1.1 christos *
590 1.1 christos * The implementation should probably change to an array access.
591 1.1 christos */
592 1.1 christos static int
593 1.6 christos atomdef(struct stmt *s)
594 1.1 christos {
595 1.1 christos if (s->code == NOP)
596 1.1 christos return -1;
597 1.1 christos
598 1.1 christos switch (BPF_CLASS(s->code)) {
599 1.1 christos
600 1.1 christos case BPF_LD:
601 1.1 christos case BPF_ALU:
602 1.1 christos return A_ATOM;
603 1.1 christos
604 1.1 christos case BPF_LDX:
605 1.1 christos return X_ATOM;
606 1.1 christos
607 1.1 christos case BPF_ST:
608 1.1 christos case BPF_STX:
609 1.1 christos return s->k;
610 1.1 christos
611 1.1 christos case BPF_MISC:
612 1.1 christos return BPF_MISCOP(s->code) == BPF_TAX ? X_ATOM : A_ATOM;
613 1.1 christos }
614 1.1 christos return -1;
615 1.1 christos }
616 1.1 christos
617 1.1 christos /*
618 1.1 christos * Compute the sets of registers used, defined, and killed by 'b'.
619 1.1 christos *
620 1.1 christos * "Used" means that a statement in 'b' uses the register before any
621 1.1 christos * statement in 'b' defines it, i.e. it uses the value left in
622 1.1 christos * that register by a predecessor block of this block.
623 1.1 christos * "Defined" means that a statement in 'b' defines it.
624 1.1 christos * "Killed" means that a statement in 'b' defines it before any
625 1.1 christos * statement in 'b' uses it, i.e. it kills the value left in that
626 1.1 christos * register by a predecessor block of this block.
627 1.1 christos */
628 1.1 christos static void
629 1.6 christos compute_local_ud(struct block *b)
630 1.1 christos {
631 1.1 christos struct slist *s;
632 1.9 christos atomset def = 0, use = 0, killed = 0;
633 1.1 christos int atom;
634 1.1 christos
635 1.1 christos for (s = b->stmts; s; s = s->next) {
636 1.1 christos if (s->s.code == NOP)
637 1.1 christos continue;
638 1.1 christos atom = atomuse(&s->s);
639 1.1 christos if (atom >= 0) {
640 1.1 christos if (atom == AX_ATOM) {
641 1.1 christos if (!ATOMELEM(def, X_ATOM))
642 1.1 christos use |= ATOMMASK(X_ATOM);
643 1.1 christos if (!ATOMELEM(def, A_ATOM))
644 1.1 christos use |= ATOMMASK(A_ATOM);
645 1.1 christos }
646 1.1 christos else if (atom < N_ATOMS) {
647 1.1 christos if (!ATOMELEM(def, atom))
648 1.1 christos use |= ATOMMASK(atom);
649 1.1 christos }
650 1.1 christos else
651 1.1 christos abort();
652 1.1 christos }
653 1.1 christos atom = atomdef(&s->s);
654 1.1 christos if (atom >= 0) {
655 1.1 christos if (!ATOMELEM(use, atom))
656 1.9 christos killed |= ATOMMASK(atom);
657 1.1 christos def |= ATOMMASK(atom);
658 1.1 christos }
659 1.1 christos }
660 1.1 christos if (BPF_CLASS(b->s.code) == BPF_JMP) {
661 1.1 christos /*
662 1.1 christos * XXX - what about RET?
663 1.1 christos */
664 1.1 christos atom = atomuse(&b->s);
665 1.1 christos if (atom >= 0) {
666 1.1 christos if (atom == AX_ATOM) {
667 1.1 christos if (!ATOMELEM(def, X_ATOM))
668 1.1 christos use |= ATOMMASK(X_ATOM);
669 1.1 christos if (!ATOMELEM(def, A_ATOM))
670 1.1 christos use |= ATOMMASK(A_ATOM);
671 1.1 christos }
672 1.1 christos else if (atom < N_ATOMS) {
673 1.1 christos if (!ATOMELEM(def, atom))
674 1.1 christos use |= ATOMMASK(atom);
675 1.1 christos }
676 1.1 christos else
677 1.1 christos abort();
678 1.1 christos }
679 1.1 christos }
680 1.1 christos
681 1.1 christos b->def = def;
682 1.9 christos b->kill = killed;
683 1.1 christos b->in_use = use;
684 1.1 christos }
685 1.1 christos
686 1.1 christos /*
687 1.1 christos * Assume graph is already leveled.
688 1.1 christos */
689 1.1 christos static void
690 1.9 christos find_ud(opt_state_t *opt_state, struct block *root)
691 1.1 christos {
692 1.1 christos int i, maxlevel;
693 1.1 christos struct block *p;
694 1.1 christos
695 1.1 christos /*
696 1.1 christos * root->level is the highest level no found;
697 1.1 christos * count down from there.
698 1.1 christos */
699 1.1 christos maxlevel = root->level;
700 1.1 christos for (i = maxlevel; i >= 0; --i)
701 1.9 christos for (p = opt_state->levels[i]; p; p = p->link) {
702 1.1 christos compute_local_ud(p);
703 1.1 christos p->out_use = 0;
704 1.1 christos }
705 1.1 christos
706 1.1 christos for (i = 1; i <= maxlevel; ++i) {
707 1.9 christos for (p = opt_state->levels[i]; p; p = p->link) {
708 1.1 christos p->out_use |= JT(p)->in_use | JF(p)->in_use;
709 1.1 christos p->in_use |= p->out_use &~ p->kill;
710 1.1 christos }
711 1.1 christos }
712 1.1 christos }
713 1.1 christos static void
714 1.9 christos init_val(opt_state_t *opt_state)
715 1.1 christos {
716 1.9 christos opt_state->curval = 0;
717 1.9 christos opt_state->next_vnode = opt_state->vnode_base;
718 1.9 christos memset((char *)opt_state->vmap, 0, opt_state->maxval * sizeof(*opt_state->vmap));
719 1.9 christos memset((char *)opt_state->hashtbl, 0, sizeof opt_state->hashtbl);
720 1.1 christos }
721 1.1 christos
722 1.12 christos /*
723 1.12 christos * Because we really don't have an IR, this stuff is a little messy.
724 1.12 christos *
725 1.12 christos * This routine looks in the table of existing value number for a value
726 1.12 christos * with generated from an operation with the specified opcode and
727 1.12 christos * the specified values. If it finds it, it returns its value number,
728 1.12 christos * otherwise it makes a new entry in the table and returns the
729 1.12 christos * value number of that entry.
730 1.12 christos */
731 1.12 christos static bpf_u_int32
732 1.12 christos F(opt_state_t *opt_state, int code, bpf_u_int32 v0, bpf_u_int32 v1)
733 1.1 christos {
734 1.1 christos u_int hash;
735 1.12 christos bpf_u_int32 val;
736 1.1 christos struct valnode *p;
737 1.1 christos
738 1.12 christos hash = (u_int)code ^ (v0 << 4) ^ (v1 << 8);
739 1.1 christos hash %= MODULUS;
740 1.1 christos
741 1.9 christos for (p = opt_state->hashtbl[hash]; p; p = p->next)
742 1.1 christos if (p->code == code && p->v0 == v0 && p->v1 == v1)
743 1.1 christos return p->val;
744 1.1 christos
745 1.12 christos /*
746 1.12 christos * Not found. Allocate a new value, and assign it a new
747 1.12 christos * value number.
748 1.12 christos *
749 1.12 christos * opt_state->curval starts out as 0, which means VAL_UNKNOWN; we
750 1.12 christos * increment it before using it as the new value number, which
751 1.12 christos * means we never assign VAL_UNKNOWN.
752 1.12 christos *
753 1.12 christos * XXX - unless we overflow, but we probably won't have 2^32-1
754 1.12 christos * values; we treat 32 bits as effectively infinite.
755 1.12 christos */
756 1.9 christos val = ++opt_state->curval;
757 1.1 christos if (BPF_MODE(code) == BPF_IMM &&
758 1.1 christos (BPF_CLASS(code) == BPF_LD || BPF_CLASS(code) == BPF_LDX)) {
759 1.9 christos opt_state->vmap[val].const_val = v0;
760 1.9 christos opt_state->vmap[val].is_const = 1;
761 1.1 christos }
762 1.9 christos p = opt_state->next_vnode++;
763 1.1 christos p->val = val;
764 1.1 christos p->code = code;
765 1.1 christos p->v0 = v0;
766 1.1 christos p->v1 = v1;
767 1.9 christos p->next = opt_state->hashtbl[hash];
768 1.9 christos opt_state->hashtbl[hash] = p;
769 1.1 christos
770 1.1 christos return val;
771 1.1 christos }
772 1.1 christos
773 1.1 christos static inline void
774 1.12 christos vstore(struct stmt *s, bpf_u_int32 *valp, bpf_u_int32 newval, int alter)
775 1.1 christos {
776 1.10 christos if (alter && newval != VAL_UNKNOWN && *valp == newval)
777 1.1 christos s->code = NOP;
778 1.1 christos else
779 1.1 christos *valp = newval;
780 1.1 christos }
781 1.1 christos
782 1.6 christos /*
783 1.6 christos * Do constant-folding on binary operators.
784 1.6 christos * (Unary operators are handled elsewhere.)
785 1.6 christos */
786 1.1 christos static void
787 1.12 christos fold_op(opt_state_t *opt_state, struct stmt *s, bpf_u_int32 v0, bpf_u_int32 v1)
788 1.1 christos {
789 1.1 christos bpf_u_int32 a, b;
790 1.1 christos
791 1.9 christos a = opt_state->vmap[v0].const_val;
792 1.9 christos b = opt_state->vmap[v1].const_val;
793 1.1 christos
794 1.1 christos switch (BPF_OP(s->code)) {
795 1.1 christos case BPF_ADD:
796 1.1 christos a += b;
797 1.1 christos break;
798 1.1 christos
799 1.1 christos case BPF_SUB:
800 1.1 christos a -= b;
801 1.1 christos break;
802 1.1 christos
803 1.1 christos case BPF_MUL:
804 1.1 christos a *= b;
805 1.1 christos break;
806 1.1 christos
807 1.1 christos case BPF_DIV:
808 1.1 christos if (b == 0)
809 1.11 christos opt_error(opt_state, "division by zero");
810 1.1 christos a /= b;
811 1.1 christos break;
812 1.1 christos
813 1.7 christos case BPF_MOD:
814 1.7 christos if (b == 0)
815 1.11 christos opt_error(opt_state, "modulus by zero");
816 1.7 christos a %= b;
817 1.7 christos break;
818 1.7 christos
819 1.1 christos case BPF_AND:
820 1.1 christos a &= b;
821 1.1 christos break;
822 1.1 christos
823 1.1 christos case BPF_OR:
824 1.1 christos a |= b;
825 1.1 christos break;
826 1.1 christos
827 1.7 christos case BPF_XOR:
828 1.7 christos a ^= b;
829 1.7 christos break;
830 1.7 christos
831 1.1 christos case BPF_LSH:
832 1.11 christos /*
833 1.11 christos * A left shift of more than the width of the type
834 1.11 christos * is undefined in C; we'll just treat it as shifting
835 1.11 christos * all the bits out.
836 1.11 christos *
837 1.11 christos * XXX - the BPF interpreter doesn't check for this,
838 1.11 christos * so its behavior is dependent on the behavior of
839 1.11 christos * the processor on which it's running. There are
840 1.11 christos * processors on which it shifts all the bits out
841 1.11 christos * and processors on which it does no shift.
842 1.11 christos */
843 1.11 christos if (b < 32)
844 1.11 christos a <<= b;
845 1.11 christos else
846 1.11 christos a = 0;
847 1.1 christos break;
848 1.1 christos
849 1.1 christos case BPF_RSH:
850 1.11 christos /*
851 1.11 christos * A right shift of more than the width of the type
852 1.11 christos * is undefined in C; we'll just treat it as shifting
853 1.11 christos * all the bits out.
854 1.11 christos *
855 1.11 christos * XXX - the BPF interpreter doesn't check for this,
856 1.11 christos * so its behavior is dependent on the behavior of
857 1.11 christos * the processor on which it's running. There are
858 1.11 christos * processors on which it shifts all the bits out
859 1.11 christos * and processors on which it does no shift.
860 1.11 christos */
861 1.11 christos if (b < 32)
862 1.11 christos a >>= b;
863 1.11 christos else
864 1.11 christos a = 0;
865 1.1 christos break;
866 1.1 christos
867 1.1 christos default:
868 1.1 christos abort();
869 1.1 christos }
870 1.1 christos s->k = a;
871 1.1 christos s->code = BPF_LD|BPF_IMM;
872 1.12 christos /*
873 1.12 christos * XXX - optimizer loop detection.
874 1.12 christos */
875 1.12 christos opt_state->non_branch_movement_performed = 1;
876 1.9 christos opt_state->done = 0;
877 1.1 christos }
878 1.1 christos
879 1.1 christos static inline struct slist *
880 1.6 christos this_op(struct slist *s)
881 1.1 christos {
882 1.1 christos while (s != 0 && s->s.code == NOP)
883 1.1 christos s = s->next;
884 1.1 christos return s;
885 1.1 christos }
886 1.1 christos
887 1.1 christos static void
888 1.6 christos opt_not(struct block *b)
889 1.1 christos {
890 1.1 christos struct block *tmp = JT(b);
891 1.1 christos
892 1.1 christos JT(b) = JF(b);
893 1.1 christos JF(b) = tmp;
894 1.1 christos }
895 1.1 christos
896 1.1 christos static void
897 1.9 christos opt_peep(opt_state_t *opt_state, struct block *b)
898 1.1 christos {
899 1.1 christos struct slist *s;
900 1.1 christos struct slist *next, *last;
901 1.12 christos bpf_u_int32 val;
902 1.1 christos
903 1.1 christos s = b->stmts;
904 1.1 christos if (s == 0)
905 1.1 christos return;
906 1.1 christos
907 1.1 christos last = s;
908 1.1 christos for (/*empty*/; /*empty*/; s = next) {
909 1.1 christos /*
910 1.1 christos * Skip over nops.
911 1.1 christos */
912 1.1 christos s = this_op(s);
913 1.1 christos if (s == 0)
914 1.1 christos break; /* nothing left in the block */
915 1.1 christos
916 1.1 christos /*
917 1.1 christos * Find the next real instruction after that one
918 1.1 christos * (skipping nops).
919 1.1 christos */
920 1.1 christos next = this_op(s->next);
921 1.1 christos if (next == 0)
922 1.1 christos break; /* no next instruction */
923 1.1 christos last = next;
924 1.1 christos
925 1.1 christos /*
926 1.1 christos * st M[k] --> st M[k]
927 1.1 christos * ldx M[k] tax
928 1.1 christos */
929 1.1 christos if (s->s.code == BPF_ST &&
930 1.1 christos next->s.code == (BPF_LDX|BPF_MEM) &&
931 1.1 christos s->s.k == next->s.k) {
932 1.12 christos /*
933 1.12 christos * XXX - optimizer loop detection.
934 1.12 christos */
935 1.12 christos opt_state->non_branch_movement_performed = 1;
936 1.9 christos opt_state->done = 0;
937 1.1 christos next->s.code = BPF_MISC|BPF_TAX;
938 1.1 christos }
939 1.1 christos /*
940 1.1 christos * ld #k --> ldx #k
941 1.1 christos * tax txa
942 1.1 christos */
943 1.1 christos if (s->s.code == (BPF_LD|BPF_IMM) &&
944 1.1 christos next->s.code == (BPF_MISC|BPF_TAX)) {
945 1.1 christos s->s.code = BPF_LDX|BPF_IMM;
946 1.1 christos next->s.code = BPF_MISC|BPF_TXA;
947 1.12 christos /*
948 1.12 christos * XXX - optimizer loop detection.
949 1.12 christos */
950 1.12 christos opt_state->non_branch_movement_performed = 1;
951 1.9 christos opt_state->done = 0;
952 1.1 christos }
953 1.1 christos /*
954 1.1 christos * This is an ugly special case, but it happens
955 1.1 christos * when you say tcp[k] or udp[k] where k is a constant.
956 1.1 christos */
957 1.1 christos if (s->s.code == (BPF_LD|BPF_IMM)) {
958 1.1 christos struct slist *add, *tax, *ild;
959 1.1 christos
960 1.1 christos /*
961 1.1 christos * Check that X isn't used on exit from this
962 1.1 christos * block (which the optimizer might cause).
963 1.1 christos * We know the code generator won't generate
964 1.1 christos * any local dependencies.
965 1.1 christos */
966 1.1 christos if (ATOMELEM(b->out_use, X_ATOM))
967 1.1 christos continue;
968 1.1 christos
969 1.1 christos /*
970 1.1 christos * Check that the instruction following the ldi
971 1.1 christos * is an addx, or it's an ldxms with an addx
972 1.1 christos * following it (with 0 or more nops between the
973 1.1 christos * ldxms and addx).
974 1.1 christos */
975 1.1 christos if (next->s.code != (BPF_LDX|BPF_MSH|BPF_B))
976 1.1 christos add = next;
977 1.1 christos else
978 1.1 christos add = this_op(next->next);
979 1.1 christos if (add == 0 || add->s.code != (BPF_ALU|BPF_ADD|BPF_X))
980 1.1 christos continue;
981 1.1 christos
982 1.1 christos /*
983 1.1 christos * Check that a tax follows that (with 0 or more
984 1.1 christos * nops between them).
985 1.1 christos */
986 1.1 christos tax = this_op(add->next);
987 1.1 christos if (tax == 0 || tax->s.code != (BPF_MISC|BPF_TAX))
988 1.1 christos continue;
989 1.1 christos
990 1.1 christos /*
991 1.1 christos * Check that an ild follows that (with 0 or more
992 1.1 christos * nops between them).
993 1.1 christos */
994 1.1 christos ild = this_op(tax->next);
995 1.1 christos if (ild == 0 || BPF_CLASS(ild->s.code) != BPF_LD ||
996 1.1 christos BPF_MODE(ild->s.code) != BPF_IND)
997 1.1 christos continue;
998 1.1 christos /*
999 1.1 christos * We want to turn this sequence:
1000 1.1 christos *
1001 1.1 christos * (004) ldi #0x2 {s}
1002 1.1 christos * (005) ldxms [14] {next} -- optional
1003 1.1 christos * (006) addx {add}
1004 1.1 christos * (007) tax {tax}
1005 1.1 christos * (008) ild [x+0] {ild}
1006 1.1 christos *
1007 1.1 christos * into this sequence:
1008 1.1 christos *
1009 1.1 christos * (004) nop
1010 1.1 christos * (005) ldxms [14]
1011 1.1 christos * (006) nop
1012 1.1 christos * (007) nop
1013 1.1 christos * (008) ild [x+2]
1014 1.1 christos *
1015 1.1 christos * XXX We need to check that X is not
1016 1.1 christos * subsequently used, because we want to change
1017 1.1 christos * what'll be in it after this sequence.
1018 1.1 christos *
1019 1.1 christos * We know we can eliminate the accumulator
1020 1.1 christos * modifications earlier in the sequence since
1021 1.1 christos * it is defined by the last stmt of this sequence
1022 1.1 christos * (i.e., the last statement of the sequence loads
1023 1.1 christos * a value into the accumulator, so we can eliminate
1024 1.1 christos * earlier operations on the accumulator).
1025 1.1 christos */
1026 1.1 christos ild->s.k += s->s.k;
1027 1.1 christos s->s.code = NOP;
1028 1.1 christos add->s.code = NOP;
1029 1.1 christos tax->s.code = NOP;
1030 1.12 christos /*
1031 1.12 christos * XXX - optimizer loop detection.
1032 1.12 christos */
1033 1.12 christos opt_state->non_branch_movement_performed = 1;
1034 1.9 christos opt_state->done = 0;
1035 1.1 christos }
1036 1.1 christos }
1037 1.1 christos /*
1038 1.1 christos * If the comparison at the end of a block is an equality
1039 1.1 christos * comparison against a constant, and nobody uses the value
1040 1.1 christos * we leave in the A register at the end of a block, and
1041 1.1 christos * the operation preceding the comparison is an arithmetic
1042 1.1 christos * operation, we can sometime optimize it away.
1043 1.1 christos */
1044 1.1 christos if (b->s.code == (BPF_JMP|BPF_JEQ|BPF_K) &&
1045 1.1 christos !ATOMELEM(b->out_use, A_ATOM)) {
1046 1.12 christos /*
1047 1.12 christos * We can optimize away certain subtractions of the
1048 1.12 christos * X register.
1049 1.12 christos */
1050 1.1 christos if (last->s.code == (BPF_ALU|BPF_SUB|BPF_X)) {
1051 1.1 christos val = b->val[X_ATOM];
1052 1.9 christos if (opt_state->vmap[val].is_const) {
1053 1.1 christos /*
1054 1.1 christos * If we have a subtract to do a comparison,
1055 1.1 christos * and the X register is a known constant,
1056 1.1 christos * we can merge this value into the
1057 1.1 christos * comparison:
1058 1.1 christos *
1059 1.1 christos * sub x -> nop
1060 1.1 christos * jeq #y jeq #(x+y)
1061 1.1 christos */
1062 1.9 christos b->s.k += opt_state->vmap[val].const_val;
1063 1.1 christos last->s.code = NOP;
1064 1.12 christos /*
1065 1.12 christos * XXX - optimizer loop detection.
1066 1.12 christos */
1067 1.12 christos opt_state->non_branch_movement_performed = 1;
1068 1.9 christos opt_state->done = 0;
1069 1.1 christos } else if (b->s.k == 0) {
1070 1.1 christos /*
1071 1.1 christos * If the X register isn't a constant,
1072 1.1 christos * and the comparison in the test is
1073 1.1 christos * against 0, we can compare with the
1074 1.1 christos * X register, instead:
1075 1.1 christos *
1076 1.1 christos * sub x -> nop
1077 1.1 christos * jeq #0 jeq x
1078 1.1 christos */
1079 1.1 christos last->s.code = NOP;
1080 1.1 christos b->s.code = BPF_JMP|BPF_JEQ|BPF_X;
1081 1.12 christos /*
1082 1.12 christos * XXX - optimizer loop detection.
1083 1.12 christos */
1084 1.12 christos opt_state->non_branch_movement_performed = 1;
1085 1.9 christos opt_state->done = 0;
1086 1.1 christos }
1087 1.1 christos }
1088 1.1 christos /*
1089 1.1 christos * Likewise, a constant subtract can be simplified:
1090 1.1 christos *
1091 1.1 christos * sub #x -> nop
1092 1.1 christos * jeq #y -> jeq #(x+y)
1093 1.1 christos */
1094 1.1 christos else if (last->s.code == (BPF_ALU|BPF_SUB|BPF_K)) {
1095 1.1 christos last->s.code = NOP;
1096 1.1 christos b->s.k += last->s.k;
1097 1.12 christos /*
1098 1.12 christos * XXX - optimizer loop detection.
1099 1.12 christos */
1100 1.12 christos opt_state->non_branch_movement_performed = 1;
1101 1.9 christos opt_state->done = 0;
1102 1.1 christos }
1103 1.1 christos /*
1104 1.1 christos * And, similarly, a constant AND can be simplified
1105 1.1 christos * if we're testing against 0, i.e.:
1106 1.1 christos *
1107 1.1 christos * and #k nop
1108 1.1 christos * jeq #0 -> jset #k
1109 1.1 christos */
1110 1.1 christos else if (last->s.code == (BPF_ALU|BPF_AND|BPF_K) &&
1111 1.1 christos b->s.k == 0) {
1112 1.1 christos b->s.k = last->s.k;
1113 1.1 christos b->s.code = BPF_JMP|BPF_K|BPF_JSET;
1114 1.1 christos last->s.code = NOP;
1115 1.12 christos /*
1116 1.12 christos * XXX - optimizer loop detection.
1117 1.12 christos */
1118 1.12 christos opt_state->non_branch_movement_performed = 1;
1119 1.9 christos opt_state->done = 0;
1120 1.1 christos opt_not(b);
1121 1.1 christos }
1122 1.1 christos }
1123 1.1 christos /*
1124 1.1 christos * jset #0 -> never
1125 1.1 christos * jset #ffffffff -> always
1126 1.1 christos */
1127 1.1 christos if (b->s.code == (BPF_JMP|BPF_K|BPF_JSET)) {
1128 1.1 christos if (b->s.k == 0)
1129 1.1 christos JT(b) = JF(b);
1130 1.12 christos if (b->s.k == 0xffffffffU)
1131 1.1 christos JF(b) = JT(b);
1132 1.1 christos }
1133 1.1 christos /*
1134 1.1 christos * If we're comparing against the index register, and the index
1135 1.1 christos * register is a known constant, we can just compare against that
1136 1.1 christos * constant.
1137 1.1 christos */
1138 1.1 christos val = b->val[X_ATOM];
1139 1.9 christos if (opt_state->vmap[val].is_const && BPF_SRC(b->s.code) == BPF_X) {
1140 1.12 christos bpf_u_int32 v = opt_state->vmap[val].const_val;
1141 1.1 christos b->s.code &= ~BPF_X;
1142 1.1 christos b->s.k = v;
1143 1.1 christos }
1144 1.1 christos /*
1145 1.1 christos * If the accumulator is a known constant, we can compute the
1146 1.1 christos * comparison result.
1147 1.1 christos */
1148 1.1 christos val = b->val[A_ATOM];
1149 1.9 christos if (opt_state->vmap[val].is_const && BPF_SRC(b->s.code) == BPF_K) {
1150 1.12 christos bpf_u_int32 v = opt_state->vmap[val].const_val;
1151 1.1 christos switch (BPF_OP(b->s.code)) {
1152 1.1 christos
1153 1.1 christos case BPF_JEQ:
1154 1.1 christos v = v == b->s.k;
1155 1.1 christos break;
1156 1.1 christos
1157 1.1 christos case BPF_JGT:
1158 1.12 christos v = v > b->s.k;
1159 1.1 christos break;
1160 1.1 christos
1161 1.1 christos case BPF_JGE:
1162 1.12 christos v = v >= b->s.k;
1163 1.1 christos break;
1164 1.1 christos
1165 1.1 christos case BPF_JSET:
1166 1.1 christos v &= b->s.k;
1167 1.1 christos break;
1168 1.1 christos
1169 1.1 christos default:
1170 1.1 christos abort();
1171 1.1 christos }
1172 1.12 christos if (JF(b) != JT(b)) {
1173 1.12 christos /*
1174 1.12 christos * XXX - optimizer loop detection.
1175 1.12 christos */
1176 1.12 christos opt_state->non_branch_movement_performed = 1;
1177 1.9 christos opt_state->done = 0;
1178 1.12 christos }
1179 1.1 christos if (v)
1180 1.1 christos JF(b) = JT(b);
1181 1.1 christos else
1182 1.1 christos JT(b) = JF(b);
1183 1.1 christos }
1184 1.1 christos }
1185 1.1 christos
1186 1.1 christos /*
1187 1.1 christos * Compute the symbolic value of expression of 's', and update
1188 1.1 christos * anything it defines in the value table 'val'. If 'alter' is true,
1189 1.1 christos * do various optimizations. This code would be cleaner if symbolic
1190 1.1 christos * evaluation and code transformations weren't folded together.
1191 1.1 christos */
1192 1.1 christos static void
1193 1.12 christos opt_stmt(opt_state_t *opt_state, struct stmt *s, bpf_u_int32 val[], int alter)
1194 1.1 christos {
1195 1.1 christos int op;
1196 1.12 christos bpf_u_int32 v;
1197 1.1 christos
1198 1.1 christos switch (s->code) {
1199 1.1 christos
1200 1.1 christos case BPF_LD|BPF_ABS|BPF_W:
1201 1.1 christos case BPF_LD|BPF_ABS|BPF_H:
1202 1.1 christos case BPF_LD|BPF_ABS|BPF_B:
1203 1.9 christos v = F(opt_state, s->code, s->k, 0L);
1204 1.1 christos vstore(s, &val[A_ATOM], v, alter);
1205 1.1 christos break;
1206 1.1 christos
1207 1.1 christos case BPF_LD|BPF_IND|BPF_W:
1208 1.1 christos case BPF_LD|BPF_IND|BPF_H:
1209 1.1 christos case BPF_LD|BPF_IND|BPF_B:
1210 1.1 christos v = val[X_ATOM];
1211 1.9 christos if (alter && opt_state->vmap[v].is_const) {
1212 1.1 christos s->code = BPF_LD|BPF_ABS|BPF_SIZE(s->code);
1213 1.9 christos s->k += opt_state->vmap[v].const_val;
1214 1.9 christos v = F(opt_state, s->code, s->k, 0L);
1215 1.12 christos /*
1216 1.12 christos * XXX - optimizer loop detection.
1217 1.12 christos */
1218 1.12 christos opt_state->non_branch_movement_performed = 1;
1219 1.9 christos opt_state->done = 0;
1220 1.1 christos }
1221 1.1 christos else
1222 1.9 christos v = F(opt_state, s->code, s->k, v);
1223 1.1 christos vstore(s, &val[A_ATOM], v, alter);
1224 1.1 christos break;
1225 1.1 christos
1226 1.1 christos case BPF_LD|BPF_LEN:
1227 1.9 christos v = F(opt_state, s->code, 0L, 0L);
1228 1.1 christos vstore(s, &val[A_ATOM], v, alter);
1229 1.1 christos break;
1230 1.1 christos
1231 1.1 christos case BPF_LD|BPF_IMM:
1232 1.1 christos v = K(s->k);
1233 1.1 christos vstore(s, &val[A_ATOM], v, alter);
1234 1.1 christos break;
1235 1.1 christos
1236 1.1 christos case BPF_LDX|BPF_IMM:
1237 1.1 christos v = K(s->k);
1238 1.1 christos vstore(s, &val[X_ATOM], v, alter);
1239 1.1 christos break;
1240 1.1 christos
1241 1.1 christos case BPF_LDX|BPF_MSH|BPF_B:
1242 1.9 christos v = F(opt_state, s->code, s->k, 0L);
1243 1.1 christos vstore(s, &val[X_ATOM], v, alter);
1244 1.1 christos break;
1245 1.1 christos
1246 1.1 christos case BPF_ALU|BPF_NEG:
1247 1.9 christos if (alter && opt_state->vmap[val[A_ATOM]].is_const) {
1248 1.1 christos s->code = BPF_LD|BPF_IMM;
1249 1.11 christos /*
1250 1.11 christos * Do this negation as unsigned arithmetic; that's
1251 1.11 christos * what modern BPF engines do, and it guarantees
1252 1.11 christos * that all possible values can be negated. (Yeah,
1253 1.11 christos * negating 0x80000000, the minimum signed 32-bit
1254 1.11 christos * two's-complement value, results in 0x80000000,
1255 1.11 christos * so it's still negative, but we *should* be doing
1256 1.11 christos * all unsigned arithmetic here, to match what
1257 1.11 christos * modern BPF engines do.)
1258 1.11 christos *
1259 1.11 christos * Express it as 0U - (unsigned value) so that we
1260 1.11 christos * don't get compiler warnings about negating an
1261 1.11 christos * unsigned value and don't get UBSan warnings
1262 1.11 christos * about the result of negating 0x80000000 being
1263 1.11 christos * undefined.
1264 1.11 christos */
1265 1.12 christos s->k = 0U - opt_state->vmap[val[A_ATOM]].const_val;
1266 1.1 christos val[A_ATOM] = K(s->k);
1267 1.1 christos }
1268 1.1 christos else
1269 1.9 christos val[A_ATOM] = F(opt_state, s->code, val[A_ATOM], 0L);
1270 1.1 christos break;
1271 1.1 christos
1272 1.1 christos case BPF_ALU|BPF_ADD|BPF_K:
1273 1.1 christos case BPF_ALU|BPF_SUB|BPF_K:
1274 1.1 christos case BPF_ALU|BPF_MUL|BPF_K:
1275 1.1 christos case BPF_ALU|BPF_DIV|BPF_K:
1276 1.7 christos case BPF_ALU|BPF_MOD|BPF_K:
1277 1.1 christos case BPF_ALU|BPF_AND|BPF_K:
1278 1.1 christos case BPF_ALU|BPF_OR|BPF_K:
1279 1.7 christos case BPF_ALU|BPF_XOR|BPF_K:
1280 1.1 christos case BPF_ALU|BPF_LSH|BPF_K:
1281 1.1 christos case BPF_ALU|BPF_RSH|BPF_K:
1282 1.1 christos op = BPF_OP(s->code);
1283 1.1 christos if (alter) {
1284 1.1 christos if (s->k == 0) {
1285 1.11 christos /*
1286 1.11 christos * Optimize operations where the constant
1287 1.11 christos * is zero.
1288 1.11 christos *
1289 1.11 christos * Don't optimize away "sub #0"
1290 1.1 christos * as it may be needed later to
1291 1.11 christos * fixup the generated math code.
1292 1.11 christos *
1293 1.11 christos * Fail if we're dividing by zero or taking
1294 1.11 christos * a modulus by zero.
1295 1.11 christos */
1296 1.1 christos if (op == BPF_ADD ||
1297 1.1 christos op == BPF_LSH || op == BPF_RSH ||
1298 1.7 christos op == BPF_OR || op == BPF_XOR) {
1299 1.1 christos s->code = NOP;
1300 1.1 christos break;
1301 1.1 christos }
1302 1.1 christos if (op == BPF_MUL || op == BPF_AND) {
1303 1.1 christos s->code = BPF_LD|BPF_IMM;
1304 1.1 christos val[A_ATOM] = K(s->k);
1305 1.1 christos break;
1306 1.1 christos }
1307 1.11 christos if (op == BPF_DIV)
1308 1.11 christos opt_error(opt_state,
1309 1.11 christos "division by zero");
1310 1.11 christos if (op == BPF_MOD)
1311 1.11 christos opt_error(opt_state,
1312 1.11 christos "modulus by zero");
1313 1.1 christos }
1314 1.9 christos if (opt_state->vmap[val[A_ATOM]].is_const) {
1315 1.11 christos fold_op(opt_state, s, val[A_ATOM], K(s->k));
1316 1.1 christos val[A_ATOM] = K(s->k);
1317 1.1 christos break;
1318 1.1 christos }
1319 1.1 christos }
1320 1.9 christos val[A_ATOM] = F(opt_state, s->code, val[A_ATOM], K(s->k));
1321 1.1 christos break;
1322 1.1 christos
1323 1.1 christos case BPF_ALU|BPF_ADD|BPF_X:
1324 1.1 christos case BPF_ALU|BPF_SUB|BPF_X:
1325 1.1 christos case BPF_ALU|BPF_MUL|BPF_X:
1326 1.1 christos case BPF_ALU|BPF_DIV|BPF_X:
1327 1.7 christos case BPF_ALU|BPF_MOD|BPF_X:
1328 1.1 christos case BPF_ALU|BPF_AND|BPF_X:
1329 1.1 christos case BPF_ALU|BPF_OR|BPF_X:
1330 1.7 christos case BPF_ALU|BPF_XOR|BPF_X:
1331 1.1 christos case BPF_ALU|BPF_LSH|BPF_X:
1332 1.1 christos case BPF_ALU|BPF_RSH|BPF_X:
1333 1.1 christos op = BPF_OP(s->code);
1334 1.9 christos if (alter && opt_state->vmap[val[X_ATOM]].is_const) {
1335 1.9 christos if (opt_state->vmap[val[A_ATOM]].is_const) {
1336 1.11 christos fold_op(opt_state, s, val[A_ATOM], val[X_ATOM]);
1337 1.1 christos val[A_ATOM] = K(s->k);
1338 1.1 christos }
1339 1.1 christos else {
1340 1.1 christos s->code = BPF_ALU|BPF_K|op;
1341 1.9 christos s->k = opt_state->vmap[val[X_ATOM]].const_val;
1342 1.11 christos if ((op == BPF_LSH || op == BPF_RSH) &&
1343 1.12 christos s->k > 31)
1344 1.11 christos opt_error(opt_state,
1345 1.11 christos "shift by more than 31 bits");
1346 1.12 christos /*
1347 1.12 christos * XXX - optimizer loop detection.
1348 1.12 christos */
1349 1.12 christos opt_state->non_branch_movement_performed = 1;
1350 1.9 christos opt_state->done = 0;
1351 1.1 christos val[A_ATOM] =
1352 1.9 christos F(opt_state, s->code, val[A_ATOM], K(s->k));
1353 1.1 christos }
1354 1.1 christos break;
1355 1.1 christos }
1356 1.1 christos /*
1357 1.1 christos * Check if we're doing something to an accumulator
1358 1.1 christos * that is 0, and simplify. This may not seem like
1359 1.1 christos * much of a simplification but it could open up further
1360 1.1 christos * optimizations.
1361 1.1 christos * XXX We could also check for mul by 1, etc.
1362 1.1 christos */
1363 1.9 christos if (alter && opt_state->vmap[val[A_ATOM]].is_const
1364 1.9 christos && opt_state->vmap[val[A_ATOM]].const_val == 0) {
1365 1.7 christos if (op == BPF_ADD || op == BPF_OR || op == BPF_XOR) {
1366 1.1 christos s->code = BPF_MISC|BPF_TXA;
1367 1.1 christos vstore(s, &val[A_ATOM], val[X_ATOM], alter);
1368 1.1 christos break;
1369 1.1 christos }
1370 1.7 christos else if (op == BPF_MUL || op == BPF_DIV || op == BPF_MOD ||
1371 1.1 christos op == BPF_AND || op == BPF_LSH || op == BPF_RSH) {
1372 1.1 christos s->code = BPF_LD|BPF_IMM;
1373 1.1 christos s->k = 0;
1374 1.1 christos vstore(s, &val[A_ATOM], K(s->k), alter);
1375 1.1 christos break;
1376 1.1 christos }
1377 1.1 christos else if (op == BPF_NEG) {
1378 1.1 christos s->code = NOP;
1379 1.1 christos break;
1380 1.1 christos }
1381 1.1 christos }
1382 1.9 christos val[A_ATOM] = F(opt_state, s->code, val[A_ATOM], val[X_ATOM]);
1383 1.1 christos break;
1384 1.1 christos
1385 1.1 christos case BPF_MISC|BPF_TXA:
1386 1.1 christos vstore(s, &val[A_ATOM], val[X_ATOM], alter);
1387 1.1 christos break;
1388 1.1 christos
1389 1.1 christos case BPF_LD|BPF_MEM:
1390 1.1 christos v = val[s->k];
1391 1.9 christos if (alter && opt_state->vmap[v].is_const) {
1392 1.1 christos s->code = BPF_LD|BPF_IMM;
1393 1.9 christos s->k = opt_state->vmap[v].const_val;
1394 1.12 christos /*
1395 1.12 christos * XXX - optimizer loop detection.
1396 1.12 christos */
1397 1.12 christos opt_state->non_branch_movement_performed = 1;
1398 1.9 christos opt_state->done = 0;
1399 1.1 christos }
1400 1.1 christos vstore(s, &val[A_ATOM], v, alter);
1401 1.1 christos break;
1402 1.1 christos
1403 1.1 christos case BPF_MISC|BPF_TAX:
1404 1.1 christos vstore(s, &val[X_ATOM], val[A_ATOM], alter);
1405 1.1 christos break;
1406 1.1 christos
1407 1.1 christos case BPF_LDX|BPF_MEM:
1408 1.1 christos v = val[s->k];
1409 1.9 christos if (alter && opt_state->vmap[v].is_const) {
1410 1.1 christos s->code = BPF_LDX|BPF_IMM;
1411 1.9 christos s->k = opt_state->vmap[v].const_val;
1412 1.12 christos /*
1413 1.12 christos * XXX - optimizer loop detection.
1414 1.12 christos */
1415 1.12 christos opt_state->non_branch_movement_performed = 1;
1416 1.9 christos opt_state->done = 0;
1417 1.1 christos }
1418 1.1 christos vstore(s, &val[X_ATOM], v, alter);
1419 1.1 christos break;
1420 1.1 christos
1421 1.1 christos case BPF_ST:
1422 1.1 christos vstore(s, &val[s->k], val[A_ATOM], alter);
1423 1.1 christos break;
1424 1.1 christos
1425 1.1 christos case BPF_STX:
1426 1.1 christos vstore(s, &val[s->k], val[X_ATOM], alter);
1427 1.1 christos break;
1428 1.1 christos }
1429 1.1 christos }
1430 1.1 christos
1431 1.1 christos static void
1432 1.9 christos deadstmt(opt_state_t *opt_state, register struct stmt *s, register struct stmt *last[])
1433 1.1 christos {
1434 1.1 christos register int atom;
1435 1.1 christos
1436 1.1 christos atom = atomuse(s);
1437 1.1 christos if (atom >= 0) {
1438 1.1 christos if (atom == AX_ATOM) {
1439 1.1 christos last[X_ATOM] = 0;
1440 1.1 christos last[A_ATOM] = 0;
1441 1.1 christos }
1442 1.1 christos else
1443 1.1 christos last[atom] = 0;
1444 1.1 christos }
1445 1.1 christos atom = atomdef(s);
1446 1.1 christos if (atom >= 0) {
1447 1.1 christos if (last[atom]) {
1448 1.12 christos /*
1449 1.12 christos * XXX - optimizer loop detection.
1450 1.12 christos */
1451 1.12 christos opt_state->non_branch_movement_performed = 1;
1452 1.9 christos opt_state->done = 0;
1453 1.1 christos last[atom]->code = NOP;
1454 1.1 christos }
1455 1.1 christos last[atom] = s;
1456 1.1 christos }
1457 1.1 christos }
1458 1.1 christos
1459 1.1 christos static void
1460 1.9 christos opt_deadstores(opt_state_t *opt_state, register struct block *b)
1461 1.1 christos {
1462 1.1 christos register struct slist *s;
1463 1.1 christos register int atom;
1464 1.1 christos struct stmt *last[N_ATOMS];
1465 1.1 christos
1466 1.1 christos memset((char *)last, 0, sizeof last);
1467 1.1 christos
1468 1.1 christos for (s = b->stmts; s != 0; s = s->next)
1469 1.9 christos deadstmt(opt_state, &s->s, last);
1470 1.9 christos deadstmt(opt_state, &b->s, last);
1471 1.1 christos
1472 1.1 christos for (atom = 0; atom < N_ATOMS; ++atom)
1473 1.1 christos if (last[atom] && !ATOMELEM(b->out_use, atom)) {
1474 1.1 christos last[atom]->code = NOP;
1475 1.12 christos /*
1476 1.12 christos * XXX - optimizer loop detection.
1477 1.12 christos */
1478 1.12 christos opt_state->non_branch_movement_performed = 1;
1479 1.9 christos opt_state->done = 0;
1480 1.1 christos }
1481 1.1 christos }
1482 1.1 christos
1483 1.1 christos static void
1484 1.11 christos opt_blk(opt_state_t *opt_state, struct block *b, int do_stmts)
1485 1.1 christos {
1486 1.1 christos struct slist *s;
1487 1.1 christos struct edge *p;
1488 1.1 christos int i;
1489 1.12 christos bpf_u_int32 aval, xval;
1490 1.1 christos
1491 1.1 christos #if 0
1492 1.1 christos for (s = b->stmts; s && s->next; s = s->next)
1493 1.1 christos if (BPF_CLASS(s->s.code) == BPF_JMP) {
1494 1.1 christos do_stmts = 0;
1495 1.1 christos break;
1496 1.1 christos }
1497 1.1 christos #endif
1498 1.1 christos
1499 1.1 christos /*
1500 1.1 christos * Initialize the atom values.
1501 1.1 christos */
1502 1.1 christos p = b->in_edges;
1503 1.1 christos if (p == 0) {
1504 1.1 christos /*
1505 1.1 christos * We have no predecessors, so everything is undefined
1506 1.1 christos * upon entry to this block.
1507 1.1 christos */
1508 1.1 christos memset((char *)b->val, 0, sizeof(b->val));
1509 1.1 christos } else {
1510 1.1 christos /*
1511 1.1 christos * Inherit values from our predecessors.
1512 1.1 christos *
1513 1.1 christos * First, get the values from the predecessor along the
1514 1.1 christos * first edge leading to this node.
1515 1.1 christos */
1516 1.1 christos memcpy((char *)b->val, (char *)p->pred->val, sizeof(b->val));
1517 1.1 christos /*
1518 1.1 christos * Now look at all the other nodes leading to this node.
1519 1.1 christos * If, for the predecessor along that edge, a register
1520 1.1 christos * has a different value from the one we have (i.e.,
1521 1.1 christos * control paths are merging, and the merging paths
1522 1.1 christos * assign different values to that register), give the
1523 1.1 christos * register the undefined value of 0.
1524 1.1 christos */
1525 1.1 christos while ((p = p->next) != NULL) {
1526 1.1 christos for (i = 0; i < N_ATOMS; ++i)
1527 1.1 christos if (b->val[i] != p->pred->val[i])
1528 1.1 christos b->val[i] = 0;
1529 1.1 christos }
1530 1.1 christos }
1531 1.1 christos aval = b->val[A_ATOM];
1532 1.1 christos xval = b->val[X_ATOM];
1533 1.1 christos for (s = b->stmts; s; s = s->next)
1534 1.11 christos opt_stmt(opt_state, &s->s, b->val, do_stmts);
1535 1.1 christos
1536 1.1 christos /*
1537 1.1 christos * This is a special case: if we don't use anything from this
1538 1.1 christos * block, and we load the accumulator or index register with a
1539 1.1 christos * value that is already there, or if this block is a return,
1540 1.1 christos * eliminate all the statements.
1541 1.1 christos *
1542 1.12 christos * XXX - what if it does a store? Presumably that falls under
1543 1.12 christos * the heading of "if we don't use anything from this block",
1544 1.12 christos * i.e., if we use any memory location set to a different
1545 1.12 christos * value by this block, then we use something from this block.
1546 1.1 christos *
1547 1.1 christos * XXX - why does it matter whether we use anything from this
1548 1.1 christos * block? If the accumulator or index register doesn't change
1549 1.1 christos * its value, isn't that OK even if we use that value?
1550 1.1 christos *
1551 1.1 christos * XXX - if we load the accumulator with a different value,
1552 1.1 christos * and the block ends with a conditional branch, we obviously
1553 1.1 christos * can't eliminate it, as the branch depends on that value.
1554 1.1 christos * For the index register, the conditional branch only depends
1555 1.1 christos * on the index register value if the test is against the index
1556 1.1 christos * register value rather than a constant; if nothing uses the
1557 1.1 christos * value we put into the index register, and we're not testing
1558 1.1 christos * against the index register's value, and there aren't any
1559 1.1 christos * other problems that would keep us from eliminating this
1560 1.1 christos * block, can we eliminate it?
1561 1.1 christos */
1562 1.1 christos if (do_stmts &&
1563 1.10 christos ((b->out_use == 0 &&
1564 1.10 christos aval != VAL_UNKNOWN && b->val[A_ATOM] == aval &&
1565 1.10 christos xval != VAL_UNKNOWN && b->val[X_ATOM] == xval) ||
1566 1.1 christos BPF_CLASS(b->s.code) == BPF_RET)) {
1567 1.1 christos if (b->stmts != 0) {
1568 1.1 christos b->stmts = 0;
1569 1.12 christos /*
1570 1.12 christos * XXX - optimizer loop detection.
1571 1.12 christos */
1572 1.12 christos opt_state->non_branch_movement_performed = 1;
1573 1.9 christos opt_state->done = 0;
1574 1.1 christos }
1575 1.1 christos } else {
1576 1.9 christos opt_peep(opt_state, b);
1577 1.9 christos opt_deadstores(opt_state, b);
1578 1.1 christos }
1579 1.1 christos /*
1580 1.1 christos * Set up values for branch optimizer.
1581 1.1 christos */
1582 1.1 christos if (BPF_SRC(b->s.code) == BPF_K)
1583 1.1 christos b->oval = K(b->s.k);
1584 1.1 christos else
1585 1.1 christos b->oval = b->val[X_ATOM];
1586 1.1 christos b->et.code = b->s.code;
1587 1.1 christos b->ef.code = -b->s.code;
1588 1.1 christos }
1589 1.1 christos
1590 1.1 christos /*
1591 1.1 christos * Return true if any register that is used on exit from 'succ', has
1592 1.1 christos * an exit value that is different from the corresponding exit value
1593 1.1 christos * from 'b'.
1594 1.1 christos */
1595 1.1 christos static int
1596 1.6 christos use_conflict(struct block *b, struct block *succ)
1597 1.1 christos {
1598 1.1 christos int atom;
1599 1.1 christos atomset use = succ->out_use;
1600 1.1 christos
1601 1.1 christos if (use == 0)
1602 1.1 christos return 0;
1603 1.1 christos
1604 1.1 christos for (atom = 0; atom < N_ATOMS; ++atom)
1605 1.1 christos if (ATOMELEM(use, atom))
1606 1.1 christos if (b->val[atom] != succ->val[atom])
1607 1.1 christos return 1;
1608 1.1 christos return 0;
1609 1.1 christos }
1610 1.1 christos
1611 1.12 christos /*
1612 1.12 christos * Given a block that is the successor of an edge, and an edge that
1613 1.12 christos * dominates that edge, return either a pointer to a child of that
1614 1.12 christos * block (a block to which that block jumps) if that block is a
1615 1.12 christos * candidate to replace the successor of the latter edge or NULL
1616 1.12 christos * if neither of the children of the first block are candidates.
1617 1.12 christos */
1618 1.1 christos static struct block *
1619 1.6 christos fold_edge(struct block *child, struct edge *ep)
1620 1.1 christos {
1621 1.1 christos int sense;
1622 1.12 christos bpf_u_int32 aval0, aval1, oval0, oval1;
1623 1.1 christos int code = ep->code;
1624 1.1 christos
1625 1.1 christos if (code < 0) {
1626 1.12 christos /*
1627 1.12 christos * This edge is a "branch if false" edge.
1628 1.12 christos */
1629 1.1 christos code = -code;
1630 1.1 christos sense = 0;
1631 1.12 christos } else {
1632 1.12 christos /*
1633 1.12 christos * This edge is a "branch if true" edge.
1634 1.12 christos */
1635 1.1 christos sense = 1;
1636 1.12 christos }
1637 1.1 christos
1638 1.12 christos /*
1639 1.12 christos * If the opcode for the branch at the end of the block we
1640 1.12 christos * were handed isn't the same as the opcode for the branch
1641 1.12 christos * to which the edge we were handed corresponds, the tests
1642 1.12 christos * for those branches aren't testing the same conditions,
1643 1.12 christos * so the blocks to which the first block branches aren't
1644 1.12 christos * candidates to replace the successor of the edge.
1645 1.12 christos */
1646 1.1 christos if (child->s.code != code)
1647 1.1 christos return 0;
1648 1.1 christos
1649 1.1 christos aval0 = child->val[A_ATOM];
1650 1.1 christos oval0 = child->oval;
1651 1.1 christos aval1 = ep->pred->val[A_ATOM];
1652 1.1 christos oval1 = ep->pred->oval;
1653 1.1 christos
1654 1.12 christos /*
1655 1.12 christos * If the A register value on exit from the successor block
1656 1.12 christos * isn't the same as the A register value on exit from the
1657 1.12 christos * predecessor of the edge, the blocks to which the first
1658 1.12 christos * block branches aren't candidates to replace the successor
1659 1.12 christos * of the edge.
1660 1.12 christos */
1661 1.1 christos if (aval0 != aval1)
1662 1.1 christos return 0;
1663 1.1 christos
1664 1.1 christos if (oval0 == oval1)
1665 1.1 christos /*
1666 1.1 christos * The operands of the branch instructions are
1667 1.12 christos * identical, so the branches are testing the
1668 1.12 christos * same condition, and the result is true if a true
1669 1.1 christos * branch was taken to get here, otherwise false.
1670 1.1 christos */
1671 1.1 christos return sense ? JT(child) : JF(child);
1672 1.1 christos
1673 1.1 christos if (sense && code == (BPF_JMP|BPF_JEQ|BPF_K))
1674 1.1 christos /*
1675 1.1 christos * At this point, we only know the comparison if we
1676 1.1 christos * came down the true branch, and it was an equality
1677 1.1 christos * comparison with a constant.
1678 1.1 christos *
1679 1.1 christos * I.e., if we came down the true branch, and the branch
1680 1.1 christos * was an equality comparison with a constant, we know the
1681 1.1 christos * accumulator contains that constant. If we came down
1682 1.1 christos * the false branch, or the comparison wasn't with a
1683 1.1 christos * constant, we don't know what was in the accumulator.
1684 1.1 christos *
1685 1.1 christos * We rely on the fact that distinct constants have distinct
1686 1.1 christos * value numbers.
1687 1.1 christos */
1688 1.1 christos return JF(child);
1689 1.1 christos
1690 1.1 christos return 0;
1691 1.1 christos }
1692 1.1 christos
1693 1.12 christos /*
1694 1.12 christos * If we can make this edge go directly to a child of the edge's current
1695 1.12 christos * successor, do so.
1696 1.12 christos */
1697 1.1 christos static void
1698 1.9 christos opt_j(opt_state_t *opt_state, struct edge *ep)
1699 1.1 christos {
1700 1.12 christos register u_int i, k;
1701 1.1 christos register struct block *target;
1702 1.1 christos
1703 1.12 christos /*
1704 1.12 christos * Does this edge go to a block where, if the test
1705 1.12 christos * at the end of it succeeds, it goes to a block
1706 1.12 christos * that's a leaf node of the DAG, i.e. a return
1707 1.12 christos * statement?
1708 1.12 christos * If so, there's nothing to optimize.
1709 1.12 christos */
1710 1.1 christos if (JT(ep->succ) == 0)
1711 1.1 christos return;
1712 1.1 christos
1713 1.12 christos /*
1714 1.12 christos * Does this edge go to a block that goes, in turn, to
1715 1.12 christos * the same block regardless of whether the test at the
1716 1.12 christos * end succeeds or fails?
1717 1.12 christos */
1718 1.1 christos if (JT(ep->succ) == JF(ep->succ)) {
1719 1.1 christos /*
1720 1.1 christos * Common branch targets can be eliminated, provided
1721 1.1 christos * there is no data dependency.
1722 1.12 christos *
1723 1.12 christos * Check whether any register used on exit from the
1724 1.12 christos * block to which the successor of this edge goes
1725 1.12 christos * has a value at that point that's different from
1726 1.12 christos * the value it has on exit from the predecessor of
1727 1.12 christos * this edge. If not, the predecessor of this edge
1728 1.12 christos * can just go to the block to which the successor
1729 1.12 christos * of this edge goes, bypassing the successor of this
1730 1.12 christos * edge, as the successor of this edge isn't doing
1731 1.12 christos * any calculations whose results are different
1732 1.12 christos * from what the blocks before it did and isn't
1733 1.12 christos * doing any tests the results of which matter.
1734 1.1 christos */
1735 1.12 christos if (!use_conflict(ep->pred, JT(ep->succ))) {
1736 1.12 christos /*
1737 1.12 christos * No, there isn't.
1738 1.12 christos * Make this edge go to the block to
1739 1.12 christos * which the successor of that edge
1740 1.12 christos * goes.
1741 1.12 christos *
1742 1.12 christos * XXX - optimizer loop detection.
1743 1.12 christos */
1744 1.12 christos opt_state->non_branch_movement_performed = 1;
1745 1.9 christos opt_state->done = 0;
1746 1.1 christos ep->succ = JT(ep->succ);
1747 1.1 christos }
1748 1.1 christos }
1749 1.1 christos /*
1750 1.1 christos * For each edge dominator that matches the successor of this
1751 1.1 christos * edge, promote the edge successor to the its grandchild.
1752 1.1 christos *
1753 1.1 christos * XXX We violate the set abstraction here in favor a reasonably
1754 1.1 christos * efficient loop.
1755 1.1 christos */
1756 1.1 christos top:
1757 1.9 christos for (i = 0; i < opt_state->edgewords; ++i) {
1758 1.12 christos /* i'th word in the bitset of dominators */
1759 1.1 christos register bpf_u_int32 x = ep->edom[i];
1760 1.1 christos
1761 1.1 christos while (x != 0) {
1762 1.12 christos /* Find the next dominator in that word and mark it as found */
1763 1.10 christos k = lowest_set_bit(x);
1764 1.11 christos x &=~ ((bpf_u_int32)1 << k);
1765 1.1 christos k += i * BITS_PER_WORD;
1766 1.1 christos
1767 1.9 christos target = fold_edge(ep->succ, opt_state->edges[k]);
1768 1.1 christos /*
1769 1.12 christos * We have a candidate to replace the successor
1770 1.12 christos * of ep.
1771 1.12 christos *
1772 1.1 christos * Check that there is no data dependency between
1773 1.12 christos * nodes that will be violated if we move the edge;
1774 1.12 christos * i.e., if any register used on exit from the
1775 1.12 christos * candidate has a value at that point different
1776 1.12 christos * from the value it has when we exit the
1777 1.12 christos * predecessor of that edge, there's a data
1778 1.12 christos * dependency that will be violated.
1779 1.1 christos */
1780 1.1 christos if (target != 0 && !use_conflict(ep->pred, target)) {
1781 1.12 christos /*
1782 1.12 christos * It's safe to replace the successor of
1783 1.12 christos * ep; do so, and note that we've made
1784 1.12 christos * at least one change.
1785 1.12 christos *
1786 1.12 christos * XXX - this is one of the operations that
1787 1.12 christos * happens when the optimizer gets into
1788 1.12 christos * one of those infinite loops.
1789 1.12 christos */
1790 1.9 christos opt_state->done = 0;
1791 1.1 christos ep->succ = target;
1792 1.1 christos if (JT(target) != 0)
1793 1.1 christos /*
1794 1.1 christos * Start over unless we hit a leaf.
1795 1.1 christos */
1796 1.1 christos goto top;
1797 1.1 christos return;
1798 1.1 christos }
1799 1.1 christos }
1800 1.1 christos }
1801 1.1 christos }
1802 1.1 christos
1803 1.12 christos /*
1804 1.12 christos * XXX - is this, and and_pullup(), what's described in section 6.1.2
1805 1.12 christos * "Predicate Assertion Propagation" in the BPF+ paper?
1806 1.12 christos *
1807 1.12 christos * Note that this looks at block dominators, not edge dominators.
1808 1.12 christos * Don't think so.
1809 1.12 christos *
1810 1.12 christos * "A or B" compiles into
1811 1.12 christos *
1812 1.12 christos * A
1813 1.12 christos * t / \ f
1814 1.12 christos * / B
1815 1.12 christos * / t / \ f
1816 1.12 christos * \ /
1817 1.12 christos * \ /
1818 1.12 christos * X
1819 1.12 christos *
1820 1.12 christos *
1821 1.12 christos */
1822 1.1 christos static void
1823 1.9 christos or_pullup(opt_state_t *opt_state, struct block *b)
1824 1.1 christos {
1825 1.12 christos bpf_u_int32 val;
1826 1.12 christos int at_top;
1827 1.1 christos struct block *pull;
1828 1.1 christos struct block **diffp, **samep;
1829 1.1 christos struct edge *ep;
1830 1.1 christos
1831 1.1 christos ep = b->in_edges;
1832 1.1 christos if (ep == 0)
1833 1.1 christos return;
1834 1.1 christos
1835 1.1 christos /*
1836 1.1 christos * Make sure each predecessor loads the same value.
1837 1.1 christos * XXX why?
1838 1.1 christos */
1839 1.1 christos val = ep->pred->val[A_ATOM];
1840 1.1 christos for (ep = ep->next; ep != 0; ep = ep->next)
1841 1.1 christos if (val != ep->pred->val[A_ATOM])
1842 1.1 christos return;
1843 1.1 christos
1844 1.12 christos /*
1845 1.12 christos * For the first edge in the list of edges coming into this block,
1846 1.12 christos * see whether the predecessor of that edge comes here via a true
1847 1.12 christos * branch or a false branch.
1848 1.12 christos */
1849 1.1 christos if (JT(b->in_edges->pred) == b)
1850 1.12 christos diffp = &JT(b->in_edges->pred); /* jt */
1851 1.1 christos else
1852 1.12 christos diffp = &JF(b->in_edges->pred); /* jf */
1853 1.1 christos
1854 1.12 christos /*
1855 1.12 christos * diffp is a pointer to a pointer to the block.
1856 1.12 christos *
1857 1.12 christos * Go down the false chain looking as far as you can,
1858 1.12 christos * making sure that each jump-compare is doing the
1859 1.12 christos * same as the original block.
1860 1.12 christos *
1861 1.12 christos * If you reach the bottom before you reach a
1862 1.12 christos * different jump-compare, just exit. There's nothing
1863 1.12 christos * to do here. XXX - no, this version is checking for
1864 1.12 christos * the value leaving the block; that's from the BPF+
1865 1.12 christos * pullup routine.
1866 1.12 christos */
1867 1.1 christos at_top = 1;
1868 1.10 christos for (;;) {
1869 1.12 christos /*
1870 1.12 christos * Done if that's not going anywhere XXX
1871 1.12 christos */
1872 1.1 christos if (*diffp == 0)
1873 1.1 christos return;
1874 1.1 christos
1875 1.12 christos /*
1876 1.12 christos * Done if that predecessor blah blah blah isn't
1877 1.12 christos * going the same place we're going XXX
1878 1.12 christos *
1879 1.12 christos * Does the true edge of this block point to the same
1880 1.12 christos * location as the true edge of b?
1881 1.12 christos */
1882 1.1 christos if (JT(*diffp) != JT(b))
1883 1.1 christos return;
1884 1.1 christos
1885 1.12 christos /*
1886 1.12 christos * Done if this node isn't a dominator of that
1887 1.12 christos * node blah blah blah XXX
1888 1.12 christos *
1889 1.12 christos * Does b dominate diffp?
1890 1.12 christos */
1891 1.1 christos if (!SET_MEMBER((*diffp)->dom, b->id))
1892 1.1 christos return;
1893 1.1 christos
1894 1.12 christos /*
1895 1.12 christos * Break out of the loop if that node's value of A
1896 1.12 christos * isn't the value of A above XXX
1897 1.12 christos */
1898 1.1 christos if ((*diffp)->val[A_ATOM] != val)
1899 1.1 christos break;
1900 1.1 christos
1901 1.12 christos /*
1902 1.12 christos * Get the JF for that node XXX
1903 1.12 christos * Go down the false path.
1904 1.12 christos */
1905 1.1 christos diffp = &JF(*diffp);
1906 1.1 christos at_top = 0;
1907 1.1 christos }
1908 1.12 christos
1909 1.12 christos /*
1910 1.12 christos * Now that we've found a different jump-compare in a chain
1911 1.12 christos * below b, search further down until we find another
1912 1.12 christos * jump-compare that looks at the original value. This
1913 1.12 christos * jump-compare should get pulled up. XXX again we're
1914 1.12 christos * comparing values not jump-compares.
1915 1.12 christos */
1916 1.1 christos samep = &JF(*diffp);
1917 1.10 christos for (;;) {
1918 1.12 christos /*
1919 1.12 christos * Done if that's not going anywhere XXX
1920 1.12 christos */
1921 1.1 christos if (*samep == 0)
1922 1.1 christos return;
1923 1.1 christos
1924 1.12 christos /*
1925 1.12 christos * Done if that predecessor blah blah blah isn't
1926 1.12 christos * going the same place we're going XXX
1927 1.12 christos */
1928 1.1 christos if (JT(*samep) != JT(b))
1929 1.1 christos return;
1930 1.1 christos
1931 1.12 christos /*
1932 1.12 christos * Done if this node isn't a dominator of that
1933 1.12 christos * node blah blah blah XXX
1934 1.12 christos *
1935 1.12 christos * Does b dominate samep?
1936 1.12 christos */
1937 1.1 christos if (!SET_MEMBER((*samep)->dom, b->id))
1938 1.1 christos return;
1939 1.1 christos
1940 1.12 christos /*
1941 1.12 christos * Break out of the loop if that node's value of A
1942 1.12 christos * is the value of A above XXX
1943 1.12 christos */
1944 1.1 christos if ((*samep)->val[A_ATOM] == val)
1945 1.1 christos break;
1946 1.1 christos
1947 1.1 christos /* XXX Need to check that there are no data dependencies
1948 1.1 christos between dp0 and dp1. Currently, the code generator
1949 1.1 christos will not produce such dependencies. */
1950 1.1 christos samep = &JF(*samep);
1951 1.1 christos }
1952 1.1 christos #ifdef notdef
1953 1.1 christos /* XXX This doesn't cover everything. */
1954 1.1 christos for (i = 0; i < N_ATOMS; ++i)
1955 1.1 christos if ((*samep)->val[i] != pred->val[i])
1956 1.1 christos return;
1957 1.1 christos #endif
1958 1.1 christos /* Pull up the node. */
1959 1.1 christos pull = *samep;
1960 1.1 christos *samep = JF(pull);
1961 1.1 christos JF(pull) = *diffp;
1962 1.1 christos
1963 1.1 christos /*
1964 1.1 christos * At the top of the chain, each predecessor needs to point at the
1965 1.1 christos * pulled up node. Inside the chain, there is only one predecessor
1966 1.1 christos * to worry about.
1967 1.1 christos */
1968 1.1 christos if (at_top) {
1969 1.1 christos for (ep = b->in_edges; ep != 0; ep = ep->next) {
1970 1.1 christos if (JT(ep->pred) == b)
1971 1.1 christos JT(ep->pred) = pull;
1972 1.1 christos else
1973 1.1 christos JF(ep->pred) = pull;
1974 1.1 christos }
1975 1.1 christos }
1976 1.1 christos else
1977 1.1 christos *diffp = pull;
1978 1.1 christos
1979 1.12 christos /*
1980 1.12 christos * XXX - this is one of the operations that happens when the
1981 1.12 christos * optimizer gets into one of those infinite loops.
1982 1.12 christos */
1983 1.9 christos opt_state->done = 0;
1984 1.1 christos }
1985 1.1 christos
1986 1.1 christos static void
1987 1.9 christos and_pullup(opt_state_t *opt_state, struct block *b)
1988 1.1 christos {
1989 1.12 christos bpf_u_int32 val;
1990 1.12 christos int at_top;
1991 1.1 christos struct block *pull;
1992 1.1 christos struct block **diffp, **samep;
1993 1.1 christos struct edge *ep;
1994 1.1 christos
1995 1.1 christos ep = b->in_edges;
1996 1.1 christos if (ep == 0)
1997 1.1 christos return;
1998 1.1 christos
1999 1.1 christos /*
2000 1.1 christos * Make sure each predecessor loads the same value.
2001 1.1 christos */
2002 1.1 christos val = ep->pred->val[A_ATOM];
2003 1.1 christos for (ep = ep->next; ep != 0; ep = ep->next)
2004 1.1 christos if (val != ep->pred->val[A_ATOM])
2005 1.1 christos return;
2006 1.1 christos
2007 1.1 christos if (JT(b->in_edges->pred) == b)
2008 1.1 christos diffp = &JT(b->in_edges->pred);
2009 1.1 christos else
2010 1.1 christos diffp = &JF(b->in_edges->pred);
2011 1.1 christos
2012 1.1 christos at_top = 1;
2013 1.10 christos for (;;) {
2014 1.1 christos if (*diffp == 0)
2015 1.1 christos return;
2016 1.1 christos
2017 1.1 christos if (JF(*diffp) != JF(b))
2018 1.1 christos return;
2019 1.1 christos
2020 1.1 christos if (!SET_MEMBER((*diffp)->dom, b->id))
2021 1.1 christos return;
2022 1.1 christos
2023 1.1 christos if ((*diffp)->val[A_ATOM] != val)
2024 1.1 christos break;
2025 1.1 christos
2026 1.1 christos diffp = &JT(*diffp);
2027 1.1 christos at_top = 0;
2028 1.1 christos }
2029 1.1 christos samep = &JT(*diffp);
2030 1.10 christos for (;;) {
2031 1.1 christos if (*samep == 0)
2032 1.1 christos return;
2033 1.1 christos
2034 1.1 christos if (JF(*samep) != JF(b))
2035 1.1 christos return;
2036 1.1 christos
2037 1.1 christos if (!SET_MEMBER((*samep)->dom, b->id))
2038 1.1 christos return;
2039 1.1 christos
2040 1.1 christos if ((*samep)->val[A_ATOM] == val)
2041 1.1 christos break;
2042 1.1 christos
2043 1.1 christos /* XXX Need to check that there are no data dependencies
2044 1.1 christos between diffp and samep. Currently, the code generator
2045 1.1 christos will not produce such dependencies. */
2046 1.1 christos samep = &JT(*samep);
2047 1.1 christos }
2048 1.1 christos #ifdef notdef
2049 1.1 christos /* XXX This doesn't cover everything. */
2050 1.1 christos for (i = 0; i < N_ATOMS; ++i)
2051 1.1 christos if ((*samep)->val[i] != pred->val[i])
2052 1.1 christos return;
2053 1.1 christos #endif
2054 1.1 christos /* Pull up the node. */
2055 1.1 christos pull = *samep;
2056 1.1 christos *samep = JT(pull);
2057 1.1 christos JT(pull) = *diffp;
2058 1.1 christos
2059 1.1 christos /*
2060 1.1 christos * At the top of the chain, each predecessor needs to point at the
2061 1.1 christos * pulled up node. Inside the chain, there is only one predecessor
2062 1.1 christos * to worry about.
2063 1.1 christos */
2064 1.1 christos if (at_top) {
2065 1.1 christos for (ep = b->in_edges; ep != 0; ep = ep->next) {
2066 1.1 christos if (JT(ep->pred) == b)
2067 1.1 christos JT(ep->pred) = pull;
2068 1.1 christos else
2069 1.1 christos JF(ep->pred) = pull;
2070 1.1 christos }
2071 1.1 christos }
2072 1.1 christos else
2073 1.1 christos *diffp = pull;
2074 1.1 christos
2075 1.12 christos /*
2076 1.12 christos * XXX - this is one of the operations that happens when the
2077 1.12 christos * optimizer gets into one of those infinite loops.
2078 1.12 christos */
2079 1.9 christos opt_state->done = 0;
2080 1.1 christos }
2081 1.1 christos
2082 1.1 christos static void
2083 1.11 christos opt_blks(opt_state_t *opt_state, struct icode *ic, int do_stmts)
2084 1.1 christos {
2085 1.1 christos int i, maxlevel;
2086 1.1 christos struct block *p;
2087 1.1 christos
2088 1.9 christos init_val(opt_state);
2089 1.9 christos maxlevel = ic->root->level;
2090 1.1 christos
2091 1.9 christos find_inedges(opt_state, ic->root);
2092 1.1 christos for (i = maxlevel; i >= 0; --i)
2093 1.9 christos for (p = opt_state->levels[i]; p; p = p->link)
2094 1.11 christos opt_blk(opt_state, p, do_stmts);
2095 1.1 christos
2096 1.1 christos if (do_stmts)
2097 1.1 christos /*
2098 1.1 christos * No point trying to move branches; it can't possibly
2099 1.1 christos * make a difference at this point.
2100 1.12 christos *
2101 1.12 christos * XXX - this might be after we detect a loop where
2102 1.12 christos * we were just looping infinitely moving branches
2103 1.12 christos * in such a fashion that we went through two or more
2104 1.12 christos * versions of the machine code, eventually returning
2105 1.12 christos * to the first version. (We're really not doing a
2106 1.12 christos * full loop detection, we're just testing for two
2107 1.12 christos * passes in a row where we do nothing but
2108 1.12 christos * move branches.)
2109 1.1 christos */
2110 1.1 christos return;
2111 1.1 christos
2112 1.12 christos /*
2113 1.12 christos * Is this what the BPF+ paper describes in sections 6.1.1,
2114 1.12 christos * 6.1.2, and 6.1.3?
2115 1.12 christos */
2116 1.1 christos for (i = 1; i <= maxlevel; ++i) {
2117 1.9 christos for (p = opt_state->levels[i]; p; p = p->link) {
2118 1.9 christos opt_j(opt_state, &p->et);
2119 1.9 christos opt_j(opt_state, &p->ef);
2120 1.1 christos }
2121 1.1 christos }
2122 1.1 christos
2123 1.9 christos find_inedges(opt_state, ic->root);
2124 1.1 christos for (i = 1; i <= maxlevel; ++i) {
2125 1.9 christos for (p = opt_state->levels[i]; p; p = p->link) {
2126 1.9 christos or_pullup(opt_state, p);
2127 1.9 christos and_pullup(opt_state, p);
2128 1.1 christos }
2129 1.1 christos }
2130 1.1 christos }
2131 1.1 christos
2132 1.1 christos static inline void
2133 1.6 christos link_inedge(struct edge *parent, struct block *child)
2134 1.1 christos {
2135 1.1 christos parent->next = child->in_edges;
2136 1.1 christos child->in_edges = parent;
2137 1.1 christos }
2138 1.1 christos
2139 1.1 christos static void
2140 1.9 christos find_inedges(opt_state_t *opt_state, struct block *root)
2141 1.1 christos {
2142 1.12 christos u_int i;
2143 1.12 christos int level;
2144 1.1 christos struct block *b;
2145 1.1 christos
2146 1.9 christos for (i = 0; i < opt_state->n_blocks; ++i)
2147 1.9 christos opt_state->blocks[i]->in_edges = 0;
2148 1.1 christos
2149 1.1 christos /*
2150 1.1 christos * Traverse the graph, adding each edge to the predecessor
2151 1.1 christos * list of its successors. Skip the leaves (i.e. level 0).
2152 1.1 christos */
2153 1.12 christos for (level = root->level; level > 0; --level) {
2154 1.12 christos for (b = opt_state->levels[level]; b != 0; b = b->link) {
2155 1.1 christos link_inedge(&b->et, JT(b));
2156 1.1 christos link_inedge(&b->ef, JF(b));
2157 1.1 christos }
2158 1.1 christos }
2159 1.1 christos }
2160 1.1 christos
2161 1.1 christos static void
2162 1.6 christos opt_root(struct block **b)
2163 1.1 christos {
2164 1.1 christos struct slist *tmp, *s;
2165 1.1 christos
2166 1.1 christos s = (*b)->stmts;
2167 1.1 christos (*b)->stmts = 0;
2168 1.1 christos while (BPF_CLASS((*b)->s.code) == BPF_JMP && JT(*b) == JF(*b))
2169 1.1 christos *b = JT(*b);
2170 1.1 christos
2171 1.1 christos tmp = (*b)->stmts;
2172 1.1 christos if (tmp != 0)
2173 1.1 christos sappend(s, tmp);
2174 1.1 christos (*b)->stmts = s;
2175 1.1 christos
2176 1.1 christos /*
2177 1.1 christos * If the root node is a return, then there is no
2178 1.1 christos * point executing any statements (since the bpf machine
2179 1.1 christos * has no side effects).
2180 1.1 christos */
2181 1.1 christos if (BPF_CLASS((*b)->s.code) == BPF_RET)
2182 1.1 christos (*b)->stmts = 0;
2183 1.1 christos }
2184 1.1 christos
2185 1.1 christos static void
2186 1.11 christos opt_loop(opt_state_t *opt_state, struct icode *ic, int do_stmts)
2187 1.1 christos {
2188 1.1 christos
2189 1.1 christos #ifdef BDEBUG
2190 1.10 christos if (pcap_optimizer_debug > 1 || pcap_print_dot_graph) {
2191 1.1 christos printf("opt_loop(root, %d) begin\n", do_stmts);
2192 1.11 christos opt_dump(opt_state, ic);
2193 1.1 christos }
2194 1.1 christos #endif
2195 1.12 christos
2196 1.12 christos /*
2197 1.12 christos * XXX - optimizer loop detection.
2198 1.12 christos */
2199 1.12 christos int loop_count = 0;
2200 1.12 christos for (;;) {
2201 1.9 christos opt_state->done = 1;
2202 1.12 christos /*
2203 1.12 christos * XXX - optimizer loop detection.
2204 1.12 christos */
2205 1.12 christos opt_state->non_branch_movement_performed = 0;
2206 1.9 christos find_levels(opt_state, ic);
2207 1.9 christos find_dom(opt_state, ic->root);
2208 1.9 christos find_closure(opt_state, ic->root);
2209 1.9 christos find_ud(opt_state, ic->root);
2210 1.9 christos find_edom(opt_state, ic->root);
2211 1.11 christos opt_blks(opt_state, ic, do_stmts);
2212 1.1 christos #ifdef BDEBUG
2213 1.10 christos if (pcap_optimizer_debug > 1 || pcap_print_dot_graph) {
2214 1.9 christos printf("opt_loop(root, %d) bottom, done=%d\n", do_stmts, opt_state->done);
2215 1.11 christos opt_dump(opt_state, ic);
2216 1.1 christos }
2217 1.1 christos #endif
2218 1.12 christos
2219 1.12 christos /*
2220 1.12 christos * Was anything done in this optimizer pass?
2221 1.12 christos */
2222 1.12 christos if (opt_state->done) {
2223 1.12 christos /*
2224 1.12 christos * No, so we've reached a fixed point.
2225 1.12 christos * We're done.
2226 1.12 christos */
2227 1.12 christos break;
2228 1.12 christos }
2229 1.12 christos
2230 1.12 christos /*
2231 1.12 christos * XXX - was anything done other than branch movement
2232 1.12 christos * in this pass?
2233 1.12 christos */
2234 1.12 christos if (opt_state->non_branch_movement_performed) {
2235 1.12 christos /*
2236 1.12 christos * Yes. Clear any loop-detection counter;
2237 1.12 christos * we're making some form of progress (assuming
2238 1.12 christos * we can't get into a cycle doing *other*
2239 1.12 christos * optimizations...).
2240 1.12 christos */
2241 1.12 christos loop_count = 0;
2242 1.12 christos } else {
2243 1.12 christos /*
2244 1.12 christos * No - increment the counter, and quit if
2245 1.12 christos * it's up to 100.
2246 1.12 christos */
2247 1.12 christos loop_count++;
2248 1.12 christos if (loop_count >= 100) {
2249 1.12 christos /*
2250 1.12 christos * We've done nothing but branch movement
2251 1.12 christos * for 100 passes; we're probably
2252 1.12 christos * in a cycle and will never reach a
2253 1.12 christos * fixed point.
2254 1.12 christos *
2255 1.12 christos * XXX - yes, we really need a non-
2256 1.12 christos * heuristic way of detecting a cycle.
2257 1.12 christos */
2258 1.12 christos opt_state->done = 1;
2259 1.12 christos break;
2260 1.12 christos }
2261 1.12 christos }
2262 1.12 christos }
2263 1.1 christos }
2264 1.1 christos
2265 1.1 christos /*
2266 1.1 christos * Optimize the filter code in its dag representation.
2267 1.11 christos * Return 0 on success, -1 on error.
2268 1.1 christos */
2269 1.11 christos int
2270 1.11 christos bpf_optimize(struct icode *ic, char *errbuf)
2271 1.1 christos {
2272 1.9 christos opt_state_t opt_state;
2273 1.1 christos
2274 1.11 christos memset(&opt_state, 0, sizeof(opt_state));
2275 1.11 christos opt_state.errbuf = errbuf;
2276 1.12 christos opt_state.non_branch_movement_performed = 0;
2277 1.11 christos if (setjmp(opt_state.top_ctx)) {
2278 1.11 christos opt_cleanup(&opt_state);
2279 1.11 christos return -1;
2280 1.11 christos }
2281 1.11 christos opt_init(&opt_state, ic);
2282 1.11 christos opt_loop(&opt_state, ic, 0);
2283 1.11 christos opt_loop(&opt_state, ic, 1);
2284 1.9 christos intern_blocks(&opt_state, ic);
2285 1.1 christos #ifdef BDEBUG
2286 1.10 christos if (pcap_optimizer_debug > 1 || pcap_print_dot_graph) {
2287 1.1 christos printf("after intern_blocks()\n");
2288 1.11 christos opt_dump(&opt_state, ic);
2289 1.1 christos }
2290 1.1 christos #endif
2291 1.9 christos opt_root(&ic->root);
2292 1.1 christos #ifdef BDEBUG
2293 1.10 christos if (pcap_optimizer_debug > 1 || pcap_print_dot_graph) {
2294 1.1 christos printf("after opt_root()\n");
2295 1.11 christos opt_dump(&opt_state, ic);
2296 1.1 christos }
2297 1.1 christos #endif
2298 1.9 christos opt_cleanup(&opt_state);
2299 1.11 christos return 0;
2300 1.1 christos }
2301 1.1 christos
2302 1.1 christos static void
2303 1.9 christos make_marks(struct icode *ic, struct block *p)
2304 1.1 christos {
2305 1.9 christos if (!isMarked(ic, p)) {
2306 1.9 christos Mark(ic, p);
2307 1.1 christos if (BPF_CLASS(p->s.code) != BPF_RET) {
2308 1.9 christos make_marks(ic, JT(p));
2309 1.9 christos make_marks(ic, JF(p));
2310 1.1 christos }
2311 1.1 christos }
2312 1.1 christos }
2313 1.1 christos
2314 1.1 christos /*
2315 1.9 christos * Mark code array such that isMarked(ic->cur_mark, i) is true
2316 1.1 christos * only for nodes that are alive.
2317 1.1 christos */
2318 1.1 christos static void
2319 1.9 christos mark_code(struct icode *ic)
2320 1.1 christos {
2321 1.9 christos ic->cur_mark += 1;
2322 1.9 christos make_marks(ic, ic->root);
2323 1.1 christos }
2324 1.1 christos
2325 1.1 christos /*
2326 1.1 christos * True iff the two stmt lists load the same value from the packet into
2327 1.1 christos * the accumulator.
2328 1.1 christos */
2329 1.1 christos static int
2330 1.6 christos eq_slist(struct slist *x, struct slist *y)
2331 1.1 christos {
2332 1.10 christos for (;;) {
2333 1.1 christos while (x && x->s.code == NOP)
2334 1.1 christos x = x->next;
2335 1.1 christos while (y && y->s.code == NOP)
2336 1.1 christos y = y->next;
2337 1.1 christos if (x == 0)
2338 1.1 christos return y == 0;
2339 1.1 christos if (y == 0)
2340 1.1 christos return x == 0;
2341 1.1 christos if (x->s.code != y->s.code || x->s.k != y->s.k)
2342 1.1 christos return 0;
2343 1.1 christos x = x->next;
2344 1.1 christos y = y->next;
2345 1.1 christos }
2346 1.1 christos }
2347 1.1 christos
2348 1.1 christos static inline int
2349 1.6 christos eq_blk(struct block *b0, struct block *b1)
2350 1.1 christos {
2351 1.1 christos if (b0->s.code == b1->s.code &&
2352 1.1 christos b0->s.k == b1->s.k &&
2353 1.1 christos b0->et.succ == b1->et.succ &&
2354 1.1 christos b0->ef.succ == b1->ef.succ)
2355 1.1 christos return eq_slist(b0->stmts, b1->stmts);
2356 1.1 christos return 0;
2357 1.1 christos }
2358 1.1 christos
2359 1.1 christos static void
2360 1.9 christos intern_blocks(opt_state_t *opt_state, struct icode *ic)
2361 1.1 christos {
2362 1.1 christos struct block *p;
2363 1.12 christos u_int i, j;
2364 1.1 christos int done1; /* don't shadow global */
2365 1.1 christos top:
2366 1.1 christos done1 = 1;
2367 1.9 christos for (i = 0; i < opt_state->n_blocks; ++i)
2368 1.9 christos opt_state->blocks[i]->link = 0;
2369 1.1 christos
2370 1.9 christos mark_code(ic);
2371 1.1 christos
2372 1.12 christos for (i = opt_state->n_blocks - 1; i != 0; ) {
2373 1.12 christos --i;
2374 1.9 christos if (!isMarked(ic, opt_state->blocks[i]))
2375 1.1 christos continue;
2376 1.9 christos for (j = i + 1; j < opt_state->n_blocks; ++j) {
2377 1.9 christos if (!isMarked(ic, opt_state->blocks[j]))
2378 1.1 christos continue;
2379 1.9 christos if (eq_blk(opt_state->blocks[i], opt_state->blocks[j])) {
2380 1.9 christos opt_state->blocks[i]->link = opt_state->blocks[j]->link ?
2381 1.9 christos opt_state->blocks[j]->link : opt_state->blocks[j];
2382 1.1 christos break;
2383 1.1 christos }
2384 1.1 christos }
2385 1.1 christos }
2386 1.9 christos for (i = 0; i < opt_state->n_blocks; ++i) {
2387 1.9 christos p = opt_state->blocks[i];
2388 1.1 christos if (JT(p) == 0)
2389 1.1 christos continue;
2390 1.1 christos if (JT(p)->link) {
2391 1.1 christos done1 = 0;
2392 1.1 christos JT(p) = JT(p)->link;
2393 1.1 christos }
2394 1.1 christos if (JF(p)->link) {
2395 1.1 christos done1 = 0;
2396 1.1 christos JF(p) = JF(p)->link;
2397 1.1 christos }
2398 1.1 christos }
2399 1.1 christos if (!done1)
2400 1.1 christos goto top;
2401 1.1 christos }
2402 1.1 christos
2403 1.1 christos static void
2404 1.9 christos opt_cleanup(opt_state_t *opt_state)
2405 1.1 christos {
2406 1.9 christos free((void *)opt_state->vnode_base);
2407 1.9 christos free((void *)opt_state->vmap);
2408 1.9 christos free((void *)opt_state->edges);
2409 1.9 christos free((void *)opt_state->space);
2410 1.9 christos free((void *)opt_state->levels);
2411 1.9 christos free((void *)opt_state->blocks);
2412 1.1 christos }
2413 1.1 christos
2414 1.1 christos /*
2415 1.11 christos * For optimizer errors.
2416 1.11 christos */
2417 1.11 christos static void PCAP_NORETURN
2418 1.11 christos opt_error(opt_state_t *opt_state, const char *fmt, ...)
2419 1.11 christos {
2420 1.11 christos va_list ap;
2421 1.11 christos
2422 1.11 christos if (opt_state->errbuf != NULL) {
2423 1.11 christos va_start(ap, fmt);
2424 1.12 christos (void)vsnprintf(opt_state->errbuf,
2425 1.11 christos PCAP_ERRBUF_SIZE, fmt, ap);
2426 1.11 christos va_end(ap);
2427 1.11 christos }
2428 1.11 christos longjmp(opt_state->top_ctx, 1);
2429 1.11 christos /* NOTREACHED */
2430 1.12 christos #ifdef _AIX
2431 1.12 christos PCAP_UNREACHABLE
2432 1.12 christos #endif /* _AIX */
2433 1.11 christos }
2434 1.11 christos
2435 1.11 christos /*
2436 1.1 christos * Return the number of stmts in 's'.
2437 1.1 christos */
2438 1.5 christos static u_int
2439 1.6 christos slength(struct slist *s)
2440 1.1 christos {
2441 1.5 christos u_int n = 0;
2442 1.1 christos
2443 1.1 christos for (; s; s = s->next)
2444 1.1 christos if (s->s.code != NOP)
2445 1.1 christos ++n;
2446 1.1 christos return n;
2447 1.1 christos }
2448 1.1 christos
2449 1.1 christos /*
2450 1.1 christos * Return the number of nodes reachable by 'p'.
2451 1.1 christos * All nodes should be initially unmarked.
2452 1.1 christos */
2453 1.1 christos static int
2454 1.9 christos count_blocks(struct icode *ic, struct block *p)
2455 1.1 christos {
2456 1.9 christos if (p == 0 || isMarked(ic, p))
2457 1.1 christos return 0;
2458 1.9 christos Mark(ic, p);
2459 1.9 christos return count_blocks(ic, JT(p)) + count_blocks(ic, JF(p)) + 1;
2460 1.1 christos }
2461 1.1 christos
2462 1.1 christos /*
2463 1.1 christos * Do a depth first search on the flow graph, numbering the
2464 1.1 christos * the basic blocks, and entering them into the 'blocks' array.`
2465 1.1 christos */
2466 1.1 christos static void
2467 1.9 christos number_blks_r(opt_state_t *opt_state, struct icode *ic, struct block *p)
2468 1.1 christos {
2469 1.12 christos u_int n;
2470 1.1 christos
2471 1.9 christos if (p == 0 || isMarked(ic, p))
2472 1.1 christos return;
2473 1.1 christos
2474 1.9 christos Mark(ic, p);
2475 1.9 christos n = opt_state->n_blocks++;
2476 1.12 christos if (opt_state->n_blocks == 0) {
2477 1.12 christos /*
2478 1.12 christos * Overflow.
2479 1.12 christos */
2480 1.12 christos opt_error(opt_state, "filter is too complex to optimize");
2481 1.12 christos }
2482 1.1 christos p->id = n;
2483 1.9 christos opt_state->blocks[n] = p;
2484 1.1 christos
2485 1.9 christos number_blks_r(opt_state, ic, JT(p));
2486 1.9 christos number_blks_r(opt_state, ic, JF(p));
2487 1.1 christos }
2488 1.1 christos
2489 1.1 christos /*
2490 1.1 christos * Return the number of stmts in the flowgraph reachable by 'p'.
2491 1.1 christos * The nodes should be unmarked before calling.
2492 1.1 christos *
2493 1.1 christos * Note that "stmts" means "instructions", and that this includes
2494 1.1 christos *
2495 1.1 christos * side-effect statements in 'p' (slength(p->stmts));
2496 1.1 christos *
2497 1.1 christos * statements in the true branch from 'p' (count_stmts(JT(p)));
2498 1.1 christos *
2499 1.1 christos * statements in the false branch from 'p' (count_stmts(JF(p)));
2500 1.1 christos *
2501 1.1 christos * the conditional jump itself (1);
2502 1.1 christos *
2503 1.1 christos * an extra long jump if the true branch requires it (p->longjt);
2504 1.1 christos *
2505 1.1 christos * an extra long jump if the false branch requires it (p->longjf).
2506 1.1 christos */
2507 1.5 christos static u_int
2508 1.9 christos count_stmts(struct icode *ic, struct block *p)
2509 1.1 christos {
2510 1.5 christos u_int n;
2511 1.1 christos
2512 1.9 christos if (p == 0 || isMarked(ic, p))
2513 1.1 christos return 0;
2514 1.9 christos Mark(ic, p);
2515 1.9 christos n = count_stmts(ic, JT(p)) + count_stmts(ic, JF(p));
2516 1.1 christos return slength(p->stmts) + n + 1 + p->longjt + p->longjf;
2517 1.1 christos }
2518 1.1 christos
2519 1.1 christos /*
2520 1.1 christos * Allocate memory. All allocation is done before optimization
2521 1.1 christos * is begun. A linear bound on the size of all data structures is computed
2522 1.1 christos * from the total number of blocks and/or statements.
2523 1.1 christos */
2524 1.1 christos static void
2525 1.11 christos opt_init(opt_state_t *opt_state, struct icode *ic)
2526 1.1 christos {
2527 1.1 christos bpf_u_int32 *p;
2528 1.1 christos int i, n, max_stmts;
2529 1.12 christos u_int product;
2530 1.12 christos size_t block_memsize, edge_memsize;
2531 1.1 christos
2532 1.1 christos /*
2533 1.1 christos * First, count the blocks, so we can malloc an array to map
2534 1.1 christos * block number to block. Then, put the blocks into the array.
2535 1.1 christos */
2536 1.9 christos unMarkAll(ic);
2537 1.9 christos n = count_blocks(ic, ic->root);
2538 1.9 christos opt_state->blocks = (struct block **)calloc(n, sizeof(*opt_state->blocks));
2539 1.9 christos if (opt_state->blocks == NULL)
2540 1.11 christos opt_error(opt_state, "malloc");
2541 1.9 christos unMarkAll(ic);
2542 1.9 christos opt_state->n_blocks = 0;
2543 1.9 christos number_blks_r(opt_state, ic, ic->root);
2544 1.9 christos
2545 1.12 christos /*
2546 1.12 christos * This "should not happen".
2547 1.12 christos */
2548 1.12 christos if (opt_state->n_blocks == 0)
2549 1.12 christos opt_error(opt_state, "filter has no instructions; please report this as a libpcap issue");
2550 1.12 christos
2551 1.9 christos opt_state->n_edges = 2 * opt_state->n_blocks;
2552 1.12 christos if ((opt_state->n_edges / 2) != opt_state->n_blocks) {
2553 1.12 christos /*
2554 1.12 christos * Overflow.
2555 1.12 christos */
2556 1.12 christos opt_error(opt_state, "filter is too complex to optimize");
2557 1.12 christos }
2558 1.9 christos opt_state->edges = (struct edge **)calloc(opt_state->n_edges, sizeof(*opt_state->edges));
2559 1.11 christos if (opt_state->edges == NULL) {
2560 1.11 christos opt_error(opt_state, "malloc");
2561 1.11 christos }
2562 1.1 christos
2563 1.1 christos /*
2564 1.1 christos * The number of levels is bounded by the number of nodes.
2565 1.1 christos */
2566 1.9 christos opt_state->levels = (struct block **)calloc(opt_state->n_blocks, sizeof(*opt_state->levels));
2567 1.11 christos if (opt_state->levels == NULL) {
2568 1.11 christos opt_error(opt_state, "malloc");
2569 1.11 christos }
2570 1.1 christos
2571 1.12 christos opt_state->edgewords = opt_state->n_edges / BITS_PER_WORD + 1;
2572 1.12 christos opt_state->nodewords = opt_state->n_blocks / BITS_PER_WORD + 1;
2573 1.12 christos
2574 1.12 christos /*
2575 1.12 christos * Make sure opt_state->n_blocks * opt_state->nodewords fits
2576 1.12 christos * in a u_int; we use it as a u_int number-of-iterations
2577 1.12 christos * value.
2578 1.12 christos */
2579 1.12 christos product = opt_state->n_blocks * opt_state->nodewords;
2580 1.12 christos if ((product / opt_state->n_blocks) != opt_state->nodewords) {
2581 1.12 christos /*
2582 1.12 christos * XXX - just punt and don't try to optimize?
2583 1.12 christos * In practice, this is unlikely to happen with
2584 1.12 christos * a normal filter.
2585 1.12 christos */
2586 1.12 christos opt_error(opt_state, "filter is too complex to optimize");
2587 1.12 christos }
2588 1.12 christos
2589 1.12 christos /*
2590 1.12 christos * Make sure the total memory required for that doesn't
2591 1.12 christos * overflow.
2592 1.12 christos */
2593 1.12 christos block_memsize = (size_t)2 * product * sizeof(*opt_state->space);
2594 1.12 christos if ((block_memsize / product) != 2 * sizeof(*opt_state->space)) {
2595 1.12 christos opt_error(opt_state, "filter is too complex to optimize");
2596 1.12 christos }
2597 1.12 christos
2598 1.12 christos /*
2599 1.12 christos * Make sure opt_state->n_edges * opt_state->edgewords fits
2600 1.12 christos * in a u_int; we use it as a u_int number-of-iterations
2601 1.12 christos * value.
2602 1.12 christos */
2603 1.12 christos product = opt_state->n_edges * opt_state->edgewords;
2604 1.12 christos if ((product / opt_state->n_edges) != opt_state->edgewords) {
2605 1.12 christos opt_error(opt_state, "filter is too complex to optimize");
2606 1.12 christos }
2607 1.12 christos
2608 1.12 christos /*
2609 1.12 christos * Make sure the total memory required for that doesn't
2610 1.12 christos * overflow.
2611 1.12 christos */
2612 1.12 christos edge_memsize = (size_t)product * sizeof(*opt_state->space);
2613 1.12 christos if (edge_memsize / product != sizeof(*opt_state->space)) {
2614 1.12 christos opt_error(opt_state, "filter is too complex to optimize");
2615 1.12 christos }
2616 1.12 christos
2617 1.12 christos /*
2618 1.12 christos * Make sure the total memory required for both of them doesn't
2619 1.12 christos * overflow.
2620 1.12 christos */
2621 1.12 christos if (block_memsize > SIZE_MAX - edge_memsize) {
2622 1.12 christos opt_error(opt_state, "filter is too complex to optimize");
2623 1.12 christos }
2624 1.1 christos
2625 1.1 christos /* XXX */
2626 1.12 christos opt_state->space = (bpf_u_int32 *)malloc(block_memsize + edge_memsize);
2627 1.11 christos if (opt_state->space == NULL) {
2628 1.11 christos opt_error(opt_state, "malloc");
2629 1.11 christos }
2630 1.9 christos p = opt_state->space;
2631 1.9 christos opt_state->all_dom_sets = p;
2632 1.1 christos for (i = 0; i < n; ++i) {
2633 1.9 christos opt_state->blocks[i]->dom = p;
2634 1.9 christos p += opt_state->nodewords;
2635 1.1 christos }
2636 1.9 christos opt_state->all_closure_sets = p;
2637 1.1 christos for (i = 0; i < n; ++i) {
2638 1.9 christos opt_state->blocks[i]->closure = p;
2639 1.9 christos p += opt_state->nodewords;
2640 1.1 christos }
2641 1.9 christos opt_state->all_edge_sets = p;
2642 1.1 christos for (i = 0; i < n; ++i) {
2643 1.9 christos register struct block *b = opt_state->blocks[i];
2644 1.1 christos
2645 1.1 christos b->et.edom = p;
2646 1.9 christos p += opt_state->edgewords;
2647 1.1 christos b->ef.edom = p;
2648 1.9 christos p += opt_state->edgewords;
2649 1.1 christos b->et.id = i;
2650 1.9 christos opt_state->edges[i] = &b->et;
2651 1.9 christos b->ef.id = opt_state->n_blocks + i;
2652 1.9 christos opt_state->edges[opt_state->n_blocks + i] = &b->ef;
2653 1.1 christos b->et.pred = b;
2654 1.1 christos b->ef.pred = b;
2655 1.1 christos }
2656 1.1 christos max_stmts = 0;
2657 1.1 christos for (i = 0; i < n; ++i)
2658 1.9 christos max_stmts += slength(opt_state->blocks[i]->stmts) + 1;
2659 1.1 christos /*
2660 1.1 christos * We allocate at most 3 value numbers per statement,
2661 1.1 christos * so this is an upper bound on the number of valnodes
2662 1.1 christos * we'll need.
2663 1.1 christos */
2664 1.9 christos opt_state->maxval = 3 * max_stmts;
2665 1.9 christos opt_state->vmap = (struct vmapinfo *)calloc(opt_state->maxval, sizeof(*opt_state->vmap));
2666 1.11 christos if (opt_state->vmap == NULL) {
2667 1.11 christos opt_error(opt_state, "malloc");
2668 1.11 christos }
2669 1.9 christos opt_state->vnode_base = (struct valnode *)calloc(opt_state->maxval, sizeof(*opt_state->vnode_base));
2670 1.11 christos if (opt_state->vnode_base == NULL) {
2671 1.11 christos opt_error(opt_state, "malloc");
2672 1.11 christos }
2673 1.1 christos }
2674 1.1 christos
2675 1.1 christos /*
2676 1.9 christos * This is only used when supporting optimizer debugging. It is
2677 1.9 christos * global state, so do *not* do more than one compile in parallel
2678 1.9 christos * and expect it to provide meaningful information.
2679 1.1 christos */
2680 1.1 christos #ifdef BDEBUG
2681 1.10 christos int bids[NBIDS];
2682 1.1 christos #endif
2683 1.1 christos
2684 1.11 christos static void PCAP_NORETURN conv_error(conv_state_t *, const char *, ...)
2685 1.11 christos PCAP_PRINTFLIKE(2, 3);
2686 1.11 christos
2687 1.1 christos /*
2688 1.1 christos * Returns true if successful. Returns false if a branch has
2689 1.1 christos * an offset that is too large. If so, we have marked that
2690 1.1 christos * branch so that on a subsequent iteration, it will be treated
2691 1.1 christos * properly.
2692 1.1 christos */
2693 1.1 christos static int
2694 1.11 christos convert_code_r(conv_state_t *conv_state, struct icode *ic, struct block *p)
2695 1.1 christos {
2696 1.1 christos struct bpf_insn *dst;
2697 1.1 christos struct slist *src;
2698 1.2 christos u_int slen;
2699 1.1 christos u_int off;
2700 1.1 christos struct slist **offset = NULL;
2701 1.1 christos
2702 1.9 christos if (p == 0 || isMarked(ic, p))
2703 1.1 christos return (1);
2704 1.9 christos Mark(ic, p);
2705 1.1 christos
2706 1.11 christos if (convert_code_r(conv_state, ic, JF(p)) == 0)
2707 1.1 christos return (0);
2708 1.11 christos if (convert_code_r(conv_state, ic, JT(p)) == 0)
2709 1.1 christos return (0);
2710 1.1 christos
2711 1.1 christos slen = slength(p->stmts);
2712 1.9 christos dst = conv_state->ftail -= (slen + 1 + p->longjt + p->longjf);
2713 1.1 christos /* inflate length by any extra jumps */
2714 1.1 christos
2715 1.9 christos p->offset = (int)(dst - conv_state->fstart);
2716 1.1 christos
2717 1.1 christos /* generate offset[] for convenience */
2718 1.1 christos if (slen) {
2719 1.1 christos offset = (struct slist **)calloc(slen, sizeof(struct slist *));
2720 1.1 christos if (!offset) {
2721 1.11 christos conv_error(conv_state, "not enough core");
2722 1.1 christos /*NOTREACHED*/
2723 1.1 christos }
2724 1.1 christos }
2725 1.1 christos src = p->stmts;
2726 1.1 christos for (off = 0; off < slen && src; off++) {
2727 1.1 christos #if 0
2728 1.1 christos printf("off=%d src=%x\n", off, src);
2729 1.1 christos #endif
2730 1.1 christos offset[off] = src;
2731 1.1 christos src = src->next;
2732 1.1 christos }
2733 1.1 christos
2734 1.1 christos off = 0;
2735 1.1 christos for (src = p->stmts; src; src = src->next) {
2736 1.1 christos if (src->s.code == NOP)
2737 1.1 christos continue;
2738 1.1 christos dst->code = (u_short)src->s.code;
2739 1.1 christos dst->k = src->s.k;
2740 1.1 christos
2741 1.1 christos /* fill block-local relative jump */
2742 1.1 christos if (BPF_CLASS(src->s.code) != BPF_JMP || src->s.code == (BPF_JMP|BPF_JA)) {
2743 1.1 christos #if 0
2744 1.1 christos if (src->s.jt || src->s.jf) {
2745 1.11 christos free(offset);
2746 1.11 christos conv_error(conv_state, "illegal jmp destination");
2747 1.1 christos /*NOTREACHED*/
2748 1.1 christos }
2749 1.1 christos #endif
2750 1.1 christos goto filled;
2751 1.1 christos }
2752 1.1 christos if (off == slen - 2) /*???*/
2753 1.1 christos goto filled;
2754 1.1 christos
2755 1.1 christos {
2756 1.2 christos u_int i;
2757 1.1 christos int jt, jf;
2758 1.10 christos const char ljerr[] = "%s for block-local relative jump: off=%d";
2759 1.1 christos
2760 1.1 christos #if 0
2761 1.1 christos printf("code=%x off=%d %x %x\n", src->s.code,
2762 1.1 christos off, src->s.jt, src->s.jf);
2763 1.1 christos #endif
2764 1.1 christos
2765 1.1 christos if (!src->s.jt || !src->s.jf) {
2766 1.11 christos free(offset);
2767 1.11 christos conv_error(conv_state, ljerr, "no jmp destination", off);
2768 1.1 christos /*NOTREACHED*/
2769 1.1 christos }
2770 1.1 christos
2771 1.1 christos jt = jf = 0;
2772 1.1 christos for (i = 0; i < slen; i++) {
2773 1.1 christos if (offset[i] == src->s.jt) {
2774 1.1 christos if (jt) {
2775 1.11 christos free(offset);
2776 1.11 christos conv_error(conv_state, ljerr, "multiple matches", off);
2777 1.1 christos /*NOTREACHED*/
2778 1.1 christos }
2779 1.1 christos
2780 1.10 christos if (i - off - 1 >= 256) {
2781 1.11 christos free(offset);
2782 1.11 christos conv_error(conv_state, ljerr, "out-of-range jump", off);
2783 1.10 christos /*NOTREACHED*/
2784 1.10 christos }
2785 1.10 christos dst->jt = (u_char)(i - off - 1);
2786 1.1 christos jt++;
2787 1.1 christos }
2788 1.1 christos if (offset[i] == src->s.jf) {
2789 1.1 christos if (jf) {
2790 1.11 christos free(offset);
2791 1.11 christos conv_error(conv_state, ljerr, "multiple matches", off);
2792 1.1 christos /*NOTREACHED*/
2793 1.1 christos }
2794 1.10 christos if (i - off - 1 >= 256) {
2795 1.11 christos free(offset);
2796 1.11 christos conv_error(conv_state, ljerr, "out-of-range jump", off);
2797 1.10 christos /*NOTREACHED*/
2798 1.10 christos }
2799 1.10 christos dst->jf = (u_char)(i - off - 1);
2800 1.1 christos jf++;
2801 1.1 christos }
2802 1.1 christos }
2803 1.1 christos if (!jt || !jf) {
2804 1.11 christos free(offset);
2805 1.11 christos conv_error(conv_state, ljerr, "no destination found", off);
2806 1.1 christos /*NOTREACHED*/
2807 1.1 christos }
2808 1.1 christos }
2809 1.1 christos filled:
2810 1.1 christos ++dst;
2811 1.1 christos ++off;
2812 1.1 christos }
2813 1.1 christos if (offset)
2814 1.1 christos free(offset);
2815 1.1 christos
2816 1.1 christos #ifdef BDEBUG
2817 1.10 christos if (dst - conv_state->fstart < NBIDS)
2818 1.10 christos bids[dst - conv_state->fstart] = p->id + 1;
2819 1.1 christos #endif
2820 1.1 christos dst->code = (u_short)p->s.code;
2821 1.1 christos dst->k = p->s.k;
2822 1.1 christos if (JT(p)) {
2823 1.12 christos /* number of extra jumps inserted */
2824 1.12 christos u_char extrajmps = 0;
2825 1.1 christos off = JT(p)->offset - (p->offset + slen) - 1;
2826 1.1 christos if (off >= 256) {
2827 1.1 christos /* offset too large for branch, must add a jump */
2828 1.1 christos if (p->longjt == 0) {
2829 1.12 christos /* mark this instruction and retry */
2830 1.1 christos p->longjt++;
2831 1.1 christos return(0);
2832 1.1 christos }
2833 1.12 christos dst->jt = extrajmps;
2834 1.1 christos extrajmps++;
2835 1.1 christos dst[extrajmps].code = BPF_JMP|BPF_JA;
2836 1.1 christos dst[extrajmps].k = off - extrajmps;
2837 1.1 christos }
2838 1.1 christos else
2839 1.10 christos dst->jt = (u_char)off;
2840 1.1 christos off = JF(p)->offset - (p->offset + slen) - 1;
2841 1.1 christos if (off >= 256) {
2842 1.1 christos /* offset too large for branch, must add a jump */
2843 1.1 christos if (p->longjf == 0) {
2844 1.12 christos /* mark this instruction and retry */
2845 1.1 christos p->longjf++;
2846 1.1 christos return(0);
2847 1.1 christos }
2848 1.1 christos /* branch if F to following jump */
2849 1.1 christos /* if two jumps are inserted, F goes to second one */
2850 1.12 christos dst->jf = extrajmps;
2851 1.1 christos extrajmps++;
2852 1.1 christos dst[extrajmps].code = BPF_JMP|BPF_JA;
2853 1.1 christos dst[extrajmps].k = off - extrajmps;
2854 1.1 christos }
2855 1.1 christos else
2856 1.10 christos dst->jf = (u_char)off;
2857 1.1 christos }
2858 1.1 christos return (1);
2859 1.1 christos }
2860 1.1 christos
2861 1.1 christos
2862 1.1 christos /*
2863 1.1 christos * Convert flowgraph intermediate representation to the
2864 1.1 christos * BPF array representation. Set *lenp to the number of instructions.
2865 1.1 christos *
2866 1.1 christos * This routine does *NOT* leak the memory pointed to by fp. It *must
2867 1.1 christos * not* do free(fp) before returning fp; doing so would make no sense,
2868 1.1 christos * as the BPF array pointed to by the return value of icode_to_fcode()
2869 1.1 christos * must be valid - it's being returned for use in a bpf_program structure.
2870 1.1 christos *
2871 1.1 christos * If it appears that icode_to_fcode() is leaking, the problem is that
2872 1.1 christos * the program using pcap_compile() is failing to free the memory in
2873 1.1 christos * the BPF program when it's done - the leak is in the program, not in
2874 1.1 christos * the routine that happens to be allocating the memory. (By analogy, if
2875 1.1 christos * a program calls fopen() without ever calling fclose() on the FILE *,
2876 1.1 christos * it will leak the FILE structure; the leak is not in fopen(), it's in
2877 1.1 christos * the program.) Change the program to use pcap_freecode() when it's
2878 1.1 christos * done with the filter program. See the pcap man page.
2879 1.1 christos */
2880 1.1 christos struct bpf_insn *
2881 1.12 christos icode_to_fcode(struct icode *ic, struct block *root, u_int *lenp,
2882 1.11 christos char *errbuf)
2883 1.1 christos {
2884 1.5 christos u_int n;
2885 1.1 christos struct bpf_insn *fp;
2886 1.9 christos conv_state_t conv_state;
2887 1.1 christos
2888 1.11 christos conv_state.fstart = NULL;
2889 1.11 christos conv_state.errbuf = errbuf;
2890 1.11 christos if (setjmp(conv_state.top_ctx) != 0) {
2891 1.11 christos free(conv_state.fstart);
2892 1.11 christos return NULL;
2893 1.11 christos }
2894 1.11 christos
2895 1.1 christos /*
2896 1.1 christos * Loop doing convert_code_r() until no branches remain
2897 1.1 christos * with too-large offsets.
2898 1.1 christos */
2899 1.10 christos for (;;) {
2900 1.9 christos unMarkAll(ic);
2901 1.9 christos n = *lenp = count_stmts(ic, root);
2902 1.1 christos
2903 1.1 christos fp = (struct bpf_insn *)malloc(sizeof(*fp) * n);
2904 1.11 christos if (fp == NULL) {
2905 1.12 christos (void)snprintf(errbuf, PCAP_ERRBUF_SIZE,
2906 1.11 christos "malloc");
2907 1.11 christos return NULL;
2908 1.11 christos }
2909 1.1 christos memset((char *)fp, 0, sizeof(*fp) * n);
2910 1.9 christos conv_state.fstart = fp;
2911 1.9 christos conv_state.ftail = fp + n;
2912 1.1 christos
2913 1.9 christos unMarkAll(ic);
2914 1.11 christos if (convert_code_r(&conv_state, ic, root))
2915 1.1 christos break;
2916 1.1 christos free(fp);
2917 1.1 christos }
2918 1.1 christos
2919 1.1 christos return fp;
2920 1.1 christos }
2921 1.1 christos
2922 1.1 christos /*
2923 1.11 christos * For iconv_to_fconv() errors.
2924 1.11 christos */
2925 1.11 christos static void PCAP_NORETURN
2926 1.11 christos conv_error(conv_state_t *conv_state, const char *fmt, ...)
2927 1.11 christos {
2928 1.11 christos va_list ap;
2929 1.11 christos
2930 1.11 christos va_start(ap, fmt);
2931 1.12 christos (void)vsnprintf(conv_state->errbuf,
2932 1.11 christos PCAP_ERRBUF_SIZE, fmt, ap);
2933 1.11 christos va_end(ap);
2934 1.11 christos longjmp(conv_state->top_ctx, 1);
2935 1.11 christos /* NOTREACHED */
2936 1.12 christos #ifdef _AIX
2937 1.12 christos PCAP_UNREACHABLE
2938 1.12 christos #endif /* _AIX */
2939 1.11 christos }
2940 1.11 christos
2941 1.11 christos /*
2942 1.1 christos * Make a copy of a BPF program and put it in the "fcode" member of
2943 1.1 christos * a "pcap_t".
2944 1.1 christos *
2945 1.1 christos * If we fail to allocate memory for the copy, fill in the "errbuf"
2946 1.1 christos * member of the "pcap_t" with an error message, and return -1;
2947 1.1 christos * otherwise, return 0.
2948 1.1 christos */
2949 1.1 christos int
2950 1.1 christos install_bpf_program(pcap_t *p, struct bpf_program *fp)
2951 1.1 christos {
2952 1.1 christos size_t prog_size;
2953 1.1 christos
2954 1.1 christos /*
2955 1.1 christos * Validate the program.
2956 1.1 christos */
2957 1.12 christos if (!pcap_validate_filter(fp->bf_insns, fp->bf_len)) {
2958 1.12 christos snprintf(p->errbuf, sizeof(p->errbuf),
2959 1.1 christos "BPF program is not valid");
2960 1.1 christos return (-1);
2961 1.1 christos }
2962 1.1 christos
2963 1.1 christos /*
2964 1.1 christos * Free up any already installed program.
2965 1.1 christos */
2966 1.1 christos pcap_freecode(&p->fcode);
2967 1.1 christos
2968 1.1 christos prog_size = sizeof(*fp->bf_insns) * fp->bf_len;
2969 1.1 christos p->fcode.bf_len = fp->bf_len;
2970 1.1 christos p->fcode.bf_insns = (struct bpf_insn *)malloc(prog_size);
2971 1.1 christos if (p->fcode.bf_insns == NULL) {
2972 1.10 christos pcap_fmt_errmsg_for_errno(p->errbuf, sizeof(p->errbuf),
2973 1.10 christos errno, "malloc");
2974 1.1 christos return (-1);
2975 1.1 christos }
2976 1.1 christos memcpy(p->fcode.bf_insns, fp->bf_insns, prog_size);
2977 1.1 christos return (0);
2978 1.1 christos }
2979 1.1 christos
2980 1.1 christos #ifdef BDEBUG
2981 1.1 christos static void
2982 1.9 christos dot_dump_node(struct icode *ic, struct block *block, struct bpf_program *prog,
2983 1.9 christos FILE *out)
2984 1.8 christos {
2985 1.8 christos int icount, noffset;
2986 1.8 christos int i;
2987 1.8 christos
2988 1.9 christos if (block == NULL || isMarked(ic, block))
2989 1.8 christos return;
2990 1.9 christos Mark(ic, block);
2991 1.8 christos
2992 1.8 christos icount = slength(block->stmts) + 1 + block->longjt + block->longjf;
2993 1.8 christos noffset = min(block->offset + icount, (int)prog->bf_len);
2994 1.8 christos
2995 1.12 christos fprintf(out, "\tblock%u [shape=ellipse, id=\"block-%u\" label=\"BLOCK%u\\n", block->id, block->id, block->id);
2996 1.8 christos for (i = block->offset; i < noffset; i++) {
2997 1.8 christos fprintf(out, "\\n%s", bpf_image(prog->bf_insns + i, i));
2998 1.8 christos }
2999 1.8 christos fprintf(out, "\" tooltip=\"");
3000 1.8 christos for (i = 0; i < BPF_MEMWORDS; i++)
3001 1.10 christos if (block->val[i] != VAL_UNKNOWN)
3002 1.8 christos fprintf(out, "val[%d]=%d ", i, block->val[i]);
3003 1.8 christos fprintf(out, "val[A]=%d ", block->val[A_ATOM]);
3004 1.8 christos fprintf(out, "val[X]=%d", block->val[X_ATOM]);
3005 1.8 christos fprintf(out, "\"");
3006 1.8 christos if (JT(block) == NULL)
3007 1.8 christos fprintf(out, ", peripheries=2");
3008 1.8 christos fprintf(out, "];\n");
3009 1.8 christos
3010 1.9 christos dot_dump_node(ic, JT(block), prog, out);
3011 1.9 christos dot_dump_node(ic, JF(block), prog, out);
3012 1.8 christos }
3013 1.9 christos
3014 1.8 christos static void
3015 1.9 christos dot_dump_edge(struct icode *ic, struct block *block, FILE *out)
3016 1.8 christos {
3017 1.9 christos if (block == NULL || isMarked(ic, block))
3018 1.8 christos return;
3019 1.9 christos Mark(ic, block);
3020 1.8 christos
3021 1.8 christos if (JT(block)) {
3022 1.12 christos fprintf(out, "\t\"block%u\":se -> \"block%u\":n [label=\"T\"]; \n",
3023 1.8 christos block->id, JT(block)->id);
3024 1.12 christos fprintf(out, "\t\"block%u\":sw -> \"block%u\":n [label=\"F\"]; \n",
3025 1.8 christos block->id, JF(block)->id);
3026 1.8 christos }
3027 1.9 christos dot_dump_edge(ic, JT(block), out);
3028 1.9 christos dot_dump_edge(ic, JF(block), out);
3029 1.8 christos }
3030 1.9 christos
3031 1.8 christos /* Output the block CFG using graphviz/DOT language
3032 1.8 christos * In the CFG, block's code, value index for each registers at EXIT,
3033 1.8 christos * and the jump relationship is show.
3034 1.8 christos *
3035 1.8 christos * example DOT for BPF `ip src host 1.1.1.1' is:
3036 1.8 christos digraph BPF {
3037 1.12 christos block0 [shape=ellipse, id="block-0" label="BLOCK0\n\n(000) ldh [12]\n(001) jeq #0x800 jt 2 jf 5" tooltip="val[A]=0 val[X]=0"];
3038 1.12 christos block1 [shape=ellipse, id="block-1" label="BLOCK1\n\n(002) ld [26]\n(003) jeq #0x1010101 jt 4 jf 5" tooltip="val[A]=0 val[X]=0"];
3039 1.12 christos block2 [shape=ellipse, id="block-2" label="BLOCK2\n\n(004) ret #68" tooltip="val[A]=0 val[X]=0", peripheries=2];
3040 1.12 christos block3 [shape=ellipse, id="block-3" label="BLOCK3\n\n(005) ret #0" tooltip="val[A]=0 val[X]=0", peripheries=2];
3041 1.12 christos "block0":se -> "block1":n [label="T"];
3042 1.12 christos "block0":sw -> "block3":n [label="F"];
3043 1.12 christos "block1":se -> "block2":n [label="T"];
3044 1.12 christos "block1":sw -> "block3":n [label="F"];
3045 1.8 christos }
3046 1.8 christos *
3047 1.12 christos * After install graphviz on https://www.graphviz.org/, save it as bpf.dot
3048 1.8 christos * and run `dot -Tpng -O bpf.dot' to draw the graph.
3049 1.8 christos */
3050 1.11 christos static int
3051 1.11 christos dot_dump(struct icode *ic, char *errbuf)
3052 1.8 christos {
3053 1.8 christos struct bpf_program f;
3054 1.8 christos FILE *out = stdout;
3055 1.8 christos
3056 1.8 christos memset(bids, 0, sizeof bids);
3057 1.11 christos f.bf_insns = icode_to_fcode(ic, ic->root, &f.bf_len, errbuf);
3058 1.11 christos if (f.bf_insns == NULL)
3059 1.11 christos return -1;
3060 1.8 christos
3061 1.8 christos fprintf(out, "digraph BPF {\n");
3062 1.9 christos unMarkAll(ic);
3063 1.9 christos dot_dump_node(ic, ic->root, &f, out);
3064 1.9 christos unMarkAll(ic);
3065 1.9 christos dot_dump_edge(ic, ic->root, out);
3066 1.8 christos fprintf(out, "}\n");
3067 1.8 christos
3068 1.8 christos free((char *)f.bf_insns);
3069 1.11 christos return 0;
3070 1.8 christos }
3071 1.8 christos
3072 1.11 christos static int
3073 1.11 christos plain_dump(struct icode *ic, char *errbuf)
3074 1.1 christos {
3075 1.1 christos struct bpf_program f;
3076 1.1 christos
3077 1.1 christos memset(bids, 0, sizeof bids);
3078 1.11 christos f.bf_insns = icode_to_fcode(ic, ic->root, &f.bf_len, errbuf);
3079 1.11 christos if (f.bf_insns == NULL)
3080 1.11 christos return -1;
3081 1.1 christos bpf_dump(&f, 1);
3082 1.1 christos putchar('\n');
3083 1.1 christos free((char *)f.bf_insns);
3084 1.11 christos return 0;
3085 1.1 christos }
3086 1.9 christos
3087 1.8 christos static void
3088 1.11 christos opt_dump(opt_state_t *opt_state, struct icode *ic)
3089 1.8 christos {
3090 1.11 christos int status;
3091 1.11 christos char errbuf[PCAP_ERRBUF_SIZE];
3092 1.11 christos
3093 1.10 christos /*
3094 1.10 christos * If the CFG, in DOT format, is requested, output it rather than
3095 1.10 christos * the code that would be generated from that graph.
3096 1.8 christos */
3097 1.10 christos if (pcap_print_dot_graph)
3098 1.11 christos status = dot_dump(ic, errbuf);
3099 1.8 christos else
3100 1.11 christos status = plain_dump(ic, errbuf);
3101 1.11 christos if (status == -1)
3102 1.11 christos opt_error(opt_state, "opt_dump: icode_to_fcode failed: %s", errbuf);
3103 1.8 christos }
3104 1.1 christos #endif
3105