optimize.c revision 1.9.12.2 1 /* $NetBSD: optimize.c,v 1.9.12.2 2020/04/13 07:46:11 martin Exp $ */
2
3 /*
4 * Copyright (c) 1988, 1989, 1990, 1991, 1993, 1994, 1995, 1996
5 * The Regents of the University of California. All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that: (1) source code distributions
9 * retain the above copyright notice and this paragraph in its entirety, (2)
10 * distributions including binary code include the above copyright notice and
11 * this paragraph in its entirety in the documentation or other materials
12 * provided with the distribution, and (3) all advertising materials mentioning
13 * features or use of this software display the following acknowledgement:
14 * ``This product includes software developed by the University of California,
15 * Lawrence Berkeley Laboratory and its contributors.'' Neither the name of
16 * the University nor the names of its contributors may be used to endorse
17 * or promote products derived from this software without specific prior
18 * written permission.
19 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
20 * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
21 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
22 *
23 * Optimization module for BPF code intermediate representation.
24 */
25
26 #include <sys/cdefs.h>
27 __RCSID("$NetBSD: optimize.c,v 1.9.12.2 2020/04/13 07:46:11 martin Exp $");
28
29 #ifdef HAVE_CONFIG_H
30 #include <config.h>
31 #endif
32
33 #include <pcap-types.h>
34
35 #include <stdio.h>
36 #include <stdlib.h>
37 #include <memory.h>
38 #include <setjmp.h>
39 #include <string.h>
40
41 #include <errno.h>
42
43 #include "pcap-int.h"
44
45 #include "gencode.h"
46 #include "optimize.h"
47
48 #ifdef HAVE_OS_PROTO_H
49 #include "os-proto.h"
50 #endif
51
52 #ifdef BDEBUG
53 /*
54 * The internal "debug printout" flag for the filter expression optimizer.
55 * The code to print that stuff is present only if BDEBUG is defined, so
56 * the flag, and the routine to set it, are defined only if BDEBUG is
57 * defined.
58 */
59 static int pcap_optimizer_debug;
60
61 /*
62 * Routine to set that flag.
63 *
64 * This is intended for libpcap developers, not for general use.
65 * If you want to set these in a program, you'll have to declare this
66 * routine yourself, with the appropriate DLL import attribute on Windows;
67 * it's not declared in any header file, and won't be declared in any
68 * header file provided by libpcap.
69 */
70 PCAP_API void pcap_set_optimizer_debug(int value);
71
/* Setter for the optimizer-debug flag declared above. */
PCAP_API_DEF void
pcap_set_optimizer_debug(int value)
{
	pcap_optimizer_debug = value;
}
77
78 /*
79 * The internal "print dot graph" flag for the filter expression optimizer.
80 * The code to print that stuff is present only if BDEBUG is defined, so
81 * the flag, and the routine to set it, are defined only if BDEBUG is
82 * defined.
83 */
84 static int pcap_print_dot_graph;
85
86 /*
87 * Routine to set that flag.
88 *
89 * This is intended for libpcap developers, not for general use.
90 * If you want to set these in a program, you'll have to declare this
91 * routine yourself, with the appropriate DLL import attribute on Windows;
92 * it's not declared in any header file, and won't be declared in any
93 * header file provided by libpcap.
94 */
95 PCAP_API void pcap_set_print_dot_graph(int value);
96
/* Setter for the print-dot-graph flag declared above. */
PCAP_API_DEF void
pcap_set_print_dot_graph(int value)
{
	pcap_print_dot_graph = value;
}
102
103 #endif
104
105 /*
106 * lowest_set_bit().
107 *
108 * Takes a 32-bit integer as an argument.
109 *
110 * If handed a non-zero value, returns the index of the lowest set bit,
 * counting upwards from zero.
112 *
113 * If handed zero, the results are platform- and compiler-dependent.
114 * Keep it out of the light, don't give it any water, don't feed it
115 * after midnight, and don't pass zero to it.
116 *
117 * This is the same as the count of trailing zeroes in the word.
118 */
119 #if PCAP_IS_AT_LEAST_GNUC_VERSION(3,4)
120 /*
121 * GCC 3.4 and later; we have __builtin_ctz().
122 */
123 #define lowest_set_bit(mask) __builtin_ctz(mask)
124 #elif defined(_MSC_VER)
125 /*
126 * Visual Studio; we support only 2005 and later, so use
127 * _BitScanForward().
128 */
129 #include <intrin.h>
130
131 #ifndef __clang__
132 #pragma intrinsic(_BitScanForward)
133 #endif
134
/*
 * Wrapper around the _BitScanForward() intrinsic.  Note that, unlike
 * the other implementations of lowest_set_bit(), this one explicitly
 * returns -1 when handed a zero mask.
 */
static __forceinline int
lowest_set_bit(int mask)
{
	unsigned long bit;

	/*
	 * Don't sign-extend mask if long is longer than int.
	 * (It's currently not, in MSVC, even on 64-bit platforms, but....)
	 */
	if (_BitScanForward(&bit, (unsigned int)mask) == 0)
		return -1; /* mask is zero */
	return (int)bit;
}
148 #elif defined(MSDOS) && defined(__DJGPP__)
149 /*
150 * MS-DOS with DJGPP, which declares ffs() in <string.h>, which
151 * we've already included.
152 */
153 #define lowest_set_bit(mask) (ffs((mask)) - 1)
154 #elif (defined(MSDOS) && defined(__WATCOMC__)) || defined(STRINGS_H_DECLARES_FFS)
155 /*
156 * MS-DOS with Watcom C, which has <strings.h> and declares ffs() there,
157 * or some other platform (UN*X conforming to a sufficient recent version
158 * of the Single UNIX Specification).
159 */
160 #include <strings.h>
161 #define lowest_set_bit(mask) (ffs((mask)) - 1)
162 #else
163 /*
164 * None of the above.
165 * Use a perfect-hash-function-based function.
166 */
static int
lowest_set_bit(int mask)
{
	/*
	 * De Bruijn minimal perfect hash: after isolating the lowest
	 * set bit, multiplying by 0x077CB531 leaves a distinct 5-bit
	 * pattern in the top bits for each of the 32 possible one-bit
	 * values; the table maps that pattern back to the bit index.
	 *
	 * See:
	 *
	 *	http://7ooo.mooo.com/text/ComputingTrailingZerosHOWTO.pdf
	 *
	 *	http://supertech.csail.mit.edu/papers/debruijn.pdf
	 */
	static const int debruijn_bit_index[32] = {
		0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
		31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9
	};
	unsigned int word = (unsigned int)mask;
	unsigned int isolated = word & -word;	/* lowest set bit only */

	return debruijn_bit_index[(isolated * 0x077CB531U) >> 27];
}
190 #endif
191
192 /*
193 * Represents a deleted instruction.
194 */
195 #define NOP -1
196
197 /*
198 * Register numbers for use-def values.
199 * 0 through BPF_MEMWORDS-1 represent the corresponding scratch memory
200 * location. A_ATOM is the accumulator and X_ATOM is the index
201 * register.
202 */
203 #define A_ATOM BPF_MEMWORDS
204 #define X_ATOM (BPF_MEMWORDS+1)
205
206 /*
207 * This define is used to represent *both* the accumulator and
208 * x register in use-def computations.
209 * Currently, the use-def code assumes only one definition per instruction.
210 */
211 #define AX_ATOM N_ATOMS
212
/*
 * These data structures are used in a Cocke and Schwartz style
 * value numbering scheme.  Since the flowgraph is acyclic,
 * exit values can be propagated from a node's predecessors
 * provided it is uniquely defined.
 */
struct valnode {
	int code;		/* opcode of the expression */
	int v0, v1;		/* value numbers of the two operands */
	int val;		/* value number assigned to this expression */
	struct valnode *next;	/* next entry on the hash chain */
};
225
226 /* Integer constants mapped with the load immediate opcode. */
227 #define K(i) F(opt_state, BPF_LD|BPF_IMM|BPF_W, i, 0L)
228
/*
 * Per-value-number information: whether the value is a known constant
 * and, if so, what that constant is.
 */
struct vmapinfo {
	int is_const;		/* nonzero if the value is a known constant */
	bpf_int32 const_val;	/* the constant; valid only if is_const */
};
233
/*
 * All the state the optimizer carries between passes; kept in one
 * structure so the optimizer is reentrant.
 */
typedef struct {
	/*
	 * Place to longjmp to on an error.
	 */
	jmp_buf top_ctx;

	/*
	 * The buffer into which to put error message.
	 */
	char *errbuf;

	/*
	 * A flag to indicate that further optimization is needed.
	 * Iterative passes are continued until a given pass yields no
	 * branch movement.
	 */
	int done;

	/* All basic blocks and edges of the flowgraph, indexed by id. */
	int n_blocks;
	struct block **blocks;
	int n_edges;
	struct edge **edges;

	/*
	 * A bit vector set representation of the dominators.
	 * We round up the set size to the next power of two.
	 */
	int nodewords;		/* 32-bit words per per-block bit set */
	int edgewords;		/* 32-bit words per per-edge bit set */
	struct block **levels;	/* heads of the per-level block lists */
	bpf_u_int32 *space;	/* NOTE(review): presumably the backing store
				 * for the bit sets below; it is only
				 * declared in this chunk — confirm against
				 * opt_init(). */

#define BITS_PER_WORD (8*sizeof(bpf_u_int32))
/*
 * True if a is in uset {p}
 */
#define SET_MEMBER(p, a) \
((p)[(unsigned)(a) / BITS_PER_WORD] & ((bpf_u_int32)1 << ((unsigned)(a) % BITS_PER_WORD)))

/*
 * Add 'a' to uset p.
 */
#define SET_INSERT(p, a) \
(p)[(unsigned)(a) / BITS_PER_WORD] |= ((bpf_u_int32)1 << ((unsigned)(a) % BITS_PER_WORD))

/*
 * Delete 'a' from uset p.
 */
#define SET_DELETE(p, a) \
(p)[(unsigned)(a) / BITS_PER_WORD] &= ~((bpf_u_int32)1 << ((unsigned)(a) % BITS_PER_WORD))

/*
 * a := a intersect b
 */
#define SET_INTERSECT(a, b, n)\
{\
	register bpf_u_int32 *_x = a, *_y = b;\
	register int _n = n;\
	while (--_n >= 0) *_x++ &= *_y++;\
}

/*
 * a := a - b
 */
#define SET_SUBTRACT(a, b, n)\
{\
	register bpf_u_int32 *_x = a, *_y = b;\
	register int _n = n;\
	while (--_n >= 0) *_x++ &=~ *_y++;\
}

/*
 * a := a union b
 */
#define SET_UNION(a, b, n)\
{\
	register bpf_u_int32 *_x = a, *_y = b;\
	register int _n = n;\
	while (--_n >= 0) *_x++ |= *_y++;\
}

	/*
	 * The dominator, closure and edge-dominator bit sets for all
	 * blocks/edges, each allocated as one contiguous chunk
	 * (n_blocks * nodewords, n_blocks * nodewords and
	 * n_edges * edgewords words respectively; see find_dom(),
	 * find_closure() and find_edom()).
	 */
	uset all_dom_sets;
	uset all_closure_sets;
	uset all_edge_sets;

	/* Value-numbering state; see F() and init_val(). */
#define MODULUS 213
	struct valnode *hashtbl[MODULUS];	/* known (code,v0,v1) triples */
	int curval;		/* most recently assigned value number */
	int maxval;		/* number of slots in vmap */

	struct vmapinfo *vmap;		/* value number -> constant info */
	struct valnode *vnode_base;	/* preallocated pool of valnodes */
	struct valnode *next_vnode;	/* next free entry in vnode_base */
} opt_state_t;
328
/*
 * State used while converting the basic-block graph into the linear
 * instruction array that BPF requires.
 */
typedef struct {
	/*
	 * Place to longjmp to on an error.
	 */
	jmp_buf top_ctx;

	/*
	 * The buffer into which to put error message.
	 */
	char *errbuf;

	/*
	 * Some pointers used to convert the basic block form of the code,
	 * into the array form that BPF requires.  'fstart' will point to
	 * the malloc'd array while 'ftail' is used during the recursive
	 * traversal.
	 */
	struct bpf_insn *fstart;
	struct bpf_insn *ftail;
} conv_state_t;
349
350 static void opt_init(opt_state_t *, struct icode *);
351 static void opt_cleanup(opt_state_t *);
352 static void PCAP_NORETURN opt_error(opt_state_t *, const char *, ...)
353 PCAP_PRINTFLIKE(2, 3);
354
355 static void intern_blocks(opt_state_t *, struct icode *);
356
357 static void find_inedges(opt_state_t *, struct block *);
358 #ifdef BDEBUG
359 static void opt_dump(opt_state_t *, struct icode *);
360 #endif
361
362 #ifndef MAX
363 #define MAX(a,b) ((a)>(b)?(a):(b))
364 #endif
365
366 static void
367 find_levels_r(opt_state_t *opt_state, struct icode *ic, struct block *b)
368 {
369 int level;
370
371 if (isMarked(ic, b))
372 return;
373
374 Mark(ic, b);
375 b->link = 0;
376
377 if (JT(b)) {
378 find_levels_r(opt_state, ic, JT(b));
379 find_levels_r(opt_state, ic, JF(b));
380 level = MAX(JT(b)->level, JF(b)->level) + 1;
381 } else
382 level = 0;
383 b->level = level;
384 b->link = opt_state->levels[level];
385 opt_state->levels[level] = b;
386 }
387
388 /*
389 * Level graph. The levels go from 0 at the leaves to
390 * N_LEVELS at the root. The opt_state->levels[] array points to the
391 * first node of the level list, whose elements are linked
392 * with the 'link' field of the struct block.
393 */
394 static void
395 find_levels(opt_state_t *opt_state, struct icode *ic)
396 {
397 memset((char *)opt_state->levels, 0, opt_state->n_blocks * sizeof(*opt_state->levels));
398 unMarkAll(ic);
399 find_levels_r(opt_state, ic, ic->root);
400 }
401
/*
 * Find dominator relationships.
 * Assumes graph has been leveled.
 */
static void
find_dom(opt_state_t *opt_state, struct block *root)
{
	int i;
	struct block *b;
	bpf_u_int32 *x;

	/*
	 * Initialize sets to contain all nodes.
	 */
	x = opt_state->all_dom_sets;
	i = opt_state->n_blocks * opt_state->nodewords;
	while (--i >= 0)
		*x++ = 0xFFFFFFFFU;
	/* Root starts off empty. */
	for (i = opt_state->nodewords; --i >= 0;)
		root->dom[i] = 0;

	/* root->level is the highest level number found. */
	for (i = root->level; i >= 0; --i) {
		for (b = opt_state->levels[i]; b; b = b->link) {
			/* Every block dominates itself. */
			SET_INSERT(b->dom, b->id);
			if (JT(b) == 0)
				continue;
			/*
			 * Intersecting each successor's set with this
			 * block's leaves the successor with only the
			 * blocks that dominate all of its predecessors.
			 */
			SET_INTERSECT(JT(b)->dom, b->dom, opt_state->nodewords);
			SET_INTERSECT(JF(b)->dom, b->dom, opt_state->nodewords);
		}
	}
}
435
/*
 * Propagate edge-dominator information from edge 'ep' to the two
 * edges leaving its successor block, if there is one.
 */
static void
propedom(opt_state_t *opt_state, struct edge *ep)
{
	/* An edge dominates itself. */
	SET_INSERT(ep->edom, ep->id);
	if (ep->succ) {
		SET_INTERSECT(ep->succ->et.edom, ep->edom, opt_state->edgewords);
		SET_INTERSECT(ep->succ->ef.edom, ep->edom, opt_state->edgewords);
	}
}
445
/*
 * Compute edge dominators.
 * Assumes graph has been leveled and predecessors established.
 */
static void
find_edom(opt_state_t *opt_state, struct block *root)
{
	int i;
	uset x;
	struct block *b;

	/* Initialize every edge's set to contain all edges. */
	x = opt_state->all_edge_sets;
	for (i = opt_state->n_edges * opt_state->edgewords; --i >= 0; )
		x[i] = 0xFFFFFFFFU;

	/* root->level is the highest level number found; the root's
	 * outgoing edges start off with empty dominator sets. */
	memset(root->et.edom, 0, opt_state->edgewords * sizeof(*(uset)0));
	memset(root->ef.edom, 0, opt_state->edgewords * sizeof(*(uset)0));
	for (i = root->level; i >= 0; --i) {
		for (b = opt_state->levels[i]; b != 0; b = b->link) {
			propedom(opt_state, &b->et);
			propedom(opt_state, &b->ef);
		}
	}
}
471
/*
 * Find the backwards transitive closure of the flow graph.  These sets
 * are backwards in the sense that we find the set of nodes that reach
 * a given node, not the set of nodes that can be reached by a node.
 *
 * Assumes graph has been leveled.
 */
static void
find_closure(opt_state_t *opt_state, struct block *root)
{
	int i;
	struct block *b;

	/*
	 * Initialize sets to contain no nodes.
	 */
	memset((char *)opt_state->all_closure_sets, 0,
	      opt_state->n_blocks * opt_state->nodewords * sizeof(*opt_state->all_closure_sets));

	/* root->level is the highest level number found. */
	for (i = root->level; i >= 0; --i) {
		for (b = opt_state->levels[i]; b; b = b->link) {
			/* Every block reaches itself... */
			SET_INSERT(b->closure, b->id);
			if (JT(b) == 0)
				continue;
			/* ...and everything that reaches b reaches both
			 * of b's successors. */
			SET_UNION(JT(b)->closure, b->closure, opt_state->nodewords);
			SET_UNION(JF(b)->closure, b->closure, opt_state->nodewords);
		}
	}
}
502
/*
 * Return the register number that is used by s.  If A and X are both
 * used, return AX_ATOM.  If no register is used, return -1.
 *
 * The implementation should probably change to an array access.
 */
static int
atomuse(struct stmt *s)
{
	register int c = s->code;

	if (c == NOP)
		return -1;

	switch (BPF_CLASS(c)) {

	case BPF_RET:
		/* "ret a" reads A, "ret x" reads X, "ret #k" reads neither. */
		return (BPF_RVAL(c) == BPF_A) ? A_ATOM :
			(BPF_RVAL(c) == BPF_X) ? X_ATOM : -1;

	case BPF_LD:
	case BPF_LDX:
		/*
		 * An indirect load uses X as the offset; a scratch-memory
		 * load uses memory word k.  Other load modes read no
		 * register.
		 */
		return (BPF_MODE(c) == BPF_IND) ? X_ATOM :
			(BPF_MODE(c) == BPF_MEM) ? s->k : -1;

	case BPF_ST:
		return A_ATOM;

	case BPF_STX:
		return X_ATOM;

	case BPF_JMP:
	case BPF_ALU:
		/* With an X operand, both A and X are read. */
		if (BPF_SRC(c) == BPF_X)
			return AX_ATOM;
		return A_ATOM;

	case BPF_MISC:
		/* txa reads X; tax reads A. */
		return BPF_MISCOP(c) == BPF_TXA ? X_ATOM : A_ATOM;
	}
	abort();
	/* NOTREACHED */
}
546
547 /*
548 * Return the register number that is defined by 's'. We assume that
549 * a single stmt cannot define more than one register. If no register
550 * is defined, return -1.
551 *
552 * The implementation should probably change to an array access.
553 */
554 static int
555 atomdef(struct stmt *s)
556 {
557 if (s->code == NOP)
558 return -1;
559
560 switch (BPF_CLASS(s->code)) {
561
562 case BPF_LD:
563 case BPF_ALU:
564 return A_ATOM;
565
566 case BPF_LDX:
567 return X_ATOM;
568
569 case BPF_ST:
570 case BPF_STX:
571 return s->k;
572
573 case BPF_MISC:
574 return BPF_MISCOP(s->code) == BPF_TAX ? X_ATOM : A_ATOM;
575 }
576 return -1;
577 }
578
579 /*
580 * Compute the sets of registers used, defined, and killed by 'b'.
581 *
582 * "Used" means that a statement in 'b' uses the register before any
583 * statement in 'b' defines it, i.e. it uses the value left in
584 * that register by a predecessor block of this block.
585 * "Defined" means that a statement in 'b' defines it.
586 * "Killed" means that a statement in 'b' defines it before any
587 * statement in 'b' uses it, i.e. it kills the value left in that
588 * register by a predecessor block of this block.
589 */
590 static void
591 compute_local_ud(struct block *b)
592 {
593 struct slist *s;
594 atomset def = 0, use = 0, killed = 0;
595 int atom;
596
597 for (s = b->stmts; s; s = s->next) {
598 if (s->s.code == NOP)
599 continue;
600 atom = atomuse(&s->s);
601 if (atom >= 0) {
602 if (atom == AX_ATOM) {
603 if (!ATOMELEM(def, X_ATOM))
604 use |= ATOMMASK(X_ATOM);
605 if (!ATOMELEM(def, A_ATOM))
606 use |= ATOMMASK(A_ATOM);
607 }
608 else if (atom < N_ATOMS) {
609 if (!ATOMELEM(def, atom))
610 use |= ATOMMASK(atom);
611 }
612 else
613 abort();
614 }
615 atom = atomdef(&s->s);
616 if (atom >= 0) {
617 if (!ATOMELEM(use, atom))
618 killed |= ATOMMASK(atom);
619 def |= ATOMMASK(atom);
620 }
621 }
622 if (BPF_CLASS(b->s.code) == BPF_JMP) {
623 /*
624 * XXX - what about RET?
625 */
626 atom = atomuse(&b->s);
627 if (atom >= 0) {
628 if (atom == AX_ATOM) {
629 if (!ATOMELEM(def, X_ATOM))
630 use |= ATOMMASK(X_ATOM);
631 if (!ATOMELEM(def, A_ATOM))
632 use |= ATOMMASK(A_ATOM);
633 }
634 else if (atom < N_ATOMS) {
635 if (!ATOMELEM(def, atom))
636 use |= ATOMMASK(atom);
637 }
638 else
639 abort();
640 }
641 }
642
643 b->def = def;
644 b->kill = killed;
645 b->in_use = use;
646 }
647
/*
 * Compute use/def information for every block, then propagate
 * liveness backwards through the flowgraph.
 * Assume graph is already leveled.
 */
static void
find_ud(opt_state_t *opt_state, struct block *root)
{
	int i, maxlevel;
	struct block *p;

	/*
	 * root->level is the highest level number found;
	 * count down from there.
	 */
	maxlevel = root->level;
	for (i = maxlevel; i >= 0; --i)
		for (p = opt_state->levels[i]; p; p = p->link) {
			compute_local_ud(p);
			p->out_use = 0;
		}

	/*
	 * A register is live on exit from a block if it is live on
	 * entry to either successor, and live on entry if used locally
	 * or live on exit and not killed.  Level-0 blocks are leaves
	 * (see find_levels_r()), so every block visited here has both
	 * successors.
	 */
	for (i = 1; i <= maxlevel; ++i) {
		for (p = opt_state->levels[i]; p; p = p->link) {
			p->out_use |= JT(p)->in_use | JF(p)->in_use;
			p->in_use |= p->out_use &~ p->kill;
		}
	}
}
/*
 * Reset the value-numbering state for a fresh pass: no value numbers
 * assigned, empty expression hash table, empty constant map.
 */
static void
init_val(opt_state_t *opt_state)
{
	opt_state->curval = 0;
	opt_state->next_vnode = opt_state->vnode_base;
	memset((char *)opt_state->vmap, 0, opt_state->maxval * sizeof(*opt_state->vmap));
	memset((char *)opt_state->hashtbl, 0, sizeof opt_state->hashtbl);
}
683
/* Because we really don't have an IR, this stuff is a little messy. */

/*
 * Value-numbering lookup: return the value number for the expression
 * with opcode 'code' and operand value numbers 'v0' and 'v1',
 * assigning a fresh number if the triple has not been seen before.
 * For a load-immediate, the constant is also recorded in vmap so
 * later passes can constant-fold.
 */
static int
F(opt_state_t *opt_state, int code, int v0, int v1)
{
	u_int hash;
	int val;
	struct valnode *p;

	hash = (u_int)code ^ ((u_int)v0 << 4) ^ ((u_int)v1 << 8);
	hash %= MODULUS;

	/* Already known?  Reuse its number. */
	for (p = opt_state->hashtbl[hash]; p; p = p->next)
		if (p->code == code && p->v0 == v0 && p->v1 == v1)
			return p->val;

	val = ++opt_state->curval;
	if (BPF_MODE(code) == BPF_IMM &&
	    (BPF_CLASS(code) == BPF_LD || BPF_CLASS(code) == BPF_LDX)) {
		opt_state->vmap[val].const_val = v0;
		opt_state->vmap[val].is_const = 1;
	}
	/* Take a node from the preallocated pool and link it onto the
	 * hash chain. */
	p = opt_state->next_vnode++;
	p->val = val;
	p->code = code;
	p->v0 = v0;
	p->v1 = v1;
	p->next = opt_state->hashtbl[hash];
	opt_state->hashtbl[hash] = p;

	return val;
}
715
716 static inline void
717 vstore(struct stmt *s, int *valp, int newval, int alter)
718 {
719 if (alter && newval != VAL_UNKNOWN && *valp == newval)
720 s->code = NOP;
721 else
722 *valp = newval;
723 }
724
/*
 * Do constant-folding on binary operators.
 * (Unary operators are handled elsewhere.)
 *
 * 'v0' and 'v1' are the value numbers of the two operands, both of
 * which must be known constants; the statement is rewritten into a
 * load immediate of the computed result.
 */
static void
fold_op(opt_state_t *opt_state, struct stmt *s, int v0, int v1)
{
	bpf_u_int32 a, b;

	a = opt_state->vmap[v0].const_val;
	b = opt_state->vmap[v1].const_val;

	switch (BPF_OP(s->code)) {
	case BPF_ADD:
		a += b;
		break;

	case BPF_SUB:
		a -= b;
		break;

	case BPF_MUL:
		a *= b;
		break;

	case BPF_DIV:
		if (b == 0)
			opt_error(opt_state, "division by zero");
		a /= b;
		break;

	case BPF_MOD:
		if (b == 0)
			opt_error(opt_state, "modulus by zero");
		a %= b;
		break;

	case BPF_AND:
		a &= b;
		break;

	case BPF_OR:
		a |= b;
		break;

	case BPF_XOR:
		a ^= b;
		break;

	case BPF_LSH:
		/*
		 * A left shift of more than the width of the type
		 * is undefined in C; we'll just treat it as shifting
		 * all the bits out.
		 *
		 * XXX - the BPF interpreter doesn't check for this,
		 * so its behavior is dependent on the behavior of
		 * the processor on which it's running.  There are
		 * processors on which it shifts all the bits out
		 * and processors on which it does no shift.
		 */
		if (b < 32)
			a <<= b;
		else
			a = 0;
		break;

	case BPF_RSH:
		/*
		 * A right shift of more than the width of the type
		 * is undefined in C; we'll just treat it as shifting
		 * all the bits out.
		 *
		 * XXX - the BPF interpreter doesn't check for this,
		 * so its behavior is dependent on the behavior of
		 * the processor on which it's running.  There are
		 * processors on which it shifts all the bits out
		 * and processors on which it does no shift.
		 */
		if (b < 32)
			a >>= b;
		else
			a = 0;
		break;

	default:
		abort();
	}
	/* Replace the operation with a load of the folded constant. */
	s->k = a;
	s->code = BPF_LD|BPF_IMM;
	opt_state->done = 0;
}
817
818 static inline struct slist *
819 this_op(struct slist *s)
820 {
821 while (s != 0 && s->s.code == NOP)
822 s = s->next;
823 return s;
824 }
825
826 static void
827 opt_not(struct block *b)
828 {
829 struct block *tmp = JT(b);
830
831 JT(b) = JF(b);
832 JF(b) = tmp;
833 }
834
/*
 * Peephole optimizations on a single block: rewrite short instruction
 * sequences into cheaper equivalents and, where the outcome of the
 * block's final branch can be determined from known values, redirect
 * its successors.
 */
static void
opt_peep(opt_state_t *opt_state, struct block *b)
{
	struct slist *s;
	struct slist *next, *last;
	int val;

	s = b->stmts;
	if (s == 0)
		return;

	last = s;
	for (/*empty*/; /*empty*/; s = next) {
		/*
		 * Skip over nops.
		 */
		s = this_op(s);
		if (s == 0)
			break;	/* nothing left in the block */

		/*
		 * Find the next real instruction after that one
		 * (skipping nops).
		 */
		next = this_op(s->next);
		if (next == 0)
			break;	/* no next instruction */
		last = next;

		/*
		 * st  M[k]	-->	st  M[k]
		 * ldx M[k]		tax
		 */
		if (s->s.code == BPF_ST &&
		    next->s.code == (BPF_LDX|BPF_MEM) &&
		    s->s.k == next->s.k) {
			opt_state->done = 0;
			next->s.code = BPF_MISC|BPF_TAX;
		}
		/*
		 * ld  #k	-->	ldx  #k
		 * tax			txa
		 */
		if (s->s.code == (BPF_LD|BPF_IMM) &&
		    next->s.code == (BPF_MISC|BPF_TAX)) {
			s->s.code = BPF_LDX|BPF_IMM;
			next->s.code = BPF_MISC|BPF_TXA;
			opt_state->done = 0;
		}
		/*
		 * This is an ugly special case, but it happens
		 * when you say tcp[k] or udp[k] where k is a constant.
		 */
		if (s->s.code == (BPF_LD|BPF_IMM)) {
			struct slist *add, *tax, *ild;

			/*
			 * Check that X isn't used on exit from this
			 * block (which the optimizer might cause).
			 * We know the code generator won't generate
			 * any local dependencies.
			 */
			if (ATOMELEM(b->out_use, X_ATOM))
				continue;

			/*
			 * Check that the instruction following the ldi
			 * is an addx, or it's an ldxms with an addx
			 * following it (with 0 or more nops between the
			 * ldxms and addx).
			 */
			if (next->s.code != (BPF_LDX|BPF_MSH|BPF_B))
				add = next;
			else
				add = this_op(next->next);
			if (add == 0 || add->s.code != (BPF_ALU|BPF_ADD|BPF_X))
				continue;

			/*
			 * Check that a tax follows that (with 0 or more
			 * nops between them).
			 */
			tax = this_op(add->next);
			if (tax == 0 || tax->s.code != (BPF_MISC|BPF_TAX))
				continue;

			/*
			 * Check that an ild follows that (with 0 or more
			 * nops between them).
			 */
			ild = this_op(tax->next);
			if (ild == 0 || BPF_CLASS(ild->s.code) != BPF_LD ||
			    BPF_MODE(ild->s.code) != BPF_IND)
				continue;
			/*
			 * We want to turn this sequence:
			 *
			 * (004) ldi     #0x2		{s}
			 * (005) ldxms   [14]		{next}  -- optional
			 * (006) addx			{add}
			 * (007) tax			{tax}
			 * (008) ild     [x+0]		{ild}
			 *
			 * into this sequence:
			 *
			 * (004) nop
			 * (005) ldxms   [14]
			 * (006) nop
			 * (007) nop
			 * (008) ild     [x+2]
			 *
			 * XXX We need to check that X is not
			 * subsequently used, because we want to change
			 * what'll be in it after this sequence.
			 *
			 * We know we can eliminate the accumulator
			 * modifications earlier in the sequence since
			 * it is defined by the last stmt of this sequence
			 * (i.e., the last statement of the sequence loads
			 * a value into the accumulator, so we can eliminate
			 * earlier operations on the accumulator).
			 */
			ild->s.k += s->s.k;
			s->s.code = NOP;
			add->s.code = NOP;
			tax->s.code = NOP;
			opt_state->done = 0;
		}
	}
	/*
	 * If the comparison at the end of a block is an equality
	 * comparison against a constant, and nobody uses the value
	 * we leave in the A register at the end of a block, and
	 * the operation preceding the comparison is an arithmetic
	 * operation, we can sometime optimize it away.
	 */
	if (b->s.code == (BPF_JMP|BPF_JEQ|BPF_K) &&
	    !ATOMELEM(b->out_use, A_ATOM)) {
		/*
		 * We can optimize away certain subtractions of the
		 * X register.
		 */
		if (last->s.code == (BPF_ALU|BPF_SUB|BPF_X)) {
			val = b->val[X_ATOM];
			if (opt_state->vmap[val].is_const) {
				/*
				 * If we have a subtract to do a comparison,
				 * and the X register is a known constant,
				 * we can merge this value into the
				 * comparison:
				 *
				 * sub x  ->	nop
				 * jeq #y	jeq #(x+y)
				 */
				b->s.k += opt_state->vmap[val].const_val;
				last->s.code = NOP;
				opt_state->done = 0;
			} else if (b->s.k == 0) {
				/*
				 * If the X register isn't a constant,
				 * and the comparison in the test is
				 * against 0, we can compare with the
				 * X register, instead:
				 *
				 * sub x  ->	nop
				 * jeq #0	jeq x
				 */
				last->s.code = NOP;
				b->s.code = BPF_JMP|BPF_JEQ|BPF_X;
				opt_state->done = 0;
			}
		}
		/*
		 * Likewise, a constant subtract can be simplified:
		 *
		 * sub #x ->	nop
		 * jeq #y ->	jeq #(x+y)
		 */
		else if (last->s.code == (BPF_ALU|BPF_SUB|BPF_K)) {
			last->s.code = NOP;
			b->s.k += last->s.k;
			opt_state->done = 0;
		}
		/*
		 * And, similarly, a constant AND can be simplified
		 * if we're testing against 0, i.e.:
		 *
		 * and #k	nop
		 * jeq #0  ->	jset #k
		 *
		 * (opt_not() inverts the branch sense, since jset
		 * succeeds exactly when jeq #0 would have failed.)
		 */
		else if (last->s.code == (BPF_ALU|BPF_AND|BPF_K) &&
		    b->s.k == 0) {
			b->s.k = last->s.k;
			b->s.code = BPF_JMP|BPF_K|BPF_JSET;
			last->s.code = NOP;
			opt_state->done = 0;
			opt_not(b);
		}
	}
	/*
	 * jset #0        ->   never
	 * jset #ffffffff ->   always
	 */
	if (b->s.code == (BPF_JMP|BPF_K|BPF_JSET)) {
		if (b->s.k == 0)
			JT(b) = JF(b);
		if ((u_int)b->s.k == 0xffffffffU)
			JF(b) = JT(b);
	}
	/*
	 * If we're comparing against the index register, and the index
	 * register is a known constant, we can just compare against that
	 * constant.
	 */
	val = b->val[X_ATOM];
	if (opt_state->vmap[val].is_const && BPF_SRC(b->s.code) == BPF_X) {
		bpf_int32 v = opt_state->vmap[val].const_val;
		b->s.code &= ~BPF_X;
		b->s.k = v;
	}
	/*
	 * If the accumulator is a known constant, we can compute the
	 * comparison result.
	 */
	val = b->val[A_ATOM];
	if (opt_state->vmap[val].is_const && BPF_SRC(b->s.code) == BPF_K) {
		bpf_int32 v = opt_state->vmap[val].const_val;
		switch (BPF_OP(b->s.code)) {

		case BPF_JEQ:
			v = v == b->s.k;
			break;

		case BPF_JGT:
			v = (unsigned)v > (unsigned)b->s.k;
			break;

		case BPF_JGE:
			v = (unsigned)v >= (unsigned)b->s.k;
			break;

		case BPF_JSET:
			v &= b->s.k;
			break;

		default:
			abort();
		}
		/* The branch is now unconditional; if its two targets
		 * differed, short-circuiting it is real progress. */
		if (JF(b) != JT(b))
			opt_state->done = 0;
		if (v)
			JF(b) = JT(b);
		else
			JT(b) = JF(b);
	}
}
1091
1092 /*
1093 * Compute the symbolic value of expression of 's', and update
1094 * anything it defines in the value table 'val'. If 'alter' is true,
1095 * do various optimizations. This code would be cleaner if symbolic
1096 * evaluation and code transformations weren't folded together.
1097 */
1098 static void
1099 opt_stmt(opt_state_t *opt_state, struct stmt *s, int val[], int alter)
1100 {
1101 int op;
1102 int v;
1103
1104 switch (s->code) {
1105
1106 case BPF_LD|BPF_ABS|BPF_W:
1107 case BPF_LD|BPF_ABS|BPF_H:
1108 case BPF_LD|BPF_ABS|BPF_B:
1109 v = F(opt_state, s->code, s->k, 0L);
1110 vstore(s, &val[A_ATOM], v, alter);
1111 break;
1112
1113 case BPF_LD|BPF_IND|BPF_W:
1114 case BPF_LD|BPF_IND|BPF_H:
1115 case BPF_LD|BPF_IND|BPF_B:
1116 v = val[X_ATOM];
1117 if (alter && opt_state->vmap[v].is_const) {
1118 s->code = BPF_LD|BPF_ABS|BPF_SIZE(s->code);
1119 s->k += opt_state->vmap[v].const_val;
1120 v = F(opt_state, s->code, s->k, 0L);
1121 opt_state->done = 0;
1122 }
1123 else
1124 v = F(opt_state, s->code, s->k, v);
1125 vstore(s, &val[A_ATOM], v, alter);
1126 break;
1127
1128 case BPF_LD|BPF_LEN:
1129 v = F(opt_state, s->code, 0L, 0L);
1130 vstore(s, &val[A_ATOM], v, alter);
1131 break;
1132
1133 case BPF_LD|BPF_IMM:
1134 v = K(s->k);
1135 vstore(s, &val[A_ATOM], v, alter);
1136 break;
1137
1138 case BPF_LDX|BPF_IMM:
1139 v = K(s->k);
1140 vstore(s, &val[X_ATOM], v, alter);
1141 break;
1142
1143 case BPF_LDX|BPF_MSH|BPF_B:
1144 v = F(opt_state, s->code, s->k, 0L);
1145 vstore(s, &val[X_ATOM], v, alter);
1146 break;
1147
1148 case BPF_ALU|BPF_NEG:
1149 if (alter && opt_state->vmap[val[A_ATOM]].is_const) {
1150 s->code = BPF_LD|BPF_IMM;
1151 /*
1152 * Do this negation as unsigned arithmetic; that's
1153 * what modern BPF engines do, and it guarantees
1154 * that all possible values can be negated. (Yeah,
1155 * negating 0x80000000, the minimum signed 32-bit
1156 * two's-complement value, results in 0x80000000,
1157 * so it's still negative, but we *should* be doing
1158 * all unsigned arithmetic here, to match what
1159 * modern BPF engines do.)
1160 *
1161 * Express it as 0U - (unsigned value) so that we
1162 * don't get compiler warnings about negating an
1163 * unsigned value and don't get UBSan warnings
1164 * about the result of negating 0x80000000 being
1165 * undefined.
1166 */
1167 s->k = 0U - (bpf_u_int32)(opt_state->vmap[val[A_ATOM]].const_val);
1168 val[A_ATOM] = K(s->k);
1169 }
1170 else
1171 val[A_ATOM] = F(opt_state, s->code, val[A_ATOM], 0L);
1172 break;
1173
1174 case BPF_ALU|BPF_ADD|BPF_K:
1175 case BPF_ALU|BPF_SUB|BPF_K:
1176 case BPF_ALU|BPF_MUL|BPF_K:
1177 case BPF_ALU|BPF_DIV|BPF_K:
1178 case BPF_ALU|BPF_MOD|BPF_K:
1179 case BPF_ALU|BPF_AND|BPF_K:
1180 case BPF_ALU|BPF_OR|BPF_K:
1181 case BPF_ALU|BPF_XOR|BPF_K:
1182 case BPF_ALU|BPF_LSH|BPF_K:
1183 case BPF_ALU|BPF_RSH|BPF_K:
1184 op = BPF_OP(s->code);
1185 if (alter) {
1186 if (s->k == 0) {
1187 /*
1188 * Optimize operations where the constant
1189 * is zero.
1190 *
1191 * Don't optimize away "sub #0"
1192 * as it may be needed later to
1193 * fixup the generated math code.
1194 *
1195 * Fail if we're dividing by zero or taking
1196 * a modulus by zero.
1197 */
1198 if (op == BPF_ADD ||
1199 op == BPF_LSH || op == BPF_RSH ||
1200 op == BPF_OR || op == BPF_XOR) {
1201 s->code = NOP;
1202 break;
1203 }
1204 if (op == BPF_MUL || op == BPF_AND) {
1205 s->code = BPF_LD|BPF_IMM;
1206 val[A_ATOM] = K(s->k);
1207 break;
1208 }
1209 if (op == BPF_DIV)
1210 opt_error(opt_state,
1211 "division by zero");
1212 if (op == BPF_MOD)
1213 opt_error(opt_state,
1214 "modulus by zero");
1215 }
1216 if (opt_state->vmap[val[A_ATOM]].is_const) {
1217 fold_op(opt_state, s, val[A_ATOM], K(s->k));
1218 val[A_ATOM] = K(s->k);
1219 break;
1220 }
1221 }
1222 val[A_ATOM] = F(opt_state, s->code, val[A_ATOM], K(s->k));
1223 break;
1224
1225 case BPF_ALU|BPF_ADD|BPF_X:
1226 case BPF_ALU|BPF_SUB|BPF_X:
1227 case BPF_ALU|BPF_MUL|BPF_X:
1228 case BPF_ALU|BPF_DIV|BPF_X:
1229 case BPF_ALU|BPF_MOD|BPF_X:
1230 case BPF_ALU|BPF_AND|BPF_X:
1231 case BPF_ALU|BPF_OR|BPF_X:
1232 case BPF_ALU|BPF_XOR|BPF_X:
1233 case BPF_ALU|BPF_LSH|BPF_X:
1234 case BPF_ALU|BPF_RSH|BPF_X:
1235 op = BPF_OP(s->code);
1236 if (alter && opt_state->vmap[val[X_ATOM]].is_const) {
1237 if (opt_state->vmap[val[A_ATOM]].is_const) {
1238 fold_op(opt_state, s, val[A_ATOM], val[X_ATOM]);
1239 val[A_ATOM] = K(s->k);
1240 }
1241 else {
1242 s->code = BPF_ALU|BPF_K|op;
1243 s->k = opt_state->vmap[val[X_ATOM]].const_val;
1244 /*
1245 * XXX - we need to make up our minds
1246 * as to what integers are signed and
1247 * what integers are unsigned in BPF
1248 * programs and in our IR.
1249 */
1250 if ((op == BPF_LSH || op == BPF_RSH) &&
1251 (s->k < 0 || s->k > 31))
1252 opt_error(opt_state,
1253 "shift by more than 31 bits");
1254 opt_state->done = 0;
1255 val[A_ATOM] =
1256 F(opt_state, s->code, val[A_ATOM], K(s->k));
1257 }
1258 break;
1259 }
1260 /*
1261 * Check if we're doing something to an accumulator
1262 * that is 0, and simplify. This may not seem like
1263 * much of a simplification but it could open up further
1264 * optimizations.
1265 * XXX We could also check for mul by 1, etc.
1266 */
1267 if (alter && opt_state->vmap[val[A_ATOM]].is_const
1268 && opt_state->vmap[val[A_ATOM]].const_val == 0) {
1269 if (op == BPF_ADD || op == BPF_OR || op == BPF_XOR) {
1270 s->code = BPF_MISC|BPF_TXA;
1271 vstore(s, &val[A_ATOM], val[X_ATOM], alter);
1272 break;
1273 }
1274 else if (op == BPF_MUL || op == BPF_DIV || op == BPF_MOD ||
1275 op == BPF_AND || op == BPF_LSH || op == BPF_RSH) {
1276 s->code = BPF_LD|BPF_IMM;
1277 s->k = 0;
1278 vstore(s, &val[A_ATOM], K(s->k), alter);
1279 break;
1280 }
1281 else if (op == BPF_NEG) {
1282 s->code = NOP;
1283 break;
1284 }
1285 }
1286 val[A_ATOM] = F(opt_state, s->code, val[A_ATOM], val[X_ATOM]);
1287 break;
1288
1289 case BPF_MISC|BPF_TXA:
1290 vstore(s, &val[A_ATOM], val[X_ATOM], alter);
1291 break;
1292
1293 case BPF_LD|BPF_MEM:
1294 v = val[s->k];
1295 if (alter && opt_state->vmap[v].is_const) {
1296 s->code = BPF_LD|BPF_IMM;
1297 s->k = opt_state->vmap[v].const_val;
1298 opt_state->done = 0;
1299 }
1300 vstore(s, &val[A_ATOM], v, alter);
1301 break;
1302
1303 case BPF_MISC|BPF_TAX:
1304 vstore(s, &val[X_ATOM], val[A_ATOM], alter);
1305 break;
1306
1307 case BPF_LDX|BPF_MEM:
1308 v = val[s->k];
1309 if (alter && opt_state->vmap[v].is_const) {
1310 s->code = BPF_LDX|BPF_IMM;
1311 s->k = opt_state->vmap[v].const_val;
1312 opt_state->done = 0;
1313 }
1314 vstore(s, &val[X_ATOM], v, alter);
1315 break;
1316
1317 case BPF_ST:
1318 vstore(s, &val[s->k], val[A_ATOM], alter);
1319 break;
1320
1321 case BPF_STX:
1322 vstore(s, &val[s->k], val[X_ATOM], alter);
1323 break;
1324 }
1325 }
1326
1327 static void
1328 deadstmt(opt_state_t *opt_state, register struct stmt *s, register struct stmt *last[])
1329 {
1330 register int atom;
1331
1332 atom = atomuse(s);
1333 if (atom >= 0) {
1334 if (atom == AX_ATOM) {
1335 last[X_ATOM] = 0;
1336 last[A_ATOM] = 0;
1337 }
1338 else
1339 last[atom] = 0;
1340 }
1341 atom = atomdef(s);
1342 if (atom >= 0) {
1343 if (last[atom]) {
1344 opt_state->done = 0;
1345 last[atom]->code = NOP;
1346 }
1347 last[atom] = s;
1348 }
1349 }
1350
1351 static void
1352 opt_deadstores(opt_state_t *opt_state, register struct block *b)
1353 {
1354 register struct slist *s;
1355 register int atom;
1356 struct stmt *last[N_ATOMS];
1357
1358 memset((char *)last, 0, sizeof last);
1359
1360 for (s = b->stmts; s != 0; s = s->next)
1361 deadstmt(opt_state, &s->s, last);
1362 deadstmt(opt_state, &b->s, last);
1363
1364 for (atom = 0; atom < N_ATOMS; ++atom)
1365 if (last[atom] && !ATOMELEM(b->out_use, atom)) {
1366 last[atom]->code = NOP;
1367 opt_state->done = 0;
1368 }
1369 }
1370
/*
 * Value-numbering optimization over a single basic block: compute the
 * atom values on entry (the meet of the predecessors' exit values),
 * run opt_stmt() over every statement, then either throw away the
 * whole statement list (when provably useless) or peephole-optimize
 * it and delete dead stores.  Finally record the comparison operands
 * used by the branch optimizer.
 */
static void
opt_blk(opt_state_t *opt_state, struct block *b, int do_stmts)
{
	struct slist *s;
	struct edge *p;
	int i;
	bpf_int32 aval, xval;

#if 0
	for (s = b->stmts; s && s->next; s = s->next)
		if (BPF_CLASS(s->s.code) == BPF_JMP) {
			do_stmts = 0;
			break;
		}
#endif

	/*
	 * Initialize the atom values.
	 */
	p = b->in_edges;
	if (p == 0) {
		/*
		 * We have no predecessors, so everything is undefined
		 * upon entry to this block.
		 */
		memset((char *)b->val, 0, sizeof(b->val));
	} else {
		/*
		 * Inherit values from our predecessors.
		 *
		 * First, get the values from the predecessor along the
		 * first edge leading to this node.
		 */
		memcpy((char *)b->val, (char *)p->pred->val, sizeof(b->val));
		/*
		 * Now look at all the other nodes leading to this node.
		 * If, for the predecessor along that edge, a register
		 * has a different value from the one we have (i.e.,
		 * control paths are merging, and the merging paths
		 * assign different values to that register), give the
		 * register the undefined value of 0.
		 */
		while ((p = p->next) != NULL) {
			for (i = 0; i < N_ATOMS; ++i)
				if (b->val[i] != p->pred->val[i])
					b->val[i] = 0;
		}
	}
	/* Remember the entry value numbers of A and X for the test below. */
	aval = b->val[A_ATOM];
	xval = b->val[X_ATOM];
	for (s = b->stmts; s; s = s->next)
		opt_stmt(opt_state, &s->s, b->val, do_stmts);

	/*
	 * This is a special case: if we don't use anything from this
	 * block, and we load the accumulator or index register with a
	 * value that is already there, or if this block is a return,
	 * eliminate all the statements.
	 *
	 * XXX - what if it does a store?
	 *
	 * XXX - why does it matter whether we use anything from this
	 * block?  If the accumulator or index register doesn't change
	 * its value, isn't that OK even if we use that value?
	 *
	 * XXX - if we load the accumulator with a different value,
	 * and the block ends with a conditional branch, we obviously
	 * can't eliminate it, as the branch depends on that value.
	 * For the index register, the conditional branch only depends
	 * on the index register value if the test is against the index
	 * register value rather than a constant; if nothing uses the
	 * value we put into the index register, and we're not testing
	 * against the index register's value, and there aren't any
	 * other problems that would keep us from eliminating this
	 * block, can we eliminate it?
	 */
	if (do_stmts &&
	    ((b->out_use == 0 &&
	      aval != VAL_UNKNOWN && b->val[A_ATOM] == aval &&
	      xval != VAL_UNKNOWN && b->val[X_ATOM] == xval) ||
	     BPF_CLASS(b->s.code) == BPF_RET)) {
		if (b->stmts != 0) {
			b->stmts = 0;
			opt_state->done = 0;
		}
	} else {
		opt_peep(opt_state, b);
		opt_deadstores(opt_state, b);
	}
	/*
	 * Set up values for branch optimizer: oval is the value number
	 * of the branch's comparison operand (constant or X register).
	 */
	if (BPF_SRC(b->s.code) == BPF_K)
		b->oval = K(b->s.k);
	else
		b->oval = b->val[X_ATOM];
	/* et/ef carry the test's opcode, negated on the false edge. */
	b->et.code = b->s.code;
	b->ef.code = -b->s.code;
}
1470
1471 /*
1472 * Return true if any register that is used on exit from 'succ', has
1473 * an exit value that is different from the corresponding exit value
1474 * from 'b'.
1475 */
1476 static int
1477 use_conflict(struct block *b, struct block *succ)
1478 {
1479 int atom;
1480 atomset use = succ->out_use;
1481
1482 if (use == 0)
1483 return 0;
1484
1485 for (atom = 0; atom < N_ATOMS; ++atom)
1486 if (ATOMELEM(use, atom))
1487 if (b->val[atom] != succ->val[atom])
1488 return 1;
1489 return 0;
1490 }
1491
1492 static struct block *
1493 fold_edge(struct block *child, struct edge *ep)
1494 {
1495 int sense;
1496 int aval0, aval1, oval0, oval1;
1497 int code = ep->code;
1498
1499 if (code < 0) {
1500 code = -code;
1501 sense = 0;
1502 } else
1503 sense = 1;
1504
1505 if (child->s.code != code)
1506 return 0;
1507
1508 aval0 = child->val[A_ATOM];
1509 oval0 = child->oval;
1510 aval1 = ep->pred->val[A_ATOM];
1511 oval1 = ep->pred->oval;
1512
1513 if (aval0 != aval1)
1514 return 0;
1515
1516 if (oval0 == oval1)
1517 /*
1518 * The operands of the branch instructions are
1519 * identical, so the result is true if a true
1520 * branch was taken to get here, otherwise false.
1521 */
1522 return sense ? JT(child) : JF(child);
1523
1524 if (sense && code == (BPF_JMP|BPF_JEQ|BPF_K))
1525 /*
1526 * At this point, we only know the comparison if we
1527 * came down the true branch, and it was an equality
1528 * comparison with a constant.
1529 *
1530 * I.e., if we came down the true branch, and the branch
1531 * was an equality comparison with a constant, we know the
1532 * accumulator contains that constant. If we came down
1533 * the false branch, or the comparison wasn't with a
1534 * constant, we don't know what was in the accumulator.
1535 *
1536 * We rely on the fact that distinct constants have distinct
1537 * value numbers.
1538 */
1539 return JF(child);
1540
1541 return 0;
1542 }
1543
/*
 * Branch optimization for edge 'ep': collapse branches whose two
 * targets coincide, and retarget the edge past any successor whose
 * test outcome is already decided by a dominating edge (fold_edge).
 */
static void
opt_j(opt_state_t *opt_state, struct edge *ep)
{
	register int i, k;
	register struct block *target;

	/* Nothing to do for edges into leaf (return) blocks. */
	if (JT(ep->succ) == 0)
		return;

	if (JT(ep->succ) == JF(ep->succ)) {
		/*
		 * Common branch targets can be eliminated, provided
		 * there is no data dependency.
		 */
		if (!use_conflict(ep->pred, ep->succ->et.succ)) {
			opt_state->done = 0;
			ep->succ = JT(ep->succ);
		}
	}
	/*
	 * For each edge dominator that matches the successor of this
	 * edge, promote the edge successor to its grandchild.
	 *
	 * XXX We violate the set abstraction here in favor of a
	 * reasonably efficient loop.
	 */
 top:
	for (i = 0; i < opt_state->edgewords; ++i) {
		/* i'th word of this edge's dominator bit set */
		register bpf_u_int32 x = ep->edom[i];

		while (x != 0) {
			/* Extract and clear the lowest set bit. */
			k = lowest_set_bit(x);
			x &=~ ((bpf_u_int32)1 << k);
			k += i * BITS_PER_WORD;

			target = fold_edge(ep->succ, opt_state->edges[k]);
			/*
			 * Check that there is no data dependency between
			 * nodes that will be violated if we move the edge.
			 */
			if (target != 0 && !use_conflict(ep->pred, target)) {
				opt_state->done = 0;
				ep->succ = target;
				if (JT(target) != 0)
					/*
					 * Start over unless we hit a leaf.
					 */
					goto top;
				return;
			}
		}
	}
}
1597
1598
/*
 * Pull a node up an "or" chain: in a sequence of tests that all share
 * the same true target (as generated for "e1 || e2 || ..."), find a
 * node further down whose accumulator value number matches that of
 * b's predecessors and splice it up next to the head of the chain,
 * so subsequent branch folding can eliminate the redundant test.
 * Any rewrite clears opt_state->done so the optimizer loops again.
 */
static void
or_pullup(opt_state_t *opt_state, struct block *b)
{
	int val, at_top;
	struct block *pull;
	struct block **diffp, **samep;
	struct edge *ep;

	ep = b->in_edges;
	if (ep == 0)
		return;

	/*
	 * Make sure each predecessor loads the same value.
	 * XXX why?
	 */
	val = ep->pred->val[A_ATOM];
	for (ep = ep->next; ep != 0; ep = ep->next)
		if (val != ep->pred->val[A_ATOM])
			return;

	/* Locate which successor slot of the first predecessor is b. */
	if (JT(b->in_edges->pred) == b)
		diffp = &JT(b->in_edges->pred);
	else
		diffp = &JF(b->in_edges->pred);

	/*
	 * Walk the false-branch chain of tests equivalent to b's,
	 * stopping at the first node whose accumulator value differs
	 * from 'val'.  diffp always holds the link we came through.
	 */
	at_top = 1;
	for (;;) {
		if (*diffp == 0)
			return;

		if (JT(*diffp) != JT(b))
			return;

		if (!SET_MEMBER((*diffp)->dom, b->id))
			return;

		if ((*diffp)->val[A_ATOM] != val)
			break;

		diffp = &JF(*diffp);
		at_top = 0;
	}
	/* Continue down, looking for a node whose value matches again. */
	samep = &JF(*diffp);
	for (;;) {
		if (*samep == 0)
			return;

		if (JT(*samep) != JT(b))
			return;

		if (!SET_MEMBER((*samep)->dom, b->id))
			return;

		if ((*samep)->val[A_ATOM] == val)
			break;

		/* XXX Need to check that there are no data dependencies
		   between dp0 and dp1.  Currently, the code generator
		   will not produce such dependencies. */
		samep = &JF(*samep);
	}
#ifdef notdef
	/* XXX This doesn't cover everything. */
	for (i = 0; i < N_ATOMS; ++i)
		if ((*samep)->val[i] != pred->val[i])
			return;
#endif
	/* Pull up the node: unlink it and splice it in before *diffp. */
	pull = *samep;
	*samep = JF(pull);
	JF(pull) = *diffp;

	/*
	 * At the top of the chain, each predecessor needs to point at the
	 * pulled up node.  Inside the chain, there is only one predecessor
	 * to worry about.
	 */
	if (at_top) {
		for (ep = b->in_edges; ep != 0; ep = ep->next) {
			if (JT(ep->pred) == b)
				JT(ep->pred) = pull;
			else
				JF(ep->pred) = pull;
		}
	}
	else
		*diffp = pull;

	opt_state->done = 0;
}
1690
/*
 * Mirror image of or_pullup() for "and" chains: tests that all share
 * the same false target (as generated for "e1 && e2 && ...").  The
 * chain is followed through true branches instead of false ones.
 * Any rewrite clears opt_state->done so the optimizer loops again.
 */
static void
and_pullup(opt_state_t *opt_state, struct block *b)
{
	int val, at_top;
	struct block *pull;
	struct block **diffp, **samep;
	struct edge *ep;

	ep = b->in_edges;
	if (ep == 0)
		return;

	/*
	 * Make sure each predecessor loads the same value.
	 */
	val = ep->pred->val[A_ATOM];
	for (ep = ep->next; ep != 0; ep = ep->next)
		if (val != ep->pred->val[A_ATOM])
			return;

	/* Locate which successor slot of the first predecessor is b. */
	if (JT(b->in_edges->pred) == b)
		diffp = &JT(b->in_edges->pred);
	else
		diffp = &JF(b->in_edges->pred);

	/*
	 * Walk the true-branch chain of tests equivalent to b's,
	 * stopping at the first node whose accumulator value differs
	 * from 'val'.
	 */
	at_top = 1;
	for (;;) {
		if (*diffp == 0)
			return;

		if (JF(*diffp) != JF(b))
			return;

		if (!SET_MEMBER((*diffp)->dom, b->id))
			return;

		if ((*diffp)->val[A_ATOM] != val)
			break;

		diffp = &JT(*diffp);
		at_top = 0;
	}
	/* Continue down, looking for a node whose value matches again. */
	samep = &JT(*diffp);
	for (;;) {
		if (*samep == 0)
			return;

		if (JF(*samep) != JF(b))
			return;

		if (!SET_MEMBER((*samep)->dom, b->id))
			return;

		if ((*samep)->val[A_ATOM] == val)
			break;

		/* XXX Need to check that there are no data dependencies
		   between diffp and samep.  Currently, the code generator
		   will not produce such dependencies. */
		samep = &JT(*samep);
	}
#ifdef notdef
	/* XXX This doesn't cover everything. */
	for (i = 0; i < N_ATOMS; ++i)
		if ((*samep)->val[i] != pred->val[i])
			return;
#endif
	/* Pull up the node: unlink it and splice it in before *diffp. */
	pull = *samep;
	*samep = JT(pull);
	JT(pull) = *diffp;

	/*
	 * At the top of the chain, each predecessor needs to point at the
	 * pulled up node.  Inside the chain, there is only one predecessor
	 * to worry about.
	 */
	if (at_top) {
		for (ep = b->in_edges; ep != 0; ep = ep->next) {
			if (JT(ep->pred) == b)
				JT(ep->pred) = pull;
			else
				JF(ep->pred) = pull;
		}
	}
	else
		*diffp = pull;

	opt_state->done = 0;
}
1781
1782 static void
1783 opt_blks(opt_state_t *opt_state, struct icode *ic, int do_stmts)
1784 {
1785 int i, maxlevel;
1786 struct block *p;
1787
1788 init_val(opt_state);
1789 maxlevel = ic->root->level;
1790
1791 find_inedges(opt_state, ic->root);
1792 for (i = maxlevel; i >= 0; --i)
1793 for (p = opt_state->levels[i]; p; p = p->link)
1794 opt_blk(opt_state, p, do_stmts);
1795
1796 if (do_stmts)
1797 /*
1798 * No point trying to move branches; it can't possibly
1799 * make a difference at this point.
1800 */
1801 return;
1802
1803 for (i = 1; i <= maxlevel; ++i) {
1804 for (p = opt_state->levels[i]; p; p = p->link) {
1805 opt_j(opt_state, &p->et);
1806 opt_j(opt_state, &p->ef);
1807 }
1808 }
1809
1810 find_inedges(opt_state, ic->root);
1811 for (i = 1; i <= maxlevel; ++i) {
1812 for (p = opt_state->levels[i]; p; p = p->link) {
1813 or_pullup(opt_state, p);
1814 and_pullup(opt_state, p);
1815 }
1816 }
1817 }
1818
/* Push edge 'parent' onto the head of 'child's predecessor list. */
static inline void
link_inedge(struct edge *parent, struct block *child)
{
	parent->next = child->in_edges;
	child->in_edges = parent;
}
1825
1826 static void
1827 find_inedges(opt_state_t *opt_state, struct block *root)
1828 {
1829 int i;
1830 struct block *b;
1831
1832 for (i = 0; i < opt_state->n_blocks; ++i)
1833 opt_state->blocks[i]->in_edges = 0;
1834
1835 /*
1836 * Traverse the graph, adding each edge to the predecessor
1837 * list of its successors. Skip the leaves (i.e. level 0).
1838 */
1839 for (i = root->level; i > 0; --i) {
1840 for (b = opt_state->levels[i]; b != 0; b = b->link) {
1841 link_inedge(&b->et, JT(b));
1842 link_inedge(&b->ef, JF(b));
1843 }
1844 }
1845 }
1846
/*
 * Optimize the program's entry point: skip past leading blocks whose
 * test is an unconditional jump (both successors identical), carrying
 * the original root's statements along to the new root.
 */
static void
opt_root(struct block **b)
{
	struct slist *tmp, *s;

	/* Detach the current root's statement list. */
	s = (*b)->stmts;
	(*b)->stmts = 0;
	/* Skip blocks that jump the same way regardless of their test. */
	while (BPF_CLASS((*b)->s.code) == BPF_JMP && JT(*b) == JF(*b))
		*b = JT(*b);

	/*
	 * Prepend the saved statements to the new root's list.
	 * NOTE(review): this appears to assume s != NULL whenever
	 * tmp != NULL (sappend's handling of a null head isn't visible
	 * here) — confirm against sappend() and the code generator.
	 */
	tmp = (*b)->stmts;
	if (tmp != 0)
		sappend(s, tmp);
	(*b)->stmts = s;

	/*
	 * If the root node is a return, then there is no
	 * point executing any statements (since the bpf machine
	 * has no side effects).
	 */
	if (BPF_CLASS((*b)->s.code) == BPF_RET)
		(*b)->stmts = 0;
}
1870
/*
 * Run the block-level optimizer to a fixed point: each iteration
 * recomputes all the dataflow facts from scratch and then applies
 * opt_blks(); any transformation clears opt_state->done, forcing
 * another iteration.
 */
static void
opt_loop(opt_state_t *opt_state, struct icode *ic, int do_stmts)
{

#ifdef BDEBUG
	if (pcap_optimizer_debug > 1 || pcap_print_dot_graph) {
		printf("opt_loop(root, %d) begin\n", do_stmts);
		opt_dump(opt_state, ic);
	}
#endif
	do {
		opt_state->done = 1;
		/* Recompute levels, dominators, closures, use/def, and
		 * edge dominators — earlier passes may have changed them. */
		find_levels(opt_state, ic);
		find_dom(opt_state, ic->root);
		find_closure(opt_state, ic->root);
		find_ud(opt_state, ic->root);
		find_edom(opt_state, ic->root);
		opt_blks(opt_state, ic, do_stmts);
#ifdef BDEBUG
		if (pcap_optimizer_debug > 1 || pcap_print_dot_graph) {
			printf("opt_loop(root, %d) bottom, done=%d\n", do_stmts, opt_state->done);
			opt_dump(opt_state, ic);
		}
#endif
	} while (!opt_state->done);
}
1897
1898 /*
1899 * Optimize the filter code in its dag representation.
1900 * Return 0 on success, -1 on error.
1901 */
int
bpf_optimize(struct icode *ic, char *errbuf)
{
	opt_state_t opt_state;

	memset(&opt_state, 0, sizeof(opt_state));
	opt_state.errbuf = errbuf;
	/*
	 * Errors deep inside the optimizer longjmp() back here via
	 * opt_error(); clean up whatever opt_init() allocated and
	 * report failure (the message is already in errbuf).
	 */
	if (setjmp(opt_state.top_ctx)) {
		opt_cleanup(&opt_state);
		return -1;
	}
	opt_init(&opt_state, ic);
	/* First pass moves branches only; second also edits statements. */
	opt_loop(&opt_state, ic, 0);
	opt_loop(&opt_state, ic, 1);
	intern_blocks(&opt_state, ic);
#ifdef BDEBUG
	if (pcap_optimizer_debug > 1 || pcap_print_dot_graph) {
		printf("after intern_blocks()\n");
		opt_dump(&opt_state, ic);
	}
#endif
	opt_root(&ic->root);
#ifdef BDEBUG
	if (pcap_optimizer_debug > 1 || pcap_print_dot_graph) {
		printf("after opt_root()\n");
		opt_dump(&opt_state, ic);
	}
#endif
	opt_cleanup(&opt_state);
	return 0;
}
1933
1934 static void
1935 make_marks(struct icode *ic, struct block *p)
1936 {
1937 if (!isMarked(ic, p)) {
1938 Mark(ic, p);
1939 if (BPF_CLASS(p->s.code) != BPF_RET) {
1940 make_marks(ic, JT(p));
1941 make_marks(ic, JF(p));
1942 }
1943 }
1944 }
1945
1946 /*
1947 * Mark code array such that isMarked(ic->cur_mark, i) is true
1948 * only for nodes that are alive.
1949 */
static void
mark_code(struct icode *ic)
{
	/* Bump the mark generation so all old marks become stale,
	 * then mark every block reachable from the root. */
	ic->cur_mark += 1;
	make_marks(ic, ic->root);
}
1956
1957 /*
1958 * True iff the two stmt lists load the same value from the packet into
1959 * the accumulator.
1960 */
1961 static int
1962 eq_slist(struct slist *x, struct slist *y)
1963 {
1964 for (;;) {
1965 while (x && x->s.code == NOP)
1966 x = x->next;
1967 while (y && y->s.code == NOP)
1968 y = y->next;
1969 if (x == 0)
1970 return y == 0;
1971 if (y == 0)
1972 return x == 0;
1973 if (x->s.code != y->s.code || x->s.k != y->s.k)
1974 return 0;
1975 x = x->next;
1976 y = y->next;
1977 }
1978 }
1979
1980 static inline int
1981 eq_blk(struct block *b0, struct block *b1)
1982 {
1983 if (b0->s.code == b1->s.code &&
1984 b0->s.k == b1->s.k &&
1985 b0->et.succ == b1->et.succ &&
1986 b0->ef.succ == b1->ef.succ)
1987 return eq_slist(b0->stmts, b1->stmts);
1988 return 0;
1989 }
1990
/*
 * Merge identical basic blocks: whenever two live blocks compare equal
 * under eq_blk(), redirect every reference to one of them at the other.
 * Repeats from the top until no edge moves, since merging can make
 * previously-distinct blocks become equal.
 */
static void
intern_blocks(opt_state_t *opt_state, struct icode *ic)
{
	struct block *p;
	int i, j;
	int done1; /* don't shadow global */
 top:
	done1 = 1;
	for (i = 0; i < opt_state->n_blocks; ++i)
		opt_state->blocks[i]->link = 0;

	/* Refresh reachability so dead blocks are skipped below. */
	mark_code(ic);

	/*
	 * Working upward through the block numbering, point each live
	 * block's 'link' at an equivalent higher-numbered block, if any.
	 */
	for (i = opt_state->n_blocks - 1; --i >= 0; ) {
		if (!isMarked(ic, opt_state->blocks[i]))
			continue;
		for (j = i + 1; j < opt_state->n_blocks; ++j) {
			if (!isMarked(ic, opt_state->blocks[j]))
				continue;
			if (eq_blk(opt_state->blocks[i], opt_state->blocks[j])) {
				/* Follow j's own replacement if it has one. */
				opt_state->blocks[i]->link = opt_state->blocks[j]->link ?
					opt_state->blocks[j]->link : opt_state->blocks[j];
				break;
			}
		}
	}
	/* Redirect every edge to the chosen representative blocks. */
	for (i = 0; i < opt_state->n_blocks; ++i) {
		p = opt_state->blocks[i];
		if (JT(p) == 0)
			continue;	/* leaf (return) block */
		if (JT(p)->link) {
			done1 = 0;
			JT(p) = JT(p)->link;
		}
		if (JF(p)->link) {
			done1 = 0;
			JF(p) = JF(p)->link;
		}
	}
	if (!done1)
		goto top;
}
2033
2034 static void
2035 opt_cleanup(opt_state_t *opt_state)
2036 {
2037 free((void *)opt_state->vnode_base);
2038 free((void *)opt_state->vmap);
2039 free((void *)opt_state->edges);
2040 free((void *)opt_state->space);
2041 free((void *)opt_state->levels);
2042 free((void *)opt_state->blocks);
2043 }
2044
/*
 * For optimizer errors.
 *
 * Formats the message into the caller-supplied error buffer (if any)
 * and longjmp()s back to the setjmp() in bpf_optimize() — it never
 * returns to its caller.
 */
static void PCAP_NORETURN
opt_error(opt_state_t *opt_state, const char *fmt, ...)
{
	va_list ap;

	if (opt_state->errbuf != NULL) {
		va_start(ap, fmt);
		(void)pcap_vsnprintf(opt_state->errbuf,
		    PCAP_ERRBUF_SIZE, fmt, ap);
		va_end(ap);
	}
	longjmp(opt_state->top_ctx, 1);
	/* NOTREACHED */
}
2062
2063 /*
2064 * Return the number of stmts in 's'.
2065 */
2066 static u_int
2067 slength(struct slist *s)
2068 {
2069 u_int n = 0;
2070
2071 for (; s; s = s->next)
2072 if (s->s.code != NOP)
2073 ++n;
2074 return n;
2075 }
2076
2077 /*
2078 * Return the number of nodes reachable by 'p'.
2079 * All nodes should be initially unmarked.
2080 */
static int
count_blocks(struct icode *ic, struct block *p)
{
	int n;

	if (p == 0 || isMarked(ic, p))
		return 0;
	Mark(ic, p);
	/* This block plus everything reachable through either branch. */
	n = count_blocks(ic, JT(p));
	n += count_blocks(ic, JF(p));
	return n + 1;
}
2089
2090 /*
2091 * Do a depth first search on the flow graph, numbering the
2092 * the basic blocks, and entering them into the 'blocks' array.`
2093 */
2094 static void
2095 number_blks_r(opt_state_t *opt_state, struct icode *ic, struct block *p)
2096 {
2097 int n;
2098
2099 if (p == 0 || isMarked(ic, p))
2100 return;
2101
2102 Mark(ic, p);
2103 n = opt_state->n_blocks++;
2104 p->id = n;
2105 opt_state->blocks[n] = p;
2106
2107 number_blks_r(opt_state, ic, JT(p));
2108 number_blks_r(opt_state, ic, JF(p));
2109 }
2110
2111 /*
2112 * Return the number of stmts in the flowgraph reachable by 'p'.
2113 * The nodes should be unmarked before calling.
2114 *
2115 * Note that "stmts" means "instructions", and that this includes
2116 *
2117 * side-effect statements in 'p' (slength(p->stmts));
2118 *
2119 * statements in the true branch from 'p' (count_stmts(JT(p)));
2120 *
2121 * statements in the false branch from 'p' (count_stmts(JF(p)));
2122 *
2123 * the conditional jump itself (1);
2124 *
2125 * an extra long jump if the true branch requires it (p->longjt);
2126 *
2127 * an extra long jump if the false branch requires it (p->longjf).
2128 */
2129 static u_int
2130 count_stmts(struct icode *ic, struct block *p)
2131 {
2132 u_int n;
2133
2134 if (p == 0 || isMarked(ic, p))
2135 return 0;
2136 Mark(ic, p);
2137 n = count_stmts(ic, JT(p)) + count_stmts(ic, JF(p));
2138 return slength(p->stmts) + n + 1 + p->longjt + p->longjf;
2139 }
2140
2141 /*
2142 * Allocate memory. All allocation is done before optimization
2143 * is begun. A linear bound on the size of all data structures is computed
2144 * from the total number of blocks and/or statements.
2145 */
static void
opt_init(opt_state_t *opt_state, struct icode *ic)
{
	bpf_u_int32 *p;
	int i, n, max_stmts;

	/*
	 * First, count the blocks, so we can malloc an array to map
	 * block number to block.  Then, put the blocks into the array.
	 */
	unMarkAll(ic);
	n = count_blocks(ic, ic->root);
	opt_state->blocks = (struct block **)calloc(n, sizeof(*opt_state->blocks));
	if (opt_state->blocks == NULL)
		opt_error(opt_state, "malloc");
	unMarkAll(ic);
	opt_state->n_blocks = 0;
	number_blks_r(opt_state, ic, ic->root);

	/* Every block has exactly two outgoing edges (true and false). */
	opt_state->n_edges = 2 * opt_state->n_blocks;
	opt_state->edges = (struct edge **)calloc(opt_state->n_edges, sizeof(*opt_state->edges));
	if (opt_state->edges == NULL) {
		opt_error(opt_state, "malloc");
	}

	/*
	 * The number of levels is bounded by the number of nodes.
	 */
	opt_state->levels = (struct block **)calloc(opt_state->n_blocks, sizeof(*opt_state->levels));
	if (opt_state->levels == NULL) {
		opt_error(opt_state, "malloc");
	}

	/* Bit-set sizes, in 32-bit words, for edge sets and node sets. */
	opt_state->edgewords = opt_state->n_edges / (8 * sizeof(bpf_u_int32)) + 1;
	opt_state->nodewords = opt_state->n_blocks / (8 * sizeof(bpf_u_int32)) + 1;

	/* XXX */
	opt_state->space = (bpf_u_int32 *)malloc(2 * opt_state->n_blocks * opt_state->nodewords * sizeof(*opt_state->space)
				 + opt_state->n_edges * opt_state->edgewords * sizeof(*opt_state->space));
	if (opt_state->space == NULL) {
		opt_error(opt_state, "malloc");
	}
	/*
	 * Carve the single 'space' arena into per-block dominator sets,
	 * per-block closure sets, and per-edge dominator sets, in that
	 * order.  The all_*_sets pointers remember each region's start.
	 */
	p = opt_state->space;
	opt_state->all_dom_sets = p;
	for (i = 0; i < n; ++i) {
		opt_state->blocks[i]->dom = p;
		p += opt_state->nodewords;
	}
	opt_state->all_closure_sets = p;
	for (i = 0; i < n; ++i) {
		opt_state->blocks[i]->closure = p;
		p += opt_state->nodewords;
	}
	opt_state->all_edge_sets = p;
	for (i = 0; i < n; ++i) {
		register struct block *b = opt_state->blocks[i];

		/* True edges occupy ids [0, n_blocks), false edges the rest. */
		b->et.edom = p;
		p += opt_state->edgewords;
		b->ef.edom = p;
		p += opt_state->edgewords;
		b->et.id = i;
		opt_state->edges[i] = &b->et;
		b->ef.id = opt_state->n_blocks + i;
		opt_state->edges[opt_state->n_blocks + i] = &b->ef;
		b->et.pred = b;
		b->ef.pred = b;
	}
	max_stmts = 0;
	for (i = 0; i < n; ++i)
		max_stmts += slength(opt_state->blocks[i]->stmts) + 1;
	/*
	 * We allocate at most 3 value numbers per statement,
	 * so this is an upper bound on the number of valnodes
	 * we'll need.
	 */
	opt_state->maxval = 3 * max_stmts;
	opt_state->vmap = (struct vmapinfo *)calloc(opt_state->maxval, sizeof(*opt_state->vmap));
	if (opt_state->vmap == NULL) {
		opt_error(opt_state, "malloc");
	}
	opt_state->vnode_base = (struct valnode *)calloc(opt_state->maxval, sizeof(*opt_state->vnode_base));
	if (opt_state->vnode_base == NULL) {
		opt_error(opt_state, "malloc");
	}
}
2232
2233 /*
2234 * This is only used when supporting optimizer debugging. It is
2235 * global state, so do *not* do more than one compile in parallel
2236 * and expect it to provide meaningful information.
2237 */
2238 #ifdef BDEBUG
2239 int bids[NBIDS];
2240 #endif
2241
2242 static void PCAP_NORETURN conv_error(conv_state_t *, const char *, ...)
2243 PCAP_PRINTFLIKE(2, 3);
2244
2245 /*
2246 * Returns true if successful. Returns false if a branch has
2247 * an offset that is too large. If so, we have marked that
2248 * branch so that on a subsequent iteration, it will be treated
2249 * properly.
2250 */
static int
convert_code_r(conv_state_t *conv_state, struct icode *ic, struct block *p)
{
	struct bpf_insn *dst;
	struct slist *src;
	u_int slen;
	u_int off;
	u_int extrajmps;	/* number of extra jumps inserted */
	struct slist **offset = NULL;

	if (p == 0 || isMarked(ic, p))
		return (1);
	Mark(ic, p);

	/*
	 * Emit code bottom-up: both successors are laid out first, so
	 * their offsets are known when this block's jumps are filled in.
	 */
	if (convert_code_r(conv_state, ic, JF(p)) == 0)
		return (0);
	if (convert_code_r(conv_state, ic, JT(p)) == 0)
		return (0);

	/* Carve this block's slots off the tail of the output array. */
	slen = slength(p->stmts);
	dst = conv_state->ftail -= (slen + 1 + p->longjt + p->longjf);
		/* inflate length by any extra jumps */

	p->offset = (int)(dst - conv_state->fstart);

	/* generate offset[] for convenience  */
	if (slen) {
		offset = (struct slist **)calloc(slen, sizeof(struct slist *));
		if (!offset) {
			conv_error(conv_state, "not enough core");
			/*NOTREACHED*/
		}
	}
	/* Map instruction index -> source statement, for jump resolution. */
	src = p->stmts;
	for (off = 0; off < slen && src; off++) {
#if 0
		printf("off=%d src=%x\n", off, src);
#endif
		offset[off] = src;
		src = src->next;
	}

	off = 0;
	for (src = p->stmts; src; src = src->next) {
		if (src->s.code == NOP)
			continue;
		dst->code = (u_short)src->s.code;
		dst->k = src->s.k;

		/* fill block-local relative jump */
		if (BPF_CLASS(src->s.code) != BPF_JMP || src->s.code == (BPF_JMP|BPF_JA)) {
#if 0
			if (src->s.jt || src->s.jf) {
				free(offset);
				conv_error(conv_state, "illegal jmp destination");
				/*NOTREACHED*/
			}
#endif
			goto filled;
		}
		if (off == slen - 2)	/*???*/
			goto filled;

	    {
		u_int i;
		int jt, jf;
		const char ljerr[] = "%s for block-local relative jump: off=%d";

#if 0
		printf("code=%x off=%d %x %x\n", src->s.code,
			off, src->s.jt, src->s.jf);
#endif

		if (!src->s.jt || !src->s.jf) {
			free(offset);
			conv_error(conv_state, ljerr, "no jmp destination", off);
			/*NOTREACHED*/
		}

		/*
		 * Find the statements the jt/jf pointers refer to and
		 * convert them to relative instruction offsets; each
		 * must match exactly one later statement in the block
		 * and fit in a u_char.
		 */
		jt = jf = 0;
		for (i = 0; i < slen; i++) {
			if (offset[i] == src->s.jt) {
				if (jt) {
					free(offset);
					conv_error(conv_state, ljerr, "multiple matches", off);
					/*NOTREACHED*/
				}

				if (i - off - 1 >= 256) {
					free(offset);
					conv_error(conv_state, ljerr, "out-of-range jump", off);
					/*NOTREACHED*/
				}
				dst->jt = (u_char)(i - off - 1);
				jt++;
			}
			if (offset[i] == src->s.jf) {
				if (jf) {
					free(offset);
					conv_error(conv_state, ljerr, "multiple matches", off);
					/*NOTREACHED*/
				}
				if (i - off - 1 >= 256) {
					free(offset);
					conv_error(conv_state, ljerr, "out-of-range jump", off);
					/*NOTREACHED*/
				}
				dst->jf = (u_char)(i - off - 1);
				jf++;
			}
		}
		if (!jt || !jf) {
			free(offset);
			conv_error(conv_state, ljerr, "no destination found", off);
			/*NOTREACHED*/
		}
	    }
filled:
		++dst;
		++off;
	}
	if (offset)
		free(offset);

#ifdef BDEBUG
	if (dst - conv_state->fstart < NBIDS)
		bids[dst - conv_state->fstart] = p->id + 1;
#endif
	/* Emit the block's closing test/return instruction. */
	dst->code = (u_short)p->s.code;
	dst->k = p->s.k;
	if (JT(p)) {
		extrajmps = 0;
		off = JT(p)->offset - (p->offset + slen) - 1;
		if (off >= 256) {
		    /* offset too large for branch, must add a jump */
		    if (p->longjt == 0) {
			/* mark this instruction and retry */
			p->longjt++;
			return(0);
		    }
		    /* branch if T to following jump */
		    if (extrajmps >= 256) {
			conv_error(conv_state, "too many extra jumps");
			/*NOTREACHED*/
		    }
		    dst->jt = (u_char)extrajmps;
		    extrajmps++;
		    dst[extrajmps].code = BPF_JMP|BPF_JA;
		    dst[extrajmps].k = off - extrajmps;
		}
		else
		    dst->jt = (u_char)off;
		off = JF(p)->offset - (p->offset + slen) - 1;
		if (off >= 256) {
		    /* offset too large for branch, must add a jump */
		    if (p->longjf == 0) {
			/* mark this instruction and retry */
			p->longjf++;
			return(0);
		    }
		    /* branch if F to following jump */
		    /* if two jumps are inserted, F goes to second one */
		    if (extrajmps >= 256) {
			conv_error(conv_state, "too many extra jumps");
			/*NOTREACHED*/
		    }
		    dst->jf = (u_char)extrajmps;
		    extrajmps++;
		    dst[extrajmps].code = BPF_JMP|BPF_JA;
		    dst[extrajmps].k = off - extrajmps;
		}
		else
		    dst->jf = (u_char)off;
	}
	return (1);
}
2427
2428
2429 /*
2430 * Convert flowgraph intermediate representation to the
2431 * BPF array representation. Set *lenp to the number of instructions.
2432 *
2433 * This routine does *NOT* leak the memory pointed to by fp. It *must
2434 * not* do free(fp) before returning fp; doing so would make no sense,
2435 * as the BPF array pointed to by the return value of icode_to_fcode()
2436 * must be valid - it's being returned for use in a bpf_program structure.
2437 *
2438 * If it appears that icode_to_fcode() is leaking, the problem is that
2439 * the program using pcap_compile() is failing to free the memory in
2440 * the BPF program when it's done - the leak is in the program, not in
2441 * the routine that happens to be allocating the memory. (By analogy, if
2442 * a program calls fopen() without ever calling fclose() on the FILE *,
2443 * it will leak the FILE structure; the leak is not in fopen(), it's in
2444 * the program.) Change the program to use pcap_freecode() when it's
2445 * done with the filter program. See the pcap man page.
2446 */
2447 struct bpf_insn *
2448 icode_to_fcode(struct icode *ic, struct block *root, u_int *lenp,
2449 char *errbuf)
2450 {
2451 u_int n;
2452 struct bpf_insn *fp;
2453 conv_state_t conv_state;
2454
2455 conv_state.fstart = NULL;
2456 conv_state.errbuf = errbuf;
2457 if (setjmp(conv_state.top_ctx) != 0) {
2458 free(conv_state.fstart);
2459 return NULL;
2460 }
2461
2462 /*
2463 * Loop doing convert_code_r() until no branches remain
2464 * with too-large offsets.
2465 */
2466 for (;;) {
2467 unMarkAll(ic);
2468 n = *lenp = count_stmts(ic, root);
2469
2470 fp = (struct bpf_insn *)malloc(sizeof(*fp) * n);
2471 if (fp == NULL) {
2472 (void)pcap_snprintf(errbuf, PCAP_ERRBUF_SIZE,
2473 "malloc");
2474 free(fp);
2475 return NULL;
2476 }
2477 memset((char *)fp, 0, sizeof(*fp) * n);
2478 conv_state.fstart = fp;
2479 conv_state.ftail = fp + n;
2480
2481 unMarkAll(ic);
2482 if (convert_code_r(&conv_state, ic, root))
2483 break;
2484 free(fp);
2485 }
2486
2487 return fp;
2488 }
2489
2490 /*
2491 * For iconv_to_fconv() errors.
2492 */
2493 static void PCAP_NORETURN
2494 conv_error(conv_state_t *conv_state, const char *fmt, ...)
2495 {
2496 va_list ap;
2497
2498 va_start(ap, fmt);
2499 (void)pcap_vsnprintf(conv_state->errbuf,
2500 PCAP_ERRBUF_SIZE, fmt, ap);
2501 va_end(ap);
2502 longjmp(conv_state->top_ctx, 1);
2503 /* NOTREACHED */
2504 }
2505
2506 /*
2507 * Make a copy of a BPF program and put it in the "fcode" member of
2508 * a "pcap_t".
2509 *
2510 * If we fail to allocate memory for the copy, fill in the "errbuf"
2511 * member of the "pcap_t" with an error message, and return -1;
2512 * otherwise, return 0.
2513 */
2514 int
2515 install_bpf_program(pcap_t *p, struct bpf_program *fp)
2516 {
2517 size_t prog_size;
2518
2519 /*
2520 * Validate the program.
2521 */
2522 if (!bpf_validate(fp->bf_insns, fp->bf_len)) {
2523 pcap_snprintf(p->errbuf, sizeof(p->errbuf),
2524 "BPF program is not valid");
2525 return (-1);
2526 }
2527
2528 /*
2529 * Free up any already installed program.
2530 */
2531 pcap_freecode(&p->fcode);
2532
2533 prog_size = sizeof(*fp->bf_insns) * fp->bf_len;
2534 p->fcode.bf_len = fp->bf_len;
2535 p->fcode.bf_insns = (struct bpf_insn *)malloc(prog_size);
2536 if (p->fcode.bf_insns == NULL) {
2537 pcap_fmt_errmsg_for_errno(p->errbuf, sizeof(p->errbuf),
2538 errno, "malloc");
2539 return (-1);
2540 }
2541 memcpy(p->fcode.bf_insns, fp->bf_insns, prog_size);
2542 return (0);
2543 }
2544
2545 #ifdef BDEBUG
2546 static void
2547 dot_dump_node(struct icode *ic, struct block *block, struct bpf_program *prog,
2548 FILE *out)
2549 {
2550 int icount, noffset;
2551 int i;
2552
2553 if (block == NULL || isMarked(ic, block))
2554 return;
2555 Mark(ic, block);
2556
2557 icount = slength(block->stmts) + 1 + block->longjt + block->longjf;
2558 noffset = min(block->offset + icount, (int)prog->bf_len);
2559
2560 fprintf(out, "\tblock%d [shape=ellipse, id=\"block-%d\" label=\"BLOCK%d\\n", block->id, block->id, block->id);
2561 for (i = block->offset; i < noffset; i++) {
2562 fprintf(out, "\\n%s", bpf_image(prog->bf_insns + i, i));
2563 }
2564 fprintf(out, "\" tooltip=\"");
2565 for (i = 0; i < BPF_MEMWORDS; i++)
2566 if (block->val[i] != VAL_UNKNOWN)
2567 fprintf(out, "val[%d]=%d ", i, block->val[i]);
2568 fprintf(out, "val[A]=%d ", block->val[A_ATOM]);
2569 fprintf(out, "val[X]=%d", block->val[X_ATOM]);
2570 fprintf(out, "\"");
2571 if (JT(block) == NULL)
2572 fprintf(out, ", peripheries=2");
2573 fprintf(out, "];\n");
2574
2575 dot_dump_node(ic, JT(block), prog, out);
2576 dot_dump_node(ic, JF(block), prog, out);
2577 }
2578
2579 static void
2580 dot_dump_edge(struct icode *ic, struct block *block, FILE *out)
2581 {
2582 if (block == NULL || isMarked(ic, block))
2583 return;
2584 Mark(ic, block);
2585
2586 if (JT(block)) {
2587 fprintf(out, "\t\"block%d\":se -> \"block%d\":n [label=\"T\"]; \n",
2588 block->id, JT(block)->id);
2589 fprintf(out, "\t\"block%d\":sw -> \"block%d\":n [label=\"F\"]; \n",
2590 block->id, JF(block)->id);
2591 }
2592 dot_dump_edge(ic, JT(block), out);
2593 dot_dump_edge(ic, JF(block), out);
2594 }
2595
2596 /* Output the block CFG using graphviz/DOT language
 * In the CFG, each block's code, the value index for each register at
 * EXIT, and the jump relationships are shown.
2599 *
2600 * example DOT for BPF `ip src host 1.1.1.1' is:
2601 digraph BPF {
2602 block0 [shape=ellipse, id="block-0" label="BLOCK0\n\n(000) ldh [12]\n(001) jeq #0x800 jt 2 jf 5" tooltip="val[A]=0 val[X]=0"];
2603 block1 [shape=ellipse, id="block-1" label="BLOCK1\n\n(002) ld [26]\n(003) jeq #0x1010101 jt 4 jf 5" tooltip="val[A]=0 val[X]=0"];
2604 block2 [shape=ellipse, id="block-2" label="BLOCK2\n\n(004) ret #68" tooltip="val[A]=0 val[X]=0", peripheries=2];
2605 block3 [shape=ellipse, id="block-3" label="BLOCK3\n\n(005) ret #0" tooltip="val[A]=0 val[X]=0", peripheries=2];
2606 "block0":se -> "block1":n [label="T"];
2607 "block0":sw -> "block3":n [label="F"];
2608 "block1":se -> "block2":n [label="T"];
2609 "block1":sw -> "block3":n [label="F"];
2610 }
2611 *
 * After installing graphviz from http://www.graphviz.org/, save it as bpf.dot
2613 * and run `dot -Tpng -O bpf.dot' to draw the graph.
2614 */
2615 static int
2616 dot_dump(struct icode *ic, char *errbuf)
2617 {
2618 struct bpf_program f;
2619 FILE *out = stdout;
2620
2621 memset(bids, 0, sizeof bids);
2622 f.bf_insns = icode_to_fcode(ic, ic->root, &f.bf_len, errbuf);
2623 if (f.bf_insns == NULL)
2624 return -1;
2625
2626 fprintf(out, "digraph BPF {\n");
2627 unMarkAll(ic);
2628 dot_dump_node(ic, ic->root, &f, out);
2629 unMarkAll(ic);
2630 dot_dump_edge(ic, ic->root, out);
2631 fprintf(out, "}\n");
2632
2633 free((char *)f.bf_insns);
2634 return 0;
2635 }
2636
2637 static int
2638 plain_dump(struct icode *ic, char *errbuf)
2639 {
2640 struct bpf_program f;
2641
2642 memset(bids, 0, sizeof bids);
2643 f.bf_insns = icode_to_fcode(ic, ic->root, &f.bf_len, errbuf);
2644 if (f.bf_insns == NULL)
2645 return -1;
2646 bpf_dump(&f, 1);
2647 putchar('\n');
2648 free((char *)f.bf_insns);
2649 return 0;
2650 }
2651
2652 static void
2653 opt_dump(opt_state_t *opt_state, struct icode *ic)
2654 {
2655 int status;
2656 char errbuf[PCAP_ERRBUF_SIZE];
2657
2658 /*
2659 * If the CFG, in DOT format, is requested, output it rather than
2660 * the code that would be generated from that graph.
2661 */
2662 if (pcap_print_dot_graph)
2663 status = dot_dump(ic, errbuf);
2664 else
2665 status = plain_dump(ic, errbuf);
2666 if (status == -1)
2667 opt_error(opt_state, "opt_dump: icode_to_fcode failed: %s", errbuf);
2668 }
2669 #endif
2670