1 /* $NetBSD: optimize.c,v 1.9.10.1 2018/09/06 06:51:45 pgoyette Exp $ */
2
3 /*
4 * Copyright (c) 1988, 1989, 1990, 1991, 1993, 1994, 1995, 1996
5 * The Regents of the University of California. All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that: (1) source code distributions
9 * retain the above copyright notice and this paragraph in its entirety, (2)
10 * distributions including binary code include the above copyright notice and
11 * this paragraph in its entirety in the documentation or other materials
12 * provided with the distribution, and (3) all advertising materials mentioning
13 * features or use of this software display the following acknowledgement:
14 * ``This product includes software developed by the University of California,
15 * Lawrence Berkeley Laboratory and its contributors.'' Neither the name of
16 * the University nor the names of its contributors may be used to endorse
17 * or promote products derived from this software without specific prior
18 * written permission.
19 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
20 * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
21 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
22 *
23 * Optimization module for BPF code intermediate representation.
24 */
25
26 #include <sys/cdefs.h>
27 __RCSID("$NetBSD: optimize.c,v 1.9.10.1 2018/09/06 06:51:45 pgoyette Exp $");
28
29 #ifdef HAVE_CONFIG_H
30 #include <config.h>
31 #endif
32
33 #include <pcap-types.h>
34
35 #include <stdio.h>
36 #include <stdlib.h>
37 #include <memory.h>
38 #include <string.h>
39
40 #include <errno.h>
41
42 #include "pcap-int.h"
43
44 #include "gencode.h"
45 #include "optimize.h"
46
47 #ifdef HAVE_OS_PROTO_H
48 #include "os-proto.h"
49 #endif
50
51 #ifdef BDEBUG
52 /*
53 * The internal "debug printout" flag for the filter expression optimizer.
54 * The code to print that stuff is present only if BDEBUG is defined, so
55 * the flag, and the routine to set it, are defined only if BDEBUG is
56 * defined.
57 */
58 static int pcap_optimizer_debug;
59
60 /*
61 * Routine to set that flag.
62 *
63 * This is intended for libpcap developers, not for general use.
64 * If you want to set these in a program, you'll have to declare this
65 * routine yourself, with the appropriate DLL import attribute on Windows;
66 * it's not declared in any header file, and won't be declared in any
67 * header file provided by libpcap.
68 */
69 PCAP_API void pcap_set_optimizer_debug(int value);
70
71 PCAP_API_DEF void
72 pcap_set_optimizer_debug(int value)
73 {
74 pcap_optimizer_debug = value;
75 }
76
77 /*
78 * The internal "print dot graph" flag for the filter expression optimizer.
79 * The code to print that stuff is present only if BDEBUG is defined, so
80 * the flag, and the routine to set it, are defined only if BDEBUG is
81 * defined.
82 */
83 static int pcap_print_dot_graph;
84
85 /*
86 * Routine to set that flag.
87 *
88 * This is intended for libpcap developers, not for general use.
89 * If you want to set these in a program, you'll have to declare this
90 * routine yourself, with the appropriate DLL import attribute on Windows;
91 * it's not declared in any header file, and won't be declared in any
92 * header file provided by libpcap.
93 */
94 PCAP_API void pcap_set_print_dot_graph(int value);
95
96 PCAP_API_DEF void
97 pcap_set_print_dot_graph(int value)
98 {
99 pcap_print_dot_graph = value;
100 }
101
102 #endif
103
104 /*
105 * lowest_set_bit().
106 *
107 * Takes a 32-bit integer as an argument.
108 *
109 * If handed a non-zero value, returns the index of the lowest set bit,
110 * counting upwards from zero.
111 *
112 * If handed zero, the results are platform- and compiler-dependent.
113 * Keep it out of the light, don't give it any water, don't feed it
114 * after midnight, and don't pass zero to it.
115 *
116 * This is the same as the count of trailing zeroes in the word.
117 */
118 #if PCAP_IS_AT_LEAST_GNUC_VERSION(3,4)
119 /*
120 * GCC 3.4 and later; we have __builtin_ctz().
121 */
122 #define lowest_set_bit(mask) __builtin_ctz(mask)
123 #elif defined(_MSC_VER)
124 /*
125 * Visual Studio; we support only 2005 and later, so use
126 * _BitScanForward().
127 */
128 #include <intrin.h>
129
130 #ifndef __clang__
131 #pragma intrinsic(_BitScanForward)
132 #endif
133
134 static __forceinline int
135 lowest_set_bit(int mask)
136 {
137 unsigned long bit;
138
139 /*
140 * Don't sign-extend mask if long is longer than int.
141 * (It's currently not, in MSVC, even on 64-bit platforms, but....)
142 */
143 if (_BitScanForward(&bit, (unsigned int)mask) == 0)
144 return -1; /* mask is zero */
145 return (int)bit;
146 }
147 #elif defined(MSDOS) && defined(__DJGPP__)
148 /*
149 * MS-DOS with DJGPP, which declares ffs() in <string.h>, which
150 * we've already included.
151 */
152 #define lowest_set_bit(mask) (ffs((mask)) - 1)
153 #elif (defined(MSDOS) && defined(__WATCOMC__)) || defined(STRINGS_H_DECLARES_FFS)
154 /*
155 * MS-DOS with Watcom C, which has <strings.h> and declares ffs() there,
156 * or some other platform (UN*X conforming to a sufficiently recent version
157 * of the Single UNIX Specification).
158 */
159 #include <strings.h>
160 #define lowest_set_bit(mask) (ffs((mask)) - 1)
161 #else
162 /*
163 * None of the above.
164 * Use a perfect-hash-function-based function.
165 */
166 static int
167 lowest_set_bit(int mask)
168 {
169 unsigned int v = (unsigned int)mask;
170
171 static const int MultiplyDeBruijnBitPosition[32] = {
172 0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
173 31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9
174 };
175
176 /*
177 * We strip off all but the lowermost set bit (v & ~v),
178 * and perform a minimal perfect hash on it to look up the
179 * number of low-order zero bits in a table.
180 *
181 * See:
182 *
183 * http://7ooo.mooo.com/text/ComputingTrailingZerosHOWTO.pdf
184 *
185 * http://supertech.csail.mit.edu/papers/debruijn.pdf
186 */
187 return (MultiplyDeBruijnBitPosition[((v & -v) * 0x077CB531U) >> 27]);
188 }
189 #endif
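/*
 * A worked example of the fallback above (illustrative, not part of
 * the original source): for mask = 0x28 (binary 101000),
 *
 *	v & -v                         = 0x08  (isolate lowest set bit)
 *	(0x08 * 0x077CB531U) >> 27     = 7     (perfect hash)
 *	MultiplyDeBruijnBitPosition[7] = 3
 *
 * which matches __builtin_ctz(0x28) == 3 on the GCC path.
 */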
190
191 /*
192 * Represents a deleted instruction.
193 */
194 #define NOP -1
195
196 /*
197 * Register numbers for use-def values.
198 * 0 through BPF_MEMWORDS-1 represent the corresponding scratch memory
199 * location. A_ATOM is the accumulator and X_ATOM is the index
200 * register.
201 */
202 #define A_ATOM BPF_MEMWORDS
203 #define X_ATOM (BPF_MEMWORDS+1)
204
205 /*
206 * This define is used to represent *both* the accumulator and
207 * x register in use-def computations.
208 * Currently, the use-def code assumes only one definition per instruction.
209 */
210 #define AX_ATOM N_ATOMS
211
212 /*
213 * These data structures are used in a Cocke and Schwartz style
214 * value numbering scheme. Since the flowgraph is acyclic,
215 * exit values can be propagated from a node's predecessors
216 * provided it is uniquely defined.
217 */
218 struct valnode {
219 int code;
220 int v0, v1;
221 int val;
222 struct valnode *next;
223 };
224
225 /* Integer constants mapped with the load immediate opcode. */
226 #define K(i) F(opt_state, BPF_LD|BPF_IMM|BPF_W, i, 0L)
227
228 struct vmapinfo {
229 int is_const;
230 bpf_int32 const_val;
231 };
232
233 typedef struct {
234 /*
235 * A flag to indicate that further optimization is needed.
236 * Iterative passes are continued until a given pass yields no
237 * branch movement.
238 */
239 int done;
240
241 int n_blocks;
242 struct block **blocks;
243 int n_edges;
244 struct edge **edges;
245
246 /*
247 * A bit vector set representation of the dominators.
248 * We round up the set size to the next power of two.
249 */
250 int nodewords;
251 int edgewords;
252 struct block **levels;
253 bpf_u_int32 *space;
254
255 #define BITS_PER_WORD (8*sizeof(bpf_u_int32))
256 /*
257 * True if 'a' is in uset {p}
258 */
259 #define SET_MEMBER(p, a) \
260 ((p)[(unsigned)(a) / BITS_PER_WORD] & (1 << ((unsigned)(a) % BITS_PER_WORD)))
261
262 /*
263 * Add 'a' to uset p.
264 */
265 #define SET_INSERT(p, a) \
266 (p)[(unsigned)(a) / BITS_PER_WORD] |= (1 << ((unsigned)(a) % BITS_PER_WORD))
267
268 /*
269 * Delete 'a' from uset p.
270 */
271 #define SET_DELETE(p, a) \
272 (p)[(unsigned)(a) / BITS_PER_WORD] &= ~(1 << ((unsigned)(a) % BITS_PER_WORD))
273
274 /*
275 * a := a intersect b
276 */
277 #define SET_INTERSECT(a, b, n)\
278 {\
279 register bpf_u_int32 *_x = a, *_y = b;\
280 register int _n = n;\
281 while (--_n >= 0) *_x++ &= *_y++;\
282 }
283
284 /*
285 * a := a - b
286 */
287 #define SET_SUBTRACT(a, b, n)\
288 {\
289 register bpf_u_int32 *_x = a, *_y = b;\
290 register int _n = n;\
291 while (--_n >= 0) *_x++ &=~ *_y++;\
292 }
293
294 /*
295 * a := a union b
296 */
297 #define SET_UNION(a, b, n)\
298 {\
299 register bpf_u_int32 *_x = a, *_y = b;\
300 register int _n = n;\
301 while (--_n >= 0) *_x++ |= *_y++;\
302 }
303
304 uset all_dom_sets;
305 uset all_closure_sets;
306 uset all_edge_sets;
307
308 #define MODULUS 213
309 struct valnode *hashtbl[MODULUS];
310 int curval;
311 int maxval;
312
313 struct vmapinfo *vmap;
314 struct valnode *vnode_base;
315 struct valnode *next_vnode;
316 } opt_state_t;
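/*
 * A minimal usage sketch for the uset macros above (illustrative,
 * not part of the original source).  With BITS_PER_WORD == 32,
 * element 37 lives in word 1, bit 5:
 *
 *	bpf_u_int32 set[2] = { 0, 0 };
 *
 *	SET_INSERT(set, 37);		// set[1] |= 1 << 5
 *	if (SET_MEMBER(set, 37))	// true after the insert
 *		SET_DELETE(set, 37);	// set[1] &= ~(1 << 5)
 */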
317
318 typedef struct {
319 /*
320 * Some pointers used to convert the basic block form of the code,
321 * into the array form that BPF requires. 'fstart' will point to
322 * the malloc'd array while 'ftail' is used during the recursive
323 * traversal.
324 */
325 struct bpf_insn *fstart;
326 struct bpf_insn *ftail;
327 } conv_state_t;
328
329 static void opt_init(compiler_state_t *, opt_state_t *, struct icode *);
330 static void opt_cleanup(opt_state_t *);
331
332 static void intern_blocks(opt_state_t *, struct icode *);
333
334 static void find_inedges(opt_state_t *, struct block *);
335 #ifdef BDEBUG
336 static void opt_dump(compiler_state_t *, struct icode *);
337 #endif
338
339 #ifndef MAX
340 #define MAX(a,b) ((a)>(b)?(a):(b))
341 #endif
342
343 static void
344 find_levels_r(opt_state_t *opt_state, struct icode *ic, struct block *b)
345 {
346 int level;
347
348 if (isMarked(ic, b))
349 return;
350
351 Mark(ic, b);
352 b->link = 0;
353
354 if (JT(b)) {
355 find_levels_r(opt_state, ic, JT(b));
356 find_levels_r(opt_state, ic, JF(b));
357 level = MAX(JT(b)->level, JF(b)->level) + 1;
358 } else
359 level = 0;
360 b->level = level;
361 b->link = opt_state->levels[level];
362 opt_state->levels[level] = b;
363 }
364
365 /*
366 * Level graph. The levels go from 0 at the leaves to
367 * N_LEVELS at the root. The opt_state->levels[] array points to the
368 * first node of the level list, whose elements are linked
369 * with the 'link' field of the struct block.
370 */
371 static void
372 find_levels(opt_state_t *opt_state, struct icode *ic)
373 {
374 memset((char *)opt_state->levels, 0, opt_state->n_blocks * sizeof(*opt_state->levels));
375 unMarkAll(ic);
376 find_levels_r(opt_state, ic, ic->root);
377 }
378
379 /*
380 * Find dominator relationships.
381 * Assumes graph has been leveled.
382 */
383 static void
384 find_dom(opt_state_t *opt_state, struct block *root)
385 {
386 int i;
387 struct block *b;
388 bpf_u_int32 *x;
389
390 /*
391 * Initialize sets to contain all nodes.
392 */
393 x = opt_state->all_dom_sets;
394 i = opt_state->n_blocks * opt_state->nodewords;
395 while (--i >= 0)
396 *x++ = 0xFFFFFFFFU;
397 /* Root starts off empty. */
398 for (i = opt_state->nodewords; --i >= 0;)
399 root->dom[i] = 0;
400
401 /* root->level is the highest level number found. */
402 for (i = root->level; i >= 0; --i) {
403 for (b = opt_state->levels[i]; b; b = b->link) {
404 SET_INSERT(b->dom, b->id);
405 if (JT(b) == 0)
406 continue;
407 SET_INTERSECT(JT(b)->dom, b->dom, opt_state->nodewords);
408 SET_INTERSECT(JF(b)->dom, b->dom, opt_state->nodewords);
409 }
410 }
411 }
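/*
 * In data-flow terms (a sketch added for clarity, not part of the
 * original source), the loop above computes, level by level on the
 * acyclic graph,
 *
 *	dom(root) = { root }
 *	dom(b)    = { b } union ( intersection of dom(p) over all
 *	            predecessors p of b )
 *
 * expressed from the predecessor's side: each block inserts itself
 * and then SET_INTERSECTs its set into both successors' sets, which
 * start out full.
 */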
412
413 static void
414 propedom(opt_state_t *opt_state, struct edge *ep)
415 {
416 SET_INSERT(ep->edom, ep->id);
417 if (ep->succ) {
418 SET_INTERSECT(ep->succ->et.edom, ep->edom, opt_state->edgewords);
419 SET_INTERSECT(ep->succ->ef.edom, ep->edom, opt_state->edgewords);
420 }
421 }
422
423 /*
424 * Compute edge dominators.
425 * Assumes graph has been leveled and predecessors established.
426 */
427 static void
428 find_edom(opt_state_t *opt_state, struct block *root)
429 {
430 int i;
431 uset x;
432 struct block *b;
433
434 x = opt_state->all_edge_sets;
435 for (i = opt_state->n_edges * opt_state->edgewords; --i >= 0; )
436 x[i] = 0xFFFFFFFFU;
437
438 /* root->level is the highest level number found. */
439 memset(root->et.edom, 0, opt_state->edgewords * sizeof(*(uset)0));
440 memset(root->ef.edom, 0, opt_state->edgewords * sizeof(*(uset)0));
441 for (i = root->level; i >= 0; --i) {
442 for (b = opt_state->levels[i]; b != 0; b = b->link) {
443 propedom(opt_state, &b->et);
444 propedom(opt_state, &b->ef);
445 }
446 }
447 }
448
449 /*
450 * Find the backwards transitive closure of the flow graph. These sets
451 * are backwards in the sense that we find the set of nodes that reach
452 * a given node, not the set of nodes that can be reached by a node.
453 *
454 * Assumes graph has been leveled.
455 */
456 static void
457 find_closure(opt_state_t *opt_state, struct block *root)
458 {
459 int i;
460 struct block *b;
461
462 /*
463 * Initialize sets to contain no nodes.
464 */
465 memset((char *)opt_state->all_closure_sets, 0,
466 opt_state->n_blocks * opt_state->nodewords * sizeof(*opt_state->all_closure_sets));
467
468 /* root->level is the highest level number found. */
469 for (i = root->level; i >= 0; --i) {
470 for (b = opt_state->levels[i]; b; b = b->link) {
471 SET_INSERT(b->closure, b->id);
472 if (JT(b) == 0)
473 continue;
474 SET_UNION(JT(b)->closure, b->closure, opt_state->nodewords);
475 SET_UNION(JF(b)->closure, b->closure, opt_state->nodewords);
476 }
477 }
478 }
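/*
 * Example (illustrative): for the chain root -> b1 -> b2, the pass
 * above yields closure(b1) = { root, b1 } and closure(b2) =
 * { root, b1, b2 }: each set names every node from which the block
 * can be reached, including the block itself.
 */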
479
480 /*
481 * Return the register number that is used by s. If A and X are both
482 * used, return AX_ATOM. If no register is used, return -1.
483 *
484 * The implementation should probably change to an array access.
485 */
486 static int
487 atomuse(struct stmt *s)
488 {
489 register int c = s->code;
490
491 if (c == NOP)
492 return -1;
493
494 switch (BPF_CLASS(c)) {
495
496 case BPF_RET:
497 return (BPF_RVAL(c) == BPF_A) ? A_ATOM :
498 (BPF_RVAL(c) == BPF_X) ? X_ATOM : -1;
499
500 case BPF_LD:
501 case BPF_LDX:
502 return (BPF_MODE(c) == BPF_IND) ? X_ATOM :
503 (BPF_MODE(c) == BPF_MEM) ? s->k : -1;
504
505 case BPF_ST:
506 return A_ATOM;
507
508 case BPF_STX:
509 return X_ATOM;
510
511 case BPF_JMP:
512 case BPF_ALU:
513 if (BPF_SRC(c) == BPF_X)
514 return AX_ATOM;
515 return A_ATOM;
516
517 case BPF_MISC:
518 return BPF_MISCOP(c) == BPF_TXA ? X_ATOM : A_ATOM;
519 }
520 abort();
521 /* NOTREACHED */
522 }
523
524 /*
525 * Return the register number that is defined by 's'. We assume that
526 * a single stmt cannot define more than one register. If no register
527 * is defined, return -1.
528 *
529 * The implementation should probably change to an array access.
530 */
531 static int
532 atomdef(struct stmt *s)
533 {
534 if (s->code == NOP)
535 return -1;
536
537 switch (BPF_CLASS(s->code)) {
538
539 case BPF_LD:
540 case BPF_ALU:
541 return A_ATOM;
542
543 case BPF_LDX:
544 return X_ATOM;
545
546 case BPF_ST:
547 case BPF_STX:
548 return s->k;
549
550 case BPF_MISC:
551 return BPF_MISCOP(s->code) == BPF_TAX ? X_ATOM : A_ATOM;
552 }
553 return -1;
554 }
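/*
 * A worked example for atomuse()/atomdef() (illustrative, not part
 * of the original source):
 *
 *	ld [x + 14]	(BPF_LD|BPF_IND|BPF_W, k = 14)
 *		atomuse() == X_ATOM, atomdef() == A_ATOM
 *	st M[3]		(BPF_ST, k = 3)
 *		atomuse() == A_ATOM, atomdef() == 3 (scratch slot 3)
 */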
555
556 /*
557 * Compute the sets of registers used, defined, and killed by 'b'.
558 *
559 * "Used" means that a statement in 'b' uses the register before any
560 * statement in 'b' defines it, i.e. it uses the value left in
561 * that register by a predecessor block of this block.
562 * "Defined" means that a statement in 'b' defines it.
563 * "Killed" means that a statement in 'b' defines it before any
564 * statement in 'b' uses it, i.e. it kills the value left in that
565 * register by a predecessor block of this block.
566 */
567 static void
568 compute_local_ud(struct block *b)
569 {
570 struct slist *s;
571 atomset def = 0, use = 0, killed = 0;
572 int atom;
573
574 for (s = b->stmts; s; s = s->next) {
575 if (s->s.code == NOP)
576 continue;
577 atom = atomuse(&s->s);
578 if (atom >= 0) {
579 if (atom == AX_ATOM) {
580 if (!ATOMELEM(def, X_ATOM))
581 use |= ATOMMASK(X_ATOM);
582 if (!ATOMELEM(def, A_ATOM))
583 use |= ATOMMASK(A_ATOM);
584 }
585 else if (atom < N_ATOMS) {
586 if (!ATOMELEM(def, atom))
587 use |= ATOMMASK(atom);
588 }
589 else
590 abort();
591 }
592 atom = atomdef(&s->s);
593 if (atom >= 0) {
594 if (!ATOMELEM(use, atom))
595 killed |= ATOMMASK(atom);
596 def |= ATOMMASK(atom);
597 }
598 }
599 if (BPF_CLASS(b->s.code) == BPF_JMP) {
600 /*
601 * XXX - what about RET?
602 */
603 atom = atomuse(&b->s);
604 if (atom >= 0) {
605 if (atom == AX_ATOM) {
606 if (!ATOMELEM(def, X_ATOM))
607 use |= ATOMMASK(X_ATOM);
608 if (!ATOMELEM(def, A_ATOM))
609 use |= ATOMMASK(A_ATOM);
610 }
611 else if (atom < N_ATOMS) {
612 if (!ATOMELEM(def, atom))
613 use |= ATOMMASK(atom);
614 }
615 else
616 abort();
617 }
618 }
619
620 b->def = def;
621 b->kill = killed;
622 b->in_use = use;
623 }
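/*
 * A small worked example (illustrative): for a block whose body is
 *
 *	ldx M[0]	uses slot 0, defines X
 *	ld #4		defines A
 *	st M[0]		uses A, defines slot 0
 *
 * the result is use = { slot 0 }, def = { X, A, slot 0 } and
 * kill = { X, A }: slot 0 is read before the block writes it, so a
 * predecessor's value is live into the block, while X and A are
 * written before any read and thus kill the incoming values.
 */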
624
625 /*
626 * Assume graph is already leveled.
627 */
628 static void
629 find_ud(opt_state_t *opt_state, struct block *root)
630 {
631 int i, maxlevel;
632 struct block *p;
633
634 /*
635 * root->level is the highest level number found;
636 * count down from there.
637 */
638 maxlevel = root->level;
639 for (i = maxlevel; i >= 0; --i)
640 for (p = opt_state->levels[i]; p; p = p->link) {
641 compute_local_ud(p);
642 p->out_use = 0;
643 }
644
645 for (i = 1; i <= maxlevel; ++i) {
646 for (p = opt_state->levels[i]; p; p = p->link) {
647 p->out_use |= JT(p)->in_use | JF(p)->in_use;
648 p->in_use |= p->out_use &~ p->kill;
649 }
650 }
651 }
652 static void
653 init_val(opt_state_t *opt_state)
654 {
655 opt_state->curval = 0;
656 opt_state->next_vnode = opt_state->vnode_base;
657 memset((char *)opt_state->vmap, 0, opt_state->maxval * sizeof(*opt_state->vmap));
658 memset((char *)opt_state->hashtbl, 0, sizeof opt_state->hashtbl);
659 }
660
661 /* Because we really don't have an IR, this stuff is a little messy. */
662 static int
663 F(opt_state_t *opt_state, int code, int v0, int v1)
664 {
665 u_int hash;
666 int val;
667 struct valnode *p;
668
669 hash = (u_int)code ^ ((u_int)v0 << 4) ^ ((u_int)v1 << 8);
670 hash %= MODULUS;
671
672 for (p = opt_state->hashtbl[hash]; p; p = p->next)
673 if (p->code == code && p->v0 == v0 && p->v1 == v1)
674 return p->val;
675
676 val = ++opt_state->curval;
677 if (BPF_MODE(code) == BPF_IMM &&
678 (BPF_CLASS(code) == BPF_LD || BPF_CLASS(code) == BPF_LDX)) {
679 opt_state->vmap[val].const_val = v0;
680 opt_state->vmap[val].is_const = 1;
681 }
682 p = opt_state->next_vnode++;
683 p->val = val;
684 p->code = code;
685 p->v0 = v0;
686 p->v1 = v1;
687 p->next = opt_state->hashtbl[hash];
688 opt_state->hashtbl[hash] = p;
689
690 return val;
691 }
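/*
 * Usage sketch (illustrative, not part of the original source):
 * two symbolic evaluations of the same load,
 *
 *	v1 = F(opt_state, BPF_LD|BPF_ABS|BPF_B, 14, 0L);
 *	v2 = F(opt_state, BPF_LD|BPF_ABS|BPF_B, 14, 0L);
 *
 * hash to the same valnode, so v1 == v2; that equality is what lets
 * later passes recognize the second load as redundant.  K(k) is the
 * same mechanism applied to "ld #k", and additionally records the
 * constant in opt_state->vmap[] for the folding code.
 */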
692
693 static inline void
694 vstore(struct stmt *s, int *valp, int newval, int alter)
695 {
696 if (alter && newval != VAL_UNKNOWN && *valp == newval)
697 s->code = NOP;
698 else
699 *valp = newval;
700 }
701
702 /*
703 * Do constant-folding on binary operators.
704 * (Unary operators are handled elsewhere.)
705 */
706 static void
707 fold_op(compiler_state_t *cstate, opt_state_t *opt_state,
708 struct stmt *s, int v0, int v1)
709 {
710 bpf_u_int32 a, b;
711
712 a = opt_state->vmap[v0].const_val;
713 b = opt_state->vmap[v1].const_val;
714
715 switch (BPF_OP(s->code)) {
716 case BPF_ADD:
717 a += b;
718 break;
719
720 case BPF_SUB:
721 a -= b;
722 break;
723
724 case BPF_MUL:
725 a *= b;
726 break;
727
728 case BPF_DIV:
729 if (b == 0)
730 bpf_error(cstate, "division by zero");
731 a /= b;
732 break;
733
734 case BPF_MOD:
735 if (b == 0)
736 bpf_error(cstate, "modulus by zero");
737 a %= b;
738 break;
739
740 case BPF_AND:
741 a &= b;
742 break;
743
744 case BPF_OR:
745 a |= b;
746 break;
747
748 case BPF_XOR:
749 a ^= b;
750 break;
751
752 case BPF_LSH:
753 a <<= b;
754 break;
755
756 case BPF_RSH:
757 a >>= b;
758 break;
759
760 default:
761 abort();
762 }
763 s->k = a;
764 s->code = BPF_LD|BPF_IMM;
765 opt_state->done = 0;
766 }
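/*
 * Example (illustrative): if the accumulator's value number maps to
 * the known constant 6 and the statement is "and #3"
 * (BPF_ALU|BPF_AND|BPF_K), fold_op() computes 6 & 3 == 2 and
 * rewrites the statement in place as "ld #2", clearing
 * opt_state->done so that another optimization pass runs.
 */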
767
768 static inline struct slist *
769 this_op(struct slist *s)
770 {
771 while (s != 0 && s->s.code == NOP)
772 s = s->next;
773 return s;
774 }
775
776 static void
777 opt_not(struct block *b)
778 {
779 struct block *tmp = JT(b);
780
781 JT(b) = JF(b);
782 JF(b) = tmp;
783 }
784
785 static void
786 opt_peep(opt_state_t *opt_state, struct block *b)
787 {
788 struct slist *s;
789 struct slist *next, *last;
790 int val;
791
792 s = b->stmts;
793 if (s == 0)
794 return;
795
796 last = s;
797 for (/*empty*/; /*empty*/; s = next) {
798 /*
799 * Skip over nops.
800 */
801 s = this_op(s);
802 if (s == 0)
803 break; /* nothing left in the block */
804
805 /*
806 * Find the next real instruction after that one
807 * (skipping nops).
808 */
809 next = this_op(s->next);
810 if (next == 0)
811 break; /* no next instruction */
812 last = next;
813
814 /*
815 * st M[k] --> st M[k]
816 * ldx M[k] tax
817 */
818 if (s->s.code == BPF_ST &&
819 next->s.code == (BPF_LDX|BPF_MEM) &&
820 s->s.k == next->s.k) {
821 opt_state->done = 0;
822 next->s.code = BPF_MISC|BPF_TAX;
823 }
824 /*
825 * ld #k --> ldx #k
826 * tax txa
827 */
828 if (s->s.code == (BPF_LD|BPF_IMM) &&
829 next->s.code == (BPF_MISC|BPF_TAX)) {
830 s->s.code = BPF_LDX|BPF_IMM;
831 next->s.code = BPF_MISC|BPF_TXA;
832 opt_state->done = 0;
833 }
834 /*
835 * This is an ugly special case, but it happens
836 * when you say tcp[k] or udp[k] where k is a constant.
837 */
838 if (s->s.code == (BPF_LD|BPF_IMM)) {
839 struct slist *add, *tax, *ild;
840
841 /*
842 * Check that X isn't used on exit from this
843 * block (which the optimizer might cause).
844 * We know the code generator won't generate
845 * any local dependencies.
846 */
847 if (ATOMELEM(b->out_use, X_ATOM))
848 continue;
849
850 /*
851 * Check that the instruction following the ldi
852 * is an addx, or it's an ldxms with an addx
853 * following it (with 0 or more nops between the
854 * ldxms and addx).
855 */
856 if (next->s.code != (BPF_LDX|BPF_MSH|BPF_B))
857 add = next;
858 else
859 add = this_op(next->next);
860 if (add == 0 || add->s.code != (BPF_ALU|BPF_ADD|BPF_X))
861 continue;
862
863 /*
864 * Check that a tax follows that (with 0 or more
865 * nops between them).
866 */
867 tax = this_op(add->next);
868 if (tax == 0 || tax->s.code != (BPF_MISC|BPF_TAX))
869 continue;
870
871 /*
872 * Check that an ild follows that (with 0 or more
873 * nops between them).
874 */
875 ild = this_op(tax->next);
876 if (ild == 0 || BPF_CLASS(ild->s.code) != BPF_LD ||
877 BPF_MODE(ild->s.code) != BPF_IND)
878 continue;
879 /*
880 * We want to turn this sequence:
881 *
882 * (004) ldi #0x2 {s}
883 * (005) ldxms [14] {next} -- optional
884 * (006) addx {add}
885 * (007) tax {tax}
886 * (008) ild [x+0] {ild}
887 *
888 * into this sequence:
889 *
890 * (004) nop
891 * (005) ldxms [14]
892 * (006) nop
893 * (007) nop
894 * (008) ild [x+2]
895 *
896 * XXX We need to check that X is not
897 * subsequently used, because we want to change
898 * what'll be in it after this sequence.
899 *
900 * We know we can eliminate the accumulator
901 * modifications earlier in the sequence since
902 * it is defined by the last stmt of this sequence
903 * (i.e., the last statement of the sequence loads
904 * a value into the accumulator, so we can eliminate
905 * earlier operations on the accumulator).
906 */
907 ild->s.k += s->s.k;
908 s->s.code = NOP;
909 add->s.code = NOP;
910 tax->s.code = NOP;
911 opt_state->done = 0;
912 }
913 }
914 /*
915 * If the comparison at the end of a block is an equality
916 * comparison against a constant, and nobody uses the value
917 * we leave in the A register at the end of a block, and
918 * the operation preceding the comparison is an arithmetic
919 * operation, we can sometimes optimize it away.
920 */
921 if (b->s.code == (BPF_JMP|BPF_JEQ|BPF_K) &&
922 !ATOMELEM(b->out_use, A_ATOM)) {
923 /*
924 * We can optimize away certain subtractions of the
925 * X register.
926 */
927 if (last->s.code == (BPF_ALU|BPF_SUB|BPF_X)) {
928 val = b->val[X_ATOM];
929 if (opt_state->vmap[val].is_const) {
930 /*
931 * If we have a subtract to do a comparison,
932 * and the X register is a known constant,
933 * we can merge this value into the
934 * comparison:
935 *
936 * sub x -> nop
937 * jeq #y jeq #(x+y)
938 */
939 b->s.k += opt_state->vmap[val].const_val;
940 last->s.code = NOP;
941 opt_state->done = 0;
942 } else if (b->s.k == 0) {
943 /*
944 * If the X register isn't a constant,
945 * and the comparison in the test is
946 * against 0, we can compare with the
947 * X register, instead:
948 *
949 * sub x -> nop
950 * jeq #0 jeq x
951 */
952 last->s.code = NOP;
953 b->s.code = BPF_JMP|BPF_JEQ|BPF_X;
954 opt_state->done = 0;
955 }
956 }
957 /*
958 * Likewise, a constant subtract can be simplified:
959 *
960 * sub #x -> nop
961 * jeq #y -> jeq #(x+y)
962 */
963 else if (last->s.code == (BPF_ALU|BPF_SUB|BPF_K)) {
964 last->s.code = NOP;
965 b->s.k += last->s.k;
966 opt_state->done = 0;
967 }
968 /*
969 * And, similarly, a constant AND can be simplified
970 * if we're testing against 0, i.e.:
971 *
972 * and #k nop
973 * jeq #0 -> jset #k
974 */
975 else if (last->s.code == (BPF_ALU|BPF_AND|BPF_K) &&
976 b->s.k == 0) {
977 b->s.k = last->s.k;
978 b->s.code = BPF_JMP|BPF_K|BPF_JSET;
979 last->s.code = NOP;
980 opt_state->done = 0;
981 opt_not(b);
982 }
983 }
984 /*
985 * jset #0 -> never
986 * jset #ffffffff -> always
987 */
988 if (b->s.code == (BPF_JMP|BPF_K|BPF_JSET)) {
989 if (b->s.k == 0)
990 JT(b) = JF(b);
991 if ((u_int)b->s.k == 0xffffffffU)
992 JF(b) = JT(b);
993 }
994 /*
995 * If we're comparing against the index register, and the index
996 * register is a known constant, we can just compare against that
997 * constant.
998 */
999 val = b->val[X_ATOM];
1000 if (opt_state->vmap[val].is_const && BPF_SRC(b->s.code) == BPF_X) {
1001 bpf_int32 v = opt_state->vmap[val].const_val;
1002 b->s.code &= ~BPF_X;
1003 b->s.k = v;
1004 }
1005 /*
1006 * If the accumulator is a known constant, we can compute the
1007 * comparison result.
1008 */
1009 val = b->val[A_ATOM];
1010 if (opt_state->vmap[val].is_const && BPF_SRC(b->s.code) == BPF_K) {
1011 bpf_int32 v = opt_state->vmap[val].const_val;
1012 switch (BPF_OP(b->s.code)) {
1013
1014 case BPF_JEQ:
1015 v = v == b->s.k;
1016 break;
1017
1018 case BPF_JGT:
1019 v = (unsigned)v > (unsigned)b->s.k;
1020 break;
1021
1022 case BPF_JGE:
1023 v = (unsigned)v >= (unsigned)b->s.k;
1024 break;
1025
1026 case BPF_JSET:
1027 v &= b->s.k;
1028 break;
1029
1030 default:
1031 abort();
1032 }
1033 if (JF(b) != JT(b))
1034 opt_state->done = 0;
1035 if (v)
1036 JF(b) = JT(b);
1037 else
1038 JT(b) = JF(b);
1039 }
1040 }
1041
1042 /*
1043 * Compute the symbolic value of the expression of 's', and update
1044 * anything it defines in the value table 'val'. If 'alter' is true,
1045 * do various optimizations. This code would be cleaner if symbolic
1046 * evaluation and code transformations weren't folded together.
1047 */
1048 static void
1049 opt_stmt(compiler_state_t *cstate, opt_state_t *opt_state,
1050 struct stmt *s, int val[], int alter)
1051 {
1052 int op;
1053 int v;
1054
1055 switch (s->code) {
1056
1057 case BPF_LD|BPF_ABS|BPF_W:
1058 case BPF_LD|BPF_ABS|BPF_H:
1059 case BPF_LD|BPF_ABS|BPF_B:
1060 v = F(opt_state, s->code, s->k, 0L);
1061 vstore(s, &val[A_ATOM], v, alter);
1062 break;
1063
1064 case BPF_LD|BPF_IND|BPF_W:
1065 case BPF_LD|BPF_IND|BPF_H:
1066 case BPF_LD|BPF_IND|BPF_B:
1067 v = val[X_ATOM];
1068 if (alter && opt_state->vmap[v].is_const) {
1069 s->code = BPF_LD|BPF_ABS|BPF_SIZE(s->code);
1070 s->k += opt_state->vmap[v].const_val;
1071 v = F(opt_state, s->code, s->k, 0L);
1072 opt_state->done = 0;
1073 }
1074 else
1075 v = F(opt_state, s->code, s->k, v);
1076 vstore(s, &val[A_ATOM], v, alter);
1077 break;
1078
1079 case BPF_LD|BPF_LEN:
1080 v = F(opt_state, s->code, 0L, 0L);
1081 vstore(s, &val[A_ATOM], v, alter);
1082 break;
1083
1084 case BPF_LD|BPF_IMM:
1085 v = K(s->k);
1086 vstore(s, &val[A_ATOM], v, alter);
1087 break;
1088
1089 case BPF_LDX|BPF_IMM:
1090 v = K(s->k);
1091 vstore(s, &val[X_ATOM], v, alter);
1092 break;
1093
1094 case BPF_LDX|BPF_MSH|BPF_B:
1095 v = F(opt_state, s->code, s->k, 0L);
1096 vstore(s, &val[X_ATOM], v, alter);
1097 break;
1098
1099 case BPF_ALU|BPF_NEG:
1100 if (alter && opt_state->vmap[val[A_ATOM]].is_const) {
1101 s->code = BPF_LD|BPF_IMM;
1102 s->k = -opt_state->vmap[val[A_ATOM]].const_val;
1103 val[A_ATOM] = K(s->k);
1104 }
1105 else
1106 val[A_ATOM] = F(opt_state, s->code, val[A_ATOM], 0L);
1107 break;
1108
1109 case BPF_ALU|BPF_ADD|BPF_K:
1110 case BPF_ALU|BPF_SUB|BPF_K:
1111 case BPF_ALU|BPF_MUL|BPF_K:
1112 case BPF_ALU|BPF_DIV|BPF_K:
1113 case BPF_ALU|BPF_MOD|BPF_K:
1114 case BPF_ALU|BPF_AND|BPF_K:
1115 case BPF_ALU|BPF_OR|BPF_K:
1116 case BPF_ALU|BPF_XOR|BPF_K:
1117 case BPF_ALU|BPF_LSH|BPF_K:
1118 case BPF_ALU|BPF_RSH|BPF_K:
1119 op = BPF_OP(s->code);
1120 if (alter) {
1121 if (s->k == 0) {
1122 /* Don't optimize away "sub #0"
1123 * as it may be needed later to
1124 * fix up the generated math code. */
1125 if (op == BPF_ADD ||
1126 op == BPF_LSH || op == BPF_RSH ||
1127 op == BPF_OR || op == BPF_XOR) {
1128 s->code = NOP;
1129 break;
1130 }
1131 if (op == BPF_MUL || op == BPF_AND) {
1132 s->code = BPF_LD|BPF_IMM;
1133 val[A_ATOM] = K(s->k);
1134 break;
1135 }
1136 }
1137 if (opt_state->vmap[val[A_ATOM]].is_const) {
1138 fold_op(cstate, opt_state, s, val[A_ATOM], K(s->k));
1139 val[A_ATOM] = K(s->k);
1140 break;
1141 }
1142 }
1143 val[A_ATOM] = F(opt_state, s->code, val[A_ATOM], K(s->k));
1144 break;
1145
1146 case BPF_ALU|BPF_ADD|BPF_X:
1147 case BPF_ALU|BPF_SUB|BPF_X:
1148 case BPF_ALU|BPF_MUL|BPF_X:
1149 case BPF_ALU|BPF_DIV|BPF_X:
1150 case BPF_ALU|BPF_MOD|BPF_X:
1151 case BPF_ALU|BPF_AND|BPF_X:
1152 case BPF_ALU|BPF_OR|BPF_X:
1153 case BPF_ALU|BPF_XOR|BPF_X:
1154 case BPF_ALU|BPF_LSH|BPF_X:
1155 case BPF_ALU|BPF_RSH|BPF_X:
1156 op = BPF_OP(s->code);
1157 if (alter && opt_state->vmap[val[X_ATOM]].is_const) {
1158 if (opt_state->vmap[val[A_ATOM]].is_const) {
1159 fold_op(cstate, opt_state, s, val[A_ATOM], val[X_ATOM]);
1160 val[A_ATOM] = K(s->k);
1161 }
1162 else {
1163 s->code = BPF_ALU|BPF_K|op;
1164 s->k = opt_state->vmap[val[X_ATOM]].const_val;
1165 opt_state->done = 0;
1166 val[A_ATOM] =
1167 F(opt_state, s->code, val[A_ATOM], K(s->k));
1168 }
1169 break;
1170 }
1171 /*
1172 * Check if we're doing something to an accumulator
1173 * that is 0, and simplify. This may not seem like
1174 * much of a simplification but it could open up further
1175 * optimizations.
1176 * XXX We could also check for mul by 1, etc.
1177 */
1178 if (alter && opt_state->vmap[val[A_ATOM]].is_const
1179 && opt_state->vmap[val[A_ATOM]].const_val == 0) {
1180 if (op == BPF_ADD || op == BPF_OR || op == BPF_XOR) {
1181 s->code = BPF_MISC|BPF_TXA;
1182 vstore(s, &val[A_ATOM], val[X_ATOM], alter);
1183 break;
1184 }
1185 else if (op == BPF_MUL || op == BPF_DIV || op == BPF_MOD ||
1186 op == BPF_AND || op == BPF_LSH || op == BPF_RSH) {
1187 s->code = BPF_LD|BPF_IMM;
1188 s->k = 0;
1189 vstore(s, &val[A_ATOM], K(s->k), alter);
1190 break;
1191 }
1192 else if (op == BPF_NEG) {
1193 s->code = NOP;
1194 break;
1195 }
1196 }
1197 val[A_ATOM] = F(opt_state, s->code, val[A_ATOM], val[X_ATOM]);
1198 break;
1199
1200 case BPF_MISC|BPF_TXA:
1201 vstore(s, &val[A_ATOM], val[X_ATOM], alter);
1202 break;
1203
1204 case BPF_LD|BPF_MEM:
1205 v = val[s->k];
1206 if (alter && opt_state->vmap[v].is_const) {
1207 s->code = BPF_LD|BPF_IMM;
1208 s->k = opt_state->vmap[v].const_val;
1209 opt_state->done = 0;
1210 }
1211 vstore(s, &val[A_ATOM], v, alter);
1212 break;
1213
1214 case BPF_MISC|BPF_TAX:
1215 vstore(s, &val[X_ATOM], val[A_ATOM], alter);
1216 break;
1217
1218 case BPF_LDX|BPF_MEM:
1219 v = val[s->k];
1220 if (alter && opt_state->vmap[v].is_const) {
1221 s->code = BPF_LDX|BPF_IMM;
1222 s->k = opt_state->vmap[v].const_val;
1223 opt_state->done = 0;
1224 }
1225 vstore(s, &val[X_ATOM], v, alter);
1226 break;
1227
1228 case BPF_ST:
1229 vstore(s, &val[s->k], val[A_ATOM], alter);
1230 break;
1231
1232 case BPF_STX:
1233 vstore(s, &val[s->k], val[X_ATOM], alter);
1234 break;
1235 }
1236 }
1237
1238 static void
1239 deadstmt(opt_state_t *opt_state, register struct stmt *s, register struct stmt *last[])
1240 {
1241 register int atom;
1242
1243 atom = atomuse(s);
1244 if (atom >= 0) {
1245 if (atom == AX_ATOM) {
1246 last[X_ATOM] = 0;
1247 last[A_ATOM] = 0;
1248 }
1249 else
1250 last[atom] = 0;
1251 }
1252 atom = atomdef(s);
1253 if (atom >= 0) {
1254 if (last[atom]) {
1255 opt_state->done = 0;
1256 last[atom]->code = NOP;
1257 }
1258 last[atom] = s;
1259 }
1260 }
1261
1262 static void
1263 opt_deadstores(opt_state_t *opt_state, register struct block *b)
1264 {
1265 register struct slist *s;
1266 register int atom;
1267 struct stmt *last[N_ATOMS];
1268
1269 memset((char *)last, 0, sizeof last);
1270
1271 for (s = b->stmts; s != 0; s = s->next)
1272 deadstmt(opt_state, &s->s, last);
1273 deadstmt(opt_state, &b->s, last);
1274
1275 for (atom = 0; atom < N_ATOMS; ++atom)
1276 if (last[atom] && !ATOMELEM(b->out_use, atom)) {
1277 last[atom]->code = NOP;
1278 opt_state->done = 0;
1279 }
1280 }
1281
1282 static void
1283 opt_blk(compiler_state_t *cstate, opt_state_t *opt_state,
1284 struct block *b, int do_stmts)
1285 {
1286 struct slist *s;
1287 struct edge *p;
1288 int i;
1289 bpf_int32 aval, xval;
1290
1291 #if 0
1292 for (s = b->stmts; s && s->next; s = s->next)
1293 if (BPF_CLASS(s->s.code) == BPF_JMP) {
1294 do_stmts = 0;
1295 break;
1296 }
1297 #endif
1298
1299 /*
1300 * Initialize the atom values.
1301 */
1302 p = b->in_edges;
1303 if (p == 0) {
1304 /*
1305 * We have no predecessors, so everything is undefined
1306 * upon entry to this block.
1307 */
1308 memset((char *)b->val, 0, sizeof(b->val));
1309 } else {
1310 /*
1311 * Inherit values from our predecessors.
1312 *
1313 * First, get the values from the predecessor along the
1314 * first edge leading to this node.
1315 */
1316 memcpy((char *)b->val, (char *)p->pred->val, sizeof(b->val));
1317 /*
1318 * Now look at all the other nodes leading to this node.
1319 * If, for the predecessor along that edge, a register
1320 * has a different value from the one we have (i.e.,
1321 * control paths are merging, and the merging paths
1322 * assign different values to that register), give the
1323 * register the undefined value of 0.
1324 */
1325 while ((p = p->next) != NULL) {
1326 for (i = 0; i < N_ATOMS; ++i)
1327 if (b->val[i] != p->pred->val[i])
1328 b->val[i] = 0;
1329 }
1330 }
1331 aval = b->val[A_ATOM];
1332 xval = b->val[X_ATOM];
1333 for (s = b->stmts; s; s = s->next)
1334 opt_stmt(cstate, opt_state, &s->s, b->val, do_stmts);
1335
1336 /*
1337 * This is a special case: if we don't use anything from this
1338 * block, and we load the accumulator or index register with a
1339 * value that is already there, or if this block is a return,
1340 * eliminate all the statements.
1341 *
1342 * XXX - what if it does a store?
1343 *
1344 * XXX - why does it matter whether we use anything from this
1345 * block? If the accumulator or index register doesn't change
1346 * its value, isn't that OK even if we use that value?
1347 *
1348 * XXX - if we load the accumulator with a different value,
1349 * and the block ends with a conditional branch, we obviously
1350 * can't eliminate it, as the branch depends on that value.
1351 * For the index register, the conditional branch only depends
1352 * on the index register value if the test is against the index
1353 * register value rather than a constant; if nothing uses the
1354 * value we put into the index register, and we're not testing
1355 * against the index register's value, and there aren't any
1356 * other problems that would keep us from eliminating this
1357 * block, can we eliminate it?
1358 */
1359 if (do_stmts &&
1360 ((b->out_use == 0 &&
1361 aval != VAL_UNKNOWN && b->val[A_ATOM] == aval &&
1362 xval != VAL_UNKNOWN && b->val[X_ATOM] == xval) ||
1363 BPF_CLASS(b->s.code) == BPF_RET)) {
1364 if (b->stmts != 0) {
1365 b->stmts = 0;
1366 opt_state->done = 0;
1367 }
1368 } else {
1369 opt_peep(opt_state, b);
1370 opt_deadstores(opt_state, b);
1371 }
1372 /*
1373 * Set up values for branch optimizer.
1374 */
1375 if (BPF_SRC(b->s.code) == BPF_K)
1376 b->oval = K(b->s.k);
1377 else
1378 b->oval = b->val[X_ATOM];
1379 b->et.code = b->s.code;
1380 b->ef.code = -b->s.code;
1381 }
1382
1383 /*
1384 * Return true if any register that is used on exit from 'succ', has
1385 * an exit value that is different from the corresponding exit value
1386 * from 'b'.
1387 */
1388 static int
1389 use_conflict(struct block *b, struct block *succ)
1390 {
1391 int atom;
1392 atomset use = succ->out_use;
1393
1394 if (use == 0)
1395 return 0;
1396
1397 for (atom = 0; atom < N_ATOMS; ++atom)
1398 if (ATOMELEM(use, atom))
1399 if (b->val[atom] != succ->val[atom])
1400 return 1;
1401 return 0;
1402 }
1403
1404 static struct block *
1405 fold_edge(struct block *child, struct edge *ep)
1406 {
1407 int sense;
1408 int aval0, aval1, oval0, oval1;
1409 int code = ep->code;
1410
1411 if (code < 0) {
1412 code = -code;
1413 sense = 0;
1414 } else
1415 sense = 1;
1416
1417 if (child->s.code != code)
1418 return 0;
1419
1420 aval0 = child->val[A_ATOM];
1421 oval0 = child->oval;
1422 aval1 = ep->pred->val[A_ATOM];
1423 oval1 = ep->pred->oval;
1424
1425 if (aval0 != aval1)
1426 return 0;
1427
1428 if (oval0 == oval1)
1429 /*
1430 * The operands of the branch instructions are
1431 * identical, so the result is true if a true
1432 * branch was taken to get here, otherwise false.
1433 */
1434 return sense ? JT(child) : JF(child);
1435
1436 if (sense && code == (BPF_JMP|BPF_JEQ|BPF_K))
1437 /*
1438 * At this point, we only know the comparison if we
1439 * came down the true branch, and it was an equality
1440 * comparison with a constant.
1441 *
1442 * I.e., if we came down the true branch, and the branch
1443 * was an equality comparison with a constant, we know the
1444 * accumulator contains that constant. If we came down
1445 * the false branch, or the comparison wasn't with a
1446 * constant, we don't know what was in the accumulator.
1447 *
1448 * We rely on the fact that distinct constants have distinct
1449 * value numbers.
1450 */
1451 return JF(child);
1452
1453 return 0;
1454 }
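/*
 * Example (illustrative sketch): suppose the dominating edge 'ep'
 * is the true branch of a block that tested "jeq #0x800", and
 * 'child' performs the same "jeq #0x800" on the same accumulator
 * value (same value number) against the same operand.  Any path
 * through 'ep' must take child's true branch too, so fold_edge()
 * returns JT(child), letting opt_j() route the edge around 'child'.
 */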
1455
1456 static void
1457 opt_j(opt_state_t *opt_state, struct edge *ep)
1458 {
1459 register int i, k;
1460 register struct block *target;
1461
1462 if (JT(ep->succ) == 0)
1463 return;
1464
1465 if (JT(ep->succ) == JF(ep->succ)) {
1466 /*
1467 * Common branch targets can be eliminated, provided
1468 * there is no data dependency.
1469 */
1470 if (!use_conflict(ep->pred, ep->succ->et.succ)) {
1471 opt_state->done = 0;
1472 ep->succ = JT(ep->succ);
1473 }
1474 }
1475 /*
1476 * For each edge dominator that matches the successor of this
1477 * edge, promote the edge successor to its grandchild.
1478 *
1479 * XXX We violate the set abstraction here in favor of a reasonably
1480 * efficient loop.
1481 */
1482 top:
1483 for (i = 0; i < opt_state->edgewords; ++i) {
1484 register bpf_u_int32 x = ep->edom[i];
1485
1486 while (x != 0) {
1487 k = lowest_set_bit(x);
1488 x &=~ (1 << k);
1489 k += i * BITS_PER_WORD;
1490
1491 target = fold_edge(ep->succ, opt_state->edges[k]);
1492 /*
1493 * Check that there is no data dependency between
1494 * nodes that will be violated if we move the edge.
1495 */
1496 if (target != 0 && !use_conflict(ep->pred, target)) {
1497 opt_state->done = 0;
1498 ep->succ = target;
1499 if (JT(target) != 0)
1500 /*
1501 * Start over unless we hit a leaf.
1502 */
1503 goto top;
1504 return;
1505 }
1506 }
1507 }
1508 }
1509
1510
1511 static void
1512 or_pullup(opt_state_t *opt_state, struct block *b)
1513 {
1514 int val, at_top;
1515 struct block *pull;
1516 struct block **diffp, **samep;
1517 struct edge *ep;
1518
1519 ep = b->in_edges;
1520 if (ep == 0)
1521 return;
1522
1523 /*
1524 * Make sure each predecessor loads the same value.
1525 * XXX why?
1526 */
1527 val = ep->pred->val[A_ATOM];
1528 for (ep = ep->next; ep != 0; ep = ep->next)
1529 if (val != ep->pred->val[A_ATOM])
1530 return;
1531
1532 if (JT(b->in_edges->pred) == b)
1533 diffp = &JT(b->in_edges->pred);
1534 else
1535 diffp = &JF(b->in_edges->pred);
1536
1537 at_top = 1;
1538 for (;;) {
1539 if (*diffp == 0)
1540 return;
1541
1542 if (JT(*diffp) != JT(b))
1543 return;
1544
1545 if (!SET_MEMBER((*diffp)->dom, b->id))
1546 return;
1547
1548 if ((*diffp)->val[A_ATOM] != val)
1549 break;
1550
1551 diffp = &JF(*diffp);
1552 at_top = 0;
1553 }
1554 samep = &JF(*diffp);
1555 for (;;) {
1556 if (*samep == 0)
1557 return;
1558
1559 if (JT(*samep) != JT(b))
1560 return;
1561
1562 if (!SET_MEMBER((*samep)->dom, b->id))
1563 return;
1564
1565 if ((*samep)->val[A_ATOM] == val)
1566 break;
1567
1568 /* XXX Need to check that there are no data dependencies
1569 between diffp and samep. Currently, the code generator
1570 will not produce such dependencies. */
1571 samep = &JF(*samep);
1572 }
1573 #ifdef notdef
1574 /* XXX This doesn't cover everything. */
1575 for (i = 0; i < N_ATOMS; ++i)
1576 if ((*samep)->val[i] != pred->val[i])
1577 return;
1578 #endif
1579 /* Pull up the node. */
1580 pull = *samep;
1581 *samep = JF(pull);
1582 JF(pull) = *diffp;
1583
1584 /*
1585 * At the top of the chain, each predecessor needs to point at the
1586 * pulled up node. Inside the chain, there is only one predecessor
1587 * to worry about.
1588 */
1589 if (at_top) {
1590 for (ep = b->in_edges; ep != 0; ep = ep->next) {
1591 if (JT(ep->pred) == b)
1592 JT(ep->pred) = pull;
1593 else
1594 JF(ep->pred) = pull;
1595 }
1596 }
1597 else
1598 *diffp = pull;
1599
1600 opt_state->done = 0;
1601 }
1602
1603 static void
1604 and_pullup(opt_state_t *opt_state, struct block *b)
1605 {
1606 int val, at_top;
1607 struct block *pull;
1608 struct block **diffp, **samep;
1609 struct edge *ep;
1610
1611 ep = b->in_edges;
1612 if (ep == 0)
1613 return;
1614
1615 /*
1616 * Make sure each predecessor loads the same value.
1617 */
1618 val = ep->pred->val[A_ATOM];
1619 for (ep = ep->next; ep != 0; ep = ep->next)
1620 if (val != ep->pred->val[A_ATOM])
1621 return;
1622
1623 if (JT(b->in_edges->pred) == b)
1624 diffp = &JT(b->in_edges->pred);
1625 else
1626 diffp = &JF(b->in_edges->pred);
1627
1628 at_top = 1;
1629 for (;;) {
1630 if (*diffp == 0)
1631 return;
1632
1633 if (JF(*diffp) != JF(b))
1634 return;
1635
1636 if (!SET_MEMBER((*diffp)->dom, b->id))
1637 return;
1638
1639 if ((*diffp)->val[A_ATOM] != val)
1640 break;
1641
1642 diffp = &JT(*diffp);
1643 at_top = 0;
1644 }
1645 samep = &JT(*diffp);
1646 for (;;) {
1647 if (*samep == 0)
1648 return;
1649
1650 if (JF(*samep) != JF(b))
1651 return;
1652
1653 if (!SET_MEMBER((*samep)->dom, b->id))
1654 return;
1655
1656 if ((*samep)->val[A_ATOM] == val)
1657 break;
1658
1659 /* XXX Need to check that there are no data dependencies
1660 between diffp and samep. Currently, the code generator
1661 will not produce such dependencies. */
1662 samep = &JT(*samep);
1663 }
1664 #ifdef notdef
1665 /* XXX This doesn't cover everything. */
1666 for (i = 0; i < N_ATOMS; ++i)
1667 if ((*samep)->val[i] != pred->val[i])
1668 return;
1669 #endif
1670 /* Pull up the node. */
1671 pull = *samep;
1672 *samep = JT(pull);
1673 JT(pull) = *diffp;
1674
1675 /*
1676 * At the top of the chain, each predecessor needs to point at the
1677 * pulled up node. Inside the chain, there is only one predecessor
1678 * to worry about.
1679 */
1680 if (at_top) {
1681 for (ep = b->in_edges; ep != 0; ep = ep->next) {
1682 if (JT(ep->pred) == b)
1683 JT(ep->pred) = pull;
1684 else
1685 JF(ep->pred) = pull;
1686 }
1687 }
1688 else
1689 *diffp = pull;
1690
1691 opt_state->done = 0;
1692 }
1693
1694 static void
1695 opt_blks(compiler_state_t *cstate, opt_state_t *opt_state, struct icode *ic,
1696 int do_stmts)
1697 {
1698 int i, maxlevel;
1699 struct block *p;
1700
1701 init_val(opt_state);
1702 maxlevel = ic->root->level;
1703
1704 find_inedges(opt_state, ic->root);
1705 for (i = maxlevel; i >= 0; --i)
1706 for (p = opt_state->levels[i]; p; p = p->link)
1707 opt_blk(cstate, opt_state, p, do_stmts);
1708
1709 if (do_stmts)
1710 /*
1711 * No point trying to move branches; it can't possibly
1712 * make a difference at this point.
1713 */
1714 return;
1715
1716 for (i = 1; i <= maxlevel; ++i) {
1717 for (p = opt_state->levels[i]; p; p = p->link) {
1718 opt_j(opt_state, &p->et);
1719 opt_j(opt_state, &p->ef);
1720 }
1721 }
1722
1723 find_inedges(opt_state, ic->root);
1724 for (i = 1; i <= maxlevel; ++i) {
1725 for (p = opt_state->levels[i]; p; p = p->link) {
1726 or_pullup(opt_state, p);
1727 and_pullup(opt_state, p);
1728 }
1729 }
1730 }
1731
1732 static inline void
1733 link_inedge(struct edge *parent, struct block *child)
1734 {
1735 parent->next = child->in_edges;
1736 child->in_edges = parent;
1737 }
1738
1739 static void
1740 find_inedges(opt_state_t *opt_state, struct block *root)
1741 {
1742 int i;
1743 struct block *b;
1744
1745 for (i = 0; i < opt_state->n_blocks; ++i)
1746 opt_state->blocks[i]->in_edges = 0;
1747
1748 /*
1749 * Traverse the graph, adding each edge to the predecessor
1750 * list of its successors. Skip the leaves (i.e. level 0).
1751 */
1752 for (i = root->level; i > 0; --i) {
1753 for (b = opt_state->levels[i]; b != 0; b = b->link) {
1754 link_inedge(&b->et, JT(b));
1755 link_inedge(&b->ef, JF(b));
1756 }
1757 }
1758 }
1759
1760 static void
1761 opt_root(struct block **b)
1762 {
1763 struct slist *tmp, *s;
1764
1765 s = (*b)->stmts;
1766 (*b)->stmts = 0;
1767 while (BPF_CLASS((*b)->s.code) == BPF_JMP && JT(*b) == JF(*b))
1768 *b = JT(*b);
1769
1770 tmp = (*b)->stmts;
1771 if (tmp != 0)
1772 sappend(s, tmp);
1773 (*b)->stmts = s;
1774
1775 /*
1776 * If the root node is a return, then there is no
1777 * point executing any statements (since the bpf machine
1778 * has no side effects).
1779 */
1780 if (BPF_CLASS((*b)->s.code) == BPF_RET)
1781 (*b)->stmts = 0;
1782 }
1783
1784 static void
1785 opt_loop(compiler_state_t *cstate, opt_state_t *opt_state, struct icode *ic,
1786 int do_stmts)
1787 {
1788
1789 #ifdef BDEBUG
1790 if (pcap_optimizer_debug > 1 || pcap_print_dot_graph) {
1791 printf("opt_loop(root, %d) begin\n", do_stmts);
1792 opt_dump(cstate, ic);
1793 }
1794 #endif
1795 do {
1796 opt_state->done = 1;
1797 find_levels(opt_state, ic);
1798 find_dom(opt_state, ic->root);
1799 find_closure(opt_state, ic->root);
1800 find_ud(opt_state, ic->root);
1801 find_edom(opt_state, ic->root);
1802 opt_blks(cstate, opt_state, ic, do_stmts);
1803 #ifdef BDEBUG
1804 if (pcap_optimizer_debug > 1 || pcap_print_dot_graph) {
1805 printf("opt_loop(root, %d) bottom, done=%d\n", do_stmts, opt_state->done);
1806 opt_dump(cstate, ic);
1807 }
1808 #endif
1809 } while (!opt_state->done);
1810 }
1811
1812 /*
1813 * Optimize the filter code in its dag representation.
1814 */
1815 void
1816 bpf_optimize(compiler_state_t *cstate, struct icode *ic)
1817 {
1818 opt_state_t opt_state;
1819
1820 opt_init(cstate, &opt_state, ic);
1821 opt_loop(cstate, &opt_state, ic, 0);
1822 opt_loop(cstate, &opt_state, ic, 1);
1823 intern_blocks(&opt_state, ic);
1824 #ifdef BDEBUG
1825 if (pcap_optimizer_debug > 1 || pcap_print_dot_graph) {
1826 printf("after intern_blocks()\n");
1827 opt_dump(cstate, ic);
1828 }
1829 #endif
1830 opt_root(&ic->root);
1831 #ifdef BDEBUG
1832 if (pcap_optimizer_debug > 1 || pcap_print_dot_graph) {
1833 printf("after opt_root()\n");
1834 opt_dump(cstate, ic);
1835 }
1836 #endif
1837 opt_cleanup(&opt_state);
1838 }
1839
1840 static void
1841 make_marks(struct icode *ic, struct block *p)
1842 {
1843 if (!isMarked(ic, p)) {
1844 Mark(ic, p);
1845 if (BPF_CLASS(p->s.code) != BPF_RET) {
1846 make_marks(ic, JT(p));
1847 make_marks(ic, JF(p));
1848 }
1849 }
1850 }
1851
1852 /*
1853 * Mark code array such that isMarked(ic, i) is true
1854 * only for nodes that are alive.
1855 */
1856 static void
1857 mark_code(struct icode *ic)
1858 {
1859 ic->cur_mark += 1;
1860 make_marks(ic, ic->root);
1861 }
1862
1863 /*
1864 * True iff the two stmt lists load the same value from the packet into
1865 * the accumulator.
1866 */
1867 static int
1868 eq_slist(struct slist *x, struct slist *y)
1869 {
1870 for (;;) {
1871 while (x && x->s.code == NOP)
1872 x = x->next;
1873 while (y && y->s.code == NOP)
1874 y = y->next;
1875 if (x == 0)
1876 return y == 0;
1877 if (y == 0)
1878 return x == 0;
1879 if (x->s.code != y->s.code || x->s.k != y->s.k)
1880 return 0;
1881 x = x->next;
1882 y = y->next;
1883 }
1884 }
1885
1886 static inline int
1887 eq_blk(struct block *b0, struct block *b1)
1888 {
1889 if (b0->s.code == b1->s.code &&
1890 b0->s.k == b1->s.k &&
1891 b0->et.succ == b1->et.succ &&
1892 b0->ef.succ == b1->ef.succ)
1893 return eq_slist(b0->stmts, b1->stmts);
1894 return 0;
1895 }
1896
1897 static void
1898 intern_blocks(opt_state_t *opt_state, struct icode *ic)
1899 {
1900 struct block *p;
1901 int i, j;
1902 int done1; /* don't shadow global */
1903 top:
1904 done1 = 1;
1905 for (i = 0; i < opt_state->n_blocks; ++i)
1906 opt_state->blocks[i]->link = 0;
1907
1908 mark_code(ic);
1909
1910 for (i = opt_state->n_blocks - 1; --i >= 0; ) {
1911 if (!isMarked(ic, opt_state->blocks[i]))
1912 continue;
1913 for (j = i + 1; j < opt_state->n_blocks; ++j) {
1914 if (!isMarked(ic, opt_state->blocks[j]))
1915 continue;
1916 if (eq_blk(opt_state->blocks[i], opt_state->blocks[j])) {
1917 opt_state->blocks[i]->link = opt_state->blocks[j]->link ?
1918 opt_state->blocks[j]->link : opt_state->blocks[j];
1919 break;
1920 }
1921 }
1922 }
1923 for (i = 0; i < opt_state->n_blocks; ++i) {
1924 p = opt_state->blocks[i];
1925 if (JT(p) == 0)
1926 continue;
1927 if (JT(p)->link) {
1928 done1 = 0;
1929 JT(p) = JT(p)->link;
1930 }
1931 if (JF(p)->link) {
1932 done1 = 0;
1933 JF(p) = JF(p)->link;
1934 }
1935 }
1936 if (!done1)
1937 goto top;
1938 }
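/*
 * Example (illustrative): if two reachable blocks each consist of
 * nothing but "ret #0", eq_blk() reports them equal; the earlier
 * block's 'link' is pointed at the later one, every branch into the
 * earlier block is redirected through that link, and the next
 * mark_code() pass leaves only one copy alive.
 */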
1939
1940 static void
1941 opt_cleanup(opt_state_t *opt_state)
1942 {
1943 free((void *)opt_state->vnode_base);
1944 free((void *)opt_state->vmap);
1945 free((void *)opt_state->edges);
1946 free((void *)opt_state->space);
1947 free((void *)opt_state->levels);
1948 free((void *)opt_state->blocks);
1949 }
1950
1951 /*
1952 * Return the number of stmts in 's'.
1953 */
1954 static u_int
1955 slength(struct slist *s)
1956 {
1957 u_int n = 0;
1958
1959 for (; s; s = s->next)
1960 if (s->s.code != NOP)
1961 ++n;
1962 return n;
1963 }
1964
1965 /*
1966 * Return the number of nodes reachable by 'p'.
1967 * All nodes should be initially unmarked.
1968 */
1969 static int
1970 count_blocks(struct icode *ic, struct block *p)
1971 {
1972 if (p == 0 || isMarked(ic, p))
1973 return 0;
1974 Mark(ic, p);
1975 return count_blocks(ic, JT(p)) + count_blocks(ic, JF(p)) + 1;
1976 }
1977
1978 /*
1979 * Do a depth first search on the flow graph, numbering the
1980 * basic blocks, and entering them into the 'blocks' array.
1981 */
1982 static void
1983 number_blks_r(opt_state_t *opt_state, struct icode *ic, struct block *p)
1984 {
1985 int n;
1986
1987 if (p == 0 || isMarked(ic, p))
1988 return;
1989
1990 Mark(ic, p);
1991 n = opt_state->n_blocks++;
1992 p->id = n;
1993 opt_state->blocks[n] = p;
1994
1995 number_blks_r(opt_state, ic, JT(p));
1996 number_blks_r(opt_state, ic, JF(p));
1997 }
1998
1999 /*
2000 * Return the number of stmts in the flowgraph reachable by 'p'.
2001 * The nodes should be unmarked before calling.
2002 *
2003 * Note that "stmts" means "instructions", and that this includes
2004 *
2005 * side-effect statements in 'p' (slength(p->stmts));
2006 *
2007 * statements in the true branch from 'p' (count_stmts(JT(p)));
2008 *
2009 * statements in the false branch from 'p' (count_stmts(JF(p)));
2010 *
2011 * the conditional jump itself (1);
2012 *
2013 * an extra long jump if the true branch requires it (p->longjt);
2014 *
2015 * an extra long jump if the false branch requires it (p->longjf).
2016 */
2017 static u_int
2018 count_stmts(struct icode *ic, struct block *p)
2019 {
2020 u_int n;
2021
2022 if (p == 0 || isMarked(ic, p))
2023 return 0;
2024 Mark(ic, p);
2025 n = count_stmts(ic, JT(p)) + count_stmts(ic, JF(p));
2026 return slength(p->stmts) + n + 1 + p->longjt + p->longjf;
2027 }
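/*
 * Worked example (illustrative): a branch block with two live
 * statements whose true branch needs one extra long jump
 * contributes
 *
 *	slength(p->stmts) + 1 + p->longjt + p->longjf
 *	    = 2 + 1 + 1 + 0 = 4
 *
 * instructions, plus whatever the two subtrees contribute.
 */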
2028
2029 /*
2030 * Allocate memory. All allocation is done before optimization
2031 * is begun. A linear bound on the size of all data structures is computed
2032 * from the total number of blocks and/or statements.
2033 */
2034 static void
2035 opt_init(compiler_state_t *cstate, opt_state_t *opt_state, struct icode *ic)
2036 {
2037 bpf_u_int32 *p;
2038 int i, n, max_stmts;
2039
2040 /*
2041 * First, count the blocks, so we can malloc an array to map
2042 * block number to block. Then, put the blocks into the array.
2043 */
2044 unMarkAll(ic);
2045 n = count_blocks(ic, ic->root);
2046 opt_state->blocks = (struct block **)calloc(n, sizeof(*opt_state->blocks));
2047 if (opt_state->blocks == NULL)
2048 bpf_error(cstate, "malloc");
2049 unMarkAll(ic);
2050 opt_state->n_blocks = 0;
2051 number_blks_r(opt_state, ic, ic->root);
2052
2053 opt_state->n_edges = 2 * opt_state->n_blocks;
2054 opt_state->edges = (struct edge **)calloc(opt_state->n_edges, sizeof(*opt_state->edges));
2055 if (opt_state->edges == NULL)
2056 bpf_error(cstate, "malloc");
2057
2058 /*
2059 * The number of levels is bounded by the number of nodes.
2060 */
2061 opt_state->levels = (struct block **)calloc(opt_state->n_blocks, sizeof(*opt_state->levels));
2062 if (opt_state->levels == NULL)
2063 bpf_error(cstate, "malloc");
2064
2065 opt_state->edgewords = opt_state->n_edges / (8 * sizeof(bpf_u_int32)) + 1;
2066 opt_state->nodewords = opt_state->n_blocks / (8 * sizeof(bpf_u_int32)) + 1;
2067
2068 /* XXX */
2069 opt_state->space = (bpf_u_int32 *)malloc(2 * opt_state->n_blocks * opt_state->nodewords * sizeof(*opt_state->space)
2070 + opt_state->n_edges * opt_state->edgewords * sizeof(*opt_state->space));
2071 if (opt_state->space == NULL)
2072 bpf_error(cstate, "malloc");
2073 p = opt_state->space;
2074 opt_state->all_dom_sets = p;
2075 for (i = 0; i < n; ++i) {
2076 opt_state->blocks[i]->dom = p;
2077 p += opt_state->nodewords;
2078 }
2079 opt_state->all_closure_sets = p;
2080 for (i = 0; i < n; ++i) {
2081 opt_state->blocks[i]->closure = p;
2082 p += opt_state->nodewords;
2083 }
2084 opt_state->all_edge_sets = p;
2085 for (i = 0; i < n; ++i) {
2086 register struct block *b = opt_state->blocks[i];
2087
2088 b->et.edom = p;
2089 p += opt_state->edgewords;
2090 b->ef.edom = p;
2091 p += opt_state->edgewords;
2092 b->et.id = i;
2093 opt_state->edges[i] = &b->et;
2094 b->ef.id = opt_state->n_blocks + i;
2095 opt_state->edges[opt_state->n_blocks + i] = &b->ef;
2096 b->et.pred = b;
2097 b->ef.pred = b;
2098 }
2099 max_stmts = 0;
2100 for (i = 0; i < n; ++i)
2101 max_stmts += slength(opt_state->blocks[i]->stmts) + 1;
2102 /*
2103 * We allocate at most 3 value numbers per statement,
2104 * so this is an upper bound on the number of valnodes
2105 * we'll need.
2106 */
2107 opt_state->maxval = 3 * max_stmts;
2108 opt_state->vmap = (struct vmapinfo *)calloc(opt_state->maxval, sizeof(*opt_state->vmap));
2109 opt_state->vnode_base = (struct valnode *)calloc(opt_state->maxval, sizeof(*opt_state->vnode_base));
2110 if (opt_state->vmap == NULL || opt_state->vnode_base == NULL)
2111 bpf_error(cstate, "malloc");
2112 }
2113
2114 /*
2115 * This is only used when supporting optimizer debugging. It is
2116 * global state, so do *not* do more than one compile in parallel
2117 * and expect it to provide meaningful information.
2118 */
2119 #ifdef BDEBUG
2120 int bids[NBIDS];
2121 #endif
2122
2123 /*
2124 * Returns true if successful. Returns false if a branch has
2125 * an offset that is too large. If so, we have marked that
2126 * branch so that on a subsequent iteration, it will be treated
2127 * properly.
2128 */
static int
convert_code_r(compiler_state_t *cstate, conv_state_t *conv_state,
    struct icode *ic, struct block *p)
{
	struct bpf_insn *dst;
	struct slist *src;
	u_int slen;
	u_int off;
	u_int extrajmps;	/* number of extra jumps inserted */
	struct slist **offset = NULL;

	if (p == 0 || isMarked(ic, p))
		return (1);
	Mark(ic, p);

	if (convert_code_r(cstate, conv_state, ic, JF(p)) == 0)
		return (0);
	if (convert_code_r(cstate, conv_state, ic, JT(p)) == 0)
		return (0);

	slen = slength(p->stmts);
	dst = conv_state->ftail -= (slen + 1 + p->longjt + p->longjf);
		/* inflate length by any extra jumps */

	p->offset = (int)(dst - conv_state->fstart);

	/* generate offset[] for convenience */
	if (slen) {
		offset = (struct slist **)calloc(slen, sizeof(struct slist *));
		if (!offset) {
			bpf_error(cstate, "not enough core");
			/*NOTREACHED*/
		}
	}
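	/*
	 * offset[i] maps instruction slot i within this block to the
	 * corresponding statement, so block-local jump targets can be
	 * resolved below by comparing slist pointers.
	 */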
	src = p->stmts;
	for (off = 0; off < slen && src; off++) {
#if 0
		printf("off=%d src=%x\n", off, src);
#endif
		offset[off] = src;
		src = src->next;
	}

	off = 0;
	for (src = p->stmts; src; src = src->next) {
		if (src->s.code == NOP)
			continue;
		dst->code = (u_short)src->s.code;
		dst->k = src->s.k;

		/* fill block-local relative jump */
		if (BPF_CLASS(src->s.code) != BPF_JMP || src->s.code == (BPF_JMP|BPF_JA)) {
#if 0
			if (src->s.jt || src->s.jf) {
				bpf_error(cstate, "illegal jmp destination");
				/*NOTREACHED*/
			}
#endif
			goto filled;
		}
		if (off == slen - 2)	/*???*/
			goto filled;

	    {
		u_int i;
		int jt, jf;
		const char ljerr[] = "%s for block-local relative jump: off=%d";

#if 0
		printf("code=%x off=%d %x %x\n", src->s.code,
			off, src->s.jt, src->s.jf);
#endif

		if (!src->s.jt || !src->s.jf) {
			bpf_error(cstate, ljerr, "no jmp destination", off);
			/*NOTREACHED*/
		}

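		/*
		 * Search offset[] for the statements the jt and jf
		 * pointers refer to; the encoded displacement is the
		 * target's slot index minus the current slot minus one,
		 * since a BPF jump of 0 falls through to the next
		 * instruction.
		 */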
		jt = jf = 0;
		for (i = 0; i < slen; i++) {
			if (offset[i] == src->s.jt) {
				if (jt) {
					bpf_error(cstate, ljerr, "multiple matches", off);
					/*NOTREACHED*/
				}

				if (i - off - 1 >= 256) {
					bpf_error(cstate, ljerr, "out-of-range jump", off);
					/*NOTREACHED*/
				}
				dst->jt = (u_char)(i - off - 1);
				jt++;
			}
			if (offset[i] == src->s.jf) {
				if (jf) {
					bpf_error(cstate, ljerr, "multiple matches", off);
					/*NOTREACHED*/
				}
				if (i - off - 1 >= 256) {
					bpf_error(cstate, ljerr, "out-of-range jump", off);
					/*NOTREACHED*/
				}
				dst->jf = (u_char)(i - off - 1);
				jf++;
			}
		}
		if (!jt || !jf) {
			bpf_error(cstate, ljerr, "no destination found", off);
			/*NOTREACHED*/
		}
	    }
filled:
		++dst;
		++off;
	}
	if (offset)
		free(offset);

#ifdef BDEBUG
	if (dst - conv_state->fstart < NBIDS)
		bids[dst - conv_state->fstart] = p->id + 1;
#endif
	dst->code = (u_short)p->s.code;
	dst->k = p->s.k;
	if (JT(p)) {
		extrajmps = 0;
		off = JT(p)->offset - (p->offset + slen) - 1;
		if (off >= 256) {
		    /* offset too large for branch, must add a jump */
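		    /*
		     * The branch's jt will point at an unconditional
		     * BPF_JA placed immediately after this instruction;
		     * that jump's own displacement is the original
		     * distance minus the extra jump slots it sits after.
		     */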
		    if (p->longjt == 0) {
			/* mark this instruction and retry */
			p->longjt++;
			return(0);
		    }
		    /* branch if T to following jump */
		    if (extrajmps >= 256) {
			bpf_error(cstate, "too many extra jumps");
			/*NOTREACHED*/
		    }
		    dst->jt = (u_char)extrajmps;
		    extrajmps++;
		    dst[extrajmps].code = BPF_JMP|BPF_JA;
		    dst[extrajmps].k = off - extrajmps;
		}
		else
		    dst->jt = (u_char)off;
		off = JF(p)->offset - (p->offset + slen) - 1;
		if (off >= 256) {
		    /* offset too large for branch, must add a jump */
		    if (p->longjf == 0) {
			/* mark this instruction and retry */
			p->longjf++;
			return(0);
		    }
		    /* branch if F to following jump */
		    /* if two jumps are inserted, F goes to second one */
		    if (extrajmps >= 256) {
			bpf_error(cstate, "too many extra jumps");
			/*NOTREACHED*/
		    }
		    dst->jf = (u_char)extrajmps;
		    extrajmps++;
		    dst[extrajmps].code = BPF_JMP|BPF_JA;
		    dst[extrajmps].k = off - extrajmps;
		}
		else
		    dst->jf = (u_char)off;
	}
	return (1);
}


/*
 * Convert flowgraph intermediate representation to the
 * BPF array representation.  Set *lenp to the number of instructions.
 *
 * This routine does *NOT* leak the memory pointed to by fp.  It *must
 * not* do free(fp) before returning fp; doing so would make no sense,
 * as the BPF array pointed to by the return value of icode_to_fcode()
 * must be valid - it's being returned for use in a bpf_program structure.
 *
 * If it appears that icode_to_fcode() is leaking, the problem is that
 * the program using pcap_compile() is failing to free the memory in
 * the BPF program when it's done - the leak is in the program, not in
 * the routine that happens to be allocating the memory.  (By analogy, if
 * a program calls fopen() without ever calling fclose() on the FILE *,
 * it will leak the FILE structure; the leak is not in fopen(), it's in
 * the program.)  Change the program to use pcap_freecode() when it's
 * done with the filter program.  See the pcap man page.
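 *
 * For example, a typical caller of pcap_compile() (a minimal sketch,
 * not code from this file; error checking omitted) looks like:
 *
 *	struct bpf_program fp;
 *
 *	pcap_compile(p, &fp, "ip and tcp", 1, PCAP_NETMASK_UNKNOWN);
 *	pcap_setfilter(p, &fp);
 *	... capture packets ...
 *	pcap_freecode(&fp);
 *
 * where the pcap_freecode() call is what releases the array this
 * routine allocated.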
 */
struct bpf_insn *
icode_to_fcode(compiler_state_t *cstate, struct icode *ic,
    struct block *root, u_int *lenp)
{
	u_int n;
	struct bpf_insn *fp;
	conv_state_t conv_state;

	/*
	 * Loop doing convert_code_r() until no branches remain
	 * with too-large offsets.
	 */
	for (;;) {
		unMarkAll(ic);
		n = *lenp = count_stmts(ic, root);

		fp = (struct bpf_insn *)malloc(sizeof(*fp) * n);
		if (fp == NULL)
			bpf_error(cstate, "malloc");
		memset((char *)fp, 0, sizeof(*fp) * n);
		conv_state.fstart = fp;
		conv_state.ftail = fp + n;
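		/*
		 * convert_code_r() emits code back to front: it
		 * decrements ftail by each block's size and writes the
		 * block there, so the array fills from fp + n down
		 * toward fstart.
		 */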

		unMarkAll(ic);
		if (convert_code_r(cstate, &conv_state, ic, root))
			break;
		free(fp);
	}

	return fp;
}

/*
 * Make a copy of a BPF program and put it in the "fcode" member of
 * a "pcap_t".
 *
 * If we fail to allocate memory for the copy, fill in the "errbuf"
 * member of the "pcap_t" with an error message, and return -1;
 * otherwise, return 0.
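 *
 * A capture module's setfilter routine can typically just wrap this
 * routine; a minimal sketch (the function name is illustrative, not
 * part of libpcap):
 *
 *	static int
 *	my_setfilter(pcap_t *p, struct bpf_program *fp)
 *	{
 *		return (install_bpf_program(p, fp));
 *	}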
 */
int
install_bpf_program(pcap_t *p, struct bpf_program *fp)
{
	size_t prog_size;

	/*
	 * Validate the program.
	 */
	if (!bpf_validate(fp->bf_insns, fp->bf_len)) {
		pcap_snprintf(p->errbuf, sizeof(p->errbuf),
		    "BPF program is not valid");
		return (-1);
	}

	/*
	 * Free up any already installed program.
	 */
	pcap_freecode(&p->fcode);

	prog_size = sizeof(*fp->bf_insns) * fp->bf_len;
	p->fcode.bf_len = fp->bf_len;
	p->fcode.bf_insns = (struct bpf_insn *)malloc(prog_size);
	if (p->fcode.bf_insns == NULL) {
		pcap_fmt_errmsg_for_errno(p->errbuf, sizeof(p->errbuf),
		    errno, "malloc");
		return (-1);
	}
	memcpy(p->fcode.bf_insns, fp->bf_insns, prog_size);
	return (0);
}

#ifdef BDEBUG
static void
dot_dump_node(struct icode *ic, struct block *block, struct bpf_program *prog,
    FILE *out)
{
	int icount, noffset;
	int i;

	if (block == NULL || isMarked(ic, block))
		return;
	Mark(ic, block);

	icount = slength(block->stmts) + 1 + block->longjt + block->longjf;
	noffset = min(block->offset + icount, (int)prog->bf_len);

	fprintf(out, "\tblock%d [shape=ellipse, id=\"block-%d\" label=\"BLOCK%d\\n", block->id, block->id, block->id);
	for (i = block->offset; i < noffset; i++) {
		fprintf(out, "\\n%s", bpf_image(prog->bf_insns + i, i));
	}
	fprintf(out, "\" tooltip=\"");
	for (i = 0; i < BPF_MEMWORDS; i++)
		if (block->val[i] != VAL_UNKNOWN)
			fprintf(out, "val[%d]=%d ", i, block->val[i]);
	fprintf(out, "val[A]=%d ", block->val[A_ATOM]);
	fprintf(out, "val[X]=%d", block->val[X_ATOM]);
	fprintf(out, "\"");
	if (JT(block) == NULL)
		fprintf(out, ", peripheries=2");
	fprintf(out, "];\n");

	dot_dump_node(ic, JT(block), prog, out);
	dot_dump_node(ic, JF(block), prog, out);
}

static void
dot_dump_edge(struct icode *ic, struct block *block, FILE *out)
{
	if (block == NULL || isMarked(ic, block))
		return;
	Mark(ic, block);

	if (JT(block)) {
		fprintf(out, "\t\"block%d\":se -> \"block%d\":n [label=\"T\"]; \n",
			block->id, JT(block)->id);
		fprintf(out, "\t\"block%d\":sw -> \"block%d\":n [label=\"F\"]; \n",
			block->id, JF(block)->id);
	}
	dot_dump_edge(ic, JT(block), out);
	dot_dump_edge(ic, JF(block), out);
}
/* Output the block CFG using the graphviz/DOT language.
 * The CFG shows each block's code, the value index of each register
 * at the block's exit, and the jump relationships between blocks.
 *
 * For example, the DOT output for the BPF filter `ip src host 1.1.1.1' is:
digraph BPF {
block0 [shape=ellipse, id="block-0" label="BLOCK0\n\n(000) ldh [12]\n(001) jeq #0x800 jt 2 jf 5" tooltip="val[A]=0 val[X]=0"];
block1 [shape=ellipse, id="block-1" label="BLOCK1\n\n(002) ld [26]\n(003) jeq #0x1010101 jt 4 jf 5" tooltip="val[A]=0 val[X]=0"];
block2 [shape=ellipse, id="block-2" label="BLOCK2\n\n(004) ret #68" tooltip="val[A]=0 val[X]=0", peripheries=2];
block3 [shape=ellipse, id="block-3" label="BLOCK3\n\n(005) ret #0" tooltip="val[A]=0 val[X]=0", peripheries=2];
"block0":se -> "block1":n [label="T"];
"block0":sw -> "block3":n [label="F"];
"block1":se -> "block2":n [label="T"];
"block1":sw -> "block3":n [label="F"];
}
 *
 * After installing graphviz (from http://www.graphviz.org/), save this
 * output as bpf.dot and run `dot -Tpng -O bpf.dot' to render the graph.
 */
static void
dot_dump(compiler_state_t *cstate, struct icode *ic)
{
	struct bpf_program f;
	FILE *out = stdout;

	memset(bids, 0, sizeof bids);
	f.bf_insns = icode_to_fcode(cstate, ic, ic->root, &f.bf_len);

	fprintf(out, "digraph BPF {\n");
	unMarkAll(ic);
	dot_dump_node(ic, ic->root, &f, out);
	unMarkAll(ic);
	dot_dump_edge(ic, ic->root, out);
	fprintf(out, "}\n");

	free((char *)f.bf_insns);
}

static void
plain_dump(compiler_state_t *cstate, struct icode *ic)
{
	struct bpf_program f;

	memset(bids, 0, sizeof bids);
	f.bf_insns = icode_to_fcode(cstate, ic, ic->root, &f.bf_len);
	bpf_dump(&f, 1);
	putchar('\n');
	free((char *)f.bf_insns);
}

static void
opt_dump(compiler_state_t *cstate, struct icode *ic)
{
	/*
	 * If the CFG, in DOT format, is requested, output it rather than
	 * the code that would be generated from that graph.
	 */
	if (pcap_print_dot_graph)
		dot_dump(cstate, ic);
	else
		plain_dump(cstate, ic);
}
#endif
