optimize.c revision 1.8.2.1 1 /* $NetBSD: optimize.c,v 1.8.2.1 2017/03/20 06:52:24 pgoyette Exp $ */
2
3 /*
4 * Copyright (c) 1988, 1989, 1990, 1991, 1993, 1994, 1995, 1996
5 * The Regents of the University of California. All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that: (1) source code distributions
9 * retain the above copyright notice and this paragraph in its entirety, (2)
10 * distributions including binary code include the above copyright notice and
11 * this paragraph in its entirety in the documentation or other materials
12 * provided with the distribution, and (3) all advertising materials mentioning
13 * features or use of this software display the following acknowledgement:
14 * ``This product includes software developed by the University of California,
15 * Lawrence Berkeley Laboratory and its contributors.'' Neither the name of
16 * the University nor the names of its contributors may be used to endorse
17 * or promote products derived from this software without specific prior
18 * written permission.
19 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
20 * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
21 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
22 *
23 * Optimization module for tcpdump intermediate representation.
24 */
25
26 #include <sys/cdefs.h>
27 __RCSID("$NetBSD: optimize.c,v 1.8.2.1 2017/03/20 06:52:24 pgoyette Exp $");
28
29 #ifdef HAVE_CONFIG_H
30 #include "config.h"
31 #endif
32
33 #ifdef _WIN32
34 #include <pcap-stdinc.h>
35 #else /* _WIN32 */
36 #if HAVE_INTTYPES_H
37 #include <inttypes.h>
38 #elif HAVE_STDINT_H
39 #include <stdint.h>
40 #endif
41 #ifdef HAVE_SYS_BITYPES_H
42 #include <sys/bitypes.h>
43 #endif
44 #include <sys/types.h>
45 #endif /* _WIN32 */
46
47 #include <stdio.h>
48 #include <stdlib.h>
49 #include <memory.h>
50 #include <string.h>
51
52 #include <errno.h>
53
54 #include "pcap-int.h"
55
56 #include "gencode.h"
57
58 #ifdef HAVE_OS_PROTO_H
59 #include "os-proto.h"
60 #endif
61
62 #ifdef BDEBUG
63 int pcap_optimizer_debug;
64 #endif
65
66 #if defined(MSDOS) && !defined(__DJGPP__)
67 extern int _w32_ffs (int mask);
68 #define ffs _w32_ffs
69 #endif
70
71 /*
72 * So is the check for _MSC_VER done because MinGW has this?
73 */
74 #if defined(_WIN32) && defined (_MSC_VER)
75 /*
76 * ffs -- vax ffs instruction
77 *
78 * XXX - with versions of VS that have it, use _BitScanForward()?
79 */
80 static int
81 ffs(int mask)
82 {
83 int bit;
84
85 if (mask == 0)
86 return(0);
87 for (bit = 1; !(mask & 1); bit++)
88 mask >>= 1;
89 return(bit);
90 }
91 #endif
92
93 /*
94 * Represents a deleted instruction.
95 */
96 #define NOP -1
97
98 /*
99 * Register numbers for use-def values.
100 * 0 through BPF_MEMWORDS-1 represent the corresponding scratch memory
101 * location. A_ATOM is the accumulator and X_ATOM is the index
102 * register.
103 */
104 #define A_ATOM BPF_MEMWORDS
105 #define X_ATOM (BPF_MEMWORDS+1)
106
107 /*
108 * This define is used to represent *both* the accumulator and
109 * x register in use-def computations.
110 * Currently, the use-def code assumes only one definition per instruction.
111 */
112 #define AX_ATOM N_ATOMS
113
114 /*
115 * These data structures are used in a Cocke and Shwarz style
116 * value numbering scheme. Since the flowgraph is acyclic,
117 * exit values can be propagated from a node's predecessors
118 * provided it is uniquely defined.
119 */
120 struct valnode {
121 int code;
122 int v0, v1;
123 int val;
124 struct valnode *next;
125 };
126
127 /* Integer constants mapped with the load immediate opcode. */
128 #define K(i) F(opt_state, BPF_LD|BPF_IMM|BPF_W, i, 0L)
129
130 struct vmapinfo {
131 int is_const;
132 bpf_int32 const_val;
133 };
134
135 struct _opt_state {
136 /*
137 * A flag to indicate that further optimization is needed.
138 * Iterative passes are continued until a given pass yields no
139 * branch movement.
140 */
141 int done;
142
143 int n_blocks;
144 struct block **blocks;
145 int n_edges;
146 struct edge **edges;
147
148 /*
149 * A bit vector set representation of the dominators.
150 * We round up the set size to the next power of two.
151 */
152 int nodewords;
153 int edgewords;
154 struct block **levels;
155 bpf_u_int32 *space;
156
157 #define BITS_PER_WORD (8*sizeof(bpf_u_int32))
158 /*
159 * True if a is in uset {p}
160 */
161 #define SET_MEMBER(p, a) \
162 ((p)[(unsigned)(a) / BITS_PER_WORD] & (1 << ((unsigned)(a) % BITS_PER_WORD)))
163
164 /*
165 * Add 'a' to uset p.
166 */
167 #define SET_INSERT(p, a) \
168 (p)[(unsigned)(a) / BITS_PER_WORD] |= (1 << ((unsigned)(a) % BITS_PER_WORD))
169
170 /*
171 * Delete 'a' from uset p.
172 */
173 #define SET_DELETE(p, a) \
174 (p)[(unsigned)(a) / BITS_PER_WORD] &= ~(1 << ((unsigned)(a) % BITS_PER_WORD))
175
176 /*
177 * a := a intersect b
178 */
179 #define SET_INTERSECT(a, b, n)\
180 {\
181 register bpf_u_int32 *_x = a, *_y = b;\
182 register int _n = n;\
183 while (--_n >= 0) *_x++ &= *_y++;\
184 }
185
186 /*
187 * a := a - b
188 */
189 #define SET_SUBTRACT(a, b, n)\
190 {\
191 register bpf_u_int32 *_x = a, *_y = b;\
192 register int _n = n;\
193 while (--_n >= 0) *_x++ &=~ *_y++;\
194 }
195
196 /*
197 * a := a union b
198 */
199 #define SET_UNION(a, b, n)\
200 {\
201 register bpf_u_int32 *_x = a, *_y = b;\
202 register int _n = n;\
203 while (--_n >= 0) *_x++ |= *_y++;\
204 }
205
206 uset all_dom_sets;
207 uset all_closure_sets;
208 uset all_edge_sets;
209
210 #define MODULUS 213
211 struct valnode *hashtbl[MODULUS];
212 int curval;
213 int maxval;
214
215 struct vmapinfo *vmap;
216 struct valnode *vnode_base;
217 struct valnode *next_vnode;
218 };
219
220 typedef struct {
221 /*
222 * Some pointers used to convert the basic block form of the code,
223 * into the array form that BPF requires. 'fstart' will point to
224 * the malloc'd array while 'ftail' is used during the recursive
225 * traversal.
226 */
227 struct bpf_insn *fstart;
228 struct bpf_insn *ftail;
229 } conv_state_t;
230
231 static void opt_init(compiler_state_t *, opt_state_t *, struct icode *);
232 static void opt_cleanup(opt_state_t *);
233
234 static void intern_blocks(opt_state_t *, struct icode *);
235
236 static void find_inedges(opt_state_t *, struct block *);
237 #ifdef BDEBUG
238 static void opt_dump(compiler_state_t *, struct icode *);
239 #endif
240
241 #ifndef MAX
242 #define MAX(a,b) ((a)>(b)?(a):(b))
243 #endif
244
245 static void
246 find_levels_r(opt_state_t *opt_state, struct icode *ic, struct block *b)
247 {
248 int level;
249
250 if (isMarked(ic, b))
251 return;
252
253 Mark(ic, b);
254 b->link = 0;
255
256 if (JT(b)) {
257 find_levels_r(opt_state, ic, JT(b));
258 find_levels_r(opt_state, ic, JF(b));
259 level = MAX(JT(b)->level, JF(b)->level) + 1;
260 } else
261 level = 0;
262 b->level = level;
263 b->link = opt_state->levels[level];
264 opt_state->levels[level] = b;
265 }
266
267 /*
268 * Level graph. The levels go from 0 at the leaves to
269 * N_LEVELS at the root. The opt_state->levels[] array points to the
270 * first node of the level list, whose elements are linked
271 * with the 'link' field of the struct block.
272 */
273 static void
274 find_levels(opt_state_t *opt_state, struct icode *ic)
275 {
276 memset((char *)opt_state->levels, 0, opt_state->n_blocks * sizeof(*opt_state->levels));
277 unMarkAll(ic);
278 find_levels_r(opt_state, ic, ic->root);
279 }
280
281 /*
282 * Find dominator relationships.
283 * Assumes graph has been leveled.
284 */
285 static void
286 find_dom(opt_state_t *opt_state, struct block *root)
287 {
288 int i;
289 struct block *b;
290 bpf_u_int32 *x;
291
292 /*
293 * Initialize sets to contain all nodes.
294 */
295 x = opt_state->all_dom_sets;
296 i = opt_state->n_blocks * opt_state->nodewords;
297 while (--i >= 0)
298 *x++ = ~0;
299 /* Root starts off empty. */
300 for (i = opt_state->nodewords; --i >= 0;)
301 root->dom[i] = 0;
302
303 /* root->level is the highest level no found. */
304 for (i = root->level; i >= 0; --i) {
305 for (b = opt_state->levels[i]; b; b = b->link) {
306 SET_INSERT(b->dom, b->id);
307 if (JT(b) == 0)
308 continue;
309 SET_INTERSECT(JT(b)->dom, b->dom, opt_state->nodewords);
310 SET_INTERSECT(JF(b)->dom, b->dom, opt_state->nodewords);
311 }
312 }
313 }
314
315 static void
316 propedom(opt_state_t *opt_state, struct edge *ep)
317 {
318 SET_INSERT(ep->edom, ep->id);
319 if (ep->succ) {
320 SET_INTERSECT(ep->succ->et.edom, ep->edom, opt_state->edgewords);
321 SET_INTERSECT(ep->succ->ef.edom, ep->edom, opt_state->edgewords);
322 }
323 }
324
325 /*
326 * Compute edge dominators.
327 * Assumes graph has been leveled and predecessors established.
328 */
329 static void
330 find_edom(opt_state_t *opt_state, struct block *root)
331 {
332 int i;
333 uset x;
334 struct block *b;
335
336 x = opt_state->all_edge_sets;
337 for (i = opt_state->n_edges * opt_state->edgewords; --i >= 0; )
338 x[i] = ~0;
339
340 /* root->level is the highest level no found. */
341 memset(root->et.edom, 0, opt_state->edgewords * sizeof(*(uset)0));
342 memset(root->ef.edom, 0, opt_state->edgewords * sizeof(*(uset)0));
343 for (i = root->level; i >= 0; --i) {
344 for (b = opt_state->levels[i]; b != 0; b = b->link) {
345 propedom(opt_state, &b->et);
346 propedom(opt_state, &b->ef);
347 }
348 }
349 }
350
351 /*
352 * Find the backwards transitive closure of the flow graph. These sets
353 * are backwards in the sense that we find the set of nodes that reach
354 * a given node, not the set of nodes that can be reached by a node.
355 *
356 * Assumes graph has been leveled.
357 */
358 static void
359 find_closure(opt_state_t *opt_state, struct block *root)
360 {
361 int i;
362 struct block *b;
363
364 /*
365 * Initialize sets to contain no nodes.
366 */
367 memset((char *)opt_state->all_closure_sets, 0,
368 opt_state->n_blocks * opt_state->nodewords * sizeof(*opt_state->all_closure_sets));
369
370 /* root->level is the highest level no found. */
371 for (i = root->level; i >= 0; --i) {
372 for (b = opt_state->levels[i]; b; b = b->link) {
373 SET_INSERT(b->closure, b->id);
374 if (JT(b) == 0)
375 continue;
376 SET_UNION(JT(b)->closure, b->closure, opt_state->nodewords);
377 SET_UNION(JF(b)->closure, b->closure, opt_state->nodewords);
378 }
379 }
380 }
381
382 /*
383 * Return the register number that is used by s. If A and X are both
384 * used, return AX_ATOM. If no register is used, return -1.
385 *
386 * The implementation should probably change to an array access.
387 */
388 static int
389 atomuse(struct stmt *s)
390 {
391 register int c = s->code;
392
393 if (c == NOP)
394 return -1;
395
396 switch (BPF_CLASS(c)) {
397
398 case BPF_RET:
399 return (BPF_RVAL(c) == BPF_A) ? A_ATOM :
400 (BPF_RVAL(c) == BPF_X) ? X_ATOM : -1;
401
402 case BPF_LD:
403 case BPF_LDX:
404 return (BPF_MODE(c) == BPF_IND) ? X_ATOM :
405 (BPF_MODE(c) == BPF_MEM) ? s->k : -1;
406
407 case BPF_ST:
408 return A_ATOM;
409
410 case BPF_STX:
411 return X_ATOM;
412
413 case BPF_JMP:
414 case BPF_ALU:
415 if (BPF_SRC(c) == BPF_X)
416 return AX_ATOM;
417 return A_ATOM;
418
419 case BPF_MISC:
420 return BPF_MISCOP(c) == BPF_TXA ? X_ATOM : A_ATOM;
421 }
422 abort();
423 /* NOTREACHED */
424 }
425
426 /*
427 * Return the register number that is defined by 's'. We assume that
428 * a single stmt cannot define more than one register. If no register
429 * is defined, return -1.
430 *
431 * The implementation should probably change to an array access.
432 */
433 static int
434 atomdef(struct stmt *s)
435 {
436 if (s->code == NOP)
437 return -1;
438
439 switch (BPF_CLASS(s->code)) {
440
441 case BPF_LD:
442 case BPF_ALU:
443 return A_ATOM;
444
445 case BPF_LDX:
446 return X_ATOM;
447
448 case BPF_ST:
449 case BPF_STX:
450 return s->k;
451
452 case BPF_MISC:
453 return BPF_MISCOP(s->code) == BPF_TAX ? X_ATOM : A_ATOM;
454 }
455 return -1;
456 }
457
458 /*
459 * Compute the sets of registers used, defined, and killed by 'b'.
460 *
461 * "Used" means that a statement in 'b' uses the register before any
462 * statement in 'b' defines it, i.e. it uses the value left in
463 * that register by a predecessor block of this block.
464 * "Defined" means that a statement in 'b' defines it.
465 * "Killed" means that a statement in 'b' defines it before any
466 * statement in 'b' uses it, i.e. it kills the value left in that
467 * register by a predecessor block of this block.
468 */
469 static void
470 compute_local_ud(struct block *b)
471 {
472 struct slist *s;
473 atomset def = 0, use = 0, killed = 0;
474 int atom;
475
476 for (s = b->stmts; s; s = s->next) {
477 if (s->s.code == NOP)
478 continue;
479 atom = atomuse(&s->s);
480 if (atom >= 0) {
481 if (atom == AX_ATOM) {
482 if (!ATOMELEM(def, X_ATOM))
483 use |= ATOMMASK(X_ATOM);
484 if (!ATOMELEM(def, A_ATOM))
485 use |= ATOMMASK(A_ATOM);
486 }
487 else if (atom < N_ATOMS) {
488 if (!ATOMELEM(def, atom))
489 use |= ATOMMASK(atom);
490 }
491 else
492 abort();
493 }
494 atom = atomdef(&s->s);
495 if (atom >= 0) {
496 if (!ATOMELEM(use, atom))
497 killed |= ATOMMASK(atom);
498 def |= ATOMMASK(atom);
499 }
500 }
501 if (BPF_CLASS(b->s.code) == BPF_JMP) {
502 /*
503 * XXX - what about RET?
504 */
505 atom = atomuse(&b->s);
506 if (atom >= 0) {
507 if (atom == AX_ATOM) {
508 if (!ATOMELEM(def, X_ATOM))
509 use |= ATOMMASK(X_ATOM);
510 if (!ATOMELEM(def, A_ATOM))
511 use |= ATOMMASK(A_ATOM);
512 }
513 else if (atom < N_ATOMS) {
514 if (!ATOMELEM(def, atom))
515 use |= ATOMMASK(atom);
516 }
517 else
518 abort();
519 }
520 }
521
522 b->def = def;
523 b->kill = killed;
524 b->in_use = use;
525 }
526
527 /*
528 * Assume graph is already leveled.
529 */
530 static void
531 find_ud(opt_state_t *opt_state, struct block *root)
532 {
533 int i, maxlevel;
534 struct block *p;
535
536 /*
537 * root->level is the highest level no found;
538 * count down from there.
539 */
540 maxlevel = root->level;
541 for (i = maxlevel; i >= 0; --i)
542 for (p = opt_state->levels[i]; p; p = p->link) {
543 compute_local_ud(p);
544 p->out_use = 0;
545 }
546
547 for (i = 1; i <= maxlevel; ++i) {
548 for (p = opt_state->levels[i]; p; p = p->link) {
549 p->out_use |= JT(p)->in_use | JF(p)->in_use;
550 p->in_use |= p->out_use &~ p->kill;
551 }
552 }
553 }
554 static void
555 init_val(opt_state_t *opt_state)
556 {
557 opt_state->curval = 0;
558 opt_state->next_vnode = opt_state->vnode_base;
559 memset((char *)opt_state->vmap, 0, opt_state->maxval * sizeof(*opt_state->vmap));
560 memset((char *)opt_state->hashtbl, 0, sizeof opt_state->hashtbl);
561 }
562
563 /* Because we really don't have an IR, this stuff is a little messy. */
564 static int
565 F(opt_state_t *opt_state, int code, int v0, int v1)
566 {
567 u_int hash;
568 int val;
569 struct valnode *p;
570
571 hash = (u_int)code ^ (v0 << 4) ^ (v1 << 8);
572 hash %= MODULUS;
573
574 for (p = opt_state->hashtbl[hash]; p; p = p->next)
575 if (p->code == code && p->v0 == v0 && p->v1 == v1)
576 return p->val;
577
578 val = ++opt_state->curval;
579 if (BPF_MODE(code) == BPF_IMM &&
580 (BPF_CLASS(code) == BPF_LD || BPF_CLASS(code) == BPF_LDX)) {
581 opt_state->vmap[val].const_val = v0;
582 opt_state->vmap[val].is_const = 1;
583 }
584 p = opt_state->next_vnode++;
585 p->val = val;
586 p->code = code;
587 p->v0 = v0;
588 p->v1 = v1;
589 p->next = opt_state->hashtbl[hash];
590 opt_state->hashtbl[hash] = p;
591
592 return val;
593 }
594
595 static inline void
596 vstore(struct stmt *s, int *valp, int newval, int alter)
597 {
598 if (alter && *valp == newval)
599 s->code = NOP;
600 else
601 *valp = newval;
602 }
603
604 /*
605 * Do constant-folding on binary operators.
606 * (Unary operators are handled elsewhere.)
607 */
608 static void
609 fold_op(compiler_state_t *cstate, struct icode *ic, opt_state_t *opt_state,
610 struct stmt *s, int v0, int v1)
611 {
612 bpf_u_int32 a, b;
613
614 a = opt_state->vmap[v0].const_val;
615 b = opt_state->vmap[v1].const_val;
616
617 switch (BPF_OP(s->code)) {
618 case BPF_ADD:
619 a += b;
620 break;
621
622 case BPF_SUB:
623 a -= b;
624 break;
625
626 case BPF_MUL:
627 a *= b;
628 break;
629
630 case BPF_DIV:
631 if (b == 0)
632 bpf_error(cstate, "division by zero");
633 a /= b;
634 break;
635
636 case BPF_MOD:
637 if (b == 0)
638 bpf_error(cstate, "modulus by zero");
639 a %= b;
640 break;
641
642 case BPF_AND:
643 a &= b;
644 break;
645
646 case BPF_OR:
647 a |= b;
648 break;
649
650 case BPF_XOR:
651 a ^= b;
652 break;
653
654 case BPF_LSH:
655 a <<= b;
656 break;
657
658 case BPF_RSH:
659 a >>= b;
660 break;
661
662 default:
663 abort();
664 }
665 s->k = a;
666 s->code = BPF_LD|BPF_IMM;
667 opt_state->done = 0;
668 }
669
670 static inline struct slist *
671 this_op(struct slist *s)
672 {
673 while (s != 0 && s->s.code == NOP)
674 s = s->next;
675 return s;
676 }
677
678 static void
679 opt_not(struct block *b)
680 {
681 struct block *tmp = JT(b);
682
683 JT(b) = JF(b);
684 JF(b) = tmp;
685 }
686
687 static void
688 opt_peep(opt_state_t *opt_state, struct block *b)
689 {
690 struct slist *s;
691 struct slist *next, *last;
692 int val;
693
694 s = b->stmts;
695 if (s == 0)
696 return;
697
698 last = s;
699 for (/*empty*/; /*empty*/; s = next) {
700 /*
701 * Skip over nops.
702 */
703 s = this_op(s);
704 if (s == 0)
705 break; /* nothing left in the block */
706
707 /*
708 * Find the next real instruction after that one
709 * (skipping nops).
710 */
711 next = this_op(s->next);
712 if (next == 0)
713 break; /* no next instruction */
714 last = next;
715
716 /*
717 * st M[k] --> st M[k]
718 * ldx M[k] tax
719 */
720 if (s->s.code == BPF_ST &&
721 next->s.code == (BPF_LDX|BPF_MEM) &&
722 s->s.k == next->s.k) {
723 opt_state->done = 0;
724 next->s.code = BPF_MISC|BPF_TAX;
725 }
726 /*
727 * ld #k --> ldx #k
728 * tax txa
729 */
730 if (s->s.code == (BPF_LD|BPF_IMM) &&
731 next->s.code == (BPF_MISC|BPF_TAX)) {
732 s->s.code = BPF_LDX|BPF_IMM;
733 next->s.code = BPF_MISC|BPF_TXA;
734 opt_state->done = 0;
735 }
736 /*
737 * This is an ugly special case, but it happens
738 * when you say tcp[k] or udp[k] where k is a constant.
739 */
740 if (s->s.code == (BPF_LD|BPF_IMM)) {
741 struct slist *add, *tax, *ild;
742
743 /*
744 * Check that X isn't used on exit from this
745 * block (which the optimizer might cause).
746 * We know the code generator won't generate
747 * any local dependencies.
748 */
749 if (ATOMELEM(b->out_use, X_ATOM))
750 continue;
751
752 /*
753 * Check that the instruction following the ldi
754 * is an addx, or it's an ldxms with an addx
755 * following it (with 0 or more nops between the
756 * ldxms and addx).
757 */
758 if (next->s.code != (BPF_LDX|BPF_MSH|BPF_B))
759 add = next;
760 else
761 add = this_op(next->next);
762 if (add == 0 || add->s.code != (BPF_ALU|BPF_ADD|BPF_X))
763 continue;
764
765 /*
766 * Check that a tax follows that (with 0 or more
767 * nops between them).
768 */
769 tax = this_op(add->next);
770 if (tax == 0 || tax->s.code != (BPF_MISC|BPF_TAX))
771 continue;
772
773 /*
774 * Check that an ild follows that (with 0 or more
775 * nops between them).
776 */
777 ild = this_op(tax->next);
778 if (ild == 0 || BPF_CLASS(ild->s.code) != BPF_LD ||
779 BPF_MODE(ild->s.code) != BPF_IND)
780 continue;
781 /*
782 * We want to turn this sequence:
783 *
784 * (004) ldi #0x2 {s}
785 * (005) ldxms [14] {next} -- optional
786 * (006) addx {add}
787 * (007) tax {tax}
788 * (008) ild [x+0] {ild}
789 *
790 * into this sequence:
791 *
792 * (004) nop
793 * (005) ldxms [14]
794 * (006) nop
795 * (007) nop
796 * (008) ild [x+2]
797 *
798 * XXX We need to check that X is not
799 * subsequently used, because we want to change
800 * what'll be in it after this sequence.
801 *
802 * We know we can eliminate the accumulator
803 * modifications earlier in the sequence since
804 * it is defined by the last stmt of this sequence
805 * (i.e., the last statement of the sequence loads
806 * a value into the accumulator, so we can eliminate
807 * earlier operations on the accumulator).
808 */
809 ild->s.k += s->s.k;
810 s->s.code = NOP;
811 add->s.code = NOP;
812 tax->s.code = NOP;
813 opt_state->done = 0;
814 }
815 }
816 /*
817 * If the comparison at the end of a block is an equality
818 * comparison against a constant, and nobody uses the value
819 * we leave in the A register at the end of a block, and
820 * the operation preceding the comparison is an arithmetic
821 * operation, we can sometime optimize it away.
822 */
823 if (b->s.code == (BPF_JMP|BPF_JEQ|BPF_K) &&
824 !ATOMELEM(b->out_use, A_ATOM)) {
825 /*
826 * We can optimize away certain subtractions of the
827 * X register.
828 */
829 if (last->s.code == (BPF_ALU|BPF_SUB|BPF_X)) {
830 val = b->val[X_ATOM];
831 if (opt_state->vmap[val].is_const) {
832 /*
833 * If we have a subtract to do a comparison,
834 * and the X register is a known constant,
835 * we can merge this value into the
836 * comparison:
837 *
838 * sub x -> nop
839 * jeq #y jeq #(x+y)
840 */
841 b->s.k += opt_state->vmap[val].const_val;
842 last->s.code = NOP;
843 opt_state->done = 0;
844 } else if (b->s.k == 0) {
845 /*
846 * If the X register isn't a constant,
847 * and the comparison in the test is
848 * against 0, we can compare with the
849 * X register, instead:
850 *
851 * sub x -> nop
852 * jeq #0 jeq x
853 */
854 last->s.code = NOP;
855 b->s.code = BPF_JMP|BPF_JEQ|BPF_X;
856 opt_state->done = 0;
857 }
858 }
859 /*
860 * Likewise, a constant subtract can be simplified:
861 *
862 * sub #x -> nop
863 * jeq #y -> jeq #(x+y)
864 */
865 else if (last->s.code == (BPF_ALU|BPF_SUB|BPF_K)) {
866 last->s.code = NOP;
867 b->s.k += last->s.k;
868 opt_state->done = 0;
869 }
870 /*
871 * And, similarly, a constant AND can be simplified
872 * if we're testing against 0, i.e.:
873 *
874 * and #k nop
875 * jeq #0 -> jset #k
876 */
877 else if (last->s.code == (BPF_ALU|BPF_AND|BPF_K) &&
878 b->s.k == 0) {
879 b->s.k = last->s.k;
880 b->s.code = BPF_JMP|BPF_K|BPF_JSET;
881 last->s.code = NOP;
882 opt_state->done = 0;
883 opt_not(b);
884 }
885 }
886 /*
887 * jset #0 -> never
888 * jset #ffffffff -> always
889 */
890 if (b->s.code == (BPF_JMP|BPF_K|BPF_JSET)) {
891 if (b->s.k == 0)
892 JT(b) = JF(b);
893 if ((u_int)b->s.k == 0xffffffffU)
894 JF(b) = JT(b);
895 }
896 /*
897 * If we're comparing against the index register, and the index
898 * register is a known constant, we can just compare against that
899 * constant.
900 */
901 val = b->val[X_ATOM];
902 if (opt_state->vmap[val].is_const && BPF_SRC(b->s.code) == BPF_X) {
903 bpf_int32 v = opt_state->vmap[val].const_val;
904 b->s.code &= ~BPF_X;
905 b->s.k = v;
906 }
907 /*
908 * If the accumulator is a known constant, we can compute the
909 * comparison result.
910 */
911 val = b->val[A_ATOM];
912 if (opt_state->vmap[val].is_const && BPF_SRC(b->s.code) == BPF_K) {
913 bpf_int32 v = opt_state->vmap[val].const_val;
914 switch (BPF_OP(b->s.code)) {
915
916 case BPF_JEQ:
917 v = v == b->s.k;
918 break;
919
920 case BPF_JGT:
921 v = (unsigned)v > (unsigned)b->s.k;
922 break;
923
924 case BPF_JGE:
925 v = (unsigned)v >= (unsigned)b->s.k;
926 break;
927
928 case BPF_JSET:
929 v &= b->s.k;
930 break;
931
932 default:
933 abort();
934 }
935 if (JF(b) != JT(b))
936 opt_state->done = 0;
937 if (v)
938 JF(b) = JT(b);
939 else
940 JT(b) = JF(b);
941 }
942 }
943
944 /*
945 * Compute the symbolic value of expression of 's', and update
946 * anything it defines in the value table 'val'. If 'alter' is true,
947 * do various optimizations. This code would be cleaner if symbolic
948 * evaluation and code transformations weren't folded together.
949 */
950 static void
951 opt_stmt(compiler_state_t *cstate, struct icode *ic, opt_state_t *opt_state,
952 struct stmt *s, int val[], int alter)
953 {
954 int op;
955 int v;
956
957 switch (s->code) {
958
959 case BPF_LD|BPF_ABS|BPF_W:
960 case BPF_LD|BPF_ABS|BPF_H:
961 case BPF_LD|BPF_ABS|BPF_B:
962 v = F(opt_state, s->code, s->k, 0L);
963 vstore(s, &val[A_ATOM], v, alter);
964 break;
965
966 case BPF_LD|BPF_IND|BPF_W:
967 case BPF_LD|BPF_IND|BPF_H:
968 case BPF_LD|BPF_IND|BPF_B:
969 v = val[X_ATOM];
970 if (alter && opt_state->vmap[v].is_const) {
971 s->code = BPF_LD|BPF_ABS|BPF_SIZE(s->code);
972 s->k += opt_state->vmap[v].const_val;
973 v = F(opt_state, s->code, s->k, 0L);
974 opt_state->done = 0;
975 }
976 else
977 v = F(opt_state, s->code, s->k, v);
978 vstore(s, &val[A_ATOM], v, alter);
979 break;
980
981 case BPF_LD|BPF_LEN:
982 v = F(opt_state, s->code, 0L, 0L);
983 vstore(s, &val[A_ATOM], v, alter);
984 break;
985
986 case BPF_LD|BPF_IMM:
987 v = K(s->k);
988 vstore(s, &val[A_ATOM], v, alter);
989 break;
990
991 case BPF_LDX|BPF_IMM:
992 v = K(s->k);
993 vstore(s, &val[X_ATOM], v, alter);
994 break;
995
996 case BPF_LDX|BPF_MSH|BPF_B:
997 v = F(opt_state, s->code, s->k, 0L);
998 vstore(s, &val[X_ATOM], v, alter);
999 break;
1000
1001 case BPF_ALU|BPF_NEG:
1002 if (alter && opt_state->vmap[val[A_ATOM]].is_const) {
1003 s->code = BPF_LD|BPF_IMM;
1004 s->k = -opt_state->vmap[val[A_ATOM]].const_val;
1005 val[A_ATOM] = K(s->k);
1006 }
1007 else
1008 val[A_ATOM] = F(opt_state, s->code, val[A_ATOM], 0L);
1009 break;
1010
1011 case BPF_ALU|BPF_ADD|BPF_K:
1012 case BPF_ALU|BPF_SUB|BPF_K:
1013 case BPF_ALU|BPF_MUL|BPF_K:
1014 case BPF_ALU|BPF_DIV|BPF_K:
1015 case BPF_ALU|BPF_MOD|BPF_K:
1016 case BPF_ALU|BPF_AND|BPF_K:
1017 case BPF_ALU|BPF_OR|BPF_K:
1018 case BPF_ALU|BPF_XOR|BPF_K:
1019 case BPF_ALU|BPF_LSH|BPF_K:
1020 case BPF_ALU|BPF_RSH|BPF_K:
1021 op = BPF_OP(s->code);
1022 if (alter) {
1023 if (s->k == 0) {
1024 /* don't optimize away "sub #0"
1025 * as it may be needed later to
1026 * fixup the generated math code */
1027 if (op == BPF_ADD ||
1028 op == BPF_LSH || op == BPF_RSH ||
1029 op == BPF_OR || op == BPF_XOR) {
1030 s->code = NOP;
1031 break;
1032 }
1033 if (op == BPF_MUL || op == BPF_AND) {
1034 s->code = BPF_LD|BPF_IMM;
1035 val[A_ATOM] = K(s->k);
1036 break;
1037 }
1038 }
1039 if (opt_state->vmap[val[A_ATOM]].is_const) {
1040 fold_op(cstate, ic, opt_state, s, val[A_ATOM], K(s->k));
1041 val[A_ATOM] = K(s->k);
1042 break;
1043 }
1044 }
1045 val[A_ATOM] = F(opt_state, s->code, val[A_ATOM], K(s->k));
1046 break;
1047
1048 case BPF_ALU|BPF_ADD|BPF_X:
1049 case BPF_ALU|BPF_SUB|BPF_X:
1050 case BPF_ALU|BPF_MUL|BPF_X:
1051 case BPF_ALU|BPF_DIV|BPF_X:
1052 case BPF_ALU|BPF_MOD|BPF_X:
1053 case BPF_ALU|BPF_AND|BPF_X:
1054 case BPF_ALU|BPF_OR|BPF_X:
1055 case BPF_ALU|BPF_XOR|BPF_X:
1056 case BPF_ALU|BPF_LSH|BPF_X:
1057 case BPF_ALU|BPF_RSH|BPF_X:
1058 op = BPF_OP(s->code);
1059 if (alter && opt_state->vmap[val[X_ATOM]].is_const) {
1060 if (opt_state->vmap[val[A_ATOM]].is_const) {
1061 fold_op(cstate, ic, opt_state, s, val[A_ATOM], val[X_ATOM]);
1062 val[A_ATOM] = K(s->k);
1063 }
1064 else {
1065 s->code = BPF_ALU|BPF_K|op;
1066 s->k = opt_state->vmap[val[X_ATOM]].const_val;
1067 opt_state->done = 0;
1068 val[A_ATOM] =
1069 F(opt_state, s->code, val[A_ATOM], K(s->k));
1070 }
1071 break;
1072 }
1073 /*
1074 * Check if we're doing something to an accumulator
1075 * that is 0, and simplify. This may not seem like
1076 * much of a simplification but it could open up further
1077 * optimizations.
1078 * XXX We could also check for mul by 1, etc.
1079 */
1080 if (alter && opt_state->vmap[val[A_ATOM]].is_const
1081 && opt_state->vmap[val[A_ATOM]].const_val == 0) {
1082 if (op == BPF_ADD || op == BPF_OR || op == BPF_XOR) {
1083 s->code = BPF_MISC|BPF_TXA;
1084 vstore(s, &val[A_ATOM], val[X_ATOM], alter);
1085 break;
1086 }
1087 else if (op == BPF_MUL || op == BPF_DIV || op == BPF_MOD ||
1088 op == BPF_AND || op == BPF_LSH || op == BPF_RSH) {
1089 s->code = BPF_LD|BPF_IMM;
1090 s->k = 0;
1091 vstore(s, &val[A_ATOM], K(s->k), alter);
1092 break;
1093 }
1094 else if (op == BPF_NEG) {
1095 s->code = NOP;
1096 break;
1097 }
1098 }
1099 val[A_ATOM] = F(opt_state, s->code, val[A_ATOM], val[X_ATOM]);
1100 break;
1101
1102 case BPF_MISC|BPF_TXA:
1103 vstore(s, &val[A_ATOM], val[X_ATOM], alter);
1104 break;
1105
1106 case BPF_LD|BPF_MEM:
1107 v = val[s->k];
1108 if (alter && opt_state->vmap[v].is_const) {
1109 s->code = BPF_LD|BPF_IMM;
1110 s->k = opt_state->vmap[v].const_val;
1111 opt_state->done = 0;
1112 }
1113 vstore(s, &val[A_ATOM], v, alter);
1114 break;
1115
1116 case BPF_MISC|BPF_TAX:
1117 vstore(s, &val[X_ATOM], val[A_ATOM], alter);
1118 break;
1119
1120 case BPF_LDX|BPF_MEM:
1121 v = val[s->k];
1122 if (alter && opt_state->vmap[v].is_const) {
1123 s->code = BPF_LDX|BPF_IMM;
1124 s->k = opt_state->vmap[v].const_val;
1125 opt_state->done = 0;
1126 }
1127 vstore(s, &val[X_ATOM], v, alter);
1128 break;
1129
1130 case BPF_ST:
1131 vstore(s, &val[s->k], val[A_ATOM], alter);
1132 break;
1133
1134 case BPF_STX:
1135 vstore(s, &val[s->k], val[X_ATOM], alter);
1136 break;
1137 }
1138 }
1139
1140 static void
1141 deadstmt(opt_state_t *opt_state, register struct stmt *s, register struct stmt *last[])
1142 {
1143 register int atom;
1144
1145 atom = atomuse(s);
1146 if (atom >= 0) {
1147 if (atom == AX_ATOM) {
1148 last[X_ATOM] = 0;
1149 last[A_ATOM] = 0;
1150 }
1151 else
1152 last[atom] = 0;
1153 }
1154 atom = atomdef(s);
1155 if (atom >= 0) {
1156 if (last[atom]) {
1157 opt_state->done = 0;
1158 last[atom]->code = NOP;
1159 }
1160 last[atom] = s;
1161 }
1162 }
1163
1164 static void
1165 opt_deadstores(opt_state_t *opt_state, register struct block *b)
1166 {
1167 register struct slist *s;
1168 register int atom;
1169 struct stmt *last[N_ATOMS];
1170
1171 memset((char *)last, 0, sizeof last);
1172
1173 for (s = b->stmts; s != 0; s = s->next)
1174 deadstmt(opt_state, &s->s, last);
1175 deadstmt(opt_state, &b->s, last);
1176
1177 for (atom = 0; atom < N_ATOMS; ++atom)
1178 if (last[atom] && !ATOMELEM(b->out_use, atom)) {
1179 last[atom]->code = NOP;
1180 opt_state->done = 0;
1181 }
1182 }
1183
1184 static void
1185 opt_blk(compiler_state_t *cstate, struct icode *ic, opt_state_t *opt_state,
1186 struct block *b, int do_stmts)
1187 {
1188 struct slist *s;
1189 struct edge *p;
1190 int i;
1191 bpf_int32 aval, xval;
1192
1193 #if 0
1194 for (s = b->stmts; s && s->next; s = s->next)
1195 if (BPF_CLASS(s->s.code) == BPF_JMP) {
1196 do_stmts = 0;
1197 break;
1198 }
1199 #endif
1200
1201 /*
1202 * Initialize the atom values.
1203 */
1204 p = b->in_edges;
1205 if (p == 0) {
1206 /*
1207 * We have no predecessors, so everything is undefined
1208 * upon entry to this block.
1209 */
1210 memset((char *)b->val, 0, sizeof(b->val));
1211 } else {
1212 /*
1213 * Inherit values from our predecessors.
1214 *
1215 * First, get the values from the predecessor along the
1216 * first edge leading to this node.
1217 */
1218 memcpy((char *)b->val, (char *)p->pred->val, sizeof(b->val));
1219 /*
1220 * Now look at all the other nodes leading to this node.
1221 * If, for the predecessor along that edge, a register
1222 * has a different value from the one we have (i.e.,
1223 * control paths are merging, and the merging paths
1224 * assign different values to that register), give the
1225 * register the undefined value of 0.
1226 */
1227 while ((p = p->next) != NULL) {
1228 for (i = 0; i < N_ATOMS; ++i)
1229 if (b->val[i] != p->pred->val[i])
1230 b->val[i] = 0;
1231 }
1232 }
1233 aval = b->val[A_ATOM];
1234 xval = b->val[X_ATOM];
1235 for (s = b->stmts; s; s = s->next)
1236 opt_stmt(cstate, ic, opt_state, &s->s, b->val, do_stmts);
1237
1238 /*
1239 * This is a special case: if we don't use anything from this
1240 * block, and we load the accumulator or index register with a
1241 * value that is already there, or if this block is a return,
1242 * eliminate all the statements.
1243 *
1244 * XXX - what if it does a store?
1245 *
1246 * XXX - why does it matter whether we use anything from this
1247 * block? If the accumulator or index register doesn't change
1248 * its value, isn't that OK even if we use that value?
1249 *
1250 * XXX - if we load the accumulator with a different value,
1251 * and the block ends with a conditional branch, we obviously
1252 * can't eliminate it, as the branch depends on that value.
1253 * For the index register, the conditional branch only depends
1254 * on the index register value if the test is against the index
1255 * register value rather than a constant; if nothing uses the
1256 * value we put into the index register, and we're not testing
1257 * against the index register's value, and there aren't any
1258 * other problems that would keep us from eliminating this
1259 * block, can we eliminate it?
1260 */
1261 if (do_stmts &&
1262 ((b->out_use == 0 && aval != 0 && b->val[A_ATOM] == aval &&
1263 xval != 0 && b->val[X_ATOM] == xval) ||
1264 BPF_CLASS(b->s.code) == BPF_RET)) {
1265 if (b->stmts != 0) {
1266 b->stmts = 0;
1267 opt_state->done = 0;
1268 }
1269 } else {
1270 opt_peep(opt_state, b);
1271 opt_deadstores(opt_state, b);
1272 }
1273 /*
1274 * Set up values for branch optimizer.
1275 */
1276 if (BPF_SRC(b->s.code) == BPF_K)
1277 b->oval = K(b->s.k);
1278 else
1279 b->oval = b->val[X_ATOM];
1280 b->et.code = b->s.code;
1281 b->ef.code = -b->s.code;
1282 }
1283
1284 /*
1285 * Return true if any register that is used on exit from 'succ', has
1286 * an exit value that is different from the corresponding exit value
1287 * from 'b'.
1288 */
1289 static int
1290 use_conflict(struct block *b, struct block *succ)
1291 {
1292 int atom;
1293 atomset use = succ->out_use;
1294
1295 if (use == 0)
1296 return 0;
1297
1298 for (atom = 0; atom < N_ATOMS; ++atom)
1299 if (ATOMELEM(use, atom))
1300 if (b->val[atom] != succ->val[atom])
1301 return 1;
1302 return 0;
1303 }
1304
1305 static struct block *
1306 fold_edge(struct block *child, struct edge *ep)
1307 {
1308 int sense;
1309 int aval0, aval1, oval0, oval1;
1310 int code = ep->code;
1311
1312 if (code < 0) {
1313 code = -code;
1314 sense = 0;
1315 } else
1316 sense = 1;
1317
1318 if (child->s.code != code)
1319 return 0;
1320
1321 aval0 = child->val[A_ATOM];
1322 oval0 = child->oval;
1323 aval1 = ep->pred->val[A_ATOM];
1324 oval1 = ep->pred->oval;
1325
1326 if (aval0 != aval1)
1327 return 0;
1328
1329 if (oval0 == oval1)
1330 /*
1331 * The operands of the branch instructions are
1332 * identical, so the result is true if a true
1333 * branch was taken to get here, otherwise false.
1334 */
1335 return sense ? JT(child) : JF(child);
1336
1337 if (sense && code == (BPF_JMP|BPF_JEQ|BPF_K))
1338 /*
1339 * At this point, we only know the comparison if we
1340 * came down the true branch, and it was an equality
1341 * comparison with a constant.
1342 *
1343 * I.e., if we came down the true branch, and the branch
1344 * was an equality comparison with a constant, we know the
1345 * accumulator contains that constant. If we came down
1346 * the false branch, or the comparison wasn't with a
1347 * constant, we don't know what was in the accumulator.
1348 *
1349 * We rely on the fact that distinct constants have distinct
1350 * value numbers.
1351 */
1352 return JF(child);
1353
1354 return 0;
1355 }
1356
1357 static void
1358 opt_j(opt_state_t *opt_state, struct edge *ep)
1359 {
1360 register int i, k;
1361 register struct block *target;
1362
1363 if (JT(ep->succ) == 0)
1364 return;
1365
1366 if (JT(ep->succ) == JF(ep->succ)) {
1367 /*
1368 * Common branch targets can be eliminated, provided
1369 * there is no data dependency.
1370 */
1371 if (!use_conflict(ep->pred, ep->succ->et.succ)) {
1372 opt_state->done = 0;
1373 ep->succ = JT(ep->succ);
1374 }
1375 }
1376 /*
1377 * For each edge dominator that matches the successor of this
1378 * edge, promote the edge successor to the its grandchild.
1379 *
1380 * XXX We violate the set abstraction here in favor a reasonably
1381 * efficient loop.
1382 */
1383 top:
1384 for (i = 0; i < opt_state->edgewords; ++i) {
1385 register bpf_u_int32 x = ep->edom[i];
1386
1387 while (x != 0) {
1388 k = ffs(x) - 1;
1389 x &=~ (1 << k);
1390 k += i * BITS_PER_WORD;
1391
1392 target = fold_edge(ep->succ, opt_state->edges[k]);
1393 /*
1394 * Check that there is no data dependency between
1395 * nodes that will be violated if we move the edge.
1396 */
1397 if (target != 0 && !use_conflict(ep->pred, target)) {
1398 opt_state->done = 0;
1399 ep->succ = target;
1400 if (JT(target) != 0)
1401 /*
1402 * Start over unless we hit a leaf.
1403 */
1404 goto top;
1405 return;
1406 }
1407 }
1408 }
1409 }
1410
1411
1412 static void
1413 or_pullup(opt_state_t *opt_state, struct block *b)
1414 {
1415 int val, at_top;
1416 struct block *pull;
1417 struct block **diffp, **samep;
1418 struct edge *ep;
1419
1420 ep = b->in_edges;
1421 if (ep == 0)
1422 return;
1423
1424 /*
1425 * Make sure each predecessor loads the same value.
1426 * XXX why?
1427 */
1428 val = ep->pred->val[A_ATOM];
1429 for (ep = ep->next; ep != 0; ep = ep->next)
1430 if (val != ep->pred->val[A_ATOM])
1431 return;
1432
1433 if (JT(b->in_edges->pred) == b)
1434 diffp = &JT(b->in_edges->pred);
1435 else
1436 diffp = &JF(b->in_edges->pred);
1437
1438 at_top = 1;
1439 while (1) {
1440 if (*diffp == 0)
1441 return;
1442
1443 if (JT(*diffp) != JT(b))
1444 return;
1445
1446 if (!SET_MEMBER((*diffp)->dom, b->id))
1447 return;
1448
1449 if ((*diffp)->val[A_ATOM] != val)
1450 break;
1451
1452 diffp = &JF(*diffp);
1453 at_top = 0;
1454 }
1455 samep = &JF(*diffp);
1456 while (1) {
1457 if (*samep == 0)
1458 return;
1459
1460 if (JT(*samep) != JT(b))
1461 return;
1462
1463 if (!SET_MEMBER((*samep)->dom, b->id))
1464 return;
1465
1466 if ((*samep)->val[A_ATOM] == val)
1467 break;
1468
1469 /* XXX Need to check that there are no data dependencies
1470 between dp0 and dp1. Currently, the code generator
1471 will not produce such dependencies. */
1472 samep = &JF(*samep);
1473 }
1474 #ifdef notdef
1475 /* XXX This doesn't cover everything. */
1476 for (i = 0; i < N_ATOMS; ++i)
1477 if ((*samep)->val[i] != pred->val[i])
1478 return;
1479 #endif
1480 /* Pull up the node. */
1481 pull = *samep;
1482 *samep = JF(pull);
1483 JF(pull) = *diffp;
1484
1485 /*
1486 * At the top of the chain, each predecessor needs to point at the
1487 * pulled up node. Inside the chain, there is only one predecessor
1488 * to worry about.
1489 */
1490 if (at_top) {
1491 for (ep = b->in_edges; ep != 0; ep = ep->next) {
1492 if (JT(ep->pred) == b)
1493 JT(ep->pred) = pull;
1494 else
1495 JF(ep->pred) = pull;
1496 }
1497 }
1498 else
1499 *diffp = pull;
1500
1501 opt_state->done = 0;
1502 }
1503
1504 static void
1505 and_pullup(opt_state_t *opt_state, struct block *b)
1506 {
1507 int val, at_top;
1508 struct block *pull;
1509 struct block **diffp, **samep;
1510 struct edge *ep;
1511
1512 ep = b->in_edges;
1513 if (ep == 0)
1514 return;
1515
1516 /*
1517 * Make sure each predecessor loads the same value.
1518 */
1519 val = ep->pred->val[A_ATOM];
1520 for (ep = ep->next; ep != 0; ep = ep->next)
1521 if (val != ep->pred->val[A_ATOM])
1522 return;
1523
1524 if (JT(b->in_edges->pred) == b)
1525 diffp = &JT(b->in_edges->pred);
1526 else
1527 diffp = &JF(b->in_edges->pred);
1528
1529 at_top = 1;
1530 while (1) {
1531 if (*diffp == 0)
1532 return;
1533
1534 if (JF(*diffp) != JF(b))
1535 return;
1536
1537 if (!SET_MEMBER((*diffp)->dom, b->id))
1538 return;
1539
1540 if ((*diffp)->val[A_ATOM] != val)
1541 break;
1542
1543 diffp = &JT(*diffp);
1544 at_top = 0;
1545 }
1546 samep = &JT(*diffp);
1547 while (1) {
1548 if (*samep == 0)
1549 return;
1550
1551 if (JF(*samep) != JF(b))
1552 return;
1553
1554 if (!SET_MEMBER((*samep)->dom, b->id))
1555 return;
1556
1557 if ((*samep)->val[A_ATOM] == val)
1558 break;
1559
1560 /* XXX Need to check that there are no data dependencies
1561 between diffp and samep. Currently, the code generator
1562 will not produce such dependencies. */
1563 samep = &JT(*samep);
1564 }
1565 #ifdef notdef
1566 /* XXX This doesn't cover everything. */
1567 for (i = 0; i < N_ATOMS; ++i)
1568 if ((*samep)->val[i] != pred->val[i])
1569 return;
1570 #endif
1571 /* Pull up the node. */
1572 pull = *samep;
1573 *samep = JT(pull);
1574 JT(pull) = *diffp;
1575
1576 /*
1577 * At the top of the chain, each predecessor needs to point at the
1578 * pulled up node. Inside the chain, there is only one predecessor
1579 * to worry about.
1580 */
1581 if (at_top) {
1582 for (ep = b->in_edges; ep != 0; ep = ep->next) {
1583 if (JT(ep->pred) == b)
1584 JT(ep->pred) = pull;
1585 else
1586 JF(ep->pred) = pull;
1587 }
1588 }
1589 else
1590 *diffp = pull;
1591
1592 opt_state->done = 0;
1593 }
1594
1595 static void
1596 opt_blks(compiler_state_t *cstate, opt_state_t *opt_state, struct icode *ic,
1597 int do_stmts)
1598 {
1599 int i, maxlevel;
1600 struct block *p;
1601
1602 init_val(opt_state);
1603 maxlevel = ic->root->level;
1604
1605 find_inedges(opt_state, ic->root);
1606 for (i = maxlevel; i >= 0; --i)
1607 for (p = opt_state->levels[i]; p; p = p->link)
1608 opt_blk(cstate, ic, opt_state, p, do_stmts);
1609
1610 if (do_stmts)
1611 /*
1612 * No point trying to move branches; it can't possibly
1613 * make a difference at this point.
1614 */
1615 return;
1616
1617 for (i = 1; i <= maxlevel; ++i) {
1618 for (p = opt_state->levels[i]; p; p = p->link) {
1619 opt_j(opt_state, &p->et);
1620 opt_j(opt_state, &p->ef);
1621 }
1622 }
1623
1624 find_inedges(opt_state, ic->root);
1625 for (i = 1; i <= maxlevel; ++i) {
1626 for (p = opt_state->levels[i]; p; p = p->link) {
1627 or_pullup(opt_state, p);
1628 and_pullup(opt_state, p);
1629 }
1630 }
1631 }
1632
1633 static inline void
1634 link_inedge(struct edge *parent, struct block *child)
1635 {
1636 parent->next = child->in_edges;
1637 child->in_edges = parent;
1638 }
1639
1640 static void
1641 find_inedges(opt_state_t *opt_state, struct block *root)
1642 {
1643 int i;
1644 struct block *b;
1645
1646 for (i = 0; i < opt_state->n_blocks; ++i)
1647 opt_state->blocks[i]->in_edges = 0;
1648
1649 /*
1650 * Traverse the graph, adding each edge to the predecessor
1651 * list of its successors. Skip the leaves (i.e. level 0).
1652 */
1653 for (i = root->level; i > 0; --i) {
1654 for (b = opt_state->levels[i]; b != 0; b = b->link) {
1655 link_inedge(&b->et, JT(b));
1656 link_inedge(&b->ef, JF(b));
1657 }
1658 }
1659 }
1660
1661 static void
1662 opt_root(struct block **b)
1663 {
1664 struct slist *tmp, *s;
1665
1666 s = (*b)->stmts;
1667 (*b)->stmts = 0;
1668 while (BPF_CLASS((*b)->s.code) == BPF_JMP && JT(*b) == JF(*b))
1669 *b = JT(*b);
1670
1671 tmp = (*b)->stmts;
1672 if (tmp != 0)
1673 sappend(s, tmp);
1674 (*b)->stmts = s;
1675
1676 /*
1677 * If the root node is a return, then there is no
1678 * point executing any statements (since the bpf machine
1679 * has no side effects).
1680 */
1681 if (BPF_CLASS((*b)->s.code) == BPF_RET)
1682 (*b)->stmts = 0;
1683 }
1684
1685 static void
1686 opt_loop(compiler_state_t *cstate, opt_state_t *opt_state, struct icode *ic,
1687 int do_stmts)
1688 {
1689
1690 #ifdef BDEBUG
1691 if (pcap_optimizer_debug > 1) {
1692 printf("opt_loop(root, %d) begin\n", do_stmts);
1693 opt_dump(cstate, ic);
1694 }
1695 #endif
1696 do {
1697 opt_state->done = 1;
1698 find_levels(opt_state, ic);
1699 find_dom(opt_state, ic->root);
1700 find_closure(opt_state, ic->root);
1701 find_ud(opt_state, ic->root);
1702 find_edom(opt_state, ic->root);
1703 opt_blks(cstate, opt_state, ic, do_stmts);
1704 #ifdef BDEBUG
1705 if (pcap_optimizer_debug > 1) {
1706 printf("opt_loop(root, %d) bottom, done=%d\n", do_stmts, opt_state->done);
1707 opt_dump(cstate, ic);
1708 }
1709 #endif
1710 } while (!opt_state->done);
1711 }
1712
1713 /*
1714 * Optimize the filter code in its dag representation.
1715 */
1716 void
1717 bpf_optimize(compiler_state_t *cstate, struct icode *ic)
1718 {
1719 opt_state_t opt_state;
1720
1721 opt_init(cstate, &opt_state, ic);
1722 opt_loop(cstate, &opt_state, ic, 0);
1723 opt_loop(cstate, &opt_state, ic, 1);
1724 intern_blocks(&opt_state, ic);
1725 #ifdef BDEBUG
1726 if (pcap_optimizer_debug > 1) {
1727 printf("after intern_blocks()\n");
1728 opt_dump(cstate, ic);
1729 }
1730 #endif
1731 opt_root(&ic->root);
1732 #ifdef BDEBUG
1733 if (pcap_optimizer_debug > 1) {
1734 printf("after opt_root()\n");
1735 opt_dump(cstate, ic);
1736 }
1737 #endif
1738 opt_cleanup(&opt_state);
1739 }
1740
1741 static void
1742 make_marks(struct icode *ic, struct block *p)
1743 {
1744 if (!isMarked(ic, p)) {
1745 Mark(ic, p);
1746 if (BPF_CLASS(p->s.code) != BPF_RET) {
1747 make_marks(ic, JT(p));
1748 make_marks(ic, JF(p));
1749 }
1750 }
1751 }
1752
1753 /*
1754 * Mark code array such that isMarked(ic->cur_mark, i) is true
1755 * only for nodes that are alive.
1756 */
1757 static void
1758 mark_code(struct icode *ic)
1759 {
1760 ic->cur_mark += 1;
1761 make_marks(ic, ic->root);
1762 }
1763
1764 /*
1765 * True iff the two stmt lists load the same value from the packet into
1766 * the accumulator.
1767 */
1768 static int
1769 eq_slist(struct slist *x, struct slist *y)
1770 {
1771 while (1) {
1772 while (x && x->s.code == NOP)
1773 x = x->next;
1774 while (y && y->s.code == NOP)
1775 y = y->next;
1776 if (x == 0)
1777 return y == 0;
1778 if (y == 0)
1779 return x == 0;
1780 if (x->s.code != y->s.code || x->s.k != y->s.k)
1781 return 0;
1782 x = x->next;
1783 y = y->next;
1784 }
1785 }
1786
1787 static inline int
1788 eq_blk(struct block *b0, struct block *b1)
1789 {
1790 if (b0->s.code == b1->s.code &&
1791 b0->s.k == b1->s.k &&
1792 b0->et.succ == b1->et.succ &&
1793 b0->ef.succ == b1->ef.succ)
1794 return eq_slist(b0->stmts, b1->stmts);
1795 return 0;
1796 }
1797
1798 static void
1799 intern_blocks(opt_state_t *opt_state, struct icode *ic)
1800 {
1801 struct block *p;
1802 int i, j;
1803 int done1; /* don't shadow global */
1804 top:
1805 done1 = 1;
1806 for (i = 0; i < opt_state->n_blocks; ++i)
1807 opt_state->blocks[i]->link = 0;
1808
1809 mark_code(ic);
1810
1811 for (i = opt_state->n_blocks - 1; --i >= 0; ) {
1812 if (!isMarked(ic, opt_state->blocks[i]))
1813 continue;
1814 for (j = i + 1; j < opt_state->n_blocks; ++j) {
1815 if (!isMarked(ic, opt_state->blocks[j]))
1816 continue;
1817 if (eq_blk(opt_state->blocks[i], opt_state->blocks[j])) {
1818 opt_state->blocks[i]->link = opt_state->blocks[j]->link ?
1819 opt_state->blocks[j]->link : opt_state->blocks[j];
1820 break;
1821 }
1822 }
1823 }
1824 for (i = 0; i < opt_state->n_blocks; ++i) {
1825 p = opt_state->blocks[i];
1826 if (JT(p) == 0)
1827 continue;
1828 if (JT(p)->link) {
1829 done1 = 0;
1830 JT(p) = JT(p)->link;
1831 }
1832 if (JF(p)->link) {
1833 done1 = 0;
1834 JF(p) = JF(p)->link;
1835 }
1836 }
1837 if (!done1)
1838 goto top;
1839 }
1840
1841 static void
1842 opt_cleanup(opt_state_t *opt_state)
1843 {
1844 free((void *)opt_state->vnode_base);
1845 free((void *)opt_state->vmap);
1846 free((void *)opt_state->edges);
1847 free((void *)opt_state->space);
1848 free((void *)opt_state->levels);
1849 free((void *)opt_state->blocks);
1850 }
1851
1852 /*
1853 * Return the number of stmts in 's'.
1854 */
1855 static u_int
1856 slength(struct slist *s)
1857 {
1858 u_int n = 0;
1859
1860 for (; s; s = s->next)
1861 if (s->s.code != NOP)
1862 ++n;
1863 return n;
1864 }
1865
1866 /*
1867 * Return the number of nodes reachable by 'p'.
1868 * All nodes should be initially unmarked.
1869 */
1870 static int
1871 count_blocks(struct icode *ic, struct block *p)
1872 {
1873 if (p == 0 || isMarked(ic, p))
1874 return 0;
1875 Mark(ic, p);
1876 return count_blocks(ic, JT(p)) + count_blocks(ic, JF(p)) + 1;
1877 }
1878
1879 /*
1880 * Do a depth first search on the flow graph, numbering the
1881 * the basic blocks, and entering them into the 'blocks' array.`
1882 */
1883 static void
1884 number_blks_r(opt_state_t *opt_state, struct icode *ic, struct block *p)
1885 {
1886 int n;
1887
1888 if (p == 0 || isMarked(ic, p))
1889 return;
1890
1891 Mark(ic, p);
1892 n = opt_state->n_blocks++;
1893 p->id = n;
1894 opt_state->blocks[n] = p;
1895
1896 number_blks_r(opt_state, ic, JT(p));
1897 number_blks_r(opt_state, ic, JF(p));
1898 }
1899
1900 /*
1901 * Return the number of stmts in the flowgraph reachable by 'p'.
1902 * The nodes should be unmarked before calling.
1903 *
1904 * Note that "stmts" means "instructions", and that this includes
1905 *
1906 * side-effect statements in 'p' (slength(p->stmts));
1907 *
1908 * statements in the true branch from 'p' (count_stmts(JT(p)));
1909 *
1910 * statements in the false branch from 'p' (count_stmts(JF(p)));
1911 *
1912 * the conditional jump itself (1);
1913 *
1914 * an extra long jump if the true branch requires it (p->longjt);
1915 *
1916 * an extra long jump if the false branch requires it (p->longjf).
1917 */
1918 static u_int
1919 count_stmts(struct icode *ic, struct block *p)
1920 {
1921 u_int n;
1922
1923 if (p == 0 || isMarked(ic, p))
1924 return 0;
1925 Mark(ic, p);
1926 n = count_stmts(ic, JT(p)) + count_stmts(ic, JF(p));
1927 return slength(p->stmts) + n + 1 + p->longjt + p->longjf;
1928 }
1929
1930 /*
1931 * Allocate memory. All allocation is done before optimization
1932 * is begun. A linear bound on the size of all data structures is computed
1933 * from the total number of blocks and/or statements.
1934 */
1935 static void
1936 opt_init(compiler_state_t *cstate, opt_state_t *opt_state, struct icode *ic)
1937 {
1938 bpf_u_int32 *p;
1939 int i, n, max_stmts;
1940
1941 /*
1942 * First, count the blocks, so we can malloc an array to map
1943 * block number to block. Then, put the blocks into the array.
1944 */
1945 unMarkAll(ic);
1946 n = count_blocks(ic, ic->root);
1947 opt_state->blocks = (struct block **)calloc(n, sizeof(*opt_state->blocks));
1948 if (opt_state->blocks == NULL)
1949 bpf_error(cstate, "malloc");
1950 unMarkAll(ic);
1951 opt_state->n_blocks = 0;
1952 number_blks_r(opt_state, ic, ic->root);
1953
1954 opt_state->n_edges = 2 * opt_state->n_blocks;
1955 opt_state->edges = (struct edge **)calloc(opt_state->n_edges, sizeof(*opt_state->edges));
1956 if (opt_state->edges == NULL)
1957 bpf_error(cstate, "malloc");
1958
1959 /*
1960 * The number of levels is bounded by the number of nodes.
1961 */
1962 opt_state->levels = (struct block **)calloc(opt_state->n_blocks, sizeof(*opt_state->levels));
1963 if (opt_state->levels == NULL)
1964 bpf_error(cstate, "malloc");
1965
1966 opt_state->edgewords = opt_state->n_edges / (8 * sizeof(bpf_u_int32)) + 1;
1967 opt_state->nodewords = opt_state->n_blocks / (8 * sizeof(bpf_u_int32)) + 1;
1968
1969 /* XXX */
1970 opt_state->space = (bpf_u_int32 *)malloc(2 * opt_state->n_blocks * opt_state->nodewords * sizeof(*opt_state->space)
1971 + opt_state->n_edges * opt_state->edgewords * sizeof(*opt_state->space));
1972 if (opt_state->space == NULL)
1973 bpf_error(cstate, "malloc");
1974 p = opt_state->space;
1975 opt_state->all_dom_sets = p;
1976 for (i = 0; i < n; ++i) {
1977 opt_state->blocks[i]->dom = p;
1978 p += opt_state->nodewords;
1979 }
1980 opt_state->all_closure_sets = p;
1981 for (i = 0; i < n; ++i) {
1982 opt_state->blocks[i]->closure = p;
1983 p += opt_state->nodewords;
1984 }
1985 opt_state->all_edge_sets = p;
1986 for (i = 0; i < n; ++i) {
1987 register struct block *b = opt_state->blocks[i];
1988
1989 b->et.edom = p;
1990 p += opt_state->edgewords;
1991 b->ef.edom = p;
1992 p += opt_state->edgewords;
1993 b->et.id = i;
1994 opt_state->edges[i] = &b->et;
1995 b->ef.id = opt_state->n_blocks + i;
1996 opt_state->edges[opt_state->n_blocks + i] = &b->ef;
1997 b->et.pred = b;
1998 b->ef.pred = b;
1999 }
2000 max_stmts = 0;
2001 for (i = 0; i < n; ++i)
2002 max_stmts += slength(opt_state->blocks[i]->stmts) + 1;
2003 /*
2004 * We allocate at most 3 value numbers per statement,
2005 * so this is an upper bound on the number of valnodes
2006 * we'll need.
2007 */
2008 opt_state->maxval = 3 * max_stmts;
2009 opt_state->vmap = (struct vmapinfo *)calloc(opt_state->maxval, sizeof(*opt_state->vmap));
2010 opt_state->vnode_base = (struct valnode *)calloc(opt_state->maxval, sizeof(*opt_state->vnode_base));
2011 if (opt_state->vmap == NULL || opt_state->vnode_base == NULL)
2012 bpf_error(cstate, "malloc");
2013 }
2014
2015 /*
2016 * This is only used when supporting optimizer debugging. It is
2017 * global state, so do *not* do more than one compile in parallel
2018 * and expect it to provide meaningful information.
2019 */
2020 #ifdef BDEBUG
2021 int bids[1000];
2022 #endif
2023
2024 /*
2025 * Returns true if successful. Returns false if a branch has
2026 * an offset that is too large. If so, we have marked that
2027 * branch so that on a subsequent iteration, it will be treated
2028 * properly.
2029 */
2030 static int
2031 convert_code_r(compiler_state_t *cstate, conv_state_t *conv_state,
2032 struct icode *ic, struct block *p)
2033 {
2034 struct bpf_insn *dst;
2035 struct slist *src;
2036 u_int slen;
2037 u_int off;
2038 int extrajmps; /* number of extra jumps inserted */
2039 struct slist **offset = NULL;
2040
2041 if (p == 0 || isMarked(ic, p))
2042 return (1);
2043 Mark(ic, p);
2044
2045 if (convert_code_r(cstate, conv_state, ic, JF(p)) == 0)
2046 return (0);
2047 if (convert_code_r(cstate, conv_state, ic, JT(p)) == 0)
2048 return (0);
2049
2050 slen = slength(p->stmts);
2051 dst = conv_state->ftail -= (slen + 1 + p->longjt + p->longjf);
2052 /* inflate length by any extra jumps */
2053
2054 p->offset = (int)(dst - conv_state->fstart);
2055
2056 /* generate offset[] for convenience */
2057 if (slen) {
2058 offset = (struct slist **)calloc(slen, sizeof(struct slist *));
2059 if (!offset) {
2060 bpf_error(cstate, "not enough core");
2061 /*NOTREACHED*/
2062 }
2063 }
2064 src = p->stmts;
2065 for (off = 0; off < slen && src; off++) {
2066 #if 0
2067 printf("off=%d src=%x\n", off, src);
2068 #endif
2069 offset[off] = src;
2070 src = src->next;
2071 }
2072
2073 off = 0;
2074 for (src = p->stmts; src; src = src->next) {
2075 if (src->s.code == NOP)
2076 continue;
2077 dst->code = (u_short)src->s.code;
2078 dst->k = src->s.k;
2079
2080 /* fill block-local relative jump */
2081 if (BPF_CLASS(src->s.code) != BPF_JMP || src->s.code == (BPF_JMP|BPF_JA)) {
2082 #if 0
2083 if (src->s.jt || src->s.jf) {
2084 bpf_error(cstate, "illegal jmp destination");
2085 /*NOTREACHED*/
2086 }
2087 #endif
2088 goto filled;
2089 }
2090 if (off == slen - 2) /*???*/
2091 goto filled;
2092
2093 {
2094 u_int i;
2095 int jt, jf;
2096 static const char ljerr[] = "%s for block-local relative jump: off=%d";
2097
2098 #if 0
2099 printf("code=%x off=%d %x %x\n", src->s.code,
2100 off, src->s.jt, src->s.jf);
2101 #endif
2102
2103 if (!src->s.jt || !src->s.jf) {
2104 bpf_error(cstate, ljerr, "no jmp destination", off);
2105 /*NOTREACHED*/
2106 }
2107
2108 jt = jf = 0;
2109 for (i = 0; i < slen; i++) {
2110 if (offset[i] == src->s.jt) {
2111 if (jt) {
2112 bpf_error(cstate, ljerr, "multiple matches", off);
2113 /*NOTREACHED*/
2114 }
2115
2116 dst->jt = i - off - 1;
2117 jt++;
2118 }
2119 if (offset[i] == src->s.jf) {
2120 if (jf) {
2121 bpf_error(cstate, ljerr, "multiple matches", off);
2122 /*NOTREACHED*/
2123 }
2124 dst->jf = i - off - 1;
2125 jf++;
2126 }
2127 }
2128 if (!jt || !jf) {
2129 bpf_error(cstate, ljerr, "no destination found", off);
2130 /*NOTREACHED*/
2131 }
2132 }
2133 filled:
2134 ++dst;
2135 ++off;
2136 }
2137 if (offset)
2138 free(offset);
2139
2140 #ifdef BDEBUG
2141 bids[dst - conv_state->fstart] = p->id + 1;
2142 #endif
2143 dst->code = (u_short)p->s.code;
2144 dst->k = p->s.k;
2145 if (JT(p)) {
2146 extrajmps = 0;
2147 off = JT(p)->offset - (p->offset + slen) - 1;
2148 if (off >= 256) {
2149 /* offset too large for branch, must add a jump */
2150 if (p->longjt == 0) {
2151 /* mark this instruction and retry */
2152 p->longjt++;
2153 return(0);
2154 }
2155 /* branch if T to following jump */
2156 dst->jt = extrajmps;
2157 extrajmps++;
2158 dst[extrajmps].code = BPF_JMP|BPF_JA;
2159 dst[extrajmps].k = off - extrajmps;
2160 }
2161 else
2162 dst->jt = off;
2163 off = JF(p)->offset - (p->offset + slen) - 1;
2164 if (off >= 256) {
2165 /* offset too large for branch, must add a jump */
2166 if (p->longjf == 0) {
2167 /* mark this instruction and retry */
2168 p->longjf++;
2169 return(0);
2170 }
2171 /* branch if F to following jump */
2172 /* if two jumps are inserted, F goes to second one */
2173 dst->jf = extrajmps;
2174 extrajmps++;
2175 dst[extrajmps].code = BPF_JMP|BPF_JA;
2176 dst[extrajmps].k = off - extrajmps;
2177 }
2178 else
2179 dst->jf = off;
2180 }
2181 return (1);
2182 }
2183
2184
2185 /*
2186 * Convert flowgraph intermediate representation to the
2187 * BPF array representation. Set *lenp to the number of instructions.
2188 *
2189 * This routine does *NOT* leak the memory pointed to by fp. It *must
2190 * not* do free(fp) before returning fp; doing so would make no sense,
2191 * as the BPF array pointed to by the return value of icode_to_fcode()
2192 * must be valid - it's being returned for use in a bpf_program structure.
2193 *
2194 * If it appears that icode_to_fcode() is leaking, the problem is that
2195 * the program using pcap_compile() is failing to free the memory in
2196 * the BPF program when it's done - the leak is in the program, not in
2197 * the routine that happens to be allocating the memory. (By analogy, if
2198 * a program calls fopen() without ever calling fclose() on the FILE *,
2199 * it will leak the FILE structure; the leak is not in fopen(), it's in
2200 * the program.) Change the program to use pcap_freecode() when it's
2201 * done with the filter program. See the pcap man page.
2202 */
2203 struct bpf_insn *
2204 icode_to_fcode(compiler_state_t *cstate, struct icode *ic,
2205 struct block *root, u_int *lenp)
2206 {
2207 u_int n;
2208 struct bpf_insn *fp;
2209 conv_state_t conv_state;
2210
2211 /*
2212 * Loop doing convert_code_r() until no branches remain
2213 * with too-large offsets.
2214 */
2215 while (1) {
2216 unMarkAll(ic);
2217 n = *lenp = count_stmts(ic, root);
2218
2219 fp = (struct bpf_insn *)malloc(sizeof(*fp) * n);
2220 if (fp == NULL)
2221 bpf_error(cstate, "malloc");
2222 memset((char *)fp, 0, sizeof(*fp) * n);
2223 conv_state.fstart = fp;
2224 conv_state.ftail = fp + n;
2225
2226 unMarkAll(ic);
2227 if (convert_code_r(cstate, &conv_state, ic, root))
2228 break;
2229 free(fp);
2230 }
2231
2232 return fp;
2233 }
2234
2235 /*
2236 * Make a copy of a BPF program and put it in the "fcode" member of
2237 * a "pcap_t".
2238 *
2239 * If we fail to allocate memory for the copy, fill in the "errbuf"
2240 * member of the "pcap_t" with an error message, and return -1;
2241 * otherwise, return 0.
2242 */
2243 int
2244 install_bpf_program(pcap_t *p, struct bpf_program *fp)
2245 {
2246 size_t prog_size;
2247
2248 /*
2249 * Validate the program.
2250 */
2251 if (!bpf_validate(fp->bf_insns, fp->bf_len)) {
2252 pcap_snprintf(p->errbuf, sizeof(p->errbuf),
2253 "BPF program is not valid");
2254 return (-1);
2255 }
2256
2257 /*
2258 * Free up any already installed program.
2259 */
2260 pcap_freecode(&p->fcode);
2261
2262 prog_size = sizeof(*fp->bf_insns) * fp->bf_len;
2263 p->fcode.bf_len = fp->bf_len;
2264 p->fcode.bf_insns = (struct bpf_insn *)malloc(prog_size);
2265 if (p->fcode.bf_insns == NULL) {
2266 pcap_snprintf(p->errbuf, sizeof(p->errbuf),
2267 "malloc: %s", pcap_strerror(errno));
2268 return (-1);
2269 }
2270 memcpy(p->fcode.bf_insns, fp->bf_insns, prog_size);
2271 return (0);
2272 }
2273
2274 #ifdef BDEBUG
2275 static void
2276 dot_dump_node(struct icode *ic, struct block *block, struct bpf_program *prog,
2277 FILE *out)
2278 {
2279 int icount, noffset;
2280 int i;
2281
2282 if (block == NULL || isMarked(ic, block))
2283 return;
2284 Mark(ic, block);
2285
2286 icount = slength(block->stmts) + 1 + block->longjt + block->longjf;
2287 noffset = min(block->offset + icount, (int)prog->bf_len);
2288
2289 fprintf(out, "\tblock%d [shape=ellipse, id=\"block-%d\" label=\"BLOCK%d\\n", block->id, block->id, block->id);
2290 for (i = block->offset; i < noffset; i++) {
2291 fprintf(out, "\\n%s", bpf_image(prog->bf_insns + i, i));
2292 }
2293 fprintf(out, "\" tooltip=\"");
2294 for (i = 0; i < BPF_MEMWORDS; i++)
2295 if (block->val[i] != 0)
2296 fprintf(out, "val[%d]=%d ", i, block->val[i]);
2297 fprintf(out, "val[A]=%d ", block->val[A_ATOM]);
2298 fprintf(out, "val[X]=%d", block->val[X_ATOM]);
2299 fprintf(out, "\"");
2300 if (JT(block) == NULL)
2301 fprintf(out, ", peripheries=2");
2302 fprintf(out, "];\n");
2303
2304 dot_dump_node(ic, JT(block), prog, out);
2305 dot_dump_node(ic, JF(block), prog, out);
2306 }
2307
2308 static void
2309 dot_dump_edge(struct icode *ic, struct block *block, FILE *out)
2310 {
2311 if (block == NULL || isMarked(ic, block))
2312 return;
2313 Mark(ic, block);
2314
2315 if (JT(block)) {
2316 fprintf(out, "\t\"block%d\":se -> \"block%d\":n [label=\"T\"]; \n",
2317 block->id, JT(block)->id);
2318 fprintf(out, "\t\"block%d\":sw -> \"block%d\":n [label=\"F\"]; \n",
2319 block->id, JF(block)->id);
2320 }
2321 dot_dump_edge(ic, JT(block), out);
2322 dot_dump_edge(ic, JF(block), out);
2323 }
2324
2325 /* Output the block CFG using graphviz/DOT language
2326 * In the CFG, block's code, value index for each registers at EXIT,
2327 * and the jump relationship is show.
2328 *
2329 * example DOT for BPF `ip src host 1.1.1.1' is:
2330 digraph BPF {
2331 block0 [shape=ellipse, id="block-0" label="BLOCK0\n\n(000) ldh [12]\n(001) jeq #0x800 jt 2 jf 5" tooltip="val[A]=0 val[X]=0"];
2332 block1 [shape=ellipse, id="block-1" label="BLOCK1\n\n(002) ld [26]\n(003) jeq #0x1010101 jt 4 jf 5" tooltip="val[A]=0 val[X]=0"];
2333 block2 [shape=ellipse, id="block-2" label="BLOCK2\n\n(004) ret #68" tooltip="val[A]=0 val[X]=0", peripheries=2];
2334 block3 [shape=ellipse, id="block-3" label="BLOCK3\n\n(005) ret #0" tooltip="val[A]=0 val[X]=0", peripheries=2];
2335 "block0":se -> "block1":n [label="T"];
2336 "block0":sw -> "block3":n [label="F"];
2337 "block1":se -> "block2":n [label="T"];
2338 "block1":sw -> "block3":n [label="F"];
2339 }
2340 *
2341 * After install graphviz on http://www.graphviz.org/, save it as bpf.dot
2342 * and run `dot -Tpng -O bpf.dot' to draw the graph.
2343 */
2344 static void
2345 dot_dump(compiler_state_t *cstate, struct icode *ic)
2346 {
2347 struct bpf_program f;
2348 FILE *out = stdout;
2349
2350 memset(bids, 0, sizeof bids);
2351 f.bf_insns = icode_to_fcode(cstate, ic, ic->root, &f.bf_len);
2352
2353 fprintf(out, "digraph BPF {\n");
2354 ic->cur_mark = 0;
2355 unMarkAll(ic);
2356 dot_dump_node(ic, ic->root, &f, out);
2357 ic->cur_mark = 0;
2358 unMarkAll(ic);
2359 dot_dump_edge(ic, ic->root, out);
2360 fprintf(out, "}\n");
2361
2362 free((char *)f.bf_insns);
2363 }
2364
2365 static void
2366 plain_dump(compiler_state_t *cstate, struct icode *ic)
2367 {
2368 struct bpf_program f;
2369
2370 memset(bids, 0, sizeof bids);
2371 f.bf_insns = icode_to_fcode(cstate, ic, ic->root, &f.bf_len);
2372 bpf_dump(&f, 1);
2373 putchar('\n');
2374 free((char *)f.bf_insns);
2375 }
2376
2377 static void
2378 opt_dump(compiler_state_t *cstate, struct icode *ic)
2379 {
2380 /* if optimizer debugging is enabled, output DOT graph
2381 * `pcap_optimizer_debug=4' is equivalent to -dddd to follow -d/-dd/-ddd
2382 * convention in tcpdump command line
2383 */
2384 if (pcap_optimizer_debug > 3)
2385 dot_dump(cstate, ic);
2386 else
2387 plain_dump(cstate, ic);
2388 }
2389 #endif
2390