/*-
 * Copyright (c) 2010-2020 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This material is based upon work partially supported by The
 * NetBSD Foundation under a contract with Mindaugas Rasiukevicius.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * BPF byte-code generation for NPF rules.
 *
 * Overview
 *
 *	Each NPF rule is compiled into a BPF micro-program.  There is a
 *	BPF byte-code fragment for each piece of higher-level filtering
 *	logic, e.g. to match the L4 protocol, IP/mask, etc.  The
 *	generation process combines multiple BPF byte-code fragments
 *	into one program.
 *
 * Basic case
 *
 *	Consider a basic case where all filters should match.  They
 *	are expressed as a logical conjunction, e.g.:
 *
 *		A and B and C and D
 *
 *	Each test (filter) criterion can be evaluated to true (match) or
 *	false (no match) and the logic is as follows:
 *
 *	- If the value is true, then jump to the "next" test (offset 0).
 *
 *	- If the value is false, then jump to the JUMP_MAGIC value (0xff).
 *	  This "magic" value indicates that the jump will have to be
 *	  patched at a later stage.
 *
 *	Once all byte-code fragments are combined into one, there are
 *	two additional steps:
 *
 *	- Two instructions are appended at the end of the program: "return
 *	  success" followed by "return failure".
 *
 *	- All jumps with the JUMP_MAGIC value are patched to point to the
 *	  "return failure" instruction.
 *
 *	Therefore, if all filter criteria match, the "return success"
 *	instruction is reached, indicating a successful match of the
 *	rule.  Otherwise, if any criterion does not match, the failure
 *	path is taken and the rule does not match.
 *
 * Grouping
 *
 *	Filters can have groups, which have the effect of logical
 *	disjunction, e.g.:
 *
 *		A and B and (C or D)
 *
 *	In such a case, the logic inside the group has to be inverted,
 *	i.e. the jump values swapped.  If the test value is true, then
 *	jump out of the group; if false, then jump "next".  At the end
 *	of the group, an additional failure path is appended and the
 *	JUMP_MAGIC uses within the group are patched to jump past the
 *	said path.
 */
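
/*
 * Illustrative sketch (not emitted verbatim): conjoined fragments
 * "A and B" compile into something along the lines of:
 *
 *	... test A ...		true: next (offset 0), false: JUMP_MAGIC
 *	... test B ...		true: next (offset 0), false: JUMP_MAGIC
 *	ret NPF_BPF_SUCCESS
 *	ret NPF_BPF_FAILURE
 *
 * with every JUMP_MAGIC then patched to point to the
 * "ret NPF_BPF_FAILURE" instruction.
 */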

#include <sys/cdefs.h>
__RCSID("$NetBSD: npf_bpf_comp.c,v 1.16 2020/05/30 14:16:56 rmind Exp $");

#include <stdlib.h>
#include <stdbool.h>
#include <stddef.h>
#include <string.h>
#include <inttypes.h>
#include <err.h>
#include <assert.h>

#include <netinet/in.h>
#include <netinet/in_systm.h>
#define __FAVOR_BSD
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/udp.h>
#include <netinet/tcp.h>
#include <netinet/ip_icmp.h>
#include <netinet/icmp6.h>

#include <net/bpf.h>

#include "npfctl.h"

/*
 * Note: clear X_EQ_L4OFF when register X is invalidated, i.e. when it
 * stores something other than the L4 header offset; generally, whenever
 * BPF_LDX is used.
 */
#define	FETCHED_L3		0x01
#define	CHECKED_L4_PROTO	0x02
#define	X_EQ_L4OFF		0x04

struct npf_bpf {
	/*
	 * BPF program code, the allocated length (in bytes), the number
	 * of logical blocks and the flags.
	 */
	struct bpf_program	prog;
	size_t			alen;
	unsigned		nblocks;
	sa_family_t		af;
	uint32_t		flags;

	/*
	 * Indicators whether we are inside the group and whether this
	 * group is implementing inverted logic.
	 *
	 * The current group offset (counted in BPF instructions)
	 * and block number at the start of the group.
	 */
	unsigned		ingroup;
	bool			invert;
	unsigned		goff;
	unsigned		gblock;

	/* Track inversion (excl. mark). */
	uint32_t		invflags;

	/* BPF marks, allocated length and the real length. */
	uint32_t *		marks;
	size_t			malen;
	size_t			mlen;
};

/*
 * NPF success and failure values to be returned from BPF.
 */
#define	NPF_BPF_SUCCESS		((u_int)-1)
#define	NPF_BPF_FAILURE		0

/*
 * Magic value to indicate the failure path, which is fixed up on completion.
 * Note: this is the longest jump offset in BPF, since the offset is one byte.
 */
#define	JUMP_MAGIC		0xff

/* Reduce re-allocations by expanding in 64 byte blocks. */
#define	ALLOC_MASK		(64 - 1)
#define	ALLOC_ROUND(x)		(((x) + ALLOC_MASK) & ~ALLOC_MASK)
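/* For example, ALLOC_ROUND(65) == 128: sizes round up to a 64-byte boundary. */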

#ifndef IPV6_VERSION
#define	IPV6_VERSION	0x60
#endif

npf_bpf_t *
npfctl_bpf_create(void)
{
	return ecalloc(1, sizeof(npf_bpf_t));
}

static void
fixup_jumps(npf_bpf_t *ctx, u_int start, u_int end, bool swap)
{
	struct bpf_program *bp = &ctx->prog;

	for (u_int i = start; i < end; i++) {
		struct bpf_insn *insn = &bp->bf_insns[i];
		const u_int fail_off = end - i;
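		/*
		 * Note: BPF jump offsets are relative to the following
		 * instruction, so a jump patched with fail_off lands
		 * at instruction index end + 1.
		 */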
		bool seen_magic = false;

		if (fail_off >= JUMP_MAGIC) {
			errx(EXIT_FAILURE, "BPF generation error: "
			    "the number of instructions is over the limit");
		}
		if (BPF_CLASS(insn->code) != BPF_JMP) {
			continue;
		}
		if (BPF_OP(insn->code) == BPF_JA) {
			/*
			 * BPF_JA can be used to jump to the failure path.
			 * If we are swapping i.e. inside the group, then
			 * jump "next"; groups have a failure path appended
			 * at their end.
			 */
			if (insn->k == JUMP_MAGIC) {
				insn->k = swap ? 0 : fail_off;
			}
			continue;
		}

		/*
		 * Fixup the "magic" value.  Swap only the "magic" jumps.
		 */

		if (insn->jt == JUMP_MAGIC) {
			insn->jt = fail_off;
			seen_magic = true;
		}
		if (insn->jf == JUMP_MAGIC) {
			insn->jf = fail_off;
			seen_magic = true;
		}

		if (seen_magic && swap) {
			uint8_t jt = insn->jt;
			insn->jt = insn->jf;
			insn->jf = jt;
		}
	}
}

static void
add_insns(npf_bpf_t *ctx, struct bpf_insn *insns, size_t count)
{
	struct bpf_program *bp = &ctx->prog;
	size_t offset, len, reqlen;

	/* Note: bf_len is the count of instructions. */
	offset = bp->bf_len * sizeof(struct bpf_insn);
	len = count * sizeof(struct bpf_insn);

	/* Ensure the memory buffer for the program. */
	reqlen = ALLOC_ROUND(offset + len);
	if (reqlen > ctx->alen) {
		bp->bf_insns = erealloc(bp->bf_insns, reqlen);
		ctx->alen = reqlen;
	}

	/* Add the code block. */
	memcpy((uint8_t *)bp->bf_insns + offset, insns, len);
	bp->bf_len += count;
}

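/*
 * A block mark (bmark) is a run of 32-bit words encoded as
 * { key, nargs, arg0, ..., argN-1 }, e.g. { BM_PROTO, 1, proto }.
 */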
static void
add_bmarks(npf_bpf_t *ctx, const uint32_t *m, size_t len)
{
	size_t reqlen, nargs = m[1];

	if ((len / sizeof(uint32_t) - 2) != nargs) {
		errx(EXIT_FAILURE, "invalid BPF block description");
	}
	reqlen = ALLOC_ROUND(ctx->mlen + len);
	if (reqlen > ctx->malen) {
		ctx->marks = erealloc(ctx->marks, reqlen);
		ctx->malen = reqlen;
	}
	memcpy((uint8_t *)ctx->marks + ctx->mlen, m, len);
	ctx->mlen += len;
}

static void
done_block(npf_bpf_t *ctx, const uint32_t *m, size_t len)
{
	add_bmarks(ctx, m, len);
	ctx->nblocks++;
}

struct bpf_program *
npfctl_bpf_complete(npf_bpf_t *ctx)
{
	struct bpf_program *bp = &ctx->prog;
	const u_int retoff = bp->bf_len;

	/* No instructions (optimised out). */
	if (!bp->bf_len)
		return NULL;

	/* Add the return fragment (success and failure paths). */
	struct bpf_insn insns_ret[] = {
		BPF_STMT(BPF_RET+BPF_K, NPF_BPF_SUCCESS),
		BPF_STMT(BPF_RET+BPF_K, NPF_BPF_FAILURE),
	};
	add_insns(ctx, insns_ret, __arraycount(insns_ret));

	/* Fixup all jumps to the main failure path. */
	fixup_jumps(ctx, 0, retoff, false);

	return &ctx->prog;
}

const void *
npfctl_bpf_bmarks(npf_bpf_t *ctx, size_t *len)
{
	*len = ctx->mlen;
	return ctx->marks;
}

void
npfctl_bpf_destroy(npf_bpf_t *ctx)
{
	free(ctx->prog.bf_insns);
	free(ctx->marks);
	free(ctx);
}
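
/*
 * A minimal usage sketch (the real driver lives elsewhere in npfctl;
 * the fragment emitters below may be combined freely):
 *
 *	npf_bpf_t *bc = npfctl_bpf_create();
 *	npfctl_bpf_proto(bc, IPPROTO_TCP);
 *	struct bpf_program *bp = npfctl_bpf_complete(bc);
 *	if (bp) {
 *		... use bp->bf_insns and npfctl_bpf_bmarks(bc, &len) ...
 *	}
 *	npfctl_bpf_destroy(bc);
 */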

/*
 * npfctl_bpf_group_enter: begin a logical group.  It merely uses logical
 * disjunction (OR) for comparisons within the group.
 */
void
npfctl_bpf_group_enter(npf_bpf_t *ctx, bool invert)
{
	struct bpf_program *bp = &ctx->prog;

	assert(ctx->goff == 0);
	assert(ctx->gblock == 0);

	ctx->goff = bp->bf_len;
	ctx->gblock = ctx->nblocks;
	ctx->invert = invert;
	ctx->ingroup++;
}

void
npfctl_bpf_group_exit(npf_bpf_t *ctx)
{
	struct bpf_program *bp = &ctx->prog;
	const size_t curoff = bp->bf_len;

	assert(ctx->ingroup);
	ctx->ingroup--;

	/* If there are no blocks in the group, or only one, then nothing to do. */
	if (!ctx->invert && (ctx->nblocks - ctx->gblock) <= 1) {
		ctx->goff = ctx->gblock = 0;
		return;
	}

	/*
	 * If inverting, then prepend a jump over the failure return
	 * appended below.  If nothing in the group matched, this jump
	 * is reached as the fall-through and skips the failure path;
	 * a match inside the group takes the failure path instead.
	 */
	if (ctx->invert) {
		struct bpf_insn insns_ret[] = {
			BPF_STMT(BPF_JMP+BPF_JA, 1),
		};
		add_insns(ctx, insns_ret, __arraycount(insns_ret));
	}

	/*
	 * Append a failure return as a fall-through, i.e. if there is
	 * no match within the group.
	 */
	struct bpf_insn insns_ret[] = {
		BPF_STMT(BPF_RET+BPF_K, NPF_BPF_FAILURE),
	};
	add_insns(ctx, insns_ret, __arraycount(insns_ret));

	/*
	 * Adjust the jump offsets: on match, jump outside the group,
	 * i.e. to the current offset.  Otherwise, jump to the next
	 * instruction, which leads to the fall-through code above
	 * if nothing in the group matches.
	 */
	fixup_jumps(ctx, ctx->goff, curoff, true);
	ctx->goff = ctx->gblock = 0;
}
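
/*
 * A rough sketch of the group layout for "(C or D)" after the fixup:
 *
 *	... test C ...		match: jump past the group, else next
 *	... test D ...		match: jump past the group, else next
 *	[ ja +1 ]		inverted groups only
 *	ret NPF_BPF_FAILURE
 *
 * For an inverted group, the senses are the other way around: a match
 * lands on the failure return, while the fall-through takes the "ja"
 * and skips it.
 */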

static void
fetch_l3(npf_bpf_t *ctx, sa_family_t af, unsigned flags)
{
	unsigned ver;

	switch (af) {
	case AF_INET:
		ver = IPVERSION;
		break;
	case AF_INET6:
		ver = IPV6_VERSION >> 4;
		break;
	case AF_UNSPEC:
		ver = 0;
		break;
	default:
		abort();
	}

	/*
	 * The memory store is populated with:
	 * - BPF_MW_IPVER: IP version (4 or 6).
	 * - BPF_MW_L4OFF: L4 header offset.
	 * - BPF_MW_L4PROTO: L4 protocol.
	 */
	if ((ctx->flags & FETCHED_L3) == 0 || (af && ctx->af == 0)) {
		const uint8_t jt = ver ? 0 : JUMP_MAGIC;
		const uint8_t jf = ver ? JUMP_MAGIC : 0;
		const bool ingroup = ctx->ingroup != 0;
		const bool invert = ctx->invert;

		/*
		 * The L3 block cannot be inserted in the middle of a
		 * group; in fact, it never is.  Exit the group here
		 * and re-enter it after the L3 fragment.
		 */
		if (ingroup) {
			assert(ctx->nblocks == ctx->gblock);
			npfctl_bpf_group_exit(ctx);
		}

		/*
		 * A <- IP version; A == expected-version?
		 * If no particular version is specified, check for non-zero.
		 */
		struct bpf_insn insns_af[] = {
			BPF_STMT(BPF_LD+BPF_W+BPF_MEM, BPF_MW_IPVER),
			BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, ver, jt, jf),
		};
		add_insns(ctx, insns_af, __arraycount(insns_af));
		ctx->flags |= FETCHED_L3;
		ctx->af = af;

		if (af) {
			uint32_t mwords[] = { BM_IPVER, 1, af };
			add_bmarks(ctx, mwords, sizeof(mwords));
		}
		if (ingroup) {
			npfctl_bpf_group_enter(ctx, invert);
		}

	} else if (af && af != ctx->af) {
		errx(EXIT_FAILURE, "address family mismatch");
	}

	if ((flags & X_EQ_L4OFF) != 0 && (ctx->flags & X_EQ_L4OFF) == 0) {
		/* X <- IP header length */
		struct bpf_insn insns_hlen[] = {
			BPF_STMT(BPF_LDX+BPF_MEM, BPF_MW_L4OFF),
		};
		add_insns(ctx, insns_hlen, __arraycount(insns_hlen));
		ctx->flags |= X_EQ_L4OFF;
	}
}

static void
bm_invert_checkpoint(npf_bpf_t *ctx, const unsigned opts)
{
	uint32_t bm = 0;

	if (ctx->ingroup && ctx->invert) {
		const unsigned seen = ctx->invflags;

		if ((opts & MATCH_SRC) != 0 && (seen & MATCH_SRC) == 0) {
			bm = BM_SRC_NEG;
		}
		if ((opts & MATCH_DST) != 0 && (seen & MATCH_DST) == 0) {
			bm = BM_DST_NEG;
		}
		ctx->invflags |= opts & (MATCH_SRC | MATCH_DST);
	}
	if (bm) {
		uint32_t mwords[] = { bm, 0 };
		add_bmarks(ctx, mwords, sizeof(mwords));
	}
}

/*
 * npfctl_bpf_ipver: match the IP version.
 */
void
npfctl_bpf_ipver(npf_bpf_t *ctx, sa_family_t af)
{
	fetch_l3(ctx, af, 0);
}

/*
 * npfctl_bpf_proto: code block to match the IP version and L4 protocol.
 */
void
npfctl_bpf_proto(npf_bpf_t *ctx, unsigned proto)
{
	struct bpf_insn insns_proto[] = {
		/* A <- L4 protocol; A == expected-protocol? */
		BPF_STMT(BPF_LD+BPF_W+BPF_MEM, BPF_MW_L4PROTO),
		BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, proto, 0, JUMP_MAGIC),
	};
	add_insns(ctx, insns_proto, __arraycount(insns_proto));

	uint32_t mwords[] = { BM_PROTO, 1, proto };
	done_block(ctx, mwords, sizeof(mwords));
	ctx->flags |= CHECKED_L4_PROTO;
}

/*
 * npfctl_bpf_cidr: code block to match an IPv4 or IPv6 CIDR.
 *
 * => The IP address shall be in the network byte order.
 */
void
npfctl_bpf_cidr(npf_bpf_t *ctx, unsigned opts, sa_family_t af,
    const npf_addr_t *addr, const npf_netmask_t mask)
{
	const uint32_t *awords = (const uint32_t *)addr;
	unsigned nwords, length, maxmask, off;

	assert(((opts & MATCH_SRC) != 0) ^ ((opts & MATCH_DST) != 0));
	assert((mask && mask <= NPF_MAX_NETMASK) || mask == NPF_NO_NETMASK);

	switch (af) {
	case AF_INET:
		maxmask = 32;
		off = (opts & MATCH_SRC) ?
		    offsetof(struct ip, ip_src) :
		    offsetof(struct ip, ip_dst);
		nwords = sizeof(struct in_addr) / sizeof(uint32_t);
		break;
	case AF_INET6:
		maxmask = 128;
		off = (opts & MATCH_SRC) ?
		    offsetof(struct ip6_hdr, ip6_src) :
		    offsetof(struct ip6_hdr, ip6_dst);
		nwords = sizeof(struct in6_addr) / sizeof(uint32_t);
		break;
	default:
		abort();
	}

	/* Ensure the address family. */
	fetch_l3(ctx, af, 0);

	length = (mask == NPF_NO_NETMASK) ? maxmask : mask;

	/* CAUTION: BPF operates in host byte-order. */
	for (unsigned i = 0; i < nwords; i++) {
		const unsigned woff = i * sizeof(uint32_t);
		uint32_t word = ntohl(awords[i]);
		uint32_t wordmask;

		if (length >= 32) {
			/* The mask is a full word - do not apply it. */
			wordmask = 0;
			length -= 32;
		} else if (length) {
			wordmask = 0xffffffff << (32 - length);
			length = 0;
		} else {
			/* The mask became zero - skip the rest. */
			break;
		}
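		/*
		 * For example, a /20 prefix consumes the first word
		 * with wordmask = 0xffffffff << 12 = 0xfffff000.
		 */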

		/* A <- IP address (or one word of it) */
		struct bpf_insn insns_ip[] = {
			BPF_STMT(BPF_LD+BPF_W+BPF_ABS, off + woff),
		};
		add_insns(ctx, insns_ip, __arraycount(insns_ip));

		/* A <- (A & MASK) */
		if (wordmask) {
			struct bpf_insn insns_mask[] = {
				BPF_STMT(BPF_ALU+BPF_AND+BPF_K, wordmask),
			};
			add_insns(ctx, insns_mask, __arraycount(insns_mask));
		}

		/* A == expected-IP-word ? */
		struct bpf_insn insns_cmp[] = {
			BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, word, 0, JUMP_MAGIC),
		};
		add_insns(ctx, insns_cmp, __arraycount(insns_cmp));
	}

	uint32_t mwords[] = {
		(opts & MATCH_SRC) ? BM_SRC_CIDR : BM_DST_CIDR, 6,
		af, mask, awords[0], awords[1], awords[2], awords[3],
	};
	bm_invert_checkpoint(ctx, opts);
	done_block(ctx, mwords, sizeof(mwords));
}

/*
 * npfctl_bpf_ports: code block to match a TCP/UDP port range.
 *
 * => The port numbers shall be in the network byte order.
 */
void
npfctl_bpf_ports(npf_bpf_t *ctx, unsigned opts, in_port_t from, in_port_t to)
{
	const unsigned sport_off = offsetof(struct udphdr, uh_sport);
	const unsigned dport_off = offsetof(struct udphdr, uh_dport);
	unsigned off;

	/* TCP and UDP port offsets are the same. */
	assert(sport_off == offsetof(struct tcphdr, th_sport));
	assert(dport_off == offsetof(struct tcphdr, th_dport));
	assert(ctx->flags & CHECKED_L4_PROTO);

	assert(((opts & MATCH_SRC) != 0) ^ ((opts & MATCH_DST) != 0));
	off = (opts & MATCH_SRC) ? sport_off : dport_off;

	/* X <- IP header length */
	fetch_l3(ctx, AF_UNSPEC, X_EQ_L4OFF);

	struct bpf_insn insns_fetch[] = {
		/* A <- port */
		BPF_STMT(BPF_LD+BPF_H+BPF_IND, off),
	};
	add_insns(ctx, insns_fetch, __arraycount(insns_fetch));

	/* CAUTION: BPF operates in host byte-order. */
	from = ntohs(from);
	to = ntohs(to);
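	/*
	 * Sketch of the two cases below: a single port is one equality
	 * test; for a range, A < from or A > to falls into the BPF_JA,
	 * which serves as the shared jump to the failure path.
	 */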

	if (from == to) {
		/* Single port case. */
		struct bpf_insn insns_port[] = {
			BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, from, 0, JUMP_MAGIC),
		};
		add_insns(ctx, insns_port, __arraycount(insns_port));
	} else {
		/* Port range case. */
		struct bpf_insn insns_range[] = {
			BPF_JUMP(BPF_JMP+BPF_JGE+BPF_K, from, 0, 1),
			BPF_JUMP(BPF_JMP+BPF_JGT+BPF_K, to, 0, 1),
			BPF_STMT(BPF_JMP+BPF_JA, JUMP_MAGIC),
		};
		add_insns(ctx, insns_range, __arraycount(insns_range));
	}

	uint32_t mwords[] = {
		(opts & MATCH_SRC) ? BM_SRC_PORTS : BM_DST_PORTS, 2, from, to
	};
	done_block(ctx, mwords, sizeof(mwords));
}

/*
 * npfctl_bpf_tcpfl: code block to match the TCP flags.
 */
void
npfctl_bpf_tcpfl(npf_bpf_t *ctx, uint8_t tf, uint8_t tf_mask)
{
	const unsigned tcpfl_off = offsetof(struct tcphdr, th_flags);
	const bool usingmask = tf_mask != tf;

	/* X <- IP header length */
	fetch_l3(ctx, AF_UNSPEC, X_EQ_L4OFF);

	if ((ctx->flags & CHECKED_L4_PROTO) == 0) {
		const unsigned jf = usingmask ? 3 : 2;
		assert(ctx->ingroup == 0);
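		/*
		 * Note: jf skips the flag fetch, the optional masking
		 * and the comparison emitted below (3 or 2 instructions),
		 * so non-TCP packets bypass the TCP flag check entirely.
		 */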

		/*
		 * A <- L4 protocol; A == TCP?  If not, jump out.
		 *
		 * Note: the TCP flag matching might be without 'proto tcp'
		 * when using a plain 'stateful' rule.  In such a case it
		 * also handles other protocols, hence no strict TCP check.
		 */
		struct bpf_insn insns_tcp[] = {
			BPF_STMT(BPF_LD+BPF_W+BPF_MEM, BPF_MW_L4PROTO),
			BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, IPPROTO_TCP, 0, jf),
		};
		add_insns(ctx, insns_tcp, __arraycount(insns_tcp));
	}

	struct bpf_insn insns_tf[] = {
		/* A <- TCP flags */
		BPF_STMT(BPF_LD+BPF_B+BPF_IND, tcpfl_off),
	};
	add_insns(ctx, insns_tf, __arraycount(insns_tf));

	if (usingmask) {
		/* A <- (A & mask) */
		struct bpf_insn insns_mask[] = {
			BPF_STMT(BPF_ALU+BPF_AND+BPF_K, tf_mask),
		};
		add_insns(ctx, insns_mask, __arraycount(insns_mask));
	}

	struct bpf_insn insns_cmp[] = {
		/* A == expected-TCP-flags? */
		BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, tf, 0, JUMP_MAGIC),
	};
	add_insns(ctx, insns_cmp, __arraycount(insns_cmp));

	uint32_t mwords[] = { BM_TCPFL, 2, tf, tf_mask };
	done_block(ctx, mwords, sizeof(mwords));
}

/*
 * npfctl_bpf_icmp: code block to match the ICMP type and/or code.
 * Note: suitable for both ICMPv4 and ICMPv6.
 */
void
npfctl_bpf_icmp(npf_bpf_t *ctx, int type, int code)
{
	const u_int type_off = offsetof(struct icmp, icmp_type);
	const u_int code_off = offsetof(struct icmp, icmp_code);

	assert(ctx->flags & CHECKED_L4_PROTO);
	assert(offsetof(struct icmp6_hdr, icmp6_type) == type_off);
	assert(offsetof(struct icmp6_hdr, icmp6_code) == code_off);
	assert(type != -1 || code != -1);

	/* X <- IP header length */
	fetch_l3(ctx, AF_UNSPEC, X_EQ_L4OFF);

	if (type != -1) {
		struct bpf_insn insns_type[] = {
			BPF_STMT(BPF_LD+BPF_B+BPF_IND, type_off),
			BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, type, 0, JUMP_MAGIC),
		};
		add_insns(ctx, insns_type, __arraycount(insns_type));

		uint32_t mwords[] = { BM_ICMP_TYPE, 1, type };
		done_block(ctx, mwords, sizeof(mwords));
	}

	if (code != -1) {
		struct bpf_insn insns_code[] = {
			BPF_STMT(BPF_LD+BPF_B+BPF_IND, code_off),
			BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, code, 0, JUMP_MAGIC),
		};
		add_insns(ctx, insns_code, __arraycount(insns_code));

		uint32_t mwords[] = { BM_ICMP_CODE, 1, code };
		done_block(ctx, mwords, sizeof(mwords));
	}
}

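/*
 * The top bit of the table ID passed to the NPF BPF coprocessor
 * indicates a source-address match (see npfctl_bpf_table below).
 */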
#define	SRC_FLAG_BIT	(1U << 31)

/*
 * npfctl_bpf_table: code block to match the source/destination IP address
 * against an NPF table specified by ID.
 */
void
npfctl_bpf_table(npf_bpf_t *ctx, unsigned opts, unsigned tid)
{
	const bool src = (opts & MATCH_SRC) != 0;

	struct bpf_insn insns_table[] = {
		BPF_STMT(BPF_LD+BPF_IMM, (src ? SRC_FLAG_BIT : 0) | tid),
		BPF_STMT(BPF_MISC+BPF_COP, NPF_COP_TABLE),
		BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, 0, JUMP_MAGIC, 0),
	};
	add_insns(ctx, insns_table, __arraycount(insns_table));

	uint32_t mwords[] = { src ? BM_SRC_TABLE : BM_DST_TABLE, 1, tid };
	bm_invert_checkpoint(ctx, opts);
	done_block(ctx, mwords, sizeof(mwords));
}