/*-
 * Copyright (c) 2010-2019 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This material is based upon work partially supported by The
 * NetBSD Foundation under a contract with Mindaugas Rasiukevicius.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * BPF byte-code generation for NPF rules.
 *
 * Overview
 *
 *	Each NPF rule is compiled into a BPF micro-program.  There is a
 *	BPF byte-code fragment for each piece of higher-level filtering
 *	logic, e.g. to match the L4 protocol, IP/mask, etc.  The
 *	generation process combines multiple BPF byte-code fragments
 *	into one program.
 *
 * Basic case
 *
 *	Consider a basic case, where all filters should match.  They
 *	are expressed as a logical conjunction, e.g.:
 *
 *		A and B and C and D
 *
 *	Each test (filter) criterion can be evaluated to true (match) or
 *	false (no match), and the logic is as follows:
 *
 *	- If the value is true, then jump to the "next" test (offset 0).
 *
 *	- If the value is false, then jump with the JUMP_MAGIC value
 *	(0xff).  This "magic" value indicates a jump that will have to
 *	be patched at a later stage.
 *
 *	Once all byte-code fragments are combined into one, there are
 *	two additional steps:
 *
 *	- Two instructions are appended at the end of the program: return
 *	"success" followed by return "failure".
 *
 *	- All jumps with the JUMP_MAGIC value are patched to point to the
 *	"return failure" instruction.
 *
 *	Therefore, if all filter criteria match, the "return success"
 *	instruction is reached, indicating a successful match of the
 *	rule.  Otherwise, if any of the criteria does not match, the
 *	failure path is taken and the rule does not match.
 *
 * Grouping
 *
 *	Filters can have groups, which have the meaning of a logical
 *	disjunction, e.g.:
 *
 *		A and B and (C or D)
 *
 *	In such a case, the logic inside the group has to be inverted,
 *	i.e. the jump values swapped.  If the test value is true, then
 *	jump out of the group; if false, then jump "next".  At the end
 *	of the group, an additional failure path is appended and the
 *	JUMP_MAGIC uses within the group are patched to jump past that
 *	path.
 */
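
/*
 * Illustrative sketch (hand-written, not actual generator output): the
 * conjunction "A and B" could compile to something like the following,
 * where the magic jumps are later patched by fixup_jumps().  Note that
 * a BPF jump with offset k lands on instruction (pc + 1 + k); the load
 * offsets below are assumed for illustration:
 *
 *	(0) A <- M[BPF_MW_L4PROTO]	; test A: L4 protocol
 *	(1) jeq #IPPROTO_TCP, jt=0, jf=0xff
 *	(2) A <- P[12:4]		; test B: an address word
 *	(3) jeq #0x0a000001, jt=0, jf=0xff
 *	(4) ret #NPF_BPF_SUCCESS
 *	(5) ret #NPF_BPF_FAILURE
 *
 * On completion, the jf=0xff at (1) is patched to 3 and at (3) to 1,
 * so that both failure jumps land on instruction (5).
 */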

#include <sys/cdefs.h>
__RCSID("$NetBSD: npf_bpf_comp.c,v 1.14 2019/08/08 21:29:15 rmind Exp $");

#include <stdlib.h>
#include <stdbool.h>
#include <stddef.h>
#include <string.h>
#include <inttypes.h>
#include <err.h>
#include <assert.h>

#include <netinet/in.h>
#include <netinet/in_systm.h>
#define	__FAVOR_BSD
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/udp.h>
#include <netinet/tcp.h>
#include <netinet/ip_icmp.h>
#include <netinet/icmp6.h>

#include <net/bpf.h>

#include "npfctl.h"

/*
 * Note: clear X_EQ_L4OFF when register X is invalidated, i.e. when it
 * stores something other than the L4 header offset.  Generally, this is
 * whenever BPF_LDX is used.
 */
#define	FETCHED_L3		0x01
#define	CHECKED_L4		0x02
#define	X_EQ_L4OFF		0x04

struct npf_bpf {
	/*
	 * BPF program code, the allocated length (in bytes), the number
	 * of logical blocks and the flags.
	 */
	struct bpf_program	prog;
	size_t			alen;
	u_int			nblocks;
	sa_family_t		af;
	uint32_t		flags;

	/*
	 * The current group offset (counted in BPF instructions)
	 * and block number at the start of the group.
	 */
	bool			ingroup;
	u_int			goff;
	u_int			gblock;

	/* BPF marks, allocated length and the real length. */
	uint32_t *		marks;
	size_t			malen;
	size_t			mlen;
};

/*
 * NPF success and failure values to be returned from BPF.
 */
#define	NPF_BPF_SUCCESS		((u_int)-1)
#define	NPF_BPF_FAILURE		0

/*
 * Magic value to indicate the failure path, which is fixed up on completion.
 * Note: this is the longest jump offset in BPF, since the offset is one byte.
 */
#define	JUMP_MAGIC		0xff

/* Reduce re-allocations by expanding in 64-byte blocks. */
#define	ALLOC_MASK		(64 - 1)
#define	ALLOC_ROUND(x)		(((x) + ALLOC_MASK) & ~ALLOC_MASK)
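
/*
 * For example (assuming the values above): ALLOC_ROUND(1) == 64,
 * ALLOC_ROUND(64) == 64 and ALLOC_ROUND(65) == 128.
 */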

#ifndef IPV6_VERSION
#define	IPV6_VERSION		0x60
#endif

npf_bpf_t *
npfctl_bpf_create(void)
{
	return ecalloc(1, sizeof(npf_bpf_t));
}

static void
fixup_jumps(npf_bpf_t *ctx, u_int start, u_int end, bool swap)
{
	struct bpf_program *bp = &ctx->prog;

	for (u_int i = start; i < end; i++) {
		struct bpf_insn *insn = &bp->bf_insns[i];
		const u_int fail_off = end - i;
		bool seen_magic = false;

		if (fail_off >= JUMP_MAGIC) {
			errx(EXIT_FAILURE, "BPF generation error: "
			    "the number of instructions is over the limit");
		}
		if (BPF_CLASS(insn->code) != BPF_JMP) {
			continue;
		}
		if (BPF_OP(insn->code) == BPF_JA) {
			/*
			 * BPF_JA can be used to jump to the failure path.
			 * If we are swapping, i.e. inside a group, then
			 * jump "next"; groups have a failure path appended
			 * at their end.
			 */
			if (insn->k == JUMP_MAGIC) {
				insn->k = swap ? 0 : fail_off;
			}
			continue;
		}

		/*
		 * Fixup the "magic" value.  Swap only the "magic" jumps.
		 */

		if (insn->jt == JUMP_MAGIC) {
			insn->jt = fail_off;
			seen_magic = true;
		}
		if (insn->jf == JUMP_MAGIC) {
			insn->jf = fail_off;
			seen_magic = true;
		}

		if (seen_magic && swap) {
			uint8_t jt = insn->jt;
			insn->jt = insn->jf;
			insn->jf = jt;
		}
	}
}
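
/*
 * For example, with swap=true (i.e. within a group), a block emitted
 * as "jeq #k, jt=0, jf=JUMP_MAGIC" is rewritten to "jeq #k,
 * jt=fail_off, jf=0": a match then jumps out of the group, while a
 * non-match falls through to the next alternative (logical OR).
 */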

static void
add_insns(npf_bpf_t *ctx, struct bpf_insn *insns, size_t count)
{
	struct bpf_program *bp = &ctx->prog;
	size_t offset, len, reqlen;

	/* Note: bf_len is the count of instructions. */
	offset = bp->bf_len * sizeof(struct bpf_insn);
	len = count * sizeof(struct bpf_insn);

	/* Ensure the memory buffer for the program. */
	reqlen = ALLOC_ROUND(offset + len);
	if (reqlen > ctx->alen) {
		bp->bf_insns = erealloc(bp->bf_insns, reqlen);
		ctx->alen = reqlen;
	}

	/* Add the code block. */
	memcpy((uint8_t *)bp->bf_insns + offset, insns, len);
	bp->bf_len += count;
}

static void
done_raw_block(npf_bpf_t *ctx, const uint32_t *m, size_t len)
{
	size_t reqlen, nargs = m[1];

	if ((len / sizeof(uint32_t) - 2) != nargs) {
		errx(EXIT_FAILURE, "invalid BPF block description");
	}
	reqlen = ALLOC_ROUND(ctx->mlen + len);
	if (reqlen > ctx->malen) {
		ctx->marks = erealloc(ctx->marks, reqlen);
		ctx->malen = reqlen;
	}
	memcpy((uint8_t *)ctx->marks + ctx->mlen, m, len);
	ctx->mlen += len;
}

static void
done_block(npf_bpf_t *ctx, const uint32_t *m, size_t len)
{
	done_raw_block(ctx, m, len);
	ctx->nblocks++;
}

struct bpf_program *
npfctl_bpf_complete(npf_bpf_t *ctx)
{
	struct bpf_program *bp = &ctx->prog;
	const u_int retoff = bp->bf_len;

	/* No instructions (optimised out). */
	if (!bp->bf_len)
		return NULL;

	/* Add the return fragment (success and failure paths). */
	struct bpf_insn insns_ret[] = {
		BPF_STMT(BPF_RET+BPF_K, NPF_BPF_SUCCESS),
		BPF_STMT(BPF_RET+BPF_K, NPF_BPF_FAILURE),
	};
	add_insns(ctx, insns_ret, __arraycount(insns_ret));

	/* Fixup all jumps to the main failure path. */
	fixup_jumps(ctx, 0, retoff, false);

	return &ctx->prog;
}

const void *
npfctl_bpf_bmarks(npf_bpf_t *ctx, size_t *len)
{
	*len = ctx->mlen;
	return ctx->marks;
}

void
npfctl_bpf_destroy(npf_bpf_t *ctx)
{
	free(ctx->prog.bf_insns);
	free(ctx->marks);
	free(ctx);
}
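
/*
 * A minimal usage sketch of the above lifecycle (hypothetical caller,
 * for illustration only):
 *
 *	npf_bpf_t *bc = npfctl_bpf_create();
 *	npfctl_bpf_proto(bc, AF_INET, IPPROTO_TCP);
 *	npfctl_bpf_ports(bc, MATCH_DST, htons(80), htons(80));
 *	struct bpf_program *bp = npfctl_bpf_complete(bc);
 *	...use bp->bf_insns and bp->bf_len, plus npfctl_bpf_bmarks()...
 *	npfctl_bpf_destroy(bc);
 */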

/*
 * npfctl_bpf_group_enter: begin a logical group.  It merely uses logical
 * disjunction (OR) for the comparisons within the group.
 */
void
npfctl_bpf_group_enter(npf_bpf_t *ctx)
{
	struct bpf_program *bp = &ctx->prog;

	assert(ctx->goff == 0);
	assert(ctx->gblock == 0);

	ctx->goff = bp->bf_len;
	ctx->gblock = ctx->nblocks;
	ctx->ingroup = true;
}

void
npfctl_bpf_group_exit(npf_bpf_t *ctx, bool invert)
{
	struct bpf_program *bp = &ctx->prog;
	const size_t curoff = bp->bf_len;

	/* If there are no blocks or only one - nothing to do. */
	if (!invert && (ctx->nblocks - ctx->gblock) <= 1) {
		ctx->goff = ctx->gblock = 0;
		return;
	}

	/*
	 * If inverting, then prepend a jump over the failure statement
	 * below: it is taken on fall-through (no match within the group),
	 * while a match jumps directly to the failure path.
	 */
	if (invert) {
		struct bpf_insn insns_ret[] = {
			BPF_STMT(BPF_JMP+BPF_JA, 1),
		};
		add_insns(ctx, insns_ret, __arraycount(insns_ret));
	}

	/*
	 * Append a failure return as a fall-through, i.e. if there is
	 * no match within the group.
	 */
	struct bpf_insn insns_ret[] = {
		BPF_STMT(BPF_RET+BPF_K, NPF_BPF_FAILURE),
	};
	add_insns(ctx, insns_ret, __arraycount(insns_ret));

	/*
	 * Adjust the jump offsets: on match, jump outside the group,
	 * i.e. past the current offset.  Otherwise, jump to the next
	 * instruction, which leads to the fall-through code above if
	 * none of the blocks match.
	 */
	fixup_jumps(ctx, ctx->goff, curoff, true);
	ctx->goff = ctx->gblock = 0;
}
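
/*
 * Resulting layout sketch for a non-inverted group (C or D):
 *
 *	C: on match, jump past (fail); otherwise fall through to D
 *	D: on match, jump past (fail); otherwise fall through
 *	(fail): ret #NPF_BPF_FAILURE
 *
 * Any match thus skips the group-local failure return; if neither C
 * nor D matches, the fall-through reaches it and the rule fails.
 */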
    360 
    361 static void
    362 fetch_l3(npf_bpf_t *ctx, sa_family_t af, u_int flags)
    363 {
    364 	u_int ver;
    365 
    366 	switch (af) {
    367 	case AF_INET:
    368 		ver = IPVERSION;
    369 		break;
    370 	case AF_INET6:
    371 		ver = IPV6_VERSION >> 4;
    372 		break;
    373 	case AF_UNSPEC:
    374 		ver = 0;
    375 		break;
    376 	default:
    377 		abort();
    378 	}
    379 
    380 	/*
    381 	 * The memory store is populated with:
    382 	 * - BPF_MW_IPVER: IP version (4 or 6).
    383 	 * - BPF_MW_L4OFF: L4 header offset.
    384 	 * - BPF_MW_L4PROTO: L4 protocol.
    385 	 */
    386 	if ((ctx->flags & FETCHED_L3) == 0 || (af && ctx->af == 0)) {
    387 		const uint8_t jt = ver ? 0 : JUMP_MAGIC;
    388 		const uint8_t jf = ver ? JUMP_MAGIC : 0;
    389 		bool ingroup = ctx->ingroup;
    390 
    391 		/*
    392 		 * L3 block cannot be inserted in the middle of a group.
    393 		 * In fact, it never is.  Check and start the group after.
    394 		 */
    395 		if (ingroup) {
    396 			assert(ctx->nblocks == ctx->gblock);
    397 			npfctl_bpf_group_exit(ctx, false);
    398 		}
    399 
    400 		/*
    401 		 * A <- IP version; A == expected-version?
    402 		 * If no particular version specified, check for non-zero.
    403 		 */
    404 		struct bpf_insn insns_af[] = {
    405 			BPF_STMT(BPF_LD+BPF_W+BPF_MEM, BPF_MW_IPVER),
    406 			BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, ver, jt, jf),
    407 		};
    408 		add_insns(ctx, insns_af, __arraycount(insns_af));
    409 		ctx->flags |= FETCHED_L3;
    410 		ctx->af = af;
    411 
    412 		if (af) {
    413 			uint32_t mwords[] = { BM_IPVER, 1, af };
    414 			done_raw_block(ctx, mwords, sizeof(mwords));
    415 		}
    416 		if (ingroup) {
    417 			npfctl_bpf_group_enter(ctx);
    418 		}
    419 
    420 	} else if (af && af != ctx->af) {
    421 		errx(EXIT_FAILURE, "address family mismatch");
    422 	}
    423 
    424 	if ((flags & X_EQ_L4OFF) != 0 && (ctx->flags & X_EQ_L4OFF) == 0) {
    425 		/* X <- IP header length */
    426 		struct bpf_insn insns_hlen[] = {
    427 			BPF_STMT(BPF_LDX+BPF_MEM, BPF_MW_L4OFF),
    428 		};
    429 		add_insns(ctx, insns_hlen, __arraycount(insns_hlen));
    430 		ctx->flags |= X_EQ_L4OFF;
    431 	}
    432 }

/*
 * npfctl_bpf_proto: code block to match the IP version and L4 protocol.
 */
void
npfctl_bpf_proto(npf_bpf_t *ctx, sa_family_t af, int proto)
{
	assert(af != AF_UNSPEC || proto != -1);

	/* Note: fails if the IP version does not match. */
	fetch_l3(ctx, af, 0);
	if (proto == -1) {
		return;
	}

	struct bpf_insn insns_proto[] = {
		/* A <- L4 protocol; A == expected-protocol? */
		BPF_STMT(BPF_LD+BPF_W+BPF_MEM, BPF_MW_L4PROTO),
		BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, proto, 0, JUMP_MAGIC),
	};
	add_insns(ctx, insns_proto, __arraycount(insns_proto));

	uint32_t mwords[] = { BM_PROTO, 1, proto };
	done_block(ctx, mwords, sizeof(mwords));
	ctx->flags |= CHECKED_L4;
}
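
/*
 * For example, npfctl_bpf_proto(ctx, AF_INET6, IPPROTO_TCP) emits two
 * fragments: the version check from fetch_l3() (A <- M[BPF_MW_IPVER];
 * A == 6?) and the protocol check above (A <- M[BPF_MW_L4PROTO];
 * A == IPPROTO_TCP?), each taking the JUMP_MAGIC path on mismatch.
 */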

/*
 * npfctl_bpf_cidr: code block to match an IPv4 or IPv6 CIDR.
 *
 * => The IP address shall be in the network byte order.
 */
void
npfctl_bpf_cidr(npf_bpf_t *ctx, u_int opts, sa_family_t af,
    const npf_addr_t *addr, const npf_netmask_t mask)
{
	const uint32_t *awords = (const uint32_t *)addr;
	u_int nwords, length, maxmask, off;

	assert(((opts & MATCH_SRC) != 0) ^ ((opts & MATCH_DST) != 0));
	assert((mask && mask <= NPF_MAX_NETMASK) || mask == NPF_NO_NETMASK);

	switch (af) {
	case AF_INET:
		maxmask = 32;
		off = (opts & MATCH_SRC) ?
		    offsetof(struct ip, ip_src) :
		    offsetof(struct ip, ip_dst);
		nwords = sizeof(struct in_addr) / sizeof(uint32_t);
		break;
	case AF_INET6:
		maxmask = 128;
		off = (opts & MATCH_SRC) ?
		    offsetof(struct ip6_hdr, ip6_src) :
		    offsetof(struct ip6_hdr, ip6_dst);
		nwords = sizeof(struct in6_addr) / sizeof(uint32_t);
		break;
	default:
		abort();
	}

	/* Ensure the address family. */
	fetch_l3(ctx, af, 0);

	length = (mask == NPF_NO_NETMASK) ? maxmask : mask;

	/* CAUTION: BPF operates in the host byte-order. */
	for (u_int i = 0; i < nwords; i++) {
		const u_int woff = i * sizeof(uint32_t);
		uint32_t word = ntohl(awords[i]);
		uint32_t wordmask;

		if (length >= 32) {
			/* The mask is a full word - do not apply it. */
			wordmask = 0;
			length -= 32;
		} else if (length) {
			wordmask = 0xffffffff << (32 - length);
			length = 0;
		} else {
			/* The mask became zero - skip the rest. */
			break;
		}

		/* A <- IP address (or one word of it) */
		struct bpf_insn insns_ip[] = {
			BPF_STMT(BPF_LD+BPF_W+BPF_ABS, off + woff),
		};
		add_insns(ctx, insns_ip, __arraycount(insns_ip));

		/* A <- (A & MASK) */
		if (wordmask) {
			struct bpf_insn insns_mask[] = {
				BPF_STMT(BPF_ALU+BPF_AND+BPF_K, wordmask),
			};
			add_insns(ctx, insns_mask, __arraycount(insns_mask));
		}

		/* A == expected-IP-word ? */
		struct bpf_insn insns_cmp[] = {
			BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, word, 0, JUMP_MAGIC),
		};
		add_insns(ctx, insns_cmp, __arraycount(insns_cmp));
	}

	uint32_t mwords[] = {
		(opts & MATCH_SRC) ? BM_SRC_CIDR : BM_DST_CIDR, 6,
		af, mask, awords[0], awords[1], awords[2], awords[3],
	};
	done_block(ctx, mwords, sizeof(mwords));
}
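
/*
 * Worked example: matching 10.0.0.0/8 (AF_INET, a single word).  The
 * loop runs once with length = 8, so wordmask = 0xffffffff << 24 =
 * 0xff000000 and word = ntohl(awords[0]) = 0x0a000000, emitting:
 *
 *	A <- word at "off"; A <- (A & 0xff000000);
 *	A == 0x0a000000?  If not, take the JUMP_MAGIC path.
 *
 * For a full-length mask (or NPF_NO_NETMASK), wordmask stays zero and
 * the BPF_AND instruction is not emitted at all.
 */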

/*
 * npfctl_bpf_ports: code block to match a TCP/UDP port range.
 *
 * => The port numbers shall be in the network byte order.
 */
void
npfctl_bpf_ports(npf_bpf_t *ctx, u_int opts, in_port_t from, in_port_t to)
{
	const u_int sport_off = offsetof(struct udphdr, uh_sport);
	const u_int dport_off = offsetof(struct udphdr, uh_dport);
	u_int off;

	/* TCP and UDP port offsets are the same. */
	assert(sport_off == offsetof(struct tcphdr, th_sport));
	assert(dport_off == offsetof(struct tcphdr, th_dport));
	assert(ctx->flags & CHECKED_L4);

	assert(((opts & MATCH_SRC) != 0) ^ ((opts & MATCH_DST) != 0));
	off = (opts & MATCH_SRC) ? sport_off : dport_off;

	/* X <- IP header length */
	fetch_l3(ctx, AF_UNSPEC, X_EQ_L4OFF);

	struct bpf_insn insns_fetch[] = {
		/* A <- port */
		BPF_STMT(BPF_LD+BPF_H+BPF_IND, off),
	};
	add_insns(ctx, insns_fetch, __arraycount(insns_fetch));

	/* CAUTION: BPF operates in the host byte-order. */
	from = ntohs(from);
	to = ntohs(to);

	if (from == to) {
		/* Single port case. */
		struct bpf_insn insns_port[] = {
			BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, from, 0, JUMP_MAGIC),
		};
		add_insns(ctx, insns_port, __arraycount(insns_port));
	} else {
		/* Port range case. */
		struct bpf_insn insns_range[] = {
			BPF_JUMP(BPF_JMP+BPF_JGE+BPF_K, from, 0, 1),
			BPF_JUMP(BPF_JMP+BPF_JGT+BPF_K, to, 0, 1),
			BPF_STMT(BPF_JMP+BPF_JA, JUMP_MAGIC),
		};
		add_insns(ctx, insns_range, __arraycount(insns_range));
	}

	uint32_t mwords[] = {
		(opts & MATCH_SRC) ? BM_SRC_PORTS : BM_DST_PORTS, 2, from, to
	};
	done_block(ctx, mwords, sizeof(mwords));
}
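
/*
 * For a range such as 80..443, the three instructions above read:
 * if A < 80, jump to the trailing BPF_JA (failure); if A > 443, fall
 * through to it; otherwise skip over it and continue with the next
 * block.
 */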

/*
 * npfctl_bpf_tcpfl: code block to match the TCP flags.
 */
void
npfctl_bpf_tcpfl(npf_bpf_t *ctx, uint8_t tf, uint8_t tf_mask, bool checktcp)
{
	const u_int tcpfl_off = offsetof(struct tcphdr, th_flags);
	const bool usingmask = tf_mask != tf;

	/* X <- IP header length */
	fetch_l3(ctx, AF_UNSPEC, X_EQ_L4OFF);
	if (checktcp) {
		const u_int jf = usingmask ? 3 : 2;
		assert(ctx->ingroup == false);

		/* A <- L4 protocol; A == TCP?  If not, jump out. */
		struct bpf_insn insns_tcp[] = {
			BPF_STMT(BPF_LD+BPF_W+BPF_MEM, BPF_MW_L4PROTO),
			BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, IPPROTO_TCP, 0, jf),
		};
		add_insns(ctx, insns_tcp, __arraycount(insns_tcp));
	} else {
		assert(ctx->flags & CHECKED_L4);
	}

	struct bpf_insn insns_tf[] = {
		/* A <- TCP flags */
		BPF_STMT(BPF_LD+BPF_B+BPF_IND, tcpfl_off),
	};
	add_insns(ctx, insns_tf, __arraycount(insns_tf));

	if (usingmask) {
		/* A <- (A & mask) */
		struct bpf_insn insns_mask[] = {
			BPF_STMT(BPF_ALU+BPF_AND+BPF_K, tf_mask),
		};
		add_insns(ctx, insns_mask, __arraycount(insns_mask));
	}

	struct bpf_insn insns_cmp[] = {
		/* A == expected-TCP-flags? */
		BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, tf, 0, JUMP_MAGIC),
	};
	add_insns(ctx, insns_cmp, __arraycount(insns_cmp));

	uint32_t mwords[] = { BM_TCPFL, 2, tf, tf_mask };
	done_block(ctx, mwords, sizeof(mwords));
}
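
/*
 * For example, the npf.conf filter "flags S/SA" compiles with
 * tf = TH_SYN and tf_mask = (TH_SYN | TH_ACK): A is masked down to
 * the SYN and ACK bits and must equal TH_SYN, i.e. SYN set with ACK
 * clear.
 */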

/*
 * npfctl_bpf_icmp: code block to match the ICMP type and/or code.
 * Note: suitable for both ICMPv4 and ICMPv6.
 */
void
npfctl_bpf_icmp(npf_bpf_t *ctx, int type, int code)
{
	const u_int type_off = offsetof(struct icmp, icmp_type);
	const u_int code_off = offsetof(struct icmp, icmp_code);

	assert(ctx->flags & CHECKED_L4);
	assert(offsetof(struct icmp6_hdr, icmp6_type) == type_off);
	assert(offsetof(struct icmp6_hdr, icmp6_code) == code_off);
	assert(type != -1 || code != -1);

	/* X <- IP header length */
	fetch_l3(ctx, AF_UNSPEC, X_EQ_L4OFF);

	if (type != -1) {
		struct bpf_insn insns_type[] = {
			BPF_STMT(BPF_LD+BPF_B+BPF_IND, type_off),
			BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, type, 0, JUMP_MAGIC),
		};
		add_insns(ctx, insns_type, __arraycount(insns_type));

		uint32_t mwords[] = { BM_ICMP_TYPE, 1, type };
		done_block(ctx, mwords, sizeof(mwords));
	}

	if (code != -1) {
		struct bpf_insn insns_code[] = {
			BPF_STMT(BPF_LD+BPF_B+BPF_IND, code_off),
			BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, code, 0, JUMP_MAGIC),
		};
		add_insns(ctx, insns_code, __arraycount(insns_code));

		uint32_t mwords[] = { BM_ICMP_CODE, 1, code };
		done_block(ctx, mwords, sizeof(mwords));
	}
}

#define	SRC_FLAG_BIT	(1U << 31)

/*
 * npfctl_bpf_table: code block to match the source/destination IP
 * address against an NPF table specified by ID.
 */
void
npfctl_bpf_table(npf_bpf_t *ctx, u_int opts, u_int tid)
{
	const bool src = (opts & MATCH_SRC) != 0;

	struct bpf_insn insns_table[] = {
		BPF_STMT(BPF_LD+BPF_IMM, (src ? SRC_FLAG_BIT : 0) | tid),
		BPF_STMT(BPF_MISC+BPF_COP, NPF_COP_TABLE),
		BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, 0, JUMP_MAGIC, 0),
	};
	add_insns(ctx, insns_table, __arraycount(insns_table));

	uint32_t mwords[] = { src ? BM_SRC_TABLE : BM_DST_TABLE, 1, tid };
	done_block(ctx, mwords, sizeof(mwords));
}
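
/*
 * The A register carries the table ID with SRC_FLAG_BIT (the top bit)
 * set for a source-address lookup; the NPF_COP_TABLE coprocessor call
 * leaves a non-zero value in A on a table hit, so the "A == 0" jump
 * above takes the JUMP_MAGIC (failure) path when the lookup misses.
 */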
    711