/*-
 * Copyright (c) 2010-2020 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This material is based upon work partially supported by The
 * NetBSD Foundation under a contract with Mindaugas Rasiukevicius.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * BPF byte-code generation for NPF rules.
 *
 * Overview
 *
 *	Each NPF rule is compiled into a BPF micro-program.  There is a
 *	BPF byte-code fragment for each higher-level filtering criterion,
 *	e.g. to match the L4 protocol, an IP/mask, etc.  The generation
 *	process combines multiple BPF byte-code fragments into one program.
 *
 * Basic case
 *
 *	Consider a basic case where all filters should match.  They
 *	are expressed as a logical conjunction, e.g.:
 *
 *		A and B and C and D
 *
 *	Each test (filter) criterion evaluates to true (match) or
 *	false (no match) and the logic is as follows:
 *
 *	- If the value is true, then jump to the "next" test (offset 0).
 *
 *	- If the value is false, then jump to the JUMP_MAGIC value (0xff).
 *	This "magic" value indicates that the jump will have to be
 *	patched at a later stage.
 *
 *	Once all byte-code fragments are combined into one program, there
 *	are two additional steps:
 *
 *	- Two instructions are appended at the end of the program: "return
 *	success" followed by "return failure".
 *
 *	- All jumps with the JUMP_MAGIC value are patched to point to the
 *	"return failure" instruction.
 *
 *	Therefore, if all filter criteria match, the "return success"
 *	instruction is reached, indicating a successful match of the rule.
 *	Otherwise, if any criterion does not match, the failure path is
 *	taken and the rule does not match.
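 *
 *	For example, the conjunction above is laid out roughly as follows
 *	(a sketch; the actual instructions depend on the criteria):
 *
 *		A: on match jump "next", otherwise jump JUMP_MAGIC
 *		B: on match jump "next", otherwise jump JUMP_MAGIC
 *		C: on match jump "next", otherwise jump JUMP_MAGIC
 *		D: on match jump "next", otherwise jump JUMP_MAGIC
 *		RET success
 *		RET failure	<- patched JUMP_MAGIC jumps land here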
 *
 * Grouping
 *
 *	Filters can have groups, which have the effect of logical
 *	disjunction, e.g.:
 *
 *		A and B and (C or D)
 *
 *	In such a case, the logic inside the group has to be inverted,
 *	i.e. the jump values swapped.  If the test value is true, then
 *	jump out of the group; if false, then jump "next".  At the end
 *	of the group, an additional failure path is appended and the
 *	JUMP_MAGIC uses within the group are patched to jump past it.
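 *
 *	For example, the group (C or D) above is laid out roughly as
 *	follows (a sketch):
 *
 *		C: on match jump past the group's failure path, else "next"
 *		D: on match jump past the group's failure path, else "next"
 *		RET failure	<- the group's own failure path
 *		... the rest of the program continues here ...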
 */

#include <sys/cdefs.h>
__RCSID("$NetBSD: npf_bpf_comp.c,v 1.16 2020/05/30 14:16:56 rmind Exp $");

#include <stdlib.h>
#include <stdbool.h>
#include <stddef.h>
#include <string.h>
#include <inttypes.h>
#include <err.h>
#include <assert.h>

#include <netinet/in.h>
#include <netinet/in_systm.h>
#define	__FAVOR_BSD
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/udp.h>
#include <netinet/tcp.h>
#include <netinet/ip_icmp.h>
#include <netinet/icmp6.h>

#include <net/bpf.h>

#include "npfctl.h"

/*
 * Note: clear X_EQ_L4OFF when register X is invalidated, i.e. when it stores
 * something other than the L4 header offset.  Generally, when BPF_LDX is used.
 */
#define	FETCHED_L3		0x01
#define	CHECKED_L4_PROTO	0x02
#define	X_EQ_L4OFF		0x04

struct npf_bpf {
	/*
	 * BPF program code, the allocated length (in bytes), the number
	 * of logical blocks and the flags.
	 */
	struct bpf_program	prog;
	size_t			alen;
	unsigned		nblocks;
	sa_family_t		af;
	uint32_t		flags;

	/*
	 * Indicators of whether we are inside a group and whether the
	 * group implements inverted logic.
	 *
	 * The current group offset (counted in BPF instructions)
	 * and block number at the start of the group.
	 */
	unsigned		ingroup;
	bool			invert;
	unsigned		goff;
	unsigned		gblock;

	/* Track inversion (excl. mark). */
	uint32_t		invflags;

	/* BPF marks, allocated length and the real length. */
	uint32_t *		marks;
	size_t			malen;
	size_t			mlen;
};

/*
 * NPF success and failure values to be returned from BPF.
 */
#define	NPF_BPF_SUCCESS		((u_int)-1)
#define	NPF_BPF_FAILURE		0

/*
 * Magic value to indicate the failure path, which is fixed up on completion.
 * Note: this is the longest jump offset in BPF, since the offset is one byte.
 */
#define	JUMP_MAGIC		0xff

/* Reduce re-allocations by expanding in 64 byte blocks. */
#define	ALLOC_MASK		(64 - 1)
#define	ALLOC_ROUND(x)		(((x) + ALLOC_MASK) & ~ALLOC_MASK)

#ifndef IPV6_VERSION
#define	IPV6_VERSION		0x60
#endif

npf_bpf_t *
npfctl_bpf_create(void)
{
	return ecalloc(1, sizeof(npf_bpf_t));
}

static void
fixup_jumps(npf_bpf_t *ctx, u_int start, u_int end, bool swap)
{
	struct bpf_program *bp = &ctx->prog;

	for (u_int i = start; i < end; i++) {
		struct bpf_insn *insn = &bp->bf_insns[i];
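		/*
		 * Conditional jump offsets are relative to the next
		 * instruction, so an offset of "end - i" points one
		 * instruction past 'end'.
		 */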
		const u_int fail_off = end - i;
		bool seen_magic = false;

		if (fail_off >= JUMP_MAGIC) {
			errx(EXIT_FAILURE, "BPF generation error: "
			    "the number of instructions is over the limit");
		}
		if (BPF_CLASS(insn->code) != BPF_JMP) {
			continue;
		}
		if (BPF_OP(insn->code) == BPF_JA) {
			/*
			 * BPF_JA can be used to jump to the failure path.
			 * If we are swapping, i.e. inside a group, then
			 * jump "next"; groups have a failure path appended
			 * at their end.
			 */
			if (insn->k == JUMP_MAGIC) {
				insn->k = swap ? 0 : fail_off;
			}
			continue;
		}

		/*
		 * Fix up the "magic" value.  Swap only the "magic" jumps.
		 */

		if (insn->jt == JUMP_MAGIC) {
			insn->jt = fail_off;
			seen_magic = true;
		}
		if (insn->jf == JUMP_MAGIC) {
			insn->jf = fail_off;
			seen_magic = true;
		}

		if (seen_magic && swap) {
			uint8_t jt = insn->jt;
			insn->jt = insn->jf;
			insn->jf = jt;
		}
	}
}

static void
add_insns(npf_bpf_t *ctx, struct bpf_insn *insns, size_t count)
{
	struct bpf_program *bp = &ctx->prog;
	size_t offset, len, reqlen;

	/* Note: bf_len is the count of instructions. */
	offset = bp->bf_len * sizeof(struct bpf_insn);
	len = count * sizeof(struct bpf_insn);

	/* Ensure the memory buffer for the program. */
	reqlen = ALLOC_ROUND(offset + len);
	if (reqlen > ctx->alen) {
		bp->bf_insns = erealloc(bp->bf_insns, reqlen);
		ctx->alen = reqlen;
	}

	/* Add the code block. */
	memcpy((uint8_t *)bp->bf_insns + offset, insns, len);
	bp->bf_len += count;
}

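/*
 * add_bmarks: append a mark record describing a block.  Each record
 * consists of the mark ID, the number of argument words and then the
 * argument words themselves.
 */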
static void
add_bmarks(npf_bpf_t *ctx, const uint32_t *m, size_t len)
{
	size_t reqlen, nargs = m[1];

	if ((len / sizeof(uint32_t) - 2) != nargs) {
		errx(EXIT_FAILURE, "invalid BPF block description");
	}
	reqlen = ALLOC_ROUND(ctx->mlen + len);
	if (reqlen > ctx->malen) {
		ctx->marks = erealloc(ctx->marks, reqlen);
		ctx->malen = reqlen;
	}
	memcpy((uint8_t *)ctx->marks + ctx->mlen, m, len);
	ctx->mlen += len;
}

static void
done_block(npf_bpf_t *ctx, const uint32_t *m, size_t len)
{
	add_bmarks(ctx, m, len);
	ctx->nblocks++;
}

struct bpf_program *
npfctl_bpf_complete(npf_bpf_t *ctx)
{
	struct bpf_program *bp = &ctx->prog;
	const u_int retoff = bp->bf_len;

	/* No instructions (optimised out). */
	if (!bp->bf_len)
		return NULL;

	/* Add the return fragment (success and failure paths). */
	struct bpf_insn insns_ret[] = {
		BPF_STMT(BPF_RET+BPF_K, NPF_BPF_SUCCESS),
		BPF_STMT(BPF_RET+BPF_K, NPF_BPF_FAILURE),
	};
	add_insns(ctx, insns_ret, __arraycount(insns_ret));

	/* Fix up all jumps to the main failure path. */
	fixup_jumps(ctx, 0, retoff, false);

	return &ctx->prog;
}
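
/*
 * Illustrative usage sketch (not a real call site; the actual callers
 * live elsewhere in npfctl).  A criterion set such as "proto tcp" with
 * destination port 80 would be compiled roughly along these lines:
 *
 *	npf_bpf_t *bc = npfctl_bpf_create();
 *	npfctl_bpf_proto(bc, IPPROTO_TCP);
 *	npfctl_bpf_ports(bc, MATCH_DST, htons(80), htons(80));
 *	struct bpf_program *bp = npfctl_bpf_complete(bc);
 *	... attach bp->bf_insns / bp->bf_len to the rule ...
 *	npfctl_bpf_destroy(bc);
 */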

const void *
npfctl_bpf_bmarks(npf_bpf_t *ctx, size_t *len)
{
	*len = ctx->mlen;
	return ctx->marks;
}

void
npfctl_bpf_destroy(npf_bpf_t *ctx)
{
	free(ctx->prog.bf_insns);
	free(ctx->marks);
	free(ctx);
}

/*
 * npfctl_bpf_group_enter: begin a logical group.  It merely uses logical
 * disjunction (OR) for comparisons within the group.
 */
void
npfctl_bpf_group_enter(npf_bpf_t *ctx, bool invert)
{
	struct bpf_program *bp = &ctx->prog;

	assert(ctx->goff == 0);
	assert(ctx->gblock == 0);

	ctx->goff = bp->bf_len;
	ctx->gblock = ctx->nblocks;
	ctx->invert = invert;
	ctx->ingroup++;
}

void
npfctl_bpf_group_exit(npf_bpf_t *ctx)
{
	struct bpf_program *bp = &ctx->prog;
	const size_t curoff = bp->bf_len;

	assert(ctx->ingroup);
	ctx->ingroup--;

	/* If there are no blocks or only one - nothing to do. */
	if (!ctx->invert && (ctx->nblocks - ctx->gblock) <= 1) {
		ctx->goff = ctx->gblock = 0;
		return;
	}

	/*
	 * If inverting, then prepend a jump over the statement below.
	 * On match, it will skip through and the failure path will be taken.
	 */
	if (ctx->invert) {
		struct bpf_insn insns_ret[] = {
			BPF_STMT(BPF_JMP+BPF_JA, 1),
		};
		add_insns(ctx, insns_ret, __arraycount(insns_ret));
	}

	/*
	 * Append a failure return as a fall-through, i.e. taken if there
	 * is no match within the group.
	 */
	struct bpf_insn insns_ret[] = {
		BPF_STMT(BPF_RET+BPF_K, NPF_BPF_FAILURE),
	};
	add_insns(ctx, insns_ret, __arraycount(insns_ret));

	/*
	 * Adjust jump offsets: on match, jump outside the group, i.e.
	 * to the current offset.  Otherwise, jump to the next instruction,
	 * which leads to the fall-through code above if nothing matches.
	 */
	fixup_jumps(ctx, ctx->goff, curoff, true);
	ctx->goff = ctx->gblock = 0;
}

static void
fetch_l3(npf_bpf_t *ctx, sa_family_t af, unsigned flags)
{
	unsigned ver;

	switch (af) {
	case AF_INET:
		ver = IPVERSION;
		break;
	case AF_INET6:
		ver = IPV6_VERSION >> 4;
		break;
	case AF_UNSPEC:
		ver = 0;
		break;
	default:
		abort();
	}

	/*
	 * The memory store is populated with:
	 * - BPF_MW_IPVER: IP version (4 or 6).
	 * - BPF_MW_L4OFF: L4 header offset.
	 * - BPF_MW_L4PROTO: L4 protocol.
	 */
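	/*
	 * Emit the check if L3 has not been fetched yet, or if a specific
	 * address family is requested while the earlier check was
	 * family-agnostic (AF_UNSPEC).
	 */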
	if ((ctx->flags & FETCHED_L3) == 0 || (af && ctx->af == 0)) {
		const uint8_t jt = ver ? 0 : JUMP_MAGIC;
		const uint8_t jf = ver ? JUMP_MAGIC : 0;
		const bool ingroup = ctx->ingroup != 0;
		const bool invert = ctx->invert;

		/*
		 * The L3 block cannot be inserted in the middle of a group
		 * (in fact, it never is).  Exit the group here and re-enter
		 * it after the L3 fragment.
		 */
		if (ingroup) {
			assert(ctx->nblocks == ctx->gblock);
			npfctl_bpf_group_exit(ctx);
		}

		/*
		 * A <- IP version; A == expected-version?
		 * If no particular version is specified, check for non-zero.
		 */
		struct bpf_insn insns_af[] = {
			BPF_STMT(BPF_LD+BPF_W+BPF_MEM, BPF_MW_IPVER),
			BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, ver, jt, jf),
		};
		add_insns(ctx, insns_af, __arraycount(insns_af));
		ctx->flags |= FETCHED_L3;
		ctx->af = af;

		if (af) {
			uint32_t mwords[] = { BM_IPVER, 1, af };
			add_bmarks(ctx, mwords, sizeof(mwords));
		}
		if (ingroup) {
			npfctl_bpf_group_enter(ctx, invert);
		}

	} else if (af && af != ctx->af) {
		errx(EXIT_FAILURE, "address family mismatch");
	}

	if ((flags & X_EQ_L4OFF) != 0 && (ctx->flags & X_EQ_L4OFF) == 0) {
		/* X <- IP header length */
		struct bpf_insn insns_hlen[] = {
			BPF_STMT(BPF_LDX+BPF_MEM, BPF_MW_L4OFF),
		};
		add_insns(ctx, insns_hlen, __arraycount(insns_hlen));
		ctx->flags |= X_EQ_L4OFF;
	}
}

static void
bm_invert_checkpoint(npf_bpf_t *ctx, const unsigned opts)
{
	uint32_t bm = 0;

	if (ctx->ingroup && ctx->invert) {
		const unsigned seen = ctx->invflags;

		if ((opts & MATCH_SRC) != 0 && (seen & MATCH_SRC) == 0) {
			bm = BM_SRC_NEG;
		}
		if ((opts & MATCH_DST) != 0 && (seen & MATCH_DST) == 0) {
			bm = BM_DST_NEG;
		}
		ctx->invflags |= opts & (MATCH_SRC | MATCH_DST);
	}
	if (bm) {
		uint32_t mwords[] = { bm, 0 };
		add_bmarks(ctx, mwords, sizeof(mwords));
	}
}

/*
 * npfctl_bpf_ipver: match the IP version.
 */
void
npfctl_bpf_ipver(npf_bpf_t *ctx, sa_family_t af)
{
	fetch_l3(ctx, af, 0);
}

/*
 * npfctl_bpf_proto: code block to match IP version and L4 protocol.
 */
void
npfctl_bpf_proto(npf_bpf_t *ctx, unsigned proto)
{
	struct bpf_insn insns_proto[] = {
		/* A <- L4 protocol; A == expected-protocol? */
		BPF_STMT(BPF_LD+BPF_W+BPF_MEM, BPF_MW_L4PROTO),
		BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, proto, 0, JUMP_MAGIC),
	};
	add_insns(ctx, insns_proto, __arraycount(insns_proto));

	uint32_t mwords[] = { BM_PROTO, 1, proto };
	done_block(ctx, mwords, sizeof(mwords));
	ctx->flags |= CHECKED_L4_PROTO;
}

/*
 * npfctl_bpf_cidr: code block to match IPv4 or IPv6 CIDR.
 *
 * => IP address shall be in the network byte order.
 */
void
npfctl_bpf_cidr(npf_bpf_t *ctx, unsigned opts, sa_family_t af,
    const npf_addr_t *addr, const npf_netmask_t mask)
{
	const uint32_t *awords = (const uint32_t *)addr;
	unsigned nwords, length, maxmask, off;

	assert(((opts & MATCH_SRC) != 0) ^ ((opts & MATCH_DST) != 0));
	assert((mask && mask <= NPF_MAX_NETMASK) || mask == NPF_NO_NETMASK);

	switch (af) {
	case AF_INET:
		maxmask = 32;
		off = (opts & MATCH_SRC) ?
		    offsetof(struct ip, ip_src) :
		    offsetof(struct ip, ip_dst);
		nwords = sizeof(struct in_addr) / sizeof(uint32_t);
		break;
	case AF_INET6:
		maxmask = 128;
		off = (opts & MATCH_SRC) ?
		    offsetof(struct ip6_hdr, ip6_src) :
		    offsetof(struct ip6_hdr, ip6_dst);
		nwords = sizeof(struct in6_addr) / sizeof(uint32_t);
		break;
	default:
		abort();
	}

	/* Ensure address family. */
	fetch_l3(ctx, af, 0);

	length = (mask == NPF_NO_NETMASK) ? maxmask : mask;

	/* CAUTION: BPF operates in host byte-order. */
	for (unsigned i = 0; i < nwords; i++) {
		const unsigned woff = i * sizeof(uint32_t);
		uint32_t word = ntohl(awords[i]);
		uint32_t wordmask;

		if (length >= 32) {
			/* The mask is a full word - do not apply it. */
			wordmask = 0;
			length -= 32;
		} else if (length) {
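			/*
			 * Partial word: for example, an IPv4 /20 prefix
			 * yields the word mask 0xfffff000.
			 */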
			wordmask = 0xffffffff << (32 - length);
			length = 0;
		} else {
			/* The mask became zero - skip the rest. */
			break;
		}

		/* A <- IP address (or one word of it) */
		struct bpf_insn insns_ip[] = {
			BPF_STMT(BPF_LD+BPF_W+BPF_ABS, off + woff),
		};
		add_insns(ctx, insns_ip, __arraycount(insns_ip));

		/* A <- (A & MASK) */
		if (wordmask) {
			struct bpf_insn insns_mask[] = {
				BPF_STMT(BPF_ALU+BPF_AND+BPF_K, wordmask),
			};
			add_insns(ctx, insns_mask, __arraycount(insns_mask));
		}

		/* A == expected-IP-word ? */
		struct bpf_insn insns_cmp[] = {
			BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, word, 0, JUMP_MAGIC),
		};
		add_insns(ctx, insns_cmp, __arraycount(insns_cmp));
	}

	uint32_t mwords[] = {
		(opts & MATCH_SRC) ? BM_SRC_CIDR: BM_DST_CIDR, 6,
		af, mask, awords[0], awords[1], awords[2], awords[3],
	};
	bm_invert_checkpoint(ctx, opts);
	done_block(ctx, mwords, sizeof(mwords));
}

/*
 * npfctl_bpf_ports: code block to match TCP/UDP port range.
 *
 * => Port numbers shall be in the network byte order.
 */
void
npfctl_bpf_ports(npf_bpf_t *ctx, unsigned opts, in_port_t from, in_port_t to)
{
	const unsigned sport_off = offsetof(struct udphdr, uh_sport);
	const unsigned dport_off = offsetof(struct udphdr, uh_dport);
	unsigned off;

	/* TCP and UDP port offsets are the same. */
	assert(sport_off == offsetof(struct tcphdr, th_sport));
	assert(dport_off == offsetof(struct tcphdr, th_dport));
	assert(ctx->flags & CHECKED_L4_PROTO);

	assert(((opts & MATCH_SRC) != 0) ^ ((opts & MATCH_DST) != 0));
	off = (opts & MATCH_SRC) ? sport_off : dport_off;

	/* X <- IP header length */
	fetch_l3(ctx, AF_UNSPEC, X_EQ_L4OFF);

	struct bpf_insn insns_fetch[] = {
		/* A <- port */
		BPF_STMT(BPF_LD+BPF_H+BPF_IND, off),
	};
	add_insns(ctx, insns_fetch, __arraycount(insns_fetch));

	/* CAUTION: BPF operates in host byte-order. */
	from = ntohs(from);
	to = ntohs(to);

	if (from == to) {
		/* Single port case. */
		struct bpf_insn insns_port[] = {
			BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, from, 0, JUMP_MAGIC),
		};
		add_insns(ctx, insns_port, __arraycount(insns_port));
	} else {
		/* Port range case. */
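		/*
		 * Match if "from <= A <= to": a port below 'from' or above
		 * 'to' ends up at the BPF_JA below, which jumps to the
		 * failure path; a port within the range skips past it.
		 */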
		struct bpf_insn insns_range[] = {
			BPF_JUMP(BPF_JMP+BPF_JGE+BPF_K, from, 0, 1),
			BPF_JUMP(BPF_JMP+BPF_JGT+BPF_K, to, 0, 1),
			BPF_STMT(BPF_JMP+BPF_JA, JUMP_MAGIC),
		};
		add_insns(ctx, insns_range, __arraycount(insns_range));
	}

	uint32_t mwords[] = {
		(opts & MATCH_SRC) ? BM_SRC_PORTS : BM_DST_PORTS, 2, from, to
	};
	done_block(ctx, mwords, sizeof(mwords));
}

/*
 * npfctl_bpf_tcpfl: code block to match TCP flags.
 */
void
npfctl_bpf_tcpfl(npf_bpf_t *ctx, uint8_t tf, uint8_t tf_mask)
{
	const unsigned tcpfl_off = offsetof(struct tcphdr, th_flags);
	const bool usingmask = tf_mask != tf;

	/* X <- IP header length */
	fetch_l3(ctx, AF_UNSPEC, X_EQ_L4OFF);

	if ((ctx->flags & CHECKED_L4_PROTO) == 0) {
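		/*
		 * If the protocol is not TCP, 'jf' jumps over the flag
		 * fetch, the optional mask and the comparison emitted
		 * below (two or three instructions).
		 */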
		const unsigned jf = usingmask ? 3 : 2;
		assert(ctx->ingroup == 0);

		/*
		 * A <- L4 protocol; A == TCP?  If not, jump out.
		 *
		 * Note: the TCP flag matching might be without 'proto tcp'
		 * when using a plain 'stateful' rule.  In such a case it
		 * also handles other protocols, hence no strict TCP check.
		 */
		struct bpf_insn insns_tcp[] = {
			BPF_STMT(BPF_LD+BPF_W+BPF_MEM, BPF_MW_L4PROTO),
			BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, IPPROTO_TCP, 0, jf),
		};
		add_insns(ctx, insns_tcp, __arraycount(insns_tcp));
	}

	struct bpf_insn insns_tf[] = {
		/* A <- TCP flags */
		BPF_STMT(BPF_LD+BPF_B+BPF_IND, tcpfl_off),
	};
	add_insns(ctx, insns_tf, __arraycount(insns_tf));

	if (usingmask) {
		/* A <- (A & mask) */
		struct bpf_insn insns_mask[] = {
			BPF_STMT(BPF_ALU+BPF_AND+BPF_K, tf_mask),
		};
		add_insns(ctx, insns_mask, __arraycount(insns_mask));
	}

	struct bpf_insn insns_cmp[] = {
		/* A == expected-TCP-flags? */
		BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, tf, 0, JUMP_MAGIC),
	};
	add_insns(ctx, insns_cmp, __arraycount(insns_cmp));

	uint32_t mwords[] = { BM_TCPFL, 2, tf, tf_mask };
	done_block(ctx, mwords, sizeof(mwords));
}

/*
 * npfctl_bpf_icmp: code block to match ICMP type and/or code.
 * Note: suitable for both ICMPv4 and ICMPv6.
 */
void
npfctl_bpf_icmp(npf_bpf_t *ctx, int type, int code)
{
	const u_int type_off = offsetof(struct icmp, icmp_type);
	const u_int code_off = offsetof(struct icmp, icmp_code);

	assert(ctx->flags & CHECKED_L4_PROTO);
	assert(offsetof(struct icmp6_hdr, icmp6_type) == type_off);
	assert(offsetof(struct icmp6_hdr, icmp6_code) == code_off);
	assert(type != -1 || code != -1);

	/* X <- IP header length */
	fetch_l3(ctx, AF_UNSPEC, X_EQ_L4OFF);

	if (type != -1) {
		struct bpf_insn insns_type[] = {
			BPF_STMT(BPF_LD+BPF_B+BPF_IND, type_off),
			BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, type, 0, JUMP_MAGIC),
		};
		add_insns(ctx, insns_type, __arraycount(insns_type));

		uint32_t mwords[] = { BM_ICMP_TYPE, 1, type };
		done_block(ctx, mwords, sizeof(mwords));
	}

	if (code != -1) {
		struct bpf_insn insns_code[] = {
			BPF_STMT(BPF_LD+BPF_B+BPF_IND, code_off),
			BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, code, 0, JUMP_MAGIC),
		};
		add_insns(ctx, insns_code, __arraycount(insns_code));

		uint32_t mwords[] = { BM_ICMP_CODE, 1, code };
		done_block(ctx, mwords, sizeof(mwords));
	}
}

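/*
 * The argument to the NPF_COP_TABLE coprocessor call is the table ID,
 * with the top bit indicating a source (rather than destination)
 * address lookup.
 */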
#define	SRC_FLAG_BIT	(1U << 31)

/*
 * npfctl_bpf_table: code block to match the source/destination IP address
 * against the NPF table specified by ID.
 */
void
npfctl_bpf_table(npf_bpf_t *ctx, unsigned opts, unsigned tid)
{
	const bool src = (opts & MATCH_SRC) != 0;

	struct bpf_insn insns_table[] = {
		BPF_STMT(BPF_LD+BPF_IMM, (src ? SRC_FLAG_BIT : 0) | tid),
		BPF_STMT(BPF_MISC+BPF_COP, NPF_COP_TABLE),
		BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, 0, JUMP_MAGIC, 0),
	};
	add_insns(ctx, insns_table, __arraycount(insns_table));

	uint32_t mwords[] = { src ? BM_SRC_TABLE: BM_DST_TABLE, 1, tid };
	bm_invert_checkpoint(ctx, opts);
	done_block(ctx, mwords, sizeof(mwords));
}
    753