/*-
 * Copyright (c) 2010-2020 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This material is based upon work partially supported by The
 * NetBSD Foundation under a contract with Mindaugas Rasiukevicius.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * BPF byte-code generation for NPF rules.
 *
 * Overview
 *
 *	Each NPF rule is compiled into a BPF micro-program.  There is a
 *	BPF byte-code fragment for each higher-level filtering construct,
 *	e.g. to match the L4 protocol, an IP address/mask, etc.  The
 *	generation process combines multiple BPF byte-code fragments
 *	into one program.
 *
 * Basic case
 *
 *	Consider a basic case where all filters should match.  They
 *	are expressed as a logical conjunction, e.g.:
 *
 *		A and B and C and D
 *
 *	Each test (filter) criterion evaluates to true (match) or
 *	false (no match) and the logic is as follows:
 *
 *	- If the value is true, then jump to the "next" test (offset 0).
 *
 *	- If the value is false, then jump to the JUMP_MAGIC value (0xff).
 *	This "magic" value indicates a jump offset that will have to be
 *	patched at a later stage.
 *
 *	Once all byte-code fragments are combined into one program,
 *	there are two additional steps:
 *
 *	- Two instructions are appended at the end of the program: "return
 *	success" followed by "return failure".
 *
 *	- All jumps with the JUMP_MAGIC value are patched to point to the
 *	"return failure" instruction.
 *
 *	Therefore, if all filter criteria match, the "return success"
 *	instruction is reached, indicating a successful match of the
 *	rule.  Otherwise, any failing criterion takes the failure path
 *	and the rule does not match.
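 *
 *	As an illustrative sketch, "A and B" alone would thus be
 *	combined into:
 *
 *		test A		; jt = 0 (next), jf = JUMP_MAGIC
 *		test B		; jt = 0 (next), jf = JUMP_MAGIC
 *		ret #success
 *		ret #failure	; JUMP_MAGIC jumps are patched to here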
 *
 * Grouping
 *
 *	Filters can have groups, which have the effect of logical
 *	disjunction, e.g.:
 *
 *		A and B and (C or D)
 *
 *	In such a case, the logic inside the group has to be inverted,
 *	i.e. the jump values swapped.  If the test value is true, then
 *	jump out of the group; if false, then jump "next".  At the end
 *	of the group, an additional failure path is appended and the
 *	uses of JUMP_MAGIC within the group are patched to jump past
 *	the said path.
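 *
 *	An illustrative sketch for "A and (C or D)":
 *
 *		test A		; jt = 0 (next), jf = JUMP_MAGIC
 *		test C		; on match, jump out of the group
 *		test D		; on match, jump out of the group
 *		ret #failure	; group fall-through (no match)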
 *
 *	For multi-word comparisons (IPv6 addresses), there is another
 *	layer of grouping:
 *
 *		A and B and ((C and D) or (E and F))
 *
 *	This strains the simple-minded JUMP_MAGIC logic, so for now,
 *	when generating the jump-if-false targets for (C and D), we
 *	simply count the number of instructions left to skip over.
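 *
 *	For instance (an illustrative sketch), if the first word of the
 *	(C and D) comparison fails, the jump-if-false offset skips the
 *	two remaining instructions (load and compare) of the second
 *	word, landing on the (E and F) alternative.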
 *
 *	A better architecture might be to create asm-type labels for
 *	the jt and jf continuations in the first pass, and then, once
 *	their offsets are determined, go back and fill them in in the
 *	second pass.  This would simplify the logic (no need to compute
 *	exactly how many instructions we're about to generate in a
 *	chain of conditionals) and eliminate redundant RET #0
 *	instructions which are currently generated after some groups.
 */

#include <sys/cdefs.h>
__RCSID("$NetBSD: npf_bpf_comp.c,v 1.18 2025/07/01 19:55:15 joe Exp $");

#include <stdlib.h>
#include <stdbool.h>
#include <stddef.h>
#include <string.h>
#include <inttypes.h>
#include <err.h>
#include <assert.h>

#include <netinet/in.h>
#include <netinet/in_systm.h>
#define	__FAVOR_BSD
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/udp.h>
#include <netinet/tcp.h>
#include <netinet/ip_icmp.h>
#include <netinet/icmp6.h>

#include <net/bpf.h>
#include <net/if_ether.h>	/* struct ether_header, struct ether_addr */

#include "npfctl.h"

/*
 * Note: clear X_EQ_L4OFF when register X is invalidated, i.e. when it
 * stores something other than the L4 header offset.  Generally, when
 * BPF_LDX is used.
 */
#define	FETCHED_L3		0x01
#define	CHECKED_L4_PROTO	0x02
#define	X_EQ_L4OFF		0x04
#define	FETCHED_L2		0x08

struct npf_bpf {
	/*
	 * BPF program code, the allocated length (in bytes), the number
	 * of logical blocks and the flags.
	 */
	struct bpf_program	prog;
	size_t			alen;
	unsigned		nblocks;
	sa_family_t		af;
	uint32_t		flags;
	uint16_t		eth_type;

	/*
	 * Indicators whether we are inside a group and whether this
	 * group is implementing inverted logic.
	 *
	 * The current group offset (counted in BPF instructions)
	 * and block number at the start of the group.
	 */
	unsigned		ingroup;
	bool			invert;
	bool			multiword;
	unsigned		goff;
	unsigned		gblock;

	/* Track inversion (excl. mark). */
	uint32_t		invflags;

	/* BPF marks, allocated length and the real length. */
	uint32_t *		marks;
	size_t			malen;
	size_t			mlen;
};

/*
 * NPF success and failure values to be returned from BPF.
 */
#define	NPF_BPF_SUCCESS		((u_int)-1)
#define	NPF_BPF_FAILURE		0

/*
 * Magic value to indicate the failure path, which is fixed up on completion.
 * Note: this is the longest jump offset in BPF, since the offset is one byte.
 */
#define	JUMP_MAGIC		0xff

/* Reduce re-allocations by expanding in 64-byte blocks. */
#define	ALLOC_MASK		(64 - 1)
#define	ALLOC_ROUND(x)		(((x) + ALLOC_MASK) & ~ALLOC_MASK)

#ifndef IPV6_VERSION
#define	IPV6_VERSION		0x60
#endif

npf_bpf_t *
npfctl_bpf_create(void)
{
	return ecalloc(1, sizeof(npf_bpf_t));
}

static void
fixup_jumps(npf_bpf_t *ctx, u_int start, u_int end, bool swap)
{
	struct bpf_program *bp = &ctx->prog;

	for (u_int i = start; i < end; i++) {
		struct bpf_insn *insn = &bp->bf_insns[i];
		const u_int fail_off = end - i;
		bool seen_magic = false;

		if (fail_off >= JUMP_MAGIC) {
			errx(EXIT_FAILURE, "BPF generation error: "
			    "the number of instructions is over the limit");
		}
		if (BPF_CLASS(insn->code) != BPF_JMP) {
			continue;
		}
		if (BPF_OP(insn->code) == BPF_JA) {
			/*
			 * BPF_JA can be used to jump to the failure path.
			 * If we are swapping i.e. inside the group, then
			 * jump "next"; groups have a failure path appended
			 * at their end.
			 */
			if (insn->k == JUMP_MAGIC) {
				insn->k = swap ? 0 : fail_off;
			}
			continue;
		}

		/*
		 * Fixup the "magic" value.  Swap only the "magic" jumps.
		 */

		if (insn->jt == JUMP_MAGIC) {
			insn->jt = fail_off;
			seen_magic = true;
		}
		if (insn->jf == JUMP_MAGIC) {
			insn->jf = fail_off;
			seen_magic = true;
		}

		if (seen_magic && swap) {
			uint8_t jt = insn->jt;
			insn->jt = insn->jf;
			insn->jf = jt;
		}
	}
}

static void
add_insns(npf_bpf_t *ctx, struct bpf_insn *insns, size_t count)
{
	struct bpf_program *bp = &ctx->prog;
	size_t offset, len, reqlen;

	/* Note: bf_len is the count of instructions. */
	offset = bp->bf_len * sizeof(struct bpf_insn);
	len = count * sizeof(struct bpf_insn);

	/* Ensure the memory buffer for the program. */
	reqlen = ALLOC_ROUND(offset + len);
	if (reqlen > ctx->alen) {
		bp->bf_insns = erealloc(bp->bf_insns, reqlen);
		ctx->alen = reqlen;
	}

	/* Add the code block. */
	memcpy((uint8_t *)bp->bf_insns + offset, insns, len);
	bp->bf_len += count;
}

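/*
 * add_bmarks: append the mark words to the buffer.  Each mark is a
 * sequence of 32-bit words: the mark ID, the number of argument words
 * and then the arguments themselves (see the mwords arrays below).
 */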
static void
add_bmarks(npf_bpf_t *ctx, const uint32_t *m, size_t len)
{
	size_t reqlen, nargs = m[1];

	if ((len / sizeof(uint32_t) - 2) != nargs) {
		errx(EXIT_FAILURE, "invalid BPF block description");
	}
	reqlen = ALLOC_ROUND(ctx->mlen + len);
	if (reqlen > ctx->malen) {
		ctx->marks = erealloc(ctx->marks, reqlen);
		ctx->malen = reqlen;
	}
	memcpy((uint8_t *)ctx->marks + ctx->mlen, m, len);
	ctx->mlen += len;
}

static void
done_block(npf_bpf_t *ctx, const uint32_t *m, size_t len)
{
	add_bmarks(ctx, m, len);
	ctx->nblocks++;
}

struct bpf_program *
npfctl_bpf_complete(npf_bpf_t *ctx)
{
	struct bpf_program *bp = &ctx->prog;
	const u_int retoff = bp->bf_len;

	/* No instructions (optimised out). */
	if (!bp->bf_len)
		return NULL;

	/* Add the return fragment (success and failure paths). */
	struct bpf_insn insns_ret[] = {
		BPF_STMT(BPF_RET+BPF_K, NPF_BPF_SUCCESS),
		BPF_STMT(BPF_RET+BPF_K, NPF_BPF_FAILURE),
	};
	add_insns(ctx, insns_ret, __arraycount(insns_ret));

	/* Fixup all jumps to the main failure path. */
	fixup_jumps(ctx, 0, retoff, false);

	return &ctx->prog;
}

const void *
npfctl_bpf_bmarks(npf_bpf_t *ctx, size_t *len)
{
	*len = ctx->mlen;
	return ctx->marks;
}

void
npfctl_bpf_destroy(npf_bpf_t *ctx)
{
	free(ctx->prog.bf_insns);
	free(ctx->marks);
	free(ctx);
}

/*
 * npfctl_bpf_group_enter: begin a logical group.  Comparisons within
 * the group are combined by logical disjunction (OR).
 */
void
npfctl_bpf_group_enter(npf_bpf_t *ctx, bool invert)
{
	struct bpf_program *bp = &ctx->prog;

	assert(ctx->goff == 0);
	assert(ctx->gblock == 0);

	ctx->goff = bp->bf_len;
	ctx->gblock = ctx->nblocks;
	ctx->invert = invert;
	ctx->multiword = false;
	ctx->ingroup++;
}

void
npfctl_bpf_group_exit(npf_bpf_t *ctx)
{
	struct bpf_program *bp = &ctx->prog;
	const size_t curoff = bp->bf_len;

	assert(ctx->ingroup);
	ctx->ingroup--;

	/*
	 * If we are not inverting, there were only zero or one options
	 * and the last comparison was not a multi-word comparison
	 * requiring a fallthrough failure -- nothing to do.
	 */
	if (!ctx->invert &&
	    (ctx->nblocks - ctx->gblock) <= 1 &&
	    !ctx->multiword) {
		ctx->goff = ctx->gblock = 0;
		return;
	}

	/*
	 * If inverting, then prepend a jump over the failure return
	 * below.  On no match, the group falls through to this jump
	 * and skips the failure path; a match (via the swapped jumps)
	 * lands on the failure return instead.
	 */
	if (ctx->invert) {
		struct bpf_insn insns_ret[] = {
			BPF_STMT(BPF_JMP+BPF_JA, 1),
		};
		add_insns(ctx, insns_ret, __arraycount(insns_ret));
	}

	/*
	 * Append a failure return as a fall-through i.e. if there is
	 * no match within the group.
	 */
	struct bpf_insn insns_ret[] = {
		BPF_STMT(BPF_RET+BPF_K, NPF_BPF_FAILURE),
	};
	add_insns(ctx, insns_ret, __arraycount(insns_ret));

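	/*
	 * Resulting tail (illustrative), once the jumps below are
	 * adjusted: in a regular group, a match jumps past the appended
	 * failure return and no match falls through into it; in an
	 * inverted group, a match lands on the failure return, while
	 * no match falls through to the "ja 1" which skips it.
	 */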
	/*
	 * Adjust jump offsets: on match - jump outside the group i.e.
	 * to the current offset.  Otherwise, jump to the next instruction
	 * which would lead to the fall-through code above if none matches.
	 */
	fixup_jumps(ctx, ctx->goff, curoff, true);
	ctx->goff = ctx->gblock = 0;
}

static void
fetch_l3(npf_bpf_t *ctx, sa_family_t af, unsigned flags)
{
	unsigned ver;

	switch (af) {
	case AF_INET:
		ver = IPVERSION;
		break;
	case AF_INET6:
		ver = IPV6_VERSION >> 4;
		break;
	case AF_UNSPEC:
		ver = 0;
		break;
	default:
		abort();
	}

	/*
	 * The memory store is populated with:
	 * - BPF_MW_IPVER: IP version (4 or 6).
	 * - BPF_MW_L4OFF: L4 header offset.
	 * - BPF_MW_L4PROTO: L4 protocol.
	 */
	if ((ctx->flags & FETCHED_L3) == 0 || (af && ctx->af == 0)) {
		const uint8_t jt = ver ? 0 : JUMP_MAGIC;
		const uint8_t jf = ver ? JUMP_MAGIC : 0;
		const bool ingroup = ctx->ingroup != 0;
		const bool invert = ctx->invert;

		/*
		 * L3 block cannot be inserted in the middle of a group.
		 * In fact, it never is.  Check and start the group after.
		 */
		if (ingroup) {
			assert(ctx->nblocks == ctx->gblock);
			npfctl_bpf_group_exit(ctx);
		}

		/*
		 * A <- IP version; A == expected-version?
		 * If no particular version specified, check for non-zero.
		 */
		struct bpf_insn insns_af[] = {
			BPF_STMT(BPF_LD+BPF_W+BPF_MEM, BPF_MW_IPVER),
			BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, ver, jt, jf),
		};
		add_insns(ctx, insns_af, __arraycount(insns_af));
		ctx->flags |= FETCHED_L3;
		ctx->af = af;

		if (af) {
			uint32_t mwords[] = { BM_IPVER, 1, af };
			add_bmarks(ctx, mwords, sizeof(mwords));
		}
		if (ingroup) {
			npfctl_bpf_group_enter(ctx, invert);
		}

	} else if (af && af != ctx->af) {
		errx(EXIT_FAILURE, "address family mismatch");
	}

	if ((flags & X_EQ_L4OFF) != 0 && (ctx->flags & X_EQ_L4OFF) == 0) {
		/* X <- IP header length */
		struct bpf_insn insns_hlen[] = {
			BPF_STMT(BPF_LDX+BPF_MEM, BPF_MW_L4OFF),
		};
		add_insns(ctx, insns_hlen, __arraycount(insns_hlen));
		ctx->flags |= X_EQ_L4OFF;
	}
}

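/*
 * fetch_ether_type: code fragment to check the Ethernet frame type,
 * unless it has already been fetched and checked.
 */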
void
fetch_ether_type(npf_bpf_t *ctx, uint16_t type)
{
	if ((ctx->flags & FETCHED_L2) != 0 || (type && ctx->eth_type != 0))
		return;

	const uint8_t jt = type ? 0 : JUMP_MAGIC;
	const uint8_t jf = type ? JUMP_MAGIC : 0;
	const bool ingroup = ctx->ingroup != 0;
	const bool invert = ctx->invert;
	unsigned off = offsetof(struct ether_header, ether_type);

	/*
	 * L2 block cannot be inserted in the middle of a group.
	 * Check and start the group after.
	 */
	if (ingroup) {
		assert(ctx->nblocks == ctx->gblock);
		npfctl_bpf_group_exit(ctx);
	}

	type = ntohs(type);

	struct bpf_insn insns_et[] = {
		BPF_STMT(BPF_LD+BPF_H+BPF_ABS, off),
		BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, type, jt, jf),
	};
	add_insns(ctx, insns_et, __arraycount(insns_et));
	ctx->flags |= FETCHED_L2;
	ctx->eth_type = type;

	if (type) { /* bookmark ether type */
		uint32_t mwords[] = { BM_ETHER_TYPE, 1, htons(type) };
		add_bmarks(ctx, mwords, sizeof(mwords));
	}
	if (ingroup) {
		npfctl_bpf_group_enter(ctx, invert);
	}
}

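/*
 * bm_invert_checkpoint: when inside an inverted group, record a one-off
 * source/destination negation mark for the given layer.
 */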
static void
bm_invert_checkpoint(npf_bpf_t *ctx, const unsigned opts, uint32_t layer)
{
	uint32_t bm = 0;

	if (ctx->ingroup && ctx->invert) {
		const unsigned seen = ctx->invflags;

		if ((opts & MATCH_SRC) != 0 && (seen & MATCH_SRC) == 0) {
			bm = (layer & NPF_RULE_LAYER_3) ? BM_SRC_NEG : BM_SRC_ENEG;
		}
		if ((opts & MATCH_DST) != 0 && (seen & MATCH_DST) == 0) {
			bm = (layer & NPF_RULE_LAYER_3) ? BM_DST_NEG : BM_DST_ENEG;
		}
		ctx->invflags |= opts & (MATCH_SRC | MATCH_DST);
	}
	if (bm) {
		uint32_t mwords[] = { bm, 0 };
		add_bmarks(ctx, mwords, sizeof(mwords));
	}
}

/*
 * npfctl_bpf_ipver: code block to match the IP version.
 */
void
npfctl_bpf_ipver(npf_bpf_t *ctx, sa_family_t af)
{
	fetch_l3(ctx, af, 0);
}

/*
 * npfctl_bpf_proto: code block to match the L4 protocol.
 */
void
npfctl_bpf_proto(npf_bpf_t *ctx, unsigned proto)
{
	struct bpf_insn insns_proto[] = {
		/* A <- L4 protocol; A == expected-protocol? */
		BPF_STMT(BPF_LD+BPF_W+BPF_MEM, BPF_MW_L4PROTO),
		BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, proto, 0, JUMP_MAGIC),
	};
	add_insns(ctx, insns_proto, __arraycount(insns_proto));

	uint32_t mwords[] = { BM_PROTO, 1, proto };
	done_block(ctx, mwords, sizeof(mwords));
	ctx->flags |= CHECKED_L4_PROTO;
}

/*
 * npfctl_bpf_cidr: code block to match IPv4 or IPv6 CIDR.
 *
 * => IP address shall be in the network byte order.
 */
void
npfctl_bpf_cidr(npf_bpf_t *ctx, unsigned opts, sa_family_t af,
    const npf_addr_t *addr, const npf_netmask_t mask)
{
	const uint32_t *awords = (const uint32_t *)addr;
	unsigned nwords, origlength, length, maxmask, off;

	assert(((opts & MATCH_SRC) != 0) ^ ((opts & MATCH_DST) != 0));
	assert((mask && mask <= NPF_MAX_NETMASK) || mask == NPF_NO_NETMASK);

	switch (af) {
	case AF_INET:
		maxmask = 32;
		off = (opts & MATCH_SRC) ?
		    offsetof(struct ip, ip_src) :
		    offsetof(struct ip, ip_dst);
		nwords = sizeof(struct in_addr) / sizeof(uint32_t);
		break;
	case AF_INET6:
		maxmask = 128;
		off = (opts & MATCH_SRC) ?
		    offsetof(struct ip6_hdr, ip6_src) :
		    offsetof(struct ip6_hdr, ip6_dst);
		nwords = sizeof(struct in6_addr) / sizeof(uint32_t);
		break;
	default:
		abort();
	}

	/* Ensure address family. */
	fetch_l3(ctx, af, 0);

	length = origlength = (mask == NPF_NO_NETMASK) ? maxmask : mask;

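	/*
	 * Illustrative example: an IPv6 /72 is checked as 32 + 32 + 8
	 * bits, i.e. two full words (no mask applied) and one partial
	 * word masked with 0xff000000; the fourth word is skipped once
	 * the remaining length reaches zero.
	 */
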
	/* CAUTION: BPF operates in host byte-order. */
	for (unsigned i = 0; i < nwords; i++) {
		const unsigned woff = i * sizeof(uint32_t);
		uint32_t word = ntohl(awords[i]);
		uint32_t wordmask;

		if (length >= 32) {
			/* The mask is a full word - do not apply it. */
			wordmask = 0;
			length -= 32;
		} else if (length) {
			wordmask = 0xffffffff << (32 - length);
			length = 0;
		} else {
			/* The mask became zero - skip the rest. */
			break;
		}

		/* A <- IP address (or one word of it) */
		struct bpf_insn insns_ip[] = {
			BPF_STMT(BPF_LD+BPF_W+BPF_ABS, off + woff),
		};
		add_insns(ctx, insns_ip, __arraycount(insns_ip));

		/* A <- (A & MASK) */
		if (wordmask) {
			struct bpf_insn insns_mask[] = {
				BPF_STMT(BPF_ALU+BPF_AND+BPF_K, wordmask),
			};
			add_insns(ctx, insns_mask, __arraycount(insns_mask));
		}

		/*
		 * Determine how many instructions we have to jump
		 * ahead if the match fails.
		 *
		 * - If this is the last word, we jump to the final
		 *   failure, JUMP_MAGIC.
		 *
		 * - If this is not the last word, we jump past the
		 *   remaining instructions to match this sequence.
		 *   Each 32-bit word in the sequence takes two
		 *   instructions (BPF_LD and BPF_JMP).  If there is a
		 *   partial-word mask ahead, there will be one
		 *   additional instruction (BPF_ALU).
		 */
		uint8_t jf;
		if (i + 1 == (origlength + 31)/32) {
			jf = JUMP_MAGIC;
		} else {
			jf = 2*((origlength + 31)/32 - i - 1);
			if (origlength % 32 != 0 && wordmask == 0)
				jf += 1;
		}

		/* A == expected-IP-word ? */
		struct bpf_insn insns_cmp[] = {
			BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, word, 0, jf),
		};
		add_insns(ctx, insns_cmp, __arraycount(insns_cmp));
	}

	/*
	 * If we checked a chain of words in sequence, mark this as a
	 * multi-word comparison so if this is in a group there will be
	 * a fallthrough case.
	 *
	 * XXX This is a little silly; the compiler should really just
	 * record holes where conditional jumps need success/failure
	 * continuations, and go back to fill in the holes when the
	 * locations of the continuations are determined later.  But
	 * that requires restructuring this code a little more.
	 */
	ctx->multiword = (origlength + 31)/32 > 1;

	uint32_t mwords[] = {
		(opts & MATCH_SRC) ? BM_SRC_CIDR: BM_DST_CIDR, 6,
		af, mask, awords[0], awords[1], awords[2], awords[3],
	};
	bm_invert_checkpoint(ctx, opts, NPF_RULE_LAYER_3);
	done_block(ctx, mwords, sizeof(mwords));
}

/*
 * npfctl_bpf_ether: code block to match the 6-octet Ethernet address,
 * fetched and compared as one 32-bit word followed by a 16-bit halfword.
 */
void
npfctl_bpf_ether(npf_bpf_t *ctx, unsigned opts, struct ether_addr *ether_addr)
{
	uint32_t mac_word;
	uint16_t mac_hword;
	unsigned off;

	assert(((opts & MATCH_SRC) != 0) ^ ((opts & MATCH_DST) != 0));

	off = (opts & MATCH_SRC) ? offsetof(struct ether_header, ether_shost) :
	    offsetof(struct ether_header, ether_dhost);

	memcpy(&mac_word, ether_addr, sizeof(mac_word));
	mac_word = ntohl(mac_word);

	/* Copy the last two bytes of the 6-byte ether address. */
	memcpy(&mac_hword, (uint8_t *)ether_addr + sizeof(mac_word),
	    sizeof(mac_hword));
	mac_hword = ntohs(mac_hword);

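	/*
	 * Illustrative example: 00:11:22:33:44:55 is compared as the
	 * 32-bit word 0x00112233 followed by the 16-bit halfword 0x4455
	 * (in host byte order, since BPF compares in host byte order).
	 */
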
	/*
	 * Load and compare the first word, then do the same for the
	 * last halfword.
	 */
	struct bpf_insn insns_ether_w[] = {
		BPF_STMT(BPF_LD+BPF_W+BPF_ABS, off),
		BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, mac_word, 0, JUMP_MAGIC),
	};
	add_insns(ctx, insns_ether_w, __arraycount(insns_ether_w));

	struct bpf_insn insns_ether_h[] = {
		BPF_STMT(BPF_LD+BPF_H+BPF_ABS, off + sizeof(mac_word)),
		BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, mac_hword, 0, JUMP_MAGIC),
	};
	add_insns(ctx, insns_ether_h, __arraycount(insns_ether_h));

	uint32_t mwords[] = {
		(opts & MATCH_SRC) ? BM_SRC_ETHER: BM_DST_ETHER, 2,
		htonl(mac_word), htons(mac_hword)
	};

	bm_invert_checkpoint(ctx, opts, NPF_RULE_LAYER_2);
	done_block(ctx, mwords, sizeof(mwords));
}

/*
 * npfctl_bpf_ports: code block to match TCP/UDP port range.
 *
 * => Port numbers shall be in the network byte order.
 */
void
npfctl_bpf_ports(npf_bpf_t *ctx, unsigned opts, in_port_t from, in_port_t to)
{
	const unsigned sport_off = offsetof(struct udphdr, uh_sport);
	const unsigned dport_off = offsetof(struct udphdr, uh_dport);
	unsigned off;

	/* TCP and UDP port offsets are the same. */
	assert(sport_off == offsetof(struct tcphdr, th_sport));
	assert(dport_off == offsetof(struct tcphdr, th_dport));
	assert(ctx->flags & CHECKED_L4_PROTO);

	assert(((opts & MATCH_SRC) != 0) ^ ((opts & MATCH_DST) != 0));
	off = (opts & MATCH_SRC) ? sport_off : dport_off;

	/* X <- IP header length */
	fetch_l3(ctx, AF_UNSPEC, X_EQ_L4OFF);

	struct bpf_insn insns_fetch[] = {
		/* A <- port */
		BPF_STMT(BPF_LD+BPF_H+BPF_IND, off),
	};
	add_insns(ctx, insns_fetch, __arraycount(insns_fetch));

	/* CAUTION: BPF operates in host byte-order. */
	from = ntohs(from);
	to = ntohs(to);

	if (from == to) {
		/* Single port case. */
		struct bpf_insn insns_port[] = {
			BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, from, 0, JUMP_MAGIC),
		};
		add_insns(ctx, insns_port, __arraycount(insns_port));
	} else {
		/* Port range case. */
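		/*
		 * If from <= A <= to, the bound checks fall through
		 * to the next block, skipping the "ja" below;
		 * otherwise the "ja" to the failure path (JUMP_MAGIC)
		 * is reached.
		 */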
		struct bpf_insn insns_range[] = {
			BPF_JUMP(BPF_JMP+BPF_JGE+BPF_K, from, 0, 1),
			BPF_JUMP(BPF_JMP+BPF_JGT+BPF_K, to, 0, 1),
			BPF_STMT(BPF_JMP+BPF_JA, JUMP_MAGIC),
		};
		add_insns(ctx, insns_range, __arraycount(insns_range));
	}

	uint32_t mwords[] = {
		(opts & MATCH_SRC) ? BM_SRC_PORTS : BM_DST_PORTS, 2, from, to
	};
	done_block(ctx, mwords, sizeof(mwords));
}

/*
 * npfctl_bpf_tcpfl: code block to match TCP flags.
 */
void
npfctl_bpf_tcpfl(npf_bpf_t *ctx, uint8_t tf, uint8_t tf_mask)
{
	const unsigned tcpfl_off = offsetof(struct tcphdr, th_flags);
	const bool usingmask = tf_mask != tf;

	/* X <- IP header length */
	fetch_l3(ctx, AF_UNSPEC, X_EQ_L4OFF);

	if ((ctx->flags & CHECKED_L4_PROTO) == 0) {
		const unsigned jf = usingmask ? 3 : 2;
		assert(ctx->ingroup == 0);

		/*
		 * A <- L4 protocol; A == TCP?  If not, jump out.
		 *
		 * Note: the TCP flag matching might be without 'proto tcp'
		 * when using a plain 'stateful' rule.  In such a case it
		 * also handles other protocols, thus no strict TCP check.
		 */
		struct bpf_insn insns_tcp[] = {
			BPF_STMT(BPF_LD+BPF_W+BPF_MEM, BPF_MW_L4PROTO),
			BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, IPPROTO_TCP, 0, jf),
		};
		add_insns(ctx, insns_tcp, __arraycount(insns_tcp));
	}

	struct bpf_insn insns_tf[] = {
		/* A <- TCP flags */
		BPF_STMT(BPF_LD+BPF_B+BPF_IND, tcpfl_off),
	};
	add_insns(ctx, insns_tf, __arraycount(insns_tf));

	if (usingmask) {
		/* A <- (A & mask) */
		struct bpf_insn insns_mask[] = {
			BPF_STMT(BPF_ALU+BPF_AND+BPF_K, tf_mask),
		};
		add_insns(ctx, insns_mask, __arraycount(insns_mask));
	}

	struct bpf_insn insns_cmp[] = {
		/* A == expected-TCP-flags? */
		BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, tf, 0, JUMP_MAGIC),
	};
	add_insns(ctx, insns_cmp, __arraycount(insns_cmp));

	uint32_t mwords[] = { BM_TCPFL, 2, tf, tf_mask };
	done_block(ctx, mwords, sizeof(mwords));
}

/*
 * npfctl_bpf_icmp: code block to match ICMP type and/or code.
 * Note: suitable for both ICMPv4 and ICMPv6.
 */
void
npfctl_bpf_icmp(npf_bpf_t *ctx, int type, int code)
{
	const u_int type_off = offsetof(struct icmp, icmp_type);
	const u_int code_off = offsetof(struct icmp, icmp_code);

	assert(ctx->flags & CHECKED_L4_PROTO);
	assert(offsetof(struct icmp6_hdr, icmp6_type) == type_off);
	assert(offsetof(struct icmp6_hdr, icmp6_code) == code_off);
	assert(type != -1 || code != -1);

	/* X <- IP header length */
	fetch_l3(ctx, AF_UNSPEC, X_EQ_L4OFF);

	if (type != -1) {
		struct bpf_insn insns_type[] = {
			BPF_STMT(BPF_LD+BPF_B+BPF_IND, type_off),
			BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, type, 0, JUMP_MAGIC),
		};
		add_insns(ctx, insns_type, __arraycount(insns_type));

		uint32_t mwords[] = { BM_ICMP_TYPE, 1, type };
		done_block(ctx, mwords, sizeof(mwords));
	}

	if (code != -1) {
		struct bpf_insn insns_code[] = {
			BPF_STMT(BPF_LD+BPF_B+BPF_IND, code_off),
			BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, code, 0, JUMP_MAGIC),
		};
		add_insns(ctx, insns_code, __arraycount(insns_code));

		uint32_t mwords[] = { BM_ICMP_CODE, 1, code };
		done_block(ctx, mwords, sizeof(mwords));
	}
}

#define	SRC_FLAG_BIT	(1U << 31)

/*
 * npfctl_bpf_table: code block to match source/destination IP address
 * against NPF table specified by ID.
 */
void
npfctl_bpf_table(npf_bpf_t *ctx, unsigned opts, unsigned tid)
{
	const bool src = (opts & MATCH_SRC) != 0;

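	/*
	 * A <- table ID (with the source flag bit when matching the
	 * source address); the BPF coprocessor call (NPF_COP_TABLE)
	 * performs the table lookup, and a zero result, i.e. no match,
	 * jumps to the failure path.
	 */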
	struct bpf_insn insns_table[] = {
		BPF_STMT(BPF_LD+BPF_IMM, (src ? SRC_FLAG_BIT : 0) | tid),
		BPF_STMT(BPF_MISC+BPF_COP, NPF_COP_TABLE),
		BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, 0, JUMP_MAGIC, 0),
	};
	add_insns(ctx, insns_table, __arraycount(insns_table));

	uint32_t mwords[] = { src ? BM_SRC_TABLE: BM_DST_TABLE, 1, tid };
	bm_invert_checkpoint(ctx, opts, NPF_RULE_LAYER_3);
	done_block(ctx, mwords, sizeof(mwords));
}