npf_bpf_comp.c revision 1.2       1  1.2  rmind /*	$NetBSD: npf_bpf_comp.c,v 1.2 2013/11/05 01:50:30 rmind Exp $	*/
      2  1.1  rmind 
      3  1.1  rmind /*-
      4  1.1  rmind  * Copyright (c) 2010-2013 The NetBSD Foundation, Inc.
      5  1.1  rmind  * All rights reserved.
      6  1.1  rmind  *
      7  1.1  rmind  * This material is based upon work partially supported by The
      8  1.1  rmind  * NetBSD Foundation under a contract with Mindaugas Rasiukevicius.
      9  1.1  rmind  *
     10  1.1  rmind  * Redistribution and use in source and binary forms, with or without
     11  1.1  rmind  * modification, are permitted provided that the following conditions
     12  1.1  rmind  * are met:
     13  1.1  rmind  * 1. Redistributions of source code must retain the above copyright
     14  1.1  rmind  *    notice, this list of conditions and the following disclaimer.
     15  1.1  rmind  * 2. Redistributions in binary form must reproduce the above copyright
     16  1.1  rmind  *    notice, this list of conditions and the following disclaimer in the
     17  1.1  rmind  *    documentation and/or other materials provided with the distribution.
     18  1.1  rmind  *
     19  1.1  rmind  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     20  1.1  rmind  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     21  1.1  rmind  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     22  1.1  rmind  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     23  1.1  rmind  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     24  1.1  rmind  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     25  1.1  rmind  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     26  1.1  rmind  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     27  1.1  rmind  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     28  1.1  rmind  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     29  1.1  rmind  * POSSIBILITY OF SUCH DAMAGE.
     30  1.1  rmind  */
     31  1.1  rmind 
     32  1.1  rmind /*
     33  1.1  rmind  * BPF byte-code generation for NPF rules.
     34  1.1  rmind  */
     35  1.1  rmind 
     36  1.1  rmind #include <sys/cdefs.h>
     37  1.2  rmind __RCSID("$NetBSD: npf_bpf_comp.c,v 1.2 2013/11/05 01:50:30 rmind Exp $");
     38  1.1  rmind 
     39  1.1  rmind #include <stdlib.h>
     40  1.1  rmind #include <stdbool.h>
     41  1.1  rmind #include <stddef.h>
     42  1.1  rmind #include <string.h>
     43  1.1  rmind #include <inttypes.h>
     44  1.1  rmind #include <err.h>
     45  1.1  rmind #include <assert.h>
     46  1.1  rmind 
     47  1.1  rmind #include <netinet/in.h>
     48  1.1  rmind #include <netinet/in_systm.h>
     49  1.1  rmind #include <netinet/ip.h>
     50  1.1  rmind #include <netinet/ip6.h>
     51  1.1  rmind #include <netinet/udp.h>
     52  1.1  rmind #include <netinet/tcp.h>
     53  1.1  rmind #include <netinet/ip_icmp.h>
     54  1.1  rmind #include <netinet/icmp6.h>
     55  1.1  rmind 
     56  1.1  rmind #include <net/bpf.h>
     57  1.1  rmind 
     58  1.1  rmind #include "npfctl.h"
     59  1.1  rmind 
/*
 * Code generation state bits kept in the 'flags' member of the context.
 *
 * Note: X_EQ_L4OFF has to be cleared whenever register X is invalidated,
 * i.e. once it stores something other than the L4 header offset.  As a
 * rule, that is whenever BPF_LDX is emitted.
 */
#define	FETCHED_L3		0x01	/* L3 fetch code has been emitted */
#define	X_EQ_L4OFF		0x02	/* X holds the L4 header offset */
     66  1.1  rmind 
     67  1.1  rmind struct npf_bpf {
     68  1.1  rmind 	/*
     69  1.1  rmind 	 * BPF program code, the allocated length (in bytes), the number
     70  1.1  rmind 	 * of logical blocks and the flags.
     71  1.1  rmind 	 */
     72  1.1  rmind 	struct bpf_program	prog;
     73  1.1  rmind 	size_t			alen;
     74  1.1  rmind 	u_int			nblocks;
     75  1.1  rmind 	sa_family_t		af;
     76  1.1  rmind 	uint32_t		flags;
     77  1.1  rmind 
     78  1.1  rmind 	/* The current group offset and block number. */
     79  1.1  rmind 	bool			ingroup;
     80  1.1  rmind 	u_int			goff;
     81  1.1  rmind 	u_int			gblock;
     82  1.1  rmind 
     83  1.1  rmind 	/* BPF marks, allocated length and the real length. */
     84  1.1  rmind 	uint32_t *		marks;
     85  1.1  rmind 	size_t			malen;
     86  1.1  rmind 	size_t			mlen;
     87  1.1  rmind };
     88  1.1  rmind 
/*
 * Values returned by the generated BPF program to NPF: all bits set
 * on success (match) and zero on failure (no match).
 */
#define	NPF_BPF_SUCCESS		((u_int)-1)
#define	NPF_BPF_FAILURE		0

/*
 * Placeholder jump offset which stands for "jump to the failure path".
 * It is replaced with the real offset once the code is complete.
 * Note: BPF jump offsets are one byte wide, so this is the largest one.
 */
#define	JUMP_MAGIC		0xff

/*
 * Buffers are expanded in 64 byte units to reduce re-allocations:
 * ALLOC_ROUND() rounds a length up to the next multiple of 64.
 */
#define	ALLOC_MASK		(64 - 1)
#define	ALLOC_ROUND(x)		(((x) + ALLOC_MASK) & ~ALLOC_MASK)
    104  1.1  rmind 
    105  1.1  rmind npf_bpf_t *
    106  1.1  rmind npfctl_bpf_create(void)
    107  1.1  rmind {
    108  1.1  rmind 	return ecalloc(1, sizeof(npf_bpf_t));
    109  1.1  rmind }
    110  1.1  rmind 
    111  1.1  rmind static void
    112  1.1  rmind fixup_jumps(npf_bpf_t *ctx, u_int start, u_int end, bool swap)
    113  1.1  rmind {
    114  1.1  rmind 	struct bpf_program *bp = &ctx->prog;
    115  1.1  rmind 
    116  1.1  rmind 	for (u_int i = start; i < end; i++) {
    117  1.1  rmind 		struct bpf_insn *insn = &bp->bf_insns[i];
    118  1.1  rmind 		const u_int fail_off = end - i;
    119  1.1  rmind 
    120  1.1  rmind 		if (fail_off >= JUMP_MAGIC) {
    121  1.1  rmind 			errx(EXIT_FAILURE, "BPF generation error: "
    122  1.1  rmind 			    "the number of instructions is over the limit");
    123  1.1  rmind 		}
    124  1.1  rmind 		if (BPF_CLASS(insn->code) != BPF_JMP) {
    125  1.1  rmind 			continue;
    126  1.1  rmind 		}
    127  1.1  rmind 		if (swap) {
    128  1.1  rmind 			uint8_t jt = insn->jt;
    129  1.1  rmind 			insn->jt = insn->jf;
    130  1.1  rmind 			insn->jf = jt;
    131  1.1  rmind 		}
    132  1.1  rmind 		if (insn->jt == JUMP_MAGIC)
    133  1.1  rmind 			insn->jt = fail_off;
    134  1.1  rmind 		if (insn->jf == JUMP_MAGIC)
    135  1.1  rmind 			insn->jf = fail_off;
    136  1.1  rmind 	}
    137  1.1  rmind }
    138  1.1  rmind 
    139  1.1  rmind static void
    140  1.1  rmind add_insns(npf_bpf_t *ctx, struct bpf_insn *insns, size_t count)
    141  1.1  rmind {
    142  1.1  rmind 	struct bpf_program *bp = &ctx->prog;
    143  1.1  rmind 	size_t offset, len, reqlen;
    144  1.1  rmind 
    145  1.1  rmind 	/* Note: bf_len is the count of instructions. */
    146  1.1  rmind 	offset = bp->bf_len * sizeof(struct bpf_insn);
    147  1.1  rmind 	len = count * sizeof(struct bpf_insn);
    148  1.1  rmind 
    149  1.1  rmind 	/* Ensure the memory buffer for the program. */
    150  1.1  rmind 	reqlen = ALLOC_ROUND(offset + len);
    151  1.1  rmind 	if (reqlen > ctx->alen) {
    152  1.1  rmind 		bp->bf_insns = erealloc(bp->bf_insns, reqlen);
    153  1.1  rmind 		ctx->alen = reqlen;
    154  1.1  rmind 	}
    155  1.1  rmind 
    156  1.1  rmind 	/* Add the code block. */
    157  1.1  rmind 	memcpy((uint8_t *)bp->bf_insns + offset, insns, len);
    158  1.1  rmind 	bp->bf_len += count;
    159  1.1  rmind }
    160  1.1  rmind 
    161  1.1  rmind static void
    162  1.1  rmind done_raw_block(npf_bpf_t *ctx, const uint32_t *m, size_t len)
    163  1.1  rmind {
    164  1.1  rmind 	size_t reqlen, nargs = m[1];
    165  1.1  rmind 
    166  1.1  rmind 	if ((len / sizeof(uint32_t) - 2) != nargs) {
    167  1.1  rmind 		errx(EXIT_FAILURE, "invalid BPF block description");
    168  1.1  rmind 	}
    169  1.1  rmind 	reqlen = ALLOC_ROUND(ctx->mlen + len);
    170  1.1  rmind 	if (reqlen > ctx->malen) {
    171  1.1  rmind 		ctx->marks = erealloc(ctx->marks, reqlen);
    172  1.1  rmind 		ctx->malen = reqlen;
    173  1.1  rmind 	}
    174  1.1  rmind 	memcpy((uint8_t *)ctx->marks + ctx->mlen, m, len);
    175  1.1  rmind 	ctx->mlen += len;
    176  1.1  rmind }
    177  1.1  rmind 
    178  1.1  rmind static void
    179  1.1  rmind done_block(npf_bpf_t *ctx, const uint32_t *m, size_t len)
    180  1.1  rmind {
    181  1.1  rmind 	done_raw_block(ctx, m, len);
    182  1.1  rmind 	ctx->nblocks++;
    183  1.1  rmind }
    184  1.1  rmind 
    185  1.1  rmind struct bpf_program *
    186  1.1  rmind npfctl_bpf_complete(npf_bpf_t *ctx)
    187  1.1  rmind {
    188  1.1  rmind 	struct bpf_program *bp = &ctx->prog;
    189  1.1  rmind 	const u_int retoff = bp->bf_len;
    190  1.1  rmind 
    191  1.1  rmind 	/* Add the return fragment (success and failure paths). */
    192  1.1  rmind 	struct bpf_insn insns_ret[] = {
    193  1.1  rmind 		BPF_STMT(BPF_RET+BPF_K, NPF_BPF_SUCCESS),
    194  1.1  rmind 		BPF_STMT(BPF_RET+BPF_K, NPF_BPF_FAILURE),
    195  1.1  rmind 	};
    196  1.1  rmind 	add_insns(ctx, insns_ret, __arraycount(insns_ret));
    197  1.1  rmind 
    198  1.1  rmind 	/* Fixup all jumps to the main failure path. */
    199  1.1  rmind 	fixup_jumps(ctx, 0, retoff, false);
    200  1.1  rmind 
    201  1.1  rmind 	return &ctx->prog;
    202  1.1  rmind }
    203  1.1  rmind 
    204  1.1  rmind const void *
    205  1.1  rmind npfctl_bpf_bmarks(npf_bpf_t *ctx, size_t *len)
    206  1.1  rmind {
    207  1.1  rmind 	*len = ctx->mlen;
    208  1.1  rmind 	return ctx->marks;
    209  1.1  rmind }
    210  1.1  rmind 
    211  1.1  rmind void
    212  1.1  rmind npfctl_bpf_destroy(npf_bpf_t *ctx)
    213  1.1  rmind {
    214  1.1  rmind 	free(ctx->prog.bf_insns);
    215  1.1  rmind 	free(ctx->marks);
    216  1.1  rmind 	free(ctx);
    217  1.1  rmind }
    218  1.1  rmind 
    219  1.1  rmind /*
    220  1.1  rmind  * npfctl_bpf_group: begin a logical group.  It merely uses logical
    221  1.1  rmind  * disjunction (OR) for compares within the group.
    222  1.1  rmind  */
    223  1.1  rmind void
    224  1.1  rmind npfctl_bpf_group(npf_bpf_t *ctx)
    225  1.1  rmind {
    226  1.1  rmind 	struct bpf_program *bp = &ctx->prog;
    227  1.1  rmind 
    228  1.1  rmind 	assert(ctx->goff == 0);
    229  1.1  rmind 	assert(ctx->gblock == 0);
    230  1.1  rmind 
    231  1.1  rmind 	ctx->goff = bp->bf_len;
    232  1.1  rmind 	ctx->gblock = ctx->nblocks;
    233  1.1  rmind 	ctx->ingroup = true;
    234  1.1  rmind }
    235  1.1  rmind 
    236  1.1  rmind void
    237  1.1  rmind npfctl_bpf_endgroup(npf_bpf_t *ctx)
    238  1.1  rmind {
    239  1.1  rmind 	struct bpf_program *bp = &ctx->prog;
    240  1.1  rmind 	const size_t curoff = bp->bf_len;
    241  1.1  rmind 
    242  1.1  rmind 	/* If there are no blocks or only one - nothing to do. */
    243  1.1  rmind 	if ((ctx->nblocks - ctx->gblock) <= 1) {
    244  1.1  rmind 		ctx->goff = ctx->gblock = 0;
    245  1.1  rmind 		return;
    246  1.1  rmind 	}
    247  1.1  rmind 
    248  1.1  rmind 	/*
    249  1.1  rmind 	 * Append a failure return as a fall-through i.e. if there is
    250  1.1  rmind 	 * no match within the group.
    251  1.1  rmind 	 */
    252  1.1  rmind 	struct bpf_insn insns_ret[] = {
    253  1.1  rmind 		BPF_STMT(BPF_RET+BPF_K, NPF_BPF_FAILURE),
    254  1.1  rmind 	};
    255  1.1  rmind 	add_insns(ctx, insns_ret, __arraycount(insns_ret));
    256  1.1  rmind 
    257  1.1  rmind 	/*
    258  1.1  rmind 	 * Adjust jump offsets: on match - jump outside the group i.e.
    259  1.1  rmind 	 * to the current offset.  Otherwise, jump to the next instruction
    260  1.1  rmind 	 * which would lead to the fall-through code above if none matches.
    261  1.1  rmind 	 */
    262  1.1  rmind 	fixup_jumps(ctx, ctx->goff, curoff, true);
    263  1.1  rmind 	ctx->goff = ctx->gblock = 0;
    264  1.1  rmind }
    265  1.1  rmind 
    266  1.1  rmind static void
    267  1.1  rmind fetch_l3(npf_bpf_t *ctx, sa_family_t af, u_int flags)
    268  1.1  rmind {
    269  1.1  rmind 	u_int ver;
    270  1.1  rmind 
    271  1.1  rmind 	switch (af) {
    272  1.1  rmind 	case AF_INET:
    273  1.1  rmind 		ver = IPVERSION;
    274  1.1  rmind 		break;
    275  1.1  rmind 	case AF_INET6:
    276  1.1  rmind 		ver = IPV6_VERSION >> 4;
    277  1.1  rmind 		break;
    278  1.1  rmind 	case AF_UNSPEC:
    279  1.1  rmind 		ver = 0;
    280  1.1  rmind 		break;
    281  1.1  rmind 	default:
    282  1.1  rmind 		abort();
    283  1.1  rmind 	}
    284  1.1  rmind 
    285  1.1  rmind 	/*
    286  1.1  rmind 	 * Fetch L3 information.  The coprocessor populates the following
    287  1.1  rmind 	 * words in the scratch memory store:
    288  1.1  rmind 	 * - BPF_MW_IPVER: IP version (4 or 6).
    289  1.1  rmind 	 * - BPF_MW_L4OFF: L4 header offset.
    290  1.1  rmind 	 * - BPF_MW_L4PROTO: L4 protocol.
    291  1.1  rmind 	 */
    292  1.1  rmind 	if ((ctx->flags & FETCHED_L3) == 0 || (af && ctx->af == 0)) {
    293  1.1  rmind 		const uint8_t jt = ver ? 0 : JUMP_MAGIC;
    294  1.1  rmind 		const uint8_t jf = ver ? JUMP_MAGIC : 0;
    295  1.1  rmind 		bool ingroup = ctx->ingroup;
    296  1.1  rmind 
    297  1.1  rmind 		/*
    298  1.1  rmind 		 * L3 block cannot be inserted in the middle of a group.
    299  1.1  rmind 		 * In fact, it never is.  Check and start the group after.
    300  1.1  rmind 		 */
    301  1.1  rmind 		if (ingroup) {
    302  1.1  rmind 			assert(ctx->nblocks == ctx->gblock);
    303  1.1  rmind 			npfctl_bpf_endgroup(ctx);
    304  1.1  rmind 		}
    305  1.1  rmind 
    306  1.1  rmind 		/*
    307  1.1  rmind 		 * A <- IP version; A == expected-version?
    308  1.1  rmind 		 * If no particular version specified, check for non-zero.
    309  1.1  rmind 		 */
    310  1.2  rmind 		if ((ctx->flags & FETCHED_L3) == 0) {
    311  1.2  rmind 			struct bpf_insn insns_l3[] = {
    312  1.2  rmind 				BPF_STMT(BPF_MISC+BPF_COP, NPF_COP_L3),
    313  1.2  rmind 				BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, ver, jt, jf),
    314  1.2  rmind 			};
    315  1.2  rmind 			add_insns(ctx, insns_l3, __arraycount(insns_l3));
    316  1.2  rmind 			ctx->flags |= FETCHED_L3;
    317  1.2  rmind 		} else {
    318  1.2  rmind 			/* IP version is already fetched in BPF_MW_IPVER. */
    319  1.2  rmind 			struct bpf_insn insns_af[] = {
    320  1.2  rmind 				BPF_STMT(BPF_LD+BPF_W+BPF_MEM, BPF_MW_IPVER),
    321  1.2  rmind 				BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, ver, jt, jf),
    322  1.2  rmind 			};
    323  1.2  rmind 			add_insns(ctx, insns_af, __arraycount(insns_af));
    324  1.2  rmind 		}
    325  1.1  rmind 		ctx->af = af;
    326  1.1  rmind 
    327  1.1  rmind 		if (af) {
    328  1.1  rmind 			uint32_t mwords[] = { BM_IPVER, 1, af };
    329  1.1  rmind 			done_raw_block(ctx, mwords, sizeof(mwords));
    330  1.1  rmind 		}
    331  1.1  rmind 		if (ingroup) {
    332  1.1  rmind 			npfctl_bpf_group(ctx);
    333  1.1  rmind 		}
    334  1.1  rmind 
    335  1.1  rmind 	} else if (af && af != ctx->af) {
    336  1.1  rmind 		errx(EXIT_FAILURE, "address family mismatch");
    337  1.1  rmind 	}
    338  1.1  rmind 
    339  1.1  rmind 	if ((flags & X_EQ_L4OFF) != 0 && (ctx->flags & X_EQ_L4OFF) == 0) {
    340  1.1  rmind 		/* X <- IP header length */
    341  1.1  rmind 		struct bpf_insn insns_hlen[] = {
    342  1.1  rmind 			BPF_STMT(BPF_LDX+BPF_MEM, BPF_MW_L4OFF),
    343  1.1  rmind 		};
    344  1.1  rmind 		add_insns(ctx, insns_hlen, __arraycount(insns_hlen));
    345  1.1  rmind 		ctx->flags |= X_EQ_L4OFF;
    346  1.1  rmind 	}
    347  1.1  rmind }
    348  1.1  rmind 
    349  1.1  rmind /*
    350  1.1  rmind  * npfctl_bpf_proto: code block to match IP version and L4 protocol.
    351  1.1  rmind  */
    352  1.1  rmind void
    353  1.1  rmind npfctl_bpf_proto(npf_bpf_t *ctx, sa_family_t af, int proto)
    354  1.1  rmind {
    355  1.1  rmind 	assert(af != AF_UNSPEC || proto != -1);
    356  1.1  rmind 
    357  1.1  rmind 	/* Note: fails if IP version does not match. */
    358  1.1  rmind 	fetch_l3(ctx, af, 0);
    359  1.1  rmind 	if (proto == -1) {
    360  1.1  rmind 		return;
    361  1.1  rmind 	}
    362  1.1  rmind 
    363  1.1  rmind 	struct bpf_insn insns_proto[] = {
    364  1.1  rmind 		/* A <- L4 protocol; A == expected-protocol? */
    365  1.1  rmind 		BPF_STMT(BPF_LD+BPF_W+BPF_MEM, BPF_MW_L4PROTO),
    366  1.1  rmind 		BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, proto, 0, JUMP_MAGIC),
    367  1.1  rmind 	};
    368  1.1  rmind 	add_insns(ctx, insns_proto, __arraycount(insns_proto));
    369  1.1  rmind 
    370  1.1  rmind 	uint32_t mwords[] = { BM_PROTO, 1, proto };
    371  1.1  rmind 	done_block(ctx, mwords, sizeof(mwords));
    372  1.1  rmind }
    373  1.1  rmind 
    374  1.1  rmind /*
    375  1.1  rmind  * npfctl_bpf_cidr: code block to match IPv4 or IPv6 CIDR.
    376  1.1  rmind  *
    377  1.1  rmind  * => IP address shall be in the network byte order.
    378  1.1  rmind  */
    379  1.1  rmind void
    380  1.1  rmind npfctl_bpf_cidr(npf_bpf_t *ctx, u_int opts, sa_family_t af,
    381  1.1  rmind     const npf_addr_t *addr, const npf_netmask_t mask)
    382  1.1  rmind {
    383  1.1  rmind 	const uint32_t *awords = (const uint32_t *)addr;
    384  1.1  rmind 	u_int nwords, length, maxmask, off;
    385  1.1  rmind 
    386  1.1  rmind 	assert(((opts & MATCH_SRC) != 0) ^ ((opts & MATCH_DST) != 0));
    387  1.1  rmind 	assert((mask && mask <= NPF_MAX_NETMASK) || mask == NPF_NO_NETMASK);
    388  1.1  rmind 
    389  1.1  rmind 	switch (af) {
    390  1.1  rmind 	case AF_INET:
    391  1.1  rmind 		maxmask = 32;
    392  1.1  rmind 		off = (opts & MATCH_SRC) ?
    393  1.1  rmind 		    offsetof(struct ip, ip_src) :
    394  1.1  rmind 		    offsetof(struct ip, ip_dst);
    395  1.1  rmind 		nwords = sizeof(struct in_addr) / sizeof(uint32_t);
    396  1.1  rmind 		break;
    397  1.1  rmind 	case AF_INET6:
    398  1.1  rmind 		maxmask = 128;
    399  1.1  rmind 		off = (opts & MATCH_SRC) ?
    400  1.1  rmind 		    offsetof(struct ip6_hdr, ip6_src) :
    401  1.1  rmind 		    offsetof(struct ip6_hdr, ip6_dst);
    402  1.1  rmind 		nwords = sizeof(struct in6_addr) / sizeof(uint32_t);
    403  1.1  rmind 		break;
    404  1.1  rmind 	default:
    405  1.1  rmind 		abort();
    406  1.1  rmind 	}
    407  1.1  rmind 
    408  1.1  rmind 	/* Ensure address family. */
    409  1.1  rmind 	fetch_l3(ctx, af, 0);
    410  1.1  rmind 
    411  1.1  rmind 	length = (mask == NPF_NO_NETMASK) ? maxmask : mask;
    412  1.1  rmind 
    413  1.1  rmind 	/* CAUTION: BPF operates in host byte-order. */
    414  1.1  rmind 	for (u_int i = 0; i < nwords; i++) {
    415  1.1  rmind 		const u_int woff = i * sizeof(uint32_t);
    416  1.1  rmind 		uint32_t word = ntohl(awords[i]);
    417  1.1  rmind 		uint32_t wordmask;
    418  1.1  rmind 
    419  1.1  rmind 		if (length >= 32) {
    420  1.1  rmind 			/* The mask is a full word - do not apply it. */
    421  1.1  rmind 			wordmask = 0;
    422  1.1  rmind 			length -= 32;
    423  1.1  rmind 		} else if (length) {
    424  1.1  rmind 			wordmask = 0xffffffff << (maxmask - length);
    425  1.1  rmind 			length = 0;
    426  1.1  rmind 		} else {
    427  1.1  rmind 			/*
    428  1.1  rmind 			 * The mask is zero - just compare the word
    429  1.1  rmind 			 * against zero.
    430  1.1  rmind 			 */
    431  1.1  rmind 			wordmask = 0;
    432  1.1  rmind 			word = 0;
    433  1.1  rmind 		}
    434  1.1  rmind 
    435  1.1  rmind 		/* A <- IP address (or one word of it) */
    436  1.1  rmind 		struct bpf_insn insns_ip[] = {
    437  1.1  rmind 			BPF_STMT(BPF_LD+BPF_W+BPF_ABS, off + woff),
    438  1.1  rmind 		};
    439  1.1  rmind 		add_insns(ctx, insns_ip, __arraycount(insns_ip));
    440  1.1  rmind 
    441  1.1  rmind 		/* A <- (A & MASK) */
    442  1.1  rmind 		if (wordmask) {
    443  1.1  rmind 			struct bpf_insn insns_mask[] = {
    444  1.1  rmind 				BPF_STMT(BPF_ALU+BPF_AND+BPF_K, wordmask),
    445  1.1  rmind 			};
    446  1.1  rmind 			add_insns(ctx, insns_mask, __arraycount(insns_mask));
    447  1.1  rmind 		}
    448  1.1  rmind 
    449  1.1  rmind 		/* A == expected-IP-word ? */
    450  1.1  rmind 		struct bpf_insn insns_cmp[] = {
    451  1.1  rmind 			BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, word, 0, JUMP_MAGIC),
    452  1.1  rmind 		};
    453  1.1  rmind 		add_insns(ctx, insns_cmp, __arraycount(insns_cmp));
    454  1.1  rmind 	}
    455  1.1  rmind 
    456  1.1  rmind 	uint32_t mwords[] = {
    457  1.1  rmind 		(opts & MATCH_SRC) ? BM_SRC_CIDR: BM_DST_CIDR, 6,
    458  1.1  rmind 		af, mask, awords[0], awords[1], awords[2], awords[3],
    459  1.1  rmind 	};
    460  1.1  rmind 	done_block(ctx, mwords, sizeof(mwords));
    461  1.1  rmind }
    462  1.1  rmind 
    463  1.1  rmind /*
    464  1.1  rmind  * npfctl_bpf_ports: code block to match TCP/UDP port range.
    465  1.1  rmind  *
    466  1.1  rmind  * => Port numbers shall be in the network byte order.
    467  1.1  rmind  */
    468  1.1  rmind void
    469  1.1  rmind npfctl_bpf_ports(npf_bpf_t *ctx, u_int opts, in_port_t from, in_port_t to)
    470  1.1  rmind {
    471  1.1  rmind 	const u_int sport_off = offsetof(struct udphdr, uh_sport);
    472  1.1  rmind 	const u_int dport_off = offsetof(struct udphdr, uh_dport);
    473  1.1  rmind 	u_int off;
    474  1.1  rmind 
    475  1.1  rmind 	/* TCP and UDP port offsets are the same. */
    476  1.1  rmind 	assert(sport_off == offsetof(struct tcphdr, th_sport));
    477  1.1  rmind 	assert(dport_off == offsetof(struct tcphdr, th_dport));
    478  1.1  rmind 
    479  1.1  rmind 	assert(((opts & MATCH_SRC) != 0) ^ ((opts & MATCH_DST) != 0));
    480  1.1  rmind 	off = (opts & MATCH_SRC) ? sport_off : dport_off;
    481  1.1  rmind 
    482  1.1  rmind 	/* X <- IP header length */
    483  1.2  rmind 	fetch_l3(ctx, AF_UNSPEC, X_EQ_L4OFF);
    484  1.1  rmind 
    485  1.1  rmind 	struct bpf_insn insns_fetch[] = {
    486  1.1  rmind 		/* A <- port */
    487  1.1  rmind 		BPF_STMT(BPF_LD+BPF_H+BPF_IND, off),
    488  1.1  rmind 	};
    489  1.1  rmind 	add_insns(ctx, insns_fetch, __arraycount(insns_fetch));
    490  1.1  rmind 
    491  1.1  rmind 	/* CAUTION: BPF operates in host byte-order. */
    492  1.1  rmind 	from = ntohs(from);
    493  1.1  rmind 	to = ntohs(to);
    494  1.1  rmind 
    495  1.1  rmind 	if (from == to) {
    496  1.1  rmind 		/* Single port case. */
    497  1.1  rmind 		struct bpf_insn insns_port[] = {
    498  1.1  rmind 			BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, from, 0, JUMP_MAGIC),
    499  1.1  rmind 		};
    500  1.1  rmind 		add_insns(ctx, insns_port, __arraycount(insns_port));
    501  1.1  rmind 	} else {
    502  1.1  rmind 		/* Port range case. */
    503  1.1  rmind 		struct bpf_insn insns_range[] = {
    504  1.1  rmind 			BPF_JUMP(BPF_JMP+BPF_JGE+BPF_K, from, 0, JUMP_MAGIC),
    505  1.1  rmind 			BPF_JUMP(BPF_JMP+BPF_JGT+BPF_K, to, JUMP_MAGIC, 0),
    506  1.1  rmind 		};
    507  1.1  rmind 		add_insns(ctx, insns_range, __arraycount(insns_range));
    508  1.1  rmind 	}
    509  1.1  rmind 
    510  1.1  rmind 	uint32_t mwords[] = {
    511  1.1  rmind 		opts & MATCH_SRC ? BM_SRC_PORTS : BM_DST_PORTS, 2, from, to
    512  1.1  rmind 	};
    513  1.1  rmind 	done_block(ctx, mwords, sizeof(mwords));
    514  1.1  rmind }
    515  1.1  rmind 
    516  1.1  rmind /*
    517  1.1  rmind  * npfctl_bpf_tcpfl: code block to match TCP flags.
    518  1.1  rmind  */
    519  1.1  rmind void
    520  1.1  rmind npfctl_bpf_tcpfl(npf_bpf_t *ctx, uint8_t tf, uint8_t tf_mask)
    521  1.1  rmind {
    522  1.1  rmind 	const u_int tcpfl_off = offsetof(struct tcphdr, th_flags);
    523  1.1  rmind 
    524  1.1  rmind 	/* X <- IP header length */
    525  1.2  rmind 	fetch_l3(ctx, AF_UNSPEC, X_EQ_L4OFF);
    526  1.1  rmind 
    527  1.1  rmind 	struct bpf_insn insns_tf[] = {
    528  1.1  rmind 		/* A <- TCP flags */
    529  1.1  rmind 		BPF_STMT(BPF_LD+BPF_B+BPF_IND, tcpfl_off),
    530  1.1  rmind 	};
    531  1.1  rmind 	add_insns(ctx, insns_tf, __arraycount(insns_tf));
    532  1.1  rmind 
    533  1.1  rmind 	if (tf_mask != tf) {
    534  1.1  rmind 		/* A <- (A & mask) */
    535  1.1  rmind 		struct bpf_insn insns_mask[] = {
    536  1.1  rmind 			BPF_STMT(BPF_ALU+BPF_AND+BPF_K, tf_mask),
    537  1.1  rmind 		};
    538  1.1  rmind 		add_insns(ctx, insns_mask, __arraycount(insns_mask));
    539  1.1  rmind 	}
    540  1.1  rmind 
    541  1.1  rmind 	struct bpf_insn insns_cmp[] = {
    542  1.1  rmind 		/* A == expected-TCP-flags? */
    543  1.1  rmind 		BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, tf, 0, JUMP_MAGIC),
    544  1.1  rmind 	};
    545  1.1  rmind 	add_insns(ctx, insns_cmp, __arraycount(insns_cmp));
    546  1.1  rmind 
    547  1.1  rmind 	uint32_t mwords[] = { BM_TCPFL, 2, tf, tf_mask};
    548  1.1  rmind 	done_block(ctx, mwords, sizeof(mwords));
    549  1.1  rmind }
    550  1.1  rmind 
    551  1.1  rmind /*
    552  1.1  rmind  * npfctl_bpf_icmp: code block to match ICMP type and/or code.
    553  1.1  rmind  * Note: suitable both for the ICMPv4 and ICMPv6.
    554  1.1  rmind  */
    555  1.1  rmind void
    556  1.1  rmind npfctl_bpf_icmp(npf_bpf_t *ctx, int type, int code)
    557  1.1  rmind {
    558  1.1  rmind 	const u_int type_off = offsetof(struct icmp, icmp_type);
    559  1.1  rmind 	const u_int code_off = offsetof(struct icmp, icmp_code);
    560  1.1  rmind 
    561  1.1  rmind 	assert(offsetof(struct icmp6_hdr, icmp6_type) == type_off);
    562  1.1  rmind 	assert(offsetof(struct icmp6_hdr, icmp6_code) == code_off);
    563  1.1  rmind 	assert(type != -1 || code != -1);
    564  1.1  rmind 
    565  1.1  rmind 	/* X <- IP header length */
    566  1.2  rmind 	fetch_l3(ctx, AF_UNSPEC, X_EQ_L4OFF);
    567  1.1  rmind 
    568  1.1  rmind 	if (type != -1) {
    569  1.1  rmind 		struct bpf_insn insns_type[] = {
    570  1.1  rmind 			BPF_STMT(BPF_LD+BPF_B+BPF_IND, type_off),
    571  1.1  rmind 			BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, type, 0, JUMP_MAGIC),
    572  1.1  rmind 		};
    573  1.1  rmind 		add_insns(ctx, insns_type, __arraycount(insns_type));
    574  1.1  rmind 
    575  1.1  rmind 		uint32_t mwords[] = { BM_ICMP_TYPE, 1, type };
    576  1.1  rmind 		done_block(ctx, mwords, sizeof(mwords));
    577  1.1  rmind 	}
    578  1.1  rmind 
    579  1.1  rmind 	if (code != -1) {
    580  1.1  rmind 		struct bpf_insn insns_code[] = {
    581  1.1  rmind 			BPF_STMT(BPF_LD+BPF_B+BPF_IND, code_off),
    582  1.1  rmind 			BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, code, 0, JUMP_MAGIC),
    583  1.1  rmind 		};
    584  1.1  rmind 		add_insns(ctx, insns_code, __arraycount(insns_code));
    585  1.1  rmind 
    586  1.1  rmind 		uint32_t mwords[] = { BM_ICMP_CODE, 1, code };
    587  1.1  rmind 		done_block(ctx, mwords, sizeof(mwords));
    588  1.1  rmind 	}
    589  1.1  rmind }
    590  1.1  rmind 
    591  1.1  rmind #define	SRC_FLAG_BIT	(1U << 31)
    592  1.1  rmind 
    593  1.1  rmind /*
    594  1.1  rmind  * npfctl_bpf_table: code block to match source/destination IP address
    595  1.1  rmind  * against NPF table specified by ID.
    596  1.1  rmind  */
    597  1.1  rmind void
    598  1.1  rmind npfctl_bpf_table(npf_bpf_t *ctx, u_int opts, u_int tid)
    599  1.1  rmind {
    600  1.1  rmind 	const bool src = (opts & MATCH_SRC) != 0;
    601  1.1  rmind 
    602  1.1  rmind 	struct bpf_insn insns_table[] = {
    603  1.1  rmind 		BPF_STMT(BPF_LD+BPF_IMM, (src ? SRC_FLAG_BIT : 0) | tid),
    604  1.1  rmind 		BPF_STMT(BPF_MISC+BPF_COP, NPF_COP_TABLE),
    605  1.1  rmind 		BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, 0, JUMP_MAGIC, 0),
    606  1.1  rmind 	};
    607  1.1  rmind 	add_insns(ctx, insns_table, __arraycount(insns_table));
    608  1.1  rmind 
    609  1.1  rmind 	uint32_t mwords[] = { src ? BM_SRC_TABLE: BM_DST_TABLE, 1, tid };
    610  1.1  rmind 	done_block(ctx, mwords, sizeof(mwords));
    611  1.1  rmind }
    612