/*-
 * Copyright (c) 2020 Mindaugas Rasiukevicius <rmind at noxt eu>
 * Copyright (c) 2009-2015 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This material is based upon work partially supported by The
 * NetBSD Foundation under a contract with Mindaugas Rasiukevicius.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * NPF ruleset module.
 */

#ifdef _KERNEL
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: npf_ruleset.c,v 1.51.20.1 2023/08/23 18:19:32 martin Exp $");

#include <sys/param.h>
#include <sys/types.h>

#include <sys/atomic.h>
#include <sys/kmem.h>
#include <sys/queue.h>
#include <sys/mbuf.h>

#include <net/bpf.h>
#include <net/bpfjit.h>
#include <net/pfil.h>
#include <net/if.h>
#endif

#include "npf_impl.h"

struct npf_ruleset {
	/*
	 * - List of all rules.
	 * - Dynamic (i.e. named) rules.
	 * - G/C list for convenience.
	 */
	LIST_HEAD(, npf_rule)	rs_all;
	LIST_HEAD(, npf_rule)	rs_dynamic;
	LIST_HEAD(, npf_rule)	rs_gc;

	/* Unique ID counter. */
	uint64_t		rs_idcnt;

	/* Number of array slots and active rules. */
	unsigned		rs_slots;
	unsigned		rs_nitems;

	/* Array of ordered rules. */
	npf_rule_t *		rs_rules[];
};
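
/*
 * Note: rs_rules[] is the ordered array of rules loaded with the
 * configuration; npf_ruleset_inspect() scans it linearly and the
 * per-rule r_skip_to values are indexes into this array.
 */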

struct npf_rule {
	/* Attributes, interface and skip slot. */
	uint32_t		r_attr;
	unsigned		r_ifid;
	unsigned		r_skip_to;

	/* Code to process, if any. */
	int			r_type;
	bpfjit_func_t		r_jcode;
	void *			r_code;
	unsigned		r_clen;

	/* NAT policy (optional), rule procedure and subset. */
	npf_natpolicy_t *	r_natp;
	npf_rproc_t *		r_rproc;

	union {
		/*
		 * Dynamic group: rule subset and a group list entry.
		 */
		struct {
			npf_rule_t *		r_subset;
			LIST_ENTRY(npf_rule)	r_dentry;
		};

		/*
		 * Dynamic rule: priority, parent group and next rule.
		 */
		struct {
			int			r_priority;
			npf_rule_t *		r_parent;
			npf_rule_t *		r_next;
		};
	};

	/* Rule ID, name and the optional key. */
	uint64_t		r_id;
	char			r_name[NPF_RULE_MAXNAMELEN];
	uint8_t			r_key[NPF_RULE_MAXKEYLEN];

	/* All-list entry and the auxiliary info. */
	LIST_ENTRY(npf_rule)	r_aentry;
	nvlist_t *		r_info;
	size_t			r_info_len;
};

#define	SKIPTO_ADJ_FLAG		(1U << 31)
#define	SKIPTO_MASK		(SKIPTO_ADJ_FLAG - 1)
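
/*
 * The skip-to value is an index into rs_rules[].  If the rule did not
 * specify one, npf_ruleset_insert() sets it to "next rule" and marks
 * the value with SKIPTO_ADJ_FLAG; npf_rule_export() reports only the
 * user-specified (unflagged) values and SKIPTO_MASK recovers the index.
 */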

static nvlist_t *	npf_rule_export(npf_t *, const npf_rule_t *);

/*
 * Private attributes - must be in the NPF_RULE_PRIVMASK range.
 */
#define	NPF_RULE_KEEPNAT	(0x01000000 & NPF_RULE_PRIVMASK)

#define	NPF_DYNAMIC_GROUP_P(attr) \
    (((attr) & NPF_DYNAMIC_GROUP) == NPF_DYNAMIC_GROUP)

#define	NPF_DYNAMIC_RULE_P(attr) \
    (((attr) & NPF_DYNAMIC_GROUP) == NPF_RULE_DYNAMIC)
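
/*
 * Note: assuming NPF_DYNAMIC_GROUP is defined (in npf.h) as the
 * combination NPF_RULE_GROUP | NPF_RULE_DYNAMIC, a "dynamic group"
 * carries both bits, while a "dynamic rule" carries NPF_RULE_DYNAMIC
 * without NPF_RULE_GROUP -- hence the two predicates above.
 */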

npf_ruleset_t *
npf_ruleset_create(size_t slots)
{
	size_t len = offsetof(npf_ruleset_t, rs_rules[slots]);
	npf_ruleset_t *rlset;

	rlset = kmem_zalloc(len, KM_SLEEP);
	LIST_INIT(&rlset->rs_dynamic);
	LIST_INIT(&rlset->rs_all);
	LIST_INIT(&rlset->rs_gc);
	rlset->rs_slots = slots;

	return rlset;
}

void
npf_ruleset_destroy(npf_ruleset_t *rlset)
{
	size_t len = offsetof(npf_ruleset_t, rs_rules[rlset->rs_slots]);
	npf_rule_t *rl;

	while ((rl = LIST_FIRST(&rlset->rs_all)) != NULL) {
		if (NPF_DYNAMIC_GROUP_P(rl->r_attr)) {
			/*
			 * Note: r_subset may point to the rules which
			 * were inherited by a new ruleset.
			 */
			rl->r_subset = NULL;
			LIST_REMOVE(rl, r_dentry);
		}
		if (NPF_DYNAMIC_RULE_P(rl->r_attr)) {
			/* Not removing from r_subset, see above. */
			KASSERT(rl->r_parent != NULL);
		}
		LIST_REMOVE(rl, r_aentry);
		npf_rule_free(rl);
	}
	KASSERT(LIST_EMPTY(&rlset->rs_dynamic));

	npf_ruleset_gc(rlset);
	KASSERT(LIST_EMPTY(&rlset->rs_gc));
	kmem_free(rlset, len);
}

/*
 * npf_ruleset_insert: insert the rule into the specified ruleset.
 */
void
npf_ruleset_insert(npf_ruleset_t *rlset, npf_rule_t *rl)
{
	unsigned n = rlset->rs_nitems;

	KASSERT(n < rlset->rs_slots);

	LIST_INSERT_HEAD(&rlset->rs_all, rl, r_aentry);
	if (NPF_DYNAMIC_GROUP_P(rl->r_attr)) {
		LIST_INSERT_HEAD(&rlset->rs_dynamic, rl, r_dentry);
	} else {
		KASSERTMSG(rl->r_parent == NULL, "cannot be dynamic rule");
		rl->r_attr &= ~NPF_RULE_DYNAMIC;
	}

	rlset->rs_rules[n] = rl;
	rlset->rs_nitems++;
	rl->r_id = ++rlset->rs_idcnt;

	if (rl->r_skip_to < ++n) {
		rl->r_skip_to = SKIPTO_ADJ_FLAG | n;
	}
}

npf_rule_t *
npf_ruleset_lookup(npf_ruleset_t *rlset, const char *name)
{
	npf_rule_t *rl;

	LIST_FOREACH(rl, &rlset->rs_dynamic, r_dentry) {
		KASSERT(NPF_DYNAMIC_GROUP_P(rl->r_attr));
		if (strncmp(rl->r_name, name, NPF_RULE_MAXNAMELEN) == 0)
			break;
	}
	return rl;
}

/*
 * npf_ruleset_add: insert dynamic rule into the (active) ruleset.
 */
int
npf_ruleset_add(npf_ruleset_t *rlset, const char *rname, npf_rule_t *rl)
{
	npf_rule_t *rg, *it, *target;
	int priocmd;

	if (!NPF_DYNAMIC_RULE_P(rl->r_attr)) {
		return EINVAL;
	}
	rg = npf_ruleset_lookup(rlset, rname);
	if (rg == NULL) {
		return ESRCH;
	}

	/* Dynamic rule - assign a unique ID and save the parent. */
	rl->r_id = ++rlset->rs_idcnt;
	rl->r_parent = rg;

	/*
	 * Rule priority: (highest) 1, 2 ... n (lowest).
	 * Negative priority indicates an operation and is reset to zero.
	 */
	if ((priocmd = rl->r_priority) < 0) {
		rl->r_priority = 0;
	}

	/*
	 * WARNING: once rg->r_subset or target->r_next of an *active*
	 * rule is set, our rule becomes globally visible and active.
	 * Therefore, issue a store (producer) fence so that rl->r_next
	 * is visible before the rule is published.
	 */
	switch (priocmd) {
	case NPF_PRI_LAST:
	default:
		target = NULL;
		it = rg->r_subset;
		while (it && it->r_priority <= rl->r_priority) {
			target = it;
			it = it->r_next;
		}
		if (target) {
			atomic_store_relaxed(&rl->r_next, target->r_next);
			membar_producer();
			atomic_store_relaxed(&target->r_next, rl);
			break;
		}
		/* FALLTHROUGH */

	case NPF_PRI_FIRST:
		atomic_store_relaxed(&rl->r_next, rg->r_subset);
		membar_producer();
		atomic_store_relaxed(&rg->r_subset, rl);
		break;
	}

	/* Finally, add into the all-list. */
	LIST_INSERT_HEAD(&rlset->rs_all, rl, r_aentry);
	return 0;
}

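/*
 * npf_ruleset_unlink: detach the dynamic rule from its parent group
 * and from the all-rule list.  The caller is expected to put the rule
 * onto the G/C list (rs_gc) rather than free it immediately, since
 * concurrent lookups may still be traversing the subset via r_next.
 */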
static void
npf_ruleset_unlink(npf_rule_t *rl, npf_rule_t *prev)
{
	KASSERT(NPF_DYNAMIC_RULE_P(rl->r_attr));
	if (prev) {
		prev->r_next = rl->r_next;
	} else {
		npf_rule_t *rg = rl->r_parent;
		rg->r_subset = rl->r_next;
	}
	LIST_REMOVE(rl, r_aentry);
}

/*
 * npf_ruleset_remove: remove the dynamic rule given the rule ID.
 */
int
npf_ruleset_remove(npf_ruleset_t *rlset, const char *rname, uint64_t id)
{
	npf_rule_t *rg, *prev = NULL;

	if ((rg = npf_ruleset_lookup(rlset, rname)) == NULL) {
		return ESRCH;
	}
	for (npf_rule_t *rl = rg->r_subset; rl; rl = rl->r_next) {
		KASSERT(rl->r_parent == rg);
		KASSERT(NPF_DYNAMIC_RULE_P(rl->r_attr));

		/* Compare ID.  On match, remove and return. */
		if (rl->r_id == id) {
			npf_ruleset_unlink(rl, prev);
			LIST_INSERT_HEAD(&rlset->rs_gc, rl, r_aentry);
			return 0;
		}
		prev = rl;
	}
	return ENOENT;
}

/*
 * npf_ruleset_remkey: remove the dynamic rule given the rule key.
 */
int
npf_ruleset_remkey(npf_ruleset_t *rlset, const char *rname,
    const void *key, size_t len)
{
	npf_rule_t *rg, *rlast = NULL, *prev = NULL, *lastprev = NULL;

	KASSERT(len && len <= NPF_RULE_MAXKEYLEN);

	if ((rg = npf_ruleset_lookup(rlset, rname)) == NULL) {
		return ESRCH;
	}

	/* Compare the key and find the last in the list. */
	for (npf_rule_t *rl = rg->r_subset; rl; rl = rl->r_next) {
		KASSERT(rl->r_parent == rg);
		KASSERT(NPF_DYNAMIC_RULE_P(rl->r_attr));
		if (memcmp(rl->r_key, key, len) == 0) {
			lastprev = prev;
			rlast = rl;
		}
		prev = rl;
	}
	if (!rlast) {
		return ENOENT;
	}
	npf_ruleset_unlink(rlast, lastprev);
	LIST_INSERT_HEAD(&rlset->rs_gc, rlast, r_aentry);
	return 0;
}

/*
 * npf_ruleset_list: serialise and return the dynamic rules.
 */
int
npf_ruleset_list(npf_t *npf, npf_ruleset_t *rlset, const char *rname,
    nvlist_t *rlset_nvl)
{
	const npf_rule_t *rg;

	KASSERT(npf_config_locked_p(npf));

	if ((rg = npf_ruleset_lookup(rlset, rname)) == NULL) {
		return ESRCH;
	}
	for (const npf_rule_t *rl = rg->r_subset; rl; rl = rl->r_next) {
		nvlist_t *rule;

		KASSERT(rl->r_parent == rg);
		KASSERT(NPF_DYNAMIC_RULE_P(rl->r_attr));

		if ((rule = npf_rule_export(npf, rl)) == NULL) {
			return ENOMEM;
		}
		nvlist_append_nvlist_array(rlset_nvl, "rules", rule);
		nvlist_destroy(rule);
	}
	return 0;
}

/*
 * npf_ruleset_flush: flush the dynamic rules in the ruleset by inserting
 * them into the G/C list.
 */
int
npf_ruleset_flush(npf_ruleset_t *rlset, const char *rname)
{
	npf_rule_t *rg, *rl;

	if ((rg = npf_ruleset_lookup(rlset, rname)) == NULL) {
		return ESRCH;
	}

	rl = atomic_swap_ptr(&rg->r_subset, NULL);
	membar_producer();

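	/*
	 * Note: the detached chain keeps its r_next links intact, so any
	 * lookup already walking the subset can finish safely; the rules
	 * are only moved to the G/C list here and destroyed later by
	 * npf_ruleset_gc().
	 */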
	while (rl) {
		KASSERT(NPF_DYNAMIC_RULE_P(rl->r_attr));
		KASSERT(rl->r_parent == rg);

		LIST_REMOVE(rl, r_aentry);
		LIST_INSERT_HEAD(&rlset->rs_gc, rl, r_aentry);
		rl = rl->r_next;
	}
	rlset->rs_idcnt = 0;
	return 0;
}

/*
 * npf_ruleset_gc: destroy the rules in G/C list.
 */
void
npf_ruleset_gc(npf_ruleset_t *rlset)
{
	npf_rule_t *rl;

	while ((rl = LIST_FIRST(&rlset->rs_gc)) != NULL) {
		LIST_REMOVE(rl, r_aentry);
		npf_rule_free(rl);
	}
}

/*
 * npf_ruleset_export: serialise and return the static rules.
 */
int
npf_ruleset_export(npf_t *npf, const npf_ruleset_t *rlset,
    const char *key, nvlist_t *npf_nv)
{
	const unsigned nitems = rlset->rs_nitems;
	unsigned n = 0;
	int error = 0;

	KASSERT(npf_config_locked_p(npf));

	while (n < nitems) {
		const npf_rule_t *rl = rlset->rs_rules[n];
		const npf_natpolicy_t *natp = rl->r_natp;
		nvlist_t *rule;

		rule = npf_rule_export(npf, rl);
		if (!rule) {
			error = ENOMEM;
			break;
		}
		if (natp && (error = npf_natpolicy_export(natp, rule)) != 0) {
			nvlist_destroy(rule);
			break;
		}
		nvlist_append_nvlist_array(npf_nv, key, rule);
		nvlist_destroy(rule);
		n++;
	}
	return error;
}

/*
 * npf_ruleset_reload: prepare the new ruleset by scanning the active
 * ruleset and: 1) sharing the dynamic rules; 2) sharing NAT policies.
 *
 * => The active (old) ruleset should be exclusively locked.
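 * => The new ruleset is assumed to be not yet visible to the packet
 *    path, so plain pointer updates are sufficient while migrating.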
 */
void
npf_ruleset_reload(npf_t *npf, npf_ruleset_t *newset,
    npf_ruleset_t *oldset, bool load)
{
	npf_rule_t *rg, *rl;
	uint64_t nid = 0;

	KASSERT(npf_config_locked_p(npf));

	/*
	 * Scan the dynamic rules and share (migrate) if needed.
	 */
	LIST_FOREACH(rg, &newset->rs_dynamic, r_dentry) {
		npf_rule_t *active_rgroup;

		/* Look for a dynamic ruleset group with such name. */
		active_rgroup = npf_ruleset_lookup(oldset, rg->r_name);
		if (active_rgroup == NULL) {
			continue;
		}

		/*
		 * ATOMICITY: Copy the head pointer of the linked-list,
		 * but do not remove the rules from the active r_subset.
		 * This is necessary because the rules are still active
		 * and therefore are accessible for inspection via the
		 * old ruleset.
		 */
		rg->r_subset = active_rgroup->r_subset;

		/*
		 * We can safely migrate to the new all-rule list and
		 * reset the parent rule, though.
		 */
		for (rl = rg->r_subset; rl; rl = rl->r_next) {
			KASSERT(NPF_DYNAMIC_RULE_P(rl->r_attr));
			LIST_REMOVE(rl, r_aentry);
			LIST_INSERT_HEAD(&newset->rs_all, rl, r_aentry);

			KASSERT(rl->r_parent == active_rgroup);
			rl->r_parent = rg;
		}
	}

	/*
	 * If performing the load of connections then NAT policies might
	 * already have translated connections associated with them and
	 * we should not share or inherit anything.
	 */
	if (load)
		return;

	/*
	 * Scan all rules in the new ruleset and inherit the active NAT
	 * policies if they are the same.  Also, assign a unique ID for
	 * each policy here.
	 */
	LIST_FOREACH(rl, &newset->rs_all, r_aentry) {
		npf_natpolicy_t *np;
		npf_rule_t *actrl;

		/* Does the rule have a NAT policy associated? */
		if ((np = rl->r_natp) == NULL) {
			continue;
		}

		/* Does it match with any policy in the active ruleset? */
		LIST_FOREACH(actrl, &oldset->rs_all, r_aentry) {
			if (!actrl->r_natp)
				continue;
			if ((actrl->r_attr & NPF_RULE_KEEPNAT) != 0)
				continue;
			if (npf_natpolicy_cmp(actrl->r_natp, np))
				break;
		}
		if (!actrl) {
			/* No: just set the ID and continue. */
			npf_nat_setid(np, ++nid);
			continue;
		}

		/* Yes: inherit the matching NAT policy. */
		rl->r_natp = actrl->r_natp;
		npf_nat_setid(rl->r_natp, ++nid);

		/*
		 * Finally, mark the active rule to not destroy its NAT
		 * policy later as we inherited it (but the rule must be
		 * kept active for now).  Destroy the new/unused policy.
		 */
		actrl->r_attr |= NPF_RULE_KEEPNAT;
		npf_natpolicy_destroy(np);
	}

	/* Inherit the ID counter. */
	newset->rs_idcnt = oldset->rs_idcnt;
}

/*
 * npf_ruleset_findnat: find a NAT policy in the ruleset by a given ID.
 */
npf_natpolicy_t *
npf_ruleset_findnat(npf_ruleset_t *rlset, uint64_t id)
{
	npf_rule_t *rl;

	LIST_FOREACH(rl, &rlset->rs_all, r_aentry) {
		npf_natpolicy_t *np = rl->r_natp;
		if (np && npf_nat_getid(np) == id) {
			return np;
		}
	}
	return NULL;
}

/*
 * npf_ruleset_freealg: inspect the ruleset and disassociate specified
 * ALG from all NAT entries using it.
 */
void
npf_ruleset_freealg(npf_ruleset_t *rlset, npf_alg_t *alg)
{
	npf_rule_t *rl;
	npf_natpolicy_t *np;

	LIST_FOREACH(rl, &rlset->rs_all, r_aentry) {
		if ((np = rl->r_natp) != NULL) {
			npf_nat_freealg(np, alg);
		}
	}
}

/*
 * npf_rule_alloc: allocate a rule and initialise it.
 */
npf_rule_t *
npf_rule_alloc(npf_t *npf, const nvlist_t *rule)
{
	npf_rule_t *rl;
	const char *rname;
	const void *key, *info;
	size_t len;

	/* Allocate a rule structure and keep the information. */
	rl = kmem_zalloc(sizeof(npf_rule_t), KM_SLEEP);
	info = dnvlist_get_binary(rule, "info", &rl->r_info_len, NULL, 0);
	if (info) {
		rl->r_info = kmem_alloc(rl->r_info_len, KM_SLEEP);
		memcpy(rl->r_info, info, rl->r_info_len);
	}
	rl->r_natp = NULL;

	/* Name (optional) */
	if ((rname = dnvlist_get_string(rule, "name", NULL)) != NULL) {
		strlcpy(rl->r_name, rname, NPF_RULE_MAXNAMELEN);
	} else {
		rl->r_name[0] = '\0';
	}

	/* Attributes, priority and interface ID (optional). */
	rl->r_attr = dnvlist_get_number(rule, "attr", 0);
	rl->r_attr &= ~NPF_RULE_PRIVMASK;

	if (NPF_DYNAMIC_RULE_P(rl->r_attr)) {
		/* Priority of the dynamic rule. */
		rl->r_priority = (int)dnvlist_get_number(rule, "prio", 0);
	} else {
		/* The skip-to index.  No need to validate it. */
		rl->r_skip_to = dnvlist_get_number(rule, "skip-to", 0);
	}

	/* Interface name; register and get the npf-if-id. */
	if ((rname = dnvlist_get_string(rule, "ifname", NULL)) != NULL) {
		if ((rl->r_ifid = npf_ifmap_register(npf, rname)) == 0) {
			/* Release the copied info to avoid leaking it. */
			if (rl->r_info) {
				kmem_free(rl->r_info, rl->r_info_len);
			}
			kmem_free(rl, sizeof(npf_rule_t));
			return NULL;
		}
	} else {
		rl->r_ifid = 0;
	}

	/* Key (optional). */
	if ((key = dnvlist_get_binary(rule, "key", &len, NULL, 0)) != NULL) {
		if (len > NPF_RULE_MAXKEYLEN) {
			/* Release the copied info to avoid leaking it. */
			if (rl->r_info) {
				kmem_free(rl->r_info, rl->r_info_len);
			}
			kmem_free(rl, sizeof(npf_rule_t));
			return NULL;
		}
		memcpy(rl->r_key, key, len);
	}
	return rl;
}

static nvlist_t *
npf_rule_export(npf_t *npf, const npf_rule_t *rl)
{
	nvlist_t *rule = nvlist_create(0);
	unsigned skip_to = 0;
	npf_rproc_t *rp;

	nvlist_add_number(rule, "attr", rl->r_attr);
	nvlist_add_number(rule, "prio", rl->r_priority);
	if ((rl->r_skip_to & SKIPTO_ADJ_FLAG) == 0) {
		skip_to = rl->r_skip_to & SKIPTO_MASK;
	}
	nvlist_add_number(rule, "skip-to", skip_to);
	nvlist_add_number(rule, "code-type", rl->r_type);
	if (rl->r_code) {
		nvlist_add_binary(rule, "code", rl->r_code, rl->r_clen);
	}
	if (rl->r_ifid) {
		char ifname[IFNAMSIZ];
		npf_ifmap_copyname(npf, rl->r_ifid, ifname, sizeof(ifname));
		nvlist_add_string(rule, "ifname", ifname);
	}
	nvlist_add_number(rule, "id", rl->r_id);

	if (rl->r_name[0]) {
		nvlist_add_string(rule, "name", rl->r_name);
	}
	if (NPF_DYNAMIC_RULE_P(rl->r_attr)) {
		nvlist_add_binary(rule, "key", rl->r_key, NPF_RULE_MAXKEYLEN);
	}
	if (rl->r_info) {
		nvlist_add_binary(rule, "info", rl->r_info, rl->r_info_len);
	}
	if ((rp = npf_rule_getrproc(rl)) != NULL) {
		const char *rname = npf_rproc_getname(rp);
		nvlist_add_string(rule, "rproc", rname);
		npf_rproc_release(rp);
	}
	return rule;
}

/*
 * npf_rule_setcode: assign filter code to the rule.
 *
 * => The code must be validated by the caller.
 * => JIT compilation may be performed here.
 */
void
npf_rule_setcode(npf_rule_t *rl, const int type, void *code, size_t size)
{
	KASSERT(type == NPF_CODE_BPF);

	rl->r_type = type;
	rl->r_code = code;
	rl->r_clen = size;
	rl->r_jcode = npf_bpf_compile(code, size);
}

/*
 * npf_rule_setrproc: assign a rule procedure and hold a reference on it.
 */
void
npf_rule_setrproc(npf_rule_t *rl, npf_rproc_t *rp)
{
	npf_rproc_acquire(rp);
	rl->r_rproc = rp;
}

/*
 * npf_rule_free: free the specified rule.
 */
void
npf_rule_free(npf_rule_t *rl)
{
	npf_natpolicy_t *np = rl->r_natp;
	npf_rproc_t *rp = rl->r_rproc;

	if (np && (rl->r_attr & NPF_RULE_KEEPNAT) == 0) {
		/* Destroy the NAT policy. */
		npf_natpolicy_destroy(np);
	}
	if (rp) {
		/* Release rule procedure. */
		npf_rproc_release(rp);
	}
	if (rl->r_code) {
		/* Free byte-code. */
		kmem_free(rl->r_code, rl->r_clen);
	}
	if (rl->r_jcode) {
		/* Free JIT code. */
		bpf_jit_freecode(rl->r_jcode);
	}
	if (rl->r_info) {
		kmem_free(rl->r_info, rl->r_info_len);
	}
	kmem_free(rl, sizeof(npf_rule_t));
}

/*
 * npf_rule_getid: return the unique ID of a rule.
 * npf_rule_getrproc: acquire a reference and return rule procedure, if any.
 * npf_rule_getnat: get NAT policy assigned to the rule.
 */

uint64_t
npf_rule_getid(const npf_rule_t *rl)
{
	KASSERT(NPF_DYNAMIC_RULE_P(rl->r_attr));
	return rl->r_id;
}

npf_rproc_t *
npf_rule_getrproc(const npf_rule_t *rl)
{
	npf_rproc_t *rp = rl->r_rproc;

	if (rp) {
		npf_rproc_acquire(rp);
	}
	return rp;
}

npf_natpolicy_t *
npf_rule_getnat(const npf_rule_t *rl)
{
	return rl->r_natp;
}

/*
 * npf_rule_setnat: assign the NAT policy to the rule.
 */
void
npf_rule_setnat(npf_rule_t *rl, npf_natpolicy_t *np)
{
	KASSERT(rl->r_natp == NULL);
	rl->r_natp = np;
}

/*
 * npf_rule_inspect: match the interface, direction and run the filter code.
 * Returns true if rule matches and false otherwise.
 */
static inline bool
npf_rule_inspect(const npf_rule_t *rl, bpf_args_t *bc_args,
    const int di_mask, const unsigned ifid)
{
	/* Match the interface. */
	if (rl->r_ifid && rl->r_ifid != ifid) {
		return false;
	}

	/* Match the direction. */
	if ((rl->r_attr & NPF_RULE_DIMASK) != NPF_RULE_DIMASK) {
		if ((rl->r_attr & di_mask) == 0)
			return false;
	}

	/* Any code? */
	if (!rl->r_code) {
		KASSERT(rl->r_jcode == NULL);
		return true;
	}
	KASSERT(rl->r_type == NPF_CODE_BPF);
	return npf_bpf_filter(bc_args, rl->r_code, rl->r_jcode) != 0;
}

/*
 * npf_rule_reinspect: re-inspect the dynamic group by iterating its
 * subrule list.  This is only for dynamic groups; subrules cannot have
 * nested rules of their own.
 */
static inline npf_rule_t *
npf_rule_reinspect(const npf_rule_t *rg, bpf_args_t *bc_args,
    const int di_mask, const unsigned ifid)
{
	npf_rule_t *final_rl = NULL, *rl;

	KASSERT(NPF_DYNAMIC_GROUP_P(rg->r_attr));

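	/*
	 * Lock-free traversal: the relaxed loads of r_subset/r_next pair
	 * with the membar_producer() in npf_ruleset_add(), which publishes
	 * a new rule only after its r_next has been set; the traversal
	 * relies on the address dependency for ordering.
	 */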
	rl = atomic_load_relaxed(&rg->r_subset);
	for (; rl; rl = atomic_load_relaxed(&rl->r_next)) {
		KASSERT(!final_rl || rl->r_priority >= final_rl->r_priority);
		if (!npf_rule_inspect(rl, bc_args, di_mask, ifid)) {
			continue;
		}
		if (rl->r_attr & NPF_RULE_FINAL) {
			return rl;
		}
		final_rl = rl;
	}
	return final_rl;
}

/*
 * npf_ruleset_inspect: inspect the packet against the given ruleset.
 *
 * Loop through the rules in the set and run the byte-code of each rule
 * against the packet (nbuf chain).  If a sub-ruleset is found, inspect it.
 */
npf_rule_t *
npf_ruleset_inspect(npf_cache_t *npc, const npf_ruleset_t *rlset,
    const int di, const int layer)
{
	nbuf_t *nbuf = npc->npc_nbuf;
	const int di_mask = (di & PFIL_IN) ? NPF_RULE_IN : NPF_RULE_OUT;
	const unsigned nitems = rlset->rs_nitems;
	const unsigned ifid = nbuf->nb_ifid;
	npf_rule_t *final_rl = NULL;
	bpf_args_t bc_args;
	unsigned n = 0;

	KASSERT(((di & PFIL_IN) != 0) ^ ((di & PFIL_OUT) != 0));

	/*
	 * Prepare the external memory store and the arguments for
	 * the BPF programs to be executed.  Reset the mbuf before taking
	 * any pointers for the BPF.
	 */
	uint32_t bc_words[NPF_BPF_NWORDS];

	nbuf_reset(nbuf);
	npf_bpf_prepare(npc, &bc_args, bc_words);

	while (n < nitems) {
		npf_rule_t *rl = rlset->rs_rules[n];
		const unsigned skip_to = rl->r_skip_to & SKIPTO_MASK;
		const uint32_t attr = rl->r_attr;

		KASSERT(!nbuf_flag_p(nbuf, NBUF_DATAREF_RESET));
		KASSERT(n < skip_to);

		/* A group acts as a barrier: return the match found so far. */
		if ((attr & NPF_DYNAMIC_GROUP) == NPF_RULE_GROUP && final_rl) {
			break;
		}

		/* Main inspection of the rule. */
		if (!npf_rule_inspect(rl, &bc_args, di_mask, ifid)) {
			n = skip_to;
			continue;
		}

		if (NPF_DYNAMIC_GROUP_P(attr)) {
			/*
			 * If this is a dynamic group, re-inspect its subrules.
			 * If any of them matches, then it is final.
			 */
			rl = npf_rule_reinspect(rl, &bc_args, di_mask, ifid);
			if (rl != NULL) {
				final_rl = rl;
				break;
			}
		} else if ((attr & NPF_RULE_GROUP) == 0) {
			/*
			 * Groups themselves do not match; remember the
			 * matching rule.
			 */
			final_rl = rl;
		}

		/* Stop if the matching rule is marked as "final". */
		if (attr & NPF_RULE_FINAL) {
			break;
		}
		n++;
	}

	KASSERT(!nbuf_flag_p(nbuf, NBUF_DATAREF_RESET));
	return final_rl;
}

/*
 * npf_rule_conclude: return decision and the flags for conclusion.
 *
 * => Returns ENETUNREACH if "block" and 0 if "pass".
 */
int
npf_rule_conclude(const npf_rule_t *rl, npf_match_info_t *mi)
{
	/* If not passing - drop the packet. */
	mi->mi_retfl = rl->r_attr;
	mi->mi_rid = rl->r_id;
	return (rl->r_attr & NPF_RULE_PASS) ? 0 : ENETUNREACH;
}

#if defined(DDB) || defined(_NPF_TESTING)

void
npf_ruleset_dump(npf_t *npf, const char *name)
{
	npf_ruleset_t *rlset = npf_config_ruleset(npf);
	npf_rule_t *rg, *rl;

	LIST_FOREACH(rg, &rlset->rs_dynamic, r_dentry) {
		printf("ruleset '%s':\n", rg->r_name);
		for (rl = rg->r_subset; rl; rl = rl->r_next) {
			printf("\tid %"PRIu64", key: ", rl->r_id);
			for (unsigned i = 0; i < NPF_RULE_MAXKEYLEN; i++)
				printf("%x", rl->r_key[i]);
			printf("\n");
		}
	}
}

#endif