Home | History | Annotate | Line # | Download | only in npf
      1 /*-
      2  * Copyright (c) 2020 Mindaugas Rasiukevicius <rmind at noxt eu>
      3  * Copyright (c) 2009-2025 The NetBSD Foundation, Inc.
      4  * All rights reserved.
      5  *
      6  * This material is based upon work partially supported by The
      7  * NetBSD Foundation under a contract with Mindaugas Rasiukevicius.
      8  *
      9  * Redistribution and use in source and binary forms, with or without
     10  * modification, are permitted provided that the following conditions
     11  * are met:
     12  * 1. Redistributions of source code must retain the above copyright
     13  *    notice, this list of conditions and the following disclaimer.
     14  * 2. Redistributions in binary form must reproduce the above copyright
     15  *    notice, this list of conditions and the following disclaimer in the
     16  *    documentation and/or other materials provided with the distribution.
     17  *
     18  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     19  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     20  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     21  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     22  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     23  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     24  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     25  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     26  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     27  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     28  * POSSIBILITY OF SUCH DAMAGE.
     29  */
     30 
     31 /*
     32  * NPF ruleset module.
     33  */
     34 
     35 #ifdef _KERNEL
     36 #include <sys/cdefs.h>
     37 __KERNEL_RCSID(0, "$NetBSD: npf_ruleset.c,v 1.56 2025/07/01 18:42:37 joe Exp $");
     38 
     39 #include <sys/param.h>
     40 #include <sys/types.h>
     41 
     42 #include <sys/atomic.h>
     43 #include <sys/kmem.h>
     44 #include <sys/queue.h>
     45 #include <sys/mbuf.h>
     46 #include <sys/types.h>
     47 #include <sys/kauth.h>
     48 
     49 #include <net/bpf.h>
     50 #include <net/bpfjit.h>
     51 #include <net/pfil.h>
     52 #include <net/if.h>
     53 #endif
     54 
     55 #include "npf_impl.h"
     56 
struct npf_ruleset {
	/*
	 * - List of all rules.
	 * - Dynamic (i.e. named) rules.
	 * - G/C list for convenience.
	 */
	LIST_HEAD(, npf_rule)	rs_all;		/* every rule in this set */
	LIST_HEAD(, npf_rule)	rs_dynamic;	/* dynamic (named) groups only */
	LIST_HEAD(, npf_rule)	rs_gc;		/* rules pending destruction */

	/* Unique ID counter.  Incremented on each rule insertion. */
	uint64_t		rs_idcnt;

	/* Number of array slots and active rules. */
	unsigned		rs_slots;	/* capacity of rs_rules[] */
	unsigned		rs_nitems;	/* rules currently in rs_rules[] */

	/* Array of ordered rules (flexible array member, sized by rs_slots). */
	npf_rule_t *		rs_rules[];
};
     77 
struct npf_rule {
	/* Attributes, interface and skip slot. */
	uint32_t		r_attr;		/* NPF_RULE_* flag bits */
	unsigned		r_ifid;		/* npf-if-id; 0 = any interface */
	unsigned		r_skip_to;	/* skip-to index; see SKIPTO_* below */

	/* Code to process, if any. */
	int			r_type;		/* code type (NPF_CODE_BPF) */
	bpfjit_func_t		r_jcode;	/* JIT-compiled code, if any */
	void *			r_code;		/* BPF byte-code */
	unsigned		r_clen;		/* byte-code length, for freeing */

	/* NAT policy (optional), rule procedure and subset. */
	npf_natpolicy_t *	r_natp;
	npf_rproc_t *		r_rproc;

	/*
	 * The two union arms are mutually exclusive: a rule is either
	 * a dynamic *group* or a dynamic *rule*, never both.
	 */
	union {
		/*
		 * Dynamic group: rule subset and a group list entry.
		 */
		struct {
			npf_rule_t *		r_subset;
			LIST_ENTRY(npf_rule)	r_dentry;
		};

		/*
		 * Dynamic rule: priority, parent group and next rule.
		 */
		struct {
			int			r_priority;
			npf_rule_t *		r_parent;
			npf_rule_t *		r_next;
		};
	};

	/* Rule ID, name and the optional key. */
	uint64_t		r_id;
	char			r_name[NPF_RULE_MAXNAMELEN];
	uint8_t			r_key[NPF_RULE_MAXKEYLEN];

	/* All-list entry and the auxiliary info. */
	LIST_ENTRY(npf_rule)	r_aentry;
	nvlist_t *		r_info;		/* opaque blob kept for export */
	size_t			r_info_len;

	/* User/group match filters; op == NPF_OP_NONE means "not set". */
	rid_t uid;
	rid_t gid;
};
    126 
    127 #define	SKIPTO_ADJ_FLAG		(1U << 31)
    128 #define	SKIPTO_MASK		(SKIPTO_ADJ_FLAG - 1)
    129 
    130 static nvlist_t *	npf_rule_export(npf_t *, const npf_rule_t *);
    131 
    132 /*
    133  * Private attributes - must be in the NPF_RULE_PRIVMASK range.
    134  */
    135 #define	NPF_RULE_KEEPNAT	(0x01000000 & NPF_RULE_PRIVMASK)
    136 
    137 #define	NPF_DYNAMIC_GROUP_P(attr) \
    138     (((attr) & NPF_DYNAMIC_GROUP) == NPF_DYNAMIC_GROUP)
    139 
    140 #define	NPF_DYNAMIC_RULE_P(attr) \
    141     (((attr) & NPF_DYNAMIC_GROUP) == NPF_RULE_DYNAMIC)
    142 
    143 npf_ruleset_t *
    144 npf_ruleset_create(size_t slots)
    145 {
    146 	size_t len = offsetof(npf_ruleset_t, rs_rules[slots]);
    147 	npf_ruleset_t *rlset;
    148 
    149 	rlset = kmem_zalloc(len, KM_SLEEP);
    150 	LIST_INIT(&rlset->rs_dynamic);
    151 	LIST_INIT(&rlset->rs_all);
    152 	LIST_INIT(&rlset->rs_gc);
    153 	rlset->rs_slots = slots;
    154 
    155 	return rlset;
    156 }
    157 
/*
 * npf_ruleset_destroy: free every rule in the ruleset, drain the G/C
 * list and release the ruleset structure itself.
 */
void
npf_ruleset_destroy(npf_ruleset_t *rlset)
{
	size_t len = offsetof(npf_ruleset_t, rs_rules[rlset->rs_slots]);
	npf_rule_t *rl;

	while ((rl = LIST_FIRST(&rlset->rs_all)) != NULL) {
		if (NPF_DYNAMIC_GROUP_P(rl->r_attr)) {
			/*
			 * Note: r_subset may point to the rules which
			 * were inherited by a new ruleset.
			 */
			rl->r_subset = NULL;
			LIST_REMOVE(rl, r_dentry);
		}
		if (NPF_DYNAMIC_RULE_P(rl->r_attr)) {
			/* Not removing from r_subset, see above. */
			KASSERT(rl->r_parent != NULL);
		}
		LIST_REMOVE(rl, r_aentry);
		npf_rule_free(rl);
	}
	KASSERT(LIST_EMPTY(&rlset->rs_dynamic));

	/* Destroy anything previously parked on the G/C list. */
	npf_ruleset_gc(rlset);
	KASSERT(LIST_EMPTY(&rlset->rs_gc));
	kmem_free(rlset, len);
}
    186 
    187 /*
    188  * npf_ruleset_insert: insert the rule into the specified ruleset.
    189  */
    190 void
    191 npf_ruleset_insert(npf_ruleset_t *rlset, npf_rule_t *rl)
    192 {
    193 	unsigned n = rlset->rs_nitems;
    194 
    195 	KASSERT(n < rlset->rs_slots);
    196 
    197 	LIST_INSERT_HEAD(&rlset->rs_all, rl, r_aentry);
    198 	if (NPF_DYNAMIC_GROUP_P(rl->r_attr)) {
    199 		LIST_INSERT_HEAD(&rlset->rs_dynamic, rl, r_dentry);
    200 	} else {
    201 		KASSERTMSG(rl->r_parent == NULL, "cannot be dynamic rule");
    202 		rl->r_attr &= ~NPF_RULE_DYNAMIC;
    203 	}
    204 
    205 	rlset->rs_rules[n] = rl;
    206 	rlset->rs_nitems++;
    207 	rl->r_id = ++rlset->rs_idcnt;
    208 
    209 	if (rl->r_skip_to < ++n) {
    210 		rl->r_skip_to = SKIPTO_ADJ_FLAG | n;
    211 	}
    212 }
    213 
    214 npf_rule_t *
    215 npf_ruleset_lookup(npf_ruleset_t *rlset, const char *name)
    216 {
    217 	npf_rule_t *rl;
    218 
    219 	LIST_FOREACH(rl, &rlset->rs_dynamic, r_dentry) {
    220 		KASSERT(NPF_DYNAMIC_GROUP_P(rl->r_attr));
    221 		if (strncmp(rl->r_name, name, NPF_RULE_MAXNAMELEN) == 0)
    222 			break;
    223 	}
    224 	return rl;
    225 }
    226 
    227 /*
    228  * npf_ruleset_add: insert dynamic rule into the (active) ruleset.
    229  */
int
npf_ruleset_add(npf_ruleset_t *rlset, const char *rname, npf_rule_t *rl)
{
	npf_rule_t *rg, *it, *target;
	int priocmd;

	/* Only dynamic rules may be added this way. */
	if (!NPF_DYNAMIC_RULE_P(rl->r_attr)) {
		return EINVAL;
	}
	rg = npf_ruleset_lookup(rlset, rname);
	if (rg == NULL) {
		return ESRCH;
	}

	/* Dynamic rule - assign a unique ID and save the parent. */
	rl->r_id = ++rlset->rs_idcnt;
	rl->r_parent = rg;

	/*
	 * Rule priority: (highest) 1, 2 ... n (lowest).
	 * Negative priority indicates an operation and is reset to zero.
	 */
	if ((priocmd = rl->r_priority) < 0) {
		rl->r_priority = 0;
	}

	/*
	 * WARNING: once rg->subset or target->r_next of an *active*
	 * rule is set, then our rule becomes globally visible and active.
	 * Must issue a load fence to ensure rl->r_next visibility first.
	 */
	switch (priocmd) {
	case NPF_PRI_LAST:
	default:
		/* Walk to the insertion point: after the last rule whose
		 * priority is <= ours (list is kept in priority order). */
		target = NULL;
		it = rg->r_subset;
		while (it && it->r_priority <= rl->r_priority) {
			target = it;
			it = it->r_next;
		}
		if (target) {
			/* Publish r_next before linking rl into the list. */
			atomic_store_relaxed(&rl->r_next, target->r_next);
			membar_producer();
			atomic_store_relaxed(&target->r_next, rl);
			break;
		}
		/* Empty subset or highest priority: insert at the head. */
		/* FALLTHROUGH */

	case NPF_PRI_FIRST:
		atomic_store_relaxed(&rl->r_next, rg->r_subset);
		membar_producer();
		atomic_store_relaxed(&rg->r_subset, rl);
		break;
	}

	/* Finally, add into the all-list. */
	LIST_INSERT_HEAD(&rlset->rs_all, rl, r_aentry);
	return 0;
}
    289 
/*
 * npf_ruleset_unlink: unlink a dynamic rule from its parent group's
 * subset list (given its predecessor, or NULL if it is the head) and
 * from the all-list.  The caller is responsible for G/C of the rule.
 */
static void
npf_ruleset_unlink(npf_rule_t *rl, npf_rule_t *prev)
{
	KASSERT(NPF_DYNAMIC_RULE_P(rl->r_attr));
	if (prev) {
		prev->r_next = rl->r_next;
	} else {
		/* Head of the subset: advance the parent's head pointer. */
		npf_rule_t *rg = rl->r_parent;
		rg->r_subset = rl->r_next;
	}
	LIST_REMOVE(rl, r_aentry);
}
    302 
    303 /*
    304  * npf_ruleset_remove: remove the dynamic rule given the rule ID.
    305  */
    306 int
    307 npf_ruleset_remove(npf_ruleset_t *rlset, const char *rname, uint64_t id)
    308 {
    309 	npf_rule_t *rg, *prev = NULL;
    310 
    311 	if ((rg = npf_ruleset_lookup(rlset, rname)) == NULL) {
    312 		return ESRCH;
    313 	}
    314 	for (npf_rule_t *rl = rg->r_subset; rl; rl = rl->r_next) {
    315 		KASSERT(rl->r_parent == rg);
    316 		KASSERT(NPF_DYNAMIC_RULE_P(rl->r_attr));
    317 
    318 		/* Compare ID.  On match, remove and return. */
    319 		if (rl->r_id == id) {
    320 			npf_ruleset_unlink(rl, prev);
    321 			LIST_INSERT_HEAD(&rlset->rs_gc, rl, r_aentry);
    322 			return 0;
    323 		}
    324 		prev = rl;
    325 	}
    326 	return ENOENT;
    327 }
    328 
    329 /*
    330  * npf_ruleset_remkey: remove the dynamic rule given the rule key.
    331  */
int
npf_ruleset_remkey(npf_ruleset_t *rlset, const char *rname,
    const void *key, size_t len)
{
	npf_rule_t *rg, *rlast = NULL, *prev = NULL, *lastprev = NULL;

	KASSERT(len && len <= NPF_RULE_MAXKEYLEN);

	if ((rg = npf_ruleset_lookup(rlset, rname)) == NULL) {
		return ESRCH;
	}

	/*
	 * Compare the key and find the last in the list.  Note: only the
	 * *last* matching rule is removed per call; duplicates with the
	 * same key earlier in the list are left in place.
	 */
	for (npf_rule_t *rl = rg->r_subset; rl; rl = rl->r_next) {
		KASSERT(rl->r_parent == rg);
		KASSERT(NPF_DYNAMIC_RULE_P(rl->r_attr));
		if (memcmp(rl->r_key, key, len) == 0) {
			lastprev = prev;
			rlast = rl;
		}
		prev = rl;
	}
	if (!rlast) {
		return ENOENT;
	}
	/* Unlink and defer destruction to the G/C pass. */
	npf_ruleset_unlink(rlast, lastprev);
	LIST_INSERT_HEAD(&rlset->rs_gc, rlast, r_aentry);
	return 0;
}
    361 
    362 /*
    363  * npf_ruleset_list: serialise and return the dynamic rules.
    364  */
    365 int
    366 npf_ruleset_list(npf_t *npf, npf_ruleset_t *rlset, const char *rname,
    367     nvlist_t *rlset_nvl)
    368 {
    369 	const npf_rule_t *rg;
    370 
    371 	KASSERT(npf_config_locked_p(npf));
    372 
    373 	if ((rg = npf_ruleset_lookup(rlset, rname)) == NULL) {
    374 		return ESRCH;
    375 	}
    376 	for (const npf_rule_t *rl = rg->r_subset; rl; rl = rl->r_next) {
    377 		nvlist_t *rule;
    378 
    379 		KASSERT(rl->r_parent == rg);
    380 		KASSERT(NPF_DYNAMIC_RULE_P(rl->r_attr));
    381 
    382 		if ((rule = npf_rule_export(npf, rl)) == NULL) {
    383 			return ENOMEM;
    384 		}
    385 		nvlist_append_nvlist_array(rlset_nvl, "rules", rule);
    386 		nvlist_destroy(rule);
    387 	}
    388 	return 0;
    389 }
    390 
    391 /*
    392  * npf_ruleset_flush: flush the dynamic rules in the ruleset by inserting
    393  * them into the G/C list.
    394  */
int
npf_ruleset_flush(npf_ruleset_t *rlset, const char *rname)
{
	npf_rule_t *rg, *rl;

	if ((rg = npf_ruleset_lookup(rlset, rname)) == NULL) {
		return ESRCH;
	}

	/*
	 * Atomically detach the whole subset so concurrent inspectors
	 * no longer see the rules via the group head.
	 */
	rl = atomic_swap_ptr(&rg->r_subset, NULL);
	membar_producer();

	/* Move every detached rule onto the G/C list for later freeing. */
	while (rl) {
		KASSERT(NPF_DYNAMIC_RULE_P(rl->r_attr));
		KASSERT(rl->r_parent == rg);

		LIST_REMOVE(rl, r_aentry);
		LIST_INSERT_HEAD(&rlset->rs_gc, rl, r_aentry);
		rl = rl->r_next;
	}
	/* Reset the ID counter as the dynamic rules are gone. */
	rlset->rs_idcnt = 0;
	return 0;
}
    418 
    419 /*
    420  * npf_ruleset_gc: destroy the rules in G/C list.
    421  */
    422 void
    423 npf_ruleset_gc(npf_ruleset_t *rlset)
    424 {
    425 	npf_rule_t *rl;
    426 
    427 	while ((rl = LIST_FIRST(&rlset->rs_gc)) != NULL) {
    428 		LIST_REMOVE(rl, r_aentry);
    429 		npf_rule_free(rl);
    430 	}
    431 }
    432 
    433 /*
    434  * npf_ruleset_export: serialise and return the static rules.
    435  */
    436 int
    437 npf_ruleset_export(npf_t *npf, const npf_ruleset_t *rlset,
    438     const char *key, nvlist_t *npf_nv)
    439 {
    440 	const unsigned nitems = rlset->rs_nitems;
    441 	unsigned n = 0;
    442 	int error = 0;
    443 
    444 	KASSERT(npf_config_locked_p(npf));
    445 
    446 	while (n < nitems) {
    447 		const npf_rule_t *rl = rlset->rs_rules[n];
    448 		const npf_natpolicy_t *natp = rl->r_natp;
    449 		nvlist_t *rule;
    450 
    451 		rule = npf_rule_export(npf, rl);
    452 		if (!rule) {
    453 			error = ENOMEM;
    454 			break;
    455 		}
    456 		if (natp && (error = npf_natpolicy_export(natp, rule)) != 0) {
    457 			nvlist_destroy(rule);
    458 			break;
    459 		}
    460 		nvlist_append_nvlist_array(npf_nv, key, rule);
    461 		nvlist_destroy(rule);
    462 		n++;
    463 	}
    464 	return error;
    465 }
    466 
    467 /*
    468  * npf_ruleset_reload: prepare the new ruleset by scanning the active
    469  * ruleset and: 1) sharing the dynamic rules 2) sharing NAT policies.
    470  *
    471  * => The active (old) ruleset should be exclusively locked.
    472  */
void
npf_ruleset_reload(npf_t *npf, npf_ruleset_t *newset,
    npf_ruleset_t *oldset, bool load)
{
	npf_rule_t *rg, *rl;
	uint64_t nid = 0;

	KASSERT(npf_config_locked_p(npf));

	/*
	 * Scan the dynamic rules and share (migrate) if needed.
	 */
	LIST_FOREACH(rg, &newset->rs_dynamic, r_dentry) {
		npf_rule_t *active_rgroup;

		/* Look for a dynamic ruleset group with such name. */
		active_rgroup = npf_ruleset_lookup(oldset, rg->r_name);
		if (active_rgroup == NULL) {
			continue;
		}

		/*
		 * ATOMICITY: Copy the head pointer of the linked-list,
		 * but do not remove the rules from the active r_subset.
		 * This is necessary because the rules are still active
		 * and therefore are accessible for inspection via the
		 * old ruleset.
		 */
		rg->r_subset = active_rgroup->r_subset;

		/*
		 * We can safely migrate to the new all-rule list and
		 * reset the parent rule, though.
		 */
		for (rl = rg->r_subset; rl; rl = rl->r_next) {
			KASSERT(NPF_DYNAMIC_RULE_P(rl->r_attr));
			LIST_REMOVE(rl, r_aentry);
			LIST_INSERT_HEAD(&newset->rs_all, rl, r_aentry);

			KASSERT(rl->r_parent == active_rgroup);
			rl->r_parent = rg;
		}
	}

	/*
	 * If performing the load of connections then NAT policies might
	 * already have translated connections associated with them and
	 * we should not share or inherit anything.
	 */
	if (load)
		return;

	/*
	 * Scan all rules in the new ruleset and inherit the active NAT
	 * policies if they are the same.  Also, assign a unique ID for
	 * each policy here.
	 */
	LIST_FOREACH(rl, &newset->rs_all, r_aentry) {
		npf_natpolicy_t *np;
		npf_rule_t *actrl;

		/* Does the rule have a NAT policy associated? */
		if ((np = rl->r_natp) == NULL) {
			continue;
		}

		/* Does it match with any policy in the active ruleset? */
		LIST_FOREACH(actrl, &oldset->rs_all, r_aentry) {
			if (!actrl->r_natp)
				continue;
			/* Skip policies already inherited by another rule. */
			if ((actrl->r_attr & NPF_RULE_KEEPNAT) != 0)
				continue;
			if (npf_natpolicy_cmp(actrl->r_natp, np))
				break;
		}
		if (!actrl) {
			/* No: just set the ID and continue. */
			npf_nat_setid(np, ++nid);
			continue;
		}

		/* Yes: inherit the matching NAT policy. */
		rl->r_natp = actrl->r_natp;
		npf_nat_setid(rl->r_natp, ++nid);

		/*
		 * Finally, mark the active rule to not destroy its NAT
		 * policy later as we inherited it (but the rule must be
		 * kept active for now).  Destroy the new/unused policy.
		 */
		actrl->r_attr |= NPF_RULE_KEEPNAT;
		npf_natpolicy_destroy(np);
	}

	/* Inherit the ID counter. */
	newset->rs_idcnt = oldset->rs_idcnt;
}
    570 
    571 /*
    572  * npf_ruleset_findnat: find a NAT policy in the ruleset by a given ID.
    573  */
    574 npf_natpolicy_t *
    575 npf_ruleset_findnat(npf_ruleset_t *rlset, uint64_t id)
    576 {
    577 	npf_rule_t *rl;
    578 
    579 	LIST_FOREACH(rl, &rlset->rs_all, r_aentry) {
    580 		npf_natpolicy_t *np = rl->r_natp;
    581 		if (np && npf_nat_getid(np) == id) {
    582 			return np;
    583 		}
    584 	}
    585 	return NULL;
    586 }
    587 
    588 /*
    589  * npf_ruleset_freealg: inspect the ruleset and disassociate specified
    590  * ALG from all NAT entries using it.
    591  */
    592 void
    593 npf_ruleset_freealg(npf_ruleset_t *rlset, npf_alg_t *alg)
    594 {
    595 	npf_rule_t *rl;
    596 	npf_natpolicy_t *np;
    597 
    598 	LIST_FOREACH(rl, &rlset->rs_all, r_aentry) {
    599 		if ((np = rl->r_natp) != NULL) {
    600 			npf_nat_freealg(np, alg);
    601 		}
    602 	}
    603 }
    604 
    605 /*
    606  * npf_rule_alloc: allocate a rule and initialise it.
    607  */
    608 npf_rule_t *
    609 npf_rule_alloc(npf_t *npf, const nvlist_t *rule)
    610 {
    611 	npf_rule_t *rl;
    612 	const char *rname;
    613 	const void *key, *info;
    614 	size_t len;
    615 
    616 	/* Allocate a rule structure and keep the information. */
    617 	rl = kmem_zalloc(sizeof(npf_rule_t), KM_SLEEP);
    618 	info = dnvlist_get_binary(rule, "info", &rl->r_info_len, NULL, 0);
    619 	if (info) {
    620 		rl->r_info = kmem_alloc(rl->r_info_len, KM_SLEEP);
    621 		memcpy(rl->r_info, info, rl->r_info_len);
    622 	}
    623 	rl->r_natp = NULL;
    624 
    625 	/* Name (optional) */
    626 	if ((rname = dnvlist_get_string(rule, "name", NULL)) != NULL) {
    627 		strlcpy(rl->r_name, rname, NPF_RULE_MAXNAMELEN);
    628 	} else {
    629 		rl->r_name[0] = '\0';
    630 	}
    631 
    632 	/* Attributes, priority and interface ID (optional). */
    633 	rl->r_attr = dnvlist_get_number(rule, "attr", 0);
    634 	rl->r_attr &= ~NPF_RULE_PRIVMASK;
    635 
    636 	if (NPF_DYNAMIC_RULE_P(rl->r_attr)) {
    637 		/* Priority of the dynamic rule. */
    638 		rl->r_priority = (int)dnvlist_get_number(rule, "prio", 0);
    639 	} else {
    640 		/* The skip-to index.  No need to validate it. */
    641 		rl->r_skip_to = dnvlist_get_number(rule, "skip-to", 0);
    642 	}
    643 
    644 	/* Interface name; register and get the npf-if-id. */
    645 	if ((rname = dnvlist_get_string(rule, "ifname", NULL)) != NULL) {
    646 		if ((rl->r_ifid = npf_ifmap_register(npf, rname)) == 0) {
    647 			kmem_free(rl, sizeof(npf_rule_t));
    648 			return NULL;
    649 		}
    650 	} else {
    651 		rl->r_ifid = 0;
    652 	}
    653 
    654 	/* Key (optional). */
    655 	if ((key = dnvlist_get_binary(rule, "key", &len, NULL, 0)) != NULL) {
    656 		if (len > NPF_RULE_MAXKEYLEN) {
    657 			kmem_free(rl, sizeof(npf_rule_t));
    658 			return NULL;
    659 		}
    660 		memcpy(rl->r_key, key, len);
    661 	}
    662 
    663 	/* no gid/uid set yet */
    664 	rl->gid.op = rl->uid.op = NPF_OP_NONE;
    665 	return rl;
    666 }
    667 
    668 static void
    669 npf_rid_export(nvlist_t *rl, struct r_id rid, const char *name)
    670 {
    671 	uint64_t uid_element[3] = { rid.id[0], rid.id[1], rid.op };
    672 	nvlist_add_number_array(rl, name, uid_element, 3);
    673 }
    674 
/*
 * npf_rule_export: serialise a single rule into a freshly created
 * nvlist.  The caller owns (and must destroy) the returned nvlist.
 */
static nvlist_t *
npf_rule_export(npf_t *npf, const npf_rule_t *rl)
{
	nvlist_t *rule = nvlist_create(0);
	unsigned skip_to = 0;
	npf_rproc_t *rp;

	nvlist_add_number(rule, "attr", rl->r_attr);
	nvlist_add_number(rule, "prio", rl->r_priority);
	/* Only export the skip-to if it was user-specified, not adjusted. */
	if ((rl->r_skip_to & SKIPTO_ADJ_FLAG) == 0) {
		skip_to = rl->r_skip_to & SKIPTO_MASK;
	}
	nvlist_add_number(rule, "skip-to", skip_to);
	nvlist_add_number(rule, "code-type", rl->r_type);
	if (rl->r_code) {
		nvlist_add_binary(rule, "code", rl->r_code, rl->r_clen);
	}
	if (rl->r_ifid) {
		char ifname[IFNAMSIZ];
		npf_ifmap_copyname(npf, rl->r_ifid, ifname, sizeof(ifname));
		nvlist_add_string(rule, "ifname", ifname);
	}
	nvlist_add_number(rule, "id", rl->r_id);

	if (rl->r_name[0]) {
		nvlist_add_string(rule, "name", rl->r_name);
	}
	if (NPF_DYNAMIC_RULE_P(rl->r_attr)) {
		/* Dynamic rules carry their full fixed-size key. */
		nvlist_add_binary(rule, "key", rl->r_key, NPF_RULE_MAXKEYLEN);
	}
	if (rl->r_info) {
		nvlist_add_binary(rule, "info", rl->r_info, rl->r_info_len);
	}
	/* uid/gid filters are exported only when set. */
	if (rl->uid.op != NPF_OP_NONE) {
		npf_rid_export(rule, rl->uid, "r_user");
	}
	if (rl->gid.op != NPF_OP_NONE) {
		npf_rid_export(rule, rl->gid, "r_group");
	}
	if ((rp = npf_rule_getrproc(rl)) != NULL) {
		const char *rname = npf_rproc_getname(rp);
		nvlist_add_string(rule, "rproc", rname);
		npf_rproc_release(rp);
	}
	return rule;
}
    721 
    722 /*
    723  * npf_rule_setcode: assign filter code to the rule.
    724  *
    725  * => The code must be validated by the caller.
    726  * => JIT compilation may be performed here.
    727  */
    728 void
    729 npf_rule_setcode(npf_rule_t *rl, const int type, void *code, size_t size)
    730 {
    731 	KASSERT(type == NPF_CODE_BPF);
    732 
    733 	rl->r_type = type;
    734 	rl->r_code = code;
    735 	rl->r_clen = size;
    736 	rl->r_jcode = npf_bpf_compile(code, size);
    737 }
    738 
    739 void
    740 npf_rule_setrid(const nvlist_t *req, npf_rule_t *rl, const char *name)
    741 {
    742 	size_t nitems;
    743 	rid_t id;
    744 	const uint64_t *rid = nvlist_get_number_array(req, name, &nitems);
    745 	KASSERT(nitems == 3);
    746 
    747 	id.id[0] = (uint32_t)rid[0];
    748 	id.id[1] = (uint32_t)rid[1];
    749 	id.op = (uint8_t)rid[2];
    750 
    751 	if (!strcmp(name, "r_user"))
    752 		rl->uid = id;
    753 	else if (!strcmp(name, "r_group"))
    754 		rl->gid = id;
    755 }
    756 
    757 /*
    758  * npf_rule_setrproc: assign a rule procedure and hold a reference on it.
    759  */
void
npf_rule_setrproc(npf_rule_t *rl, npf_rproc_t *rp)
{
	/* Hold a reference for the lifetime of the rule. */
	npf_rproc_acquire(rp);
	rl->r_rproc = rp;
}
    766 
    767 /*
    768  * npf_rule_free: free the specified rule.
    769  */
    770 void
    771 npf_rule_free(npf_rule_t *rl)
    772 {
    773 	npf_natpolicy_t *np = rl->r_natp;
    774 	npf_rproc_t *rp = rl->r_rproc;
    775 
    776 	if (np && (rl->r_attr & NPF_RULE_KEEPNAT) == 0) {
    777 		/* Destroy the NAT policy. */
    778 		npf_natpolicy_destroy(np);
    779 	}
    780 	if (rp) {
    781 		/* Release rule procedure. */
    782 		npf_rproc_release(rp);
    783 	}
    784 	if (rl->r_code) {
    785 		/* Free byte-code. */
    786 		kmem_free(rl->r_code, rl->r_clen);
    787 	}
    788 	if (rl->r_jcode) {
    789 		/* Free JIT code. */
    790 		bpf_jit_freecode(rl->r_jcode);
    791 	}
    792 	if (rl->r_info) {
    793 		kmem_free(rl->r_info, rl->r_info_len);
    794 	}
    795 	kmem_free(rl, sizeof(npf_rule_t));
    796 }
    797 
    798 /*
    799  * npf_rule_getid: return the unique ID of a rule.
    800  * npf_rule_getrproc: acquire a reference and return rule procedure, if any.
    801  * npf_rule_getnat: get NAT policy assigned to the rule.
    802  */
    803 
uint64_t
npf_rule_getid(const npf_rule_t *rl)
{
	/* IDs are only meaningful for dynamic rules. */
	KASSERT(NPF_DYNAMIC_RULE_P(rl->r_attr));
	return rl->r_id;
}
    810 
    811 npf_rproc_t *
    812 npf_rule_getrproc(const npf_rule_t *rl)
    813 {
    814 	npf_rproc_t *rp = rl->r_rproc;
    815 
    816 	if (rp) {
    817 		npf_rproc_acquire(rp);
    818 	}
    819 	return rp;
    820 }
    821 
npf_natpolicy_t *
npf_rule_getnat(const npf_rule_t *rl)
{
	/* May be NULL if the rule has no NAT policy. */
	return rl->r_natp;
}
    827 
    828 /*
    829  * npf_rule_setnat: assign NAT policy to the rule and insert into the
    830  * NAT policy list in the ruleset.
    831  */
void
npf_rule_setnat(npf_rule_t *rl, npf_natpolicy_t *np)
{
	/* A rule may carry at most one NAT policy. */
	KASSERT(rl->r_natp == NULL);
	rl->r_natp = np;
}
    838 
    839 /*
    840  * npf_rule_inspect: match the interface, direction and run the filter code.
    841  * Returns true if rule matches and false otherwise.
    842  */
static inline bool
npf_rule_inspect(const npf_rule_t *rl, bpf_args_t *bc_args,
    const int di_mask, const unsigned ifid)
{
	/* Match the interface (r_ifid == 0 means "any"). */
	if (rl->r_ifid && rl->r_ifid != ifid) {
		return false;
	}

	/* Match the direction (a rule set to both directions matches any). */
	if ((rl->r_attr & NPF_RULE_DIMASK) != NPF_RULE_DIMASK) {
		if ((rl->r_attr & di_mask) == 0)
			return false;
	}

	/* Any code?  A rule without code matches unconditionally. */
	if (!rl->r_code) {
		KASSERT(rl->r_jcode == NULL);
		return true;
	}
	KASSERT(rl->r_type == NPF_CODE_BPF);
	/* Run the BPF byte-code (or the JIT-compiled variant). */
	return npf_bpf_filter(bc_args, rl->r_code, rl->r_jcode) != 0;
}
    866 
    867 /*
    868  * npf_rule_reinspect: re-inspect the dynamic rule by iterating its list.
    869  * This is only for the dynamic rules.  Subrules cannot have nested rules.
    870  */
static inline npf_rule_t *
npf_rule_reinspect(const npf_rule_t *rg, bpf_args_t *bc_args,
    const int di_mask, const unsigned ifid)
{
	npf_rule_t *final_rl = NULL, *rl;

	KASSERT(NPF_DYNAMIC_GROUP_P(rg->r_attr));

	/*
	 * Lock-free walk of the subset: entries are published with
	 * producer barriers by npf_ruleset_add(), so relaxed loads of
	 * the head and next pointers observe fully initialised rules.
	 */
	rl = atomic_load_relaxed(&rg->r_subset);
	for (; rl; rl = atomic_load_relaxed(&rl->r_next)) {
		KASSERT(!final_rl || rl->r_priority >= final_rl->r_priority);
		if (!npf_rule_inspect(rl, bc_args, di_mask, ifid)) {
			continue;
		}
		/* A "final" subrule short-circuits the scan. */
		if (rl->r_attr & NPF_RULE_FINAL) {
			return rl;
		}
		/* Otherwise, remember the last (lowest-priority) match. */
		final_rl = rl;
	}
	return final_rl;
}
    892 
    893 /*
    894  * npf_ruleset_inspect: inspect the packet against the given ruleset.
    895  *
    896  * Loop through the rules in the set and run the byte-code of each rule
    897  * against the packet (nbuf chain).  If sub-ruleset is found, inspect it.
    898  */
npf_rule_t *
npf_ruleset_inspect(npf_cache_t *npc, const npf_ruleset_t *rlset,
    const int di, const int layer)
{
	nbuf_t *nbuf = npc->npc_nbuf;
	const int di_mask = (di & PFIL_IN) ? NPF_RULE_IN : NPF_RULE_OUT;
	const unsigned nitems = rlset->rs_nitems;
	const unsigned ifid = nbuf->nb_ifid;
	npf_rule_t *final_rl = NULL;
	bpf_args_t bc_args;
	unsigned n = 0;

	/* Exactly one of IN/OUT must be set. */
	KASSERT(((di & PFIL_IN) != 0) ^ ((di & PFIL_OUT) != 0));

	/*
	 * Prepare the external memory store and the arguments for
	 * the BPF programs to be executed.  Reset mbuf before taking
	 * any pointers for the BPF.
	 */
	uint32_t bc_words[NPF_BPF_NWORDS];

	nbuf_reset(nbuf);
	npf_bpf_prepare(npc, &bc_args, bc_words);

	while (n < nitems) {
		npf_rule_t *rl = rlset->rs_rules[n];
		const unsigned skip_to = rl->r_skip_to & SKIPTO_MASK;
		const uint32_t attr = rl->r_attr;

		/* Wrong layer: jump over the rule (and its group, if any). */
		if ((attr & layer) == 0) {
			n = skip_to;
			continue;
		}

		KASSERT(!nbuf_flag_p(nbuf, NBUF_DATAREF_RESET));
		KASSERT(n < skip_to);

		/* Group is a barrier: return a matching if found any. */
		if ((attr & NPF_DYNAMIC_GROUP) == NPF_RULE_GROUP && final_rl) {
			break;
		}

		/* Main inspection of the rule. */
		if (!npf_rule_inspect(rl, &bc_args, di_mask, ifid)) {
			n = skip_to;
			continue;
		}

		if (NPF_DYNAMIC_GROUP_P(attr)) {
			/*
			 * If this is a dynamic rule, re-inspect the subrules.
			 * If it has any matching rule, then it is final.
			 */
			rl = npf_rule_reinspect(rl, &bc_args, di_mask, ifid);
			if (rl != NULL) {
				final_rl = rl;
				break;
			}
		} else if ((attr & NPF_RULE_GROUP) == 0) {
			/*
			 * Groups themselves are not matching.
			 */
			final_rl = rl;
		}

		/* Set the matching rule and check for "final". */
		if (attr & NPF_RULE_FINAL) {
			break;
		}
		n++;
	}

	KASSERT(!nbuf_flag_p(nbuf, NBUF_DATAREF_RESET));
	return final_rl;
}
    974 
    975 /*
    976  * just exchange the flag attributes for pass/block for the diff protocols.
    977  * for passing, we set the STATEFULNESS for TCP connection establishment
    978  * if ret == 0, it is for a pass to be changed to block
    979  * non-zero ret indicates a block to pass
    980  * when we change to block, we assume the default RST rerturn for TCP
    981  * when we change to pass, we ensure no bit field for RST for tcp and ICMP for udp
    982  * finally change the ret condition too
    983  */
    984 int
    985 npf_rule_reverse(npf_cache_t *npc, npf_match_info_t *mi, int ret)
    986 {
    987 	KASSERT(npf_iscached(npc, NPC_LAYER4));
    988 	switch(npc->npc_proto) {
    989 		case IPPROTO_TCP:
    990 			if (ret == 0) /* switch pass to block */ {
    991 				mi->mi_retfl &= !(NPF_RULE_PASS | NPF_RULE_STATEFUL |
    992 					NPF_RULE_GSTATEFUL);
    993 				mi->mi_retfl |= NPF_RULE_RETRST;
    994 			}
    995 			else /* block to pass */ {
    996 				mi->mi_retfl &= !(NPF_RULE_RETRST);
    997 				mi->mi_retfl |= (NPF_RULE_PASS | NPF_RULE_STATEFUL |
    998 					NPF_RULE_GSTATEFUL);
    999 			}
   1000 			break;
   1001 		case IPPROTO_UDP:
   1002 			if (ret == 0) /* pass to block */ {
   1003 				mi->mi_retfl &= !(NPF_RULE_PASS);
   1004 				mi->mi_retfl |= NPF_RULE_RETICMP;
   1005 			}
   1006 			else /* block to pass */ {
   1007 				mi->mi_retfl &= !(NPF_RULE_RETICMP);
   1008 				mi->mi_retfl |= NPF_RULE_PASS;
   1009 			}
   1010 			break;
   1011 	}
   1012 
   1013 	return (ret == 0) ? ENETUNREACH : 0;
   1014 }
   1015 
   1016 /* only perform uid/gid checks when set */
/*
 * npf_rule_match_rid: evaluate the rule's uid/gid filters against the
 * credentials of the socket owning this packet.
 *
 * NOTE(review): the return value mixes three conventions -- -1 when the
 * rule has no rid filters at all, ENOTCONN when the socket lookup fails,
 * and a boolean 0/1 match result otherwise.  Callers must distinguish
 * these cases; verify against the call sites.
 */
int
npf_rule_match_rid(npf_rule_t *rl, npf_cache_t *npc, int dir)
{
	uint32_t sock_gid, sock_uid;
	bool uid_matched = false, gid_matched = false;

	if (rl->gid.op == NPF_OP_NONE && rl->uid.op == NPF_OP_NONE)
		return -1; /* quickly return if packet has nothing to do with rids */

	KASSERT(npf_iscached(npc, NPC_IP46));
	KASSERT(npf_iscached(npc, NPC_LAYER4));

	/* Resolve the socket's effective gid/uid and compare, as needed. */
	if (rl->gid.op != NPF_OP_NONE) {
		if (npf_socket_lookup_rid(npc, kauth_cred_getegid, &sock_gid, dir) == -1)
			return ENOTCONN;

		gid_matched |= npf_match_rid(&rl->gid, sock_gid);
	}
	if (rl->uid.op != NPF_OP_NONE) {
		if (npf_socket_lookup_rid(npc, kauth_cred_geteuid, &sock_uid, dir) == -1)
			return ENOTCONN;

		uid_matched |= npf_match_rid(&rl->uid, sock_uid);
	}

	/* if both uid and gid are set on rule, both must be matching to agree */
	if (rl->gid.op && rl->uid.op)
		return gid_matched && uid_matched;
	else
		return gid_matched || uid_matched;
}
   1048 
   1049 /*
   1050  * npf_rule_conclude: return decision and the flags for conclusion.
   1051  *
   1052  * => Returns ENETUNREACH if "block" and 0 if "pass".
   1053  */
int
npf_rule_conclude(const npf_rule_t *rl, npf_match_info_t *mi)
{
	/* If not passing - drop the packet. */
	mi->mi_retfl = rl->r_attr;
	mi->mi_rid = rl->r_id;
	/* Pass flag set => 0 ("pass"); otherwise ENETUNREACH ("block"). */
	return (rl->r_attr & NPF_RULE_PASS) ? 0 : ENETUNREACH;
}
   1062 
   1063 
   1064 #if defined(DDB) || defined(_NPF_TESTING)
   1065 
   1066 void
   1067 npf_ruleset_dump(npf_t *npf, const char *name)
   1068 {
   1069 	npf_ruleset_t *rlset = npf_config_ruleset(npf);
   1070 	npf_rule_t *rg, *rl;
   1071 
   1072 	LIST_FOREACH(rg, &rlset->rs_dynamic, r_dentry) {
   1073 		printf("ruleset '%s':\n", rg->r_name);
   1074 		for (rl = rg->r_subset; rl; rl = rl->r_next) {
   1075 			printf("\tid %"PRIu64", key: ", rl->r_id);
   1076 			for (unsigned i = 0; i < NPF_RULE_MAXKEYLEN; i++)
   1077 				printf("%x", rl->r_key[i]);
   1078 			printf("\n");
   1079 		}
   1080 	}
   1081 }
   1082 
   1083 #endif
   1084