Home | History | Annotate | Line # | Download | only in npf
      1 /*-
      2  * Copyright (c) 2009-2019 The NetBSD Foundation, Inc.
      3  * All rights reserved.
      4  *
      5  * This material is based upon work partially supported by The
      6  * NetBSD Foundation under a contract with Mindaugas Rasiukevicius.
      7  *
      8  * Redistribution and use in source and binary forms, with or without
      9  * modification, are permitted provided that the following conditions
     10  * are met:
     11  * 1. Redistributions of source code must retain the above copyright
     12  *    notice, this list of conditions and the following disclaimer.
     13  * 2. Redistributions in binary form must reproduce the above copyright
     14  *    notice, this list of conditions and the following disclaimer in the
     15  *    documentation and/or other materials provided with the distribution.
     16  *
     17  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     18  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     19  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     20  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     21  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     22  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     23  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     24  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     25  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     26  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     27  * POSSIBILITY OF SUCH DAMAGE.
     28  */
     29 
     30 /*
     31  * NPF tableset module.
     32  *
     33  * Notes
     34  *
     35  *	The tableset is an array of tables.  After the creation, the array
     36  *	is immutable.  The caller is responsible to synchronise the access
     37  *	to the tableset.
     38  *
     39  * Warning (not applicable for the userspace npfkern):
     40  *
     41  *	The thmap_put()/thmap_del() are not called from the interrupt
     42  *	context and are protected by an IPL_NET mutex(9), therefore they
     43  *	do not need SPL wrappers -- see the comment at the top of the
     44  *	npf_conndb.c source file.
     45  */
     46 
     47 #ifdef _KERNEL
     48 #include <sys/cdefs.h>
     49 __KERNEL_RCSID(0, "$NetBSD: npf_tableset.c,v 1.43 2025/02/07 23:37:46 joe Exp $");
     50 
     51 #include <sys/param.h>
     52 #include <sys/types.h>
     53 
     54 #include <sys/atomic.h>
     55 #include <sys/cdbr.h>
     56 #include <sys/kmem.h>
     57 #include <sys/pool.h>
     58 #include <sys/queue.h>
     59 #include <sys/mutex.h>
     60 #include <sys/thmap.h>
     61 
     62 #include "lpm.h"
     63 #endif
     64 
     65 #include "npf_impl.h"
     66 
     67 typedef struct npf_tblent {
     68 	LIST_ENTRY(npf_tblent)	te_listent;
     69 	uint16_t		te_preflen;
     70 	uint16_t		te_alen;
     71 	npf_addr_t		te_addr;
     72 } npf_tblent_t;
     73 
     74 #define	NPF_ADDRLEN2IDX(alen)	((alen) >> 4)
     75 #define	NPF_ADDR_SLOTS		(2)
     76 
     77 struct npf_table {
     78 	/*
     79 	 * The storage type can be: a) hashmap b) LPM c) cdb.
     80 	 * There are separate trees for IPv4 and IPv6.
     81 	 */
     82 	union {
     83 		struct {
     84 			thmap_t *	t_map;
     85 			LIST_HEAD(, npf_tblent) t_gc;
     86 		};
     87 		lpm_t *			t_lpm;
     88 		struct {
     89 			void *		t_blob;
     90 			size_t		t_bsize;
     91 			struct cdbr *	t_cdb;
     92 		};
     93 		struct {
     94 			npf_tblent_t **	t_elements[NPF_ADDR_SLOTS];
     95 			unsigned	t_allocated[NPF_ADDR_SLOTS];
     96 			unsigned	t_used[NPF_ADDR_SLOTS];
     97 		};
     98 	} /* C11 */;
     99 	LIST_HEAD(, npf_tblent)		t_list;
    100 	unsigned			t_nitems;
    101 
    102 	/*
    103 	 * Table ID, type and lock.  The ID may change during the
    104 	 * config reload, it is protected by the npf_t::config_lock.
    105 	 */
    106 	int			t_type;
    107 	unsigned		t_id;
    108 	kmutex_t		t_lock;
    109 
    110 	/* Reference count and table name. */
    111 	unsigned		t_refcnt;
    112 	char			t_name[NPF_TABLE_MAXNAMELEN];
    113 };
    114 
    115 struct npf_tableset {
    116 	unsigned		ts_nitems;
    117 	npf_table_t *		ts_map[];
    118 };
    119 
/*
 * NPF_TABLESET_SIZE: size in bytes of a tableset with n table slots.
 *
 * offsetof() of the n-th element of the flexible array member is
 * already the byte size of the header plus n pointers, so it must not
 * be multiplied by the pointer size again.
 */
#define	NPF_TABLESET_SIZE(n)	\
    (offsetof(npf_tableset_t, ts_map[n]))
    122 
    123 #define	NPF_IFADDR_STEP		4
    124 
    125 static pool_cache_t		tblent_cache	__read_mostly;
    126 
    127 /*
    128  * npf_table_sysinit: initialise tableset structures.
    129  */
    130 void
    131 npf_tableset_sysinit(void)
    132 {
    133 	tblent_cache = pool_cache_init(sizeof(npf_tblent_t), 0,
    134 	    0, 0, "npftblpl", NULL, IPL_NONE, NULL, NULL, NULL);
    135 }
    136 
    137 void
    138 npf_tableset_sysfini(void)
    139 {
    140 	pool_cache_destroy(tblent_cache);
    141 }
    142 
    143 npf_tableset_t *
    144 npf_tableset_create(u_int nitems)
    145 {
    146 	npf_tableset_t *ts = kmem_zalloc(NPF_TABLESET_SIZE(nitems), KM_SLEEP);
    147 	ts->ts_nitems = nitems;
    148 	return ts;
    149 }
    150 
    151 void
    152 npf_tableset_destroy(npf_tableset_t *ts)
    153 {
    154 	/*
    155 	 * Destroy all tables (no references should be held, since the
    156 	 * ruleset should be destroyed before).
    157 	 */
    158 	for (u_int tid = 0; tid < ts->ts_nitems; tid++) {
    159 		npf_table_t *t = ts->ts_map[tid];
    160 
    161 		if (t == NULL)
    162 			continue;
    163 		membar_release();
    164 		if (atomic_dec_uint_nv(&t->t_refcnt) > 0)
    165 			continue;
    166 		membar_acquire();
    167 		npf_table_destroy(t);
    168 	}
    169 	kmem_free(ts, NPF_TABLESET_SIZE(ts->ts_nitems));
    170 }
    171 
    172 /*
    173  * npf_tableset_insert: insert the table into the specified tableset.
    174  *
    175  * => Returns 0 on success.  Fails and returns error if ID is already used.
    176  */
    177 int
    178 npf_tableset_insert(npf_tableset_t *ts, npf_table_t *t)
    179 {
    180 	const u_int tid = t->t_id;
    181 	int error;
    182 
    183 	KASSERT((u_int)tid < ts->ts_nitems);
    184 
    185 	if (ts->ts_map[tid] == NULL) {
    186 		atomic_inc_uint(&t->t_refcnt);
    187 		ts->ts_map[tid] = t;
    188 		error = 0;
    189 	} else {
    190 		error = EEXIST;
    191 	}
    192 	return error;
    193 }
    194 
    195 npf_table_t *
    196 npf_tableset_swap(npf_tableset_t *ts, npf_table_t *newt)
    197 {
    198 	const u_int tid = newt->t_id;
    199 	npf_table_t *oldt = ts->ts_map[tid];
    200 
    201 	KASSERT(tid < ts->ts_nitems);
    202 	KASSERT(oldt->t_id == newt->t_id);
    203 
    204 	newt->t_refcnt = oldt->t_refcnt;
    205 	oldt->t_refcnt = 0;
    206 	membar_producer();
    207 
    208 	return atomic_swap_ptr(&ts->ts_map[tid], newt);
    209 }
    210 
    211 /*
    212  * npf_tableset_getbyname: look for a table in the set given the name.
    213  */
    214 npf_table_t *
    215 npf_tableset_getbyname(npf_tableset_t *ts, const char *name)
    216 {
    217 	npf_table_t *t;
    218 
    219 	for (u_int tid = 0; tid < ts->ts_nitems; tid++) {
    220 		if ((t = ts->ts_map[tid]) == NULL)
    221 			continue;
    222 		if (strcmp(name, t->t_name) == 0)
    223 			return t;
    224 	}
    225 	return NULL;
    226 }
    227 
    228 npf_table_t *
    229 npf_tableset_getbyid(npf_tableset_t *ts, unsigned tid)
    230 {
    231 	if (__predict_true(tid < ts->ts_nitems)) {
    232 		return atomic_load_relaxed(&ts->ts_map[tid]);
    233 	}
    234 	return NULL;
    235 }
    236 
    237 /*
    238  * npf_tableset_reload: iterate all tables and if the new table is of the
    239  * same type and has no items, then we preserve the old one and its entries.
    240  *
    241  * => The caller is responsible for providing synchronisation.
    242  */
    243 void
    244 npf_tableset_reload(npf_t *npf, npf_tableset_t *nts, npf_tableset_t *ots)
    245 {
    246 	for (u_int tid = 0; tid < nts->ts_nitems; tid++) {
    247 		npf_table_t *t, *ot;
    248 
    249 		if ((t = nts->ts_map[tid]) == NULL) {
    250 			continue;
    251 		}
    252 
    253 		/* If our table has entries, just load it. */
    254 		if (t->t_nitems) {
    255 			continue;
    256 		}
    257 
    258 		/* Look for a currently existing table with such name. */
    259 		ot = npf_tableset_getbyname(ots, t->t_name);
    260 		if (ot == NULL) {
    261 			/* Not found: we have a new table. */
    262 			continue;
    263 		}
    264 
    265 		/* Found.  Did the type change? */
    266 		if (t->t_type != ot->t_type) {
    267 			/* Yes, load the new. */
    268 			continue;
    269 		}
    270 
    271 		/*
    272 		 * Preserve the current table.  Acquire a reference since
    273 		 * we are keeping it in the old table set.  Update its ID.
    274 		 */
    275 		atomic_inc_uint(&ot->t_refcnt);
    276 		nts->ts_map[tid] = ot;
    277 
    278 		KASSERT(npf_config_locked_p(npf));
    279 		ot->t_id = tid;
    280 
    281 		/* Destroy the new table (we hold the only reference). */
    282 		t->t_refcnt--;
    283 		npf_table_destroy(t);
    284 	}
    285 }
    286 
    287 int
    288 npf_tableset_export(npf_t *npf, const npf_tableset_t *ts, nvlist_t *nvl)
    289 {
    290 	const npf_table_t *t;
    291 
    292 	KASSERT(npf_config_locked_p(npf));
    293 
    294 	for (u_int tid = 0; tid < ts->ts_nitems; tid++) {
    295 		nvlist_t *table;
    296 
    297 		if ((t = ts->ts_map[tid]) == NULL) {
    298 			continue;
    299 		}
    300 		table = nvlist_create(0);
    301 		nvlist_add_string(table, "name", t->t_name);
    302 		nvlist_add_number(table, "type", t->t_type);
    303 		nvlist_add_number(table, "id", tid);
    304 
    305 		nvlist_append_nvlist_array(nvl, "tables", table);
    306 		nvlist_destroy(table);
    307 	}
    308 	return 0;
    309 }
    310 
    311 /*
    312  * Few helper routines.
    313  */
    314 
    315 static void
    316 table_ipset_flush(npf_table_t *t)
    317 {
    318 	npf_tblent_t *ent;
    319 
    320 	while ((ent = LIST_FIRST(&t->t_list)) != NULL) {
    321 		thmap_del(t->t_map, &ent->te_addr, ent->te_alen);
    322 		LIST_REMOVE(ent, te_listent);
    323 		pool_cache_put(tblent_cache, ent);
    324 	}
    325 	t->t_nitems = 0;
    326 }
    327 
    328 static void
    329 table_tree_flush(npf_table_t *t)
    330 {
    331 	npf_tblent_t *ent;
    332 
    333 	while ((ent = LIST_FIRST(&t->t_list)) != NULL) {
    334 		LIST_REMOVE(ent, te_listent);
    335 		pool_cache_put(tblent_cache, ent);
    336 	}
    337 	lpm_clear(t->t_lpm, NULL, NULL);
    338 	t->t_nitems = 0;
    339 }
    340 
    341 static void
    342 table_ifaddr_flush(npf_table_t *t)
    343 {
    344 	npf_tblent_t *ent;
    345 
    346 	for (unsigned i = 0; i < NPF_ADDR_SLOTS; i++) {
    347 		size_t len;
    348 
    349 		if (!t->t_allocated[i]) {
    350 			KASSERT(t->t_elements[i] == NULL);
    351 			continue;
    352 		}
    353 		len = t->t_allocated[i] * sizeof(npf_tblent_t *);
    354 		kmem_free(t->t_elements[i], len);
    355 		t->t_elements[i] = NULL;
    356 		t->t_allocated[i] = 0;
    357 		t->t_used[i] = 0;
    358 	}
    359 	while ((ent = LIST_FIRST(&t->t_list)) != NULL) {
    360 		LIST_REMOVE(ent, te_listent);
    361 		pool_cache_put(tblent_cache, ent);
    362 	}
    363 	t->t_nitems = 0;
    364 }
    365 
    366 /*
    367  * npf_table_create: create table with a specified ID.
    368  */
    369 npf_table_t *
    370 npf_table_create(const char *name, u_int tid, int type,
    371     const void *blob, size_t size)
    372 {
    373 	npf_table_t *t;
    374 
    375 	t = kmem_zalloc(sizeof(npf_table_t), KM_SLEEP);
    376 	strlcpy(t->t_name, name, NPF_TABLE_MAXNAMELEN);
    377 
    378 	switch (type) {
    379 	case NPF_TABLE_LPM:
    380 		t->t_lpm = lpm_create(KM_NOSLEEP);
    381 		if (t->t_lpm == NULL) {
    382 			goto out;
    383 		}
    384 		LIST_INIT(&t->t_list);
    385 		break;
    386 	case NPF_TABLE_IPSET:
    387 		t->t_map = thmap_create(0, NULL, THMAP_NOCOPY);
    388 		if (t->t_map == NULL) {
    389 			goto out;
    390 		}
    391 		break;
    392 	case NPF_TABLE_CONST:
    393 		t->t_blob = kmem_alloc(size, KM_SLEEP);
    394 		if (t->t_blob == NULL) {
    395 			goto out;
    396 		}
    397 		memcpy(t->t_blob, blob, size);
    398 		t->t_bsize = size;
    399 
    400 		t->t_cdb = cdbr_open_mem(t->t_blob, size,
    401 		    CDBR_DEFAULT, NULL, NULL);
    402 		if (t->t_cdb == NULL) {
    403 			kmem_free(t->t_blob, t->t_bsize);
    404 			goto out;
    405 		}
    406 		t->t_nitems = cdbr_entries(t->t_cdb);
    407 		break;
    408 	case NPF_TABLE_IFADDR:
    409 		break;
    410 	default:
    411 		KASSERT(false);
    412 	}
    413 	mutex_init(&t->t_lock, MUTEX_DEFAULT, IPL_NET);
    414 	t->t_type = type;
    415 	t->t_id = tid;
    416 	return t;
    417 out:
    418 	kmem_free(t, sizeof(npf_table_t));
    419 	return NULL;
    420 }
    421 
    422 /*
    423  * npf_table_destroy: free all table entries and table itself.
    424  */
    425 void
    426 npf_table_destroy(npf_table_t *t)
    427 {
    428 	KASSERT(t->t_refcnt == 0);
    429 
    430 	switch (t->t_type) {
    431 	case NPF_TABLE_IPSET:
    432 		table_ipset_flush(t);
    433 		npf_table_gc(NULL, t);
    434 		thmap_destroy(t->t_map);
    435 		break;
    436 	case NPF_TABLE_LPM:
    437 		table_tree_flush(t);
    438 		lpm_destroy(t->t_lpm);
    439 		break;
    440 	case NPF_TABLE_CONST:
    441 		cdbr_close(t->t_cdb);
    442 		kmem_free(t->t_blob, t->t_bsize);
    443 		break;
    444 	case NPF_TABLE_IFADDR:
    445 		table_ifaddr_flush(t);
    446 		break;
    447 	default:
    448 		KASSERT(false);
    449 	}
    450 	mutex_destroy(&t->t_lock);
    451 	kmem_free(t, sizeof(npf_table_t));
    452 }
    453 
    454 u_int
    455 npf_table_getid(npf_table_t *t)
    456 {
    457 	return t->t_id;
    458 }
    459 
    460 /*
    461  * npf_table_check: validate the name, ID and type.
    462  */
    463 int
    464 npf_table_check(npf_tableset_t *ts, const char *name, uint64_t tid,
    465     uint64_t type, bool replacing)
    466 {
    467 	const npf_table_t *t;
    468 
    469 	if (tid >= ts->ts_nitems) {
    470 		return EINVAL;
    471 	}
    472 	if (!replacing && ts->ts_map[tid] != NULL) {
    473 		return EEXIST;
    474 	}
    475 	switch (type) {
    476 	case NPF_TABLE_LPM:
    477 	case NPF_TABLE_IPSET:
    478 	case NPF_TABLE_CONST:
    479 	case NPF_TABLE_IFADDR:
    480 		break;
    481 	default:
    482 		return EINVAL;
    483 	}
    484 	if (strlen(name) >= NPF_TABLE_MAXNAMELEN) {
    485 		return ENAMETOOLONG;
    486 	}
    487 	if ((t = npf_tableset_getbyname(ts, name)) != NULL) {
    488 		if (!replacing || t->t_id != tid) {
    489 			return EEXIST;
    490 		}
    491 	}
    492 	return 0;
    493 }
    494 
    495 static int
    496 table_ifaddr_insert(npf_table_t *t, const int alen, npf_tblent_t *ent)
    497 {
    498 	const unsigned aidx = NPF_ADDRLEN2IDX(alen);
    499 	const unsigned allocated = t->t_allocated[aidx];
    500 	const unsigned used = t->t_used[aidx];
    501 
    502 	/*
    503 	 * No need to check for duplicates.
    504 	 */
    505 	if (allocated <= used) {
    506 		npf_tblent_t **old_elements = t->t_elements[aidx];
    507 		npf_tblent_t **elements;
    508 		size_t toalloc, newsize;
    509 
    510 		toalloc = roundup2(allocated + 1, NPF_IFADDR_STEP);
    511 		newsize = toalloc * sizeof(npf_tblent_t *);
    512 
    513 		elements = kmem_zalloc(newsize, KM_NOSLEEP);
    514 		if (elements == NULL) {
    515 			return ENOMEM;
    516 		}
    517 		for (unsigned i = 0; i < used; i++) {
    518 			elements[i] = old_elements[i];
    519 		}
    520 		if (allocated) {
    521 			const size_t len = allocated * sizeof(npf_tblent_t *);
    522 			KASSERT(old_elements != NULL);
    523 			kmem_free(old_elements, len);
    524 		}
    525 		t->t_elements[aidx] = elements;
    526 		t->t_allocated[aidx] = toalloc;
    527 	}
    528 	t->t_elements[aidx][used] = ent;
    529 	t->t_used[aidx]++;
    530 	return 0;
    531 }
    532 
    533 /*
    534  * npf_table_insert: add an IP CIDR entry into the table.
    535  */
    536 int
    537 npf_table_insert(npf_table_t *t, const int alen,
    538     const npf_addr_t *addr, const npf_netmask_t mask)
    539 {
    540 	npf_tblent_t *ent;
    541 	int error;
    542 
    543 	error = npf_netmask_check(alen, mask);
    544 	if (error) {
    545 		return error;
    546 	}
    547 	ent = pool_cache_get(tblent_cache, PR_WAITOK);
    548 	memcpy(&ent->te_addr, addr, alen);
    549 	ent->te_alen = alen;
    550 	ent->te_preflen = 0;
    551 
    552 	/*
    553 	 * Insert the entry.  Return an error on duplicate.
    554 	 */
    555 	mutex_enter(&t->t_lock);
    556 	switch (t->t_type) {
    557 	case NPF_TABLE_IPSET:
    558 		/*
    559 		 * Hashmap supports only IPs.
    560 		 *
    561 		 * Note: the key must be already persistent, since we
    562 		 * use THMAP_NOCOPY.
    563 		 */
    564 		if (mask != NPF_NO_NETMASK) {
    565 			error = EINVAL;
    566 			break;
    567 		}
    568 		if (thmap_put(t->t_map, &ent->te_addr, alen, ent) == ent) {
    569 			LIST_INSERT_HEAD(&t->t_list, ent, te_listent);
    570 			t->t_nitems++;
    571 		} else {
    572 			error = EEXIST;
    573 		}
    574 		break;
    575 	case NPF_TABLE_LPM: {
    576 		const unsigned preflen =
    577 		    (mask == NPF_NO_NETMASK) ? (alen * 8) : mask;
    578 		ent->te_preflen = preflen;
    579 
    580 		if (lpm_lookup(t->t_lpm, addr, alen) == NULL &&
    581 		    lpm_insert(t->t_lpm, addr, alen, preflen, ent) == 0) {
    582 			LIST_INSERT_HEAD(&t->t_list, ent, te_listent);
    583 			t->t_nitems++;
    584 			error = 0;
    585 		} else {
    586 			error = EEXIST;
    587 		}
    588 		break;
    589 	}
    590 	case NPF_TABLE_CONST:
    591 		error = EINVAL;
    592 		break;
    593 	case NPF_TABLE_IFADDR:
    594 		if ((error = table_ifaddr_insert(t, alen, ent)) != 0) {
    595 			break;
    596 		}
    597 		LIST_INSERT_HEAD(&t->t_list, ent, te_listent);
    598 		t->t_nitems++;
    599 		break;
    600 	default:
    601 		KASSERT(false);
    602 	}
    603 	mutex_exit(&t->t_lock);
    604 
    605 	if (error) {
    606 		pool_cache_put(tblent_cache, ent);
    607 	}
    608 	return error;
    609 }
    610 
    611 /*
    612  * npf_table_remove: remove the IP CIDR entry from the table.
    613  */
    614 int
    615 npf_table_remove(npf_table_t *t, const int alen,
    616     const npf_addr_t *addr, const npf_netmask_t mask)
    617 {
    618 	npf_tblent_t *ent = NULL;
    619 	int error;
    620 
    621 	error = npf_netmask_check(alen, mask);
    622 	if (error) {
    623 		return error;
    624 	}
    625 
    626 	mutex_enter(&t->t_lock);
    627 	switch (t->t_type) {
    628 	case NPF_TABLE_IPSET:
    629 		ent = thmap_del(t->t_map, addr, alen);
    630 		if (__predict_true(ent != NULL)) {
    631 			LIST_REMOVE(ent, te_listent);
    632 			LIST_INSERT_HEAD(&t->t_gc, ent, te_listent);
    633 			ent = NULL; // to be G/C'ed
    634 			t->t_nitems--;
    635 		} else {
    636 			error = ENOENT;
    637 		}
    638 		break;
    639 	case NPF_TABLE_LPM:
    640 		ent = lpm_lookup(t->t_lpm, addr, alen);
    641 		if (__predict_true(ent != NULL)) {
    642 			LIST_REMOVE(ent, te_listent);
    643 			lpm_remove(t->t_lpm, &ent->te_addr,
    644 			    ent->te_alen, ent->te_preflen);
    645 			t->t_nitems--;
    646 		} else {
    647 			error = ENOENT;
    648 		}
    649 		break;
    650 	case NPF_TABLE_CONST:
    651 	case NPF_TABLE_IFADDR:
    652 		error = EINVAL;
    653 		break;
    654 	default:
    655 		KASSERT(false);
    656 		ent = NULL;
    657 	}
    658 	mutex_exit(&t->t_lock);
    659 
    660 	if (ent) {
    661 		pool_cache_put(tblent_cache, ent);
    662 	}
    663 	return error;
    664 }
    665 
    666 /*
    667  * npf_table_lookup: find the table according to ID, lookup and match
    668  * the contents with the specified IP address.
    669  */
    670 int
    671 npf_table_lookup(npf_table_t *t, const int alen, const npf_addr_t *addr)
    672 {
    673 	const void *data;
    674 	size_t dlen;
    675 	bool found;
    676 	int error;
    677 
    678 	error = npf_netmask_check(alen, NPF_NO_NETMASK);
    679 	if (error) {
    680 		return error;
    681 	}
    682 
    683 	switch (t->t_type) {
    684 	case NPF_TABLE_IPSET:
    685 		/* Note: the caller is in the npf_config_read_enter(). */
    686 		found = thmap_get(t->t_map, addr, alen) != NULL;
    687 		break;
    688 	case NPF_TABLE_LPM:
    689 		mutex_enter(&t->t_lock);
    690 		found = lpm_lookup(t->t_lpm, addr, alen) != NULL;
    691 		mutex_exit(&t->t_lock);
    692 		break;
    693 	case NPF_TABLE_CONST:
    694 		if (cdbr_find(t->t_cdb, addr, alen, &data, &dlen) == 0) {
    695 			found = dlen == (unsigned)alen &&
    696 			    memcmp(addr, data, dlen) == 0;
    697 		} else {
    698 			found = false;
    699 		}
    700 		break;
    701 	case NPF_TABLE_IFADDR: {
    702 		const unsigned aidx = NPF_ADDRLEN2IDX(alen);
    703 
    704 		found = false;
    705 		for (unsigned i = 0; i < t->t_used[aidx]; i++) {
    706 			const npf_tblent_t *elm = t->t_elements[aidx][i];
    707 
    708 			KASSERT(elm->te_alen == alen);
    709 
    710 			if (memcmp(&elm->te_addr, addr, alen) == 0) {
    711 				found = true;
    712 				break;
    713 			}
    714 		}
    715 		break;
    716 	}
    717 	default:
    718 		KASSERT(false);
    719 		found = false;
    720 	}
    721 
    722 	return found ? 0 : ENOENT;
    723 }
    724 
    725 npf_addr_t *
    726 npf_table_getsome(npf_table_t *t, const int alen, unsigned idx)
    727 {
    728 	const unsigned aidx = NPF_ADDRLEN2IDX(alen);
    729 	npf_tblent_t *elm;
    730 	unsigned nitems;
    731 
    732 	KASSERT(t->t_type == NPF_TABLE_IFADDR);
    733 	KASSERT(aidx < NPF_ADDR_SLOTS);
    734 
    735 	nitems = t->t_used[aidx];
    736 	if (nitems == 0) {
    737 		return NULL;
    738 	}
    739 
    740 	/*
    741 	 * No need to acquire the lock, since the table is immutable.
    742 	 */
    743 	elm = t->t_elements[aidx][idx % nitems];
    744 	return &elm->te_addr;
    745 }
    746 
    747 static int
    748 table_ent_copyout(const npf_addr_t *addr, const int alen, npf_netmask_t mask,
    749     void *ubuf, size_t len, size_t *off)
    750 {
    751 	void *ubufp = (uint8_t *)ubuf + *off;
    752 	npf_ioctl_ent_t uent;
    753 
    754 	if ((*off += sizeof(npf_ioctl_ent_t)) > len) {
    755 		return ENOMEM;
    756 	}
    757 	uent.alen = alen;
    758 	memcpy(&uent.addr, addr, sizeof(npf_addr_t));
    759 	uent.mask = mask;
    760 
    761 	return copyout(&uent, ubufp, sizeof(npf_ioctl_ent_t));
    762 }
    763 
    764 static int
    765 table_generic_list(npf_table_t *t, void *ubuf, size_t len)
    766 {
    767 	npf_tblent_t *ent;
    768 	size_t off = 0;
    769 	int error = 0;
    770 
    771 	LIST_FOREACH(ent, &t->t_list, te_listent) {
    772 		mutex_exit(&t->t_lock);
    773 		error = table_ent_copyout(&ent->te_addr,
    774 		    ent->te_alen, ent->te_preflen, ubuf, len, &off);
    775 		mutex_enter(&t->t_lock);
    776 		if (error)
    777 			break;
    778 	}
    779 	return error;
    780 }
    781 
    782 static int
    783 table_cdb_list(npf_table_t *t, void *ubuf, size_t len)
    784 {
    785 	size_t off = 0, dlen;
    786 	const void *data;
    787 	int error = 0;
    788 
    789 	for (size_t i = 0; i < t->t_nitems; i++) {
    790 		if (cdbr_get(t->t_cdb, i, &data, &dlen) != 0) {
    791 			return EINVAL;
    792 		}
    793 		error = table_ent_copyout(data, dlen, 0, ubuf, len, &off);
    794 		if (error)
    795 			break;
    796 	}
    797 	return error;
    798 }
    799 
    800 /*
    801  * npf_table_list: copy a list of all table entries into a userspace buffer.
    802  */
    803 int
    804 npf_table_list(npf_table_t *t, void *ubuf, size_t len)
    805 {
    806 	int error = 0;
    807 
    808 	mutex_enter(&t->t_lock);
    809 	switch (t->t_type) {
    810 	case NPF_TABLE_IPSET:
    811 		error = table_generic_list(t, ubuf, len);
    812 		break;
    813 	case NPF_TABLE_LPM:
    814 		error = table_generic_list(t, ubuf, len);
    815 		break;
    816 	case NPF_TABLE_CONST:
    817 		error = table_cdb_list(t, ubuf, len);
    818 		break;
    819 	case NPF_TABLE_IFADDR:
    820 		error = table_generic_list(t, ubuf, len);
    821 		break;
    822 	default:
    823 		KASSERT(false);
    824 	}
    825 	mutex_exit(&t->t_lock);
    826 
    827 	return error;
    828 }
    829 
    830 /*
    831  * npf_table_flush: remove all table entries.
    832  */
    833 int
    834 npf_table_flush(npf_table_t *t)
    835 {
    836 	int error = 0;
    837 
    838 	mutex_enter(&t->t_lock);
    839 	switch (t->t_type) {
    840 	case NPF_TABLE_IPSET:
    841 		table_ipset_flush(t);
    842 		break;
    843 	case NPF_TABLE_LPM:
    844 		table_tree_flush(t);
    845 		break;
    846 	case NPF_TABLE_CONST:
    847 	case NPF_TABLE_IFADDR:
    848 		error = EINVAL;
    849 		break;
    850 	default:
    851 		KASSERT(false);
    852 	}
    853 	mutex_exit(&t->t_lock);
    854 	return error;
    855 }
    856 
    857 void
    858 npf_table_gc(npf_t *npf, npf_table_t *t)
    859 {
    860 	npf_tblent_t *ent;
    861 	void *ref;
    862 
    863 	if (t->t_type != NPF_TABLE_IPSET || LIST_EMPTY(&t->t_gc)) {
    864 		return;
    865 	}
    866 
    867 	ref = thmap_stage_gc(t->t_map);
    868 	if (npf) {
    869 		KASSERT(npf_config_locked_p(npf));
    870 		npf_config_sync(npf);
    871 	}
    872 	thmap_gc(t->t_map, ref);
    873 
    874 	while ((ent = LIST_FIRST(&t->t_gc)) != NULL) {
    875 		LIST_REMOVE(ent, te_listent);
    876 		pool_cache_put(tblent_cache, ent);
    877 	}
    878 }
    879