/*-
 * Copyright (c) 2009-2019 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This material is based upon work partially supported by The
 * NetBSD Foundation under a contract with Mindaugas Rasiukevicius.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * NPF tableset module.
 *
 * Notes
 *
 *	The tableset is an array of tables.  After creation, the array
 *	itself is immutable.  The caller is responsible for synchronising
 *	access to the tableset.
 *
 * Warning (not applicable to the userspace npfkern):
 *
 *	The thmap_put()/thmap_del() operations are not called from the
 *	interrupt context and are protected by an IPL_NET mutex(9),
 *	therefore they do not need SPL wrappers -- see the comment at
 *	the top of the npf_conndb.c source file.
 */
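
/*
 * Illustrative lifecycle sketch (not compiled): how a caller might build
 * a tableset and populate one table.  It only uses the functions defined
 * in this file; the table name, ID and prefix below are hypothetical.
 */
#if 0
static void
example_tableset_usage(void)
{
	npf_tableset_t *ts = npf_tableset_create(1);
	npf_table_t *t;
	npf_addr_t addr;
	struct in_addr ip4;

	/* Create an LPM table with the (hypothetical) ID 0. */
	t = npf_table_create("blocklist", 0, NPF_TABLE_LPM, NULL, 0);
	if (t == NULL || npf_tableset_insert(ts, t) != 0) {
		/* Error handling elided. */
		return;
	}

	/* Add the 10.0.0.0/24 prefix (IPv4 address length is 4 bytes). */
	memset(&addr, 0, sizeof(addr));
	ip4.s_addr = htonl(0x0a000000);
	memcpy(&addr, &ip4, sizeof(ip4));
	(void)npf_table_insert(t, sizeof(ip4), &addr, 24);

	/* Drops the reference taken by npf_tableset_insert(). */
	npf_tableset_destroy(ts);
}
#endif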

#ifdef _KERNEL
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: npf_tableset.c,v 1.39 2023/01/22 18:39:22 riastradh Exp $");

#include <sys/param.h>
#include <sys/types.h>

#include <sys/atomic.h>
#include <sys/cdbr.h>
#include <sys/kmem.h>
#include <sys/pool.h>
#include <sys/queue.h>
#include <sys/mutex.h>
#include <sys/thmap.h>

#include "lpm.h"
#endif

#include "npf_impl.h"

typedef struct npf_tblent {
	LIST_ENTRY(npf_tblent)	te_listent;
	uint16_t		te_preflen;
	uint16_t		te_alen;
	npf_addr_t		te_addr;
} npf_tblent_t;

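/*
 * Address lengths map to two slots: NPF_ADDRLEN2IDX(4) == 0 for IPv4 and
 * NPF_ADDRLEN2IDX(16) == 1 for IPv6, hence NPF_ADDR_SLOTS is 2.
 */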
#define	NPF_ADDRLEN2IDX(alen)	((alen) >> 4)
#define	NPF_ADDR_SLOTS		(2)

struct npf_table {
	/*
	 * The storage type can be: a) hashmap, b) LPM, c) cdb or
	 * d) a plain array (for interface addresses, with separate
	 * slots for IPv4 and IPv6 entries).
	 */
	union {
		struct {
			thmap_t *	t_map;
			LIST_HEAD(, npf_tblent) t_gc;
		};
		lpm_t *			t_lpm;
		struct {
			void *		t_blob;
			size_t		t_bsize;
			struct cdbr *	t_cdb;
		};
		struct {
			npf_tblent_t **	t_elements[NPF_ADDR_SLOTS];
			unsigned	t_allocated[NPF_ADDR_SLOTS];
			unsigned	t_used[NPF_ADDR_SLOTS];
		};
	} /* C11 */;
	LIST_HEAD(, npf_tblent)		t_list;
	unsigned			t_nitems;

	/*
	 * Table ID, type and lock.  The ID may change during a config
	 * reload; it is protected by the npf->config_lock.
	 */
	int			t_type;
	unsigned		t_id;
	kmutex_t		t_lock;

	/* Reference count and table name. */
	unsigned		t_refcnt;
	char			t_name[NPF_TABLE_MAXNAMELEN];
};

struct npf_tableset {
	unsigned		ts_nitems;
	npf_table_t *		ts_map[];
};

#define	NPF_TABLESET_SIZE(n)	\
    (offsetof(npf_tableset_t, ts_map[n]))
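/*
 * For example, on an LP64 platform NPF_TABLESET_SIZE(4) evaluates to
 * offsetof(npf_tableset_t, ts_map[4]) == 8 + 4 * 8 == 40 bytes.
 */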

#define	NPF_IFADDR_STEP		4

static pool_cache_t		tblent_cache	__read_mostly;

/*
 * npf_tableset_sysinit: initialise the tableset structures.
 */
void
npf_tableset_sysinit(void)
{
	tblent_cache = pool_cache_init(sizeof(npf_tblent_t), 0,
	    0, 0, "npftblpl", NULL, IPL_NONE, NULL, NULL, NULL);
}

void
npf_tableset_sysfini(void)
{
	pool_cache_destroy(tblent_cache);
}

npf_tableset_t *
npf_tableset_create(u_int nitems)
{
	npf_tableset_t *ts = kmem_zalloc(NPF_TABLESET_SIZE(nitems), KM_SLEEP);
	ts->ts_nitems = nitems;
	return ts;
}

void
npf_tableset_destroy(npf_tableset_t *ts)
{
	/*
	 * Destroy all tables (no references should be held, since the
	 * ruleset should have been destroyed already).
	 */
	for (u_int tid = 0; tid < ts->ts_nitems; tid++) {
		npf_table_t *t = ts->ts_map[tid];

		if (t == NULL)
			continue;
#ifndef __HAVE_ATOMIC_AS_MEMBAR
		membar_release();
#endif
		if (atomic_dec_uint_nv(&t->t_refcnt) > 0)
			continue;
#ifndef __HAVE_ATOMIC_AS_MEMBAR
		membar_acquire();
#endif
		npf_table_destroy(t);
	}
	kmem_free(ts, NPF_TABLESET_SIZE(ts->ts_nitems));
}

/*
 * npf_tableset_insert: insert the table into the specified tableset.
 *
 * => Returns 0 on success, or EEXIST if the table ID is already in use.
 */
int
npf_tableset_insert(npf_tableset_t *ts, npf_table_t *t)
{
	const u_int tid = t->t_id;
	int error;

	KASSERT((u_int)tid < ts->ts_nitems);

	if (ts->ts_map[tid] == NULL) {
		atomic_inc_uint(&t->t_refcnt);
		ts->ts_map[tid] = t;
		error = 0;
	} else {
		error = EEXIST;
	}
	return error;
}

npf_table_t *
npf_tableset_swap(npf_tableset_t *ts, npf_table_t *newt)
{
	const u_int tid = newt->t_id;
	npf_table_t *oldt = ts->ts_map[tid];

	KASSERT(tid < ts->ts_nitems);
	KASSERT(oldt->t_id == newt->t_id);

	newt->t_refcnt = oldt->t_refcnt;
	oldt->t_refcnt = 0;
	membar_producer();

	return atomic_swap_ptr(&ts->ts_map[tid], newt);
}

/*
 * npf_tableset_getbyname: look for a table in the set given the name.
 */
npf_table_t *
npf_tableset_getbyname(npf_tableset_t *ts, const char *name)
{
	npf_table_t *t;

	for (u_int tid = 0; tid < ts->ts_nitems; tid++) {
		if ((t = ts->ts_map[tid]) == NULL)
			continue;
		if (strcmp(name, t->t_name) == 0)
			return t;
	}
	return NULL;
}

npf_table_t *
npf_tableset_getbyid(npf_tableset_t *ts, unsigned tid)
{
	if (__predict_true(tid < ts->ts_nitems)) {
		return atomic_load_relaxed(&ts->ts_map[tid]);
	}
	return NULL;
}

/*
 * npf_tableset_reload: iterate over the tables in the new set and, if a
 * new table has the same name and type as an existing one but no items,
 * preserve the existing table and its entries.  For example, an empty
 * IPSET table named "blocklist" in the new configuration inherits the
 * currently loaded "blocklist" table.
 *
 * => The caller is responsible for providing synchronisation.
 */
void
npf_tableset_reload(npf_t *npf, npf_tableset_t *nts, npf_tableset_t *ots)
{
	for (u_int tid = 0; tid < nts->ts_nitems; tid++) {
		npf_table_t *t, *ot;

		if ((t = nts->ts_map[tid]) == NULL) {
			continue;
		}

		/* If our table has entries, just load it. */
		if (t->t_nitems) {
			continue;
		}

		/* Look for an existing table with the same name. */
		ot = npf_tableset_getbyname(ots, t->t_name);
		if (ot == NULL) {
			/* Not found: we have a new table. */
			continue;
		}

		/* Found.  Did the type change? */
		if (t->t_type != ot->t_type) {
			/* Yes, load the new one. */
			continue;
		}

		/*
		 * Preserve the current table.  Acquire a reference since
		 * we are keeping it in the old table set.  Update its ID.
		 */
		atomic_inc_uint(&ot->t_refcnt);
		nts->ts_map[tid] = ot;

		KASSERT(npf_config_locked_p(npf));
		ot->t_id = tid;

		/* Destroy the new table (we hold the only reference). */
		t->t_refcnt--;
		npf_table_destroy(t);
	}
}

int
npf_tableset_export(npf_t *npf, const npf_tableset_t *ts, nvlist_t *nvl)
{
	const npf_table_t *t;

	KASSERT(npf_config_locked_p(npf));

	for (u_int tid = 0; tid < ts->ts_nitems; tid++) {
		nvlist_t *table;

		if ((t = ts->ts_map[tid]) == NULL) {
			continue;
		}
		table = nvlist_create(0);
		nvlist_add_string(table, "name", t->t_name);
		nvlist_add_number(table, "type", t->t_type);
		nvlist_add_number(table, "id", tid);

		nvlist_append_nvlist_array(nvl, "tables", table);
		nvlist_destroy(table);
	}
	return 0;
}

/*
 * A few helper routines.
 */

static void
table_ipset_flush(npf_table_t *t)
{
	npf_tblent_t *ent;

	while ((ent = LIST_FIRST(&t->t_list)) != NULL) {
		thmap_del(t->t_map, &ent->te_addr, ent->te_alen);
		LIST_REMOVE(ent, te_listent);
		pool_cache_put(tblent_cache, ent);
	}
	t->t_nitems = 0;
}

static void
table_tree_flush(npf_table_t *t)
{
	npf_tblent_t *ent;

	while ((ent = LIST_FIRST(&t->t_list)) != NULL) {
		LIST_REMOVE(ent, te_listent);
		pool_cache_put(tblent_cache, ent);
	}
	lpm_clear(t->t_lpm, NULL, NULL);
	t->t_nitems = 0;
}

static void
table_ifaddr_flush(npf_table_t *t)
{
	npf_tblent_t *ent;

	for (unsigned i = 0; i < NPF_ADDR_SLOTS; i++) {
		size_t len;

		if (!t->t_allocated[i]) {
			KASSERT(t->t_elements[i] == NULL);
			continue;
		}
		len = t->t_allocated[i] * sizeof(npf_tblent_t *);
		kmem_free(t->t_elements[i], len);
		t->t_elements[i] = NULL;
		t->t_allocated[i] = 0;
		t->t_used[i] = 0;
	}
	while ((ent = LIST_FIRST(&t->t_list)) != NULL) {
		LIST_REMOVE(ent, te_listent);
		pool_cache_put(tblent_cache, ent);
	}
	t->t_nitems = 0;
}

/*
 * npf_table_create: create table with a specified ID.
 */
npf_table_t *
npf_table_create(const char *name, u_int tid, int type,
    const void *blob, size_t size)
{
	npf_table_t *t;

	t = kmem_zalloc(sizeof(npf_table_t), KM_SLEEP);
	strlcpy(t->t_name, name, NPF_TABLE_MAXNAMELEN);

	switch (type) {
	case NPF_TABLE_LPM:
		t->t_lpm = lpm_create(KM_NOSLEEP);
		if (t->t_lpm == NULL) {
			goto out;
		}
		LIST_INIT(&t->t_list);
		break;
	case NPF_TABLE_IPSET:
		t->t_map = thmap_create(0, NULL, THMAP_NOCOPY);
		if (t->t_map == NULL) {
			goto out;
		}
		break;
	case NPF_TABLE_CONST:
		t->t_blob = kmem_alloc(size, KM_SLEEP);
		if (t->t_blob == NULL) {
			goto out;
		}
		memcpy(t->t_blob, blob, size);
		t->t_bsize = size;

		t->t_cdb = cdbr_open_mem(t->t_blob, size,
		    CDBR_DEFAULT, NULL, NULL);
		if (t->t_cdb == NULL) {
			kmem_free(t->t_blob, t->t_bsize);
			goto out;
		}
		t->t_nitems = cdbr_entries(t->t_cdb);
		break;
	case NPF_TABLE_IFADDR:
		break;
	default:
		KASSERT(false);
	}
	mutex_init(&t->t_lock, MUTEX_DEFAULT, IPL_NET);
	t->t_type = type;
	t->t_id = tid;
	return t;
out:
	kmem_free(t, sizeof(npf_table_t));
	return NULL;
}

/*
 * npf_table_destroy: free all table entries and the table itself.
 */
void
npf_table_destroy(npf_table_t *t)
{
	KASSERT(t->t_refcnt == 0);

	switch (t->t_type) {
	case NPF_TABLE_IPSET:
		table_ipset_flush(t);
		npf_table_gc(NULL, t);
		thmap_destroy(t->t_map);
		break;
	case NPF_TABLE_LPM:
		table_tree_flush(t);
		lpm_destroy(t->t_lpm);
		break;
	case NPF_TABLE_CONST:
		cdbr_close(t->t_cdb);
		kmem_free(t->t_blob, t->t_bsize);
		break;
	case NPF_TABLE_IFADDR:
		table_ifaddr_flush(t);
		break;
	default:
		KASSERT(false);
	}
	mutex_destroy(&t->t_lock);
	kmem_free(t, sizeof(npf_table_t));
}

u_int
npf_table_getid(npf_table_t *t)
{
	return t->t_id;
}

/*
 * npf_table_check: validate the name, ID and type.
 */
int
npf_table_check(npf_tableset_t *ts, const char *name, uint64_t tid,
    uint64_t type, bool replacing)
{
	const npf_table_t *t;

	if (tid >= ts->ts_nitems) {
		return EINVAL;
	}
	if (!replacing && ts->ts_map[tid] != NULL) {
		return EEXIST;
	}
	switch (type) {
	case NPF_TABLE_LPM:
	case NPF_TABLE_IPSET:
	case NPF_TABLE_CONST:
	case NPF_TABLE_IFADDR:
		break;
	default:
		return EINVAL;
	}
	if (strlen(name) >= NPF_TABLE_MAXNAMELEN) {
		return ENAMETOOLONG;
	}
	if ((t = npf_tableset_getbyname(ts, name)) != NULL) {
		if (!replacing || t->t_id != tid) {
			return EEXIST;
		}
	}
	return 0;
}

static int
table_ifaddr_insert(npf_table_t *t, const int alen, npf_tblent_t *ent)
{
	const unsigned aidx = NPF_ADDRLEN2IDX(alen);
	const unsigned allocated = t->t_allocated[aidx];
	const unsigned used = t->t_used[aidx];

	/*
	 * No need to check for duplicates.
	 */
	if (allocated <= used) {
		npf_tblent_t **old_elements = t->t_elements[aidx];
		npf_tblent_t **elements;
		size_t toalloc, newsize;

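		/*
		 * Grow the per-family array in NPF_IFADDR_STEP-sized
		 * increments: 4, 8, 12, ... slots.
		 */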
		toalloc = roundup2(allocated + 1, NPF_IFADDR_STEP);
		newsize = toalloc * sizeof(npf_tblent_t *);

		elements = kmem_zalloc(newsize, KM_NOSLEEP);
		if (elements == NULL) {
			return ENOMEM;
		}
		for (unsigned i = 0; i < used; i++) {
			elements[i] = old_elements[i];
		}
		if (allocated) {
			const size_t len = allocated * sizeof(npf_tblent_t *);
			KASSERT(old_elements != NULL);
			kmem_free(old_elements, len);
		}
		t->t_elements[aidx] = elements;
		t->t_allocated[aidx] = toalloc;
	}
	t->t_elements[aidx][used] = ent;
	t->t_used[aidx]++;
	return 0;
}

/*
 * npf_table_insert: add an IP CIDR entry into the table.
 */
int
npf_table_insert(npf_table_t *t, const int alen,
    const npf_addr_t *addr, const npf_netmask_t mask)
{
	npf_tblent_t *ent;
	int error;

	error = npf_netmask_check(alen, mask);
	if (error) {
		return error;
	}
	ent = pool_cache_get(tblent_cache, PR_WAITOK);
	memcpy(&ent->te_addr, addr, alen);
	ent->te_alen = alen;
	ent->te_preflen = 0;

	/*
	 * Insert the entry.  Return an error on duplicate.
	 */
	mutex_enter(&t->t_lock);
	switch (t->t_type) {
	case NPF_TABLE_IPSET:
		/*
		 * The hashmap supports only single IP addresses
		 * (no prefixes).
		 *
		 * Note: the key must already be persistent, since we
		 * use THMAP_NOCOPY.
		 */
		if (mask != NPF_NO_NETMASK) {
			error = EINVAL;
			break;
		}
		if (thmap_put(t->t_map, &ent->te_addr, alen, ent) == ent) {
			LIST_INSERT_HEAD(&t->t_list, ent, te_listent);
			t->t_nitems++;
		} else {
			error = EEXIST;
		}
		break;
	case NPF_TABLE_LPM: {
		const unsigned preflen =
		    (mask == NPF_NO_NETMASK) ? (alen * 8) : mask;
		ent->te_preflen = preflen;

		if (lpm_lookup(t->t_lpm, addr, alen) == NULL &&
		    lpm_insert(t->t_lpm, addr, alen, preflen, ent) == 0) {
			LIST_INSERT_HEAD(&t->t_list, ent, te_listent);
			t->t_nitems++;
			error = 0;
		} else {
			error = EEXIST;
		}
		break;
	}
	case NPF_TABLE_CONST:
		error = EINVAL;
		break;
	case NPF_TABLE_IFADDR:
		if ((error = table_ifaddr_insert(t, alen, ent)) != 0) {
			break;
		}
		LIST_INSERT_HEAD(&t->t_list, ent, te_listent);
		t->t_nitems++;
		break;
	default:
		KASSERT(false);
	}
	mutex_exit(&t->t_lock);

	if (error) {
		pool_cache_put(tblent_cache, ent);
	}
	return error;
}

/*
 * npf_table_remove: remove the IP CIDR entry from the table.
 */
int
npf_table_remove(npf_table_t *t, const int alen,
    const npf_addr_t *addr, const npf_netmask_t mask)
{
	npf_tblent_t *ent = NULL;
	int error;

	error = npf_netmask_check(alen, mask);
	if (error) {
		return error;
	}

	mutex_enter(&t->t_lock);
	switch (t->t_type) {
	case NPF_TABLE_IPSET:
		ent = thmap_del(t->t_map, addr, alen);
		if (__predict_true(ent != NULL)) {
			LIST_REMOVE(ent, te_listent);
			LIST_INSERT_HEAD(&t->t_gc, ent, te_listent);
			ent = NULL; // to be G/C'ed
			t->t_nitems--;
		} else {
			error = ENOENT;
		}
		break;
	case NPF_TABLE_LPM:
		ent = lpm_lookup(t->t_lpm, addr, alen);
		if (__predict_true(ent != NULL)) {
			LIST_REMOVE(ent, te_listent);
			lpm_remove(t->t_lpm, &ent->te_addr,
			    ent->te_alen, ent->te_preflen);
			t->t_nitems--;
		} else {
			error = ENOENT;
		}
		break;
	case NPF_TABLE_CONST:
	case NPF_TABLE_IFADDR:
		error = EINVAL;
		break;
	default:
		KASSERT(false);
		ent = NULL;
	}
	mutex_exit(&t->t_lock);

	if (ent) {
		pool_cache_put(tblent_cache, ent);
	}
	return error;
}

/*
 * npf_table_lookup: look up the specified IP address in the table.
 *
 * => Returns zero on match, otherwise ENOENT.
 */
int
npf_table_lookup(npf_table_t *t, const int alen, const npf_addr_t *addr)
{
	const void *data;
	size_t dlen;
	bool found;
	int error;

	error = npf_netmask_check(alen, NPF_NO_NETMASK);
	if (error) {
		return error;
	}

	switch (t->t_type) {
	case NPF_TABLE_IPSET:
		/* Note: the caller is in the npf_config_read_enter(). */
		found = thmap_get(t->t_map, addr, alen) != NULL;
		break;
	case NPF_TABLE_LPM:
		mutex_enter(&t->t_lock);
		found = lpm_lookup(t->t_lpm, addr, alen) != NULL;
		mutex_exit(&t->t_lock);
		break;
	case NPF_TABLE_CONST:
		if (cdbr_find(t->t_cdb, addr, alen, &data, &dlen) == 0) {
			found = dlen == (unsigned)alen &&
			    memcmp(addr, data, dlen) == 0;
		} else {
			found = false;
		}
		break;
	case NPF_TABLE_IFADDR: {
		const unsigned aidx = NPF_ADDRLEN2IDX(alen);

		found = false;
		for (unsigned i = 0; i < t->t_used[aidx]; i++) {
			const npf_tblent_t *elm = t->t_elements[aidx][i];

			KASSERT(elm->te_alen == alen);

			if (memcmp(&elm->te_addr, addr, alen) == 0) {
				found = true;
				break;
			}
		}
		break;
	}
	default:
		KASSERT(false);
		found = false;
	}

	return found ? 0 : ENOENT;
}
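
/*
 * Illustrative sketch (not compiled): the insert/lookup/remove round trip
 * for a single host address in an IPSET table.  The address below is
 * hypothetical; in the kernel the lookup side runs under
 * npf_config_read_enter(), as noted above.
 */
#if 0
static void
example_table_roundtrip(npf_table_t *t)
{
	npf_addr_t addr;
	struct in_addr ip4;

	memset(&addr, 0, sizeof(addr));
	ip4.s_addr = htonl(0xc0000201);		/* 192.0.2.1 */
	memcpy(&addr, &ip4, sizeof(ip4));

	/* IPSET tables take single addresses only, hence NPF_NO_NETMASK. */
	if (npf_table_insert(t, sizeof(ip4), &addr, NPF_NO_NETMASK) == 0) {
		KASSERT(npf_table_lookup(t, sizeof(ip4), &addr) == 0);
		(void)npf_table_remove(t, sizeof(ip4), &addr, NPF_NO_NETMASK);
	}
}
#endif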

npf_addr_t *
npf_table_getsome(npf_table_t *t, const int alen, unsigned idx)
{
	const unsigned aidx = NPF_ADDRLEN2IDX(alen);
	npf_tblent_t *elm;
	unsigned nitems;

	KASSERT(t->t_type == NPF_TABLE_IFADDR);
	KASSERT(aidx < NPF_ADDR_SLOTS);

	nitems = t->t_used[aidx];
	if (nitems == 0) {
		return NULL;
	}

	/*
	 * No need to acquire the lock, since the table is immutable.
	 */
	elm = t->t_elements[aidx][idx % nitems];
	return &elm->te_addr;
}

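/*
 * Entries are copied out back-to-back as an array of npf_ioctl_ent_t
 * records; *off tracks the running offset into the userspace buffer.
 */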
static int
table_ent_copyout(const npf_addr_t *addr, const int alen, npf_netmask_t mask,
    void *ubuf, size_t len, size_t *off)
{
	void *ubufp = (uint8_t *)ubuf + *off;
	npf_ioctl_ent_t uent;

	if ((*off += sizeof(npf_ioctl_ent_t)) > len) {
		return ENOMEM;
	}
	uent.alen = alen;
	memcpy(&uent.addr, addr, sizeof(npf_addr_t));
	uent.mask = mask;

	return copyout(&uent, ubufp, sizeof(npf_ioctl_ent_t));
}

static int
table_generic_list(const npf_table_t *t, void *ubuf, size_t len)
{
	npf_tblent_t *ent;
	size_t off = 0;
	int error = 0;

	LIST_FOREACH(ent, &t->t_list, te_listent) {
		error = table_ent_copyout(&ent->te_addr,
		    ent->te_alen, ent->te_preflen, ubuf, len, &off);
		if (error)
			break;
	}
	return error;
}

static int
table_cdb_list(npf_table_t *t, void *ubuf, size_t len)
{
	size_t off = 0, dlen;
	const void *data;
	int error = 0;

	for (size_t i = 0; i < t->t_nitems; i++) {
		if (cdbr_get(t->t_cdb, i, &data, &dlen) != 0) {
			return EINVAL;
		}
		error = table_ent_copyout(data, dlen, 0, ubuf, len, &off);
		if (error)
			break;
	}
	return error;
}

/*
 * npf_table_list: copy a list of all table entries into a userspace buffer.
 */
int
npf_table_list(npf_table_t *t, void *ubuf, size_t len)
{
	int error = 0;

	mutex_enter(&t->t_lock);
	switch (t->t_type) {
	case NPF_TABLE_IPSET:
	case NPF_TABLE_LPM:
	case NPF_TABLE_IFADDR:
		error = table_generic_list(t, ubuf, len);
		break;
	case NPF_TABLE_CONST:
		error = table_cdb_list(t, ubuf, len);
		break;
	default:
		KASSERT(false);
	}
	mutex_exit(&t->t_lock);

	return error;
}

/*
 * npf_table_flush: remove all table entries.
 */
int
npf_table_flush(npf_table_t *t)
{
	int error = 0;

	mutex_enter(&t->t_lock);
	switch (t->t_type) {
	case NPF_TABLE_IPSET:
		table_ipset_flush(t);
		break;
	case NPF_TABLE_LPM:
		table_tree_flush(t);
		break;
	case NPF_TABLE_CONST:
	case NPF_TABLE_IFADDR:
		error = EINVAL;
		break;
	default:
		KASSERT(false);
	}
	mutex_exit(&t->t_lock);
	return error;
}

void
npf_table_gc(npf_t *npf, npf_table_t *t)
{
	npf_tblent_t *ent;
	void *ref;

	if (t->t_type != NPF_TABLE_IPSET || LIST_EMPTY(&t->t_gc)) {
		return;
	}

	ref = thmap_stage_gc(t->t_map);
	if (npf) {
		KASSERT(npf_config_locked_p(npf));
		npf_config_sync(npf);
	}
	thmap_gc(t->t_map, ref);

	while ((ent = LIST_FIRST(&t->t_gc)) != NULL) {
		LIST_REMOVE(ent, te_listent);
		pool_cache_put(tblent_cache, ent);
	}
}