npf_tableset.c revision 1.35
      1 /*-
      2  * Copyright (c) 2009-2019 The NetBSD Foundation, Inc.
      3  * All rights reserved.
      4  *
      5  * This material is based upon work partially supported by The
      6  * NetBSD Foundation under a contract with Mindaugas Rasiukevicius.
      7  *
      8  * Redistribution and use in source and binary forms, with or without
      9  * modification, are permitted provided that the following conditions
     10  * are met:
     11  * 1. Redistributions of source code must retain the above copyright
     12  *    notice, this list of conditions and the following disclaimer.
     13  * 2. Redistributions in binary form must reproduce the above copyright
     14  *    notice, this list of conditions and the following disclaimer in the
     15  *    documentation and/or other materials provided with the distribution.
     16  *
     17  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     18  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     19  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     20  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     21  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     22  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     23  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     24  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     25  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     26  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     27  * POSSIBILITY OF SUCH DAMAGE.
     28  */
     29 
     30 /*
     31  * NPF tableset module.
     32  *
     33  * Notes
     34  *
      35  *	The tableset is an array of tables.  Once created, the array itself
      36  *	is immutable.  The caller is responsible for synchronising access
      37  *	to the tableset.
     38  *
     39  * Warning (not applicable for the userspace npfkern):
     40  *
      41  *	thmap_put()/thmap_del() are not called from interrupt context
      42  *	and are protected by an IPL_NET mutex(9); therefore, they do
      43  *	not need SPL wrappers -- see the comment at the top of the
     44  *	npf_conndb.c source file.
     45  */
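
/*
 * Editorial note: an illustrative sketch (not part of this module) of the
 * lifecycle described above; the slot count, table name and table ID are
 * made-up values and error handling is omitted.
 *
 *	npf_tableset_t *ts = npf_tableset_create(2);
 *	npf_table_t *t = npf_table_create("example-set", 0,
 *	    NPF_TABLE_IPSET, NULL, 0);
 *	int error = npf_tableset_insert(ts, t);	// 0, or EEXIST if ID 0 is taken
 *	...
 *	npf_tableset_destroy(ts);	// drops the references, frees the tables
 */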
     46 
     47 #ifdef _KERNEL
     48 #include <sys/cdefs.h>
     49 __KERNEL_RCSID(0, "$NetBSD: npf_tableset.c,v 1.35 2020/05/30 14:16:56 rmind Exp $");
     50 
     51 #include <sys/param.h>
     52 #include <sys/types.h>
     53 
     54 #include <sys/atomic.h>
     55 #include <sys/cdbr.h>
     56 #include <sys/kmem.h>
     57 #include <sys/pool.h>
     58 #include <sys/queue.h>
     59 #include <sys/mutex.h>
     60 #include <sys/thmap.h>
     61 
     62 #include "lpm.h"
     63 #endif
     64 
     65 #include "npf_impl.h"
     66 
     67 typedef struct npf_tblent {
     68 	LIST_ENTRY(npf_tblent)	te_listent;
     69 	uint16_t		te_preflen;
     70 	uint16_t		te_alen;
     71 	npf_addr_t		te_addr;
     72 } npf_tblent_t;
     73 
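/*
 * NPF_ADDRLEN2IDX maps the address length in bytes to a slot index:
 * IPv4 (alen 4) maps to slot 0 and IPv6 (alen 16) maps to slot 1.
 */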
     74 #define	NPF_ADDRLEN2IDX(alen)	((alen) >> 4)
     75 #define	NPF_ADDR_SLOTS		(2)
     76 
     77 struct npf_table {
     78 	/*
      79 	 * The storage type can be: a) hashmap, b) LPM, c) cdb or d) a plain
      80 	 * array of interface addresses (with separate IPv4 and IPv6 slots).
     81 	 */
     82 	union {
     83 		struct {
     84 			thmap_t *	t_map;
     85 			LIST_HEAD(, npf_tblent) t_gc;
     86 		};
     87 		lpm_t *			t_lpm;
     88 		struct {
     89 			void *		t_blob;
     90 			size_t		t_bsize;
     91 			struct cdbr *	t_cdb;
     92 		};
     93 		struct {
     94 			npf_tblent_t **	t_elements[NPF_ADDR_SLOTS];
     95 			unsigned	t_allocated[NPF_ADDR_SLOTS];
     96 			unsigned	t_used[NPF_ADDR_SLOTS];
     97 		};
     98 	} /* C11 */;
     99 	LIST_HEAD(, npf_tblent)		t_list;
    100 	unsigned			t_nitems;
    101 
    102 	/*
     103 	 * Table ID, type and lock.  The ID may change during a config
     104 	 * reload; it is protected by the npf_config_lock.
    105 	 */
    106 	int			t_type;
    107 	unsigned		t_id;
    108 	kmutex_t		t_lock;
    109 
    110 	/* Reference count and table name. */
    111 	unsigned		t_refcnt;
    112 	char			t_name[NPF_TABLE_MAXNAMELEN];
    113 };
    114 
    115 struct npf_tableset {
    116 	unsigned		ts_nitems;
    117 	npf_table_t *		ts_map[];
    118 };
    119 
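/*
 * NPF_TABLESET_SIZE: the size of a tableset holding n tables, i.e. the
 * fixed header followed by the flexible array of n table pointers.
 */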
    120 #define	NPF_TABLESET_SIZE(n)	\
     121     (offsetof(npf_tableset_t, ts_map[n]))
    122 
    123 #define	NPF_IFADDR_STEP		4
    124 
    125 static pool_cache_t		tblent_cache	__read_mostly;
    126 
    127 /*
     128  * npf_tableset_sysinit: initialise the tableset structures.
    129  */
    130 void
    131 npf_tableset_sysinit(void)
    132 {
    133 	tblent_cache = pool_cache_init(sizeof(npf_tblent_t), 0,
    134 	    0, 0, "npftblpl", NULL, IPL_NONE, NULL, NULL, NULL);
    135 }
    136 
    137 void
    138 npf_tableset_sysfini(void)
    139 {
    140 	pool_cache_destroy(tblent_cache);
    141 }
    142 
    143 npf_tableset_t *
    144 npf_tableset_create(u_int nitems)
    145 {
    146 	npf_tableset_t *ts = kmem_zalloc(NPF_TABLESET_SIZE(nitems), KM_SLEEP);
    147 	ts->ts_nitems = nitems;
    148 	return ts;
    149 }
    150 
    151 void
    152 npf_tableset_destroy(npf_tableset_t *ts)
    153 {
    154 	/*
    155 	 * Destroy all tables (no references should be held, since the
     156 	 * ruleset should have been destroyed beforehand).
    157 	 */
    158 	for (u_int tid = 0; tid < ts->ts_nitems; tid++) {
    159 		npf_table_t *t = ts->ts_map[tid];
    160 
    161 		if (t && atomic_dec_uint_nv(&t->t_refcnt) == 0) {
    162 			npf_table_destroy(t);
    163 		}
    164 	}
    165 	kmem_free(ts, NPF_TABLESET_SIZE(ts->ts_nitems));
    166 }
    167 
    168 /*
    169  * npf_tableset_insert: insert the table into the specified tableset.
    170  *
     171  * => Returns 0 on success, or EEXIST if the ID is already used.
    172  */
    173 int
    174 npf_tableset_insert(npf_tableset_t *ts, npf_table_t *t)
    175 {
    176 	const u_int tid = t->t_id;
    177 	int error;
    178 
    179 	KASSERT((u_int)tid < ts->ts_nitems);
    180 
    181 	if (ts->ts_map[tid] == NULL) {
    182 		atomic_inc_uint(&t->t_refcnt);
    183 		ts->ts_map[tid] = t;
    184 		error = 0;
    185 	} else {
    186 		error = EEXIST;
    187 	}
    188 	return error;
    189 }
    190 
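/*
 * npf_tableset_swap: replace the table with the same ID in the tableset,
 * transferring the reference count to the new table.
 *
 * => Returns the table being replaced.
 */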
    191 npf_table_t *
    192 npf_tableset_swap(npf_tableset_t *ts, npf_table_t *newt)
    193 {
    194 	const u_int tid = newt->t_id;
    195 	npf_table_t *oldt = ts->ts_map[tid];
    196 
    197 	KASSERT(tid < ts->ts_nitems);
    198 	KASSERT(oldt->t_id == newt->t_id);
    199 
    200 	newt->t_refcnt = oldt->t_refcnt;
    201 	oldt->t_refcnt = 0;
    202 	membar_producer();
    203 
    204 	return atomic_swap_ptr(&ts->ts_map[tid], newt);
    205 }
    206 
    207 /*
    208  * npf_tableset_getbyname: look for a table in the set given the name.
    209  */
    210 npf_table_t *
    211 npf_tableset_getbyname(npf_tableset_t *ts, const char *name)
    212 {
    213 	npf_table_t *t;
    214 
    215 	for (u_int tid = 0; tid < ts->ts_nitems; tid++) {
    216 		if ((t = ts->ts_map[tid]) == NULL)
    217 			continue;
    218 		if (strcmp(name, t->t_name) == 0)
    219 			return t;
    220 	}
    221 	return NULL;
    222 }
    223 
    224 npf_table_t *
    225 npf_tableset_getbyid(npf_tableset_t *ts, unsigned tid)
    226 {
    227 	if (__predict_true(tid < ts->ts_nitems)) {
    228 		return atomic_load_relaxed(&ts->ts_map[tid]);
    229 	}
    230 	return NULL;
    231 }
    232 
    233 /*
     234  * npf_tableset_reload: iterate over the tables and, where a new table is
     235  * of the same type and has no items, preserve the old one and its entries.
    236  *
    237  * => The caller is responsible for providing synchronisation.
    238  */
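/*
 * For example, if a reload submits an IPSET table with the same name and
 * type but with no entries, the already populated table is preserved and
 * only its ID is updated to match the new configuration.
 */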
    239 void
    240 npf_tableset_reload(npf_t *npf, npf_tableset_t *nts, npf_tableset_t *ots)
    241 {
    242 	for (u_int tid = 0; tid < nts->ts_nitems; tid++) {
    243 		npf_table_t *t, *ot;
    244 
    245 		if ((t = nts->ts_map[tid]) == NULL) {
    246 			continue;
    247 		}
    248 
    249 		/* If our table has entries, just load it. */
    250 		if (t->t_nitems) {
    251 			continue;
    252 		}
    253 
     254 		/* Look for an existing table with the same name. */
    255 		ot = npf_tableset_getbyname(ots, t->t_name);
    256 		if (ot == NULL) {
    257 			/* Not found: we have a new table. */
    258 			continue;
    259 		}
    260 
    261 		/* Found.  Did the type change? */
    262 		if (t->t_type != ot->t_type) {
    263 			/* Yes, load the new. */
    264 			continue;
    265 		}
    266 
    267 		/*
    268 		 * Preserve the current table.  Acquire a reference since
    269 		 * we are keeping it in the old table set.  Update its ID.
    270 		 */
    271 		atomic_inc_uint(&ot->t_refcnt);
    272 		nts->ts_map[tid] = ot;
    273 
    274 		KASSERT(npf_config_locked_p(npf));
    275 		ot->t_id = tid;
    276 
    277 		/* Destroy the new table (we hold the only reference). */
    278 		t->t_refcnt--;
    279 		npf_table_destroy(t);
    280 	}
    281 }
    282 
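/*
 * npf_tableset_export: add the description of each table (name, type and
 * ID) to the given nvlist.  The table contents are not exported here.
 */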
    283 int
    284 npf_tableset_export(npf_t *npf, const npf_tableset_t *ts, nvlist_t *nvl)
    285 {
    286 	const npf_table_t *t;
    287 
    288 	KASSERT(npf_config_locked_p(npf));
    289 
    290 	for (u_int tid = 0; tid < ts->ts_nitems; tid++) {
    291 		nvlist_t *table;
    292 
    293 		if ((t = ts->ts_map[tid]) == NULL) {
    294 			continue;
    295 		}
    296 		table = nvlist_create(0);
    297 		nvlist_add_string(table, "name", t->t_name);
    298 		nvlist_add_number(table, "type", t->t_type);
    299 		nvlist_add_number(table, "id", tid);
    300 
    301 		nvlist_append_nvlist_array(nvl, "tables", table);
    302 		nvlist_destroy(table);
    303 	}
    304 	return 0;
    305 }
    306 
    307 /*
     308  * A few helper routines.
    309  */
    310 
    311 static void
    312 table_ipset_flush(npf_table_t *t)
    313 {
    314 	npf_tblent_t *ent;
    315 
    316 	while ((ent = LIST_FIRST(&t->t_list)) != NULL) {
    317 		thmap_del(t->t_map, &ent->te_addr, ent->te_alen);
    318 		LIST_REMOVE(ent, te_listent);
    319 		pool_cache_put(tblent_cache, ent);
    320 	}
    321 	t->t_nitems = 0;
    322 }
    323 
    324 static void
    325 table_tree_flush(npf_table_t *t)
    326 {
    327 	npf_tblent_t *ent;
    328 
    329 	while ((ent = LIST_FIRST(&t->t_list)) != NULL) {
    330 		LIST_REMOVE(ent, te_listent);
    331 		pool_cache_put(tblent_cache, ent);
    332 	}
    333 	lpm_clear(t->t_lpm, NULL, NULL);
    334 	t->t_nitems = 0;
    335 }
    336 
    337 static void
    338 table_ifaddr_flush(npf_table_t *t)
    339 {
    340 	npf_tblent_t *ent;
    341 
    342 	for (unsigned i = 0; i < NPF_ADDR_SLOTS; i++) {
    343 		size_t len;
    344 
    345 		if (!t->t_allocated[i]) {
    346 			KASSERT(t->t_elements[i] == NULL);
    347 			continue;
    348 		}
    349 		len = t->t_allocated[i] * sizeof(npf_tblent_t *);
    350 		kmem_free(t->t_elements[i], len);
    351 		t->t_elements[i] = NULL;
    352 		t->t_allocated[i] = 0;
    353 		t->t_used[i] = 0;
    354 	}
    355 	while ((ent = LIST_FIRST(&t->t_list)) != NULL) {
    356 		LIST_REMOVE(ent, te_listent);
    357 		pool_cache_put(tblent_cache, ent);
    358 	}
    359 	t->t_nitems = 0;
    360 }
    361 
    362 /*
     363  * npf_table_create: create a table with the specified ID.
    364  */
    365 npf_table_t *
    366 npf_table_create(const char *name, u_int tid, int type,
    367     const void *blob, size_t size)
    368 {
    369 	npf_table_t *t;
    370 
    371 	t = kmem_zalloc(sizeof(npf_table_t), KM_SLEEP);
    372 	strlcpy(t->t_name, name, NPF_TABLE_MAXNAMELEN);
    373 
    374 	switch (type) {
    375 	case NPF_TABLE_LPM:
    376 		t->t_lpm = lpm_create(KM_NOSLEEP);
    377 		if (t->t_lpm == NULL) {
    378 			goto out;
    379 		}
    380 		LIST_INIT(&t->t_list);
    381 		break;
    382 	case NPF_TABLE_IPSET:
    383 		t->t_map = thmap_create(0, NULL, THMAP_NOCOPY);
    384 		if (t->t_map == NULL) {
    385 			goto out;
    386 		}
    387 		break;
    388 	case NPF_TABLE_CONST:
    389 		t->t_blob = kmem_alloc(size, KM_SLEEP);
    390 		if (t->t_blob == NULL) {
    391 			goto out;
    392 		}
    393 		memcpy(t->t_blob, blob, size);
    394 		t->t_bsize = size;
    395 
    396 		t->t_cdb = cdbr_open_mem(t->t_blob, size,
    397 		    CDBR_DEFAULT, NULL, NULL);
    398 		if (t->t_cdb == NULL) {
    399 			kmem_free(t->t_blob, t->t_bsize);
    400 			goto out;
    401 		}
    402 		t->t_nitems = cdbr_entries(t->t_cdb);
    403 		break;
    404 	case NPF_TABLE_IFADDR:
    405 		break;
    406 	default:
    407 		KASSERT(false);
    408 	}
    409 	mutex_init(&t->t_lock, MUTEX_DEFAULT, IPL_NET);
    410 	t->t_type = type;
    411 	t->t_id = tid;
    412 	return t;
    413 out:
    414 	kmem_free(t, sizeof(npf_table_t));
    415 	return NULL;
    416 }
    417 
    418 /*
     419  * npf_table_destroy: free all table entries and the table itself.
    420  */
    421 void
    422 npf_table_destroy(npf_table_t *t)
    423 {
    424 	KASSERT(t->t_refcnt == 0);
    425 
    426 	switch (t->t_type) {
    427 	case NPF_TABLE_IPSET:
    428 		table_ipset_flush(t);
    429 		npf_table_gc(NULL, t);
    430 		thmap_destroy(t->t_map);
    431 		break;
    432 	case NPF_TABLE_LPM:
    433 		table_tree_flush(t);
    434 		lpm_destroy(t->t_lpm);
    435 		break;
    436 	case NPF_TABLE_CONST:
    437 		cdbr_close(t->t_cdb);
    438 		kmem_free(t->t_blob, t->t_bsize);
    439 		break;
    440 	case NPF_TABLE_IFADDR:
    441 		table_ifaddr_flush(t);
    442 		break;
    443 	default:
    444 		KASSERT(false);
    445 	}
    446 	mutex_destroy(&t->t_lock);
    447 	kmem_free(t, sizeof(npf_table_t));
    448 }
    449 
    450 u_int
    451 npf_table_getid(npf_table_t *t)
    452 {
    453 	return t->t_id;
    454 }
    455 
    456 /*
    457  * npf_table_check: validate the name, ID and type.
    458  */
    459 int
    460 npf_table_check(npf_tableset_t *ts, const char *name, uint64_t tid,
    461     uint64_t type, bool replacing)
    462 {
    463 	const npf_table_t *t;
    464 
    465 	if (tid >= ts->ts_nitems) {
    466 		return EINVAL;
    467 	}
    468 	if (!replacing && ts->ts_map[tid] != NULL) {
    469 		return EEXIST;
    470 	}
    471 	switch (type) {
    472 	case NPF_TABLE_LPM:
    473 	case NPF_TABLE_IPSET:
    474 	case NPF_TABLE_CONST:
    475 	case NPF_TABLE_IFADDR:
    476 		break;
    477 	default:
    478 		return EINVAL;
    479 	}
    480 	if (strlen(name) >= NPF_TABLE_MAXNAMELEN) {
    481 		return ENAMETOOLONG;
    482 	}
    483 	if ((t = npf_tableset_getbyname(ts, name)) != NULL) {
    484 		if (!replacing || t->t_id != tid) {
    485 			return EEXIST;
    486 		}
    487 	}
    488 	return 0;
    489 }
    490 
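/*
 * table_ifaddr_insert: add an entry to the interface-address array for the
 * given address length, growing the array in NPF_IFADDR_STEP-sized chunks
 * (e.g. 4, 8, 12 slots) as needed.
 */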
    491 static int
    492 table_ifaddr_insert(npf_table_t *t, const int alen, npf_tblent_t *ent)
    493 {
    494 	const unsigned aidx = NPF_ADDRLEN2IDX(alen);
    495 	const unsigned allocated = t->t_allocated[aidx];
    496 	const unsigned used = t->t_used[aidx];
    497 
    498 	/*
    499 	 * No need to check for duplicates.
    500 	 */
    501 	if (allocated <= used) {
    502 		npf_tblent_t **old_elements = t->t_elements[aidx];
    503 		npf_tblent_t **elements;
    504 		size_t toalloc, newsize;
    505 
    506 		toalloc = roundup2(allocated + 1, NPF_IFADDR_STEP);
    507 		newsize = toalloc * sizeof(npf_tblent_t *);
    508 
    509 		elements = kmem_zalloc(newsize, KM_NOSLEEP);
    510 		if (elements == NULL) {
    511 			return ENOMEM;
    512 		}
    513 		for (unsigned i = 0; i < used; i++) {
    514 			elements[i] = old_elements[i];
    515 		}
    516 		if (allocated) {
    517 			const size_t len = allocated * sizeof(npf_tblent_t *);
    518 			KASSERT(old_elements != NULL);
    519 			kmem_free(old_elements, len);
    520 		}
    521 		t->t_elements[aidx] = elements;
    522 		t->t_allocated[aidx] = toalloc;
    523 	}
    524 	t->t_elements[aidx][used] = ent;
    525 	t->t_used[aidx]++;
    526 	return 0;
    527 }
    528 
    529 /*
    530  * npf_table_insert: add an IP CIDR entry into the table.
    531  */
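/*
 * Editorial example (illustrative only, assuming "t" is an LPM table):
 * inserting the IPv4 CIDR 10.0.0.0/24.
 *
 *	npf_addr_t addr;
 *	memset(&addr, 0, sizeof(addr));
 *	memcpy(&addr, "\x0a\x00\x00\x00", 4);		// 10.0.0.0
 *	int error = npf_table_insert(t, 4, &addr, 24);	// 0, or EEXIST
 */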
    532 int
    533 npf_table_insert(npf_table_t *t, const int alen,
    534     const npf_addr_t *addr, const npf_netmask_t mask)
    535 {
    536 	npf_tblent_t *ent;
    537 	int error;
    538 
    539 	error = npf_netmask_check(alen, mask);
    540 	if (error) {
    541 		return error;
    542 	}
    543 	ent = pool_cache_get(tblent_cache, PR_WAITOK);
    544 	memcpy(&ent->te_addr, addr, alen);
    545 	ent->te_alen = alen;
    546 	ent->te_preflen = 0;
    547 
    548 	/*
    549 	 * Insert the entry.  Return an error on duplicate.
    550 	 */
    551 	mutex_enter(&t->t_lock);
    552 	switch (t->t_type) {
    553 	case NPF_TABLE_IPSET:
    554 		/*
     555 		 * The hashmap supports only single IP addresses (no masks).
     556 		 *
     557 		 * Note: the key must already be persistent, since we
    558 		 * use THMAP_NOCOPY.
    559 		 */
    560 		if (mask != NPF_NO_NETMASK) {
    561 			error = EINVAL;
    562 			break;
    563 		}
    564 		if (thmap_put(t->t_map, &ent->te_addr, alen, ent) == ent) {
    565 			LIST_INSERT_HEAD(&t->t_list, ent, te_listent);
    566 			t->t_nitems++;
    567 		} else {
    568 			error = EEXIST;
    569 		}
    570 		break;
    571 	case NPF_TABLE_LPM: {
    572 		const unsigned preflen =
    573 		    (mask == NPF_NO_NETMASK) ? (alen * 8) : mask;
    574 		ent->te_preflen = preflen;
    575 
    576 		if (lpm_lookup(t->t_lpm, addr, alen) == NULL &&
    577 		    lpm_insert(t->t_lpm, addr, alen, preflen, ent) == 0) {
    578 			LIST_INSERT_HEAD(&t->t_list, ent, te_listent);
    579 			t->t_nitems++;
    580 			error = 0;
    581 		} else {
    582 			error = EEXIST;
    583 		}
    584 		break;
    585 	}
    586 	case NPF_TABLE_CONST:
    587 		error = EINVAL;
    588 		break;
    589 	case NPF_TABLE_IFADDR:
    590 		if ((error = table_ifaddr_insert(t, alen, ent)) != 0) {
    591 			break;
    592 		}
    593 		LIST_INSERT_HEAD(&t->t_list, ent, te_listent);
    594 		t->t_nitems++;
    595 		break;
    596 	default:
    597 		KASSERT(false);
    598 	}
    599 	mutex_exit(&t->t_lock);
    600 
    601 	if (error) {
    602 		pool_cache_put(tblent_cache, ent);
    603 	}
    604 	return error;
    605 }
    606 
    607 /*
    608  * npf_table_remove: remove the IP CIDR entry from the table.
    609  */
    610 int
    611 npf_table_remove(npf_table_t *t, const int alen,
    612     const npf_addr_t *addr, const npf_netmask_t mask)
    613 {
    614 	npf_tblent_t *ent = NULL;
    615 	int error;
    616 
    617 	error = npf_netmask_check(alen, mask);
    618 	if (error) {
    619 		return error;
    620 	}
    621 
    622 	mutex_enter(&t->t_lock);
    623 	switch (t->t_type) {
    624 	case NPF_TABLE_IPSET:
    625 		ent = thmap_del(t->t_map, addr, alen);
    626 		if (__predict_true(ent != NULL)) {
    627 			LIST_REMOVE(ent, te_listent);
    628 			LIST_INSERT_HEAD(&t->t_gc, ent, te_listent);
    629 			ent = NULL; // to be G/C'ed
    630 			t->t_nitems--;
    631 		} else {
    632 			error = ENOENT;
    633 		}
    634 		break;
    635 	case NPF_TABLE_LPM:
    636 		ent = lpm_lookup(t->t_lpm, addr, alen);
    637 		if (__predict_true(ent != NULL)) {
    638 			LIST_REMOVE(ent, te_listent);
    639 			lpm_remove(t->t_lpm, &ent->te_addr,
    640 			    ent->te_alen, ent->te_preflen);
    641 			t->t_nitems--;
    642 		} else {
    643 			error = ENOENT;
    644 		}
    645 		break;
    646 	case NPF_TABLE_CONST:
    647 	case NPF_TABLE_IFADDR:
    648 		error = EINVAL;
    649 		break;
    650 	default:
    651 		KASSERT(false);
    652 		ent = NULL;
    653 	}
    654 	mutex_exit(&t->t_lock);
    655 
    656 	if (ent) {
    657 		pool_cache_put(tblent_cache, ent);
    658 	}
    659 	return error;
    660 }
    661 
    662 /*
     663  * npf_table_lookup: match the contents of the given table against
     664  * the specified IP address.
    665  */
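/*
 * Editorial example (illustrative only): with the LPM entry added in the
 * insertion example above, npf_table_lookup(t, 4, &addr) returns 0 for any
 * address within 10.0.0.0/24, and ENOENT for addresses outside all entries.
 */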
    666 int
    667 npf_table_lookup(npf_table_t *t, const int alen, const npf_addr_t *addr)
    668 {
    669 	const void *data;
    670 	size_t dlen;
    671 	bool found;
    672 	int error;
    673 
    674 	error = npf_netmask_check(alen, NPF_NO_NETMASK);
    675 	if (error) {
    676 		return error;
    677 	}
    678 
    679 	switch (t->t_type) {
    680 	case NPF_TABLE_IPSET:
     681 		/* Note: the caller is within the npf_config_read_enter() section. */
    682 		found = thmap_get(t->t_map, addr, alen) != NULL;
    683 		break;
    684 	case NPF_TABLE_LPM:
    685 		mutex_enter(&t->t_lock);
    686 		found = lpm_lookup(t->t_lpm, addr, alen) != NULL;
    687 		mutex_exit(&t->t_lock);
    688 		break;
    689 	case NPF_TABLE_CONST:
    690 		if (cdbr_find(t->t_cdb, addr, alen, &data, &dlen) == 0) {
    691 			found = dlen == (unsigned)alen &&
    692 			    memcmp(addr, data, dlen) == 0;
    693 		} else {
    694 			found = false;
    695 		}
    696 		break;
    697 	case NPF_TABLE_IFADDR: {
    698 		const unsigned aidx = NPF_ADDRLEN2IDX(alen);
    699 
    700 		found = false;
    701 		for (unsigned i = 0; i < t->t_used[aidx]; i++) {
    702 			const npf_tblent_t *elm = t->t_elements[aidx][i];
    703 
    704 			KASSERT(elm->te_alen == alen);
    705 
    706 			if (memcmp(&elm->te_addr, addr, alen) == 0) {
    707 				found = true;
    708 				break;
    709 			}
    710 		}
    711 		break;
    712 	}
    713 	default:
    714 		KASSERT(false);
    715 		found = false;
    716 	}
    717 
    718 	return found ? 0 : ENOENT;
    719 }
    720 
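/*
 * npf_table_getsome: return one address of the given length from an
 * NPF_TABLE_IFADDR table, selected as idx modulo the number of entries;
 * returns NULL if the table has no addresses of that length.
 */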
    721 npf_addr_t *
    722 npf_table_getsome(npf_table_t *t, const int alen, unsigned idx)
    723 {
    724 	const unsigned aidx = NPF_ADDRLEN2IDX(alen);
    725 	npf_tblent_t *elm;
    726 	unsigned nitems;
    727 
    728 	KASSERT(t->t_type == NPF_TABLE_IFADDR);
    729 	KASSERT(aidx < NPF_ADDR_SLOTS);
    730 
    731 	nitems = t->t_used[aidx];
    732 	if (nitems == 0) {
    733 		return NULL;
    734 	}
    735 
    736 	/*
    737 	 * No need to acquire the lock, since the table is immutable.
    738 	 */
    739 	elm = t->t_elements[aidx][idx % nitems];
    740 	return &elm->te_addr;
    741 }
    742 
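/*
 * table_ent_copyout: copy a single entry into the userspace buffer at the
 * running offset; returns ENOMEM if the buffer is too small.
 */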
    743 static int
    744 table_ent_copyout(const npf_addr_t *addr, const int alen, npf_netmask_t mask,
    745     void *ubuf, size_t len, size_t *off)
    746 {
    747 	void *ubufp = (uint8_t *)ubuf + *off;
    748 	npf_ioctl_ent_t uent;
    749 
    750 	if ((*off += sizeof(npf_ioctl_ent_t)) > len) {
    751 		return ENOMEM;
    752 	}
    753 	uent.alen = alen;
    754 	memcpy(&uent.addr, addr, sizeof(npf_addr_t));
    755 	uent.mask = mask;
    756 
    757 	return copyout(&uent, ubufp, sizeof(npf_ioctl_ent_t));
    758 }
    759 
    760 static int
    761 table_generic_list(const npf_table_t *t, void *ubuf, size_t len)
    762 {
    763 	npf_tblent_t *ent;
    764 	size_t off = 0;
    765 	int error = 0;
    766 
    767 	LIST_FOREACH(ent, &t->t_list, te_listent) {
    768 		error = table_ent_copyout(&ent->te_addr,
    769 		    ent->te_alen, ent->te_preflen, ubuf, len, &off);
    770 		if (error)
    771 			break;
    772 	}
    773 	return error;
    774 }
    775 
    776 static int
    777 table_cdb_list(npf_table_t *t, void *ubuf, size_t len)
    778 {
    779 	size_t off = 0, dlen;
    780 	const void *data;
    781 	int error = 0;
    782 
    783 	for (size_t i = 0; i < t->t_nitems; i++) {
    784 		if (cdbr_get(t->t_cdb, i, &data, &dlen) != 0) {
    785 			return EINVAL;
    786 		}
    787 		error = table_ent_copyout(data, dlen, 0, ubuf, len, &off);
    788 		if (error)
    789 			break;
    790 	}
    791 	return error;
    792 }
    793 
    794 /*
    795  * npf_table_list: copy a list of all table entries into a userspace buffer.
    796  */
    797 int
    798 npf_table_list(npf_table_t *t, void *ubuf, size_t len)
    799 {
    800 	int error = 0;
    801 
    802 	mutex_enter(&t->t_lock);
    803 	switch (t->t_type) {
    804 	case NPF_TABLE_IPSET:
    805 		error = table_generic_list(t, ubuf, len);
    806 		break;
    807 	case NPF_TABLE_LPM:
    808 		error = table_generic_list(t, ubuf, len);
    809 		break;
    810 	case NPF_TABLE_CONST:
    811 		error = table_cdb_list(t, ubuf, len);
    812 		break;
    813 	case NPF_TABLE_IFADDR:
    814 		error = table_generic_list(t, ubuf, len);
    815 		break;
    816 	default:
    817 		KASSERT(false);
    818 	}
    819 	mutex_exit(&t->t_lock);
    820 
    821 	return error;
    822 }
    823 
    824 /*
    825  * npf_table_flush: remove all table entries.
    826  */
    827 int
    828 npf_table_flush(npf_table_t *t)
    829 {
    830 	int error = 0;
    831 
    832 	mutex_enter(&t->t_lock);
    833 	switch (t->t_type) {
    834 	case NPF_TABLE_IPSET:
    835 		table_ipset_flush(t);
    836 		break;
    837 	case NPF_TABLE_LPM:
    838 		table_tree_flush(t);
    839 		break;
    840 	case NPF_TABLE_CONST:
    841 	case NPF_TABLE_IFADDR:
    842 		error = EINVAL;
    843 		break;
    844 	default:
    845 		KASSERT(false);
    846 	}
    847 	mutex_exit(&t->t_lock);
    848 	return error;
    849 }
    850 
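/*
 * npf_table_gc: reclaim the IPSET entries staged for G/C by
 * npf_table_remove().  If an npf instance is given, synchronise with the
 * configuration readers before performing the thmap G/C; npf_table_destroy()
 * passes NULL, as no references are left at that point.
 */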
    851 void
    852 npf_table_gc(npf_t *npf, npf_table_t *t)
    853 {
    854 	npf_tblent_t *ent;
    855 	void *ref;
    856 
    857 	if (t->t_type != NPF_TABLE_IPSET || LIST_EMPTY(&t->t_gc)) {
    858 		return;
    859 	}
    860 
    861 	ref = thmap_stage_gc(t->t_map);
    862 	if (npf) {
     863 		KASSERT(npf_config_locked_p(npf));
    864 		npf_config_sync(npf);
    865 	}
    866 	thmap_gc(t->t_map, ref);
    867 
    868 	while ((ent = LIST_FIRST(&t->t_gc)) != NULL) {
    869 		LIST_REMOVE(ent, te_listent);
    870 		pool_cache_put(tblent_cache, ent);
    871 	}
    872 }
    873