Home | History | Annotate | Line # | Download | only in npf
npf_tableset.c revision 1.43
      1   1.1     rmind /*-
      2  1.33     rmind  * Copyright (c) 2009-2019 The NetBSD Foundation, Inc.
      3   1.1     rmind  * All rights reserved.
      4   1.1     rmind  *
      5   1.1     rmind  * This material is based upon work partially supported by The
      6   1.1     rmind  * NetBSD Foundation under a contract with Mindaugas Rasiukevicius.
      7   1.1     rmind  *
      8   1.1     rmind  * Redistribution and use in source and binary forms, with or without
      9   1.1     rmind  * modification, are permitted provided that the following conditions
     10   1.1     rmind  * are met:
     11   1.1     rmind  * 1. Redistributions of source code must retain the above copyright
     12   1.1     rmind  *    notice, this list of conditions and the following disclaimer.
     13   1.1     rmind  * 2. Redistributions in binary form must reproduce the above copyright
     14   1.1     rmind  *    notice, this list of conditions and the following disclaimer in the
     15   1.1     rmind  *    documentation and/or other materials provided with the distribution.
     16   1.1     rmind  *
     17   1.1     rmind  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     18   1.1     rmind  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     19   1.1     rmind  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     20   1.1     rmind  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     21   1.1     rmind  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     22   1.1     rmind  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     23   1.1     rmind  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     24   1.1     rmind  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     25   1.1     rmind  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     26   1.1     rmind  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     27   1.1     rmind  * POSSIBILITY OF SUCH DAMAGE.
     28   1.1     rmind  */
     29   1.1     rmind 
     30   1.1     rmind /*
     31   1.4     rmind  * NPF tableset module.
     32   1.1     rmind  *
     33  1.15     rmind  * Notes
     34  1.15     rmind  *
     35  1.15     rmind  *	The tableset is an array of tables.  After the creation, the array
     36  1.15     rmind  *	is immutable.  The caller is responsible to synchronise the access
     37  1.29     rmind  *	to the tableset.
     38  1.33     rmind  *
     39  1.33     rmind  * Warning (not applicable for the userspace npfkern):
     40  1.33     rmind  *
     41  1.33     rmind  *	The thmap_put()/thmap_del() are not called from the interrupt
     42  1.35     rmind  *	context and are protected by an IPL_NET mutex(9), therefore they
     43  1.35     rmind  *	do not need SPL wrappers -- see the comment at the top of the
     44  1.35     rmind  *	npf_conndb.c source file.
     45   1.1     rmind  */
     46   1.1     rmind 
     47  1.25  christos #ifdef _KERNEL
     48   1.1     rmind #include <sys/cdefs.h>
     49  1.43       joe __KERNEL_RCSID(0, "$NetBSD: npf_tableset.c,v 1.43 2025/02/07 23:37:46 joe Exp $");
     50   1.1     rmind 
     51   1.1     rmind #include <sys/param.h>
     52  1.10     rmind #include <sys/types.h>
     53   1.1     rmind 
     54   1.1     rmind #include <sys/atomic.h>
     55  1.21     rmind #include <sys/cdbr.h>
     56   1.1     rmind #include <sys/kmem.h>
     57   1.1     rmind #include <sys/pool.h>
     58   1.1     rmind #include <sys/queue.h>
     59  1.28     rmind #include <sys/mutex.h>
     60  1.29     rmind #include <sys/thmap.h>
     61   1.1     rmind 
     62  1.25  christos #include "lpm.h"
     63  1.25  christos #endif
     64  1.25  christos 
     65   1.1     rmind #include "npf_impl.h"
     66   1.1     rmind 
     67  1.15     rmind typedef struct npf_tblent {
     68  1.24  christos 	LIST_ENTRY(npf_tblent)	te_listent;
     69  1.24  christos 	uint16_t		te_preflen;
     70  1.24  christos 	uint16_t		te_alen;
     71  1.13     rmind 	npf_addr_t		te_addr;
     72  1.15     rmind } npf_tblent_t;
     73   1.1     rmind 
/* Map an address length in bytes to a slot index (e.g. 4 -> 0, 16 -> 1). */
#define	NPF_ADDRLEN2IDX(alen)	((alen) >> 4)

/* Number of per-address-length slots kept by an ifaddr table. */
#define	NPF_ADDR_SLOTS		(2)
     76   1.1     rmind 
     77   1.1     rmind struct npf_table {
     78  1.19     rmind 	/*
     79  1.29     rmind 	 * The storage type can be: a) hashmap b) LPM c) cdb.
     80  1.19     rmind 	 * There are separate trees for IPv4 and IPv6.
     81  1.19     rmind 	 */
     82  1.21     rmind 	union {
     83  1.21     rmind 		struct {
     84  1.29     rmind 			thmap_t *	t_map;
     85  1.29     rmind 			LIST_HEAD(, npf_tblent) t_gc;
     86  1.21     rmind 		};
     87  1.29     rmind 		lpm_t *			t_lpm;
     88  1.21     rmind 		struct {
     89  1.21     rmind 			void *		t_blob;
     90  1.21     rmind 			size_t		t_bsize;
     91  1.21     rmind 			struct cdbr *	t_cdb;
     92  1.21     rmind 		};
     93  1.29     rmind 		struct {
     94  1.29     rmind 			npf_tblent_t **	t_elements[NPF_ADDR_SLOTS];
     95  1.29     rmind 			unsigned	t_allocated[NPF_ADDR_SLOTS];
     96  1.29     rmind 			unsigned	t_used[NPF_ADDR_SLOTS];
     97  1.29     rmind 		};
     98  1.21     rmind 	} /* C11 */;
     99  1.29     rmind 	LIST_HEAD(, npf_tblent)		t_list;
    100  1.29     rmind 	unsigned			t_nitems;
    101  1.19     rmind 
    102  1.19     rmind 	/*
    103  1.19     rmind 	 * Table ID, type and lock.  The ID may change during the
    104  1.40  riastrad 	 * config reload, it is protected by the npf_t::config_lock.
    105  1.19     rmind 	 */
    106  1.19     rmind 	int			t_type;
    107  1.29     rmind 	unsigned		t_id;
    108  1.28     rmind 	kmutex_t		t_lock;
    109  1.19     rmind 
    110  1.29     rmind 	/* Reference count and table name. */
    111  1.29     rmind 	unsigned		t_refcnt;
    112  1.19     rmind 	char			t_name[NPF_TABLE_MAXNAMELEN];
    113  1.19     rmind };
    114  1.19     rmind 
    115  1.19     rmind struct npf_tableset {
    116  1.29     rmind 	unsigned		ts_nitems;
    117  1.19     rmind 	npf_table_t *		ts_map[];
    118   1.1     rmind };
    119   1.1     rmind 
/*
 * Size in bytes of a tableset holding n table slots.
 *
 * Note: offsetof() of the n-th flexible array element is already the
 * byte size of the header plus n pointers, so it must not be scaled by
 * the pointer size again.
 */
#define	NPF_TABLESET_SIZE(n)	\
    (offsetof(npf_tableset_t, ts_map[n]))
    122  1.19     rmind 
/* Growth granularity (in elements) of the ifaddr table element arrays. */
#define	NPF_IFADDR_STEP		4
    124  1.13     rmind 
    125  1.13     rmind static pool_cache_t		tblent_cache	__read_mostly;
    126   1.1     rmind 
    127   1.1     rmind /*
    128   1.1     rmind  * npf_table_sysinit: initialise tableset structures.
    129   1.1     rmind  */
    130   1.4     rmind void
    131   1.1     rmind npf_tableset_sysinit(void)
    132   1.1     rmind {
    133  1.29     rmind 	tblent_cache = pool_cache_init(sizeof(npf_tblent_t), 0,
    134  1.14     rmind 	    0, 0, "npftblpl", NULL, IPL_NONE, NULL, NULL, NULL);
    135   1.1     rmind }
    136   1.1     rmind 
    137   1.1     rmind void
    138   1.1     rmind npf_tableset_sysfini(void)
    139   1.1     rmind {
    140   1.1     rmind 	pool_cache_destroy(tblent_cache);
    141   1.1     rmind }
    142   1.1     rmind 
    143   1.1     rmind npf_tableset_t *
    144  1.19     rmind npf_tableset_create(u_int nitems)
    145   1.1     rmind {
    146  1.19     rmind 	npf_tableset_t *ts = kmem_zalloc(NPF_TABLESET_SIZE(nitems), KM_SLEEP);
    147  1.19     rmind 	ts->ts_nitems = nitems;
    148  1.19     rmind 	return ts;
    149   1.1     rmind }
    150   1.1     rmind 
    151   1.1     rmind void
    152  1.19     rmind npf_tableset_destroy(npf_tableset_t *ts)
    153   1.1     rmind {
    154   1.1     rmind 	/*
    155  1.19     rmind 	 * Destroy all tables (no references should be held, since the
    156  1.19     rmind 	 * ruleset should be destroyed before).
    157   1.1     rmind 	 */
    158  1.19     rmind 	for (u_int tid = 0; tid < ts->ts_nitems; tid++) {
    159  1.19     rmind 		npf_table_t *t = ts->ts_map[tid];
    160  1.19     rmind 
    161  1.37  riastrad 		if (t == NULL)
    162  1.37  riastrad 			continue;
    163  1.38  riastrad 		membar_release();
    164  1.37  riastrad 		if (atomic_dec_uint_nv(&t->t_refcnt) > 0)
    165  1.37  riastrad 			continue;
    166  1.38  riastrad 		membar_acquire();
    167  1.37  riastrad 		npf_table_destroy(t);
    168   1.1     rmind 	}
    169  1.19     rmind 	kmem_free(ts, NPF_TABLESET_SIZE(ts->ts_nitems));
    170   1.1     rmind }
    171   1.1     rmind 
    172   1.1     rmind /*
    173   1.1     rmind  * npf_tableset_insert: insert the table into the specified tableset.
    174   1.1     rmind  *
    175  1.13     rmind  * => Returns 0 on success.  Fails and returns error if ID is already used.
    176   1.1     rmind  */
    177   1.1     rmind int
    178  1.19     rmind npf_tableset_insert(npf_tableset_t *ts, npf_table_t *t)
    179   1.1     rmind {
    180   1.1     rmind 	const u_int tid = t->t_id;
    181   1.1     rmind 	int error;
    182   1.1     rmind 
    183  1.19     rmind 	KASSERT((u_int)tid < ts->ts_nitems);
    184   1.1     rmind 
    185  1.19     rmind 	if (ts->ts_map[tid] == NULL) {
    186  1.17     rmind 		atomic_inc_uint(&t->t_refcnt);
    187  1.19     rmind 		ts->ts_map[tid] = t;
    188   1.1     rmind 		error = 0;
    189   1.1     rmind 	} else {
    190   1.1     rmind 		error = EEXIST;
    191   1.1     rmind 	}
    192   1.1     rmind 	return error;
    193   1.1     rmind }
    194   1.1     rmind 
    195  1.26     rmind npf_table_t *
    196  1.26     rmind npf_tableset_swap(npf_tableset_t *ts, npf_table_t *newt)
    197  1.26     rmind {
    198  1.26     rmind 	const u_int tid = newt->t_id;
    199  1.26     rmind 	npf_table_t *oldt = ts->ts_map[tid];
    200  1.26     rmind 
    201  1.26     rmind 	KASSERT(tid < ts->ts_nitems);
    202  1.26     rmind 	KASSERT(oldt->t_id == newt->t_id);
    203  1.26     rmind 
    204  1.26     rmind 	newt->t_refcnt = oldt->t_refcnt;
    205  1.26     rmind 	oldt->t_refcnt = 0;
    206  1.35     rmind 	membar_producer();
    207  1.26     rmind 
    208  1.26     rmind 	return atomic_swap_ptr(&ts->ts_map[tid], newt);
    209  1.26     rmind }
    210  1.26     rmind 
    211   1.1     rmind /*
    212  1.19     rmind  * npf_tableset_getbyname: look for a table in the set given the name.
    213  1.19     rmind  */
    214  1.19     rmind npf_table_t *
    215  1.19     rmind npf_tableset_getbyname(npf_tableset_t *ts, const char *name)
    216  1.19     rmind {
    217  1.19     rmind 	npf_table_t *t;
    218  1.19     rmind 
    219  1.19     rmind 	for (u_int tid = 0; tid < ts->ts_nitems; tid++) {
    220  1.19     rmind 		if ((t = ts->ts_map[tid]) == NULL)
    221  1.19     rmind 			continue;
    222  1.19     rmind 		if (strcmp(name, t->t_name) == 0)
    223  1.19     rmind 			return t;
    224  1.19     rmind 	}
    225  1.19     rmind 	return NULL;
    226  1.19     rmind }
    227  1.19     rmind 
    228  1.19     rmind npf_table_t *
    229  1.35     rmind npf_tableset_getbyid(npf_tableset_t *ts, unsigned tid)
    230  1.19     rmind {
    231  1.19     rmind 	if (__predict_true(tid < ts->ts_nitems)) {
    232  1.35     rmind 		return atomic_load_relaxed(&ts->ts_map[tid]);
    233  1.19     rmind 	}
    234  1.19     rmind 	return NULL;
    235  1.19     rmind }
    236  1.19     rmind 
    237  1.19     rmind /*
    238  1.15     rmind  * npf_tableset_reload: iterate all tables and if the new table is of the
    239  1.15     rmind  * same type and has no items, then we preserve the old one and its entries.
    240  1.15     rmind  *
    241  1.15     rmind  * => The caller is responsible for providing synchronisation.
    242  1.15     rmind  */
    243  1.15     rmind void
    244  1.25  christos npf_tableset_reload(npf_t *npf, npf_tableset_t *nts, npf_tableset_t *ots)
    245  1.15     rmind {
    246  1.19     rmind 	for (u_int tid = 0; tid < nts->ts_nitems; tid++) {
    247  1.19     rmind 		npf_table_t *t, *ot;
    248  1.19     rmind 
    249  1.19     rmind 		if ((t = nts->ts_map[tid]) == NULL) {
    250  1.19     rmind 			continue;
    251  1.19     rmind 		}
    252  1.15     rmind 
    253  1.19     rmind 		/* If our table has entries, just load it. */
    254  1.19     rmind 		if (t->t_nitems) {
    255  1.15     rmind 			continue;
    256  1.15     rmind 		}
    257  1.19     rmind 
    258  1.19     rmind 		/* Look for a currently existing table with such name. */
    259  1.19     rmind 		ot = npf_tableset_getbyname(ots, t->t_name);
    260  1.19     rmind 		if (ot == NULL) {
    261  1.19     rmind 			/* Not found: we have a new table. */
    262  1.19     rmind 			continue;
    263  1.19     rmind 		}
    264  1.19     rmind 
    265  1.19     rmind 		/* Found.  Did the type change? */
    266  1.19     rmind 		if (t->t_type != ot->t_type) {
    267  1.19     rmind 			/* Yes, load the new. */
    268  1.15     rmind 			continue;
    269  1.15     rmind 		}
    270  1.17     rmind 
    271  1.17     rmind 		/*
    272  1.19     rmind 		 * Preserve the current table.  Acquire a reference since
    273  1.19     rmind 		 * we are keeping it in the old table set.  Update its ID.
    274  1.17     rmind 		 */
    275  1.17     rmind 		atomic_inc_uint(&ot->t_refcnt);
    276  1.19     rmind 		nts->ts_map[tid] = ot;
    277  1.19     rmind 
    278  1.25  christos 		KASSERT(npf_config_locked_p(npf));
    279  1.19     rmind 		ot->t_id = tid;
    280  1.17     rmind 
    281  1.21     rmind 		/* Destroy the new table (we hold the only reference). */
    282  1.17     rmind 		t->t_refcnt--;
    283  1.15     rmind 		npf_table_destroy(t);
    284  1.15     rmind 	}
    285  1.15     rmind }
    286  1.15     rmind 
    287  1.22     rmind int
    288  1.35     rmind npf_tableset_export(npf_t *npf, const npf_tableset_t *ts, nvlist_t *nvl)
    289  1.20     rmind {
    290  1.20     rmind 	const npf_table_t *t;
    291  1.20     rmind 
    292  1.25  christos 	KASSERT(npf_config_locked_p(npf));
    293  1.20     rmind 
    294  1.20     rmind 	for (u_int tid = 0; tid < ts->ts_nitems; tid++) {
    295  1.28     rmind 		nvlist_t *table;
    296  1.28     rmind 
    297  1.20     rmind 		if ((t = ts->ts_map[tid]) == NULL) {
    298  1.20     rmind 			continue;
    299  1.20     rmind 		}
    300  1.28     rmind 		table = nvlist_create(0);
    301  1.28     rmind 		nvlist_add_string(table, "name", t->t_name);
    302  1.28     rmind 		nvlist_add_number(table, "type", t->t_type);
    303  1.28     rmind 		nvlist_add_number(table, "id", tid);
    304  1.20     rmind 
    305  1.35     rmind 		nvlist_append_nvlist_array(nvl, "tables", table);
    306  1.28     rmind 		nvlist_destroy(table);
    307  1.20     rmind 	}
    308  1.22     rmind 	return 0;
    309  1.20     rmind }
    310  1.20     rmind 
    311  1.15     rmind /*
    312  1.13     rmind  * Few helper routines.
    313   1.1     rmind  */
    314   1.1     rmind 
    315  1.29     rmind static void
    316  1.29     rmind table_ipset_flush(npf_table_t *t)
    317   1.1     rmind {
    318  1.13     rmind 	npf_tblent_t *ent;
    319   1.1     rmind 
    320  1.29     rmind 	while ((ent = LIST_FIRST(&t->t_list)) != NULL) {
    321  1.29     rmind 		thmap_del(t->t_map, &ent->te_addr, ent->te_alen);
    322  1.29     rmind 		LIST_REMOVE(ent, te_listent);
    323  1.29     rmind 		pool_cache_put(tblent_cache, ent);
    324  1.13     rmind 	}
    325  1.29     rmind 	t->t_nitems = 0;
    326   1.1     rmind }
    327   1.1     rmind 
    328  1.13     rmind static void
    329  1.29     rmind table_tree_flush(npf_table_t *t)
    330  1.18     rmind {
    331  1.29     rmind 	npf_tblent_t *ent;
    332  1.18     rmind 
    333  1.29     rmind 	while ((ent = LIST_FIRST(&t->t_list)) != NULL) {
    334  1.29     rmind 		LIST_REMOVE(ent, te_listent);
    335  1.29     rmind 		pool_cache_put(tblent_cache, ent);
    336  1.18     rmind 	}
    337  1.29     rmind 	lpm_clear(t->t_lpm, NULL, NULL);
    338  1.29     rmind 	t->t_nitems = 0;
    339  1.18     rmind }
    340  1.18     rmind 
    341  1.18     rmind static void
    342  1.29     rmind table_ifaddr_flush(npf_table_t *t)
    343   1.1     rmind {
    344  1.13     rmind 	npf_tblent_t *ent;
    345   1.1     rmind 
    346  1.29     rmind 	for (unsigned i = 0; i < NPF_ADDR_SLOTS; i++) {
    347  1.29     rmind 		size_t len;
    348  1.29     rmind 
    349  1.29     rmind 		if (!t->t_allocated[i]) {
    350  1.29     rmind 			KASSERT(t->t_elements[i] == NULL);
    351  1.29     rmind 			continue;
    352  1.29     rmind 		}
    353  1.29     rmind 		len = t->t_allocated[i] * sizeof(npf_tblent_t *);
    354  1.29     rmind 		kmem_free(t->t_elements[i], len);
    355  1.29     rmind 		t->t_elements[i] = NULL;
    356  1.29     rmind 		t->t_allocated[i] = 0;
    357  1.29     rmind 		t->t_used[i] = 0;
    358  1.29     rmind 	}
    359  1.24  christos 	while ((ent = LIST_FIRST(&t->t_list)) != NULL) {
    360  1.24  christos 		LIST_REMOVE(ent, te_listent);
    361  1.13     rmind 		pool_cache_put(tblent_cache, ent);
    362  1.13     rmind 	}
    363  1.29     rmind 	t->t_nitems = 0;
    364   1.1     rmind }
    365   1.1     rmind 
    366   1.1     rmind /*
    367   1.1     rmind  * npf_table_create: create table with a specified ID.
    368   1.1     rmind  */
    369   1.1     rmind npf_table_t *
    370  1.21     rmind npf_table_create(const char *name, u_int tid, int type,
    371  1.28     rmind     const void *blob, size_t size)
    372   1.1     rmind {
    373   1.1     rmind 	npf_table_t *t;
    374   1.1     rmind 
    375  1.25  christos 	t = kmem_zalloc(sizeof(npf_table_t), KM_SLEEP);
    376  1.19     rmind 	strlcpy(t->t_name, name, NPF_TABLE_MAXNAMELEN);
    377   1.1     rmind 
    378   1.1     rmind 	switch (type) {
    379  1.29     rmind 	case NPF_TABLE_LPM:
    380  1.30  christos 		t->t_lpm = lpm_create(KM_NOSLEEP);
    381  1.29     rmind 		if (t->t_lpm == NULL) {
    382  1.24  christos 			goto out;
    383  1.25  christos 		}
    384  1.24  christos 		LIST_INIT(&t->t_list);
    385   1.1     rmind 		break;
    386  1.29     rmind 	case NPF_TABLE_IPSET:
    387  1.29     rmind 		t->t_map = thmap_create(0, NULL, THMAP_NOCOPY);
    388  1.29     rmind 		if (t->t_map == NULL) {
    389  1.24  christos 			goto out;
    390  1.25  christos 		}
    391   1.1     rmind 		break;
    392  1.29     rmind 	case NPF_TABLE_CONST:
    393  1.28     rmind 		t->t_blob = kmem_alloc(size, KM_SLEEP);
    394  1.28     rmind 		if (t->t_blob == NULL) {
    395  1.28     rmind 			goto out;
    396  1.28     rmind 		}
    397  1.28     rmind 		memcpy(t->t_blob, blob, size);
    398  1.21     rmind 		t->t_bsize = size;
    399  1.28     rmind 
    400  1.28     rmind 		t->t_cdb = cdbr_open_mem(t->t_blob, size,
    401  1.28     rmind 		    CDBR_DEFAULT, NULL, NULL);
    402  1.21     rmind 		if (t->t_cdb == NULL) {
    403  1.28     rmind 			kmem_free(t->t_blob, t->t_bsize);
    404  1.24  christos 			goto out;
    405  1.21     rmind 		}
    406  1.21     rmind 		t->t_nitems = cdbr_entries(t->t_cdb);
    407  1.21     rmind 		break;
    408  1.29     rmind 	case NPF_TABLE_IFADDR:
    409  1.29     rmind 		break;
    410   1.1     rmind 	default:
    411   1.1     rmind 		KASSERT(false);
    412   1.1     rmind 	}
    413  1.30  christos 	mutex_init(&t->t_lock, MUTEX_DEFAULT, IPL_NET);
    414   1.1     rmind 	t->t_type = type;
    415   1.1     rmind 	t->t_id = tid;
    416   1.1     rmind 	return t;
    417  1.24  christos out:
    418  1.25  christos 	kmem_free(t, sizeof(npf_table_t));
    419  1.24  christos 	return NULL;
    420   1.1     rmind }
    421   1.1     rmind 
    422   1.1     rmind /*
    423   1.1     rmind  * npf_table_destroy: free all table entries and table itself.
    424   1.1     rmind  */
    425   1.1     rmind void
    426   1.1     rmind npf_table_destroy(npf_table_t *t)
    427   1.1     rmind {
    428  1.17     rmind 	KASSERT(t->t_refcnt == 0);
    429   1.1     rmind 
    430   1.1     rmind 	switch (t->t_type) {
    431  1.29     rmind 	case NPF_TABLE_IPSET:
    432  1.29     rmind 		table_ipset_flush(t);
    433  1.29     rmind 		npf_table_gc(NULL, t);
    434  1.29     rmind 		thmap_destroy(t->t_map);
    435   1.1     rmind 		break;
    436  1.29     rmind 	case NPF_TABLE_LPM:
    437  1.24  christos 		table_tree_flush(t);
    438  1.24  christos 		lpm_destroy(t->t_lpm);
    439   1.1     rmind 		break;
    440  1.29     rmind 	case NPF_TABLE_CONST:
    441  1.21     rmind 		cdbr_close(t->t_cdb);
    442  1.28     rmind 		kmem_free(t->t_blob, t->t_bsize);
    443  1.21     rmind 		break;
    444  1.29     rmind 	case NPF_TABLE_IFADDR:
    445  1.29     rmind 		table_ifaddr_flush(t);
    446  1.29     rmind 		break;
    447   1.1     rmind 	default:
    448   1.1     rmind 		KASSERT(false);
    449   1.1     rmind 	}
    450  1.28     rmind 	mutex_destroy(&t->t_lock);
    451  1.25  christos 	kmem_free(t, sizeof(npf_table_t));
    452   1.1     rmind }
    453   1.1     rmind 
    454  1.26     rmind u_int
    455  1.26     rmind npf_table_getid(npf_table_t *t)
    456  1.26     rmind {
    457  1.26     rmind 	return t->t_id;
    458  1.26     rmind }
    459  1.26     rmind 
    460   1.1     rmind /*
    461  1.19     rmind  * npf_table_check: validate the name, ID and type.
    462  1.13     rmind  */
    463   1.1     rmind int
    464  1.34     rmind npf_table_check(npf_tableset_t *ts, const char *name, uint64_t tid,
    465  1.34     rmind     uint64_t type, bool replacing)
    466   1.1     rmind {
    467  1.34     rmind 	const npf_table_t *t;
    468  1.34     rmind 
    469  1.28     rmind 	if (tid >= ts->ts_nitems) {
    470   1.1     rmind 		return EINVAL;
    471   1.1     rmind 	}
    472  1.34     rmind 	if (!replacing && ts->ts_map[tid] != NULL) {
    473   1.1     rmind 		return EEXIST;
    474   1.1     rmind 	}
    475  1.21     rmind 	switch (type) {
    476  1.29     rmind 	case NPF_TABLE_LPM:
    477  1.29     rmind 	case NPF_TABLE_IPSET:
    478  1.29     rmind 	case NPF_TABLE_CONST:
    479  1.29     rmind 	case NPF_TABLE_IFADDR:
    480  1.21     rmind 		break;
    481  1.21     rmind 	default:
    482   1.1     rmind 		return EINVAL;
    483   1.1     rmind 	}
    484  1.19     rmind 	if (strlen(name) >= NPF_TABLE_MAXNAMELEN) {
    485  1.19     rmind 		return ENAMETOOLONG;
    486  1.19     rmind 	}
    487  1.34     rmind 	if ((t = npf_tableset_getbyname(ts, name)) != NULL) {
    488  1.34     rmind 		if (!replacing || t->t_id != tid) {
    489  1.34     rmind 			return EEXIST;
    490  1.34     rmind 		}
    491  1.19     rmind 	}
    492   1.1     rmind 	return 0;
    493   1.1     rmind }
    494   1.1     rmind 
    495  1.13     rmind static int
    496  1.29     rmind table_ifaddr_insert(npf_table_t *t, const int alen, npf_tblent_t *ent)
    497  1.29     rmind {
    498  1.29     rmind 	const unsigned aidx = NPF_ADDRLEN2IDX(alen);
    499  1.29     rmind 	const unsigned allocated = t->t_allocated[aidx];
    500  1.29     rmind 	const unsigned used = t->t_used[aidx];
    501  1.13     rmind 
    502  1.13     rmind 	/*
    503  1.29     rmind 	 * No need to check for duplicates.
    504  1.13     rmind 	 */
    505  1.29     rmind 	if (allocated <= used) {
    506  1.29     rmind 		npf_tblent_t **old_elements = t->t_elements[aidx];
    507  1.29     rmind 		npf_tblent_t **elements;
    508  1.29     rmind 		size_t toalloc, newsize;
    509  1.29     rmind 
    510  1.29     rmind 		toalloc = roundup2(allocated + 1, NPF_IFADDR_STEP);
    511  1.29     rmind 		newsize = toalloc * sizeof(npf_tblent_t *);
    512  1.29     rmind 
    513  1.31  christos 		elements = kmem_zalloc(newsize, KM_NOSLEEP);
    514  1.32  christos 		if (elements == NULL) {
    515  1.32  christos 			return ENOMEM;
    516  1.32  christos 		}
    517  1.29     rmind 		for (unsigned i = 0; i < used; i++) {
    518  1.29     rmind 			elements[i] = old_elements[i];
    519  1.29     rmind 		}
    520  1.29     rmind 		if (allocated) {
    521  1.29     rmind 			const size_t len = allocated * sizeof(npf_tblent_t *);
    522  1.29     rmind 			KASSERT(old_elements != NULL);
    523  1.29     rmind 			kmem_free(old_elements, len);
    524  1.29     rmind 		}
    525  1.29     rmind 		t->t_elements[aidx] = elements;
    526  1.29     rmind 		t->t_allocated[aidx] = toalloc;
    527  1.13     rmind 	}
    528  1.29     rmind 	t->t_elements[aidx][used] = ent;
    529  1.29     rmind 	t->t_used[aidx]++;
    530  1.32  christos 	return 0;
    531  1.13     rmind }
    532  1.13     rmind 
    533   1.1     rmind /*
    534  1.13     rmind  * npf_table_insert: add an IP CIDR entry into the table.
    535   1.1     rmind  */
    536   1.1     rmind int
    537  1.19     rmind npf_table_insert(npf_table_t *t, const int alen,
    538   1.6    zoltan     const npf_addr_t *addr, const npf_netmask_t mask)
    539   1.1     rmind {
    540  1.13     rmind 	npf_tblent_t *ent;
    541  1.13     rmind 	int error;
    542   1.1     rmind 
    543  1.33     rmind 	error = npf_netmask_check(alen, mask);
    544  1.13     rmind 	if (error) {
    545  1.13     rmind 		return error;
    546   1.8     rmind 	}
    547  1.12     rmind 	ent = pool_cache_get(tblent_cache, PR_WAITOK);
    548  1.13     rmind 	memcpy(&ent->te_addr, addr, alen);
    549  1.13     rmind 	ent->te_alen = alen;
    550  1.29     rmind 	ent->te_preflen = 0;
    551   1.1     rmind 
    552  1.13     rmind 	/*
    553  1.13     rmind 	 * Insert the entry.  Return an error on duplicate.
    554  1.13     rmind 	 */
    555  1.28     rmind 	mutex_enter(&t->t_lock);
    556   1.1     rmind 	switch (t->t_type) {
    557  1.29     rmind 	case NPF_TABLE_IPSET:
    558  1.13     rmind 		/*
    559  1.29     rmind 		 * Hashmap supports only IPs.
    560  1.29     rmind 		 *
    561  1.29     rmind 		 * Note: the key must be already persistent, since we
    562  1.29     rmind 		 * use THMAP_NOCOPY.
    563  1.13     rmind 		 */
    564  1.13     rmind 		if (mask != NPF_NO_NETMASK) {
    565  1.13     rmind 			error = EINVAL;
    566  1.13     rmind 			break;
    567   1.1     rmind 		}
    568  1.29     rmind 		if (thmap_put(t->t_map, &ent->te_addr, alen, ent) == ent) {
    569  1.29     rmind 			LIST_INSERT_HEAD(&t->t_list, ent, te_listent);
    570  1.15     rmind 			t->t_nitems++;
    571   1.1     rmind 		} else {
    572   1.1     rmind 			error = EEXIST;
    573   1.1     rmind 		}
    574   1.1     rmind 		break;
    575  1.29     rmind 	case NPF_TABLE_LPM: {
    576  1.24  christos 		const unsigned preflen =
    577  1.24  christos 		    (mask == NPF_NO_NETMASK) ? (alen * 8) : mask;
    578  1.29     rmind 		ent->te_preflen = preflen;
    579  1.29     rmind 
    580  1.24  christos 		if (lpm_lookup(t->t_lpm, addr, alen) == NULL &&
    581  1.24  christos 		    lpm_insert(t->t_lpm, addr, alen, preflen, ent) == 0) {
    582  1.24  christos 			LIST_INSERT_HEAD(&t->t_list, ent, te_listent);
    583  1.15     rmind 			t->t_nitems++;
    584  1.15     rmind 			error = 0;
    585  1.13     rmind 		} else {
    586  1.15     rmind 			error = EEXIST;
    587   1.1     rmind 		}
    588   1.1     rmind 		break;
    589  1.13     rmind 	}
    590  1.29     rmind 	case NPF_TABLE_CONST:
    591  1.21     rmind 		error = EINVAL;
    592  1.21     rmind 		break;
    593  1.29     rmind 	case NPF_TABLE_IFADDR:
    594  1.32  christos 		if ((error = table_ifaddr_insert(t, alen, ent)) != 0) {
    595  1.32  christos 			break;
    596  1.32  christos 		}
    597  1.29     rmind 		LIST_INSERT_HEAD(&t->t_list, ent, te_listent);
    598  1.29     rmind 		t->t_nitems++;
    599  1.29     rmind 		break;
    600   1.1     rmind 	default:
    601   1.1     rmind 		KASSERT(false);
    602   1.1     rmind 	}
    603  1.28     rmind 	mutex_exit(&t->t_lock);
    604   1.1     rmind 
    605   1.8     rmind 	if (error) {
    606  1.12     rmind 		pool_cache_put(tblent_cache, ent);
    607   1.1     rmind 	}
    608   1.1     rmind 	return error;
    609   1.1     rmind }
    610   1.1     rmind 
    611   1.1     rmind /*
    612  1.13     rmind  * npf_table_remove: remove the IP CIDR entry from the table.
    613   1.1     rmind  */
    614   1.1     rmind int
    615  1.19     rmind npf_table_remove(npf_table_t *t, const int alen,
    616   1.6    zoltan     const npf_addr_t *addr, const npf_netmask_t mask)
    617   1.1     rmind {
    618  1.21     rmind 	npf_tblent_t *ent = NULL;
    619  1.29     rmind 	int error;
    620   1.1     rmind 
    621  1.33     rmind 	error = npf_netmask_check(alen, mask);
    622  1.13     rmind 	if (error) {
    623  1.13     rmind 		return error;
    624   1.8     rmind 	}
    625  1.15     rmind 
    626  1.28     rmind 	mutex_enter(&t->t_lock);
    627  1.13     rmind 	switch (t->t_type) {
    628  1.29     rmind 	case NPF_TABLE_IPSET:
    629  1.29     rmind 		ent = thmap_del(t->t_map, addr, alen);
    630  1.12     rmind 		if (__predict_true(ent != NULL)) {
    631  1.24  christos 			LIST_REMOVE(ent, te_listent);
    632  1.29     rmind 			LIST_INSERT_HEAD(&t->t_gc, ent, te_listent);
    633  1.29     rmind 			ent = NULL; // to be G/C'ed
    634  1.15     rmind 			t->t_nitems--;
    635  1.29     rmind 		} else {
    636  1.29     rmind 			error = ENOENT;
    637   1.1     rmind 		}
    638   1.1     rmind 		break;
    639  1.29     rmind 	case NPF_TABLE_LPM:
    640  1.24  christos 		ent = lpm_lookup(t->t_lpm, addr, alen);
    641  1.12     rmind 		if (__predict_true(ent != NULL)) {
    642  1.24  christos 			LIST_REMOVE(ent, te_listent);
    643  1.24  christos 			lpm_remove(t->t_lpm, &ent->te_addr,
    644  1.24  christos 			    ent->te_alen, ent->te_preflen);
    645  1.15     rmind 			t->t_nitems--;
    646  1.29     rmind 		} else {
    647  1.29     rmind 			error = ENOENT;
    648   1.1     rmind 		}
    649   1.1     rmind 		break;
    650  1.29     rmind 	case NPF_TABLE_CONST:
    651  1.29     rmind 	case NPF_TABLE_IFADDR:
    652  1.21     rmind 		error = EINVAL;
    653  1.21     rmind 		break;
    654   1.1     rmind 	default:
    655   1.1     rmind 		KASSERT(false);
    656  1.13     rmind 		ent = NULL;
    657   1.1     rmind 	}
    658  1.28     rmind 	mutex_exit(&t->t_lock);
    659   1.1     rmind 
    660  1.21     rmind 	if (ent) {
    661  1.21     rmind 		pool_cache_put(tblent_cache, ent);
    662   1.1     rmind 	}
    663  1.21     rmind 	return error;
    664   1.1     rmind }
    665   1.1     rmind 
    666   1.1     rmind /*
    667  1.13     rmind  * npf_table_lookup: find the table according to ID, lookup and match
    668  1.13     rmind  * the contents with the specified IP address.
    669   1.1     rmind  */
    670   1.1     rmind int
    671  1.19     rmind npf_table_lookup(npf_table_t *t, const int alen, const npf_addr_t *addr)
    672   1.1     rmind {
    673  1.21     rmind 	const void *data;
    674  1.21     rmind 	size_t dlen;
    675  1.21     rmind 	bool found;
    676  1.29     rmind 	int error;
    677   1.1     rmind 
    678  1.33     rmind 	error = npf_netmask_check(alen, NPF_NO_NETMASK);
    679  1.29     rmind 	if (error) {
    680  1.29     rmind 		return error;
    681  1.13     rmind 	}
    682  1.13     rmind 
    683   1.1     rmind 	switch (t->t_type) {
    684  1.29     rmind 	case NPF_TABLE_IPSET:
    685  1.35     rmind 		/* Note: the caller is in the npf_config_read_enter(). */
    686  1.29     rmind 		found = thmap_get(t->t_map, addr, alen) != NULL;
    687   1.1     rmind 		break;
    688  1.29     rmind 	case NPF_TABLE_LPM:
    689  1.28     rmind 		mutex_enter(&t->t_lock);
    690  1.24  christos 		found = lpm_lookup(t->t_lpm, addr, alen) != NULL;
    691  1.28     rmind 		mutex_exit(&t->t_lock);
    692  1.21     rmind 		break;
    693  1.29     rmind 	case NPF_TABLE_CONST:
    694  1.21     rmind 		if (cdbr_find(t->t_cdb, addr, alen, &data, &dlen) == 0) {
    695  1.29     rmind 			found = dlen == (unsigned)alen &&
    696  1.25  christos 			    memcmp(addr, data, dlen) == 0;
    697  1.21     rmind 		} else {
    698  1.21     rmind 			found = false;
    699  1.21     rmind 		}
    700   1.1     rmind 		break;
    701  1.29     rmind 	case NPF_TABLE_IFADDR: {
    702  1.29     rmind 		const unsigned aidx = NPF_ADDRLEN2IDX(alen);
    703  1.29     rmind 
    704  1.29     rmind 		found = false;
    705  1.29     rmind 		for (unsigned i = 0; i < t->t_used[aidx]; i++) {
    706  1.29     rmind 			const npf_tblent_t *elm = t->t_elements[aidx][i];
    707  1.29     rmind 
    708  1.29     rmind 			KASSERT(elm->te_alen == alen);
    709  1.29     rmind 
    710  1.29     rmind 			if (memcmp(&elm->te_addr, addr, alen) == 0) {
    711  1.29     rmind 				found = true;
    712  1.29     rmind 				break;
    713  1.29     rmind 			}
    714  1.29     rmind 		}
    715  1.29     rmind 		break;
    716  1.29     rmind 	}
    717   1.1     rmind 	default:
    718   1.1     rmind 		KASSERT(false);
    719  1.21     rmind 		found = false;
    720   1.1     rmind 	}
    721   1.1     rmind 
    722  1.21     rmind 	return found ? 0 : ENOENT;
    723   1.1     rmind }
    724  1.15     rmind 
    725  1.29     rmind npf_addr_t *
    726  1.29     rmind npf_table_getsome(npf_table_t *t, const int alen, unsigned idx)
    727  1.29     rmind {
    728  1.29     rmind 	const unsigned aidx = NPF_ADDRLEN2IDX(alen);
    729  1.29     rmind 	npf_tblent_t *elm;
    730  1.29     rmind 	unsigned nitems;
    731  1.29     rmind 
    732  1.29     rmind 	KASSERT(t->t_type == NPF_TABLE_IFADDR);
    733  1.29     rmind 	KASSERT(aidx < NPF_ADDR_SLOTS);
    734  1.29     rmind 
    735  1.29     rmind 	nitems = t->t_used[aidx];
    736  1.29     rmind 	if (nitems == 0) {
    737  1.29     rmind 		return NULL;
    738  1.29     rmind 	}
    739  1.29     rmind 
    740  1.29     rmind 	/*
    741  1.29     rmind 	 * No need to acquire the lock, since the table is immutable.
    742  1.29     rmind 	 */
    743  1.29     rmind 	elm = t->t_elements[aidx][idx % nitems];
    744  1.29     rmind 	return &elm->te_addr;
    745  1.29     rmind }
    746  1.29     rmind 
    747  1.15     rmind static int
    748  1.21     rmind table_ent_copyout(const npf_addr_t *addr, const int alen, npf_netmask_t mask,
    749  1.15     rmind     void *ubuf, size_t len, size_t *off)
    750  1.15     rmind {
    751  1.15     rmind 	void *ubufp = (uint8_t *)ubuf + *off;
    752  1.15     rmind 	npf_ioctl_ent_t uent;
    753  1.15     rmind 
    754  1.15     rmind 	if ((*off += sizeof(npf_ioctl_ent_t)) > len) {
    755  1.15     rmind 		return ENOMEM;
    756  1.15     rmind 	}
    757  1.21     rmind 	uent.alen = alen;
    758  1.21     rmind 	memcpy(&uent.addr, addr, sizeof(npf_addr_t));
    759  1.15     rmind 	uent.mask = mask;
    760  1.15     rmind 
    761  1.15     rmind 	return copyout(&uent, ubufp, sizeof(npf_ioctl_ent_t));
    762  1.15     rmind }
    763  1.15     rmind 
    764  1.15     rmind static int
    765  1.41  riastrad table_generic_list(npf_table_t *t, void *ubuf, size_t len)
    766  1.15     rmind {
    767  1.24  christos 	npf_tblent_t *ent;
    768  1.24  christos 	size_t off = 0;
    769  1.15     rmind 	int error = 0;
    770  1.15     rmind 
    771  1.24  christos 	LIST_FOREACH(ent, &t->t_list, te_listent) {
    772  1.41  riastrad 		mutex_exit(&t->t_lock);
    773  1.24  christos 		error = table_ent_copyout(&ent->te_addr,
    774  1.29     rmind 		    ent->te_alen, ent->te_preflen, ubuf, len, &off);
    775  1.41  riastrad 		mutex_enter(&t->t_lock);
    776  1.21     rmind 		if (error)
    777  1.21     rmind 			break;
    778  1.21     rmind 	}
    779  1.21     rmind 	return error;
    780  1.21     rmind }
    781  1.21     rmind 
    782  1.21     rmind static int
    783  1.21     rmind table_cdb_list(npf_table_t *t, void *ubuf, size_t len)
    784  1.21     rmind {
    785  1.21     rmind 	size_t off = 0, dlen;
    786  1.21     rmind 	const void *data;
    787  1.21     rmind 	int error = 0;
    788  1.21     rmind 
    789  1.21     rmind 	for (size_t i = 0; i < t->t_nitems; i++) {
    790  1.21     rmind 		if (cdbr_get(t->t_cdb, i, &data, &dlen) != 0) {
    791  1.21     rmind 			return EINVAL;
    792  1.21     rmind 		}
    793  1.21     rmind 		error = table_ent_copyout(data, dlen, 0, ubuf, len, &off);
    794  1.15     rmind 		if (error)
    795  1.15     rmind 			break;
    796  1.15     rmind 	}
    797  1.15     rmind 	return error;
    798  1.15     rmind }
    799  1.15     rmind 
    800  1.15     rmind /*
    801  1.15     rmind  * npf_table_list: copy a list of all table entries into a userspace buffer.
    802  1.15     rmind  */
    803  1.15     rmind int
    804  1.19     rmind npf_table_list(npf_table_t *t, void *ubuf, size_t len)
    805  1.15     rmind {
    806  1.15     rmind 	int error = 0;
    807  1.15     rmind 
    808  1.28     rmind 	mutex_enter(&t->t_lock);
    809  1.15     rmind 	switch (t->t_type) {
    810  1.29     rmind 	case NPF_TABLE_IPSET:
    811  1.29     rmind 		error = table_generic_list(t, ubuf, len);
    812  1.15     rmind 		break;
    813  1.29     rmind 	case NPF_TABLE_LPM:
    814  1.29     rmind 		error = table_generic_list(t, ubuf, len);
    815  1.16     rmind 		break;
    816  1.29     rmind 	case NPF_TABLE_CONST:
    817  1.21     rmind 		error = table_cdb_list(t, ubuf, len);
    818  1.21     rmind 		break;
    819  1.29     rmind 	case NPF_TABLE_IFADDR:
    820  1.29     rmind 		error = table_generic_list(t, ubuf, len);
    821  1.29     rmind 		break;
    822  1.15     rmind 	default:
    823  1.15     rmind 		KASSERT(false);
    824  1.15     rmind 	}
    825  1.28     rmind 	mutex_exit(&t->t_lock);
    826  1.15     rmind 
    827  1.15     rmind 	return error;
    828  1.15     rmind }
    829  1.18     rmind 
    830  1.18     rmind /*
    831  1.18     rmind  * npf_table_flush: remove all table entries.
    832  1.18     rmind  */
    833  1.18     rmind int
    834  1.19     rmind npf_table_flush(npf_table_t *t)
    835  1.18     rmind {
    836  1.21     rmind 	int error = 0;
    837  1.21     rmind 
    838  1.28     rmind 	mutex_enter(&t->t_lock);
    839  1.18     rmind 	switch (t->t_type) {
    840  1.29     rmind 	case NPF_TABLE_IPSET:
    841  1.29     rmind 		table_ipset_flush(t);
    842  1.18     rmind 		break;
    843  1.29     rmind 	case NPF_TABLE_LPM:
    844  1.24  christos 		table_tree_flush(t);
    845  1.18     rmind 		break;
    846  1.29     rmind 	case NPF_TABLE_CONST:
    847  1.29     rmind 	case NPF_TABLE_IFADDR:
    848  1.21     rmind 		error = EINVAL;
    849  1.21     rmind 		break;
    850  1.18     rmind 	default:
    851  1.18     rmind 		KASSERT(false);
    852  1.18     rmind 	}
    853  1.28     rmind 	mutex_exit(&t->t_lock);
    854  1.21     rmind 	return error;
    855  1.18     rmind }
    856  1.29     rmind 
    857  1.29     rmind void
    858  1.29     rmind npf_table_gc(npf_t *npf, npf_table_t *t)
    859  1.29     rmind {
    860  1.29     rmind 	npf_tblent_t *ent;
    861  1.29     rmind 	void *ref;
    862  1.29     rmind 
    863  1.29     rmind 	if (t->t_type != NPF_TABLE_IPSET || LIST_EMPTY(&t->t_gc)) {
    864  1.29     rmind 		return;
    865  1.29     rmind 	}
    866  1.29     rmind 
    867  1.29     rmind 	ref = thmap_stage_gc(t->t_map);
    868  1.29     rmind 	if (npf) {
    869  1.43       joe 		KASSERT(npf_config_locked_p(npf));
    870  1.29     rmind 		npf_config_sync(npf);
    871  1.29     rmind 	}
    872  1.29     rmind 	thmap_gc(t->t_map, ref);
    873  1.29     rmind 
    874  1.29     rmind 	while ((ent = LIST_FIRST(&t->t_gc)) != NULL) {
    875  1.29     rmind 		LIST_REMOVE(ent, te_listent);
    876  1.29     rmind 		pool_cache_put(tblent_cache, ent);
    877  1.29     rmind 	}
    878  1.29     rmind }
    879