Home | History | Annotate | Line # | Download | only in npf
npf_tableset.c revision 1.17
      1 /*	$NetBSD: npf_tableset.c,v 1.17 2013/02/09 03:35:32 rmind Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 2009-2012 The NetBSD Foundation, Inc.
      5  * All rights reserved.
      6  *
      7  * This material is based upon work partially supported by The
      8  * NetBSD Foundation under a contract with Mindaugas Rasiukevicius.
      9  *
     10  * Redistribution and use in source and binary forms, with or without
     11  * modification, are permitted provided that the following conditions
     12  * are met:
     13  * 1. Redistributions of source code must retain the above copyright
     14  *    notice, this list of conditions and the following disclaimer.
     15  * 2. Redistributions in binary form must reproduce the above copyright
     16  *    notice, this list of conditions and the following disclaimer in the
     17  *    documentation and/or other materials provided with the distribution.
     18  *
     19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     29  * POSSIBILITY OF SUCH DAMAGE.
     30  */
     31 
     32 /*
     33  * NPF tableset module.
     34  *
     35  * Notes
     36  *
     37  *	The tableset is an array of tables.  After the creation, the array
     38  *	is immutable.  The caller is responsible to synchronise the access
     39  *	to the tableset.  The table can either be a hash or a tree.  Its
     40  *	entries are protected by a read-write lock.
     41  */
     42 
     43 #include <sys/cdefs.h>
     44 __KERNEL_RCSID(0, "$NetBSD: npf_tableset.c,v 1.17 2013/02/09 03:35:32 rmind Exp $");
     45 
     46 #include <sys/param.h>
     47 #include <sys/types.h>
     48 
     49 #include <sys/atomic.h>
     50 #include <sys/hash.h>
     51 #include <sys/kmem.h>
     52 #include <sys/pool.h>
     53 #include <sys/queue.h>
     54 #include <sys/rwlock.h>
     55 #include <sys/systm.h>
     56 #include <sys/types.h>
     57 
     58 #include "npf_impl.h"
     59 
/*
 * Table structures.
 */

/*
 * npf_tblent_t: a single table entry (one IP address, optionally with
 * a mask for tree storage).  The linkage union reflects the two storage
 * types: a hash-bucket list entry or a ptree node.
 */
typedef struct npf_tblent {
	union {
		LIST_ENTRY(npf_tblent) hashq;	/* hash storage linkage */
		pt_node_t	node;		/* tree storage linkage */
	} te_entry;
	int			te_alen;	/* address length in bytes */
	npf_addr_t		te_addr;	/* the address itself */
} npf_tblent_t;

LIST_HEAD(npf_hashl, npf_tblent);

struct npf_table {
	char			t_name[16];
	/* Lock and reference count. */
	krwlock_t		t_lock;
	u_int			t_refcnt;
	/* Total number of items. */
	u_int			t_nitems;
	/* Table ID. */
	u_int			t_id;
	/* The storage type can be: a) hash b) tree. */
	int			t_type;
	struct npf_hashl *	t_hashl;
	u_long			t_hashmask;
	/* Separate trees for IPv4 and IPv6. */
	pt_tree_t		t_tree[2];
};

/*
 * Map an address length (4 for IPv4, 16 for IPv6) to the t_tree[]
 * index: 4 >> 4 == 0, 16 >> 4 == 1.
 */
#define	NPF_ADDRLEN2TREE(alen)	((alen) >> 4)

/* Cache for npf_tblent_t allocations, shared by all tables. */
static pool_cache_t		tblent_cache	__read_mostly;
     95 
/*
 * npf_table_sysinit: initialise tableset structures.
 */
void
npf_tableset_sysinit(void)
{

	/* Pool cache backing all table entries (hash and tree alike). */
	tblent_cache = pool_cache_init(sizeof(npf_tblent_t), coherency_unit,
	    0, 0, "npftblpl", NULL, IPL_NONE, NULL, NULL, NULL);
}
    106 
/*
 * npf_tableset_sysfini: destroy the tableset structures.
 * All tables must have been destroyed before this is called.
 */
void
npf_tableset_sysfini(void)
{

	pool_cache_destroy(tblent_cache);
}
    113 
    114 npf_tableset_t *
    115 npf_tableset_create(void)
    116 {
    117 	const size_t sz = NPF_TABLE_SLOTS * sizeof(npf_table_t *);
    118 
    119 	return kmem_zalloc(sz, KM_SLEEP);
    120 }
    121 
    122 void
    123 npf_tableset_destroy(npf_tableset_t *tblset)
    124 {
    125 	const size_t sz = NPF_TABLE_SLOTS * sizeof(npf_table_t *);
    126 	npf_table_t *t;
    127 	u_int tid;
    128 
    129 	/*
    130 	 * Destroy all tables (no references should be held, as ruleset
    131 	 * should be destroyed before).
    132 	 */
    133 	for (tid = 0; tid < NPF_TABLE_SLOTS; tid++) {
    134 		t = tblset[tid];
    135 		if (t && atomic_dec_uint_nv(&t->t_refcnt) == 0) {
    136 			npf_table_destroy(t);
    137 		}
    138 	}
    139 	kmem_free(tblset, sz);
    140 }
    141 
    142 /*
    143  * npf_tableset_insert: insert the table into the specified tableset.
    144  *
    145  * => Returns 0 on success.  Fails and returns error if ID is already used.
    146  */
    147 int
    148 npf_tableset_insert(npf_tableset_t *tblset, npf_table_t *t)
    149 {
    150 	const u_int tid = t->t_id;
    151 	int error;
    152 
    153 	KASSERT((u_int)tid < NPF_TABLE_SLOTS);
    154 
    155 	if (tblset[tid] == NULL) {
    156 		atomic_inc_uint(&t->t_refcnt);
    157 		tblset[tid] = t;
    158 		error = 0;
    159 	} else {
    160 		error = EEXIST;
    161 	}
    162 	return error;
    163 }
    164 
/*
 * npf_tableset_reload: iterate all tables and if the new table is of the
 * same type and has no items, then we preserve the old one and its entries.
 *
 * => The caller is responsible for providing synchronisation.
 */
void
npf_tableset_reload(npf_tableset_t *ntset, npf_tableset_t *otset)
{
	for (int i = 0; i < NPF_TABLE_SLOTS; i++) {
		npf_table_t *t = ntset[i], *ot = otset[i];

		/* Both tablesets must have a table in this slot. */
		if (t == NULL || ot == NULL) {
			continue;
		}
		/* Preserve only if same storage type and new table is empty. */
		if (t->t_nitems || t->t_type != ot->t_type) {
			continue;
		}

		/*
		 * Acquire a reference since the table has to be kept
		 * in the old tableset.
		 */
		atomic_inc_uint(&ot->t_refcnt);
		ntset[i] = ot;

		/*
		 * Only reference, never been visible: the new table was
		 * never published to other threads, so a plain (non-atomic)
		 * decrement of its reference count is sufficient here.
		 */
		t->t_refcnt--;
		npf_table_destroy(t);
	}
}
    196 
    197 /*
    198  * Few helper routines.
    199  */
    200 
/*
 * table_hash_lookup: hash the address, pick the bucket and scan it for
 * an exact (length + bytes) match.
 *
 * => Returns the matching entry, or NULL if not found.  In both cases,
 *    stores the bucket head in *rhtbl so the caller may insert into it.
 * => Callers hold the table lock (see npf_table_insert/remove/lookup).
 */
static npf_tblent_t *
table_hash_lookup(const npf_table_t *t, const npf_addr_t *addr,
    const int alen, struct npf_hashl **rhtbl)
{
	const uint32_t hidx = hash32_buf(addr, alen, HASH32_BUF_INIT);
	struct npf_hashl *htbl = &t->t_hashl[hidx & t->t_hashmask];
	npf_tblent_t *ent;

	/*
	 * Lookup the hash table and check for duplicates.
	 * Note: mask is ignored for the hash storage.
	 */
	LIST_FOREACH(ent, htbl, te_entry.hashq) {
		/* Compare lengths first — cheap rejection of mismatches. */
		if (ent->te_alen != alen) {
			continue;
		}
		if (memcmp(&ent->te_addr, addr, alen) == 0) {
			break;
		}
	}
	/* LIST_FOREACH leaves 'ent' NULL when no entry matched. */
	*rhtbl = htbl;
	return ent;
}
    224 
    225 static void
    226 table_tree_destroy(pt_tree_t *tree)
    227 {
    228 	npf_tblent_t *ent;
    229 
    230 	while ((ent = ptree_iterate(tree, NULL, PT_ASCENDING)) != NULL) {
    231 		ptree_remove_node(tree, ent);
    232 		pool_cache_put(tblent_cache, ent);
    233 	}
    234 }
    235 
/*
 * npf_table_create: create table with a specified ID.
 *
 * => Returns NULL if the hash storage could not be allocated.
 * => The new table has a reference count of zero (kmem_zalloc);
 *    npf_tableset_insert() acquires the first reference.
 */
npf_table_t *
npf_table_create(u_int tid, int type, size_t hsize)
{
	npf_table_t *t;

	KASSERT((u_int)tid < NPF_TABLE_SLOTS);

	t = kmem_zalloc(sizeof(npf_table_t), KM_SLEEP);
	switch (type) {
	case NPF_TABLE_TREE:
		/*
		 * Two trees: t_tree[0] for IPv4, t_tree[1] for IPv6 (see
		 * NPF_ADDRLEN2TREE).  The third argument is presumably the
		 * key length in 32-bit words (1 for IPv4, 4 for IPv6) —
		 * confirm against the ptree(9) ops contract.
		 */
		ptree_init(&t->t_tree[0], &npf_table_ptree_ops,
		    (void *)(sizeof(struct in_addr) / sizeof(uint32_t)),
		    offsetof(npf_tblent_t, te_entry.node),
		    offsetof(npf_tblent_t, te_addr));
		ptree_init(&t->t_tree[1], &npf_table_ptree_ops,
		    (void *)(sizeof(struct in6_addr) / sizeof(uint32_t)),
		    offsetof(npf_tblent_t, te_entry.node),
		    offsetof(npf_tblent_t, te_addr));
		break;
	case NPF_TABLE_HASH:
		t->t_hashl = hashinit(hsize, HASH_LIST, true, &t->t_hashmask);
		if (t->t_hashl == NULL) {
			kmem_free(t, sizeof(npf_table_t));
			return NULL;
		}
		break;
	default:
		KASSERT(false);
	}
	rw_init(&t->t_lock);
	t->t_type = type;
	t->t_id = tid;

	return t;
}
    274 
/*
 * npf_table_destroy: free all table entries and table itself.
 *
 * => Must only be called once the reference count has dropped to zero.
 */
void
npf_table_destroy(npf_table_t *t)
{
	KASSERT(t->t_refcnt == 0);

	switch (t->t_type) {
	case NPF_TABLE_HASH:
		/* Free every entry in every bucket, then the bucket array. */
		for (unsigned n = 0; n <= t->t_hashmask; n++) {
			npf_tblent_t *ent;

			while ((ent = LIST_FIRST(&t->t_hashl[n])) != NULL) {
				LIST_REMOVE(ent, te_entry.hashq);
				pool_cache_put(tblent_cache, ent);
			}
		}
		hashdone(t->t_hashl, HASH_LIST, t->t_hashmask);
		break;
	case NPF_TABLE_TREE:
		/* IPv4 and IPv6 entries live in separate trees. */
		table_tree_destroy(&t->t_tree[0]);
		table_tree_destroy(&t->t_tree[1]);
		break;
	default:
		KASSERT(false);
	}
	rw_destroy(&t->t_lock);
	kmem_free(t, sizeof(npf_table_t));
}
    305 
    306 /*
    307  * npf_table_check: validate ID and type.
    308  */
    309 int
    310 npf_table_check(const npf_tableset_t *tset, u_int tid, int type)
    311 {
    312 
    313 	if ((u_int)tid >= NPF_TABLE_SLOTS) {
    314 		return EINVAL;
    315 	}
    316 	if (tset[tid] != NULL) {
    317 		return EEXIST;
    318 	}
    319 	if (type != NPF_TABLE_TREE && type != NPF_TABLE_HASH) {
    320 		return EINVAL;
    321 	}
    322 	return 0;
    323 }
    324 
/*
 * table_cidr_check: validate the address family index and the netmask.
 *
 * => Returns 0 if valid, EINVAL otherwise.
 * => NOTE(review): 'addr' is never inspected here — it appears to be
 *    passed only for call-site symmetry; confirm before relying on it.
 */
static int
table_cidr_check(const u_int aidx, const npf_addr_t *addr,
    const npf_netmask_t mask)
{

	if (mask > NPF_MAX_NETMASK && mask != NPF_NO_NETMASK) {
		return EINVAL;
	}
	/* Only IPv4 (aidx 0) and IPv6 (aidx 1) are supported. */
	if (aidx > 1) {
		return EINVAL;
	}

	/*
	 * For IPv4 (aidx = 0) - 32 and for IPv6 (aidx = 1) - 128.
	 * If it is a host - shall use NPF_NO_NETMASK.
	 */
	if (mask >= (aidx ? 128 : 32) && mask != NPF_NO_NETMASK) {
		return EINVAL;
	}
	return 0;
}
    346 
/*
 * npf_table_insert: add an IP CIDR entry into the table.
 *
 * => Returns 0 on success, EINVAL for a bad ID/mask/family, EEXIST
 *    on a duplicate entry.
 * => The entry is allocated before taking the write lock and is
 *    released again if the insertion fails.
 */
int
npf_table_insert(npf_tableset_t *tset, u_int tid, const int alen,
    const npf_addr_t *addr, const npf_netmask_t mask)
{
	const u_int aidx = NPF_ADDRLEN2TREE(alen);
	npf_tblent_t *ent;
	npf_table_t *t;
	int error;

	if ((u_int)tid >= NPF_TABLE_SLOTS || (t = tset[tid]) == NULL) {
		return EINVAL;
	}

	error = table_cidr_check(aidx, addr, mask);
	if (error) {
		return error;
	}
	/* Allocate and fill the entry before acquiring the table lock. */
	ent = pool_cache_get(tblent_cache, PR_WAITOK);
	memcpy(&ent->te_addr, addr, alen);
	ent->te_alen = alen;

	/*
	 * Insert the entry.  Return an error on duplicate.
	 */
	rw_enter(&t->t_lock, RW_WRITER);
	switch (t->t_type) {
	case NPF_TABLE_HASH: {
		struct npf_hashl *htbl;

		/*
		 * Hash tables by the concept support only IPs: a CIDR
		 * mask other than NPF_NO_NETMASK is rejected.
		 */
		if (mask != NPF_NO_NETMASK) {
			error = EINVAL;
			break;
		}
		/* table_hash_lookup() also yields the target bucket. */
		if (!table_hash_lookup(t, addr, alen, &htbl)) {
			LIST_INSERT_HEAD(htbl, ent, te_entry.hashq);
			t->t_nitems++;
		} else {
			error = EEXIST;
		}
		break;
	}
	case NPF_TABLE_TREE: {
		pt_tree_t *tree = &t->t_tree[aidx];
		bool ok;

		/*
		 * If no mask specified, use maximum mask.
		 */
		ok = (mask != NPF_NO_NETMASK) ?
		    ptree_insert_mask_node(tree, ent, mask) :
		    ptree_insert_node(tree, ent);
		if (ok) {
			t->t_nitems++;
			error = 0;
		} else {
			error = EEXIST;
		}
		break;
	}
	default:
		KASSERT(false);
	}
	rw_exit(&t->t_lock);

	/* On failure, release the pre-allocated entry. */
	if (error) {
		pool_cache_put(tblent_cache, ent);
	}
	return error;
}
    422 
/*
 * npf_table_remove: remove the IP CIDR entry from the table.
 *
 * => Returns 0 on success, EINVAL for bad arguments, ENOENT if the
 *    entry was not found.
 * => NOTE(review): unlike npf_table_insert(), the CIDR check runs
 *    before the table ID check, so the error precedence differs
 *    between the two for doubly-invalid input.
 */
int
npf_table_remove(npf_tableset_t *tset, u_int tid, const int alen,
    const npf_addr_t *addr, const npf_netmask_t mask)
{
	const u_int aidx = NPF_ADDRLEN2TREE(alen);
	npf_tblent_t *ent;
	npf_table_t *t;
	int error;

	error = table_cidr_check(aidx, addr, mask);
	if (error) {
		return error;
	}

	if ((u_int)tid >= NPF_TABLE_SLOTS || (t = tset[tid]) == NULL) {
		return EINVAL;
	}

	rw_enter(&t->t_lock, RW_WRITER);
	switch (t->t_type) {
	case NPF_TABLE_HASH: {
		struct npf_hashl *htbl;

		/* Unlink the entry from its hash bucket, if present. */
		ent = table_hash_lookup(t, addr, alen, &htbl);
		if (__predict_true(ent != NULL)) {
			LIST_REMOVE(ent, te_entry.hashq);
			t->t_nitems--;
		}
		break;
	}
	case NPF_TABLE_TREE: {
		pt_tree_t *tree = &t->t_tree[aidx];

		ent = ptree_find_node(tree, addr);
		if (__predict_true(ent != NULL)) {
			ptree_remove_node(tree, ent);
			t->t_nitems--;
		}
		break;
	}
	default:
		KASSERT(false);
		ent = NULL;
	}
	rw_exit(&t->t_lock);

	/* Free the unlinked entry outside of the table lock. */
	if (ent == NULL) {
		return ENOENT;
	}
	pool_cache_put(tblent_cache, ent);
	return 0;
}
    478 
/*
 * npf_table_lookup: find the table according to ID, lookup and match
 * the contents with the specified IP address.
 *
 * => Returns 0 if the address is in the table, ENOENT if not,
 *    EINVAL for a bad address family or table ID.
 */
int
npf_table_lookup(npf_tableset_t *tset, u_int tid,
    const int alen, const npf_addr_t *addr)
{
	const u_int aidx = NPF_ADDRLEN2TREE(alen);
	npf_tblent_t *ent;
	npf_table_t *t;

	/* Only IPv4 (aidx 0) and IPv6 (aidx 1) are supported. */
	if (__predict_false(aidx > 1)) {
		return EINVAL;
	}

	if ((u_int)tid >= NPF_TABLE_SLOTS || (t = tset[tid]) == NULL) {
		return EINVAL;
	}

	/* Reader lock: lookups may run concurrently. */
	rw_enter(&t->t_lock, RW_READER);
	switch (t->t_type) {
	case NPF_TABLE_HASH: {
		struct npf_hashl *htbl;
		ent = table_hash_lookup(t, addr, alen, &htbl);
		break;
	}
	case NPF_TABLE_TREE: {
		ent = ptree_find_node(&t->t_tree[aidx], addr);
		break;
	}
	default:
		KASSERT(false);
		ent = NULL;
	}
	rw_exit(&t->t_lock);

	return ent ? 0 : ENOENT;
}
    518 
    519 static int
    520 table_ent_copyout(npf_tblent_t *ent, npf_netmask_t mask,
    521     void *ubuf, size_t len, size_t *off)
    522 {
    523 	void *ubufp = (uint8_t *)ubuf + *off;
    524 	npf_ioctl_ent_t uent;
    525 
    526 	if ((*off += sizeof(npf_ioctl_ent_t)) > len) {
    527 		return ENOMEM;
    528 	}
    529 	uent.alen = ent->te_alen;
    530 	memcpy(&uent.addr, &ent->te_addr, sizeof(npf_addr_t));
    531 	uent.mask = mask;
    532 
    533 	return copyout(&uent, ubufp, sizeof(npf_ioctl_ent_t));
    534 }
    535 
    536 static int
    537 table_tree_list(pt_tree_t *tree, npf_netmask_t maxmask, void *ubuf,
    538     size_t len, size_t *off)
    539 {
    540 	npf_tblent_t *ent = NULL;
    541 	int error = 0;
    542 
    543 	while ((ent = ptree_iterate(tree, ent, PT_ASCENDING)) != NULL) {
    544 		pt_bitlen_t blen;
    545 
    546 		if (!ptree_mask_node_p(tree, ent, &blen)) {
    547 			blen = maxmask;
    548 		}
    549 		error = table_ent_copyout(ent, blen, ubuf, len, off);
    550 		if (error)
    551 			break;
    552 	}
    553 	return error;
    554 }
    555 
    556 /*
    557  * npf_table_list: copy a list of all table entries into a userspace buffer.
    558  */
    559 int
    560 npf_table_list(npf_tableset_t *tset, u_int tid, void *ubuf, size_t len)
    561 {
    562 	npf_table_t *t;
    563 	size_t off = 0;
    564 	int error = 0;
    565 
    566 	if ((u_int)tid >= NPF_TABLE_SLOTS || (t = tset[tid]) == NULL) {
    567 		return EINVAL;
    568 	}
    569 
    570 	rw_enter(&t->t_lock, RW_READER);
    571 	switch (t->t_type) {
    572 	case NPF_TABLE_HASH:
    573 		for (unsigned n = 0; n <= t->t_hashmask; n++) {
    574 			npf_tblent_t *ent;
    575 
    576 			LIST_FOREACH(ent, &t->t_hashl[n], te_entry.hashq)
    577 				if ((error = table_ent_copyout(ent, 0, ubuf,
    578 				    len, &off)) != 0)
    579 					break;
    580 		}
    581 		break;
    582 	case NPF_TABLE_TREE:
    583 		error = table_tree_list(&t->t_tree[0], 32, ubuf, len, &off);
    584 		if (error)
    585 			break;
    586 		error = table_tree_list(&t->t_tree[1], 128, ubuf, len, &off);
    587 		break;
    588 	default:
    589 		KASSERT(false);
    590 	}
    591 	rw_exit(&t->t_lock);
    592 
    593 	return error;
    594 }
    595