/* npf_tableset.c, revision 1.1 (NetBSD) */
      1 /*	$NetBSD: npf_tableset.c,v 1.1 2010/08/22 18:56:23 rmind Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 2009-2010 The NetBSD Foundation, Inc.
      5  * All rights reserved.
      6  *
      7  * This material is based upon work partially supported by The
      8  * NetBSD Foundation under a contract with Mindaugas Rasiukevicius.
      9  *
     10  * Redistribution and use in source and binary forms, with or without
     11  * modification, are permitted provided that the following conditions
     12  * are met:
     13  * 1. Redistributions of source code must retain the above copyright
     14  *    notice, this list of conditions and the following disclaimer.
     15  * 2. Redistributions in binary form must reproduce the above copyright
     16  *    notice, this list of conditions and the following disclaimer in the
     17  *    documentation and/or other materials provided with the distribution.
     18  *
     19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     29  * POSSIBILITY OF SUCH DAMAGE.
     30  */
     31 
     32 /*
     33  * NPF table module.
     34  *
     35  *	table_lock ->
     36  *		npf_table_t::t_lock
     37  *
     38  * TODO:
     39  * - Currently, code is modeled to handle IPv4 CIDR blocks.
     40  * - Dynamic hash growing/shrinking (i.e. re-hash functionality), maybe?
     41  * - Dynamic array resize.
     42  */
     43 
     44 #ifdef _KERNEL
     45 #include <sys/cdefs.h>
     46 __KERNEL_RCSID(0, "$NetBSD: npf_tableset.c,v 1.1 2010/08/22 18:56:23 rmind Exp $");
     47 #endif
     48 
     49 #include <sys/param.h>
     50 #include <sys/kernel.h>
     51 
     52 #include <sys/atomic.h>
     53 #include <sys/hash.h>
     54 #include <sys/kmem.h>
     55 #include <sys/pool.h>
     56 #include <sys/queue.h>
     57 #include <sys/rwlock.h>
     58 #include <sys/systm.h>
     59 #include <sys/types.h>
     60 
     61 #include "npf_impl.h"
     62 
/*
 * Table entry structure: one IPv4 CIDR block.
 */
struct npf_tblent {
	/* IPv4 CIDR block: network address and mask. */
	in_addr_t			te_addr;
	in_addr_t			te_mask;
	/*
	 * Storage linkage — exactly one member is in use, selected by the
	 * owning table's t_type: hash-bucket list entry or RB-tree node.
	 */
	union {
		LIST_ENTRY(npf_tblent)	hashq;
		struct rb_node		rbnode;
	} te_entry;
};

/*
 * Return pointer to npf_tblent_t from RB-tree node, by subtracting the
 * offset of the embedded rbnode member. (XXX fix rb-tree)
 */
#define	NPF_RBN2TBLENT(n)		\
    (npf_tblent_t *)((uintptr_t)n - offsetof(npf_tblent_t, te_entry.rbnode))

LIST_HEAD(npf_hashl, npf_tblent);

/* Table structure. */
struct npf_table {
	char				t_name[16];
	/* Lock and reference count. */
	krwlock_t			t_lock;
	u_int				t_refcnt;
	/* Table ID: index into the tableset array, < NPF_TABLE_SLOTS. */
	u_int				t_id;
	/* The storage type can be: 1. Hash 2. RB-tree. */
	u_int				t_type;
	/* Hash storage: bucket array and its mask (valid for hash type). */
	struct npf_hashl *		t_hashl;
	u_long				t_hashmask;
	/* RB-tree storage (valid for rb-tree type). */
	struct rb_tree			t_rbtree;
};

/* Global (active) table array, its lock, and the table-entry pool cache. */
static npf_tableset_t *		table_array;
static krwlock_t		table_lock;
static pool_cache_t		tblent_cache;
     99 
/*
 * npf_tableset_sysinit: initialise tableset structures.
 *
 * => Creates the table-entry pool cache, the initial (empty) global
 *    tableset and the lock protecting the global tableset pointer.
 * => Returns 0 on success or ENOMEM on allocation failure.
 */
int
npf_tableset_sysinit(void)
{

	tblent_cache = pool_cache_init(sizeof(npf_tblent_t), coherency_unit,
	    0, 0, "npftenpl", NULL, IPL_NONE, NULL, NULL, NULL);
	if (tblent_cache == NULL) {
		return ENOMEM;
	}
	table_array = npf_tableset_create();
	if (table_array == NULL) {
		/* Roll back the pool cache on failure. */
		pool_cache_destroy(tblent_cache);
		return ENOMEM;
	}
	rw_init(&table_lock);
	return 0;
}
    120 
/*
 * npf_tableset_sysfini: free all tableset structures.
 *
 * => The global tableset must be destroyed before the pool cache, since
 *    npf_table_destroy() returns entries to tblent_cache.
 */
void
npf_tableset_sysfini(void)
{

	npf_tableset_destroy(table_array);
	pool_cache_destroy(tblent_cache);
	rw_destroy(&table_lock);
}
    129 
    130 npf_tableset_t *
    131 npf_tableset_create(void)
    132 {
    133 	const size_t sz = NPF_TABLE_SLOTS * sizeof(npf_table_t *);
    134 
    135 	return kmem_zalloc(sz, KM_SLEEP);
    136 }
    137 
    138 void
    139 npf_tableset_destroy(npf_tableset_t *tblset)
    140 {
    141 	const size_t sz = NPF_TABLE_SLOTS * sizeof(npf_table_t *);
    142 	npf_table_t *t;
    143 	u_int tid;
    144 
    145 	/*
    146 	 * Destroy all tables (no references should be held, as ruleset
    147 	 * should be destroyed before).
    148 	 */
    149 	for (tid = 0; tid < NPF_TABLE_SLOTS; tid++) {
    150 		t = tblset[tid];
    151 		if (t != NULL) {
    152 			npf_table_destroy(t);
    153 		}
    154 	}
    155 	kmem_free(tblset, sz);
    156 }
    157 
    158 /*
    159  * npf_tableset_insert: insert the table into the specified tableset.
    160  *
    161  * => Returns 0 on success, fails and returns errno if ID is already used.
    162  */
    163 int
    164 npf_tableset_insert(npf_tableset_t *tblset, npf_table_t *t)
    165 {
    166 	const u_int tid = t->t_id;
    167 	int error;
    168 
    169 	KASSERT((u_int)tid < NPF_TABLE_SLOTS);
    170 
    171 	if (tblset[tid] == NULL) {
    172 		tblset[tid] = t;
    173 		error = 0;
    174 	} else {
    175 		error = EEXIST;
    176 	}
    177 	return error;
    178 }
    179 
    180 /*
    181  * npf_tableset_reload: replace old tableset array with a new one.
    182  *
    183  * => Called from npf_ruleset_reload() with a global ruleset lock held.
    184  * => Returns pointer to the old tableset, caller will destroy it.
    185  */
    186 npf_tableset_t *
    187 npf_tableset_reload(npf_tableset_t *tblset)
    188 {
    189 	npf_tableset_t *oldtblset;
    190 
    191 	rw_enter(&table_lock, RW_WRITER);
    192 	oldtblset = table_array;
    193 	table_array = tblset;
    194 	rw_exit(&table_lock);
    195 
    196 	return oldtblset;
    197 }
    198 
    199 /*
    200  * Red-black tree storage.
    201  */
    202 
    203 static signed int
    204 table_rbtree_cmp_nodes(const struct rb_node *n1, const struct rb_node *n2)
    205 {
    206 	const npf_tblent_t *te1 = NPF_RBN2TBLENT(n1);
    207 	const npf_tblent_t *te2 = NPF_RBN2TBLENT(n2);
    208 	const in_addr_t x = te1->te_addr & te1->te_mask;
    209 	const in_addr_t y = te2->te_addr & te2->te_mask;
    210 
    211 	if (x < y)
    212 		return 1;
    213 	if (x > y)
    214 		return -1;
    215 	return 0;
    216 }
    217 
    218 static signed int
    219 table_rbtree_cmp_key(const struct rb_node *n1, const void *key)
    220 {
    221 	const npf_tblent_t *te = NPF_RBN2TBLENT(n1);
    222 	const in_addr_t x = te->te_addr & te->te_mask;
    223 	const in_addr_t y = *(const in_addr_t *)key;
    224 
    225 	if (x < y)
    226 		return 1;
    227 	if (x > y)
    228 		return -1;
    229 	return 0;
    230 }
    231 
/* RB-tree operations: entries are ordered by (address & mask). */
static const struct rb_tree_ops table_rbtree_ops = {
	.rbto_compare_nodes = table_rbtree_cmp_nodes,
	.rbto_compare_key = table_rbtree_cmp_key
};
    236 
    237 /*
    238  * Hash helper routine.
    239  */
    240 
    241 static inline struct npf_hashl *
    242 table_hash_bucket(npf_table_t *t, void *buf, size_t sz)
    243 {
    244 	const uint32_t hidx = hash32_buf(buf, sz, HASH32_BUF_INIT);
    245 
    246 	return &t->t_hashl[hidx & t->t_hashmask];
    247 }
    248 
    249 /*
    250  * npf_table_create: create table with a specified ID.
    251  */
    252 npf_table_t *
    253 npf_table_create(u_int tid, int type, size_t hsize)
    254 {
    255 	npf_table_t *t;
    256 
    257 	KASSERT((u_int)tid < NPF_TABLE_SLOTS);
    258 
    259 	t = kmem_zalloc(sizeof(npf_table_t), KM_SLEEP);
    260 	switch (type) {
    261 	case NPF_TABLE_RBTREE:
    262 		rb_tree_init(&t->t_rbtree, &table_rbtree_ops);
    263 		break;
    264 	case NPF_TABLE_HASH:
    265 		t->t_hashl = hashinit(hsize, HASH_LIST, true, &t->t_hashmask);
    266 		if (t->t_hashl == NULL) {
    267 			kmem_free(t, sizeof(npf_table_t));
    268 			return NULL;
    269 		}
    270 		break;
    271 	default:
    272 		KASSERT(false);
    273 	}
    274 	rw_init(&t->t_lock);
    275 	t->t_type = type;
    276 	t->t_refcnt = 1;
    277 	t->t_id = tid;
    278 	return t;
    279 }
    280 
/*
 * npf_table_destroy: free all table entries and the table itself.
 *
 * => No references should be held on the table.
 * => Entries are returned to tblent_cache; the backing storage (hash
 *    bucket array or RB-tree) is then torn down.
 */
void
npf_table_destroy(npf_table_t *t)
{
	npf_tblent_t *e;
	struct rb_node *nd;
	u_int n;

	switch (t->t_type) {
	case NPF_TABLE_HASH:
		/* Drain every bucket, then release the bucket array. */
		for (n = 0; n <= t->t_hashmask; n++) {
			while ((e = LIST_FIRST(&t->t_hashl[n])) != NULL) {
				LIST_REMOVE(e, te_entry.hashq);
				pool_cache_put(tblent_cache, e);
			}
		}
		hashdone(t->t_hashl, HASH_LIST, t->t_hashmask);
		break;
	case NPF_TABLE_RBTREE:
		/* Remove nodes one at a time until the tree is empty. */
		while ((nd = rb_tree_iterate(&t->t_rbtree, NULL,
		    RB_DIR_RIGHT)) != NULL) {
			e = NPF_RBN2TBLENT(nd);
			rb_tree_remove_node(&t->t_rbtree, &e->te_entry.rbnode);
			pool_cache_put(tblent_cache, e);
		}
		break;
	default:
		KASSERT(false);
	}
	rw_destroy(&t->t_lock);
	kmem_free(t, sizeof(npf_table_t));
}
    315 
/*
 * npf_table_ref: hold a reference on the table.
 *
 * => Table must be locked (reader or writer) by the caller.
 */
void
npf_table_ref(npf_table_t *t)
{

	KASSERT(rw_lock_held(&t->t_lock));
	atomic_inc_uint(&t->t_refcnt);
}
    328 
    329 /*
    330  * npf_table_unref: drop reference from the table and destroy the table if
    331  * it is the last reference.
    332  */
    333 void
    334 npf_table_unref(npf_table_t *t)
    335 {
    336 
    337 	if (atomic_dec_uint_nv(&t->t_refcnt) != 0) {
    338 		return;
    339 	}
    340 	npf_table_destroy(t);
    341 }
    342 
    343 /*
    344  * npf_table_get: find the table according to ID and "get it" by locking it.
    345  */
    346 npf_table_t *
    347 npf_table_get(npf_tableset_t *tset, u_int tid)
    348 {
    349 	npf_table_t *t;
    350 
    351 	if ((u_int)tid >= NPF_TABLE_SLOTS) {
    352 		return NULL;
    353 	}
    354 	if (tset) {
    355 		t = tset[tid];
    356 		if (t != NULL) {
    357 			rw_enter(&t->t_lock, RW_READER);
    358 		}
    359 		return t;
    360 	}
    361 	rw_enter(&table_lock, RW_READER);
    362 	t = table_array[tid];
    363 	if (t != NULL) {
    364 		rw_enter(&t->t_lock, RW_READER);
    365 	}
    366 	rw_exit(&table_lock);
    367 	return t;
    368 }
    369 
/*
 * npf_table_put: "put the table back" by dropping the lock taken
 * in npf_table_get().
 */
void
npf_table_put(npf_table_t *t)
{

	rw_exit(&t->t_lock);
}
    379 
    380 /*
    381  * npf_table_check: validate ID and type.
    382  * */
    383 int
    384 npf_table_check(npf_tableset_t *tset, u_int tid, int type)
    385 {
    386 
    387 	if ((u_int)tid >= NPF_TABLE_SLOTS) {
    388 		return EINVAL;
    389 	}
    390 	if (tset[tid] != NULL) {
    391 		return EEXIST;
    392 	}
    393 	if (type != NPF_TABLE_RBTREE && type != NPF_TABLE_HASH) {
    394 		return EINVAL;
    395 	}
    396 	return 0;
    397 }
    398 
/*
 * npf_table_add_v4cidr: add an IPv4 CIDR into the table.
 *
 * => Returns 0 on success, EINVAL on bad table ID, EEXIST on duplicate.
 * => NOTE(review): npf_table_get() acquires t_lock as RW_READER, yet
 *    this routine modifies the table storage — looks like it should
 *    hold a writer lock; confirm against the intended locking scheme.
 */
int
npf_table_add_v4cidr(npf_tableset_t *tset, u_int tid,
    in_addr_t addr, in_addr_t mask)
{
	struct npf_hashl *htbl;
	npf_tblent_t *e, *it;
	npf_table_t *t;
	in_addr_t val;
	int error = 0;

	/* Allocate and setup entry (may sleep; done before taking locks). */
	e = pool_cache_get(tblent_cache, PR_WAITOK);
	if (e == NULL) {
		return ENOMEM;
	}
	e->te_addr = addr;
	e->te_mask = mask;

	/* Locks the table. */
	t = npf_table_get(tset, tid);
	if (__predict_false(t == NULL)) {
		pool_cache_put(tblent_cache, e);
		return EINVAL;
	}
	switch (t->t_type) {
	case NPF_TABLE_HASH:
		/* Generate hash value from: address & mask. */
		val = addr & mask;
		htbl = table_hash_bucket(t, &val, sizeof(in_addr_t));
		/* Lookup to check for duplicates. */
		LIST_FOREACH(it, htbl, te_entry.hashq) {
			if (it->te_addr == addr && it->te_mask == mask)
				break;
		}
		/* If no duplicate - insert entry. */
		if (__predict_true(it == NULL)) {
			LIST_INSERT_HEAD(htbl, e, te_entry.hashq);
		} else {
			error = EEXIST;
		}
		break;
	case NPF_TABLE_RBTREE:
		/* Insert entry.  Returns false, if duplicate. */
		if (!rb_tree_insert_node(&t->t_rbtree, &e->te_entry.rbnode)) {
			error = EEXIST;
		}
		break;
	default:
		KASSERT(false);
	}
	npf_table_put(t);

	/* On failure, return the unused entry to the pool cache. */
	if (__predict_false(error)) {
		pool_cache_put(tblent_cache, e);
	}
	return error;
}
    459 
    460 /*
    461  * npf_table_rem_v4cidr: remove an IPv4 CIDR from the table.
    462  */
    463 int
    464 npf_table_rem_v4cidr(npf_tableset_t *tset, u_int tid,
    465     in_addr_t addr, in_addr_t mask)
    466 {
    467 	struct npf_hashl *htbl;
    468 	struct rb_node *nd;
    469 	npf_tblent_t *e;
    470 	npf_table_t *t;
    471 	in_addr_t val;
    472 	int error;
    473 
    474 	e = NULL;
    475 
    476 	/* Locks the table. */
    477 	t = npf_table_get(tset, tid);
    478 	if (__predict_false(t == NULL)) {
    479 		return EINVAL;
    480 	}
    481 	/* Lookup & remove. */
    482 	switch (t->t_type) {
    483 	case NPF_TABLE_HASH:
    484 		/* Generate hash value from: (address & mask). */
    485 		val = addr & mask;
    486 		htbl = table_hash_bucket(t, &val, sizeof(in_addr_t));
    487 		LIST_FOREACH(e, htbl, te_entry.hashq) {
    488 			if (e->te_addr == addr && e->te_mask == mask)
    489 				break;
    490 		}
    491 		if (__predict_true(e != NULL)) {
    492 			LIST_REMOVE(e, te_entry.hashq);
    493 		} else {
    494 			error = ESRCH;
    495 		}
    496 		break;
    497 	case NPF_TABLE_RBTREE:
    498 		/* Key: (address & mask). */
    499 		val = addr & mask;
    500 		nd = rb_tree_find_node(&t->t_rbtree, &val);
    501 		if (__predict_true(nd != NULL)) {
    502 			e = NPF_RBN2TBLENT(nd);
    503 			rb_tree_remove_node(&t->t_rbtree, &e->te_entry.rbnode);
    504 		} else {
    505 			error = ESRCH;
    506 		}
    507 		break;
    508 	default:
    509 		KASSERT(false);
    510 	}
    511 	npf_table_put(t);
    512 
    513 	/* Free table the entry. */
    514 	if (__predict_true(e != NULL)) {
    515 		pool_cache_put(tblent_cache, e);
    516 	}
    517 	return e ? 0 : -1;
    518 }
    519 
    520 /*
    521  * npf_table_match_v4addr: find the table according to ID, lookup and
    522  * match the contents with specified IPv4 address.
    523  */
    524 int
    525 npf_table_match_v4addr(u_int tid, in_addr_t ip4addr)
    526 {
    527 	struct npf_hashl *htbl;
    528 	struct rb_node *nd;
    529 	npf_tblent_t *e;
    530 	npf_table_t *t;
    531 
    532 	e = NULL;
    533 
    534 	/* Locks the table. */
    535 	t = npf_table_get(NULL, tid);
    536 	if (__predict_false(t == NULL)) {
    537 		return EINVAL;
    538 	}
    539 	switch (t->t_type) {
    540 	case NPF_TABLE_HASH:
    541 		htbl = table_hash_bucket(t, &ip4addr, sizeof(in_addr_t));
    542 		LIST_FOREACH(e, htbl, te_entry.hashq) {
    543 			if ((ip4addr & e->te_mask) == e->te_addr) {
    544 				break;
    545 			}
    546 		}
    547 		break;
    548 	case NPF_TABLE_RBTREE:
    549 		nd = rb_tree_find_node(&t->t_rbtree, &ip4addr);
    550 		e = NPF_RBN2TBLENT(nd);
    551 		KASSERT((ip4addr & e->te_mask) == e->te_addr);
    552 		break;
    553 	default:
    554 		KASSERT(false);
    555 	}
    556 	npf_table_put(t);
    557 
    558 	return e ? 0 : -1;
    559 }
    560