Home | History | Annotate | Line # | Download | only in npf
npf_tableset.c revision 1.2
      1 /*	$NetBSD: npf_tableset.c,v 1.2 2010/09/24 22:51:50 rmind Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 2009-2010 The NetBSD Foundation, Inc.
      5  * All rights reserved.
      6  *
      7  * This material is based upon work partially supported by The
      8  * NetBSD Foundation under a contract with Mindaugas Rasiukevicius.
      9  *
     10  * Redistribution and use in source and binary forms, with or without
     11  * modification, are permitted provided that the following conditions
     12  * are met:
     13  * 1. Redistributions of source code must retain the above copyright
     14  *    notice, this list of conditions and the following disclaimer.
     15  * 2. Redistributions in binary form must reproduce the above copyright
     16  *    notice, this list of conditions and the following disclaimer in the
     17  *    documentation and/or other materials provided with the distribution.
     18  *
     19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     29  * POSSIBILITY OF SUCH DAMAGE.
     30  */
     31 
     32 /*
     33  * NPF table module.
     34  *
     35  *	table_lock ->
     36  *		npf_table_t::t_lock
     37  *
     38  * TODO:
     39  * - Currently, code is modeled to handle IPv4 CIDR blocks.
     40  * - Dynamic hash growing/shrinking (i.e. re-hash functionality), maybe?
     41  * - Dynamic array resize.
     42  */
     43 
     44 #ifdef _KERNEL
     45 #include <sys/cdefs.h>
     46 __KERNEL_RCSID(0, "$NetBSD: npf_tableset.c,v 1.2 2010/09/24 22:51:50 rmind Exp $");
     47 #endif
     48 
     49 #include <sys/param.h>
     50 #include <sys/kernel.h>
     51 
     52 #include <sys/atomic.h>
     53 #include <sys/hash.h>
     54 #include <sys/kmem.h>
     55 #include <sys/pool.h>
     56 #include <sys/queue.h>
     57 #include <sys/rwlock.h>
     58 #include <sys/systm.h>
     59 #include <sys/types.h>
     60 
     61 #include "npf_impl.h"
     62 
/*
 * Table entry structure.
 *
 * An entry lives in exactly one storage at a time (hash list or
 * RB-tree, per the owning table's t_type), hence the union linkage.
 */
struct npf_tblent {
	/* Hash/tree entry. */
	union {
		LIST_ENTRY(npf_tblent)	hashq;
		struct rb_node		rbnode;
	} te_entry;
	/* IPv4 CIDR block: network address and mask. */
	in_addr_t			te_addr;
	in_addr_t			te_mask;
};
     74 
LIST_HEAD(npf_hashl, npf_tblent);

/* Table structure. */
struct npf_table {
	char				t_name[16];	/* not referenced in this file */
	/* Lock and reference count. */
	krwlock_t			t_lock;
	u_int				t_refcnt;
	/* Table ID (index into the tableset array). */
	u_int				t_id;
	/* The storage type can be: 1. Hash 2. RB-tree. */
	u_int				t_type;
	/* Hash storage: bucket array and its mask (valid for hash type). */
	struct npf_hashl *		t_hashl;
	u_long				t_hashmask;
	/* Tree storage (valid for RB-tree type). */
	rb_tree_t			t_rbtree;
};
     91 
/* Global table array and its lock. */
static npf_tableset_t *		table_array;	/* currently active tableset */
static krwlock_t		table_lock;	/* protects the table_array pointer */
static pool_cache_t		tblent_cache;	/* allocator for npf_tblent_t */
     96 
     97 /*
     98  * npf_table_sysinit: initialise tableset structures.
     99  */
    100 int
    101 npf_tableset_sysinit(void)
    102 {
    103 
    104 	tblent_cache = pool_cache_init(sizeof(npf_tblent_t), coherency_unit,
    105 	    0, 0, "npftenpl", NULL, IPL_NONE, NULL, NULL, NULL);
    106 	if (tblent_cache == NULL) {
    107 		return ENOMEM;
    108 	}
    109 	table_array = npf_tableset_create();
    110 	if (table_array == NULL) {
    111 		pool_cache_destroy(tblent_cache);
    112 		return ENOMEM;
    113 	}
    114 	rw_init(&table_lock);
    115 	return 0;
    116 }
    117 
/*
 * npf_tableset_sysfini: free the global tableset and release the
 * subsystem resources (entry pool cache and the tableset lock).
 */
void
npf_tableset_sysfini(void)
{

	npf_tableset_destroy(table_array);
	pool_cache_destroy(tblent_cache);
	rw_destroy(&table_lock);
}
    126 
    127 npf_tableset_t *
    128 npf_tableset_create(void)
    129 {
    130 	const size_t sz = NPF_TABLE_SLOTS * sizeof(npf_table_t *);
    131 
    132 	return kmem_zalloc(sz, KM_SLEEP);
    133 }
    134 
    135 void
    136 npf_tableset_destroy(npf_tableset_t *tblset)
    137 {
    138 	const size_t sz = NPF_TABLE_SLOTS * sizeof(npf_table_t *);
    139 	npf_table_t *t;
    140 	u_int tid;
    141 
    142 	/*
    143 	 * Destroy all tables (no references should be held, as ruleset
    144 	 * should be destroyed before).
    145 	 */
    146 	for (tid = 0; tid < NPF_TABLE_SLOTS; tid++) {
    147 		t = tblset[tid];
    148 		if (t != NULL) {
    149 			npf_table_destroy(t);
    150 		}
    151 	}
    152 	kmem_free(tblset, sz);
    153 }
    154 
    155 /*
    156  * npf_tableset_insert: insert the table into the specified tableset.
    157  *
    158  * => Returns 0 on success, fails and returns errno if ID is already used.
    159  */
    160 int
    161 npf_tableset_insert(npf_tableset_t *tblset, npf_table_t *t)
    162 {
    163 	const u_int tid = t->t_id;
    164 	int error;
    165 
    166 	KASSERT((u_int)tid < NPF_TABLE_SLOTS);
    167 
    168 	if (tblset[tid] == NULL) {
    169 		tblset[tid] = t;
    170 		error = 0;
    171 	} else {
    172 		error = EEXIST;
    173 	}
    174 	return error;
    175 }
    176 
/*
 * npf_tableset_reload: atomically replace the old global tableset
 * array with a new one.
 *
 * => Called from npf_ruleset_reload() with a global ruleset lock held.
 * => Returns pointer to the old tableset; the caller will destroy it.
 */
npf_tableset_t *
npf_tableset_reload(npf_tableset_t *tblset)
{
	npf_tableset_t *oldtblset;

	/* Writer lock excludes concurrent npf_table_get(NULL, ...). */
	rw_enter(&table_lock, RW_WRITER);
	oldtblset = table_array;
	table_array = tblset;
	rw_exit(&table_lock);

	return oldtblset;
}
    195 
    196 /*
    197  * Red-black tree storage.
    198  */
    199 
    200 static signed int
    201 table_rbtree_cmp_nodes(void *ctx, const void *n1, const void *n2)
    202 {
    203 	const npf_tblent_t * const te1 = n1;
    204 	const npf_tblent_t * const te2 = n2;
    205 	const in_addr_t x = te1->te_addr & te1->te_mask;
    206 	const in_addr_t y = te2->te_addr & te2->te_mask;
    207 
    208 	if (x < y)
    209 		return -1;
    210 	if (x > y)
    211 		return 1;
    212 	return 0;
    213 }
    214 
    215 static signed int
    216 table_rbtree_cmp_key(void *ctx, const void *n1, const void *key)
    217 {
    218 	const npf_tblent_t * const te = n1;
    219 	const in_addr_t x = te->te_addr & te->te_mask;
    220 	const in_addr_t y = *(const in_addr_t *)key;
    221 
    222 	if (x < y)
    223 		return -1;
    224 	if (x > y)
    225 		return 1;
    226 	return 0;
    227 }
    228 
/* RB-tree operations: nodes are keyed by the masked IPv4 address. */
static const rb_tree_ops_t table_rbtree_ops = {
	.rbto_compare_nodes = table_rbtree_cmp_nodes,
	.rbto_compare_key = table_rbtree_cmp_key,
	.rbto_node_offset = offsetof(npf_tblent_t, te_entry.rbnode),
	.rbto_context = NULL
};
    235 
    236 /*
    237  * Hash helper routine.
    238  */
    239 
    240 static inline struct npf_hashl *
    241 table_hash_bucket(npf_table_t *t, void *buf, size_t sz)
    242 {
    243 	const uint32_t hidx = hash32_buf(buf, sz, HASH32_BUF_INIT);
    244 
    245 	return &t->t_hashl[hidx & t->t_hashmask];
    246 }
    247 
    248 /*
    249  * npf_table_create: create table with a specified ID.
    250  */
    251 npf_table_t *
    252 npf_table_create(u_int tid, int type, size_t hsize)
    253 {
    254 	npf_table_t *t;
    255 
    256 	KASSERT((u_int)tid < NPF_TABLE_SLOTS);
    257 
    258 	t = kmem_zalloc(sizeof(npf_table_t), KM_SLEEP);
    259 	switch (type) {
    260 	case NPF_TABLE_RBTREE:
    261 		rb_tree_init(&t->t_rbtree, &table_rbtree_ops);
    262 		break;
    263 	case NPF_TABLE_HASH:
    264 		t->t_hashl = hashinit(hsize, HASH_LIST, true, &t->t_hashmask);
    265 		if (t->t_hashl == NULL) {
    266 			kmem_free(t, sizeof(npf_table_t));
    267 			return NULL;
    268 		}
    269 		break;
    270 	default:
    271 		KASSERT(false);
    272 	}
    273 	rw_init(&t->t_lock);
    274 	t->t_type = type;
    275 	t->t_refcnt = 1;
    276 	t->t_id = tid;
    277 	return t;
    278 }
    279 
/*
 * npf_table_destroy: free all table entries and table itself.
 *
 * => Caller must guarantee no other references exist (see
 *    npf_table_unref()) and that the table lock is not held.
 */
void
npf_table_destroy(npf_table_t *t)
{
	npf_tblent_t *e;
	u_int n;

	switch (t->t_type) {
	case NPF_TABLE_HASH:
		/* Drain each hash bucket, then free the bucket array. */
		for (n = 0; n <= t->t_hashmask; n++) {
			while ((e = LIST_FIRST(&t->t_hashl[n])) != NULL) {
				LIST_REMOVE(e, te_entry.hashq);
				pool_cache_put(tblent_cache, e);
			}
		}
		hashdone(t->t_hashl, HASH_LIST, t->t_hashmask);
		break;
	case NPF_TABLE_RBTREE:
		/* Repeatedly remove the leftmost node until empty. */
		while ((e = rb_tree_iterate(&t->t_rbtree, NULL,
		    RB_DIR_LEFT)) != NULL) {
			rb_tree_remove_node(&t->t_rbtree, e);
			pool_cache_put(tblent_cache, e);
		}
		break;
	default:
		KASSERT(false);
	}
	rw_destroy(&t->t_lock);
	kmem_free(t, sizeof(npf_table_t));
}
    312 
/*
 * npf_table_ref: holds the reference on table.
 *
 * => Table must be locked (holding the lock guarantees the table is
 *    alive while the reference is taken).
 */
void
npf_table_ref(npf_table_t *t)
{

	KASSERT(rw_lock_held(&t->t_lock));
	atomic_inc_uint(&t->t_refcnt);
}
    325 
    326 /*
    327  * npf_table_unref: drop reference from the table and destroy the table if
    328  * it is the last reference.
    329  */
    330 void
    331 npf_table_unref(npf_table_t *t)
    332 {
    333 
    334 	if (atomic_dec_uint_nv(&t->t_refcnt) != 0) {
    335 		return;
    336 	}
    337 	npf_table_destroy(t);
    338 }
    339 
/*
 * npf_table_get: find the table according to ID and "get it" by
 * read-locking it.
 *
 * => If tset is NULL, the current global tableset is used (under
 *    table_lock).
 * => Returns NULL if the ID is out of range or the slot is empty.
 */
npf_table_t *
npf_table_get(npf_tableset_t *tset, u_int tid)
{
	npf_table_t *t;

	if ((u_int)tid >= NPF_TABLE_SLOTS) {
		return NULL;
	}
	if (tset) {
		/* Caller-supplied tableset: no global lock needed. */
		t = tset[tid];
		if (t != NULL) {
			rw_enter(&t->t_lock, RW_READER);
		}
		return t;
	}
	/*
	 * Global tableset: take the table's lock before dropping
	 * table_lock, so a concurrent reload cannot destroy it under us.
	 */
	rw_enter(&table_lock, RW_READER);
	t = table_array[tid];
	if (t != NULL) {
		rw_enter(&t->t_lock, RW_READER);
	}
	rw_exit(&table_lock);
	return t;
}
    366 
/*
 * npf_table_put: "put table back" by unlocking it (pairs with
 * npf_table_get()).
 */
void
npf_table_put(npf_table_t *t)
{

	rw_exit(&t->t_lock);
}
    376 
    377 /*
    378  * npf_table_check: validate ID and type.
    379  * */
    380 int
    381 npf_table_check(npf_tableset_t *tset, u_int tid, int type)
    382 {
    383 
    384 	if ((u_int)tid >= NPF_TABLE_SLOTS) {
    385 		return EINVAL;
    386 	}
    387 	if (tset[tid] != NULL) {
    388 		return EEXIST;
    389 	}
    390 	if (type != NPF_TABLE_RBTREE && type != NPF_TABLE_HASH) {
    391 		return EINVAL;
    392 	}
    393 	return 0;
    394 }
    395 
/*
 * npf_table_add_v4cidr: add an IPv4 CIDR into the table.
 *
 * => Returns 0 on success, ENOMEM/EINVAL/EEXIST on failure.
 */
int
npf_table_add_v4cidr(npf_tableset_t *tset, u_int tid,
    in_addr_t addr, in_addr_t mask)
{
	struct npf_hashl *htbl;
	npf_tblent_t *e, *it;
	npf_table_t *t;
	in_addr_t val;
	int error = 0;

	/* Allocate and setup entry (before taking the table lock). */
	e = pool_cache_get(tblent_cache, PR_WAITOK);
	if (e == NULL) {
		return ENOMEM;
	}
	e->te_addr = addr;
	e->te_mask = mask;

	/* Locks the table. */
	t = npf_table_get(tset, tid);
	if (__predict_false(t == NULL)) {
		pool_cache_put(tblent_cache, e);
		return EINVAL;
	}
	switch (t->t_type) {
	case NPF_TABLE_HASH:
		/* Generate hash value from: address & mask. */
		val = addr & mask;
		htbl = table_hash_bucket(t, &val, sizeof(in_addr_t));
		/* Lookup to check for duplicates. */
		LIST_FOREACH(it, htbl, te_entry.hashq) {
			if (it->te_addr == addr && it->te_mask == mask)
				break;
		}
		/* If no duplicate - insert entry. */
		if (__predict_true(it == NULL)) {
			LIST_INSERT_HEAD(htbl, e, te_entry.hashq);
		} else {
			error = EEXIST;
		}
		break;
	case NPF_TABLE_RBTREE:
		/* Insert entry.  Returns false, if duplicate. */
		if (rb_tree_insert_node(&t->t_rbtree, e) != e) {
			error = EEXIST;
		}
		break;
	default:
		KASSERT(false);
	}
	npf_table_put(t);

	/* On failure, return the unused entry to the pool. */
	if (__predict_false(error)) {
		pool_cache_put(tblent_cache, e);
	}
	return error;
}
    456 
    457 /*
    458  * npf_table_rem_v4cidr: remove an IPv4 CIDR from the table.
    459  */
    460 int
    461 npf_table_rem_v4cidr(npf_tableset_t *tset, u_int tid,
    462     in_addr_t addr, in_addr_t mask)
    463 {
    464 	struct npf_hashl *htbl;
    465 	npf_tblent_t *e;
    466 	npf_table_t *t;
    467 	in_addr_t val;
    468 	int error;
    469 
    470 	e = NULL;
    471 
    472 	/* Locks the table. */
    473 	t = npf_table_get(tset, tid);
    474 	if (__predict_false(t == NULL)) {
    475 		return EINVAL;
    476 	}
    477 	/* Lookup & remove. */
    478 	switch (t->t_type) {
    479 	case NPF_TABLE_HASH:
    480 		/* Generate hash value from: (address & mask). */
    481 		val = addr & mask;
    482 		htbl = table_hash_bucket(t, &val, sizeof(in_addr_t));
    483 		LIST_FOREACH(e, htbl, te_entry.hashq) {
    484 			if (e->te_addr == addr && e->te_mask == mask)
    485 				break;
    486 		}
    487 		if (__predict_true(e != NULL)) {
    488 			LIST_REMOVE(e, te_entry.hashq);
    489 		} else {
    490 			error = ESRCH;
    491 		}
    492 		break;
    493 	case NPF_TABLE_RBTREE:
    494 		/* Key: (address & mask). */
    495 		val = addr & mask;
    496 		e = rb_tree_find_node(&t->t_rbtree, &val);
    497 		if (__predict_true(e != NULL)) {
    498 			rb_tree_remove_node(&t->t_rbtree, e);
    499 		} else {
    500 			error = ESRCH;
    501 		}
    502 		break;
    503 	default:
    504 		KASSERT(false);
    505 	}
    506 	npf_table_put(t);
    507 
    508 	/* Free table the entry. */
    509 	if (__predict_true(e != NULL)) {
    510 		pool_cache_put(tblent_cache, e);
    511 	}
    512 	return e ? 0 : -1;
    513 }
    514 
    515 /*
    516  * npf_table_match_v4addr: find the table according to ID, lookup and
    517  * match the contents with specified IPv4 address.
    518  */
    519 int
    520 npf_table_match_v4addr(u_int tid, in_addr_t ip4addr)
    521 {
    522 	struct npf_hashl *htbl;
    523 	npf_tblent_t *e;
    524 	npf_table_t *t;
    525 
    526 	e = NULL;
    527 
    528 	/* Locks the table. */
    529 	t = npf_table_get(NULL, tid);
    530 	if (__predict_false(t == NULL)) {
    531 		return EINVAL;
    532 	}
    533 	switch (t->t_type) {
    534 	case NPF_TABLE_HASH:
    535 		htbl = table_hash_bucket(t, &ip4addr, sizeof(in_addr_t));
    536 		LIST_FOREACH(e, htbl, te_entry.hashq) {
    537 			if ((ip4addr & e->te_mask) == e->te_addr) {
    538 				break;
    539 			}
    540 		}
    541 		break;
    542 	case NPF_TABLE_RBTREE:
    543 		e = rb_tree_find_node(&t->t_rbtree, &ip4addr);
    544 		KASSERT((ip4addr & e->te_mask) == e->te_addr);
    545 		break;
    546 	default:
    547 		KASSERT(false);
    548 	}
    549 	npf_table_put(t);
    550 
    551 	return e ? 0 : -1;
    552 }
    553