/*	$NetBSD: npf_tableset.c,v 1.2 2010/09/24 22:51:50 rmind Exp $	*/

/*-
 * Copyright (c) 2009-2010 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This material is based upon work partially supported by The
 * NetBSD Foundation under a contract with Mindaugas Rasiukevicius.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * NPF table module.
 *
 * Lock order:
 *
 *	table_lock ->
 *		npf_table_t::t_lock
 *
 * TODO:
 * - Currently, the code is modelled to handle only IPv4 CIDR blocks.
 * - Dynamic hash growing/shrinking (i.e. re-hash functionality), maybe?
 * - Dynamic array resize.
 */

#ifdef _KERNEL
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: npf_tableset.c,v 1.2 2010/09/24 22:51:50 rmind Exp $");
#endif

#include <sys/param.h>
#include <sys/kernel.h>

#include <sys/atomic.h>
#include <sys/hash.h>
#include <sys/kmem.h>
#include <sys/pool.h>
#include <sys/queue.h>
#include <sys/rwlock.h>
#include <sys/systm.h>
#include <sys/types.h>

#include "npf_impl.h"

/* Table entry structure. */
struct npf_tblent {
	/* Hash/tree entry. */
	union {
		LIST_ENTRY(npf_tblent)	hashq;
		struct rb_node		rbnode;
	} te_entry;
	/* IPv4 CIDR block. */
	in_addr_t		te_addr;
	in_addr_t		te_mask;
};

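/* List head type for the hash buckets. */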
LIST_HEAD(npf_hashl, npf_tblent);

/* Table structure. */
struct npf_table {
	char			t_name[16];
	/* Lock and reference count. */
	krwlock_t		t_lock;
	u_int			t_refcnt;
	/* Table ID. */
	u_int			t_id;
	/* Storage type: hash table or red-black tree. */
	u_int			t_type;
	struct npf_hashl *	t_hashl;
	u_long			t_hashmask;
	rb_tree_t		t_rbtree;
};

/* Global table array and its lock. */
static npf_tableset_t *		table_array;
static krwlock_t		table_lock;
static pool_cache_t		tblent_cache;

/*
 * npf_tableset_sysinit: initialise tableset structures.
 */
int
npf_tableset_sysinit(void)
{

	tblent_cache = pool_cache_init(sizeof(npf_tblent_t), coherency_unit,
	    0, 0, "npftenpl", NULL, IPL_NONE, NULL, NULL, NULL);
	if (tblent_cache == NULL) {
		return ENOMEM;
	}
	table_array = npf_tableset_create();
	if (table_array == NULL) {
		pool_cache_destroy(tblent_cache);
		return ENOMEM;
	}
	rw_init(&table_lock);
	return 0;
}

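/*
 * npf_tableset_sysfini: free tableset structures.
 */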
void
npf_tableset_sysfini(void)
{

	npf_tableset_destroy(table_array);
	pool_cache_destroy(tblent_cache);
	rw_destroy(&table_lock);
}

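/*
 * npf_tableset_create: allocate a new, empty tableset array.
 */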
npf_tableset_t *
npf_tableset_create(void)
{
	const size_t sz = NPF_TABLE_SLOTS * sizeof(npf_table_t *);

	return kmem_zalloc(sz, KM_SLEEP);
}

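/*
 * npf_tableset_destroy: destroy all tables in the tableset and free the
 * tableset array itself.
 */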
void
npf_tableset_destroy(npf_tableset_t *tblset)
{
	const size_t sz = NPF_TABLE_SLOTS * sizeof(npf_table_t *);
	npf_table_t *t;
	u_int tid;

	/*
	 * Destroy all tables.  No references should be held at this point,
	 * since the ruleset should have been destroyed first.
	 */
	for (tid = 0; tid < NPF_TABLE_SLOTS; tid++) {
		t = tblset[tid];
		if (t != NULL) {
			npf_table_destroy(t);
		}
	}
	kmem_free(tblset, sz);
}

/*
 * npf_tableset_insert: insert the table into the specified tableset.
 *
 * => Returns 0 on success, or EEXIST if the table ID is already used.
 */
int
npf_tableset_insert(npf_tableset_t *tblset, npf_table_t *t)
{
	const u_int tid = t->t_id;
	int error;

	KASSERT((u_int)tid < NPF_TABLE_SLOTS);

	if (tblset[tid] == NULL) {
		tblset[tid] = t;
		error = 0;
	} else {
		error = EEXIST;
	}
	return error;
}

/*
 * npf_tableset_reload: replace the old tableset array with a new one.
 *
 * => Called from npf_ruleset_reload() with the global ruleset lock held.
 * => Returns a pointer to the old tableset; the caller will destroy it.
 */
npf_tableset_t *
npf_tableset_reload(npf_tableset_t *tblset)
{
	npf_tableset_t *oldtblset;

	rw_enter(&table_lock, RW_WRITER);
	oldtblset = table_array;
	table_array = tblset;
	rw_exit(&table_lock);

	return oldtblset;
}

/*
 * Red-black tree storage.
 */

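/*
 * table_rbtree_cmp_nodes: compare two entries by their masked
 * (network) addresses.
 */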
static signed int
table_rbtree_cmp_nodes(void *ctx, const void *n1, const void *n2)
{
	const npf_tblent_t * const te1 = n1;
	const npf_tblent_t * const te2 = n2;
	const in_addr_t x = te1->te_addr & te1->te_mask;
	const in_addr_t y = te2->te_addr & te2->te_mask;

	if (x < y)
		return -1;
	if (x > y)
		return 1;
	return 0;
}

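/*
 * table_rbtree_cmp_key: compare an entry's masked address against a key,
 * which is expected to point to an in_addr_t.
 */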
static signed int
table_rbtree_cmp_key(void *ctx, const void *n1, const void *key)
{
	const npf_tblent_t * const te = n1;
	const in_addr_t x = te->te_addr & te->te_mask;
	const in_addr_t y = *(const in_addr_t *)key;

	if (x < y)
		return -1;
	if (x > y)
		return 1;
	return 0;
}

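/* RB-tree operations, keyed on the masked (network) address. */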
static const rb_tree_ops_t table_rbtree_ops = {
	.rbto_compare_nodes	= table_rbtree_cmp_nodes,
	.rbto_compare_key	= table_rbtree_cmp_key,
	.rbto_node_offset	= offsetof(npf_tblent_t, te_entry.rbnode),
	.rbto_context		= NULL
};

/*
 * table_hash_bucket: return the hash bucket for the given key buffer.
 */
static inline struct npf_hashl *
table_hash_bucket(npf_table_t *t, void *buf, size_t sz)
{
	const uint32_t hidx = hash32_buf(buf, sz, HASH32_BUF_INIT);

	return &t->t_hashl[hidx & t->t_hashmask];
}

/*
 * npf_table_create: create a table of the specified storage type with
 * the given ID.  The hash size (hsize) is used only for hash tables.
 */
npf_table_t *
npf_table_create(u_int tid, int type, size_t hsize)
{
	npf_table_t *t;

	KASSERT((u_int)tid < NPF_TABLE_SLOTS);

	t = kmem_zalloc(sizeof(npf_table_t), KM_SLEEP);
	switch (type) {
	case NPF_TABLE_RBTREE:
		rb_tree_init(&t->t_rbtree, &table_rbtree_ops);
		break;
	case NPF_TABLE_HASH:
		t->t_hashl = hashinit(hsize, HASH_LIST, true, &t->t_hashmask);
		if (t->t_hashl == NULL) {
			kmem_free(t, sizeof(npf_table_t));
			return NULL;
		}
		break;
	default:
		KASSERT(false);
	}
	rw_init(&t->t_lock);
	t->t_type = type;
	t->t_refcnt = 1;
	t->t_id = tid;
	return t;
}

/*
 * npf_table_destroy: free all table entries and the table itself.
 */
void
npf_table_destroy(npf_table_t *t)
{
	npf_tblent_t *e;
	u_int n;

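	/* Free all table entries. */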
	switch (t->t_type) {
	case NPF_TABLE_HASH:
		for (n = 0; n <= t->t_hashmask; n++) {
			while ((e = LIST_FIRST(&t->t_hashl[n])) != NULL) {
				LIST_REMOVE(e, te_entry.hashq);
				pool_cache_put(tblent_cache, e);
			}
		}
		hashdone(t->t_hashl, HASH_LIST, t->t_hashmask);
		break;
	case NPF_TABLE_RBTREE:
		while ((e = rb_tree_iterate(&t->t_rbtree, NULL,
		    RB_DIR_LEFT)) != NULL) {
			rb_tree_remove_node(&t->t_rbtree, e);
			pool_cache_put(tblent_cache, e);
		}
		break;
	default:
		KASSERT(false);
	}
	rw_destroy(&t->t_lock);
	kmem_free(t, sizeof(npf_table_t));
}

/*
 * npf_table_ref: acquire an additional reference on the table.
 *
 * => Table must be locked.
 */
void
npf_table_ref(npf_table_t *t)
{

	KASSERT(rw_lock_held(&t->t_lock));
	atomic_inc_uint(&t->t_refcnt);
}

/*
 * npf_table_unref: drop a reference on the table and destroy the table
 * if it was the last one.
 */
void
npf_table_unref(npf_table_t *t)
{

	if (atomic_dec_uint_nv(&t->t_refcnt) != 0) {
		return;
	}
	npf_table_destroy(t);
}

/*
 * npf_table_get: find the table by ID and "get" it by acquiring
 * a read lock on it.
 */
npf_table_t *
npf_table_get(npf_tableset_t *tset, u_int tid)
{
	npf_table_t *t;

	if ((u_int)tid >= NPF_TABLE_SLOTS) {
		return NULL;
	}
	if (tset) {
		t = tset[tid];
		if (t != NULL) {
			rw_enter(&t->t_lock, RW_READER);
		}
		return t;
	}
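	/* No tableset was given: use the currently active tableset. */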
	rw_enter(&table_lock, RW_READER);
	t = table_array[tid];
	if (t != NULL) {
		rw_enter(&t->t_lock, RW_READER);
	}
	rw_exit(&table_lock);
	return t;
}

/*
 * npf_table_put: "put table back" by unlocking it.
 */
void
npf_table_put(npf_table_t *t)
{

	rw_exit(&t->t_lock);
}

/*
 * npf_table_check: validate the table ID and type.
 */
int
npf_table_check(npf_tableset_t *tset, u_int tid, int type)
{

	if ((u_int)tid >= NPF_TABLE_SLOTS) {
		return EINVAL;
	}
	if (tset[tid] != NULL) {
		return EEXIST;
	}
	if (type != NPF_TABLE_RBTREE && type != NPF_TABLE_HASH) {
		return EINVAL;
	}
	return 0;
}

/*
 * npf_table_add_v4cidr: add an IPv4 CIDR block into the table.
 */
int
npf_table_add_v4cidr(npf_tableset_t *tset, u_int tid,
    in_addr_t addr, in_addr_t mask)
{
	struct npf_hashl *htbl;
	npf_tblent_t *e, *it;
	npf_table_t *t;
	in_addr_t val;
	int error = 0;

	/* Allocate and setup the entry. */
	e = pool_cache_get(tblent_cache, PR_WAITOK);
	if (e == NULL) {
		return ENOMEM;
	}
	e->te_addr = addr;
	e->te_mask = mask;

	/* Get and lock the table. */
	t = npf_table_get(tset, tid);
	if (__predict_false(t == NULL)) {
		pool_cache_put(tblent_cache, e);
		return EINVAL;
	}
	switch (t->t_type) {
	case NPF_TABLE_HASH:
		/* Generate the hash value from (address & mask). */
		val = addr & mask;
		htbl = table_hash_bucket(t, &val, sizeof(in_addr_t));
		/* Lookup to check for duplicates. */
		LIST_FOREACH(it, htbl, te_entry.hashq) {
			if (it->te_addr == addr && it->te_mask == mask)
				break;
		}
		/* If there is no duplicate, insert the entry. */
		if (__predict_true(it == NULL)) {
			LIST_INSERT_HEAD(htbl, e, te_entry.hashq);
		} else {
			error = EEXIST;
		}
		break;
	case NPF_TABLE_RBTREE:
		/*
		 * Insert the entry.  On a duplicate, rb_tree_insert_node()
		 * returns the already existing entry, not the new one.
		 */
		if (rb_tree_insert_node(&t->t_rbtree, e) != e) {
			error = EEXIST;
		}
		break;
	default:
		KASSERT(false);
	}
	npf_table_put(t);

	/* On failure, free the unused entry. */
	if (__predict_false(error)) {
		pool_cache_put(tblent_cache, e);
	}
	return error;
}

/*
 * npf_table_rem_v4cidr: remove an IPv4 CIDR block from the table.
 *
 * => Returns 0 on success, or ESRCH if the entry was not found.
 */
int
npf_table_rem_v4cidr(npf_tableset_t *tset, u_int tid,
    in_addr_t addr, in_addr_t mask)
{
	struct npf_hashl *htbl;
	npf_tblent_t *e;
	npf_table_t *t;
	in_addr_t val;
	int error = 0;

	e = NULL;

	/* Get and lock the table. */
	t = npf_table_get(tset, tid);
	if (__predict_false(t == NULL)) {
		return EINVAL;
	}
	/* Lookup and remove. */
	switch (t->t_type) {
	case NPF_TABLE_HASH:
		/* Generate the hash value from (address & mask). */
		val = addr & mask;
		htbl = table_hash_bucket(t, &val, sizeof(in_addr_t));
		LIST_FOREACH(e, htbl, te_entry.hashq) {
			if (e->te_addr == addr && e->te_mask == mask)
				break;
		}
		if (__predict_true(e != NULL)) {
			LIST_REMOVE(e, te_entry.hashq);
		} else {
			error = ESRCH;
		}
		break;
	case NPF_TABLE_RBTREE:
		/* Key: (address & mask). */
		val = addr & mask;
		e = rb_tree_find_node(&t->t_rbtree, &val);
		if (__predict_true(e != NULL)) {
			rb_tree_remove_node(&t->t_rbtree, e);
		} else {
			error = ESRCH;
		}
		break;
	default:
		KASSERT(false);
	}
	npf_table_put(t);

	/* Free the removed entry. */
	if (__predict_true(e != NULL)) {
		pool_cache_put(tblent_cache, e);
	}
	return error;
}

/*
 * npf_table_match_v4addr: find the table by ID and match the specified
 * IPv4 address against its contents.
 *
 * => Returns 0 on match, -1 otherwise.
 */
int
npf_table_match_v4addr(u_int tid, in_addr_t ip4addr)
{
	struct npf_hashl *htbl;
	npf_tblent_t *e;
	npf_table_t *t;

	e = NULL;

	/* Get and lock the table. */
	t = npf_table_get(NULL, tid);
	if (__predict_false(t == NULL)) {
		return EINVAL;
	}
	switch (t->t_type) {
	case NPF_TABLE_HASH:
		htbl = table_hash_bucket(t, &ip4addr, sizeof(in_addr_t));
		LIST_FOREACH(e, htbl, te_entry.hashq) {
			if ((ip4addr & e->te_mask) == e->te_addr) {
				break;
			}
		}
		break;
	case NPF_TABLE_RBTREE:
		e = rb_tree_find_node(&t->t_rbtree, &ip4addr);
		KASSERT(e == NULL || (ip4addr & e->te_mask) == e->te_addr);
		break;
	default:
		KASSERT(false);
	}
	npf_table_put(t);

	return e ? 0 : -1;
}