npf_tableset.c revision 1.5 1 /* $NetBSD: npf_tableset.c,v 1.5 2011/02/02 02:20:25 rmind Exp $ */
2
3 /*-
4 * Copyright (c) 2009-2010 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This material is based upon work partially supported by The
8 * NetBSD Foundation under a contract with Mindaugas Rasiukevicius.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 /*
33 * NPF tableset module.
34 *
35 * TODO:
36 * - Currently, code is modeled to handle IPv4 CIDR blocks.
37 * - Dynamic hash growing/shrinking (i.e. re-hash functionality), maybe?
38 * - Dynamic array resize.
39 */
40
41 #include <sys/cdefs.h>
42 __KERNEL_RCSID(0, "$NetBSD: npf_tableset.c,v 1.5 2011/02/02 02:20:25 rmind Exp $");
43
44 #include <sys/param.h>
45 #include <sys/kernel.h>
46
47 #include <sys/atomic.h>
48 #include <sys/hash.h>
49 #include <sys/kmem.h>
50 #include <sys/pool.h>
51 #include <sys/queue.h>
52 #include <sys/rwlock.h>
53 #include <sys/systm.h>
54 #include <sys/types.h>
55
56 #include "npf_impl.h"
57
58 /* Table entry structure. */
59 struct npf_tblent {
60 /* Hash/tree entry. */
61 union {
62 LIST_ENTRY(npf_tblent) hashq;
63 rb_node_t rbnode;
64 } te_entry;
65 /* IPv4 CIDR block. */
66 in_addr_t te_addr;
67 in_addr_t te_mask;
68 };
69
/* Hash bucket: head of a list of table entries. */
LIST_HEAD(npf_hashl, npf_tblent);
71
72 /* Table structure. */
73 struct npf_table {
74 char t_name[16];
75 /* Lock and reference count. */
76 krwlock_t t_lock;
77 u_int t_refcnt;
78 /* Table ID. */
79 u_int t_id;
80 /* The storage type can be: 1. Hash 2. RB-tree. */
81 int t_type;
82 struct npf_hashl * t_hashl;
83 u_long t_hashmask;
84 rb_tree_t t_rbtree;
85 };
86
87 static pool_cache_t tblent_cache __read_mostly;
88
89 /*
90 * npf_table_sysinit: initialise tableset structures.
91 */
92 void
93 npf_tableset_sysinit(void)
94 {
95
96 tblent_cache = pool_cache_init(sizeof(npf_tblent_t), coherency_unit,
97 0, 0, "npftenpl", NULL, IPL_NONE, NULL, NULL, NULL);
98 }
99
100 void
101 npf_tableset_sysfini(void)
102 {
103
104 pool_cache_destroy(tblent_cache);
105 }
106
107 npf_tableset_t *
108 npf_tableset_create(void)
109 {
110 const size_t sz = NPF_TABLE_SLOTS * sizeof(npf_table_t *);
111
112 return kmem_zalloc(sz, KM_SLEEP);
113 }
114
115 void
116 npf_tableset_destroy(npf_tableset_t *tblset)
117 {
118 const size_t sz = NPF_TABLE_SLOTS * sizeof(npf_table_t *);
119 npf_table_t *t;
120 u_int tid;
121
122 /*
123 * Destroy all tables (no references should be held, as ruleset
124 * should be destroyed before).
125 */
126 for (tid = 0; tid < NPF_TABLE_SLOTS; tid++) {
127 t = tblset[tid];
128 if (t != NULL) {
129 npf_table_destroy(t);
130 }
131 }
132 kmem_free(tblset, sz);
133 }
134
135 /*
136 * npf_tableset_insert: insert the table into the specified tableset.
137 *
138 * => Returns 0 on success, fails and returns errno if ID is already used.
139 */
140 int
141 npf_tableset_insert(npf_tableset_t *tblset, npf_table_t *t)
142 {
143 const u_int tid = t->t_id;
144 int error;
145
146 KASSERT((u_int)tid < NPF_TABLE_SLOTS);
147
148 if (tblset[tid] == NULL) {
149 tblset[tid] = t;
150 error = 0;
151 } else {
152 error = EEXIST;
153 }
154 return error;
155 }
156
157 /*
158 * Red-black tree storage.
159 */
160
161 static signed int
162 table_rbtree_cmp_nodes(void *ctx, const void *n1, const void *n2)
163 {
164 const npf_tblent_t * const te1 = n1;
165 const npf_tblent_t * const te2 = n2;
166 const in_addr_t x = te1->te_addr & te1->te_mask;
167 const in_addr_t y = te2->te_addr & te2->te_mask;
168
169 if (x < y)
170 return -1;
171 if (x > y)
172 return 1;
173 return 0;
174 }
175
176 static signed int
177 table_rbtree_cmp_key(void *ctx, const void *n1, const void *key)
178 {
179 const npf_tblent_t * const te = n1;
180 const in_addr_t x = te->te_addr & te->te_mask;
181 const in_addr_t y = *(const in_addr_t *)key;
182
183 if (x < y)
184 return -1;
185 if (x > y)
186 return 1;
187 return 0;
188 }
189
190 static const rb_tree_ops_t table_rbtree_ops = {
191 .rbto_compare_nodes = table_rbtree_cmp_nodes,
192 .rbto_compare_key = table_rbtree_cmp_key,
193 .rbto_node_offset = offsetof(npf_tblent_t, te_entry.rbnode),
194 .rbto_context = NULL
195 };
196
197 /*
198 * Hash helper routine.
199 */
200
201 static inline struct npf_hashl *
202 table_hash_bucket(npf_table_t *t, void *buf, size_t sz)
203 {
204 const uint32_t hidx = hash32_buf(buf, sz, HASH32_BUF_INIT);
205
206 return &t->t_hashl[hidx & t->t_hashmask];
207 }
208
209 /*
210 * npf_table_create: create table with a specified ID.
211 */
212 npf_table_t *
213 npf_table_create(u_int tid, int type, size_t hsize)
214 {
215 npf_table_t *t;
216
217 KASSERT((u_int)tid < NPF_TABLE_SLOTS);
218
219 t = kmem_zalloc(sizeof(npf_table_t), KM_SLEEP);
220 switch (type) {
221 case NPF_TABLE_RBTREE:
222 rb_tree_init(&t->t_rbtree, &table_rbtree_ops);
223 break;
224 case NPF_TABLE_HASH:
225 t->t_hashl = hashinit(hsize, HASH_LIST, true, &t->t_hashmask);
226 if (t->t_hashl == NULL) {
227 kmem_free(t, sizeof(npf_table_t));
228 return NULL;
229 }
230 break;
231 default:
232 KASSERT(false);
233 }
234 rw_init(&t->t_lock);
235 t->t_type = type;
236 t->t_refcnt = 1;
237 t->t_id = tid;
238 return t;
239 }
240
241 /*
242 * npf_table_destroy: free all table entries and table itself.
243 */
244 void
245 npf_table_destroy(npf_table_t *t)
246 {
247 npf_tblent_t *e;
248 u_int n;
249
250 switch (t->t_type) {
251 case NPF_TABLE_HASH:
252 for (n = 0; n <= t->t_hashmask; n++) {
253 while ((e = LIST_FIRST(&t->t_hashl[n])) != NULL) {
254 LIST_REMOVE(e, te_entry.hashq);
255 pool_cache_put(tblent_cache, e);
256 }
257 }
258 hashdone(t->t_hashl, HASH_LIST, t->t_hashmask);
259 break;
260 case NPF_TABLE_RBTREE:
261 while ((e = rb_tree_iterate(&t->t_rbtree, NULL,
262 RB_DIR_LEFT)) != NULL) {
263 rb_tree_remove_node(&t->t_rbtree, e);
264 pool_cache_put(tblent_cache, e);
265 }
266 break;
267 default:
268 KASSERT(false);
269 }
270 rw_destroy(&t->t_lock);
271 kmem_free(t, sizeof(npf_table_t));
272 }
273
274 /*
275 * npf_table_ref: holds the reference on table.
276 *
277 * => Table must be locked.
278 */
279 void
280 npf_table_ref(npf_table_t *t)
281 {
282
283 KASSERT(rw_lock_held(&t->t_lock));
284 atomic_inc_uint(&t->t_refcnt);
285 }
286
287 /*
288 * npf_table_unref: drop reference from the table and destroy the table if
289 * it is the last reference.
290 */
291 void
292 npf_table_unref(npf_table_t *t)
293 {
294
295 if (atomic_dec_uint_nv(&t->t_refcnt) != 0) {
296 return;
297 }
298 npf_table_destroy(t);
299 }
300
301 /*
302 * npf_table_get: find the table according to ID and "get it" by locking it.
303 */
304 npf_table_t *
305 npf_table_get(npf_tableset_t *tset, u_int tid)
306 {
307 npf_tableset_t *rtset;
308 npf_table_t *t;
309
310 if ((u_int)tid >= NPF_TABLE_SLOTS) {
311 return NULL;
312 }
313 rtset = tset ? tset : npf_core_tableset();
314 t = rtset[tid];
315 if (t != NULL) {
316 rw_enter(&t->t_lock, RW_READER);
317 }
318 return t;
319 }
320
321 /*
322 * npf_table_put: "put table back" by unlocking it.
323 */
324 void
325 npf_table_put(npf_table_t *t)
326 {
327
328 rw_exit(&t->t_lock);
329 }
330
331 /*
332 * npf_table_check: validate ID and type.
333 * */
334 int
335 npf_table_check(npf_tableset_t *tset, u_int tid, int type)
336 {
337
338 if ((u_int)tid >= NPF_TABLE_SLOTS) {
339 return EINVAL;
340 }
341 if (tset[tid] != NULL) {
342 return EEXIST;
343 }
344 if (type != NPF_TABLE_RBTREE && type != NPF_TABLE_HASH) {
345 return EINVAL;
346 }
347 return 0;
348 }
349
350 /*
351 * npf_table_add_v4cidr: add an IPv4 CIDR into the table.
352 */
353 int
354 npf_table_add_v4cidr(npf_tableset_t *tset, u_int tid,
355 in_addr_t addr, in_addr_t mask)
356 {
357 struct npf_hashl *htbl;
358 npf_tblent_t *e, *it;
359 npf_table_t *t;
360 in_addr_t val;
361 int error = 0;
362
363 /* Allocate and setup entry. */
364 e = pool_cache_get(tblent_cache, PR_WAITOK);
365 e->te_addr = addr;
366 e->te_mask = mask;
367
368 /* Locks the table. */
369 t = npf_table_get(tset, tid);
370 if (__predict_false(t == NULL)) {
371 pool_cache_put(tblent_cache, e);
372 return EINVAL;
373 }
374 switch (t->t_type) {
375 case NPF_TABLE_HASH:
376 /* Generate hash value from: address & mask. */
377 val = addr & mask;
378 htbl = table_hash_bucket(t, &val, sizeof(in_addr_t));
379 /* Lookup to check for duplicates. */
380 LIST_FOREACH(it, htbl, te_entry.hashq) {
381 if (it->te_addr == addr && it->te_mask == mask)
382 break;
383 }
384 /* If no duplicate - insert entry. */
385 if (__predict_true(it == NULL)) {
386 LIST_INSERT_HEAD(htbl, e, te_entry.hashq);
387 } else {
388 error = EEXIST;
389 }
390 break;
391 case NPF_TABLE_RBTREE:
392 /* Insert entry. Returns false, if duplicate. */
393 if (rb_tree_insert_node(&t->t_rbtree, e) != e) {
394 error = EEXIST;
395 }
396 break;
397 default:
398 KASSERT(false);
399 }
400 npf_table_put(t);
401
402 if (__predict_false(error)) {
403 pool_cache_put(tblent_cache, e);
404 }
405 return error;
406 }
407
408 /*
409 * npf_table_rem_v4cidr: remove an IPv4 CIDR from the table.
410 */
411 int
412 npf_table_rem_v4cidr(npf_tableset_t *tset, u_int tid,
413 in_addr_t addr, in_addr_t mask)
414 {
415 struct npf_hashl *htbl;
416 npf_tblent_t *e;
417 npf_table_t *t;
418 in_addr_t val;
419 int error;
420
421 e = NULL;
422
423 /* Locks the table. */
424 t = npf_table_get(tset, tid);
425 if (__predict_false(t == NULL)) {
426 return EINVAL;
427 }
428 /* Lookup & remove. */
429 switch (t->t_type) {
430 case NPF_TABLE_HASH:
431 /* Generate hash value from: (address & mask). */
432 val = addr & mask;
433 htbl = table_hash_bucket(t, &val, sizeof(in_addr_t));
434 LIST_FOREACH(e, htbl, te_entry.hashq) {
435 if (e->te_addr == addr && e->te_mask == mask)
436 break;
437 }
438 if (__predict_true(e != NULL)) {
439 LIST_REMOVE(e, te_entry.hashq);
440 } else {
441 error = ESRCH;
442 }
443 break;
444 case NPF_TABLE_RBTREE:
445 /* Key: (address & mask). */
446 val = addr & mask;
447 e = rb_tree_find_node(&t->t_rbtree, &val);
448 if (__predict_true(e != NULL)) {
449 rb_tree_remove_node(&t->t_rbtree, e);
450 } else {
451 error = ESRCH;
452 }
453 break;
454 default:
455 KASSERT(false);
456 }
457 npf_table_put(t);
458
459 /* Free table the entry. */
460 if (__predict_true(e != NULL)) {
461 pool_cache_put(tblent_cache, e);
462 }
463 return e ? 0 : -1;
464 }
465
466 /*
467 * npf_table_match_v4addr: find the table according to ID, lookup and
468 * match the contents with specified IPv4 address.
469 */
470 int
471 npf_table_match_v4addr(u_int tid, in_addr_t ip4addr)
472 {
473 struct npf_hashl *htbl;
474 npf_tblent_t *e = NULL;
475 npf_table_t *t;
476
477 /* Locks the table. */
478 t = npf_table_get(NULL, tid);
479 if (__predict_false(t == NULL)) {
480 return EINVAL;
481 }
482 switch (t->t_type) {
483 case NPF_TABLE_HASH:
484 htbl = table_hash_bucket(t, &ip4addr, sizeof(in_addr_t));
485 LIST_FOREACH(e, htbl, te_entry.hashq) {
486 if ((ip4addr & e->te_mask) == e->te_addr) {
487 break;
488 }
489 }
490 break;
491 case NPF_TABLE_RBTREE:
492 e = rb_tree_find_node(&t->t_rbtree, &ip4addr);
493 KASSERT((ip4addr & e->te_mask) == e->te_addr);
494 break;
495 default:
496 KASSERT(false);
497 }
498 npf_table_put(t);
499
500 return e ? 0 : -1;
501 }
502