/*	$NetBSD: npf_tableset.c,v 1.11 2012/06/22 13:43:17 rmind Exp $	*/
2
3 /*-
4 * Copyright (c) 2009-2012 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This material is based upon work partially supported by The
8 * NetBSD Foundation under a contract with Mindaugas Rasiukevicius.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 /*
33 * NPF tableset module.
34 *
35 * TODO:
36 * - Currently, code is modeled to handle IPv4 CIDR blocks.
37 * - Dynamic hash growing/shrinking (i.e. re-hash functionality), maybe?
38 * - Dynamic array resize.
39 */
40
41 #include <sys/cdefs.h>
42 __KERNEL_RCSID(0, "$NetBSD: npf_tableset.c,v 1.11 2012/06/22 13:43:17 rmind Exp $");
43
44 #include <sys/param.h>
45 #include <sys/types.h>
46
47 #include <sys/atomic.h>
48 #include <sys/hash.h>
49 #include <sys/kmem.h>
50 #include <sys/pool.h>
51 #include <sys/queue.h>
52 #include <sys/rwlock.h>
53 #include <sys/systm.h>
54 #include <sys/types.h>
55
56 #include "npf_impl.h"
57
/* Table entry structure: one CIDR block stored in a table. */
struct npf_tblent {
	/*
	 * Storage linkage: hash bucket list entry or RB-tree node,
	 * depending on the table's storage type (only one is in use
	 * for any given entry).
	 */
	union {
		LIST_ENTRY(npf_tblent) hashq;
		rb_node_t rbnode;
	} te_entry;
	/* CIDR block: address and netmask (see file TODO: IPv4-modeled). */
	npf_addr_t te_addr;
	npf_netmask_t te_mask;
};

/* Head type for the hash bucket lists. */
LIST_HEAD(npf_hashl, npf_tblent);
71
/* Table structure. */
struct npf_table {
	char t_name[16];
	/* Lock and reference count. */
	krwlock_t t_lock;
	u_int t_refcnt;
	/* Table ID (index of this table within the tableset array). */
	u_int t_id;
	/* The storage type can be: 1. Hash 2. RB-tree. */
	int t_type;
	struct npf_hashl * t_hashl;	/* hash buckets (NPF_TABLE_HASH) */
	u_long t_hashmask;		/* bucket index mask, from hashinit() */
	rb_tree_t t_rbtree;		/* tree root (NPF_TABLE_TREE) */
};
86
/* Pool cache backing all table entry (npf_tblent_t) allocations. */
static pool_cache_t tblent_cache __read_mostly;
88
/*
 * npf_tableset_sysinit: initialise tableset structures (creates the
 * pool cache used for table entries).
 */
void
npf_tableset_sysinit(void)
{

	tblent_cache = pool_cache_init(sizeof(npf_tblent_t), coherency_unit,
	    0, 0, "npftenpl", NULL, IPL_NONE, NULL, NULL, NULL);
}
99
/*
 * npf_tableset_sysfini: destroy the table entry pool cache.
 */
void
npf_tableset_sysfini(void)
{

	pool_cache_destroy(tblent_cache);
}
106
107 npf_tableset_t *
108 npf_tableset_create(void)
109 {
110 const size_t sz = NPF_TABLE_SLOTS * sizeof(npf_table_t *);
111
112 return kmem_zalloc(sz, KM_SLEEP);
113 }
114
115 void
116 npf_tableset_destroy(npf_tableset_t *tblset)
117 {
118 const size_t sz = NPF_TABLE_SLOTS * sizeof(npf_table_t *);
119 npf_table_t *t;
120 u_int tid;
121
122 /*
123 * Destroy all tables (no references should be held, as ruleset
124 * should be destroyed before).
125 */
126 for (tid = 0; tid < NPF_TABLE_SLOTS; tid++) {
127 t = tblset[tid];
128 if (t != NULL) {
129 npf_table_destroy(t);
130 }
131 }
132 kmem_free(tblset, sz);
133 }
134
135 /*
136 * npf_tableset_insert: insert the table into the specified tableset.
137 *
138 * => Returns 0 on success, fails and returns errno if ID is already used.
139 */
140 int
141 npf_tableset_insert(npf_tableset_t *tblset, npf_table_t *t)
142 {
143 const u_int tid = t->t_id;
144 int error;
145
146 KASSERT((u_int)tid < NPF_TABLE_SLOTS);
147
148 if (tblset[tid] == NULL) {
149 tblset[tid] = t;
150 error = 0;
151 } else {
152 error = EEXIST;
153 }
154 return error;
155 }
156
157 /*
158 * Red-black tree storage.
159 */
160
161 static signed int
162 table_rbtree_cmp_nodes(void *ctx, const void *n1, const void *n2)
163 {
164 const npf_tblent_t * const te1 = n1;
165 const npf_tblent_t * const te2 = n2;
166
167 return npf_addr_cmp(&te1->te_addr, te1->te_mask,
168 &te2->te_addr, te2->te_mask);
169 }
170
171 static signed int
172 table_rbtree_cmp_key(void *ctx, const void *n1, const void *key)
173 {
174 const npf_tblent_t * const te = n1;
175 const npf_addr_t *t2 = key;
176
177 return npf_addr_cmp(&te->te_addr, te->te_mask, t2, NPF_NO_NETMASK);
178 }
179
/* rb(3) tree operations for tables using NPF_TABLE_TREE storage. */
static const rb_tree_ops_t table_rbtree_ops = {
	.rbto_compare_nodes = table_rbtree_cmp_nodes,
	.rbto_compare_key = table_rbtree_cmp_key,
	.rbto_node_offset = offsetof(npf_tblent_t, te_entry.rbnode),
	.rbto_context = NULL
};
186
187 /*
188 * Hash helper routine.
189 */
190
191 static inline struct npf_hashl *
192 table_hash_bucket(npf_table_t *t, const void *buf, size_t sz)
193 {
194 const uint32_t hidx = hash32_buf(buf, sz, HASH32_BUF_INIT);
195
196 return &t->t_hashl[hidx & t->t_hashmask];
197 }
198
199 /*
200 * npf_table_create: create table with a specified ID.
201 */
202 npf_table_t *
203 npf_table_create(u_int tid, int type, size_t hsize)
204 {
205 npf_table_t *t;
206
207 KASSERT((u_int)tid < NPF_TABLE_SLOTS);
208
209 t = kmem_zalloc(sizeof(npf_table_t), KM_SLEEP);
210 switch (type) {
211 case NPF_TABLE_TREE:
212 rb_tree_init(&t->t_rbtree, &table_rbtree_ops);
213 break;
214 case NPF_TABLE_HASH:
215 t->t_hashl = hashinit(hsize, HASH_LIST, true, &t->t_hashmask);
216 if (t->t_hashl == NULL) {
217 kmem_free(t, sizeof(npf_table_t));
218 return NULL;
219 }
220 break;
221 default:
222 KASSERT(false);
223 }
224 rw_init(&t->t_lock);
225 t->t_type = type;
226 t->t_refcnt = 1;
227 t->t_id = tid;
228 return t;
229 }
230
/*
 * npf_table_destroy: free all table entries and the table itself.
 *
 * => Called with no references held (from npf_table_unref on last
 *    reference, or from npf_tableset_destroy); no lock is taken here.
 */
void
npf_table_destroy(npf_table_t *t)
{
	npf_tblent_t *e;
	u_int n;

	switch (t->t_type) {
	case NPF_TABLE_HASH:
		/* Drain every hash bucket, returning entries to the pool. */
		for (n = 0; n <= t->t_hashmask; n++) {
			while ((e = LIST_FIRST(&t->t_hashl[n])) != NULL) {
				LIST_REMOVE(e, te_entry.hashq);
				pool_cache_put(tblent_cache, e);
			}
		}
		hashdone(t->t_hashl, HASH_LIST, t->t_hashmask);
		break;
	case NPF_TABLE_TREE:
		/* Repeatedly remove the leftmost node until empty. */
		while ((e = rb_tree_iterate(&t->t_rbtree, NULL,
		    RB_DIR_LEFT)) != NULL) {
			rb_tree_remove_node(&t->t_rbtree, e);
			pool_cache_put(tblent_cache, e);
		}
		break;
	default:
		KASSERT(false);
	}
	rw_destroy(&t->t_lock);
	kmem_free(t, sizeof(npf_table_t));
}
263
/*
 * npf_table_ref: hold an additional reference on the table.
 *
 * => Table must be locked (read or write) by the caller.
 */
void
npf_table_ref(npf_table_t *t)
{

	KASSERT(rw_lock_held(&t->t_lock));
	atomic_inc_uint(&t->t_refcnt);
}
276
277 /*
278 * npf_table_unref: drop reference from the table and destroy the table if
279 * it is the last reference.
280 */
281 void
282 npf_table_unref(npf_table_t *t)
283 {
284
285 if (atomic_dec_uint_nv(&t->t_refcnt) != 0) {
286 return;
287 }
288 npf_table_destroy(t);
289 }
290
291 /*
292 * npf_table_get: find the table according to ID and "get it" by locking it.
293 */
294 npf_table_t *
295 npf_table_get(npf_tableset_t *tset, u_int tid)
296 {
297 npf_table_t *t;
298
299 KASSERT(tset != NULL);
300
301 if ((u_int)tid >= NPF_TABLE_SLOTS) {
302 return NULL;
303 }
304 t = tset[tid];
305 if (t != NULL) {
306 rw_enter(&t->t_lock, RW_READER);
307 }
308 return t;
309 }
310
/*
 * npf_table_put: "put the table back" by releasing the lock acquired
 * by npf_table_get().
 */
void
npf_table_put(npf_table_t *t)
{

	rw_exit(&t->t_lock);
}
320
321 /*
322 * npf_table_check: validate ID and type.
323 * */
324 int
325 npf_table_check(npf_tableset_t *tset, u_int tid, int type)
326 {
327
328 if ((u_int)tid >= NPF_TABLE_SLOTS) {
329 return EINVAL;
330 }
331 if (tset[tid] != NULL) {
332 return EEXIST;
333 }
334 if (type != NPF_TABLE_TREE && type != NPF_TABLE_HASH) {
335 return EINVAL;
336 }
337 return 0;
338 }
339
340 /*
341 * npf_table_add_cidr: add an IPv4 or IPv6 CIDR into the table.
342 */
343 int
344 npf_table_add_cidr(npf_tableset_t *tset, u_int tid,
345 const npf_addr_t *addr, const npf_netmask_t mask)
346 {
347 struct npf_hashl *htbl;
348 npf_tblent_t *e, *it;
349 npf_table_t *t;
350 npf_addr_t val;
351 int error = 0;
352
353 if (mask > NPF_MAX_NETMASK) {
354 return EINVAL;
355 }
356 e = pool_cache_get(tblent_cache, PR_WAITOK);
357 memcpy(&e->te_addr, addr, sizeof(npf_addr_t));
358 e->te_mask = mask;
359
360 /* Get the table (acquire the lock). */
361 t = npf_table_get(tset, tid);
362 if (t == NULL) {
363 pool_cache_put(tblent_cache, e);
364 return EINVAL;
365 }
366 switch (t->t_type) {
367 case NPF_TABLE_HASH:
368 /* Generate hash value from: address & mask. */
369 npf_addr_mask(addr, mask, &val);
370 htbl = table_hash_bucket(t, &val, sizeof(npf_addr_t));
371 /* Lookup to check for duplicates. */
372 LIST_FOREACH(it, htbl, te_entry.hashq) {
373 if (it->te_mask != mask) {
374 continue;
375 }
376 if (!memcmp(&it->te_addr, addr, sizeof(npf_addr_t))) {
377 break;
378 }
379 }
380 /* If no duplicate - insert entry. */
381 if (__predict_true(it == NULL)) {
382 LIST_INSERT_HEAD(htbl, e, te_entry.hashq);
383 } else {
384 error = EEXIST;
385 }
386 break;
387 case NPF_TABLE_TREE:
388 /* Insert entry. Returns false, if duplicate. */
389 if (rb_tree_insert_node(&t->t_rbtree, e) != e) {
390 error = EEXIST;
391 }
392 break;
393 default:
394 KASSERT(false);
395 }
396 npf_table_put(t);
397
398 if (error) {
399 pool_cache_put(tblent_cache, e);
400 }
401 return error;
402 }
403
404 /*
405 * npf_table_rem_v4cidr: remove an IPv4 CIDR from the table.
406 */
407 int
408 npf_table_rem_cidr(npf_tableset_t *tset, u_int tid,
409 const npf_addr_t *addr, const npf_netmask_t mask)
410 {
411 struct npf_hashl *htbl;
412 npf_tblent_t *e;
413 npf_table_t *t;
414 npf_addr_t val;
415 int error;
416
417 if (mask > NPF_MAX_NETMASK) {
418 return EINVAL;
419 }
420
421 /* Get the table (acquire the lock). */
422 t = npf_table_get(tset, tid);
423 if (__predict_false(t == NULL)) {
424 return EINVAL;
425 }
426 e = NULL;
427
428 switch (t->t_type) {
429 case NPF_TABLE_HASH:
430 /* Generate hash value from: (address & mask). */
431 npf_addr_mask(addr, mask, &val);
432 htbl = table_hash_bucket(t, &val, sizeof(npf_addr_t));
433 LIST_FOREACH(e, htbl, te_entry.hashq) {
434 if (e->te_mask != mask) {
435 continue;
436 }
437 if (!memcmp(&e->te_addr, addr, sizeof(npf_addr_t))) {
438 break;
439 }
440 }
441 if (__predict_true(e != NULL)) {
442 LIST_REMOVE(e, te_entry.hashq);
443 } else {
444 error = ESRCH;
445 }
446 break;
447 case NPF_TABLE_TREE:
448 /* Key: (address & mask). */
449 npf_addr_mask(addr, mask, &val);
450 e = rb_tree_find_node(&t->t_rbtree, &val);
451 if (__predict_true(e != NULL)) {
452 rb_tree_remove_node(&t->t_rbtree, e);
453 } else {
454 error = ESRCH;
455 }
456 break;
457 default:
458 KASSERT(false);
459 }
460 npf_table_put(t);
461
462 if (e == NULL) {
463 return ENOENT;
464 }
465 pool_cache_put(tblent_cache, e);
466 return 0;
467 }
468
/*
 * npf_table_match_addr: find the table according to ID, then look up
 * and match the contents against the specified address (npf_addr_t,
 * i.e. IPv4 or IPv6 -- although see the file TODO about the code being
 * modeled for IPv4 CIDR blocks).
 *
 * => Returns 0 on match, ENOENT if no entry matches, EINVAL on bad ID.
 */
int
npf_table_match_addr(npf_tableset_t *tset, u_int tid, const npf_addr_t *addr)
{
	struct npf_hashl *htbl;
	npf_tblent_t *e = NULL;
	npf_table_t *t;

	/* Get the table (acquire the lock). */
	t = npf_table_get(tset, tid);
	if (__predict_false(t == NULL)) {
		return EINVAL;
	}
	switch (t->t_type) {
	case NPF_TABLE_HASH:
		/*
		 * NOTE(review): the bucket is selected by the unmasked
		 * address, while npf_table_add_cidr buckets by
		 * (address & mask) -- entries with a non-full netmask may
		 * therefore hash to a different bucket; verify the intended
		 * semantics for hash tables holding non-/32 entries.
		 */
		htbl = table_hash_bucket(t, addr, sizeof(npf_addr_t));
		LIST_FOREACH(e, htbl, te_entry.hashq) {
			if (npf_addr_cmp(addr, e->te_mask, &e->te_addr,
			    NPF_NO_NETMASK) == 0)
				break;
		}
		break;
	case NPF_TABLE_TREE:
		/* Tree lookup by address key (table_rbtree_cmp_key). */
		e = rb_tree_find_node(&t->t_rbtree, addr);
		break;
	default:
		KASSERT(false);
	}
	npf_table_put(t);

	if (e == NULL) {
		return ENOENT;
	}
	/* Sanity: the found entry must indeed match the address. */
	KASSERT(npf_addr_cmp(addr, e->te_mask, &e->te_addr,
	    NPF_NO_NETMASK) == 0);
	return 0;
}
509