/*	$NetBSD: npf_tableset.c,v 1.17 2013/02/09 03:35:32 rmind Exp $	*/
2
3 /*-
4 * Copyright (c) 2009-2012 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This material is based upon work partially supported by The
8 * NetBSD Foundation under a contract with Mindaugas Rasiukevicius.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 /*
33 * NPF tableset module.
34 *
35 * Notes
36 *
37 * The tableset is an array of tables. After the creation, the array
38 * is immutable. The caller is responsible to synchronise the access
39 * to the tableset. The table can either be a hash or a tree. Its
40 * entries are protected by a read-write lock.
41 */
42
43 #include <sys/cdefs.h>
44 __KERNEL_RCSID(0, "$NetBSD: npf_tableset.c,v 1.17 2013/02/09 03:35:32 rmind Exp $");
45
46 #include <sys/param.h>
47 #include <sys/types.h>
48
49 #include <sys/atomic.h>
50 #include <sys/hash.h>
51 #include <sys/kmem.h>
52 #include <sys/pool.h>
53 #include <sys/queue.h>
54 #include <sys/rwlock.h>
55 #include <sys/systm.h>
56 #include <sys/types.h>
57
58 #include "npf_impl.h"
59
60 /*
61 * Table structures.
62 */
63
/*
 * Table entry: a single IP address (or CIDR prefix) stored either in a
 * hash bucket or in a PATRICIA tree, depending on the table type.
 */
typedef struct npf_tblent {
	union {
		LIST_ENTRY(npf_tblent) hashq;	/* hash-chain linkage */
		pt_node_t node;			/* ptree node linkage */
	} te_entry;
	int te_alen;		/* address length in bytes (4 or 16) */
	npf_addr_t te_addr;	/* the address itself */
} npf_tblent_t;

LIST_HEAD(npf_hashl, npf_tblent);

/*
 * Table: a named, reference-counted container of entries.  Entries are
 * protected by t_lock; the refcount is manipulated with atomics.
 */
struct npf_table {
	char t_name[16];
	/* Lock and reference count. */
	krwlock_t t_lock;
	u_int t_refcnt;
	/* Total number of items. */
	u_int t_nitems;
	/* Table ID. */
	u_int t_id;
	/* The storage type can be: a) hash b) tree. */
	int t_type;
	struct npf_hashl * t_hashl;	/* hash buckets (hash type only) */
	u_long t_hashmask;		/* bucket index mask */
	/* Separate trees for IPv4 and IPv6. */
	pt_tree_t t_tree[2];
};

/* Map address length (4 or 16 bytes) to tree index: 0 = IPv4, 1 = IPv6. */
#define NPF_ADDRLEN2TREE(alen) ((alen) >> 4)

/* Pool cache for table entries, shared by all tables. */
static pool_cache_t tblent_cache __read_mostly;
95
/*
 * npf_tableset_sysinit: initialise tableset structures -- creates the
 * pool cache used to allocate table entries.
 */
void
npf_tableset_sysinit(void)
{

	tblent_cache = pool_cache_init(sizeof(npf_tblent_t), coherency_unit,
	    0, 0, "npftblpl", NULL, IPL_NONE, NULL, NULL, NULL);
}
106
/*
 * npf_tableset_sysfini: destroy the table entry pool cache.
 */
void
npf_tableset_sysfini(void)
{

	pool_cache_destroy(tblent_cache);
}
113
114 npf_tableset_t *
115 npf_tableset_create(void)
116 {
117 const size_t sz = NPF_TABLE_SLOTS * sizeof(npf_table_t *);
118
119 return kmem_zalloc(sz, KM_SLEEP);
120 }
121
122 void
123 npf_tableset_destroy(npf_tableset_t *tblset)
124 {
125 const size_t sz = NPF_TABLE_SLOTS * sizeof(npf_table_t *);
126 npf_table_t *t;
127 u_int tid;
128
129 /*
130 * Destroy all tables (no references should be held, as ruleset
131 * should be destroyed before).
132 */
133 for (tid = 0; tid < NPF_TABLE_SLOTS; tid++) {
134 t = tblset[tid];
135 if (t && atomic_dec_uint_nv(&t->t_refcnt) == 0) {
136 npf_table_destroy(t);
137 }
138 }
139 kmem_free(tblset, sz);
140 }
141
142 /*
143 * npf_tableset_insert: insert the table into the specified tableset.
144 *
145 * => Returns 0 on success. Fails and returns error if ID is already used.
146 */
147 int
148 npf_tableset_insert(npf_tableset_t *tblset, npf_table_t *t)
149 {
150 const u_int tid = t->t_id;
151 int error;
152
153 KASSERT((u_int)tid < NPF_TABLE_SLOTS);
154
155 if (tblset[tid] == NULL) {
156 atomic_inc_uint(&t->t_refcnt);
157 tblset[tid] = t;
158 error = 0;
159 } else {
160 error = EEXIST;
161 }
162 return error;
163 }
164
165 /*
166 * npf_tableset_reload: iterate all tables and if the new table is of the
167 * same type and has no items, then we preserve the old one and its entries.
168 *
169 * => The caller is responsible for providing synchronisation.
170 */
171 void
172 npf_tableset_reload(npf_tableset_t *ntset, npf_tableset_t *otset)
173 {
174 for (int i = 0; i < NPF_TABLE_SLOTS; i++) {
175 npf_table_t *t = ntset[i], *ot = otset[i];
176
177 if (t == NULL || ot == NULL) {
178 continue;
179 }
180 if (t->t_nitems || t->t_type != ot->t_type) {
181 continue;
182 }
183
184 /*
185 * Acquire a reference since the table has to be kept
186 * in the old tableset.
187 */
188 atomic_inc_uint(&ot->t_refcnt);
189 ntset[i] = ot;
190
191 /* Only reference, never been visible. */
192 t->t_refcnt--;
193 npf_table_destroy(t);
194 }
195 }
196
197 /*
198 * Few helper routines.
199 */
200
201 static npf_tblent_t *
202 table_hash_lookup(const npf_table_t *t, const npf_addr_t *addr,
203 const int alen, struct npf_hashl **rhtbl)
204 {
205 const uint32_t hidx = hash32_buf(addr, alen, HASH32_BUF_INIT);
206 struct npf_hashl *htbl = &t->t_hashl[hidx & t->t_hashmask];
207 npf_tblent_t *ent;
208
209 /*
210 * Lookup the hash table and check for duplicates.
211 * Note: mask is ignored for the hash storage.
212 */
213 LIST_FOREACH(ent, htbl, te_entry.hashq) {
214 if (ent->te_alen != alen) {
215 continue;
216 }
217 if (memcmp(&ent->te_addr, addr, alen) == 0) {
218 break;
219 }
220 }
221 *rhtbl = htbl;
222 return ent;
223 }
224
225 static void
226 table_tree_destroy(pt_tree_t *tree)
227 {
228 npf_tblent_t *ent;
229
230 while ((ent = ptree_iterate(tree, NULL, PT_ASCENDING)) != NULL) {
231 ptree_remove_node(tree, ent);
232 pool_cache_put(tblent_cache, ent);
233 }
234 }
235
236 /*
237 * npf_table_create: create table with a specified ID.
238 */
239 npf_table_t *
240 npf_table_create(u_int tid, int type, size_t hsize)
241 {
242 npf_table_t *t;
243
244 KASSERT((u_int)tid < NPF_TABLE_SLOTS);
245
246 t = kmem_zalloc(sizeof(npf_table_t), KM_SLEEP);
247 switch (type) {
248 case NPF_TABLE_TREE:
249 ptree_init(&t->t_tree[0], &npf_table_ptree_ops,
250 (void *)(sizeof(struct in_addr) / sizeof(uint32_t)),
251 offsetof(npf_tblent_t, te_entry.node),
252 offsetof(npf_tblent_t, te_addr));
253 ptree_init(&t->t_tree[1], &npf_table_ptree_ops,
254 (void *)(sizeof(struct in6_addr) / sizeof(uint32_t)),
255 offsetof(npf_tblent_t, te_entry.node),
256 offsetof(npf_tblent_t, te_addr));
257 break;
258 case NPF_TABLE_HASH:
259 t->t_hashl = hashinit(hsize, HASH_LIST, true, &t->t_hashmask);
260 if (t->t_hashl == NULL) {
261 kmem_free(t, sizeof(npf_table_t));
262 return NULL;
263 }
264 break;
265 default:
266 KASSERT(false);
267 }
268 rw_init(&t->t_lock);
269 t->t_type = type;
270 t->t_id = tid;
271
272 return t;
273 }
274
275 /*
276 * npf_table_destroy: free all table entries and table itself.
277 */
278 void
279 npf_table_destroy(npf_table_t *t)
280 {
281 KASSERT(t->t_refcnt == 0);
282
283 switch (t->t_type) {
284 case NPF_TABLE_HASH:
285 for (unsigned n = 0; n <= t->t_hashmask; n++) {
286 npf_tblent_t *ent;
287
288 while ((ent = LIST_FIRST(&t->t_hashl[n])) != NULL) {
289 LIST_REMOVE(ent, te_entry.hashq);
290 pool_cache_put(tblent_cache, ent);
291 }
292 }
293 hashdone(t->t_hashl, HASH_LIST, t->t_hashmask);
294 break;
295 case NPF_TABLE_TREE:
296 table_tree_destroy(&t->t_tree[0]);
297 table_tree_destroy(&t->t_tree[1]);
298 break;
299 default:
300 KASSERT(false);
301 }
302 rw_destroy(&t->t_lock);
303 kmem_free(t, sizeof(npf_table_t));
304 }
305
306 /*
307 * npf_table_check: validate ID and type.
308 */
309 int
310 npf_table_check(const npf_tableset_t *tset, u_int tid, int type)
311 {
312
313 if ((u_int)tid >= NPF_TABLE_SLOTS) {
314 return EINVAL;
315 }
316 if (tset[tid] != NULL) {
317 return EEXIST;
318 }
319 if (type != NPF_TABLE_TREE && type != NPF_TABLE_HASH) {
320 return EINVAL;
321 }
322 return 0;
323 }
324
325 static int
326 table_cidr_check(const u_int aidx, const npf_addr_t *addr,
327 const npf_netmask_t mask)
328 {
329
330 if (mask > NPF_MAX_NETMASK && mask != NPF_NO_NETMASK) {
331 return EINVAL;
332 }
333 if (aidx > 1) {
334 return EINVAL;
335 }
336
337 /*
338 * For IPv4 (aidx = 0) - 32 and for IPv6 (aidx = 1) - 128.
339 * If it is a host - shall use NPF_NO_NETMASK.
340 */
341 if (mask >= (aidx ? 128 : 32) && mask != NPF_NO_NETMASK) {
342 return EINVAL;
343 }
344 return 0;
345 }
346
347 /*
348 * npf_table_insert: add an IP CIDR entry into the table.
349 */
350 int
351 npf_table_insert(npf_tableset_t *tset, u_int tid, const int alen,
352 const npf_addr_t *addr, const npf_netmask_t mask)
353 {
354 const u_int aidx = NPF_ADDRLEN2TREE(alen);
355 npf_tblent_t *ent;
356 npf_table_t *t;
357 int error;
358
359 if ((u_int)tid >= NPF_TABLE_SLOTS || (t = tset[tid]) == NULL) {
360 return EINVAL;
361 }
362
363 error = table_cidr_check(aidx, addr, mask);
364 if (error) {
365 return error;
366 }
367 ent = pool_cache_get(tblent_cache, PR_WAITOK);
368 memcpy(&ent->te_addr, addr, alen);
369 ent->te_alen = alen;
370
371 /*
372 * Insert the entry. Return an error on duplicate.
373 */
374 rw_enter(&t->t_lock, RW_WRITER);
375 switch (t->t_type) {
376 case NPF_TABLE_HASH: {
377 struct npf_hashl *htbl;
378
379 /*
380 * Hash tables by the concept support only IPs.
381 */
382 if (mask != NPF_NO_NETMASK) {
383 error = EINVAL;
384 break;
385 }
386 if (!table_hash_lookup(t, addr, alen, &htbl)) {
387 LIST_INSERT_HEAD(htbl, ent, te_entry.hashq);
388 t->t_nitems++;
389 } else {
390 error = EEXIST;
391 }
392 break;
393 }
394 case NPF_TABLE_TREE: {
395 pt_tree_t *tree = &t->t_tree[aidx];
396 bool ok;
397
398 /*
399 * If no mask specified, use maximum mask.
400 */
401 ok = (mask != NPF_NO_NETMASK) ?
402 ptree_insert_mask_node(tree, ent, mask) :
403 ptree_insert_node(tree, ent);
404 if (ok) {
405 t->t_nitems++;
406 error = 0;
407 } else {
408 error = EEXIST;
409 }
410 break;
411 }
412 default:
413 KASSERT(false);
414 }
415 rw_exit(&t->t_lock);
416
417 if (error) {
418 pool_cache_put(tblent_cache, ent);
419 }
420 return error;
421 }
422
423 /*
424 * npf_table_remove: remove the IP CIDR entry from the table.
425 */
426 int
427 npf_table_remove(npf_tableset_t *tset, u_int tid, const int alen,
428 const npf_addr_t *addr, const npf_netmask_t mask)
429 {
430 const u_int aidx = NPF_ADDRLEN2TREE(alen);
431 npf_tblent_t *ent;
432 npf_table_t *t;
433 int error;
434
435 error = table_cidr_check(aidx, addr, mask);
436 if (error) {
437 return error;
438 }
439
440 if ((u_int)tid >= NPF_TABLE_SLOTS || (t = tset[tid]) == NULL) {
441 return EINVAL;
442 }
443
444 rw_enter(&t->t_lock, RW_WRITER);
445 switch (t->t_type) {
446 case NPF_TABLE_HASH: {
447 struct npf_hashl *htbl;
448
449 ent = table_hash_lookup(t, addr, alen, &htbl);
450 if (__predict_true(ent != NULL)) {
451 LIST_REMOVE(ent, te_entry.hashq);
452 t->t_nitems--;
453 }
454 break;
455 }
456 case NPF_TABLE_TREE: {
457 pt_tree_t *tree = &t->t_tree[aidx];
458
459 ent = ptree_find_node(tree, addr);
460 if (__predict_true(ent != NULL)) {
461 ptree_remove_node(tree, ent);
462 t->t_nitems--;
463 }
464 break;
465 }
466 default:
467 KASSERT(false);
468 ent = NULL;
469 }
470 rw_exit(&t->t_lock);
471
472 if (ent == NULL) {
473 return ENOENT;
474 }
475 pool_cache_put(tblent_cache, ent);
476 return 0;
477 }
478
479 /*
480 * npf_table_lookup: find the table according to ID, lookup and match
481 * the contents with the specified IP address.
482 */
483 int
484 npf_table_lookup(npf_tableset_t *tset, u_int tid,
485 const int alen, const npf_addr_t *addr)
486 {
487 const u_int aidx = NPF_ADDRLEN2TREE(alen);
488 npf_tblent_t *ent;
489 npf_table_t *t;
490
491 if (__predict_false(aidx > 1)) {
492 return EINVAL;
493 }
494
495 if ((u_int)tid >= NPF_TABLE_SLOTS || (t = tset[tid]) == NULL) {
496 return EINVAL;
497 }
498
499 rw_enter(&t->t_lock, RW_READER);
500 switch (t->t_type) {
501 case NPF_TABLE_HASH: {
502 struct npf_hashl *htbl;
503 ent = table_hash_lookup(t, addr, alen, &htbl);
504 break;
505 }
506 case NPF_TABLE_TREE: {
507 ent = ptree_find_node(&t->t_tree[aidx], addr);
508 break;
509 }
510 default:
511 KASSERT(false);
512 ent = NULL;
513 }
514 rw_exit(&t->t_lock);
515
516 return ent ? 0 : ENOENT;
517 }
518
519 static int
520 table_ent_copyout(npf_tblent_t *ent, npf_netmask_t mask,
521 void *ubuf, size_t len, size_t *off)
522 {
523 void *ubufp = (uint8_t *)ubuf + *off;
524 npf_ioctl_ent_t uent;
525
526 if ((*off += sizeof(npf_ioctl_ent_t)) > len) {
527 return ENOMEM;
528 }
529 uent.alen = ent->te_alen;
530 memcpy(&uent.addr, &ent->te_addr, sizeof(npf_addr_t));
531 uent.mask = mask;
532
533 return copyout(&uent, ubufp, sizeof(npf_ioctl_ent_t));
534 }
535
536 static int
537 table_tree_list(pt_tree_t *tree, npf_netmask_t maxmask, void *ubuf,
538 size_t len, size_t *off)
539 {
540 npf_tblent_t *ent = NULL;
541 int error = 0;
542
543 while ((ent = ptree_iterate(tree, ent, PT_ASCENDING)) != NULL) {
544 pt_bitlen_t blen;
545
546 if (!ptree_mask_node_p(tree, ent, &blen)) {
547 blen = maxmask;
548 }
549 error = table_ent_copyout(ent, blen, ubuf, len, off);
550 if (error)
551 break;
552 }
553 return error;
554 }
555
556 /*
557 * npf_table_list: copy a list of all table entries into a userspace buffer.
558 */
559 int
560 npf_table_list(npf_tableset_t *tset, u_int tid, void *ubuf, size_t len)
561 {
562 npf_table_t *t;
563 size_t off = 0;
564 int error = 0;
565
566 if ((u_int)tid >= NPF_TABLE_SLOTS || (t = tset[tid]) == NULL) {
567 return EINVAL;
568 }
569
570 rw_enter(&t->t_lock, RW_READER);
571 switch (t->t_type) {
572 case NPF_TABLE_HASH:
573 for (unsigned n = 0; n <= t->t_hashmask; n++) {
574 npf_tblent_t *ent;
575
576 LIST_FOREACH(ent, &t->t_hashl[n], te_entry.hashq)
577 if ((error = table_ent_copyout(ent, 0, ubuf,
578 len, &off)) != 0)
579 break;
580 }
581 break;
582 case NPF_TABLE_TREE:
583 error = table_tree_list(&t->t_tree[0], 32, ubuf, len, &off);
584 if (error)
585 break;
586 error = table_tree_list(&t->t_tree[1], 128, ubuf, len, &off);
587 break;
588 default:
589 KASSERT(false);
590 }
591 rw_exit(&t->t_lock);
592
593 return error;
594 }
595