npf_tableset.c revision 1.16 1 /* $NetBSD: npf_tableset.c,v 1.16 2012/12/04 19:28:16 rmind Exp $ */
2
3 /*-
4 * Copyright (c) 2009-2012 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This material is based upon work partially supported by The
8 * NetBSD Foundation under a contract with Mindaugas Rasiukevicius.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 /*
33 * NPF tableset module.
34 *
35 * Notes
36 *
37 * The tableset is an array of tables. After the creation, the array
38 * is immutable. The caller is responsible to synchronise the access
39 * to the tableset. The table can either be a hash or a tree. Its
40 * entries are protected by a read-write lock.
41 */
42
43 #include <sys/cdefs.h>
44 __KERNEL_RCSID(0, "$NetBSD: npf_tableset.c,v 1.16 2012/12/04 19:28:16 rmind Exp $");
45
46 #include <sys/param.h>
47 #include <sys/types.h>
48
49 #include <sys/atomic.h>
50 #include <sys/hash.h>
51 #include <sys/kmem.h>
52 #include <sys/pool.h>
53 #include <sys/queue.h>
54 #include <sys/rwlock.h>
55 #include <sys/systm.h>
56 #include <sys/types.h>
57
58 #include "npf_impl.h"
59
60 /*
61 * Table structures.
62 */
63
/*
 * Table entry: a single IP address (or a CIDR when stored in a tree).
 * An entry lives in exactly one kind of storage at a time, hence the
 * union of the hash-list linkage and the tree node.
 */
typedef struct npf_tblent {
	union {
		/* Hash storage: linkage within a hash bucket. */
		LIST_ENTRY(npf_tblent) hashq;
		/* Tree storage: embedded ptree node. */
		pt_node_t	node;
	} te_entry;
	int		te_alen;	/* address length in bytes */
	npf_addr_t	te_addr;	/* the address (lookup key) itself */
} npf_tblent_t;

LIST_HEAD(npf_hashl, npf_tblent);

/*
 * Table: a collection of IP entries stored either in a hash (exact
 * addresses only) or in a pair of trees (CIDRs).  Entries are protected
 * by t_lock; t_refcnt counts the tablesets referencing this table.
 */
struct npf_table {
	char t_name[16];
	/* Lock and reference count. */
	krwlock_t t_lock;
	u_int t_refcnt;
	/* Total number of items. */
	u_int t_nitems;
	/* Table ID. */
	u_int t_id;
	/* The storage type can be: a) hash b) tree. */
	int t_type;
	struct npf_hashl * t_hashl;
	u_long t_hashmask;	/* bucket count minus one, from hashinit() */
	/* Separate trees for IPv4 and IPv6. */
	pt_tree_t t_tree[2];
};

/* Map an address length to a tree index: 0 for IPv4, 1 for IPv6 (16 bytes). */
#define NPF_ADDRLEN2TREE(alen) ((alen) >> 4)

/* Pool cache for table entries, shared by all tables. */
static pool_cache_t tblent_cache __read_mostly;
95
/*
 * npf_tableset_sysinit: initialise tableset structures.
 *
 * => Creates the global pool cache used for all table entries.
 */
void
npf_tableset_sysinit(void)
{

	tblent_cache = pool_cache_init(sizeof(npf_tblent_t), coherency_unit,
	    0, 0, "npftblpl", NULL, IPL_NONE, NULL, NULL, NULL);
}
106
/*
 * npf_tableset_sysfini: destroy the table entry pool cache.
 *
 * => All tables must have been destroyed before this is called.
 */
void
npf_tableset_sysfini(void)
{

	pool_cache_destroy(tblent_cache);
}
113
114 npf_tableset_t *
115 npf_tableset_create(void)
116 {
117 const size_t sz = NPF_TABLE_SLOTS * sizeof(npf_table_t *);
118
119 return kmem_zalloc(sz, KM_SLEEP);
120 }
121
122 void
123 npf_tableset_destroy(npf_tableset_t *tblset)
124 {
125 const size_t sz = NPF_TABLE_SLOTS * sizeof(npf_table_t *);
126 npf_table_t *t;
127 u_int tid;
128
129 /*
130 * Destroy all tables (no references should be held, as ruleset
131 * should be destroyed before).
132 */
133 for (tid = 0; tid < NPF_TABLE_SLOTS; tid++) {
134 t = tblset[tid];
135 if (t && --t->t_refcnt == 0) {
136 npf_table_destroy(t);
137 }
138 }
139 kmem_free(tblset, sz);
140 }
141
142 /*
143 * npf_tableset_insert: insert the table into the specified tableset.
144 *
145 * => Returns 0 on success. Fails and returns error if ID is already used.
146 */
147 int
148 npf_tableset_insert(npf_tableset_t *tblset, npf_table_t *t)
149 {
150 const u_int tid = t->t_id;
151 int error;
152
153 KASSERT((u_int)tid < NPF_TABLE_SLOTS);
154
155 if (tblset[tid] == NULL) {
156 tblset[tid] = t;
157 t->t_refcnt++;
158 error = 0;
159 } else {
160 error = EEXIST;
161 }
162 return error;
163 }
164
165 /*
166 * npf_tableset_reload: iterate all tables and if the new table is of the
167 * same type and has no items, then we preserve the old one and its entries.
168 *
169 * => The caller is responsible for providing synchronisation.
170 */
171 void
172 npf_tableset_reload(npf_tableset_t *ntset, npf_tableset_t *otset)
173 {
174 for (int i = 0; i < NPF_TABLE_SLOTS; i++) {
175 npf_table_t *t = ntset[i], *ot = otset[i];
176
177 if (t == NULL || ot == NULL) {
178 continue;
179 }
180 if (t->t_nitems || t->t_type != ot->t_type) {
181 continue;
182 }
183 ntset[i] = ot;
184 ot->t_refcnt++;
185 npf_table_destroy(t);
186 }
187 }
188
189 /*
190 * Few helper routines.
191 */
192
193 static npf_tblent_t *
194 table_hash_lookup(const npf_table_t *t, const npf_addr_t *addr,
195 const int alen, struct npf_hashl **rhtbl)
196 {
197 const uint32_t hidx = hash32_buf(addr, alen, HASH32_BUF_INIT);
198 struct npf_hashl *htbl = &t->t_hashl[hidx & t->t_hashmask];
199 npf_tblent_t *ent;
200
201 /*
202 * Lookup the hash table and check for duplicates.
203 * Note: mask is ignored for the hash storage.
204 */
205 LIST_FOREACH(ent, htbl, te_entry.hashq) {
206 if (ent->te_alen != alen) {
207 continue;
208 }
209 if (memcmp(&ent->te_addr, addr, alen) == 0) {
210 break;
211 }
212 }
213 *rhtbl = htbl;
214 return ent;
215 }
216
217 static void
218 table_tree_destroy(pt_tree_t *tree)
219 {
220 npf_tblent_t *ent;
221
222 while ((ent = ptree_iterate(tree, NULL, PT_ASCENDING)) != NULL) {
223 ptree_remove_node(tree, ent);
224 pool_cache_put(tblent_cache, ent);
225 }
226 }
227
/*
 * npf_table_create: create table with a specified ID.
 *
 * => The ID must be within [0, NPF_TABLE_SLOTS) and is asserted.
 * => Returns NULL only if hash storage allocation fails; tree
 *    initialisation cannot fail.
 * => The table starts with zero references and items (zeroed alloc);
 *    the caller inserts it into a tableset via npf_tableset_insert().
 */
npf_table_t *
npf_table_create(u_int tid, int type, size_t hsize)
{
	npf_table_t *t;

	KASSERT((u_int)tid < NPF_TABLE_SLOTS);

	t = kmem_zalloc(sizeof(npf_table_t), KM_SLEEP);
	switch (type) {
	case NPF_TABLE_TREE:
		/*
		 * Two separate trees: [0] for IPv4, [1] for IPv6 (matching
		 * NPF_ADDRLEN2TREE).  The third argument is the key length
		 * in 32-bit words (1 and 4 respectively — presumably what
		 * npf_table_ptree_ops expects); node and key offsets are
		 * within npf_tblent_t.
		 */
		ptree_init(&t->t_tree[0], &npf_table_ptree_ops,
		    (void *)(sizeof(struct in_addr) / sizeof(uint32_t)),
		    offsetof(npf_tblent_t, te_entry.node),
		    offsetof(npf_tblent_t, te_addr));
		ptree_init(&t->t_tree[1], &npf_table_ptree_ops,
		    (void *)(sizeof(struct in6_addr) / sizeof(uint32_t)),
		    offsetof(npf_tblent_t, te_entry.node),
		    offsetof(npf_tblent_t, te_addr));
		break;
	case NPF_TABLE_HASH:
		t->t_hashl = hashinit(hsize, HASH_LIST, true, &t->t_hashmask);
		if (t->t_hashl == NULL) {
			/* Nothing else initialised yet — just free and bail. */
			kmem_free(t, sizeof(npf_table_t));
			return NULL;
		}
		break;
	default:
		KASSERT(false);
	}
	/* The lock is initialised only after storage setup succeeded. */
	rw_init(&t->t_lock);
	t->t_type = type;
	t->t_id = tid;

	return t;
}
266
267 /*
268 * npf_table_destroy: free all table entries and table itself.
269 */
270 void
271 npf_table_destroy(npf_table_t *t)
272 {
273
274 switch (t->t_type) {
275 case NPF_TABLE_HASH:
276 for (unsigned n = 0; n <= t->t_hashmask; n++) {
277 npf_tblent_t *ent;
278
279 while ((ent = LIST_FIRST(&t->t_hashl[n])) != NULL) {
280 LIST_REMOVE(ent, te_entry.hashq);
281 pool_cache_put(tblent_cache, ent);
282 }
283 }
284 hashdone(t->t_hashl, HASH_LIST, t->t_hashmask);
285 break;
286 case NPF_TABLE_TREE:
287 table_tree_destroy(&t->t_tree[0]);
288 table_tree_destroy(&t->t_tree[1]);
289 break;
290 default:
291 KASSERT(false);
292 }
293 rw_destroy(&t->t_lock);
294 kmem_free(t, sizeof(npf_table_t));
295 }
296
297 /*
298 * npf_table_check: validate ID and type.
299 */
300 int
301 npf_table_check(const npf_tableset_t *tset, u_int tid, int type)
302 {
303
304 if ((u_int)tid >= NPF_TABLE_SLOTS) {
305 return EINVAL;
306 }
307 if (tset[tid] != NULL) {
308 return EEXIST;
309 }
310 if (type != NPF_TABLE_TREE && type != NPF_TABLE_HASH) {
311 return EINVAL;
312 }
313 return 0;
314 }
315
316 static int
317 table_cidr_check(const u_int aidx, const npf_addr_t *addr,
318 const npf_netmask_t mask)
319 {
320
321 if (mask > NPF_MAX_NETMASK && mask != NPF_NO_NETMASK) {
322 return EINVAL;
323 }
324 if (aidx > 1) {
325 return EINVAL;
326 }
327
328 /*
329 * For IPv4 (aidx = 0) - 32 and for IPv6 (aidx = 1) - 128.
330 * If it is a host - shall use NPF_NO_NETMASK.
331 */
332 if (mask >= (aidx ? 128 : 32) && mask != NPF_NO_NETMASK) {
333 return EINVAL;
334 }
335 return 0;
336 }
337
/*
 * npf_table_insert: add an IP CIDR entry into the table.
 *
 * => Returns EINVAL on a bad table ID, family index or mask (also when
 *    a mask is given for a hash table), EEXIST on duplicate, 0 on
 *    success.
 * => Takes the table's write lock only around the actual insertion;
 *    the entry is allocated and filled beforehand (may sleep).
 */
int
npf_table_insert(npf_tableset_t *tset, u_int tid, const int alen,
    const npf_addr_t *addr, const npf_netmask_t mask)
{
	const u_int aidx = NPF_ADDRLEN2TREE(alen);
	npf_tblent_t *ent;
	npf_table_t *t;
	int error;

	if ((u_int)tid >= NPF_TABLE_SLOTS || (t = tset[tid]) == NULL) {
		return EINVAL;
	}

	/* Validates aidx and mask; returns 0 on success. */
	error = table_cidr_check(aidx, addr, mask);
	if (error) {
		return error;
	}
	ent = pool_cache_get(tblent_cache, PR_WAITOK);
	memcpy(&ent->te_addr, addr, alen);
	ent->te_alen = alen;

	/*
	 * Insert the entry. Return an error on duplicate.
	 */
	rw_enter(&t->t_lock, RW_WRITER);
	switch (t->t_type) {
	case NPF_TABLE_HASH: {
		struct npf_hashl *htbl;

		/*
		 * Hash tables by the concept support only IPs.
		 */
		if (mask != NPF_NO_NETMASK) {
			error = EINVAL;
			break;
		}
		/* On success, error stays 0 from table_cidr_check() above. */
		if (!table_hash_lookup(t, addr, alen, &htbl)) {
			LIST_INSERT_HEAD(htbl, ent, te_entry.hashq);
			t->t_nitems++;
		} else {
			error = EEXIST;
		}
		break;
	}
	case NPF_TABLE_TREE: {
		pt_tree_t *tree = &t->t_tree[aidx];
		bool ok;

		/*
		 * If no mask specified, use maximum mask
		 * (i.e. insert as a single host).
		 */
		ok = (mask != NPF_NO_NETMASK) ?
		    ptree_insert_mask_node(tree, ent, mask) :
		    ptree_insert_node(tree, ent);
		if (ok) {
			t->t_nitems++;
			error = 0;
		} else {
			error = EEXIST;
		}
		break;
	}
	default:
		KASSERT(false);
	}
	rw_exit(&t->t_lock);

	/* The entry was never linked in on failure — return it to the pool. */
	if (error) {
		pool_cache_put(tblent_cache, ent);
	}
	return error;
}
413
/*
 * npf_table_remove: remove the IP CIDR entry from the table.
 *
 * => Returns EINVAL on a bad family index, mask or table ID; ENOENT
 *    if no matching entry was found; 0 on successful removal.
 * => The entry is unlinked under the write lock but returned to the
 *    pool only after the lock is dropped.
 */
int
npf_table_remove(npf_tableset_t *tset, u_int tid, const int alen,
    const npf_addr_t *addr, const npf_netmask_t mask)
{
	const u_int aidx = NPF_ADDRLEN2TREE(alen);
	npf_tblent_t *ent;
	npf_table_t *t;
	int error;

	error = table_cidr_check(aidx, addr, mask);
	if (error) {
		return error;
	}

	if ((u_int)tid >= NPF_TABLE_SLOTS || (t = tset[tid]) == NULL) {
		return EINVAL;
	}

	rw_enter(&t->t_lock, RW_WRITER);
	switch (t->t_type) {
	case NPF_TABLE_HASH: {
		struct npf_hashl *htbl;

		/* Exact-address lookup; the mask plays no role for hashes. */
		ent = table_hash_lookup(t, addr, alen, &htbl);
		if (__predict_true(ent != NULL)) {
			LIST_REMOVE(ent, te_entry.hashq);
			t->t_nitems--;
		}
		break;
	}
	case NPF_TABLE_TREE: {
		pt_tree_t *tree = &t->t_tree[aidx];

		ent = ptree_find_node(tree, addr);
		if (__predict_true(ent != NULL)) {
			ptree_remove_node(tree, ent);
			t->t_nitems--;
		}
		break;
	}
	default:
		KASSERT(false);
		ent = NULL;
	}
	rw_exit(&t->t_lock);

	if (ent == NULL) {
		return ENOENT;
	}
	pool_cache_put(tblent_cache, ent);
	return 0;
}
469
470 /*
471 * npf_table_lookup: find the table according to ID, lookup and match
472 * the contents with the specified IP address.
473 */
474 int
475 npf_table_lookup(npf_tableset_t *tset, u_int tid,
476 const int alen, const npf_addr_t *addr)
477 {
478 const u_int aidx = NPF_ADDRLEN2TREE(alen);
479 npf_tblent_t *ent;
480 npf_table_t *t;
481
482 if (__predict_false(aidx > 1)) {
483 return EINVAL;
484 }
485
486 if ((u_int)tid >= NPF_TABLE_SLOTS || (t = tset[tid]) == NULL) {
487 return EINVAL;
488 }
489
490 rw_enter(&t->t_lock, RW_READER);
491 switch (t->t_type) {
492 case NPF_TABLE_HASH: {
493 struct npf_hashl *htbl;
494 ent = table_hash_lookup(t, addr, alen, &htbl);
495 break;
496 }
497 case NPF_TABLE_TREE: {
498 ent = ptree_find_node(&t->t_tree[aidx], addr);
499 break;
500 }
501 default:
502 KASSERT(false);
503 ent = NULL;
504 }
505 rw_exit(&t->t_lock);
506
507 return ent ? 0 : ENOENT;
508 }
509
510 static int
511 table_ent_copyout(npf_tblent_t *ent, npf_netmask_t mask,
512 void *ubuf, size_t len, size_t *off)
513 {
514 void *ubufp = (uint8_t *)ubuf + *off;
515 npf_ioctl_ent_t uent;
516
517 if ((*off += sizeof(npf_ioctl_ent_t)) > len) {
518 return ENOMEM;
519 }
520 uent.alen = ent->te_alen;
521 memcpy(&uent.addr, &ent->te_addr, sizeof(npf_addr_t));
522 uent.mask = mask;
523
524 return copyout(&uent, ubufp, sizeof(npf_ioctl_ent_t));
525 }
526
527 static int
528 table_tree_list(pt_tree_t *tree, npf_netmask_t maxmask, void *ubuf,
529 size_t len, size_t *off)
530 {
531 npf_tblent_t *ent = NULL;
532 int error = 0;
533
534 while ((ent = ptree_iterate(tree, ent, PT_ASCENDING)) != NULL) {
535 pt_bitlen_t blen;
536
537 if (!ptree_mask_node_p(tree, ent, &blen)) {
538 blen = maxmask;
539 }
540 error = table_ent_copyout(ent, blen, ubuf, len, off);
541 if (error)
542 break;
543 }
544 return error;
545 }
546
547 /*
548 * npf_table_list: copy a list of all table entries into a userspace buffer.
549 */
550 int
551 npf_table_list(npf_tableset_t *tset, u_int tid, void *ubuf, size_t len)
552 {
553 npf_table_t *t;
554 size_t off = 0;
555 int error = 0;
556
557 if ((u_int)tid >= NPF_TABLE_SLOTS || (t = tset[tid]) == NULL) {
558 return EINVAL;
559 }
560
561 rw_enter(&t->t_lock, RW_READER);
562 switch (t->t_type) {
563 case NPF_TABLE_HASH:
564 for (unsigned n = 0; n <= t->t_hashmask; n++) {
565 npf_tblent_t *ent;
566
567 LIST_FOREACH(ent, &t->t_hashl[n], te_entry.hashq)
568 if ((error = table_ent_copyout(ent, 0, ubuf,
569 len, &off)) != 0)
570 break;
571 }
572 break;
573 case NPF_TABLE_TREE:
574 error = table_tree_list(&t->t_tree[0], 32, ubuf, len, &off);
575 if (error)
576 break;
577 error = table_tree_list(&t->t_tree[1], 128, ubuf, len, &off);
578 break;
579 default:
580 KASSERT(false);
581 }
582 rw_exit(&t->t_lock);
583
584 return error;
585 }
586