npf_tableset.c revision 1.24 1 /* $NetBSD: npf_tableset.c,v 1.24 2016/12/09 02:40:38 christos Exp $ */
2
3 /*-
4 * Copyright (c) 2009-2016 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This material is based upon work partially supported by The
8 * NetBSD Foundation under a contract with Mindaugas Rasiukevicius.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 /*
33 * NPF tableset module.
34 *
35 * Notes
36 *
37 * The tableset is an array of tables. After the creation, the array
38 * is immutable. The caller is responsible to synchronise the access
39 * to the tableset. The table can either be a hash or a tree. Its
40 * entries are protected by a read-write lock.
41 */
42
43 #include <sys/cdefs.h>
44 __KERNEL_RCSID(0, "$NetBSD: npf_tableset.c,v 1.24 2016/12/09 02:40:38 christos Exp $");
45
46 #include <sys/param.h>
47 #include <sys/types.h>
48
49 #include <sys/atomic.h>
50 #include <sys/hash.h>
51 #include <sys/cdbr.h>
52 #include <sys/kmem.h>
53 #include <sys/malloc.h>
54 #include <sys/pool.h>
55 #include <sys/queue.h>
56 #include <sys/rwlock.h>
57 #include <sys/systm.h>
58 #include <sys/types.h>
59
60 #include "npf_impl.h"
61 #include "lpm.h"
62
63 typedef struct npf_tblent {
64 LIST_ENTRY(npf_tblent) te_listent;
65 uint16_t te_preflen;
66 uint16_t te_alen;
67 npf_addr_t te_addr;
68 } npf_tblent_t;
69
70 LIST_HEAD(npf_hashl, npf_tblent);
71
72 struct npf_table {
73 /*
74 * The storage type can be: a) hash b) tree c) cdb.
75 * There are separate trees for IPv4 and IPv6.
76 */
77 union {
78 struct {
79 struct npf_hashl *t_hashl;
80 u_long t_hashmask;
81 };
82 struct {
83 lpm_t * t_lpm;
84 LIST_HEAD(, npf_tblent) t_list;
85 };
86 struct {
87 void * t_blob;
88 size_t t_bsize;
89 struct cdbr * t_cdb;
90 };
91 } /* C11 */;
92
93 /*
94 * Table ID, type and lock. The ID may change during the
95 * config reload, it is protected by the npf_config_lock.
96 */
97 int t_type;
98 u_int t_id;
99 krwlock_t t_lock;
100
101 /* The number of items, reference count and table name. */
102 u_int t_nitems;
103 u_int t_refcnt;
104 char t_name[NPF_TABLE_MAXNAMELEN];
105 };
106
107 struct npf_tableset {
108 u_int ts_nitems;
109 npf_table_t * ts_map[];
110 };
111
112 #define NPF_TABLESET_SIZE(n) \
113 (offsetof(npf_tableset_t, ts_map[n]) * sizeof(npf_table_t *))
114
115 #define NPF_ADDRLEN2TREE(alen) ((alen) >> 4)
116
117 static pool_cache_t tblent_cache __read_mostly;
118
119 /*
120 * npf_table_sysinit: initialise tableset structures.
121 */
122 void
123 npf_tableset_sysinit(void)
124 {
125 tblent_cache = pool_cache_init(sizeof(npf_tblent_t), coherency_unit,
126 0, 0, "npftblpl", NULL, IPL_NONE, NULL, NULL, NULL);
127 }
128
129 void
130 npf_tableset_sysfini(void)
131 {
132 pool_cache_destroy(tblent_cache);
133 }
134
135 npf_tableset_t *
136 npf_tableset_create(u_int nitems)
137 {
138 npf_tableset_t *ts = kmem_zalloc(NPF_TABLESET_SIZE(nitems), KM_SLEEP);
139 ts->ts_nitems = nitems;
140 return ts;
141 }
142
143 void
144 npf_tableset_destroy(npf_tableset_t *ts)
145 {
146 /*
147 * Destroy all tables (no references should be held, since the
148 * ruleset should be destroyed before).
149 */
150 for (u_int tid = 0; tid < ts->ts_nitems; tid++) {
151 npf_table_t *t = ts->ts_map[tid];
152
153 if (t && atomic_dec_uint_nv(&t->t_refcnt) == 0) {
154 npf_table_destroy(t);
155 }
156 }
157 kmem_free(ts, NPF_TABLESET_SIZE(ts->ts_nitems));
158 }
159
160 /*
161 * npf_tableset_insert: insert the table into the specified tableset.
162 *
163 * => Returns 0 on success. Fails and returns error if ID is already used.
164 */
165 int
166 npf_tableset_insert(npf_tableset_t *ts, npf_table_t *t)
167 {
168 const u_int tid = t->t_id;
169 int error;
170
171 KASSERT((u_int)tid < ts->ts_nitems);
172
173 if (ts->ts_map[tid] == NULL) {
174 atomic_inc_uint(&t->t_refcnt);
175 ts->ts_map[tid] = t;
176 error = 0;
177 } else {
178 error = EEXIST;
179 }
180 return error;
181 }
182
183 /*
184 * npf_tableset_getbyname: look for a table in the set given the name.
185 */
186 npf_table_t *
187 npf_tableset_getbyname(npf_tableset_t *ts, const char *name)
188 {
189 npf_table_t *t;
190
191 for (u_int tid = 0; tid < ts->ts_nitems; tid++) {
192 if ((t = ts->ts_map[tid]) == NULL)
193 continue;
194 if (strcmp(name, t->t_name) == 0)
195 return t;
196 }
197 return NULL;
198 }
199
200 npf_table_t *
201 npf_tableset_getbyid(npf_tableset_t *ts, u_int tid)
202 {
203 if (__predict_true(tid < ts->ts_nitems)) {
204 return ts->ts_map[tid];
205 }
206 return NULL;
207 }
208
209 /*
210 * npf_tableset_reload: iterate all tables and if the new table is of the
211 * same type and has no items, then we preserve the old one and its entries.
212 *
213 * => The caller is responsible for providing synchronisation.
214 */
215 void
216 npf_tableset_reload(npf_tableset_t *nts, npf_tableset_t *ots)
217 {
218 for (u_int tid = 0; tid < nts->ts_nitems; tid++) {
219 npf_table_t *t, *ot;
220
221 if ((t = nts->ts_map[tid]) == NULL) {
222 continue;
223 }
224
225 /* If our table has entries, just load it. */
226 if (t->t_nitems) {
227 continue;
228 }
229
230 /* Look for a currently existing table with such name. */
231 ot = npf_tableset_getbyname(ots, t->t_name);
232 if (ot == NULL) {
233 /* Not found: we have a new table. */
234 continue;
235 }
236
237 /* Found. Did the type change? */
238 if (t->t_type != ot->t_type) {
239 /* Yes, load the new. */
240 continue;
241 }
242
243 /*
244 * Preserve the current table. Acquire a reference since
245 * we are keeping it in the old table set. Update its ID.
246 */
247 atomic_inc_uint(&ot->t_refcnt);
248 nts->ts_map[tid] = ot;
249
250 KASSERT(npf_config_locked_p());
251 ot->t_id = tid;
252
253 /* Destroy the new table (we hold the only reference). */
254 t->t_refcnt--;
255 npf_table_destroy(t);
256 }
257 }
258
259 int
260 npf_tableset_export(const npf_tableset_t *ts, prop_array_t tables)
261 {
262 const npf_table_t *t;
263
264 KASSERT(npf_config_locked_p());
265
266 for (u_int tid = 0; tid < ts->ts_nitems; tid++) {
267 if ((t = ts->ts_map[tid]) == NULL) {
268 continue;
269 }
270 prop_dictionary_t tdict = prop_dictionary_create();
271 prop_dictionary_set_cstring(tdict, "name", t->t_name);
272 prop_dictionary_set_uint32(tdict, "type", t->t_type);
273 prop_dictionary_set_uint32(tdict, "id", tid);
274
275 prop_array_add(tables, tdict);
276 prop_object_release(tdict);
277 }
278 return 0;
279 }
280
281 /*
282 * Few helper routines.
283 */
284
285 static npf_tblent_t *
286 table_hash_lookup(const npf_table_t *t, const npf_addr_t *addr,
287 const int alen, struct npf_hashl **rhtbl)
288 {
289 const uint32_t hidx = hash32_buf(addr, alen, HASH32_BUF_INIT);
290 struct npf_hashl *htbl = &t->t_hashl[hidx & t->t_hashmask];
291 npf_tblent_t *ent;
292
293 /*
294 * Lookup the hash table and check for duplicates.
295 * Note: mask is ignored for the hash storage.
296 */
297 LIST_FOREACH(ent, htbl, te_listent) {
298 if (ent->te_alen != alen) {
299 continue;
300 }
301 if (memcmp(&ent->te_addr, addr, alen) == 0) {
302 break;
303 }
304 }
305 *rhtbl = htbl;
306 return ent;
307 }
308
309 static void
310 table_hash_flush(npf_table_t *t)
311 {
312 for (unsigned n = 0; n <= t->t_hashmask; n++) {
313 npf_tblent_t *ent;
314
315 while ((ent = LIST_FIRST(&t->t_hashl[n])) != NULL) {
316 LIST_REMOVE(ent, te_listent);
317 pool_cache_put(tblent_cache, ent);
318 }
319 }
320 }
321
322 static void
323 table_tree_flush(npf_table_t *t)
324 {
325 npf_tblent_t *ent;
326
327 while ((ent = LIST_FIRST(&t->t_list)) != NULL) {
328 LIST_REMOVE(ent, te_listent);
329 pool_cache_put(tblent_cache, ent);
330 }
331 lpm_clear(t->t_lpm, NULL, NULL);
332 }
333
334 /*
335 * npf_table_create: create table with a specified ID.
336 */
337 npf_table_t *
338 npf_table_create(const char *name, u_int tid, int type,
339 void *blob, size_t size)
340 {
341 npf_table_t *t;
342
343 t = kmem_zalloc(sizeof(*t), KM_SLEEP);
344 strlcpy(t->t_name, name, NPF_TABLE_MAXNAMELEN);
345
346 switch (type) {
347 case NPF_TABLE_TREE:
348 if ((t->t_lpm = lpm_create()) == NULL)
349 goto out;
350 LIST_INIT(&t->t_list);
351 break;
352 case NPF_TABLE_HASH:
353 t->t_hashl = hashinit(1024, HASH_LIST, true, &t->t_hashmask);
354 if (t->t_hashl == NULL)
355 goto out;
356 break;
357 case NPF_TABLE_CDB:
358 t->t_blob = blob;
359 t->t_bsize = size;
360 t->t_cdb = cdbr_open_mem(blob, size, CDBR_DEFAULT, NULL, NULL);
361 if (t->t_cdb == NULL) {
362 free(blob, M_TEMP);
363 goto out;
364 }
365 t->t_nitems = cdbr_entries(t->t_cdb);
366 break;
367 default:
368 KASSERT(false);
369 }
370 rw_init(&t->t_lock);
371 t->t_type = type;
372 t->t_id = tid;
373
374 return t;
375 out:
376 kmem_free(t, sizeof(*t));
377 return NULL;
378
379 }
380
381 /*
382 * npf_table_destroy: free all table entries and table itself.
383 */
384 void
385 npf_table_destroy(npf_table_t *t)
386 {
387 KASSERT(t->t_refcnt == 0);
388
389 switch (t->t_type) {
390 case NPF_TABLE_HASH:
391 table_hash_flush(t);
392 hashdone(t->t_hashl, HASH_LIST, t->t_hashmask);
393 break;
394 case NPF_TABLE_TREE:
395 table_tree_flush(t);
396 lpm_destroy(t->t_lpm);
397 break;
398 case NPF_TABLE_CDB:
399 cdbr_close(t->t_cdb);
400 free(t->t_blob, M_TEMP);
401 break;
402 default:
403 KASSERT(false);
404 }
405 rw_destroy(&t->t_lock);
406 kmem_free(t, sizeof(*t));
407 }
408
409 /*
410 * npf_table_check: validate the name, ID and type.
411 */
412 int
413 npf_table_check(npf_tableset_t *ts, const char *name, u_int tid, int type)
414 {
415 if ((u_int)tid >= ts->ts_nitems) {
416 return EINVAL;
417 }
418 if (ts->ts_map[tid] != NULL) {
419 return EEXIST;
420 }
421 switch (type) {
422 case NPF_TABLE_TREE:
423 case NPF_TABLE_HASH:
424 case NPF_TABLE_CDB:
425 break;
426 default:
427 return EINVAL;
428 }
429 if (strlen(name) >= NPF_TABLE_MAXNAMELEN) {
430 return ENAMETOOLONG;
431 }
432 if (npf_tableset_getbyname(ts, name)) {
433 return EEXIST;
434 }
435 return 0;
436 }
437
438 static int
439 table_cidr_check(const u_int aidx, const npf_addr_t *addr,
440 const npf_netmask_t mask)
441 {
442 if (aidx > 1) {
443 return EINVAL;
444 }
445 if (mask > NPF_MAX_NETMASK && mask != NPF_NO_NETMASK) {
446 return EINVAL;
447 }
448
449 /*
450 * For IPv4 (aidx = 0) - 32 and for IPv6 (aidx = 1) - 128.
451 * If it is a host - shall use NPF_NO_NETMASK.
452 */
453 if (mask > (aidx ? 128 : 32) && mask != NPF_NO_NETMASK) {
454 return EINVAL;
455 }
456 return 0;
457 }
458
459 /*
460 * npf_table_insert: add an IP CIDR entry into the table.
461 */
462 int
463 npf_table_insert(npf_table_t *t, const int alen,
464 const npf_addr_t *addr, const npf_netmask_t mask)
465 {
466 const u_int aidx = NPF_ADDRLEN2TREE(alen);
467 npf_tblent_t *ent;
468 int error;
469
470 error = table_cidr_check(aidx, addr, mask);
471 if (error) {
472 return error;
473 }
474 ent = pool_cache_get(tblent_cache, PR_WAITOK);
475 memcpy(&ent->te_addr, addr, alen);
476 ent->te_alen = alen;
477
478 /*
479 * Insert the entry. Return an error on duplicate.
480 */
481 rw_enter(&t->t_lock, RW_WRITER);
482 switch (t->t_type) {
483 case NPF_TABLE_HASH: {
484 struct npf_hashl *htbl;
485
486 /*
487 * Hash tables by the concept support only IPs.
488 */
489 if (mask != NPF_NO_NETMASK) {
490 error = EINVAL;
491 break;
492 }
493 if (!table_hash_lookup(t, addr, alen, &htbl)) {
494 LIST_INSERT_HEAD(htbl, ent, te_listent);
495 t->t_nitems++;
496 } else {
497 error = EEXIST;
498 }
499 break;
500 }
501 case NPF_TABLE_TREE: {
502 const unsigned preflen =
503 (mask == NPF_NO_NETMASK) ? (alen * 8) : mask;
504 if (lpm_lookup(t->t_lpm, addr, alen) == NULL &&
505 lpm_insert(t->t_lpm, addr, alen, preflen, ent) == 0) {
506 LIST_INSERT_HEAD(&t->t_list, ent, te_listent);
507 ent->te_preflen = preflen;
508 t->t_nitems++;
509 error = 0;
510 } else {
511 error = EEXIST;
512 }
513 break;
514 }
515 case NPF_TABLE_CDB:
516 error = EINVAL;
517 break;
518 default:
519 KASSERT(false);
520 }
521 rw_exit(&t->t_lock);
522
523 if (error) {
524 pool_cache_put(tblent_cache, ent);
525 }
526 return error;
527 }
528
529 /*
530 * npf_table_remove: remove the IP CIDR entry from the table.
531 */
532 int
533 npf_table_remove(npf_table_t *t, const int alen,
534 const npf_addr_t *addr, const npf_netmask_t mask)
535 {
536 const u_int aidx = NPF_ADDRLEN2TREE(alen);
537 npf_tblent_t *ent = NULL;
538 int error = ENOENT;
539
540 error = table_cidr_check(aidx, addr, mask);
541 if (error) {
542 return error;
543 }
544
545 rw_enter(&t->t_lock, RW_WRITER);
546 switch (t->t_type) {
547 case NPF_TABLE_HASH: {
548 struct npf_hashl *htbl;
549
550 ent = table_hash_lookup(t, addr, alen, &htbl);
551 if (__predict_true(ent != NULL)) {
552 LIST_REMOVE(ent, te_listent);
553 t->t_nitems--;
554 }
555 break;
556 }
557 case NPF_TABLE_TREE: {
558 ent = lpm_lookup(t->t_lpm, addr, alen);
559 if (__predict_true(ent != NULL)) {
560 LIST_REMOVE(ent, te_listent);
561 lpm_remove(t->t_lpm, &ent->te_addr,
562 ent->te_alen, ent->te_preflen);
563 t->t_nitems--;
564 }
565 break;
566 }
567 case NPF_TABLE_CDB:
568 error = EINVAL;
569 break;
570 default:
571 KASSERT(false);
572 ent = NULL;
573 }
574 rw_exit(&t->t_lock);
575
576 if (ent) {
577 pool_cache_put(tblent_cache, ent);
578 }
579 return error;
580 }
581
582 /*
583 * npf_table_lookup: find the table according to ID, lookup and match
584 * the contents with the specified IP address.
585 */
586 int
587 npf_table_lookup(npf_table_t *t, const int alen, const npf_addr_t *addr)
588 {
589 const u_int aidx = NPF_ADDRLEN2TREE(alen);
590 struct npf_hashl *htbl;
591 const void *data;
592 size_t dlen;
593 bool found;
594
595 if (__predict_false(aidx > 1)) {
596 return EINVAL;
597 }
598
599 switch (t->t_type) {
600 case NPF_TABLE_HASH:
601 rw_enter(&t->t_lock, RW_READER);
602 found = table_hash_lookup(t, addr, alen, &htbl) != NULL;
603 rw_exit(&t->t_lock);
604 break;
605 case NPF_TABLE_TREE:
606 rw_enter(&t->t_lock, RW_READER);
607 found = lpm_lookup(t->t_lpm, addr, alen) != NULL;
608 rw_exit(&t->t_lock);
609 break;
610 case NPF_TABLE_CDB:
611 if (cdbr_find(t->t_cdb, addr, alen, &data, &dlen) == 0) {
612 found = dlen == alen && memcmp(addr, data, dlen) == 0;
613 } else {
614 found = false;
615 }
616 break;
617 default:
618 KASSERT(false);
619 found = false;
620 }
621
622 return found ? 0 : ENOENT;
623 }
624
625 static int
626 table_ent_copyout(const npf_addr_t *addr, const int alen, npf_netmask_t mask,
627 void *ubuf, size_t len, size_t *off)
628 {
629 void *ubufp = (uint8_t *)ubuf + *off;
630 npf_ioctl_ent_t uent;
631
632 if ((*off += sizeof(npf_ioctl_ent_t)) > len) {
633 return ENOMEM;
634 }
635 uent.alen = alen;
636 memcpy(&uent.addr, addr, sizeof(npf_addr_t));
637 uent.mask = mask;
638
639 return copyout(&uent, ubufp, sizeof(npf_ioctl_ent_t));
640 }
641
642 static int
643 table_hash_list(const npf_table_t *t, void *ubuf, size_t len)
644 {
645 size_t off = 0;
646 int error = 0;
647
648 for (unsigned n = 0; n <= t->t_hashmask; n++) {
649 npf_tblent_t *ent;
650
651 LIST_FOREACH(ent, &t->t_hashl[n], te_listent) {
652 error = table_ent_copyout(&ent->te_addr,
653 ent->te_alen, 0, ubuf, len, &off);
654 if (error)
655 break;
656 }
657 }
658 return error;
659 }
660
661 static int
662 table_tree_list(const npf_table_t *t, void *ubuf, size_t len)
663 {
664 npf_tblent_t *ent;
665 size_t off = 0;
666 int error = 0;
667
668 LIST_FOREACH(ent, &t->t_list, te_listent) {
669 error = table_ent_copyout(&ent->te_addr,
670 ent->te_alen, 0, ubuf, len, &off);
671 if (error)
672 break;
673 }
674 return error;
675 }
676
677 static int
678 table_cdb_list(npf_table_t *t, void *ubuf, size_t len)
679 {
680 size_t off = 0, dlen;
681 const void *data;
682 int error = 0;
683
684 for (size_t i = 0; i < t->t_nitems; i++) {
685 if (cdbr_get(t->t_cdb, i, &data, &dlen) != 0) {
686 return EINVAL;
687 }
688 error = table_ent_copyout(data, dlen, 0, ubuf, len, &off);
689 if (error)
690 break;
691 }
692 return error;
693 }
694
695 /*
696 * npf_table_list: copy a list of all table entries into a userspace buffer.
697 */
698 int
699 npf_table_list(npf_table_t *t, void *ubuf, size_t len)
700 {
701 int error = 0;
702
703 rw_enter(&t->t_lock, RW_READER);
704 switch (t->t_type) {
705 case NPF_TABLE_HASH:
706 error = table_hash_list(t, ubuf, len);
707 break;
708 case NPF_TABLE_TREE:
709 error = table_tree_list(t, ubuf, len);
710 break;
711 case NPF_TABLE_CDB:
712 error = table_cdb_list(t, ubuf, len);
713 break;
714 default:
715 KASSERT(false);
716 }
717 rw_exit(&t->t_lock);
718
719 return error;
720 }
721
722 /*
723 * npf_table_flush: remove all table entries.
724 */
725 int
726 npf_table_flush(npf_table_t *t)
727 {
728 int error = 0;
729
730 rw_enter(&t->t_lock, RW_WRITER);
731 switch (t->t_type) {
732 case NPF_TABLE_HASH:
733 table_hash_flush(t);
734 t->t_nitems = 0;
735 break;
736 case NPF_TABLE_TREE:
737 table_tree_flush(t);
738 t->t_nitems = 0;
739 break;
740 case NPF_TABLE_CDB:
741 error = EINVAL;
742 break;
743 default:
744 KASSERT(false);
745 }
746 rw_exit(&t->t_lock);
747 return error;
748 }
749