npf_tableset.c revision 1.26 1 /* $NetBSD: npf_tableset.c,v 1.26 2017/01/02 21:49:51 rmind Exp $ */
2
3 /*-
4 * Copyright (c) 2009-2016 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This material is based upon work partially supported by The
8 * NetBSD Foundation under a contract with Mindaugas Rasiukevicius.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 /*
33 * NPF tableset module.
34 *
35 * Notes
36 *
37 * The tableset is an array of tables. After the creation, the array
38 * is immutable. The caller is responsible to synchronise the access
39 * to the tableset. The table can either be a hash or a tree. Its
40 * entries are protected by a read-write lock.
41 */
42
43 #ifdef _KERNEL
44 #include <sys/cdefs.h>
45 __KERNEL_RCSID(0, "$NetBSD: npf_tableset.c,v 1.26 2017/01/02 21:49:51 rmind Exp $");
46
47 #include <sys/param.h>
48 #include <sys/types.h>
49
50 #include <sys/atomic.h>
51 #include <sys/hash.h>
52 #include <sys/cdbr.h>
53 #include <sys/kmem.h>
54 #include <sys/malloc.h>
55 #include <sys/pool.h>
56 #include <sys/queue.h>
57 #include <sys/rwlock.h>
58 #include <sys/systm.h>
59 #include <sys/types.h>
60
61 #include "lpm.h"
62 #endif
63
64 #include "npf_impl.h"
65
66 typedef struct npf_tblent {
67 LIST_ENTRY(npf_tblent) te_listent;
68 uint16_t te_preflen;
69 uint16_t te_alen;
70 npf_addr_t te_addr;
71 } npf_tblent_t;
72
73 LIST_HEAD(npf_hashl, npf_tblent);
74
75 struct npf_table {
76 /*
77 * The storage type can be: a) hash b) tree c) cdb.
78 * There are separate trees for IPv4 and IPv6.
79 */
80 union {
81 struct {
82 struct npf_hashl *t_hashl;
83 u_long t_hashmask;
84 };
85 struct {
86 lpm_t * t_lpm;
87 LIST_HEAD(, npf_tblent) t_list;
88 };
89 struct {
90 void * t_blob;
91 size_t t_bsize;
92 struct cdbr * t_cdb;
93 };
94 } /* C11 */;
95
96 /*
97 * Table ID, type and lock. The ID may change during the
98 * config reload, it is protected by the npf_config_lock.
99 */
100 int t_type;
101 u_int t_id;
102 krwlock_t t_lock;
103
104 /* The number of items, reference count and table name. */
105 u_int t_nitems;
106 u_int t_refcnt;
107 char t_name[NPF_TABLE_MAXNAMELEN];
108 };
109
110 struct npf_tableset {
111 u_int ts_nitems;
112 npf_table_t * ts_map[];
113 };
114
115 #define NPF_TABLESET_SIZE(n) \
116 (offsetof(npf_tableset_t, ts_map[n]) * sizeof(npf_table_t *))
117
118 #define NPF_ADDRLEN2TREE(alen) ((alen) >> 4)
119
120 static pool_cache_t tblent_cache __read_mostly;
121
122 /*
123 * npf_table_sysinit: initialise tableset structures.
124 */
125 void
126 npf_tableset_sysinit(void)
127 {
128 tblent_cache = pool_cache_init(sizeof(npf_tblent_t), coherency_unit,
129 0, 0, "npftblpl", NULL, IPL_NONE, NULL, NULL, NULL);
130 }
131
132 void
133 npf_tableset_sysfini(void)
134 {
135 pool_cache_destroy(tblent_cache);
136 }
137
138 npf_tableset_t *
139 npf_tableset_create(u_int nitems)
140 {
141 npf_tableset_t *ts = kmem_zalloc(NPF_TABLESET_SIZE(nitems), KM_SLEEP);
142 ts->ts_nitems = nitems;
143 return ts;
144 }
145
146 void
147 npf_tableset_destroy(npf_tableset_t *ts)
148 {
149 /*
150 * Destroy all tables (no references should be held, since the
151 * ruleset should be destroyed before).
152 */
153 for (u_int tid = 0; tid < ts->ts_nitems; tid++) {
154 npf_table_t *t = ts->ts_map[tid];
155
156 if (t && atomic_dec_uint_nv(&t->t_refcnt) == 0) {
157 npf_table_destroy(t);
158 }
159 }
160 kmem_free(ts, NPF_TABLESET_SIZE(ts->ts_nitems));
161 }
162
163 /*
164 * npf_tableset_insert: insert the table into the specified tableset.
165 *
166 * => Returns 0 on success. Fails and returns error if ID is already used.
167 */
168 int
169 npf_tableset_insert(npf_tableset_t *ts, npf_table_t *t)
170 {
171 const u_int tid = t->t_id;
172 int error;
173
174 KASSERT((u_int)tid < ts->ts_nitems);
175
176 if (ts->ts_map[tid] == NULL) {
177 atomic_inc_uint(&t->t_refcnt);
178 ts->ts_map[tid] = t;
179 error = 0;
180 } else {
181 error = EEXIST;
182 }
183 return error;
184 }
185
186 npf_table_t *
187 npf_tableset_swap(npf_tableset_t *ts, npf_table_t *newt)
188 {
189 const u_int tid = newt->t_id;
190 npf_table_t *oldt = ts->ts_map[tid];
191
192 KASSERT(tid < ts->ts_nitems);
193 KASSERT(oldt->t_id == newt->t_id);
194
195 newt->t_refcnt = oldt->t_refcnt;
196 oldt->t_refcnt = 0;
197
198 return atomic_swap_ptr(&ts->ts_map[tid], newt);
199 }
200
201 /*
202 * npf_tableset_getbyname: look for a table in the set given the name.
203 */
204 npf_table_t *
205 npf_tableset_getbyname(npf_tableset_t *ts, const char *name)
206 {
207 npf_table_t *t;
208
209 for (u_int tid = 0; tid < ts->ts_nitems; tid++) {
210 if ((t = ts->ts_map[tid]) == NULL)
211 continue;
212 if (strcmp(name, t->t_name) == 0)
213 return t;
214 }
215 return NULL;
216 }
217
218 npf_table_t *
219 npf_tableset_getbyid(npf_tableset_t *ts, u_int tid)
220 {
221 if (__predict_true(tid < ts->ts_nitems)) {
222 return ts->ts_map[tid];
223 }
224 return NULL;
225 }
226
227 /*
228 * npf_tableset_reload: iterate all tables and if the new table is of the
229 * same type and has no items, then we preserve the old one and its entries.
230 *
231 * => The caller is responsible for providing synchronisation.
232 */
233 void
234 npf_tableset_reload(npf_t *npf, npf_tableset_t *nts, npf_tableset_t *ots)
235 {
236 for (u_int tid = 0; tid < nts->ts_nitems; tid++) {
237 npf_table_t *t, *ot;
238
239 if ((t = nts->ts_map[tid]) == NULL) {
240 continue;
241 }
242
243 /* If our table has entries, just load it. */
244 if (t->t_nitems) {
245 continue;
246 }
247
248 /* Look for a currently existing table with such name. */
249 ot = npf_tableset_getbyname(ots, t->t_name);
250 if (ot == NULL) {
251 /* Not found: we have a new table. */
252 continue;
253 }
254
255 /* Found. Did the type change? */
256 if (t->t_type != ot->t_type) {
257 /* Yes, load the new. */
258 continue;
259 }
260
261 /*
262 * Preserve the current table. Acquire a reference since
263 * we are keeping it in the old table set. Update its ID.
264 */
265 atomic_inc_uint(&ot->t_refcnt);
266 nts->ts_map[tid] = ot;
267
268 KASSERT(npf_config_locked_p(npf));
269 ot->t_id = tid;
270
271 /* Destroy the new table (we hold the only reference). */
272 t->t_refcnt--;
273 npf_table_destroy(t);
274 }
275 }
276
277 int
278 npf_tableset_export(npf_t *npf, const npf_tableset_t *ts, prop_array_t tables)
279 {
280 const npf_table_t *t;
281
282 KASSERT(npf_config_locked_p(npf));
283
284 for (u_int tid = 0; tid < ts->ts_nitems; tid++) {
285 if ((t = ts->ts_map[tid]) == NULL) {
286 continue;
287 }
288 prop_dictionary_t tdict = prop_dictionary_create();
289 prop_dictionary_set_cstring(tdict, "name", t->t_name);
290 prop_dictionary_set_uint32(tdict, "type", t->t_type);
291 prop_dictionary_set_uint32(tdict, "id", tid);
292
293 prop_array_add(tables, tdict);
294 prop_object_release(tdict);
295 }
296 return 0;
297 }
298
299 /*
300 * Few helper routines.
301 */
302
303 static npf_tblent_t *
304 table_hash_lookup(const npf_table_t *t, const npf_addr_t *addr,
305 const int alen, struct npf_hashl **rhtbl)
306 {
307 const uint32_t hidx = hash32_buf(addr, alen, HASH32_BUF_INIT);
308 struct npf_hashl *htbl = &t->t_hashl[hidx & t->t_hashmask];
309 npf_tblent_t *ent;
310
311 /*
312 * Lookup the hash table and check for duplicates.
313 * Note: mask is ignored for the hash storage.
314 */
315 LIST_FOREACH(ent, htbl, te_listent) {
316 if (ent->te_alen != alen) {
317 continue;
318 }
319 if (memcmp(&ent->te_addr, addr, alen) == 0) {
320 break;
321 }
322 }
323 *rhtbl = htbl;
324 return ent;
325 }
326
327 static void
328 table_hash_flush(npf_table_t *t)
329 {
330 for (unsigned n = 0; n <= t->t_hashmask; n++) {
331 npf_tblent_t *ent;
332
333 while ((ent = LIST_FIRST(&t->t_hashl[n])) != NULL) {
334 LIST_REMOVE(ent, te_listent);
335 pool_cache_put(tblent_cache, ent);
336 }
337 }
338 }
339
340 static void
341 table_tree_flush(npf_table_t *t)
342 {
343 npf_tblent_t *ent;
344
345 while ((ent = LIST_FIRST(&t->t_list)) != NULL) {
346 LIST_REMOVE(ent, te_listent);
347 pool_cache_put(tblent_cache, ent);
348 }
349 lpm_clear(t->t_lpm, NULL, NULL);
350 }
351
352 /*
353 * npf_table_create: create table with a specified ID.
354 */
355 npf_table_t *
356 npf_table_create(const char *name, u_int tid, int type,
357 void *blob, size_t size)
358 {
359 npf_table_t *t;
360
361 t = kmem_zalloc(sizeof(npf_table_t), KM_SLEEP);
362 strlcpy(t->t_name, name, NPF_TABLE_MAXNAMELEN);
363
364 switch (type) {
365 case NPF_TABLE_TREE:
366 if ((t->t_lpm = lpm_create()) == NULL) {
367 goto out;
368 }
369 LIST_INIT(&t->t_list);
370 break;
371 case NPF_TABLE_HASH:
372 size = MIN(size, 128);
373 t->t_hashl = hashinit(size, HASH_LIST, true, &t->t_hashmask);
374 if (t->t_hashl == NULL) {
375 goto out;
376 }
377 break;
378 case NPF_TABLE_CDB:
379 t->t_blob = blob;
380 t->t_bsize = size;
381 t->t_cdb = cdbr_open_mem(blob, size, CDBR_DEFAULT, NULL, NULL);
382 if (t->t_cdb == NULL) {
383 free(blob, M_TEMP);
384 goto out;
385 }
386 t->t_nitems = cdbr_entries(t->t_cdb);
387 break;
388 default:
389 KASSERT(false);
390 }
391 rw_init(&t->t_lock);
392 t->t_type = type;
393 t->t_id = tid;
394 return t;
395 out:
396 kmem_free(t, sizeof(npf_table_t));
397 return NULL;
398 }
399
400 /*
401 * npf_table_destroy: free all table entries and table itself.
402 */
403 void
404 npf_table_destroy(npf_table_t *t)
405 {
406 KASSERT(t->t_refcnt == 0);
407
408 switch (t->t_type) {
409 case NPF_TABLE_HASH:
410 table_hash_flush(t);
411 hashdone(t->t_hashl, HASH_LIST, t->t_hashmask);
412 break;
413 case NPF_TABLE_TREE:
414 table_tree_flush(t);
415 lpm_destroy(t->t_lpm);
416 break;
417 case NPF_TABLE_CDB:
418 cdbr_close(t->t_cdb);
419 free(t->t_blob, M_TEMP);
420 break;
421 default:
422 KASSERT(false);
423 }
424 rw_destroy(&t->t_lock);
425 kmem_free(t, sizeof(npf_table_t));
426 }
427
428 u_int
429 npf_table_getid(npf_table_t *t)
430 {
431 return t->t_id;
432 }
433
434 /*
435 * npf_table_check: validate the name, ID and type.
436 */
437 int
438 npf_table_check(npf_tableset_t *ts, const char *name, u_int tid, int type)
439 {
440 if ((u_int)tid >= ts->ts_nitems) {
441 return EINVAL;
442 }
443 if (ts->ts_map[tid] != NULL) {
444 return EEXIST;
445 }
446 switch (type) {
447 case NPF_TABLE_TREE:
448 case NPF_TABLE_HASH:
449 case NPF_TABLE_CDB:
450 break;
451 default:
452 return EINVAL;
453 }
454 if (strlen(name) >= NPF_TABLE_MAXNAMELEN) {
455 return ENAMETOOLONG;
456 }
457 if (npf_tableset_getbyname(ts, name)) {
458 return EEXIST;
459 }
460 return 0;
461 }
462
463 static int
464 table_cidr_check(const u_int aidx, const npf_addr_t *addr,
465 const npf_netmask_t mask)
466 {
467 if (aidx > 1) {
468 return EINVAL;
469 }
470 if (mask > NPF_MAX_NETMASK && mask != NPF_NO_NETMASK) {
471 return EINVAL;
472 }
473
474 /*
475 * For IPv4 (aidx = 0) - 32 and for IPv6 (aidx = 1) - 128.
476 * If it is a host - shall use NPF_NO_NETMASK.
477 */
478 if (mask > (aidx ? 128 : 32) && mask != NPF_NO_NETMASK) {
479 return EINVAL;
480 }
481 return 0;
482 }
483
484 /*
485 * npf_table_insert: add an IP CIDR entry into the table.
486 */
487 int
488 npf_table_insert(npf_table_t *t, const int alen,
489 const npf_addr_t *addr, const npf_netmask_t mask)
490 {
491 const u_int aidx = NPF_ADDRLEN2TREE(alen);
492 npf_tblent_t *ent;
493 int error;
494
495 error = table_cidr_check(aidx, addr, mask);
496 if (error) {
497 return error;
498 }
499 ent = pool_cache_get(tblent_cache, PR_WAITOK);
500 memcpy(&ent->te_addr, addr, alen);
501 ent->te_alen = alen;
502
503 /*
504 * Insert the entry. Return an error on duplicate.
505 */
506 rw_enter(&t->t_lock, RW_WRITER);
507 switch (t->t_type) {
508 case NPF_TABLE_HASH: {
509 struct npf_hashl *htbl;
510
511 /*
512 * Hash tables by the concept support only IPs.
513 */
514 if (mask != NPF_NO_NETMASK) {
515 error = EINVAL;
516 break;
517 }
518 if (!table_hash_lookup(t, addr, alen, &htbl)) {
519 LIST_INSERT_HEAD(htbl, ent, te_listent);
520 t->t_nitems++;
521 } else {
522 error = EEXIST;
523 }
524 break;
525 }
526 case NPF_TABLE_TREE: {
527 const unsigned preflen =
528 (mask == NPF_NO_NETMASK) ? (alen * 8) : mask;
529 if (lpm_lookup(t->t_lpm, addr, alen) == NULL &&
530 lpm_insert(t->t_lpm, addr, alen, preflen, ent) == 0) {
531 LIST_INSERT_HEAD(&t->t_list, ent, te_listent);
532 ent->te_preflen = preflen;
533 t->t_nitems++;
534 error = 0;
535 } else {
536 error = EEXIST;
537 }
538 break;
539 }
540 case NPF_TABLE_CDB:
541 error = EINVAL;
542 break;
543 default:
544 KASSERT(false);
545 }
546 rw_exit(&t->t_lock);
547
548 if (error) {
549 pool_cache_put(tblent_cache, ent);
550 }
551 return error;
552 }
553
554 /*
555 * npf_table_remove: remove the IP CIDR entry from the table.
556 */
557 int
558 npf_table_remove(npf_table_t *t, const int alen,
559 const npf_addr_t *addr, const npf_netmask_t mask)
560 {
561 const u_int aidx = NPF_ADDRLEN2TREE(alen);
562 npf_tblent_t *ent = NULL;
563 int error = ENOENT;
564
565 error = table_cidr_check(aidx, addr, mask);
566 if (error) {
567 return error;
568 }
569
570 rw_enter(&t->t_lock, RW_WRITER);
571 switch (t->t_type) {
572 case NPF_TABLE_HASH: {
573 struct npf_hashl *htbl;
574
575 ent = table_hash_lookup(t, addr, alen, &htbl);
576 if (__predict_true(ent != NULL)) {
577 LIST_REMOVE(ent, te_listent);
578 t->t_nitems--;
579 }
580 break;
581 }
582 case NPF_TABLE_TREE: {
583 ent = lpm_lookup(t->t_lpm, addr, alen);
584 if (__predict_true(ent != NULL)) {
585 LIST_REMOVE(ent, te_listent);
586 lpm_remove(t->t_lpm, &ent->te_addr,
587 ent->te_alen, ent->te_preflen);
588 t->t_nitems--;
589 }
590 break;
591 }
592 case NPF_TABLE_CDB:
593 error = EINVAL;
594 break;
595 default:
596 KASSERT(false);
597 ent = NULL;
598 }
599 rw_exit(&t->t_lock);
600
601 if (ent) {
602 pool_cache_put(tblent_cache, ent);
603 }
604 return error;
605 }
606
607 /*
608 * npf_table_lookup: find the table according to ID, lookup and match
609 * the contents with the specified IP address.
610 */
611 int
612 npf_table_lookup(npf_table_t *t, const int alen, const npf_addr_t *addr)
613 {
614 const u_int aidx = NPF_ADDRLEN2TREE(alen);
615 struct npf_hashl *htbl;
616 const void *data;
617 size_t dlen;
618 bool found;
619
620 if (__predict_false(aidx > 1)) {
621 return EINVAL;
622 }
623
624 switch (t->t_type) {
625 case NPF_TABLE_HASH:
626 rw_enter(&t->t_lock, RW_READER);
627 found = table_hash_lookup(t, addr, alen, &htbl) != NULL;
628 rw_exit(&t->t_lock);
629 break;
630 case NPF_TABLE_TREE:
631 rw_enter(&t->t_lock, RW_READER);
632 found = lpm_lookup(t->t_lpm, addr, alen) != NULL;
633 rw_exit(&t->t_lock);
634 break;
635 case NPF_TABLE_CDB:
636 if (cdbr_find(t->t_cdb, addr, alen, &data, &dlen) == 0) {
637 found = dlen == (u_int)alen &&
638 memcmp(addr, data, dlen) == 0;
639 } else {
640 found = false;
641 }
642 break;
643 default:
644 KASSERT(false);
645 found = false;
646 }
647
648 return found ? 0 : ENOENT;
649 }
650
651 static int
652 table_ent_copyout(const npf_addr_t *addr, const int alen, npf_netmask_t mask,
653 void *ubuf, size_t len, size_t *off)
654 {
655 void *ubufp = (uint8_t *)ubuf + *off;
656 npf_ioctl_ent_t uent;
657
658 if ((*off += sizeof(npf_ioctl_ent_t)) > len) {
659 return ENOMEM;
660 }
661 uent.alen = alen;
662 memcpy(&uent.addr, addr, sizeof(npf_addr_t));
663 uent.mask = mask;
664
665 return copyout(&uent, ubufp, sizeof(npf_ioctl_ent_t));
666 }
667
668 static int
669 table_hash_list(const npf_table_t *t, void *ubuf, size_t len)
670 {
671 size_t off = 0;
672 int error = 0;
673
674 for (unsigned n = 0; n <= t->t_hashmask; n++) {
675 npf_tblent_t *ent;
676
677 LIST_FOREACH(ent, &t->t_hashl[n], te_listent) {
678 error = table_ent_copyout(&ent->te_addr,
679 ent->te_alen, 0, ubuf, len, &off);
680 if (error)
681 break;
682 }
683 }
684 return error;
685 }
686
687 static int
688 table_tree_list(const npf_table_t *t, void *ubuf, size_t len)
689 {
690 npf_tblent_t *ent;
691 size_t off = 0;
692 int error = 0;
693
694 LIST_FOREACH(ent, &t->t_list, te_listent) {
695 error = table_ent_copyout(&ent->te_addr,
696 ent->te_alen, 0, ubuf, len, &off);
697 if (error)
698 break;
699 }
700 return error;
701 }
702
703 static int
704 table_cdb_list(npf_table_t *t, void *ubuf, size_t len)
705 {
706 size_t off = 0, dlen;
707 const void *data;
708 int error = 0;
709
710 for (size_t i = 0; i < t->t_nitems; i++) {
711 if (cdbr_get(t->t_cdb, i, &data, &dlen) != 0) {
712 return EINVAL;
713 }
714 error = table_ent_copyout(data, dlen, 0, ubuf, len, &off);
715 if (error)
716 break;
717 }
718 return error;
719 }
720
721 /*
722 * npf_table_list: copy a list of all table entries into a userspace buffer.
723 */
724 int
725 npf_table_list(npf_table_t *t, void *ubuf, size_t len)
726 {
727 int error = 0;
728
729 rw_enter(&t->t_lock, RW_READER);
730 switch (t->t_type) {
731 case NPF_TABLE_HASH:
732 error = table_hash_list(t, ubuf, len);
733 break;
734 case NPF_TABLE_TREE:
735 error = table_tree_list(t, ubuf, len);
736 break;
737 case NPF_TABLE_CDB:
738 error = table_cdb_list(t, ubuf, len);
739 break;
740 default:
741 KASSERT(false);
742 }
743 rw_exit(&t->t_lock);
744
745 return error;
746 }
747
748 /*
749 * npf_table_flush: remove all table entries.
750 */
751 int
752 npf_table_flush(npf_table_t *t)
753 {
754 int error = 0;
755
756 rw_enter(&t->t_lock, RW_WRITER);
757 switch (t->t_type) {
758 case NPF_TABLE_HASH:
759 table_hash_flush(t);
760 t->t_nitems = 0;
761 break;
762 case NPF_TABLE_TREE:
763 table_tree_flush(t);
764 t->t_nitems = 0;
765 break;
766 case NPF_TABLE_CDB:
767 error = EINVAL;
768 break;
769 default:
770 KASSERT(false);
771 }
772 rw_exit(&t->t_lock);
773 return error;
774 }
775