npf_tableset.c revision 1.28 1 /*-
2 * Copyright (c) 2009-2016 The NetBSD Foundation, Inc.
3 * All rights reserved.
4 *
5 * This material is based upon work partially supported by The
6 * NetBSD Foundation under a contract with Mindaugas Rasiukevicius.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
18 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
19 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
20 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
21 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27 * POSSIBILITY OF SUCH DAMAGE.
28 */
29
30 /*
31 * NPF tableset module.
32 *
33 * Notes
34 *
35 * The tableset is an array of tables. After the creation, the array
36 * is immutable. The caller is responsible to synchronise the access
37 * to the tableset. The table can either be a hash or a tree. Its
38 * entries are protected by a read-write lock.
39 */
40
41 #ifdef _KERNEL
42 #include <sys/cdefs.h>
43 __KERNEL_RCSID(0, "$NetBSD: npf_tableset.c,v 1.28 2018/09/29 14:41:36 rmind Exp $");
44
45 #include <sys/param.h>
46 #include <sys/types.h>
47
48 #include <sys/atomic.h>
49 #include <sys/hash.h>
50 #include <sys/cdbr.h>
51 #include <sys/kmem.h>
52 #include <sys/malloc.h>
53 #include <sys/pool.h>
54 #include <sys/queue.h>
55 #include <sys/mutex.h>
56 #include <sys/systm.h>
57 #include <sys/types.h>
58
59 #include "lpm.h"
60 #endif
61
62 #include "npf_impl.h"
63
64 typedef struct npf_tblent {
65 LIST_ENTRY(npf_tblent) te_listent;
66 uint16_t te_preflen;
67 uint16_t te_alen;
68 npf_addr_t te_addr;
69 } npf_tblent_t;
70
71 LIST_HEAD(npf_hashl, npf_tblent);
72
73 struct npf_table {
74 /*
75 * The storage type can be: a) hash b) tree c) cdb.
76 * There are separate trees for IPv4 and IPv6.
77 */
78 union {
79 struct {
80 struct npf_hashl *t_hashl;
81 u_long t_hashmask;
82 };
83 struct {
84 lpm_t * t_lpm;
85 LIST_HEAD(, npf_tblent) t_list;
86 };
87 struct {
88 void * t_blob;
89 size_t t_bsize;
90 struct cdbr * t_cdb;
91 };
92 } /* C11 */;
93
94 /*
95 * Table ID, type and lock. The ID may change during the
96 * config reload, it is protected by the npf_config_lock.
97 */
98 int t_type;
99 u_int t_id;
100 kmutex_t t_lock;
101
102 /* The number of items, reference count and table name. */
103 u_int t_nitems;
104 u_int t_refcnt;
105 char t_name[NPF_TABLE_MAXNAMELEN];
106 };
107
108 struct npf_tableset {
109 u_int ts_nitems;
110 npf_table_t * ts_map[];
111 };
112
113 #define NPF_TABLESET_SIZE(n) \
114 (offsetof(npf_tableset_t, ts_map[n]) * sizeof(npf_table_t *))
115
116 #define NPF_ADDRLEN2TREE(alen) ((alen) >> 4)
117
118 static pool_cache_t tblent_cache __read_mostly;
119
120 /*
121 * npf_table_sysinit: initialise tableset structures.
122 */
123 void
124 npf_tableset_sysinit(void)
125 {
126 tblent_cache = pool_cache_init(sizeof(npf_tblent_t), coherency_unit,
127 0, 0, "npftblpl", NULL, IPL_NONE, NULL, NULL, NULL);
128 }
129
130 void
131 npf_tableset_sysfini(void)
132 {
133 pool_cache_destroy(tblent_cache);
134 }
135
136 npf_tableset_t *
137 npf_tableset_create(u_int nitems)
138 {
139 npf_tableset_t *ts = kmem_zalloc(NPF_TABLESET_SIZE(nitems), KM_SLEEP);
140 ts->ts_nitems = nitems;
141 return ts;
142 }
143
144 void
145 npf_tableset_destroy(npf_tableset_t *ts)
146 {
147 /*
148 * Destroy all tables (no references should be held, since the
149 * ruleset should be destroyed before).
150 */
151 for (u_int tid = 0; tid < ts->ts_nitems; tid++) {
152 npf_table_t *t = ts->ts_map[tid];
153
154 if (t && atomic_dec_uint_nv(&t->t_refcnt) == 0) {
155 npf_table_destroy(t);
156 }
157 }
158 kmem_free(ts, NPF_TABLESET_SIZE(ts->ts_nitems));
159 }
160
161 /*
162 * npf_tableset_insert: insert the table into the specified tableset.
163 *
164 * => Returns 0 on success. Fails and returns error if ID is already used.
165 */
166 int
167 npf_tableset_insert(npf_tableset_t *ts, npf_table_t *t)
168 {
169 const u_int tid = t->t_id;
170 int error;
171
172 KASSERT((u_int)tid < ts->ts_nitems);
173
174 if (ts->ts_map[tid] == NULL) {
175 atomic_inc_uint(&t->t_refcnt);
176 ts->ts_map[tid] = t;
177 error = 0;
178 } else {
179 error = EEXIST;
180 }
181 return error;
182 }
183
184 npf_table_t *
185 npf_tableset_swap(npf_tableset_t *ts, npf_table_t *newt)
186 {
187 const u_int tid = newt->t_id;
188 npf_table_t *oldt = ts->ts_map[tid];
189
190 KASSERT(tid < ts->ts_nitems);
191 KASSERT(oldt->t_id == newt->t_id);
192
193 newt->t_refcnt = oldt->t_refcnt;
194 oldt->t_refcnt = 0;
195
196 return atomic_swap_ptr(&ts->ts_map[tid], newt);
197 }
198
199 /*
200 * npf_tableset_getbyname: look for a table in the set given the name.
201 */
202 npf_table_t *
203 npf_tableset_getbyname(npf_tableset_t *ts, const char *name)
204 {
205 npf_table_t *t;
206
207 for (u_int tid = 0; tid < ts->ts_nitems; tid++) {
208 if ((t = ts->ts_map[tid]) == NULL)
209 continue;
210 if (strcmp(name, t->t_name) == 0)
211 return t;
212 }
213 return NULL;
214 }
215
216 npf_table_t *
217 npf_tableset_getbyid(npf_tableset_t *ts, u_int tid)
218 {
219 if (__predict_true(tid < ts->ts_nitems)) {
220 return ts->ts_map[tid];
221 }
222 return NULL;
223 }
224
225 /*
226 * npf_tableset_reload: iterate all tables and if the new table is of the
227 * same type and has no items, then we preserve the old one and its entries.
228 *
229 * => The caller is responsible for providing synchronisation.
230 */
231 void
232 npf_tableset_reload(npf_t *npf, npf_tableset_t *nts, npf_tableset_t *ots)
233 {
234 for (u_int tid = 0; tid < nts->ts_nitems; tid++) {
235 npf_table_t *t, *ot;
236
237 if ((t = nts->ts_map[tid]) == NULL) {
238 continue;
239 }
240
241 /* If our table has entries, just load it. */
242 if (t->t_nitems) {
243 continue;
244 }
245
246 /* Look for a currently existing table with such name. */
247 ot = npf_tableset_getbyname(ots, t->t_name);
248 if (ot == NULL) {
249 /* Not found: we have a new table. */
250 continue;
251 }
252
253 /* Found. Did the type change? */
254 if (t->t_type != ot->t_type) {
255 /* Yes, load the new. */
256 continue;
257 }
258
259 /*
260 * Preserve the current table. Acquire a reference since
261 * we are keeping it in the old table set. Update its ID.
262 */
263 atomic_inc_uint(&ot->t_refcnt);
264 nts->ts_map[tid] = ot;
265
266 KASSERT(npf_config_locked_p(npf));
267 ot->t_id = tid;
268
269 /* Destroy the new table (we hold the only reference). */
270 t->t_refcnt--;
271 npf_table_destroy(t);
272 }
273 }
274
275 int
276 npf_tableset_export(npf_t *npf, const npf_tableset_t *ts, nvlist_t *npf_dict)
277 {
278 const npf_table_t *t;
279
280 KASSERT(npf_config_locked_p(npf));
281
282 for (u_int tid = 0; tid < ts->ts_nitems; tid++) {
283 nvlist_t *table;
284
285 if ((t = ts->ts_map[tid]) == NULL) {
286 continue;
287 }
288 table = nvlist_create(0);
289 nvlist_add_string(table, "name", t->t_name);
290 nvlist_add_number(table, "type", t->t_type);
291 nvlist_add_number(table, "id", tid);
292
293 nvlist_append_nvlist_array(npf_dict, "tables", table);
294 nvlist_destroy(table);
295 }
296 return 0;
297 }
298
299 /*
300 * Few helper routines.
301 */
302
303 static npf_tblent_t *
304 table_hash_lookup(const npf_table_t *t, const npf_addr_t *addr,
305 const int alen, struct npf_hashl **rhtbl)
306 {
307 const uint32_t hidx = hash32_buf(addr, alen, HASH32_BUF_INIT);
308 struct npf_hashl *htbl = &t->t_hashl[hidx & t->t_hashmask];
309 npf_tblent_t *ent;
310
311 /*
312 * Lookup the hash table and check for duplicates.
313 * Note: mask is ignored for the hash storage.
314 */
315 LIST_FOREACH(ent, htbl, te_listent) {
316 if (ent->te_alen != alen) {
317 continue;
318 }
319 if (memcmp(&ent->te_addr, addr, alen) == 0) {
320 break;
321 }
322 }
323 *rhtbl = htbl;
324 return ent;
325 }
326
327 static void
328 table_hash_flush(npf_table_t *t)
329 {
330 for (unsigned n = 0; n <= t->t_hashmask; n++) {
331 npf_tblent_t *ent;
332
333 while ((ent = LIST_FIRST(&t->t_hashl[n])) != NULL) {
334 LIST_REMOVE(ent, te_listent);
335 pool_cache_put(tblent_cache, ent);
336 }
337 }
338 }
339
340 static void
341 table_tree_flush(npf_table_t *t)
342 {
343 npf_tblent_t *ent;
344
345 while ((ent = LIST_FIRST(&t->t_list)) != NULL) {
346 LIST_REMOVE(ent, te_listent);
347 pool_cache_put(tblent_cache, ent);
348 }
349 lpm_clear(t->t_lpm, NULL, NULL);
350 }
351
352 /*
353 * npf_table_create: create table with a specified ID.
354 */
355 npf_table_t *
356 npf_table_create(const char *name, u_int tid, int type,
357 const void *blob, size_t size)
358 {
359 npf_table_t *t;
360
361 t = kmem_zalloc(sizeof(npf_table_t), KM_SLEEP);
362 strlcpy(t->t_name, name, NPF_TABLE_MAXNAMELEN);
363
364 switch (type) {
365 case NPF_TABLE_TREE:
366 if ((t->t_lpm = lpm_create()) == NULL) {
367 goto out;
368 }
369 LIST_INIT(&t->t_list);
370 break;
371 case NPF_TABLE_HASH:
372 size = MIN(MAX(size, 1024 * 1024), 8); // XXX
373 t->t_hashl = hashinit(size, HASH_LIST, true, &t->t_hashmask);
374 if (t->t_hashl == NULL) {
375 goto out;
376 }
377 break;
378 case NPF_TABLE_CDB:
379 t->t_blob = kmem_alloc(size, KM_SLEEP);
380 if (t->t_blob == NULL) {
381 goto out;
382 }
383 memcpy(t->t_blob, blob, size);
384 t->t_bsize = size;
385
386 t->t_cdb = cdbr_open_mem(t->t_blob, size,
387 CDBR_DEFAULT, NULL, NULL);
388 if (t->t_cdb == NULL) {
389 kmem_free(t->t_blob, t->t_bsize);
390 goto out;
391 }
392 t->t_nitems = cdbr_entries(t->t_cdb);
393 break;
394 default:
395 KASSERT(false);
396 }
397 mutex_init(&t->t_lock, MUTEX_DEFAULT, IPL_NET);
398 t->t_type = type;
399 t->t_id = tid;
400 return t;
401 out:
402 kmem_free(t, sizeof(npf_table_t));
403 return NULL;
404 }
405
406 /*
407 * npf_table_destroy: free all table entries and table itself.
408 */
409 void
410 npf_table_destroy(npf_table_t *t)
411 {
412 KASSERT(t->t_refcnt == 0);
413
414 switch (t->t_type) {
415 case NPF_TABLE_HASH:
416 table_hash_flush(t);
417 hashdone(t->t_hashl, HASH_LIST, t->t_hashmask);
418 break;
419 case NPF_TABLE_TREE:
420 table_tree_flush(t);
421 lpm_destroy(t->t_lpm);
422 break;
423 case NPF_TABLE_CDB:
424 cdbr_close(t->t_cdb);
425 kmem_free(t->t_blob, t->t_bsize);
426 break;
427 default:
428 KASSERT(false);
429 }
430 mutex_destroy(&t->t_lock);
431 kmem_free(t, sizeof(npf_table_t));
432 }
433
434 u_int
435 npf_table_getid(npf_table_t *t)
436 {
437 return t->t_id;
438 }
439
440 /*
441 * npf_table_check: validate the name, ID and type.
442 */
443 int
444 npf_table_check(npf_tableset_t *ts, const char *name, uint64_t tid, uint64_t type)
445 {
446 if (tid >= ts->ts_nitems) {
447 return EINVAL;
448 }
449 if (ts->ts_map[tid] != NULL) {
450 return EEXIST;
451 }
452 switch (type) {
453 case NPF_TABLE_TREE:
454 case NPF_TABLE_HASH:
455 case NPF_TABLE_CDB:
456 break;
457 default:
458 return EINVAL;
459 }
460 if (strlen(name) >= NPF_TABLE_MAXNAMELEN) {
461 return ENAMETOOLONG;
462 }
463 if (npf_tableset_getbyname(ts, name)) {
464 return EEXIST;
465 }
466 return 0;
467 }
468
469 static int
470 table_cidr_check(const u_int aidx, const npf_addr_t *addr,
471 const npf_netmask_t mask)
472 {
473 if (aidx > 1) {
474 return EINVAL;
475 }
476 if (mask > NPF_MAX_NETMASK && mask != NPF_NO_NETMASK) {
477 return EINVAL;
478 }
479
480 /*
481 * For IPv4 (aidx = 0) - 32 and for IPv6 (aidx = 1) - 128.
482 * If it is a host - shall use NPF_NO_NETMASK.
483 */
484 if (mask > (aidx ? 128 : 32) && mask != NPF_NO_NETMASK) {
485 return EINVAL;
486 }
487 return 0;
488 }
489
490 /*
491 * npf_table_insert: add an IP CIDR entry into the table.
492 */
493 int
494 npf_table_insert(npf_table_t *t, const int alen,
495 const npf_addr_t *addr, const npf_netmask_t mask)
496 {
497 const u_int aidx = NPF_ADDRLEN2TREE(alen);
498 npf_tblent_t *ent;
499 int error;
500
501 error = table_cidr_check(aidx, addr, mask);
502 if (error) {
503 return error;
504 }
505 ent = pool_cache_get(tblent_cache, PR_WAITOK);
506 memcpy(&ent->te_addr, addr, alen);
507 ent->te_alen = alen;
508
509 /*
510 * Insert the entry. Return an error on duplicate.
511 */
512 mutex_enter(&t->t_lock);
513 switch (t->t_type) {
514 case NPF_TABLE_HASH: {
515 struct npf_hashl *htbl;
516
517 /*
518 * Hash tables by the concept support only IPs.
519 */
520 if (mask != NPF_NO_NETMASK) {
521 error = EINVAL;
522 break;
523 }
524 if (!table_hash_lookup(t, addr, alen, &htbl)) {
525 LIST_INSERT_HEAD(htbl, ent, te_listent);
526 t->t_nitems++;
527 } else {
528 error = EEXIST;
529 }
530 break;
531 }
532 case NPF_TABLE_TREE: {
533 const unsigned preflen =
534 (mask == NPF_NO_NETMASK) ? (alen * 8) : mask;
535 if (lpm_lookup(t->t_lpm, addr, alen) == NULL &&
536 lpm_insert(t->t_lpm, addr, alen, preflen, ent) == 0) {
537 LIST_INSERT_HEAD(&t->t_list, ent, te_listent);
538 ent->te_preflen = preflen;
539 t->t_nitems++;
540 error = 0;
541 } else {
542 error = EEXIST;
543 }
544 break;
545 }
546 case NPF_TABLE_CDB:
547 error = EINVAL;
548 break;
549 default:
550 KASSERT(false);
551 }
552 mutex_exit(&t->t_lock);
553
554 if (error) {
555 pool_cache_put(tblent_cache, ent);
556 }
557 return error;
558 }
559
560 /*
561 * npf_table_remove: remove the IP CIDR entry from the table.
562 */
563 int
564 npf_table_remove(npf_table_t *t, const int alen,
565 const npf_addr_t *addr, const npf_netmask_t mask)
566 {
567 const u_int aidx = NPF_ADDRLEN2TREE(alen);
568 npf_tblent_t *ent = NULL;
569 int error = ENOENT;
570
571 error = table_cidr_check(aidx, addr, mask);
572 if (error) {
573 return error;
574 }
575
576 mutex_enter(&t->t_lock);
577 switch (t->t_type) {
578 case NPF_TABLE_HASH: {
579 struct npf_hashl *htbl;
580
581 ent = table_hash_lookup(t, addr, alen, &htbl);
582 if (__predict_true(ent != NULL)) {
583 LIST_REMOVE(ent, te_listent);
584 t->t_nitems--;
585 }
586 break;
587 }
588 case NPF_TABLE_TREE: {
589 ent = lpm_lookup(t->t_lpm, addr, alen);
590 if (__predict_true(ent != NULL)) {
591 LIST_REMOVE(ent, te_listent);
592 lpm_remove(t->t_lpm, &ent->te_addr,
593 ent->te_alen, ent->te_preflen);
594 t->t_nitems--;
595 }
596 break;
597 }
598 case NPF_TABLE_CDB:
599 error = EINVAL;
600 break;
601 default:
602 KASSERT(false);
603 ent = NULL;
604 }
605 mutex_exit(&t->t_lock);
606
607 if (ent) {
608 pool_cache_put(tblent_cache, ent);
609 }
610 return error;
611 }
612
613 /*
614 * npf_table_lookup: find the table according to ID, lookup and match
615 * the contents with the specified IP address.
616 */
617 int
618 npf_table_lookup(npf_table_t *t, const int alen, const npf_addr_t *addr)
619 {
620 const u_int aidx = NPF_ADDRLEN2TREE(alen);
621 struct npf_hashl *htbl;
622 const void *data;
623 size_t dlen;
624 bool found;
625
626 if (__predict_false(aidx > 1)) {
627 return EINVAL;
628 }
629
630 switch (t->t_type) {
631 case NPF_TABLE_HASH:
632 mutex_enter(&t->t_lock);
633 found = table_hash_lookup(t, addr, alen, &htbl) != NULL;
634 mutex_exit(&t->t_lock);
635 break;
636 case NPF_TABLE_TREE:
637 mutex_enter(&t->t_lock);
638 found = lpm_lookup(t->t_lpm, addr, alen) != NULL;
639 mutex_exit(&t->t_lock);
640 break;
641 case NPF_TABLE_CDB:
642 if (cdbr_find(t->t_cdb, addr, alen, &data, &dlen) == 0) {
643 found = dlen == (u_int)alen &&
644 memcmp(addr, data, dlen) == 0;
645 } else {
646 found = false;
647 }
648 break;
649 default:
650 KASSERT(false);
651 found = false;
652 }
653
654 return found ? 0 : ENOENT;
655 }
656
657 static int
658 table_ent_copyout(const npf_addr_t *addr, const int alen, npf_netmask_t mask,
659 void *ubuf, size_t len, size_t *off)
660 {
661 void *ubufp = (uint8_t *)ubuf + *off;
662 npf_ioctl_ent_t uent;
663
664 if ((*off += sizeof(npf_ioctl_ent_t)) > len) {
665 return ENOMEM;
666 }
667 uent.alen = alen;
668 memcpy(&uent.addr, addr, sizeof(npf_addr_t));
669 uent.mask = mask;
670
671 return copyout(&uent, ubufp, sizeof(npf_ioctl_ent_t));
672 }
673
674 static int
675 table_hash_list(const npf_table_t *t, void *ubuf, size_t len)
676 {
677 size_t off = 0;
678 int error = 0;
679
680 for (unsigned n = 0; n <= t->t_hashmask; n++) {
681 npf_tblent_t *ent;
682
683 LIST_FOREACH(ent, &t->t_hashl[n], te_listent) {
684 error = table_ent_copyout(&ent->te_addr,
685 ent->te_alen, 0, ubuf, len, &off);
686 if (error)
687 break;
688 }
689 }
690 return error;
691 }
692
693 static int
694 table_tree_list(const npf_table_t *t, void *ubuf, size_t len)
695 {
696 npf_tblent_t *ent;
697 size_t off = 0;
698 int error = 0;
699
700 LIST_FOREACH(ent, &t->t_list, te_listent) {
701 error = table_ent_copyout(&ent->te_addr,
702 ent->te_alen, 0, ubuf, len, &off);
703 if (error)
704 break;
705 }
706 return error;
707 }
708
709 static int
710 table_cdb_list(npf_table_t *t, void *ubuf, size_t len)
711 {
712 size_t off = 0, dlen;
713 const void *data;
714 int error = 0;
715
716 for (size_t i = 0; i < t->t_nitems; i++) {
717 if (cdbr_get(t->t_cdb, i, &data, &dlen) != 0) {
718 return EINVAL;
719 }
720 error = table_ent_copyout(data, dlen, 0, ubuf, len, &off);
721 if (error)
722 break;
723 }
724 return error;
725 }
726
727 /*
728 * npf_table_list: copy a list of all table entries into a userspace buffer.
729 */
730 int
731 npf_table_list(npf_table_t *t, void *ubuf, size_t len)
732 {
733 int error = 0;
734
735 mutex_enter(&t->t_lock);
736 switch (t->t_type) {
737 case NPF_TABLE_HASH:
738 error = table_hash_list(t, ubuf, len);
739 break;
740 case NPF_TABLE_TREE:
741 error = table_tree_list(t, ubuf, len);
742 break;
743 case NPF_TABLE_CDB:
744 error = table_cdb_list(t, ubuf, len);
745 break;
746 default:
747 KASSERT(false);
748 }
749 mutex_exit(&t->t_lock);
750
751 return error;
752 }
753
754 /*
755 * npf_table_flush: remove all table entries.
756 */
757 int
758 npf_table_flush(npf_table_t *t)
759 {
760 int error = 0;
761
762 mutex_enter(&t->t_lock);
763 switch (t->t_type) {
764 case NPF_TABLE_HASH:
765 table_hash_flush(t);
766 t->t_nitems = 0;
767 break;
768 case NPF_TABLE_TREE:
769 table_tree_flush(t);
770 t->t_nitems = 0;
771 break;
772 case NPF_TABLE_CDB:
773 error = EINVAL;
774 break;
775 default:
776 KASSERT(false);
777 }
778 mutex_exit(&t->t_lock);
779 return error;
780 }
781