npf_tableset.c revision 1.25 1 /* $NetBSD: npf_tableset.c,v 1.25 2016/12/26 23:05:06 christos Exp $ */
2
3 /*-
4 * Copyright (c) 2009-2016 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This material is based upon work partially supported by The
8 * NetBSD Foundation under a contract with Mindaugas Rasiukevicius.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 /*
33 * NPF tableset module.
34 *
35 * Notes
36 *
37 * The tableset is an array of tables. After the creation, the array
38 * is immutable. The caller is responsible for synchronising access
39 * to the tableset. A table can be a hash, a tree or a CDB. The entries
40 * of hash and tree tables are protected by a read-write lock.
41 */
42
43 #ifdef _KERNEL
44 #include <sys/cdefs.h>
45 __KERNEL_RCSID(0, "$NetBSD: npf_tableset.c,v 1.25 2016/12/26 23:05:06 christos Exp $");
46
47 #include <sys/param.h>
48 #include <sys/types.h>
49
50 #include <sys/atomic.h>
51 #include <sys/hash.h>
52 #include <sys/cdbr.h>
53 #include <sys/kmem.h>
54 #include <sys/malloc.h>
55 #include <sys/pool.h>
56 #include <sys/queue.h>
57 #include <sys/rwlock.h>
58 #include <sys/systm.h>
59 #include <sys/types.h>
60
61 #include "lpm.h"
62 #endif
63
64 #include "npf_impl.h"
65
66 typedef struct npf_tblent {
67 LIST_ENTRY(npf_tblent) te_listent;
68 uint16_t te_preflen;
69 uint16_t te_alen;
70 npf_addr_t te_addr;
71 } npf_tblent_t;
72
/* Head of a hash bucket: a list of table entries. */
LIST_HEAD(npf_hashl, npf_tblent);
74
75 struct npf_table {
76 /*
77 * The storage type can be: a) hash b) tree c) cdb.
78 * There are separate trees for IPv4 and IPv6.
79 */
80 union {
81 struct {
82 struct npf_hashl *t_hashl;
83 u_long t_hashmask;
84 };
85 struct {
86 lpm_t * t_lpm;
87 LIST_HEAD(, npf_tblent) t_list;
88 };
89 struct {
90 void * t_blob;
91 size_t t_bsize;
92 struct cdbr * t_cdb;
93 };
94 } /* C11 */;
95
96 /*
97 * Table ID, type and lock. The ID may change during the
98 * config reload, it is protected by the npf_config_lock.
99 */
100 int t_type;
101 u_int t_id;
102 krwlock_t t_lock;
103
104 /* The number of items, reference count and table name. */
105 u_int t_nitems;
106 u_int t_refcnt;
107 char t_name[NPF_TABLE_MAXNAMELEN];
108 };
109
110 struct npf_tableset {
111 u_int ts_nitems;
112 npf_table_t * ts_map[];
113 };
114
/*
 * NPF_TABLESET_SIZE: size in bytes of a tableset with n map slots.
 *
 * offsetof() of the n-th (one past the last) slot is already the byte
 * size of the header plus n pointers, so it must not be scaled by the
 * pointer size again.
 */
#define	NPF_TABLESET_SIZE(n)	\
	(offsetof(npf_tableset_t, ts_map[n]))
117
/*
 * NPF_ADDRLEN2TREE: map the address length in bytes to an address family
 * index: 0 for lengths below 16 (IPv4), 1 for 16..31 (IPv6).
 */
#define	NPF_ADDRLEN2TREE(alen)	((alen) >> 4)
119
120 static pool_cache_t tblent_cache __read_mostly;
121
122 /*
123 * npf_table_sysinit: initialise tableset structures.
124 */
125 void
126 npf_tableset_sysinit(void)
127 {
128 tblent_cache = pool_cache_init(sizeof(npf_tblent_t), coherency_unit,
129 0, 0, "npftblpl", NULL, IPL_NONE, NULL, NULL, NULL);
130 }
131
132 void
133 npf_tableset_sysfini(void)
134 {
135 pool_cache_destroy(tblent_cache);
136 }
137
138 npf_tableset_t *
139 npf_tableset_create(u_int nitems)
140 {
141 npf_tableset_t *ts = kmem_zalloc(NPF_TABLESET_SIZE(nitems), KM_SLEEP);
142 ts->ts_nitems = nitems;
143 return ts;
144 }
145
146 void
147 npf_tableset_destroy(npf_tableset_t *ts)
148 {
149 /*
150 * Destroy all tables (no references should be held, since the
151 * ruleset should be destroyed before).
152 */
153 for (u_int tid = 0; tid < ts->ts_nitems; tid++) {
154 npf_table_t *t = ts->ts_map[tid];
155
156 if (t && atomic_dec_uint_nv(&t->t_refcnt) == 0) {
157 npf_table_destroy(t);
158 }
159 }
160 kmem_free(ts, NPF_TABLESET_SIZE(ts->ts_nitems));
161 }
162
163 /*
164 * npf_tableset_insert: insert the table into the specified tableset.
165 *
166 * => Returns 0 on success. Fails and returns error if ID is already used.
167 */
168 int
169 npf_tableset_insert(npf_tableset_t *ts, npf_table_t *t)
170 {
171 const u_int tid = t->t_id;
172 int error;
173
174 KASSERT((u_int)tid < ts->ts_nitems);
175
176 if (ts->ts_map[tid] == NULL) {
177 atomic_inc_uint(&t->t_refcnt);
178 ts->ts_map[tid] = t;
179 error = 0;
180 } else {
181 error = EEXIST;
182 }
183 return error;
184 }
185
186 /*
187 * npf_tableset_getbyname: look for a table in the set given the name.
188 */
189 npf_table_t *
190 npf_tableset_getbyname(npf_tableset_t *ts, const char *name)
191 {
192 npf_table_t *t;
193
194 for (u_int tid = 0; tid < ts->ts_nitems; tid++) {
195 if ((t = ts->ts_map[tid]) == NULL)
196 continue;
197 if (strcmp(name, t->t_name) == 0)
198 return t;
199 }
200 return NULL;
201 }
202
203 npf_table_t *
204 npf_tableset_getbyid(npf_tableset_t *ts, u_int tid)
205 {
206 if (__predict_true(tid < ts->ts_nitems)) {
207 return ts->ts_map[tid];
208 }
209 return NULL;
210 }
211
212 /*
213 * npf_tableset_reload: iterate all tables and if the new table is of the
214 * same type and has no items, then we preserve the old one and its entries.
215 *
216 * => The caller is responsible for providing synchronisation.
217 */
218 void
219 npf_tableset_reload(npf_t *npf, npf_tableset_t *nts, npf_tableset_t *ots)
220 {
221 for (u_int tid = 0; tid < nts->ts_nitems; tid++) {
222 npf_table_t *t, *ot;
223
224 if ((t = nts->ts_map[tid]) == NULL) {
225 continue;
226 }
227
228 /* If our table has entries, just load it. */
229 if (t->t_nitems) {
230 continue;
231 }
232
233 /* Look for a currently existing table with such name. */
234 ot = npf_tableset_getbyname(ots, t->t_name);
235 if (ot == NULL) {
236 /* Not found: we have a new table. */
237 continue;
238 }
239
240 /* Found. Did the type change? */
241 if (t->t_type != ot->t_type) {
242 /* Yes, load the new. */
243 continue;
244 }
245
246 /*
247 * Preserve the current table. Acquire a reference since
248 * we are keeping it in the old table set. Update its ID.
249 */
250 atomic_inc_uint(&ot->t_refcnt);
251 nts->ts_map[tid] = ot;
252
253 KASSERT(npf_config_locked_p(npf));
254 ot->t_id = tid;
255
256 /* Destroy the new table (we hold the only reference). */
257 t->t_refcnt--;
258 npf_table_destroy(t);
259 }
260 }
261
262 int
263 npf_tableset_export(npf_t *npf, const npf_tableset_t *ts, prop_array_t tables)
264 {
265 const npf_table_t *t;
266
267 KASSERT(npf_config_locked_p(npf));
268
269 for (u_int tid = 0; tid < ts->ts_nitems; tid++) {
270 if ((t = ts->ts_map[tid]) == NULL) {
271 continue;
272 }
273 prop_dictionary_t tdict = prop_dictionary_create();
274 prop_dictionary_set_cstring(tdict, "name", t->t_name);
275 prop_dictionary_set_uint32(tdict, "type", t->t_type);
276 prop_dictionary_set_uint32(tdict, "id", tid);
277
278 prop_array_add(tables, tdict);
279 prop_object_release(tdict);
280 }
281 return 0;
282 }
283
284 /*
285 * Few helper routines.
286 */
287
288 static npf_tblent_t *
289 table_hash_lookup(const npf_table_t *t, const npf_addr_t *addr,
290 const int alen, struct npf_hashl **rhtbl)
291 {
292 const uint32_t hidx = hash32_buf(addr, alen, HASH32_BUF_INIT);
293 struct npf_hashl *htbl = &t->t_hashl[hidx & t->t_hashmask];
294 npf_tblent_t *ent;
295
296 /*
297 * Lookup the hash table and check for duplicates.
298 * Note: mask is ignored for the hash storage.
299 */
300 LIST_FOREACH(ent, htbl, te_listent) {
301 if (ent->te_alen != alen) {
302 continue;
303 }
304 if (memcmp(&ent->te_addr, addr, alen) == 0) {
305 break;
306 }
307 }
308 *rhtbl = htbl;
309 return ent;
310 }
311
312 static void
313 table_hash_flush(npf_table_t *t)
314 {
315 for (unsigned n = 0; n <= t->t_hashmask; n++) {
316 npf_tblent_t *ent;
317
318 while ((ent = LIST_FIRST(&t->t_hashl[n])) != NULL) {
319 LIST_REMOVE(ent, te_listent);
320 pool_cache_put(tblent_cache, ent);
321 }
322 }
323 }
324
325 static void
326 table_tree_flush(npf_table_t *t)
327 {
328 npf_tblent_t *ent;
329
330 while ((ent = LIST_FIRST(&t->t_list)) != NULL) {
331 LIST_REMOVE(ent, te_listent);
332 pool_cache_put(tblent_cache, ent);
333 }
334 lpm_clear(t->t_lpm, NULL, NULL);
335 }
336
337 /*
338 * npf_table_create: create table with a specified ID.
339 */
340 npf_table_t *
341 npf_table_create(const char *name, u_int tid, int type,
342 void *blob, size_t size)
343 {
344 npf_table_t *t;
345
346 t = kmem_zalloc(sizeof(npf_table_t), KM_SLEEP);
347 strlcpy(t->t_name, name, NPF_TABLE_MAXNAMELEN);
348
349 switch (type) {
350 case NPF_TABLE_TREE:
351 if ((t->t_lpm = lpm_create()) == NULL) {
352 goto out;
353 }
354 LIST_INIT(&t->t_list);
355 break;
356 case NPF_TABLE_HASH:
357 t->t_hashl = hashinit(1024, HASH_LIST, true, &t->t_hashmask);
358 if (t->t_hashl == NULL) {
359 goto out;
360 }
361 break;
362 case NPF_TABLE_CDB:
363 t->t_blob = blob;
364 t->t_bsize = size;
365 t->t_cdb = cdbr_open_mem(blob, size, CDBR_DEFAULT, NULL, NULL);
366 if (t->t_cdb == NULL) {
367 free(blob, M_TEMP);
368 goto out;
369 }
370 t->t_nitems = cdbr_entries(t->t_cdb);
371 break;
372 default:
373 KASSERT(false);
374 }
375 rw_init(&t->t_lock);
376 t->t_type = type;
377 t->t_id = tid;
378 return t;
379 out:
380 kmem_free(t, sizeof(npf_table_t));
381 return NULL;
382 }
383
384 /*
385 * npf_table_destroy: free all table entries and table itself.
386 */
387 void
388 npf_table_destroy(npf_table_t *t)
389 {
390 KASSERT(t->t_refcnt == 0);
391
392 switch (t->t_type) {
393 case NPF_TABLE_HASH:
394 table_hash_flush(t);
395 hashdone(t->t_hashl, HASH_LIST, t->t_hashmask);
396 break;
397 case NPF_TABLE_TREE:
398 table_tree_flush(t);
399 lpm_destroy(t->t_lpm);
400 break;
401 case NPF_TABLE_CDB:
402 cdbr_close(t->t_cdb);
403 free(t->t_blob, M_TEMP);
404 break;
405 default:
406 KASSERT(false);
407 }
408 rw_destroy(&t->t_lock);
409 kmem_free(t, sizeof(npf_table_t));
410 }
411
412 /*
413 * npf_table_check: validate the name, ID and type.
414 */
415 int
416 npf_table_check(npf_tableset_t *ts, const char *name, u_int tid, int type)
417 {
418 if ((u_int)tid >= ts->ts_nitems) {
419 return EINVAL;
420 }
421 if (ts->ts_map[tid] != NULL) {
422 return EEXIST;
423 }
424 switch (type) {
425 case NPF_TABLE_TREE:
426 case NPF_TABLE_HASH:
427 case NPF_TABLE_CDB:
428 break;
429 default:
430 return EINVAL;
431 }
432 if (strlen(name) >= NPF_TABLE_MAXNAMELEN) {
433 return ENAMETOOLONG;
434 }
435 if (npf_tableset_getbyname(ts, name)) {
436 return EEXIST;
437 }
438 return 0;
439 }
440
441 static int
442 table_cidr_check(const u_int aidx, const npf_addr_t *addr,
443 const npf_netmask_t mask)
444 {
445 if (aidx > 1) {
446 return EINVAL;
447 }
448 if (mask > NPF_MAX_NETMASK && mask != NPF_NO_NETMASK) {
449 return EINVAL;
450 }
451
452 /*
453 * For IPv4 (aidx = 0) - 32 and for IPv6 (aidx = 1) - 128.
454 * If it is a host - shall use NPF_NO_NETMASK.
455 */
456 if (mask > (aidx ? 128 : 32) && mask != NPF_NO_NETMASK) {
457 return EINVAL;
458 }
459 return 0;
460 }
461
462 /*
463 * npf_table_insert: add an IP CIDR entry into the table.
464 */
465 int
466 npf_table_insert(npf_table_t *t, const int alen,
467 const npf_addr_t *addr, const npf_netmask_t mask)
468 {
469 const u_int aidx = NPF_ADDRLEN2TREE(alen);
470 npf_tblent_t *ent;
471 int error;
472
473 error = table_cidr_check(aidx, addr, mask);
474 if (error) {
475 return error;
476 }
477 ent = pool_cache_get(tblent_cache, PR_WAITOK);
478 memcpy(&ent->te_addr, addr, alen);
479 ent->te_alen = alen;
480
481 /*
482 * Insert the entry. Return an error on duplicate.
483 */
484 rw_enter(&t->t_lock, RW_WRITER);
485 switch (t->t_type) {
486 case NPF_TABLE_HASH: {
487 struct npf_hashl *htbl;
488
489 /*
490 * Hash tables by the concept support only IPs.
491 */
492 if (mask != NPF_NO_NETMASK) {
493 error = EINVAL;
494 break;
495 }
496 if (!table_hash_lookup(t, addr, alen, &htbl)) {
497 LIST_INSERT_HEAD(htbl, ent, te_listent);
498 t->t_nitems++;
499 } else {
500 error = EEXIST;
501 }
502 break;
503 }
504 case NPF_TABLE_TREE: {
505 const unsigned preflen =
506 (mask == NPF_NO_NETMASK) ? (alen * 8) : mask;
507 if (lpm_lookup(t->t_lpm, addr, alen) == NULL &&
508 lpm_insert(t->t_lpm, addr, alen, preflen, ent) == 0) {
509 LIST_INSERT_HEAD(&t->t_list, ent, te_listent);
510 ent->te_preflen = preflen;
511 t->t_nitems++;
512 error = 0;
513 } else {
514 error = EEXIST;
515 }
516 break;
517 }
518 case NPF_TABLE_CDB:
519 error = EINVAL;
520 break;
521 default:
522 KASSERT(false);
523 }
524 rw_exit(&t->t_lock);
525
526 if (error) {
527 pool_cache_put(tblent_cache, ent);
528 }
529 return error;
530 }
531
532 /*
533 * npf_table_remove: remove the IP CIDR entry from the table.
534 */
535 int
536 npf_table_remove(npf_table_t *t, const int alen,
537 const npf_addr_t *addr, const npf_netmask_t mask)
538 {
539 const u_int aidx = NPF_ADDRLEN2TREE(alen);
540 npf_tblent_t *ent = NULL;
541 int error = ENOENT;
542
543 error = table_cidr_check(aidx, addr, mask);
544 if (error) {
545 return error;
546 }
547
548 rw_enter(&t->t_lock, RW_WRITER);
549 switch (t->t_type) {
550 case NPF_TABLE_HASH: {
551 struct npf_hashl *htbl;
552
553 ent = table_hash_lookup(t, addr, alen, &htbl);
554 if (__predict_true(ent != NULL)) {
555 LIST_REMOVE(ent, te_listent);
556 t->t_nitems--;
557 }
558 break;
559 }
560 case NPF_TABLE_TREE: {
561 ent = lpm_lookup(t->t_lpm, addr, alen);
562 if (__predict_true(ent != NULL)) {
563 LIST_REMOVE(ent, te_listent);
564 lpm_remove(t->t_lpm, &ent->te_addr,
565 ent->te_alen, ent->te_preflen);
566 t->t_nitems--;
567 }
568 break;
569 }
570 case NPF_TABLE_CDB:
571 error = EINVAL;
572 break;
573 default:
574 KASSERT(false);
575 ent = NULL;
576 }
577 rw_exit(&t->t_lock);
578
579 if (ent) {
580 pool_cache_put(tblent_cache, ent);
581 }
582 return error;
583 }
584
585 /*
586 * npf_table_lookup: find the table according to ID, lookup and match
587 * the contents with the specified IP address.
588 */
589 int
590 npf_table_lookup(npf_table_t *t, const int alen, const npf_addr_t *addr)
591 {
592 const u_int aidx = NPF_ADDRLEN2TREE(alen);
593 struct npf_hashl *htbl;
594 const void *data;
595 size_t dlen;
596 bool found;
597
598 if (__predict_false(aidx > 1)) {
599 return EINVAL;
600 }
601
602 switch (t->t_type) {
603 case NPF_TABLE_HASH:
604 rw_enter(&t->t_lock, RW_READER);
605 found = table_hash_lookup(t, addr, alen, &htbl) != NULL;
606 rw_exit(&t->t_lock);
607 break;
608 case NPF_TABLE_TREE:
609 rw_enter(&t->t_lock, RW_READER);
610 found = lpm_lookup(t->t_lpm, addr, alen) != NULL;
611 rw_exit(&t->t_lock);
612 break;
613 case NPF_TABLE_CDB:
614 if (cdbr_find(t->t_cdb, addr, alen, &data, &dlen) == 0) {
615 found = dlen == (u_int)alen &&
616 memcmp(addr, data, dlen) == 0;
617 } else {
618 found = false;
619 }
620 break;
621 default:
622 KASSERT(false);
623 found = false;
624 }
625
626 return found ? 0 : ENOENT;
627 }
628
629 static int
630 table_ent_copyout(const npf_addr_t *addr, const int alen, npf_netmask_t mask,
631 void *ubuf, size_t len, size_t *off)
632 {
633 void *ubufp = (uint8_t *)ubuf + *off;
634 npf_ioctl_ent_t uent;
635
636 if ((*off += sizeof(npf_ioctl_ent_t)) > len) {
637 return ENOMEM;
638 }
639 uent.alen = alen;
640 memcpy(&uent.addr, addr, sizeof(npf_addr_t));
641 uent.mask = mask;
642
643 return copyout(&uent, ubufp, sizeof(npf_ioctl_ent_t));
644 }
645
646 static int
647 table_hash_list(const npf_table_t *t, void *ubuf, size_t len)
648 {
649 size_t off = 0;
650 int error = 0;
651
652 for (unsigned n = 0; n <= t->t_hashmask; n++) {
653 npf_tblent_t *ent;
654
655 LIST_FOREACH(ent, &t->t_hashl[n], te_listent) {
656 error = table_ent_copyout(&ent->te_addr,
657 ent->te_alen, 0, ubuf, len, &off);
658 if (error)
659 break;
660 }
661 }
662 return error;
663 }
664
665 static int
666 table_tree_list(const npf_table_t *t, void *ubuf, size_t len)
667 {
668 npf_tblent_t *ent;
669 size_t off = 0;
670 int error = 0;
671
672 LIST_FOREACH(ent, &t->t_list, te_listent) {
673 error = table_ent_copyout(&ent->te_addr,
674 ent->te_alen, 0, ubuf, len, &off);
675 if (error)
676 break;
677 }
678 return error;
679 }
680
681 static int
682 table_cdb_list(npf_table_t *t, void *ubuf, size_t len)
683 {
684 size_t off = 0, dlen;
685 const void *data;
686 int error = 0;
687
688 for (size_t i = 0; i < t->t_nitems; i++) {
689 if (cdbr_get(t->t_cdb, i, &data, &dlen) != 0) {
690 return EINVAL;
691 }
692 error = table_ent_copyout(data, dlen, 0, ubuf, len, &off);
693 if (error)
694 break;
695 }
696 return error;
697 }
698
699 /*
700 * npf_table_list: copy a list of all table entries into a userspace buffer.
701 */
702 int
703 npf_table_list(npf_table_t *t, void *ubuf, size_t len)
704 {
705 int error = 0;
706
707 rw_enter(&t->t_lock, RW_READER);
708 switch (t->t_type) {
709 case NPF_TABLE_HASH:
710 error = table_hash_list(t, ubuf, len);
711 break;
712 case NPF_TABLE_TREE:
713 error = table_tree_list(t, ubuf, len);
714 break;
715 case NPF_TABLE_CDB:
716 error = table_cdb_list(t, ubuf, len);
717 break;
718 default:
719 KASSERT(false);
720 }
721 rw_exit(&t->t_lock);
722
723 return error;
724 }
725
726 /*
727 * npf_table_flush: remove all table entries.
728 */
729 int
730 npf_table_flush(npf_table_t *t)
731 {
732 int error = 0;
733
734 rw_enter(&t->t_lock, RW_WRITER);
735 switch (t->t_type) {
736 case NPF_TABLE_HASH:
737 table_hash_flush(t);
738 t->t_nitems = 0;
739 break;
740 case NPF_TABLE_TREE:
741 table_tree_flush(t);
742 t->t_nitems = 0;
743 break;
744 case NPF_TABLE_CDB:
745 error = EINVAL;
746 break;
747 default:
748 KASSERT(false);
749 }
750 rw_exit(&t->t_lock);
751 return error;
752 }
753