ip_encap.c revision 1.56 1 /* $NetBSD: ip_encap.c,v 1.56 2016/07/04 04:29:11 knakahara Exp $ */
2 /* $KAME: ip_encap.c,v 1.73 2001/10/02 08:30:58 itojun Exp $ */
3
4 /*
5 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. Neither the name of the project nor the names of its contributors
17 * may be used to endorse or promote products derived from this software
18 * without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 */
32 /*
33 * My grandfather said that there's a devil inside tunnelling technology...
34 *
35 * We have surprisingly many protocols that want packets with IP protocol
36 * #4 or #41. Here's a list of protocols that want protocol #41:
37 * RFC1933 configured tunnel
38 * RFC1933 automatic tunnel
39 * RFC2401 IPsec tunnel
40 * RFC2473 IPv6 generic packet tunnelling
41 * RFC2529 6over4 tunnel
42 * RFC3056 6to4 tunnel
43 * isatap tunnel
44 * mobile-ip6 (uses RFC2473)
45 * Here's a list of protocols that want protocol #4:
46 * RFC1853 IPv4-in-IPv4 tunnelling
47 * RFC2003 IPv4 encapsulation within IPv4
48 * RFC2344 reverse tunnelling for mobile-ip4
49 * RFC2401 IPsec tunnel
50 * Well, what can I say. They impose different en/decapsulation mechanism
51 * from each other, so they need separate protocol handler. The only one
52 * we can easily determine by protocol # is IPsec, which always has
53 * AH/ESP/IPComp header right after outer IP header.
54 *
55 * So, clearly good old protosw does not work for protocol #4 and #41.
56 * The code will let you match protocol via src/dst address pair.
57 */
58 /* XXX is M_NETADDR correct? */
59
60 /*
61 * With USE_RADIX the code will use radix table for tunnel lookup, for
62 * tunnels registered with encap_attach() with an addr/mask pair.
63 * Faster on machines with thousands of tunnel registrations (= interfaces).
64 *
65 * The code assumes that radix table code can handle non-continuous netmask,
66 * as it will pass radix table memory region with (src + dst) sockaddr pair.
67 */
68 #define USE_RADIX
69
70 #include <sys/cdefs.h>
71 __KERNEL_RCSID(0, "$NetBSD: ip_encap.c,v 1.56 2016/07/04 04:29:11 knakahara Exp $");
72
73 #ifdef _KERNEL_OPT
74 #include "opt_mrouting.h"
75 #include "opt_inet.h"
76 #endif
77
78 #include <sys/param.h>
79 #include <sys/systm.h>
80 #include <sys/socket.h>
81 #include <sys/sockio.h>
82 #include <sys/mbuf.h>
83 #include <sys/errno.h>
84 #include <sys/queue.h>
85 #include <sys/kmem.h>
86 #include <sys/once.h>
87 #include <sys/mutex.h>
88 #include <sys/psref.h>
89 #include <sys/pslist.h>
90
91 #include <net/if.h>
92
93 #include <netinet/in.h>
94 #include <netinet/in_systm.h>
95 #include <netinet/ip.h>
96 #include <netinet/ip_var.h>
97 #include <netinet/ip_encap.h>
98 #ifdef MROUTING
99 #include <netinet/ip_mroute.h>
100 #endif /* MROUTING */
101
102 #ifdef INET6
103 #include <netinet/ip6.h>
104 #include <netinet6/ip6_var.h>
105 #include <netinet6/ip6protosw.h> /* for struct ip6ctlparam */
106 #include <netinet6/in6_var.h>
107 #include <netinet6/in6_pcb.h>
108 #include <netinet/icmp6.h>
109 #endif
110
111 #include <net/net_osdep.h>
112
/*
 * Selects how the lookup key is built from a packet header: for INBOUND
 * the packet's destination is "mine" and its source is "yours"; for
 * OUTBOUND the roles are swapped.
 */
enum direction { INBOUND, OUTBOUND };

/* Per-address-family lookup of the best matching entry. */
#ifdef INET
static struct encaptab *encap4_lookup(struct mbuf *, int, int,
    enum direction, struct psref *);
#endif
#ifdef INET6
static struct encaptab *encap6_lookup(struct mbuf *, int, int,
    enum direction, struct psref *);
#endif

/* Table maintenance and argument validation. */
static int encap_add(struct encaptab *);
static int encap_remove(struct encaptab *);
static int encap_afcheck(int, const struct sockaddr *,
    const struct sockaddr *);

/* Mask helpers: which one exists depends on USE_RADIX. */
#ifdef USE_RADIX
static struct radix_node_head *encap_rnh(int);
static int mask_matchlen(const struct sockaddr *);
#else
static int mask_match(const struct encaptab *, const struct sockaddr *,
    const struct sockaddr *);
#endif

/* Tags an mbuf with the matched entry's argument. */
static void encap_fillarg(struct mbuf *, const struct encaptab *);
134
135 /*
136 * In encap[46]_lookup(), ep->func can sleep(e.g. rtalloc1) while walking
137 * encap_table. So, it cannot use pserialize_read_enter()
138 */
139 static struct {
140 struct pslist_head list;
141 pserialize_t psz;
142 struct psref_class *elem_class; /* for the element of et_list */
143 } encaptab __cacheline_aligned = {
144 .list = PSLIST_INITIALIZER,
145 };
146 #define encap_table encaptab.list
147
148 #ifdef USE_RADIX
149 struct radix_node_head *encap_head[2]; /* 0 for AF_INET, 1 for AF_INET6 */
150 #endif
151
152 static ONCE_DECL(encap_init_control);
153
154 static int encap_init_once(void);
155
/*
 * Address-family initialization hook.  Only the first call does any work;
 * later calls return immediately.
 *
 * The entry list is deliberately NOT re-initialized here: drivers may call
 * encap_attach() on driver attach, and encap_init() is called on
 * AF_INET{,6} initialization, which happens after driver initialization.
 * Resetting the list here would discard those earlier attachments.  The
 * list is statically initialized instead.
 */
void
encap_init(void)
{
	static int initialized = 0;
#ifdef USE_RADIX
	/* radix key width in bits: one sockaddr_pack worth of bytes */
	const size_t keybits = sizeof(struct sockaddr_pack) << 3;
#endif

	if (initialized != 0)
		return;
	initialized = 1;

#ifdef USE_RADIX
	/*
	 * Have the radix lookup tables set up once the radix subsystem
	 * itself has been initialized.
	 */
	rn_delayedinit((void *)&encap_head[0], keybits);
#ifdef INET6
	rn_delayedinit((void *)&encap_head[1], keybits);
#endif
#endif
}
187
188 #ifdef INET
189 static struct encaptab *
190 encap4_lookup(struct mbuf *m, int off, int proto, enum direction dir,
191 struct psref *match_psref)
192 {
193 struct ip *ip;
194 struct ip_pack4 pack;
195 struct encaptab *ep, *match;
196 int prio, matchprio;
197 int s;
198 #ifdef USE_RADIX
199 struct radix_node_head *rnh = encap_rnh(AF_INET);
200 struct radix_node *rn;
201 #endif
202
203 KASSERT(m->m_len >= sizeof(*ip));
204
205 ip = mtod(m, struct ip *);
206
207 memset(&pack, 0, sizeof(pack));
208 pack.p.sp_len = sizeof(pack);
209 pack.mine.sin_family = pack.yours.sin_family = AF_INET;
210 pack.mine.sin_len = pack.yours.sin_len = sizeof(struct sockaddr_in);
211 if (dir == INBOUND) {
212 pack.mine.sin_addr = ip->ip_dst;
213 pack.yours.sin_addr = ip->ip_src;
214 } else {
215 pack.mine.sin_addr = ip->ip_src;
216 pack.yours.sin_addr = ip->ip_dst;
217 }
218
219 match = NULL;
220 matchprio = 0;
221
222 s = pserialize_read_enter();
223 #ifdef USE_RADIX
224 rn = rnh->rnh_matchaddr((void *)&pack, rnh);
225 if (rn && (rn->rn_flags & RNF_ROOT) == 0) {
226 struct encaptab *encapp = (struct encaptab *)rn;
227
228 psref_acquire(match_psref, &encapp->psref,
229 encaptab.elem_class);
230 match = encapp;
231 matchprio = mask_matchlen(match->srcmask) +
232 mask_matchlen(match->dstmask);
233 }
234 #endif
235 PSLIST_READER_FOREACH(ep, &encap_table, struct encaptab, chain) {
236 struct psref elem_psref;
237
238 membar_datadep_consumer();
239
240 if (ep->af != AF_INET)
241 continue;
242 if (ep->proto >= 0 && ep->proto != proto)
243 continue;
244
245 psref_acquire(&elem_psref, &ep->psref,
246 encaptab.elem_class);
247 if (ep->func) {
248 pserialize_read_exit(s);
249 /* ep->func is sleepable. e.g. rtalloc1 */
250 prio = (*ep->func)(m, off, proto, ep->arg);
251 s = pserialize_read_enter();
252 } else {
253 #ifdef USE_RADIX
254 psref_release(&elem_psref, &ep->psref,
255 encaptab.elem_class);
256 continue;
257 #else
258 prio = mask_match(ep, (struct sockaddr *)&pack.mine,
259 (struct sockaddr *)&pack.yours);
260 #endif
261 }
262
263 /*
264 * We prioritize the matches by using bit length of the
265 * matches. mask_match() and user-supplied matching function
266 * should return the bit length of the matches (for example,
267 * if both src/dst are matched for IPv4, 64 should be returned).
268 * 0 or negative return value means "it did not match".
269 *
270 * The question is, since we have two "mask" portion, we
271 * cannot really define total order between entries.
272 * For example, which of these should be preferred?
273 * mask_match() returns 48 (32 + 16) for both of them.
274 * src=3ffe::/16, dst=3ffe:501::/32
275 * src=3ffe:501::/32, dst=3ffe::/16
276 *
277 * We need to loop through all the possible candidates
278 * to get the best match - the search takes O(n) for
279 * n attachments (i.e. interfaces).
280 *
281 * For radix-based lookup, I guess source takes precedence.
282 * See rn_{refines,lexobetter} for the correct answer.
283 */
284 if (prio <= 0) {
285 psref_release(&elem_psref, &ep->psref,
286 encaptab.elem_class);
287 continue;
288 }
289 if (prio > matchprio) {
290 /* release last matched ep */
291 if (match != NULL)
292 psref_release(match_psref, &match->psref,
293 encaptab.elem_class);
294
295 psref_copy(match_psref, &elem_psref,
296 encaptab.elem_class);
297 matchprio = prio;
298 match = ep;
299 }
300 KASSERTMSG((match == NULL) || psref_held(&match->psref,
301 encaptab.elem_class),
302 "current match = %p, but not hold its psref", match);
303
304 psref_release(&elem_psref, &ep->psref,
305 encaptab.elem_class);
306 }
307 pserialize_read_exit(s);
308
309 return match;
310 }
311
312 void
313 encap4_input(struct mbuf *m, ...)
314 {
315 int off, proto;
316 va_list ap;
317 const struct encapsw *esw;
318 struct encaptab *match;
319 struct psref match_psref;
320
321 va_start(ap, m);
322 off = va_arg(ap, int);
323 proto = va_arg(ap, int);
324 va_end(ap);
325
326 match = encap4_lookup(m, off, proto, INBOUND, &match_psref);
327 if (match) {
328 /* found a match, "match" has the best one */
329 esw = match->esw;
330 if (esw && esw->encapsw4.pr_input) {
331 encap_fillarg(m, match);
332 (*esw->encapsw4.pr_input)(m, off, proto);
333 psref_release(&match_psref, &match->psref,
334 encaptab.elem_class);
335 } else {
336 psref_release(&match_psref, &match->psref,
337 encaptab.elem_class);
338 m_freem(m);
339 }
340 return;
341 }
342
343 /* last resort: inject to raw socket */
344 rip_input(m, off, proto);
345 }
346 #endif
347
348 #ifdef INET6
349 static struct encaptab *
350 encap6_lookup(struct mbuf *m, int off, int proto, enum direction dir,
351 struct psref *match_psref)
352 {
353 struct ip6_hdr *ip6;
354 struct ip_pack6 pack;
355 int prio, matchprio;
356 int s;
357 struct encaptab *ep, *match;
358 #ifdef USE_RADIX
359 struct radix_node_head *rnh = encap_rnh(AF_INET6);
360 struct radix_node *rn;
361 #endif
362
363 KASSERT(m->m_len >= sizeof(*ip6));
364
365 ip6 = mtod(m, struct ip6_hdr *);
366
367 memset(&pack, 0, sizeof(pack));
368 pack.p.sp_len = sizeof(pack);
369 pack.mine.sin6_family = pack.yours.sin6_family = AF_INET6;
370 pack.mine.sin6_len = pack.yours.sin6_len = sizeof(struct sockaddr_in6);
371 if (dir == INBOUND) {
372 pack.mine.sin6_addr = ip6->ip6_dst;
373 pack.yours.sin6_addr = ip6->ip6_src;
374 } else {
375 pack.mine.sin6_addr = ip6->ip6_src;
376 pack.yours.sin6_addr = ip6->ip6_dst;
377 }
378
379 match = NULL;
380 matchprio = 0;
381
382 s = pserialize_read_enter();
383 #ifdef USE_RADIX
384 rn = rnh->rnh_matchaddr((void *)&pack, rnh);
385 if (rn && (rn->rn_flags & RNF_ROOT) == 0) {
386 struct encaptab *encapp = (struct encaptab *)rn;
387
388 psref_acquire(match_psref, &encapp->psref,
389 encaptab.elem_class);
390 match = encapp;
391 matchprio = mask_matchlen(match->srcmask) +
392 mask_matchlen(match->dstmask);
393 }
394 #endif
395 PSLIST_READER_FOREACH(ep, &encap_table, struct encaptab, chain) {
396 struct psref elem_psref;
397
398 membar_datadep_consumer();
399
400 if (ep->af != AF_INET6)
401 continue;
402 if (ep->proto >= 0 && ep->proto != proto)
403 continue;
404
405 psref_acquire(&elem_psref, &ep->psref,
406 encaptab.elem_class);
407
408 if (ep->func) {
409 pserialize_read_exit(s);
410 /* ep->func is sleepable. e.g. rtalloc1 */
411 prio = (*ep->func)(m, off, proto, ep->arg);
412 s = pserialize_read_enter();
413 } else {
414 #ifdef USE_RADIX
415 psref_release(&elem_psref, &ep->psref,
416 encaptab.elem_class);
417 continue;
418 #else
419 prio = mask_match(ep, (struct sockaddr *)&pack.mine,
420 (struct sockaddr *)&pack.yours);
421 #endif
422 }
423
424 /* see encap4_lookup() for issues here */
425 if (prio <= 0) {
426 psref_release(&elem_psref, &ep->psref,
427 encaptab.elem_class);
428 continue;
429 }
430 if (prio > matchprio) {
431 /* release last matched ep */
432 if (match != NULL)
433 psref_release(match_psref, &match->psref,
434 encaptab.elem_class);
435
436 psref_copy(match_psref, &elem_psref,
437 encaptab.elem_class);
438 matchprio = prio;
439 match = ep;
440 }
441 KASSERTMSG((match == NULL) || psref_held(&match->psref,
442 encaptab.elem_class),
443 "current match = %p, but not hold its psref", match);
444
445 psref_release(&elem_psref, &ep->psref,
446 encaptab.elem_class);
447 }
448 pserialize_read_exit(s);
449
450 return match;
451 }
452
453 int
454 encap6_input(struct mbuf **mp, int *offp, int proto)
455 {
456 struct mbuf *m = *mp;
457 const struct encapsw *esw;
458 struct encaptab *match;
459 struct psref match_psref;
460
461 match = encap6_lookup(m, *offp, proto, INBOUND, &match_psref);
462
463 if (match) {
464 /* found a match */
465 esw = match->esw;
466 if (esw && esw->encapsw6.pr_input) {
467 int ret;
468 encap_fillarg(m, match);
469 ret = (*esw->encapsw6.pr_input)(mp, offp, proto);
470 psref_release(&match_psref, &match->psref,
471 encaptab.elem_class);
472 return ret;
473 } else {
474 psref_release(&match_psref, &match->psref,
475 encaptab.elem_class);
476 m_freem(m);
477 return IPPROTO_DONE;
478 }
479 }
480
481 /* last resort: inject to raw socket */
482 return rip6_input(mp, offp, proto);
483 }
484 #endif
485
486 /*
487 * XXX
488 * The encaptab list and the rnh radix tree must be manipulated atomically.
489 */
490 static int
491 encap_add(struct encaptab *ep)
492 {
493 #ifdef USE_RADIX
494 struct radix_node_head *rnh = encap_rnh(ep->af);
495 #endif
496
497 KASSERT(encap_lock_held());
498
499 #ifdef USE_RADIX
500 if (!ep->func && rnh) {
501 /* Wait for all readers to drain. */
502 pserialize_perform(encaptab.psz);
503
504 if (!rnh->rnh_addaddr((void *)ep->addrpack,
505 (void *)ep->maskpack, rnh, ep->nodes)) {
506 return EEXIST;
507 }
508 }
509 #endif
510 PSLIST_WRITER_INSERT_HEAD(&encap_table, ep, chain);
511
512 return 0;
513 }
514
515 /*
516 * XXX
517 * The encaptab list and the rnh radix tree must be manipulated atomically.
518 */
519 static int
520 encap_remove(struct encaptab *ep)
521 {
522 #ifdef USE_RADIX
523 struct radix_node_head *rnh = encap_rnh(ep->af);
524 #endif
525 int error = 0;
526
527 KASSERT(encap_lock_held());
528
529 #ifdef USE_RADIX
530 if (!ep->func && rnh) {
531 /* Wait for all readers to drain. */
532 pserialize_perform(encaptab.psz);
533
534 if (!rnh->rnh_deladdr((void *)ep->addrpack,
535 (void *)ep->maskpack, rnh))
536 error = ESRCH;
537 }
538 #endif
539 PSLIST_WRITER_REMOVE(ep, chain);
540
541 return error;
542 }
543
544 static int
545 encap_afcheck(int af, const struct sockaddr *sp, const struct sockaddr *dp)
546 {
547 if (sp && dp) {
548 if (sp->sa_len != dp->sa_len)
549 return EINVAL;
550 if (af != sp->sa_family || af != dp->sa_family)
551 return EINVAL;
552 } else if (!sp && !dp)
553 ;
554 else
555 return EINVAL;
556
557 switch (af) {
558 case AF_INET:
559 if (sp && sp->sa_len != sizeof(struct sockaddr_in))
560 return EINVAL;
561 if (dp && dp->sa_len != sizeof(struct sockaddr_in))
562 return EINVAL;
563 break;
564 #ifdef INET6
565 case AF_INET6:
566 if (sp && sp->sa_len != sizeof(struct sockaddr_in6))
567 return EINVAL;
568 if (dp && dp->sa_len != sizeof(struct sockaddr_in6))
569 return EINVAL;
570 break;
571 #endif
572 default:
573 return EAFNOSUPPORT;
574 }
575
576 return 0;
577 }
578
579 static int
580 encap_init_once(void)
581 {
582
583 encaptab.psz = pserialize_create();
584 encaptab.elem_class = psref_class_create("encapelem", IPL_SOFTNET);
585 if (encaptab.elem_class == NULL)
586 panic("encaptab.elem_class cannot be allocated.\n");
587
588 return 0;
589 }
590
591 /*
592 * sp (src ptr) is always my side, and dp (dst ptr) is always remote side.
593 * length of mask (sm and dm) is assumed to be same as sp/dp.
594 * Return value will be necessary as input (cookie) for encap_detach().
595 */
596 const struct encaptab *
597 encap_attach(int af, int proto,
598 const struct sockaddr *sp, const struct sockaddr *sm,
599 const struct sockaddr *dp, const struct sockaddr *dm,
600 const struct encapsw *esw, void *arg)
601 {
602 struct encaptab *ep;
603 int error;
604 int s, pss;
605 size_t l;
606 struct ip_pack4 *pack4;
607 #ifdef INET6
608 struct ip_pack6 *pack6;
609 #endif
610
611 RUN_ONCE(&encap_init_control, encap_init_once);
612
613 s = splsoftnet();
614 /* sanity check on args */
615 error = encap_afcheck(af, sp, dp);
616 if (error)
617 goto fail;
618
619 /* check if anyone have already attached with exactly same config */
620 pss = pserialize_read_enter();
621 PSLIST_READER_FOREACH(ep, &encap_table, struct encaptab, chain) {
622 membar_datadep_consumer();
623
624 if (ep->af != af)
625 continue;
626 if (ep->proto != proto)
627 continue;
628 if (ep->func)
629 continue;
630
631 KASSERT(ep->src != NULL);
632 KASSERT(ep->dst != NULL);
633 KASSERT(ep->srcmask != NULL);
634 KASSERT(ep->dstmask != NULL);
635
636 if (ep->src->sa_len != sp->sa_len ||
637 memcmp(ep->src, sp, sp->sa_len) != 0 ||
638 memcmp(ep->srcmask, sm, sp->sa_len) != 0)
639 continue;
640 if (ep->dst->sa_len != dp->sa_len ||
641 memcmp(ep->dst, dp, dp->sa_len) != 0 ||
642 memcmp(ep->dstmask, dm, dp->sa_len) != 0)
643 continue;
644
645 error = EEXIST;
646 pserialize_read_exit(pss);
647 goto fail;
648 }
649 pserialize_read_exit(pss);
650
651 switch (af) {
652 case AF_INET:
653 l = sizeof(*pack4);
654 break;
655 #ifdef INET6
656 case AF_INET6:
657 l = sizeof(*pack6);
658 break;
659 #endif
660 default:
661 goto fail;
662 }
663
664 /* M_NETADDR ok? */
665 ep = kmem_zalloc(sizeof(*ep), KM_NOSLEEP);
666 if (ep == NULL) {
667 error = ENOBUFS;
668 goto fail;
669 }
670 ep->addrpack = kmem_zalloc(l, KM_NOSLEEP);
671 if (ep->addrpack == NULL) {
672 error = ENOBUFS;
673 goto gc;
674 }
675 ep->maskpack = kmem_zalloc(l, KM_NOSLEEP);
676 if (ep->maskpack == NULL) {
677 error = ENOBUFS;
678 goto gc;
679 }
680
681 ep->af = af;
682 ep->proto = proto;
683 ep->addrpack->sa_len = l & 0xff;
684 ep->maskpack->sa_len = l & 0xff;
685 switch (af) {
686 case AF_INET:
687 pack4 = (struct ip_pack4 *)ep->addrpack;
688 ep->src = (struct sockaddr *)&pack4->mine;
689 ep->dst = (struct sockaddr *)&pack4->yours;
690 pack4 = (struct ip_pack4 *)ep->maskpack;
691 ep->srcmask = (struct sockaddr *)&pack4->mine;
692 ep->dstmask = (struct sockaddr *)&pack4->yours;
693 break;
694 #ifdef INET6
695 case AF_INET6:
696 pack6 = (struct ip_pack6 *)ep->addrpack;
697 ep->src = (struct sockaddr *)&pack6->mine;
698 ep->dst = (struct sockaddr *)&pack6->yours;
699 pack6 = (struct ip_pack6 *)ep->maskpack;
700 ep->srcmask = (struct sockaddr *)&pack6->mine;
701 ep->dstmask = (struct sockaddr *)&pack6->yours;
702 break;
703 #endif
704 }
705
706 memcpy(ep->src, sp, sp->sa_len);
707 memcpy(ep->srcmask, sm, sp->sa_len);
708 memcpy(ep->dst, dp, dp->sa_len);
709 memcpy(ep->dstmask, dm, dp->sa_len);
710 ep->esw = esw;
711 ep->arg = arg;
712 psref_target_init(&ep->psref, encaptab.elem_class);
713
714 error = encap_add(ep);
715 if (error)
716 goto gc;
717
718 error = 0;
719 splx(s);
720 return ep;
721
722 gc:
723 if (ep->addrpack)
724 kmem_free(ep->addrpack, l);
725 if (ep->maskpack)
726 kmem_free(ep->maskpack, l);
727 if (ep)
728 kmem_free(ep, sizeof(*ep));
729 fail:
730 splx(s);
731 return NULL;
732 }
733
734 const struct encaptab *
735 encap_attach_func(int af, int proto,
736 int (*func)(struct mbuf *, int, int, void *),
737 const struct encapsw *esw, void *arg)
738 {
739 struct encaptab *ep;
740 int error;
741 int s;
742
743 RUN_ONCE(&encap_init_control, encap_init_once);
744
745 s = splsoftnet();
746 /* sanity check on args */
747 if (!func) {
748 error = EINVAL;
749 goto fail;
750 }
751
752 error = encap_afcheck(af, NULL, NULL);
753 if (error)
754 goto fail;
755
756 ep = kmem_alloc(sizeof(*ep), KM_NOSLEEP); /*XXX*/
757 if (ep == NULL) {
758 error = ENOBUFS;
759 goto fail;
760 }
761 memset(ep, 0, sizeof(*ep));
762
763 ep->af = af;
764 ep->proto = proto;
765 ep->func = func;
766 ep->esw = esw;
767 ep->arg = arg;
768 psref_target_init(&ep->psref, encaptab.elem_class);
769
770 error = encap_add(ep);
771 if (error)
772 goto fail;
773
774 error = 0;
775 splx(s);
776 return ep;
777
778 fail:
779 splx(s);
780 return NULL;
781 }
782
783 /* XXX encap4_ctlinput() is necessary if we set DF=1 on outer IPv4 header */
784
785 #ifdef INET6
786 void *
787 encap6_ctlinput(int cmd, const struct sockaddr *sa, void *d0)
788 {
789 void *d = d0;
790 struct ip6_hdr *ip6;
791 struct mbuf *m;
792 int off;
793 struct ip6ctlparam *ip6cp = NULL;
794 int nxt;
795 int s;
796 struct encaptab *ep;
797 const struct encapsw *esw;
798
799 if (sa->sa_family != AF_INET6 ||
800 sa->sa_len != sizeof(struct sockaddr_in6))
801 return NULL;
802
803 if ((unsigned)cmd >= PRC_NCMDS)
804 return NULL;
805 if (cmd == PRC_HOSTDEAD)
806 d = NULL;
807 else if (cmd == PRC_MSGSIZE)
808 ; /* special code is present, see below */
809 else if (inet6ctlerrmap[cmd] == 0)
810 return NULL;
811
812 /* if the parameter is from icmp6, decode it. */
813 if (d != NULL) {
814 ip6cp = (struct ip6ctlparam *)d;
815 m = ip6cp->ip6c_m;
816 ip6 = ip6cp->ip6c_ip6;
817 off = ip6cp->ip6c_off;
818 nxt = ip6cp->ip6c_nxt;
819
820 if (ip6 && cmd == PRC_MSGSIZE) {
821 int valid = 0;
822 struct encaptab *match;
823 struct psref elem_psref;
824
825 /*
826 * Check to see if we have a valid encap configuration.
827 */
828 match = encap6_lookup(m, off, nxt, OUTBOUND,
829 &elem_psref);
830 if (match)
831 valid++;
832 psref_release(&elem_psref, &match->psref,
833 encaptab.elem_class);
834
835 /*
836 * Depending on the value of "valid" and routing table
837 * size (mtudisc_{hi,lo}wat), we will:
838 * - recalcurate the new MTU and create the
839 * corresponding routing entry, or
840 * - ignore the MTU change notification.
841 */
842 icmp6_mtudisc_update((struct ip6ctlparam *)d, valid);
843 }
844 } else {
845 m = NULL;
846 ip6 = NULL;
847 nxt = -1;
848 }
849
850 /* inform all listeners */
851
852 s = pserialize_read_enter();
853 PSLIST_READER_FOREACH(ep, &encap_table, struct encaptab, chain) {
854 struct psref elem_psref;
855
856 membar_datadep_consumer();
857
858 if (ep->af != AF_INET6)
859 continue;
860 if (ep->proto >= 0 && ep->proto != nxt)
861 continue;
862
863 /* should optimize by looking at address pairs */
864
865 /* XXX need to pass ep->arg or ep itself to listeners */
866 psref_acquire(&elem_psref, &ep->psref,
867 encaptab.elem_class);
868 esw = ep->esw;
869 if (esw && esw->encapsw6.pr_ctlinput) {
870 pserialize_read_exit(s);
871 /* pr_ctlinput is sleepable. e.g. rtcache_free */
872 (*esw->encapsw6.pr_ctlinput)(cmd, sa, d, ep->arg);
873 s = pserialize_read_enter();
874 }
875 psref_release(&elem_psref, &ep->psref,
876 encaptab.elem_class);
877 }
878 pserialize_read_exit(s);
879
880 rip6_ctlinput(cmd, sa, d0);
881 return NULL;
882 }
883 #endif
884
885 int
886 encap_detach(const struct encaptab *cookie)
887 {
888 const struct encaptab *ep = cookie;
889 struct encaptab *p;
890 int error;
891
892 KASSERT(encap_lock_held());
893
894 PSLIST_WRITER_FOREACH(p, &encap_table, struct encaptab, chain) {
895 membar_datadep_consumer();
896
897 if (p == ep) {
898 error = encap_remove(p);
899 if (error)
900 return error;
901 else
902 break;
903 }
904 }
905 if (p == NULL)
906 return ENOENT;
907
908 pserialize_perform(encaptab.psz);
909
910 psref_target_destroy(&p->psref,
911 encaptab.elem_class);
912 if (!ep->func) {
913 kmem_free(p->addrpack, ep->addrpack->sa_len);
914 kmem_free(p->maskpack, ep->maskpack->sa_len);
915 }
916 kmem_free(p, sizeof(*p));
917
918 return 0;
919 }
920
921 #ifdef USE_RADIX
922 static struct radix_node_head *
923 encap_rnh(int af)
924 {
925
926 switch (af) {
927 case AF_INET:
928 return encap_head[0];
929 #ifdef INET6
930 case AF_INET6:
931 return encap_head[1];
932 #endif
933 default:
934 return NULL;
935 }
936 }
937
938 static int
939 mask_matchlen(const struct sockaddr *sa)
940 {
941 const char *p, *ep;
942 int l;
943
944 p = (const char *)sa;
945 ep = p + sa->sa_len;
946 p += 2; /* sa_len + sa_family */
947
948 l = 0;
949 while (p < ep) {
950 l += (*p ? 8 : 0); /* estimate */
951 p++;
952 }
953 return l;
954 }
955 #endif
956
957 #ifndef USE_RADIX
958 static int
959 mask_match(const struct encaptab *ep,
960 const struct sockaddr *sp,
961 const struct sockaddr *dp)
962 {
963 struct sockaddr_storage s;
964 struct sockaddr_storage d;
965 int i;
966 const u_int8_t *p, *q;
967 u_int8_t *r;
968 int matchlen;
969
970 KASSERTMSG(ep->func == NULL, "wrong encaptab passed to mask_match");
971
972 if (sp->sa_len > sizeof(s) || dp->sa_len > sizeof(d))
973 return 0;
974 if (sp->sa_family != ep->af || dp->sa_family != ep->af)
975 return 0;
976 if (sp->sa_len != ep->src->sa_len || dp->sa_len != ep->dst->sa_len)
977 return 0;
978
979 matchlen = 0;
980
981 p = (const u_int8_t *)sp;
982 q = (const u_int8_t *)ep->srcmask;
983 r = (u_int8_t *)&s;
984 for (i = 0 ; i < sp->sa_len; i++) {
985 r[i] = p[i] & q[i];
986 /* XXX estimate */
987 matchlen += (q[i] ? 8 : 0);
988 }
989
990 p = (const u_int8_t *)dp;
991 q = (const u_int8_t *)ep->dstmask;
992 r = (u_int8_t *)&d;
993 for (i = 0 ; i < dp->sa_len; i++) {
994 r[i] = p[i] & q[i];
995 /* XXX rough estimate */
996 matchlen += (q[i] ? 8 : 0);
997 }
998
999 /* need to overwrite len/family portion as we don't compare them */
1000 s.ss_len = sp->sa_len;
1001 s.ss_family = sp->sa_family;
1002 d.ss_len = dp->sa_len;
1003 d.ss_family = dp->sa_family;
1004
1005 if (memcmp(&s, ep->src, ep->src->sa_len) == 0 &&
1006 memcmp(&d, ep->dst, ep->dst->sa_len) == 0) {
1007 return matchlen;
1008 } else
1009 return 0;
1010 }
1011 #endif
1012
1013 static void
1014 encap_fillarg(struct mbuf *m, const struct encaptab *ep)
1015 {
1016 struct m_tag *mtag;
1017
1018 mtag = m_tag_get(PACKET_TAG_ENCAP, sizeof(void *), M_NOWAIT);
1019 if (mtag) {
1020 *(void **)(mtag + 1) = ep->arg;
1021 m_tag_prepend(m, mtag);
1022 }
1023 }
1024
1025 void *
1026 encap_getarg(struct mbuf *m)
1027 {
1028 void *p;
1029 struct m_tag *mtag;
1030
1031 p = NULL;
1032 mtag = m_tag_find(m, PACKET_TAG_ENCAP, NULL);
1033 if (mtag != NULL) {
1034 p = *(void **)(mtag + 1);
1035 m_tag_delete(m, mtag);
1036 }
1037 return p;
1038 }
1039
1040 void
1041 encap_lock_enter(void)
1042 {
1043
1044 /* XXX future work
1045 * change interruptable lock.
1046 */
1047 KERNEL_LOCK(1, NULL);
1048 }
1049
1050 void
1051 encap_lock_exit(void)
1052 {
1053
1054 /* XXX future work
1055 * change interruptable lock
1056 */
1057 KERNEL_UNLOCK_ONE(NULL);
1058 }
1059
1060 bool
1061 encap_lock_held(void)
1062 {
1063
1064 /* XXX future work
1065 * should change interruptable lock.
1066 */
1067 return KERNEL_LOCKED_P();
1068 }
1069