in6_src.c revision 1.62 1 /* $NetBSD: in6_src.c,v 1.62 2016/06/21 10:25:27 ozaki-r Exp $ */
2 /* $KAME: in6_src.c,v 1.159 2005/10/19 01:40:32 t-momose Exp $ */
3
4 /*
5 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. Neither the name of the project nor the names of its contributors
17 * may be used to endorse or promote products derived from this software
18 * without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 */
32
33 /*
34 * Copyright (c) 1982, 1986, 1991, 1993
35 * The Regents of the University of California. All rights reserved.
36 *
37 * Redistribution and use in source and binary forms, with or without
38 * modification, are permitted provided that the following conditions
39 * are met:
40 * 1. Redistributions of source code must retain the above copyright
41 * notice, this list of conditions and the following disclaimer.
42 * 2. Redistributions in binary form must reproduce the above copyright
43 * notice, this list of conditions and the following disclaimer in the
44 * documentation and/or other materials provided with the distribution.
45 * 3. All advertising materials mentioning features or use of this software
46 * must display the following acknowledgement:
47 * This product includes software developed by the University of
48 * California, Berkeley and its contributors.
49 * 4. Neither the name of the University nor the names of its contributors
50 * may be used to endorse or promote products derived from this software
51 * without specific prior written permission.
52 *
53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63 * SUCH DAMAGE.
64 *
65 * @(#)in_pcb.c 8.2 (Berkeley) 1/4/94
66 */
67
68 #include <sys/cdefs.h>
69 __KERNEL_RCSID(0, "$NetBSD: in6_src.c,v 1.62 2016/06/21 10:25:27 ozaki-r Exp $");
70
71 #ifdef _KERNEL_OPT
72 #include "opt_inet.h"
73 #endif
74
75 #include <sys/param.h>
76 #include <sys/systm.h>
77 #include <sys/malloc.h>
78 #include <sys/mbuf.h>
79 #include <sys/protosw.h>
80 #include <sys/socket.h>
81 #include <sys/socketvar.h>
82 #include <sys/ioctl.h>
83 #include <sys/errno.h>
84 #include <sys/time.h>
85 #include <sys/kernel.h>
86 #include <sys/proc.h>
87 #include <sys/kauth.h>
88
89 #include <net/if.h>
90 #include <net/if_types.h>
91 #include <net/route.h>
92
93 #include <netinet/in.h>
94 #include <netinet/in_var.h>
95 #include <netinet/in_systm.h>
96 #include <netinet/ip.h>
97 #include <netinet/in_pcb.h>
98 #include <netinet/portalgo.h>
99 #include <netinet6/in6_var.h>
100 #include <netinet/ip6.h>
101 #include <netinet6/in6_pcb.h>
102 #include <netinet6/ip6_var.h>
103 #include <netinet6/ip6_private.h>
104 #include <netinet6/nd6.h>
105 #include <netinet6/scope6_var.h>
106
107 #include <net/net_osdep.h>
108
109 #ifdef MIP6
110 #include <netinet6/mip6.h>
111 #include <netinet6/mip6_var.h>
112 #include "mip.h"
113 #if NMIP > 0
114 #include <net/if_mip.h>
115 #endif /* NMIP > 0 */
116 #endif /* MIP6 */
117
118 #include <netinet/tcp_vtw.h>
119
120 #define ADDR_LABEL_NOTAPP (-1)
121 struct in6_addrpolicy defaultaddrpolicy;
122
123 int ip6_prefer_tempaddr = 0;
124
125 static int selectroute(struct sockaddr_in6 *, struct ip6_pktopts *,
126 struct ip6_moptions *, struct route *, struct ifnet **, struct psref *,
127 struct rtentry **, int, int);
128 static int in6_selectif(struct sockaddr_in6 *, struct ip6_pktopts *,
129 struct ip6_moptions *, struct route *, struct ifnet **, struct psref *);
130
131 static struct in6_addrpolicy *lookup_addrsel_policy(struct sockaddr_in6 *);
132
133 static void init_policy_queue(void);
134 static int add_addrsel_policyent(struct in6_addrpolicy *);
135 static int delete_addrsel_policyent(struct in6_addrpolicy *);
136 static int walk_addrsel_policy(int (*)(struct in6_addrpolicy *, void *),
137 void *);
138 static int dump_addrsel_policyent(struct in6_addrpolicy *, void *);
139 static struct in6_addrpolicy *match_addrsel_policy(struct sockaddr_in6 *);
140
141 /*
142 * Return an IPv6 address, which is the most appropriate for a given
143 * destination and user specified options.
144 * If necessary, this function lookups the routing table and returns
145 * an entry to the caller for later use.
146 */
147 #if 0 /* diabled ad-hoc */
148 #define REPLACE(r) do {\
149 if ((r) < sizeof(ip6stat.ip6s_sources_rule) / \
150 sizeof(ip6stat.ip6s_sources_rule[0])) /* check for safety */ \
151 ip6stat.ip6s_sources_rule[(r)]++; \
152 /* printf("in6_selectsrc: replace %s with %s by %d\n", ia_best ? ip6_sprintf(&ia_best->ia_addr.sin6_addr) : "none", ip6_sprintf(&ia->ia_addr.sin6_addr), (r)); */ \
153 goto replace; \
154 } while(0)
155 #define NEXT(r) do {\
156 if ((r) < sizeof(ip6stat.ip6s_sources_rule) / \
157 sizeof(ip6stat.ip6s_sources_rule[0])) /* check for safety */ \
158 ip6stat.ip6s_sources_rule[(r)]++; \
159 /* printf("in6_selectsrc: keep %s against %s by %d\n", ia_best ? ip6_sprintf(&ia_best->ia_addr.sin6_addr) : "none", ip6_sprintf(&ia->ia_addr.sin6_addr), (r)); */ \
160 goto next; /* XXX: we can't use 'continue' here */ \
161 } while(0)
162 #define BREAK(r) do { \
163 if ((r) < sizeof(ip6stat.ip6s_sources_rule) / \
164 sizeof(ip6stat.ip6s_sources_rule[0])) /* check for safety */ \
165 ip6stat.ip6s_sources_rule[(r)]++; \
166 goto out; /* XXX: we can't use 'break' here */ \
167 } while(0)
168 #else
169 #define REPLACE(r) goto replace
170 #define NEXT(r) goto next
171 #define BREAK(r) goto out
172 #endif
173
174 struct in6_addr *
175 in6_selectsrc(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts,
176 struct ip6_moptions *mopts, struct route *ro, struct in6_addr *laddr,
177 struct ifnet **ifpp, struct psref *psref, int *errorp)
178 {
179 struct in6_addr dst;
180 struct ifnet *ifp = NULL;
181 struct in6_ifaddr *ia = NULL, *ia_best = NULL;
182 struct in6_pktinfo *pi = NULL;
183 int dst_scope = -1, best_scope = -1, best_matchlen = -1;
184 struct in6_addrpolicy *dst_policy = NULL, *best_policy = NULL;
185 u_int32_t odstzone;
186 int error;
187 int prefer_tempaddr;
188 #if defined(MIP6) && NMIP > 0
189 u_int8_t ip6po_usecoa = 0;
190 #endif /* MIP6 && NMIP > 0 */
191 struct psref local_psref;
192 struct in6_addr *ret_ia = NULL;
193 int bound = curlwp_bind();
194 #define PSREF (psref == NULL) ? &local_psref : psref
195
196 KASSERT((ifpp != NULL && psref != NULL) ||
197 (ifpp == NULL && psref == NULL));
198
199 dst = dstsock->sin6_addr; /* make a copy for local operation */
200 *errorp = 0;
201 if (ifpp)
202 *ifpp = NULL;
203
204 /*
205 * Try to determine the outgoing interface for the given destination.
206 * We do this regardless of whether the socket is bound, since the
207 * caller may need this information as a side effect of the call
208 * to this function (e.g., for identifying the appropriate scope zone
209 * ID).
210 */
211 error = in6_selectif(dstsock, opts, mopts, ro, &ifp, PSREF);
212 if (ifpp != NULL)
213 *ifpp = ifp;
214
215 /*
216 * If the source address is explicitly specified by the caller,
217 * check if the requested source address is indeed a unicast address
218 * assigned to the node, and can be used as the packet's source
219 * address. If everything is okay, use the address as source.
220 */
221 if (opts && (pi = opts->ip6po_pktinfo) &&
222 !IN6_IS_ADDR_UNSPECIFIED(&pi->ipi6_addr)) {
223 struct sockaddr_in6 srcsock;
224 struct in6_ifaddr *ia6;
225
226 /*
227 * Determine the appropriate zone id of the source based on
228 * the zone of the destination and the outgoing interface.
229 * If the specified address is ambiguous wrt the scope zone,
230 * the interface must be specified; otherwise, ifa_ifwithaddr()
231 * will fail matching the address.
232 */
233 memset(&srcsock, 0, sizeof(srcsock));
234 srcsock.sin6_family = AF_INET6;
235 srcsock.sin6_len = sizeof(srcsock);
236 srcsock.sin6_addr = pi->ipi6_addr;
237 if (ifp) {
238 *errorp = in6_setscope(&srcsock.sin6_addr, ifp, NULL);
239 if (*errorp != 0)
240 goto exit;
241 }
242
243 ia6 = (struct in6_ifaddr *)ifa_ifwithaddr((struct sockaddr *)(&srcsock));
244 if (ia6 == NULL ||
245 (ia6->ia6_flags & (IN6_IFF_ANYCAST | IN6_IFF_NOTREADY))) {
246 *errorp = EADDRNOTAVAIL;
247 goto exit;
248 }
249 pi->ipi6_addr = srcsock.sin6_addr; /* XXX: this overrides pi */
250 if (ifpp)
251 *ifpp = ifp;
252 ret_ia = &ia6->ia_addr.sin6_addr;
253 goto exit;
254 }
255
256 /*
257 * If the socket has already bound the source, just use it. We don't
258 * care at the moment whether in6_selectif() succeeded above, even
259 * though it would eventually cause an error.
260 */
261 if (laddr && !IN6_IS_ADDR_UNSPECIFIED(laddr)) {
262 ret_ia = laddr;
263 goto exit;
264 }
265
266 /*
267 * The outgoing interface is crucial in the general selection procedure
268 * below. If it is not known at this point, we fail.
269 */
270 if (ifp == NULL) {
271 *errorp = error;
272 goto exit;
273 }
274
275 /*
276 * If the address is not yet determined, choose the best one based on
277 * the outgoing interface and the destination address.
278 */
279
280 #if defined(MIP6) && NMIP > 0
281 /*
282 * a caller can specify IP6PO_USECOA to not to use a home
283 * address. for example, the case that the neighbour
284 * unreachability detection to the global address.
285 */
286 if (opts != NULL &&
287 (opts->ip6po_flags & IP6PO_USECOA) != 0) {
288 ip6po_usecoa = 1;
289 }
290 #endif /* MIP6 && NMIP > 0 */
291
292 *errorp = in6_setscope(&dst, ifp, &odstzone);
293 if (*errorp != 0)
294 goto exit;
295
296 for (ia = in6_ifaddr; ia; ia = ia->ia_next) {
297 int new_scope = -1, new_matchlen = -1;
298 struct in6_addrpolicy *new_policy = NULL;
299 u_int32_t srczone, osrczone, dstzone;
300 struct in6_addr src;
301 struct ifnet *ifp1 = ia->ia_ifp;
302
303 /*
304 * We'll never take an address that breaks the scope zone
305 * of the destination. We also skip an address if its zone
306 * does not contain the outgoing interface.
307 * XXX: we should probably use sin6_scope_id here.
308 */
309 if (in6_setscope(&dst, ifp1, &dstzone) ||
310 odstzone != dstzone) {
311 continue;
312 }
313 src = ia->ia_addr.sin6_addr;
314 if (in6_setscope(&src, ifp, &osrczone) ||
315 in6_setscope(&src, ifp1, &srczone) ||
316 osrczone != srczone) {
317 continue;
318 }
319
320 /* avoid unusable addresses */
321 if ((ia->ia6_flags &
322 (IN6_IFF_NOTREADY | IN6_IFF_ANYCAST | IN6_IFF_DETACHED))) {
323 continue;
324 }
325 if (!ip6_use_deprecated && IFA6_IS_DEPRECATED(ia))
326 continue;
327
328 #if defined(MIP6) && NMIP > 0
329 /* avoid unusable home addresses. */
330 if ((ia->ia6_flags & IN6_IFF_HOME) &&
331 !mip6_ifa6_is_addr_valid_hoa(ia))
332 continue;
333 #endif /* MIP6 && NMIP > 0 */
334
335 /* Rule 1: Prefer same address */
336 if (IN6_ARE_ADDR_EQUAL(&dst, &ia->ia_addr.sin6_addr)) {
337 ia_best = ia;
338 BREAK(1); /* there should be no better candidate */
339 }
340
341 if (ia_best == NULL)
342 REPLACE(0);
343
344 /* Rule 2: Prefer appropriate scope */
345 if (dst_scope < 0)
346 dst_scope = in6_addrscope(&dst);
347 new_scope = in6_addrscope(&ia->ia_addr.sin6_addr);
348 if (IN6_ARE_SCOPE_CMP(best_scope, new_scope) < 0) {
349 if (IN6_ARE_SCOPE_CMP(best_scope, dst_scope) < 0)
350 REPLACE(2);
351 NEXT(2);
352 } else if (IN6_ARE_SCOPE_CMP(new_scope, best_scope) < 0) {
353 if (IN6_ARE_SCOPE_CMP(new_scope, dst_scope) < 0)
354 NEXT(2);
355 REPLACE(2);
356 }
357
358 /*
359 * Rule 3: Avoid deprecated addresses. Note that the case of
360 * !ip6_use_deprecated is already rejected above.
361 */
362 if (!IFA6_IS_DEPRECATED(ia_best) && IFA6_IS_DEPRECATED(ia))
363 NEXT(3);
364 if (IFA6_IS_DEPRECATED(ia_best) && !IFA6_IS_DEPRECATED(ia))
365 REPLACE(3);
366
367 /* Rule 4: Prefer home addresses */
368 #if defined(MIP6) && NMIP > 0
369 if (!MIP6_IS_MN)
370 goto skip_rule4;
371
372 if ((ia_best->ia6_flags & IN6_IFF_HOME) == 0 &&
373 (ia->ia6_flags & IN6_IFF_HOME) == 0) {
374 /* both address are not home addresses. */
375 goto skip_rule4;
376 }
377
378 /*
379 * If SA is simultaneously a home address and care-of
380 * address and SB is not, then prefer SA. Similarly,
381 * if SB is simultaneously a home address and care-of
382 * address and SA is not, then prefer SB.
383 */
384 if (((ia_best->ia6_flags & IN6_IFF_HOME) != 0 &&
385 ia_best->ia_ifp->if_type != IFT_MIP)
386 &&
387 ((ia->ia6_flags & IN6_IFF_HOME) != 0 &&
388 ia->ia_ifp->if_type == IFT_MIP))
389 NEXT(4);
390 if (((ia_best->ia6_flags & IN6_IFF_HOME) != 0 &&
391 ia_best->ia_ifp->if_type == IFT_MIP)
392 &&
393 ((ia->ia6_flags & IN6_IFF_HOME) != 0 &&
394 ia->ia_ifp->if_type != IFT_MIP))
395 REPLACE(4);
396 if (ip6po_usecoa == 0) {
397 /*
398 * If SA is just a home address and SB is just
399 * a care-of address, then prefer
400 * SA. Similarly, if SB is just a home address
401 * and SA is just a care-of address, then
402 * prefer SB.
403 */
404 if ((ia_best->ia6_flags & IN6_IFF_HOME) != 0 &&
405 (ia->ia6_flags & IN6_IFF_HOME) == 0) {
406 NEXT(4);
407 }
408 if ((ia_best->ia6_flags & IN6_IFF_HOME) == 0 &&
409 (ia->ia6_flags & IN6_IFF_HOME) != 0) {
410 REPLACE(4);
411 }
412 } else {
413 /*
414 * a sender don't want to use a home address
415 * because:
416 *
417 * 1) we cannot use. (ex. NS or NA to global
418 * addresses.)
419 *
420 * 2) a user specified not to use.
421 * (ex. mip6control -u)
422 */
423 if ((ia_best->ia6_flags & IN6_IFF_HOME) == 0 &&
424 (ia->ia6_flags & IN6_IFF_HOME) != 0) {
425 /* XXX breaks stat */
426 NEXT(0);
427 }
428 if ((ia_best->ia6_flags & IN6_IFF_HOME) != 0 &&
429 (ia->ia6_flags & IN6_IFF_HOME) == 0) {
430 /* XXX breaks stat */
431 REPLACE(0);
432 }
433 }
434 skip_rule4:
435 #endif /* MIP6 && NMIP > 0 */
436
437 /* Rule 5: Prefer outgoing interface */
438 if (ia_best->ia_ifp == ifp && ia->ia_ifp != ifp)
439 NEXT(5);
440 if (ia_best->ia_ifp != ifp && ia->ia_ifp == ifp)
441 REPLACE(5);
442
443 /*
444 * Rule 6: Prefer matching label
445 * Note that best_policy should be non-NULL here.
446 */
447 if (dst_policy == NULL)
448 dst_policy = lookup_addrsel_policy(dstsock);
449 if (dst_policy->label != ADDR_LABEL_NOTAPP) {
450 new_policy = lookup_addrsel_policy(&ia->ia_addr);
451 if (dst_policy->label == best_policy->label &&
452 dst_policy->label != new_policy->label)
453 NEXT(6);
454 if (dst_policy->label != best_policy->label &&
455 dst_policy->label == new_policy->label)
456 REPLACE(6);
457 }
458
459 /*
460 * Rule 7: Prefer public addresses.
461 * We allow users to reverse the logic by configuring
462 * a sysctl variable, so that privacy conscious users can
463 * always prefer temporary addresses.
464 */
465 if (opts == NULL ||
466 opts->ip6po_prefer_tempaddr == IP6PO_TEMPADDR_SYSTEM) {
467 prefer_tempaddr = ip6_prefer_tempaddr;
468 } else if (opts->ip6po_prefer_tempaddr ==
469 IP6PO_TEMPADDR_NOTPREFER) {
470 prefer_tempaddr = 0;
471 } else
472 prefer_tempaddr = 1;
473 if (!(ia_best->ia6_flags & IN6_IFF_TEMPORARY) &&
474 (ia->ia6_flags & IN6_IFF_TEMPORARY)) {
475 if (prefer_tempaddr)
476 REPLACE(7);
477 else
478 NEXT(7);
479 }
480 if ((ia_best->ia6_flags & IN6_IFF_TEMPORARY) &&
481 !(ia->ia6_flags & IN6_IFF_TEMPORARY)) {
482 if (prefer_tempaddr)
483 NEXT(7);
484 else
485 REPLACE(7);
486 }
487
488 /*
489 * Rule 8: prefer addresses on alive interfaces.
490 * This is a KAME specific rule.
491 */
492 if ((ia_best->ia_ifp->if_flags & IFF_UP) &&
493 !(ia->ia_ifp->if_flags & IFF_UP))
494 NEXT(8);
495 if (!(ia_best->ia_ifp->if_flags & IFF_UP) &&
496 (ia->ia_ifp->if_flags & IFF_UP))
497 REPLACE(8);
498
499 /*
500 * Rule 9: prefer addresses on "preferred" interfaces.
501 * This is a KAME specific rule.
502 */
503 #ifdef notyet /* until introducing address selection */
504 #define NDI_BEST ND_IFINFO(ia_best->ia_ifp)
505 #define NDI_NEW ND_IFINFO(ia->ia_ifp)
506 if ((NDI_BEST->flags & ND6_IFF_PREFER_SOURCE) &&
507 !(NDI_NEW->flags & ND6_IFF_PREFER_SOURCE))
508 NEXT(9);
509 if (!(NDI_BEST->flags & ND6_IFF_PREFER_SOURCE) &&
510 (NDI_NEW->flags & ND6_IFF_PREFER_SOURCE))
511 REPLACE(9);
512 #undef NDI_BEST
513 #undef NDI_NEW
514 #endif
515
516 /*
517 * Rule 14: Use longest matching prefix.
518 * Note: in the address selection draft, this rule is
519 * documented as "Rule 8". However, since it is also
520 * documented that this rule can be overridden, we assign
521 * a large number so that it is easy to assign smaller numbers
522 * to more preferred rules.
523 */
524 new_matchlen = in6_matchlen(&ia->ia_addr.sin6_addr, &dst);
525 if (best_matchlen < new_matchlen)
526 REPLACE(14);
527 if (new_matchlen < best_matchlen)
528 NEXT(14);
529
530 /* Rule 15 is reserved. */
531
532 /*
533 * Last resort: just keep the current candidate.
534 * Or, do we need more rules?
535 */
536 continue;
537
538 replace:
539 ia_best = ia;
540 best_scope = (new_scope >= 0 ? new_scope :
541 in6_addrscope(&ia_best->ia_addr.sin6_addr));
542 best_policy = (new_policy ? new_policy :
543 lookup_addrsel_policy(&ia_best->ia_addr));
544 best_matchlen = (new_matchlen >= 0 ? new_matchlen :
545 in6_matchlen(&ia_best->ia_addr.sin6_addr,
546 &dst));
547
548 next:
549 continue;
550
551 out:
552 break;
553 }
554
555 if ((ia = ia_best) == NULL) {
556 *errorp = EADDRNOTAVAIL;
557 goto exit;
558 }
559
560 ret_ia = &ia->ia_addr.sin6_addr;
561 exit:
562 if (ifpp == NULL)
563 if_put(ifp, PSREF);
564 curlwp_bindx(bound);
565 return ret_ia;
566 #undef PSREF
567 }
568 #undef REPLACE
569 #undef BREAK
570 #undef NEXT
571
572 static int
573 selectroute(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts,
574 struct ip6_moptions *mopts, struct route *ro, struct ifnet **retifp,
575 struct psref *psref, struct rtentry **retrt, int clone, int norouteok)
576 {
577 int error = 0;
578 struct ifnet *ifp = NULL;
579 struct rtentry *rt = NULL;
580 struct sockaddr_in6 *sin6_next;
581 struct in6_pktinfo *pi = NULL;
582 struct in6_addr *dst;
583 struct psref local_psref;
584 #define PSREF ((psref == NULL) ? &local_psref : psref)
585
586 KASSERT((retifp != NULL && psref != NULL) ||
587 (retifp == NULL && psref == NULL));
588
589 dst = &dstsock->sin6_addr;
590
591 #if 0
592 if (dstsock->sin6_addr.s6_addr32[0] == 0 &&
593 dstsock->sin6_addr.s6_addr32[1] == 0 &&
594 !IN6_IS_ADDR_LOOPBACK(&dstsock->sin6_addr)) {
595 printf("in6_selectroute: strange destination %s\n",
596 ip6_sprintf(&dstsock->sin6_addr));
597 } else {
598 printf("in6_selectroute: destination = %s%%%d\n",
599 ip6_sprintf(&dstsock->sin6_addr),
600 dstsock->sin6_scope_id); /* for debug */
601 }
602 #endif
603
604 /* If the caller specify the outgoing interface explicitly, use it. */
605 if (opts && (pi = opts->ip6po_pktinfo) != NULL && pi->ipi6_ifindex) {
606 /* XXX boundary check is assumed to be already done. */
607 ifp = if_get_byindex(pi->ipi6_ifindex, PSREF);
608 if (ifp != NULL &&
609 (norouteok || retrt == NULL ||
610 IN6_IS_ADDR_MULTICAST(dst))) {
611 /*
612 * we do not have to check or get the route for
613 * multicast.
614 */
615 goto done;
616 } else {
617 if_put(ifp, PSREF);
618 ifp = NULL;
619 goto getroute;
620 }
621 }
622
623 /*
624 * If the destination address is a multicast address and the outgoing
625 * interface for the address is specified by the caller, use it.
626 */
627 if (IN6_IS_ADDR_MULTICAST(dst) && mopts != NULL) {
628 ifp = if_get_byindex(mopts->im6o_multicast_if_index, PSREF);
629 if (ifp != NULL)
630 goto done; /* we do not need a route for multicast. */
631 }
632
633 getroute:
634 /*
635 * If the next hop address for the packet is specified by the caller,
636 * use it as the gateway.
637 */
638 if (opts && opts->ip6po_nexthop) {
639 struct route *ron;
640
641 sin6_next = satosin6(opts->ip6po_nexthop);
642
643 /* at this moment, we only support AF_INET6 next hops */
644 if (sin6_next->sin6_family != AF_INET6) {
645 error = EAFNOSUPPORT; /* or should we proceed? */
646 goto done;
647 }
648
649 /*
650 * If the next hop is an IPv6 address, then the node identified
651 * by that address must be a neighbor of the sending host.
652 */
653 ron = &opts->ip6po_nextroute;
654 if ((rt = rtcache_lookup(ron, sin6tosa(sin6_next))) == NULL ||
655 (rt->rt_flags & RTF_GATEWAY) != 0 ||
656 !nd6_is_addr_neighbor(sin6_next, rt->rt_ifp)) {
657 rtcache_free(ron);
658 error = EHOSTUNREACH;
659 goto done;
660 }
661 ifp = rt->rt_ifp;
662 if (ifp != NULL)
663 if_acquire_NOMPSAFE(ifp, PSREF);
664
665 /*
666 * When cloning is required, try to allocate a route to the
667 * destination so that the caller can store path MTU
668 * information.
669 */
670 if (!clone)
671 goto done;
672 }
673
674 /*
675 * Use a cached route if it exists and is valid, else try to allocate
676 * a new one. Note that we should check the address family of the
677 * cached destination, in case of sharing the cache with IPv4.
678 */
679 if (ro != NULL) {
680 union {
681 struct sockaddr dst;
682 struct sockaddr_in6 dst6;
683 } u;
684
685 /* No route yet, so try to acquire one */
686 u.dst6 = *dstsock;
687 u.dst6.sin6_scope_id = 0;
688 rt = rtcache_lookup1(ro, &u.dst, clone);
689
690 /*
691 * do not care about the result if we have the nexthop
692 * explicitly specified.
693 */
694 if (opts && opts->ip6po_nexthop)
695 goto done;
696
697 if (rt == NULL)
698 error = EHOSTUNREACH;
699 else {
700 if_put(ifp, PSREF);
701 ifp = rt->rt_ifp;
702 if (ifp != NULL)
703 if_acquire_NOMPSAFE(ifp, PSREF);
704 }
705
706 /*
707 * Check if the outgoing interface conflicts with
708 * the interface specified by ipi6_ifindex (if specified).
709 * Note that loopback interface is always okay.
710 * (this may happen when we are sending a packet to one of
711 * our own addresses.)
712 */
713 if (opts && opts->ip6po_pktinfo &&
714 opts->ip6po_pktinfo->ipi6_ifindex) {
715 if (!(ifp->if_flags & IFF_LOOPBACK) &&
716 ifp->if_index !=
717 opts->ip6po_pktinfo->ipi6_ifindex) {
718 error = EHOSTUNREACH;
719 goto done;
720 }
721 }
722 }
723
724 done:
725 if (ifp == NULL && rt == NULL) {
726 /*
727 * This can happen if the caller did not pass a cached route
728 * nor any other hints. We treat this case an error.
729 */
730 error = EHOSTUNREACH;
731 }
732 if (error == EHOSTUNREACH)
733 IP6_STATINC(IP6_STAT_NOROUTE);
734
735 if (retifp != NULL)
736 *retifp = ifp;
737 else
738 if_put(ifp, PSREF);
739 if (retrt != NULL)
740 *retrt = rt; /* rt may be NULL */
741
742 return (error);
743 #undef PSREF
744 }
745
746 static int
747 in6_selectif(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts,
748 struct ip6_moptions *mopts, struct route *ro, struct ifnet **retifp,
749 struct psref *psref)
750 {
751 int error, clone;
752 struct rtentry *rt = NULL;
753
754 KASSERT(retifp != NULL);
755 *retifp = NULL;
756
757 clone = IN6_IS_ADDR_MULTICAST(&dstsock->sin6_addr) ? 0 : 1;
758 if ((error = selectroute(dstsock, opts, mopts, ro, retifp, psref,
759 &rt, clone, 1)) != 0) {
760 return (error);
761 }
762
763 /*
764 * do not use a rejected or black hole route.
765 * XXX: this check should be done in the L2 output routine.
766 * However, if we skipped this check here, we'd see the following
767 * scenario:
768 * - install a rejected route for a scoped address prefix
769 * (like fe80::/10)
770 * - send a packet to a destination that matches the scoped prefix,
771 * with ambiguity about the scope zone.
772 * - pick the outgoing interface from the route, and disambiguate the
773 * scope zone with the interface.
774 * - ip6_output() would try to get another route with the "new"
775 * destination, which may be valid.
776 * - we'd see no error on output.
777 * Although this may not be very harmful, it should still be confusing.
778 * We thus reject the case here.
779 */
780 if (rt && (rt->rt_flags & (RTF_REJECT | RTF_BLACKHOLE)))
781 return (rt->rt_flags & RTF_HOST ? EHOSTUNREACH : ENETUNREACH);
782
783 /*
784 * Adjust the "outgoing" interface. If we're going to loop the packet
785 * back to ourselves, the ifp would be the loopback interface.
786 * However, we'd rather know the interface associated to the
787 * destination address (which should probably be one of our own
788 * addresses.)
789 */
790 if (rt && rt->rt_ifa && rt->rt_ifa->ifa_ifp &&
791 rt->rt_ifa->ifa_ifp != *retifp) {
792 if_put(*retifp, psref);
793 *retifp = rt->rt_ifa->ifa_ifp;
794 if_acquire_NOMPSAFE(*retifp, psref);
795 }
796
797 return (0);
798 }
799
/*
 * in6_selectroute: externally visible front end to selectroute().
 *
 * All arguments are forwarded unchanged, with norouteok fixed to 0, so an
 * explicitly specified interface is not accepted without a route when the
 * caller asks for one via retrt.
 *
 * NOTE(review): the historical comment here read "close - meaningful only
 * for bsdi and freebsd"; presumably it referred to the clone argument.
 */
int
in6_selectroute(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts,
	struct ip6_moptions *mopts, struct route *ro, struct ifnet **retifp,
	struct psref *psref, struct rtentry **retrt, int clone)
{
	int error;

	error = selectroute(dstsock, opts, mopts, ro, retifp, psref,
	    retrt, clone, 0);
	return error;
}
812
813 /*
814 * Default hop limit selection. The precedence is as follows:
815 * 1. Hoplimit value specified via ioctl.
816 * 2. (If the outgoing interface is detected) the current
817 * hop limit of the interface specified by router advertisement.
818 * 3. The system default hoplimit.
819 */
820 int
821 in6_selecthlim(struct in6pcb *in6p, struct ifnet *ifp)
822 {
823 if (in6p && in6p->in6p_hops >= 0)
824 return (in6p->in6p_hops);
825 else if (ifp)
826 return (ND_IFINFO(ifp)->chlim);
827 else
828 return (ip6_defhlim);
829 }
830
831 int
832 in6_selecthlim_rt(struct in6pcb *in6p)
833 {
834 struct rtentry *rt;
835
836 if (in6p == NULL)
837 return in6_selecthlim(in6p, NULL);
838
839 rt = rtcache_validate(&in6p->in6p_route);
840 if (rt != NULL)
841 return in6_selecthlim(in6p, rt->rt_ifp);
842 else
843 return in6_selecthlim(in6p, NULL);
844 }
845
846 /*
847 * Find an empty port and set it to the specified PCB.
848 */
849 int
850 in6_pcbsetport(struct sockaddr_in6 *sin6, struct in6pcb *in6p, struct lwp *l)
851 {
852 struct socket *so = in6p->in6p_socket;
853 struct inpcbtable *table = in6p->in6p_table;
854 u_int16_t lport, *lastport;
855 enum kauth_network_req req;
856 int error = 0;
857
858 if (in6p->in6p_flags & IN6P_LOWPORT) {
859 #ifndef IPNOPRIVPORTS
860 req = KAUTH_REQ_NETWORK_BIND_PRIVPORT;
861 #else
862 req = KAUTH_REQ_NETWORK_BIND_PORT;
863 #endif
864 lastport = &table->inpt_lastlow;
865 } else {
866 req = KAUTH_REQ_NETWORK_BIND_PORT;
867
868 lastport = &table->inpt_lastport;
869 }
870
871 /* XXX-kauth: KAUTH_REQ_NETWORK_BIND_AUTOASSIGN_{,PRIV}PORT */
872 error = kauth_authorize_network(l->l_cred, KAUTH_NETWORK_BIND, req, so,
873 sin6, NULL);
874 if (error)
875 return (EACCES);
876
877 /*
878 * Use RFC6056 randomized port selection
879 */
880 error = portalgo_randport(&lport, &in6p->in6p_head, l->l_cred);
881 if (error)
882 return error;
883
884 in6p->in6p_flags |= IN6P_ANONPORT;
885 *lastport = lport;
886 in6p->in6p_lport = htons(lport);
887 in6_pcbstate(in6p, IN6P_BOUND);
888 return (0); /* success */
889 }
890
891 void
892 addrsel_policy_init(void)
893 {
894 init_policy_queue();
895
896 /* initialize the "last resort" policy */
897 memset(&defaultaddrpolicy, 0, sizeof(defaultaddrpolicy));
898 defaultaddrpolicy.label = ADDR_LABEL_NOTAPP;
899 }
900
901 static struct in6_addrpolicy *
902 lookup_addrsel_policy(struct sockaddr_in6 *key)
903 {
904 struct in6_addrpolicy *match = NULL;
905
906 match = match_addrsel_policy(key);
907
908 if (match == NULL)
909 match = &defaultaddrpolicy;
910 else
911 match->use++;
912
913 return (match);
914 }
915
916 /*
917 * Subroutines to manage the address selection policy table via sysctl.
918 */
919 struct sel_walkarg {
920 size_t w_total;
921 size_t w_given;
922 void * w_where;
923 void *w_limit;
924 };
925
926 int sysctl_net_inet6_addrctlpolicy(SYSCTLFN_ARGS);
927 int
928 sysctl_net_inet6_addrctlpolicy(SYSCTLFN_ARGS)
929 {
930 int error = 0;
931 int s;
932
933 s = splsoftnet();
934
935 if (newp) {
936 error = EPERM;
937 goto end;
938 }
939 if (oldp && oldlenp == NULL) {
940 error = EINVAL;
941 goto end;
942 }
943 if (oldp || oldlenp) {
944 struct sel_walkarg w;
945 size_t oldlen = *oldlenp;
946
947 memset(&w, 0, sizeof(w));
948 w.w_given = oldlen;
949 w.w_where = oldp;
950 if (oldp)
951 w.w_limit = (char *)oldp + oldlen;
952
953 error = walk_addrsel_policy(dump_addrsel_policyent, &w);
954
955 *oldlenp = w.w_total;
956 if (oldp && w.w_total > oldlen && error == 0)
957 error = ENOMEM;
958 }
959
960 end:
961 splx(s);
962
963 return (error);
964 }
965
966 int
967 in6_src_ioctl(u_long cmd, void *data)
968 {
969 int i;
970 struct in6_addrpolicy ent0;
971
972 if (cmd != SIOCAADDRCTL_POLICY && cmd != SIOCDADDRCTL_POLICY)
973 return (EOPNOTSUPP); /* check for safety */
974
975 ent0 = *(struct in6_addrpolicy *)data;
976
977 if (ent0.label == ADDR_LABEL_NOTAPP)
978 return (EINVAL);
979 /* check if the prefix mask is consecutive. */
980 if (in6_mask2len(&ent0.addrmask.sin6_addr, NULL) < 0)
981 return (EINVAL);
982 /* clear trailing garbages (if any) of the prefix address. */
983 for (i = 0; i < 4; i++) {
984 ent0.addr.sin6_addr.s6_addr32[i] &=
985 ent0.addrmask.sin6_addr.s6_addr32[i];
986 }
987 ent0.use = 0;
988
989 switch (cmd) {
990 case SIOCAADDRCTL_POLICY:
991 return (add_addrsel_policyent(&ent0));
992 case SIOCDADDRCTL_POLICY:
993 return (delete_addrsel_policyent(&ent0));
994 }
995
996 return (0); /* XXX: compromise compilers */
997 }
998
999 /*
1000 * The followings are implementation of the policy table using a
1001 * simple tail queue.
1002 * XXX such details should be hidden.
1003 * XXX implementation using binary tree should be more efficient.
1004 */
1005 struct addrsel_policyent {
1006 TAILQ_ENTRY(addrsel_policyent) ape_entry;
1007 struct in6_addrpolicy ape_policy;
1008 };
1009
1010 TAILQ_HEAD(addrsel_policyhead, addrsel_policyent);
1011
1012 struct addrsel_policyhead addrsel_policytab;
1013
1014 static void
1015 init_policy_queue(void)
1016 {
1017 TAILQ_INIT(&addrsel_policytab);
1018 }
1019
1020 static int
1021 add_addrsel_policyent(struct in6_addrpolicy *newpolicy)
1022 {
1023 struct addrsel_policyent *newpol, *pol;
1024
1025 /* duplication check */
1026 TAILQ_FOREACH(pol, &addrsel_policytab, ape_entry) {
1027 if (IN6_ARE_ADDR_EQUAL(&newpolicy->addr.sin6_addr,
1028 &pol->ape_policy.addr.sin6_addr) &&
1029 IN6_ARE_ADDR_EQUAL(&newpolicy->addrmask.sin6_addr,
1030 &pol->ape_policy.addrmask.sin6_addr)) {
1031 return (EEXIST); /* or override it? */
1032 }
1033 }
1034
1035 newpol = malloc(sizeof(*newpol), M_IFADDR, M_WAITOK|M_ZERO);
1036
1037 /* XXX: should validate entry */
1038 newpol->ape_policy = *newpolicy;
1039
1040 TAILQ_INSERT_TAIL(&addrsel_policytab, newpol, ape_entry);
1041
1042 return (0);
1043 }
1044
1045 static int
1046 delete_addrsel_policyent(struct in6_addrpolicy *key)
1047 {
1048 struct addrsel_policyent *pol;
1049
1050 /* search for the entry in the table */
1051 for (pol = TAILQ_FIRST(&addrsel_policytab); pol;
1052 pol = TAILQ_NEXT(pol, ape_entry)) {
1053 if (IN6_ARE_ADDR_EQUAL(&key->addr.sin6_addr,
1054 &pol->ape_policy.addr.sin6_addr) &&
1055 IN6_ARE_ADDR_EQUAL(&key->addrmask.sin6_addr,
1056 &pol->ape_policy.addrmask.sin6_addr)) {
1057 break;
1058 }
1059 }
1060 if (pol == NULL) {
1061 return (ESRCH);
1062 }
1063
1064 TAILQ_REMOVE(&addrsel_policytab, pol, ape_entry);
1065
1066 return (0);
1067 }
1068
1069 static int
1070 walk_addrsel_policy(int (*callback)(struct in6_addrpolicy *, void *), void *w)
1071 {
1072 struct addrsel_policyent *pol;
1073 int error = 0;
1074
1075 TAILQ_FOREACH(pol, &addrsel_policytab, ape_entry) {
1076 if ((error = (*callback)(&pol->ape_policy, w)) != 0)
1077 return error;
1078 }
1079
1080 return error;
1081 }
1082
1083 static int
1084 dump_addrsel_policyent(struct in6_addrpolicy *pol, void *arg)
1085 {
1086 int error = 0;
1087 struct sel_walkarg *w = arg;
1088
1089 if (w->w_where && (char *)w->w_where + sizeof(*pol) <= (char *)w->w_limit) {
1090 if ((error = copyout(pol, w->w_where, sizeof(*pol))) != 0)
1091 return error;
1092 w->w_where = (char *)w->w_where + sizeof(*pol);
1093 }
1094 w->w_total += sizeof(*pol);
1095
1096 return error;
1097 }
1098
1099 static struct in6_addrpolicy *
1100 match_addrsel_policy(struct sockaddr_in6 *key)
1101 {
1102 struct addrsel_policyent *pent;
1103 struct in6_addrpolicy *bestpol = NULL, *pol;
1104 int matchlen, bestmatchlen = -1;
1105 u_char *mp, *ep, *k, *p, m;
1106
1107 for (pent = TAILQ_FIRST(&addrsel_policytab); pent;
1108 pent = TAILQ_NEXT(pent, ape_entry)) {
1109 matchlen = 0;
1110
1111 pol = &pent->ape_policy;
1112 mp = (u_char *)&pol->addrmask.sin6_addr;
1113 ep = mp + 16; /* XXX: scope field? */
1114 k = (u_char *)&key->sin6_addr;
1115 p = (u_char *)&pol->addr.sin6_addr;
1116 for (; mp < ep && *mp; mp++, k++, p++) {
1117 m = *mp;
1118 if ((*k & m) != *p)
1119 goto next; /* not match */
1120 if (m == 0xff) /* short cut for a typical case */
1121 matchlen += 8;
1122 else {
1123 while (m >= 0x80) {
1124 matchlen++;
1125 m <<= 1;
1126 }
1127 }
1128 }
1129
1130 /* matched. check if this is better than the current best. */
1131 if (bestpol == NULL ||
1132 matchlen > bestmatchlen) {
1133 bestpol = pol;
1134 bestmatchlen = matchlen;
1135 }
1136
1137 next:
1138 continue;
1139 }
1140
1141 return (bestpol);
1142 }
1143