in6_src.c revision 1.23 1 /* $KAME: in6_src.c,v 1.159 2005/10/19 01:40:32 t-momose Exp $ */
2
3 /*
4 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of the project nor the names of its contributors
16 * may be used to endorse or promote products derived from this software
17 * without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 */
31
32 /*
33 * Copyright (c) 1982, 1986, 1991, 1993
34 * The Regents of the University of California. All rights reserved.
35 *
36 * Redistribution and use in source and binary forms, with or without
37 * modification, are permitted provided that the following conditions
38 * are met:
39 * 1. Redistributions of source code must retain the above copyright
40 * notice, this list of conditions and the following disclaimer.
41 * 2. Redistributions in binary form must reproduce the above copyright
42 * notice, this list of conditions and the following disclaimer in the
43 * documentation and/or other materials provided with the distribution.
44 * 3. All advertising materials mentioning features or use of this software
45 * must display the following acknowledgement:
46 * This product includes software developed by the University of
47 * California, Berkeley and its contributors.
48 * 4. Neither the name of the University nor the names of its contributors
49 * may be used to endorse or promote products derived from this software
50 * without specific prior written permission.
51 *
52 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
53 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
54 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
55 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
56 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
57 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
58 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
59 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
60 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
61 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
62 * SUCH DAMAGE.
63 *
64 * @(#)in_pcb.c 8.2 (Berkeley) 1/4/94
65 */
66
67 #include <sys/cdefs.h>
68 __KERNEL_RCSID(0, "$NetBSD: in6_src.c,v 1.23 2006/01/21 00:15:36 rpaulo Exp $");
69
70 #include "opt_inet.h"
71
72 #include <sys/param.h>
73 #include <sys/systm.h>
74 #include <sys/malloc.h>
75 #include <sys/mbuf.h>
76 #include <sys/protosw.h>
77 #include <sys/socket.h>
78 #include <sys/socketvar.h>
79 #ifndef __FreeBSD__
80 #include <sys/ioctl.h>
81 #else
82 #include <sys/sockio.h>
83 #endif
84 #ifdef __FreeBSD__
85 #include <sys/sysctl.h>
86 #endif
87 #include <sys/errno.h>
88 #include <sys/time.h>
89 #include <sys/kernel.h>
90 #include <sys/proc.h>
91
92 #include <net/if.h>
93 #include <net/if_types.h>
94 #include <net/route.h>
95 #ifdef RADIX_MPATH
96 #include <net/radix_mpath.h>
97 #endif
98
99 #include <netinet/in.h>
100 #include <netinet/in_var.h>
101 #include <netinet/in_systm.h>
102 #include <netinet/ip.h>
103 #include <netinet/in_pcb.h>
104 #include <netinet6/in6_var.h>
105 #include <netinet/ip6.h>
106 #ifndef __OpenBSD__
107 #include <netinet6/in6_pcb.h>
108 #endif
109 #include <netinet6/ip6_var.h>
110 #include <netinet6/nd6.h>
111 #include <netinet6/scope6_var.h>
112
113 #include <net/net_osdep.h>
114
115 #ifdef MIP6
116 #include <netinet6/mip6.h>
117 #include <netinet6/mip6_var.h>
118 #include "mip.h"
119 #if NMIP > 0
120 #include <net/if_mip.h>
121 #endif /* NMIP > 0 */
122 #endif /* MIP6 */
123
124 #ifndef __OpenBSD__
125 #include "loop.h"
126 #endif
127 #ifdef __NetBSD__
128 extern struct ifnet loif[NLOOP];
129 #endif
130
131 #define ADDR_LABEL_NOTAPP (-1)
132 struct in6_addrpolicy defaultaddrpolicy;
133
134 #ifdef notyet /* until introducing ND extensions and address selection */
135 int ip6_prefer_tempaddr = 0;
136 #endif
137
138 static int selectroute __P((struct sockaddr_in6 *, struct ip6_pktopts *,
139 struct ip6_moptions *, struct route_in6 *, struct ifnet **,
140 struct rtentry **, int, int));
141 static int in6_selectif __P((struct sockaddr_in6 *, struct ip6_pktopts *,
142 struct ip6_moptions *, struct route_in6 *, struct ifnet **));
143
144 static struct in6_addrpolicy *lookup_addrsel_policy __P((struct sockaddr_in6 *));
145
146 static void init_policy_queue __P((void));
147 static int add_addrsel_policyent __P((struct in6_addrpolicy *));
148 static int delete_addrsel_policyent __P((struct in6_addrpolicy *));
149 static int walk_addrsel_policy __P((int (*)(struct in6_addrpolicy *, void *),
150 void *));
151 static int dump_addrsel_policyent __P((struct in6_addrpolicy *, void *));
152 static struct in6_addrpolicy *match_addrsel_policy __P((struct sockaddr_in6 *));
153
154 /*
155 * Return an IPv6 address, which is the most appropriate for a given
156 * destination and user specified options.
157 * If necessary, this function lookups the routing table and returns
158 * an entry to the caller for later use.
159 */
160 #if 0 /* diabled ad-hoc */
161 #define REPLACE(r) do {\
162 if ((r) < sizeof(ip6stat.ip6s_sources_rule) / \
163 sizeof(ip6stat.ip6s_sources_rule[0])) /* check for safety */ \
164 ip6stat.ip6s_sources_rule[(r)]++; \
165 /* printf("in6_selectsrc: replace %s with %s by %d\n", ia_best ? ip6_sprintf(&ia_best->ia_addr.sin6_addr) : "none", ip6_sprintf(&ia->ia_addr.sin6_addr), (r)); */ \
166 goto replace; \
167 } while(0)
168 #define NEXT(r) do {\
169 if ((r) < sizeof(ip6stat.ip6s_sources_rule) / \
170 sizeof(ip6stat.ip6s_sources_rule[0])) /* check for safety */ \
171 ip6stat.ip6s_sources_rule[(r)]++; \
172 /* printf("in6_selectsrc: keep %s against %s by %d\n", ia_best ? ip6_sprintf(&ia_best->ia_addr.sin6_addr) : "none", ip6_sprintf(&ia->ia_addr.sin6_addr), (r)); */ \
173 goto next; /* XXX: we can't use 'continue' here */ \
174 } while(0)
175 #define BREAK(r) do { \
176 if ((r) < sizeof(ip6stat.ip6s_sources_rule) / \
177 sizeof(ip6stat.ip6s_sources_rule[0])) /* check for safety */ \
178 ip6stat.ip6s_sources_rule[(r)]++; \
179 goto out; /* XXX: we can't use 'break' here */ \
180 } while(0)
181 #else
182 #define REPLACE(r) goto replace
183 #define NEXT(r) goto next
184 #define BREAK(r) goto out
185 #endif
186
187 struct in6_addr *
188 in6_selectsrc(dstsock, opts, mopts, ro, laddr, ifpp, errorp)
189 struct sockaddr_in6 *dstsock;
190 struct ip6_pktopts *opts;
191 struct ip6_moptions *mopts;
192 struct route_in6 *ro;
193 struct in6_addr *laddr;
194 struct ifnet **ifpp;
195 int *errorp;
196 {
197 struct in6_addr dst;
198 struct ifnet *ifp = NULL;
199 struct in6_ifaddr *ia = NULL, *ia_best = NULL;
200 struct in6_pktinfo *pi = NULL;
201 int dst_scope = -1, best_scope = -1, best_matchlen = -1;
202 struct in6_addrpolicy *dst_policy = NULL, *best_policy = NULL;
203 u_int32_t odstzone;
204 #ifdef notyet /* until introducing ND extensions and address selection */
205 int prefer_tempaddr;
206 #endif
207 #if defined(MIP6) && NMIP > 0
208 u_int8_t ip6po_usecoa = 0;
209 #endif /* MIP6 && NMIP > 0 */
210
211 dst = dstsock->sin6_addr; /* make a copy for local operation */
212 *errorp = 0;
213 if (ifpp)
214 *ifpp = NULL;
215
216 /*
217 * If the source address is explicitly specified by the caller,
218 * check if the requested source address is indeed a unicast address
219 * assigned to the node, and can be used as the packet's source
220 * address. If everything is okay, use the address as source.
221 */
222 if (opts && (pi = opts->ip6po_pktinfo) &&
223 !IN6_IS_ADDR_UNSPECIFIED(&pi->ipi6_addr)) {
224 struct sockaddr_in6 srcsock;
225 struct in6_ifaddr *ia6;
226
227 /* get the outgoing interface */
228 if ((*errorp = in6_selectif(dstsock, opts, mopts, ro, &ifp))
229 != 0) {
230 return (NULL);
231 }
232
233 /*
234 * Determine the appropriate zone id of the source based on
235 * the zone of the destination and the outgoing interface.
236 * If the specified address is ambiguous wrt the scope zone,
237 * the interface must be specified; otherwise, ifa_ifwithaddr()
238 * will fail matching the address.
239 */
240 bzero(&srcsock, sizeof(srcsock));
241 srcsock.sin6_family = AF_INET6;
242 srcsock.sin6_len = sizeof(srcsock);
243 srcsock.sin6_addr = pi->ipi6_addr;
244 if (ifp) {
245 *errorp = in6_setscope(&srcsock.sin6_addr, ifp, NULL);
246 if (*errorp != 0)
247 return (NULL);
248 }
249
250 ia6 = (struct in6_ifaddr *)ifa_ifwithaddr((struct sockaddr *)(&srcsock));
251 if (ia6 == NULL ||
252 (ia6->ia6_flags & (IN6_IFF_ANYCAST | IN6_IFF_NOTREADY))) {
253 *errorp = EADDRNOTAVAIL;
254 return (NULL);
255 }
256 pi->ipi6_addr = srcsock.sin6_addr; /* XXX: this overrides pi */
257 if (ifpp)
258 *ifpp = ifp;
259 return (&ia6->ia_addr.sin6_addr);
260 }
261
262 /*
263 * Otherwise, if the socket has already bound the source, just use it.
264 */
265 if (laddr && !IN6_IS_ADDR_UNSPECIFIED(laddr))
266 return (laddr);
267
268 /*
269 * If the address is not specified, choose the best one based on
270 * the outgoing interface and the destination address.
271 */
272 /* get the outgoing interface */
273 if ((*errorp = in6_selectif(dstsock, opts, mopts, ro, &ifp)) != 0)
274 return (NULL);
275
276 #if defined(MIP6) && NMIP > 0
277 /*
278 * a caller can specify IP6PO_USECOA to not to use a home
279 * address. for example, the case that the neighbour
280 * unreachability detection to the global address.
281 */
282 if (opts != NULL &&
283 (opts->ip6po_flags & IP6PO_USECOA) != 0) {
284 ip6po_usecoa = 1;
285 }
286 #endif /* MIP6 && NMIP > 0 */
287
288 #ifdef DIAGNOSTIC
289 if (ifp == NULL) /* this should not happen */
290 panic("in6_selectsrc: NULL ifp");
291 #endif
292 *errorp = in6_setscope(&dst, ifp, &odstzone);
293 if (*errorp != 0)
294 return (NULL);
295
296 for (ia = in6_ifaddr; ia; ia = ia->ia_next) {
297 int new_scope = -1, new_matchlen = -1;
298 struct in6_addrpolicy *new_policy = NULL;
299 u_int32_t srczone, osrczone, dstzone;
300 struct in6_addr src;
301 struct ifnet *ifp1 = ia->ia_ifp;
302
303 /*
304 * We'll never take an address that breaks the scope zone
305 * of the destination. We also skip an address if its zone
306 * does not contain the outgoing interface.
307 * XXX: we should probably use sin6_scope_id here.
308 */
309 if (in6_setscope(&dst, ifp1, &dstzone) ||
310 odstzone != dstzone) {
311 continue;
312 }
313 src = ia->ia_addr.sin6_addr;
314 if (in6_setscope(&src, ifp, &osrczone) ||
315 in6_setscope(&src, ifp1, &srczone) ||
316 osrczone != srczone) {
317 continue;
318 }
319
320 /* avoid unusable addresses */
321 if ((ia->ia6_flags &
322 (IN6_IFF_NOTREADY | IN6_IFF_ANYCAST | IN6_IFF_DETACHED))) {
323 continue;
324 }
325 if (!ip6_use_deprecated && IFA6_IS_DEPRECATED(ia))
326 continue;
327
328 #if defined(MIP6) && NMIP > 0
329 /* avoid unusable home addresses. */
330 if ((ia->ia6_flags & IN6_IFF_HOME) &&
331 !mip6_ifa6_is_addr_valid_hoa(ia))
332 continue;
333 #endif /* MIP6 && NMIP > 0 */
334
335 /* Rule 1: Prefer same address */
336 if (IN6_ARE_ADDR_EQUAL(&dst, &ia->ia_addr.sin6_addr)) {
337 ia_best = ia;
338 BREAK(1); /* there should be no better candidate */
339 }
340
341 if (ia_best == NULL)
342 REPLACE(0);
343
344 /* Rule 2: Prefer appropriate scope */
345 if (dst_scope < 0)
346 dst_scope = in6_addrscope(&dst);
347 new_scope = in6_addrscope(&ia->ia_addr.sin6_addr);
348 if (IN6_ARE_SCOPE_CMP(best_scope, new_scope) < 0) {
349 if (IN6_ARE_SCOPE_CMP(best_scope, dst_scope) < 0)
350 REPLACE(2);
351 NEXT(2);
352 } else if (IN6_ARE_SCOPE_CMP(new_scope, best_scope) < 0) {
353 if (IN6_ARE_SCOPE_CMP(new_scope, dst_scope) < 0)
354 NEXT(2);
355 REPLACE(2);
356 }
357
358 /*
359 * Rule 3: Avoid deprecated addresses. Note that the case of
360 * !ip6_use_deprecated is already rejected above.
361 */
362 if (!IFA6_IS_DEPRECATED(ia_best) && IFA6_IS_DEPRECATED(ia))
363 NEXT(3);
364 if (IFA6_IS_DEPRECATED(ia_best) && !IFA6_IS_DEPRECATED(ia))
365 REPLACE(3);
366
367 /* Rule 4: Prefer home addresses */
368 #if defined(MIP6) && NMIP > 0
369 if (!MIP6_IS_MN)
370 goto skip_rule4;
371
372 if ((ia_best->ia6_flags & IN6_IFF_HOME) == 0 &&
373 (ia->ia6_flags & IN6_IFF_HOME) == 0) {
374 /* both address are not home addresses. */
375 goto skip_rule4;
376 }
377
378 /*
379 * If SA is simultaneously a home address and care-of
380 * address and SB is not, then prefer SA. Similarly,
381 * if SB is simultaneously a home address and care-of
382 * address and SA is not, then prefer SB.
383 */
384 if (((ia_best->ia6_flags & IN6_IFF_HOME) != 0 &&
385 ia_best->ia_ifp->if_type != IFT_MIP)
386 &&
387 ((ia->ia6_flags & IN6_IFF_HOME) != 0 &&
388 ia->ia_ifp->if_type == IFT_MIP))
389 NEXT(4);
390 if (((ia_best->ia6_flags & IN6_IFF_HOME) != 0 &&
391 ia_best->ia_ifp->if_type == IFT_MIP)
392 &&
393 ((ia->ia6_flags & IN6_IFF_HOME) != 0 &&
394 ia->ia_ifp->if_type != IFT_MIP))
395 REPLACE(4);
396 if (ip6po_usecoa == 0) {
397 /*
398 * If SA is just a home address and SB is just
399 * a care-of address, then prefer
400 * SA. Similarly, if SB is just a home address
401 * and SA is just a care-of address, then
402 * prefer SB.
403 */
404 if ((ia_best->ia6_flags & IN6_IFF_HOME) != 0 &&
405 (ia->ia6_flags & IN6_IFF_HOME) == 0) {
406 NEXT(4);
407 }
408 if ((ia_best->ia6_flags & IN6_IFF_HOME) == 0 &&
409 (ia->ia6_flags & IN6_IFF_HOME) != 0) {
410 REPLACE(4);
411 }
412 } else {
413 /*
414 * a sender don't want to use a home address
415 * because:
416 *
417 * 1) we cannot use. (ex. NS or NA to global
418 * addresses.)
419 *
420 * 2) a user specified not to use.
421 * (ex. mip6control -u)
422 */
423 if ((ia_best->ia6_flags & IN6_IFF_HOME) == 0 &&
424 (ia->ia6_flags & IN6_IFF_HOME) != 0) {
425 /* XXX breaks stat */
426 NEXT(0);
427 }
428 if ((ia_best->ia6_flags & IN6_IFF_HOME) != 0 &&
429 (ia->ia6_flags & IN6_IFF_HOME) == 0) {
430 /* XXX breaks stat */
431 REPLACE(0);
432 }
433 }
434 skip_rule4:
435 #endif /* MIP6 && NMIP > 0 */
436
437 /* Rule 5: Prefer outgoing interface */
438 if (ia_best->ia_ifp == ifp && ia->ia_ifp != ifp)
439 NEXT(5);
440 if (ia_best->ia_ifp != ifp && ia->ia_ifp == ifp)
441 REPLACE(5);
442
443 /*
444 * Rule 6: Prefer matching label
445 * Note that best_policy should be non-NULL here.
446 */
447 if (dst_policy == NULL)
448 dst_policy = lookup_addrsel_policy(dstsock);
449 if (dst_policy->label != ADDR_LABEL_NOTAPP) {
450 new_policy = lookup_addrsel_policy(&ia->ia_addr);
451 if (dst_policy->label == best_policy->label &&
452 dst_policy->label != new_policy->label)
453 NEXT(6);
454 if (dst_policy->label != best_policy->label &&
455 dst_policy->label == new_policy->label)
456 REPLACE(6);
457 }
458
459 /*
460 * Rule 7: Prefer public addresses.
461 * We allow users to reverse the logic by configuring
462 * a sysctl variable, so that privacy conscious users can
463 * always prefer temporary addresses.
464 */
465 #ifdef notyet /* until introducing ND extensions and address selection */
466 if (opts == NULL ||
467 opts->ip6po_prefer_tempaddr == IP6PO_TEMPADDR_SYSTEM) {
468 prefer_tempaddr = ip6_prefer_tempaddr;
469 } else if (opts->ip6po_prefer_tempaddr ==
470 IP6PO_TEMPADDR_NOTPREFER) {
471 prefer_tempaddr = 0;
472 } else
473 prefer_tempaddr = 1;
474 if (!(ia_best->ia6_flags & IN6_IFF_TEMPORARY) &&
475 (ia->ia6_flags & IN6_IFF_TEMPORARY)) {
476 if (prefer_tempaddr)
477 REPLACE(7);
478 else
479 NEXT(7);
480 }
481 if ((ia_best->ia6_flags & IN6_IFF_TEMPORARY) &&
482 !(ia->ia6_flags & IN6_IFF_TEMPORARY)) {
483 if (prefer_tempaddr)
484 NEXT(7);
485 else
486 REPLACE(7);
487 }
488 #endif
489
490 /*
491 * Rule 8: prefer addresses on alive interfaces.
492 * This is a KAME specific rule.
493 */
494 if ((ia_best->ia_ifp->if_flags & IFF_UP) &&
495 !(ia->ia_ifp->if_flags & IFF_UP))
496 NEXT(8);
497 if (!(ia_best->ia_ifp->if_flags & IFF_UP) &&
498 (ia->ia_ifp->if_flags & IFF_UP))
499 REPLACE(8);
500
501 /*
502 * Rule 9: prefer addresses on "preferred" interfaces.
503 * This is a KAME specific rule.
504 */
505 #ifdef notyet /* until introducing address selection */
506 #define NDI_BEST ND_IFINFO(ia_best->ia_ifp)
507 #define NDI_NEW ND_IFINFO(ia->ia_ifp)
508 if ((NDI_BEST->flags & ND6_IFF_PREFER_SOURCE) &&
509 !(NDI_NEW->flags & ND6_IFF_PREFER_SOURCE))
510 NEXT(9);
511 if (!(NDI_BEST->flags & ND6_IFF_PREFER_SOURCE) &&
512 (NDI_NEW->flags & ND6_IFF_PREFER_SOURCE))
513 REPLACE(9);
514 #undef NDI_BEST
515 #undef NDI_NEW
516 #endif
517
518 /*
519 * Rule 14: Use longest matching prefix.
520 * Note: in the address selection draft, this rule is
521 * documented as "Rule 8". However, since it is also
522 * documented that this rule can be overridden, we assign
523 * a large number so that it is easy to assign smaller numbers
524 * to more preferred rules.
525 */
526 new_matchlen = in6_matchlen(&ia->ia_addr.sin6_addr, &dst);
527 if (best_matchlen < new_matchlen)
528 REPLACE(14);
529 if (new_matchlen < best_matchlen)
530 NEXT(14);
531
532 /* Rule 15 is reserved. */
533
534 /*
535 * Last resort: just keep the current candidate.
536 * Or, do we need more rules?
537 */
538 continue;
539
540 replace:
541 ia_best = ia;
542 best_scope = (new_scope >= 0 ? new_scope :
543 in6_addrscope(&ia_best->ia_addr.sin6_addr));
544 best_policy = (new_policy ? new_policy :
545 lookup_addrsel_policy(&ia_best->ia_addr));
546 best_matchlen = (new_matchlen >= 0 ? new_matchlen :
547 in6_matchlen(&ia_best->ia_addr.sin6_addr,
548 &dst));
549
550 next:
551 continue;
552
553 out:
554 break;
555 }
556
557 if ((ia = ia_best) == NULL) {
558 *errorp = EADDRNOTAVAIL;
559 return (NULL);
560 }
561
562 if (ifpp)
563 *ifpp = ifp;
564 return (&ia->ia_addr.sin6_addr);
565 }
566 #undef REPLACE
567 #undef BREAK
568 #undef NEXT
569
570 static int
571 selectroute(dstsock, opts, mopts, ro, retifp, retrt, clone, norouteok)
572 struct sockaddr_in6 *dstsock;
573 struct ip6_pktopts *opts;
574 struct ip6_moptions *mopts;
575 #ifdef NEW_STRUCT_ROUTE
576 struct route *ro;
577 #else
578 struct route_in6 *ro;
579 #endif
580 struct ifnet **retifp;
581 struct rtentry **retrt;
582 int clone;
583 int norouteok;
584 {
585 int error = 0;
586 struct ifnet *ifp = NULL;
587 struct rtentry *rt = NULL;
588 struct sockaddr_in6 *sin6_next;
589 struct in6_pktinfo *pi = NULL;
590 struct in6_addr *dst;
591
592 dst = &dstsock->sin6_addr;
593
594 #if 0
595 if (dstsock->sin6_addr.s6_addr32[0] == 0 &&
596 dstsock->sin6_addr.s6_addr32[1] == 0 &&
597 !IN6_IS_ADDR_LOOPBACK(&dstsock->sin6_addr)) {
598 printf("in6_selectroute: strange destination %s\n",
599 ip6_sprintf(&dstsock->sin6_addr));
600 } else {
601 printf("in6_selectroute: destination = %s%%%d\n",
602 ip6_sprintf(&dstsock->sin6_addr),
603 dstsock->sin6_scope_id); /* for debug */
604 }
605 #endif
606
607 /* If the caller specify the outgoing interface explicitly, use it. */
608 if (opts && (pi = opts->ip6po_pktinfo) != NULL && pi->ipi6_ifindex) {
609 /* XXX boundary check is assumed to be already done. */
610 #ifdef __FreeBSD__
611 ifp = ifnet_byindex(pi->ipi6_ifindex);
612 #else
613 ifp = ifindex2ifnet[pi->ipi6_ifindex];
614 #endif
615 if (ifp != NULL &&
616 (norouteok || retrt == NULL ||
617 IN6_IS_ADDR_MULTICAST(dst))) {
618 /*
619 * we do not have to check or get the route for
620 * multicast.
621 */
622 goto done;
623 } else
624 goto getroute;
625 }
626
627 /*
628 * If the destination address is a multicast address and the outgoing
629 * interface for the address is specified by the caller, use it.
630 */
631 if (IN6_IS_ADDR_MULTICAST(dst) &&
632 mopts != NULL && (ifp = mopts->im6o_multicast_ifp) != NULL) {
633 goto done; /* we do not need a route for multicast. */
634 }
635
636 getroute:
637 /*
638 * If the next hop address for the packet is specified by the caller,
639 * use it as the gateway.
640 */
641 if (opts && opts->ip6po_nexthop) {
642 #ifdef notyet /* until introducing RFC3542 support */
643 struct route_in6 *ron;
644 #endif
645
646 sin6_next = satosin6(opts->ip6po_nexthop);
647
648 /* at this moment, we only support AF_INET6 next hops */
649 if (sin6_next->sin6_family != AF_INET6) {
650 error = EAFNOSUPPORT; /* or should we proceed? */
651 goto done;
652 }
653
654 /*
655 * If the next hop is an IPv6 address, then the node identified
656 * by that address must be a neighbor of the sending host.
657 */
658 #ifdef notyet /* see above */
659 ron = &opts->ip6po_nextroute;
660 if ((ron->ro_rt &&
661 (ron->ro_rt->rt_flags & (RTF_UP | RTF_GATEWAY)) !=
662 RTF_UP) ||
663 !IN6_ARE_ADDR_EQUAL(&satosin6(&ron->ro_dst)->sin6_addr,
664 &sin6_next->sin6_addr)) {
665 if (ron->ro_rt) {
666 RTFREE(ron->ro_rt);
667 ron->ro_rt = NULL;
668 }
669 *satosin6(&ron->ro_dst) = *sin6_next;
670 }
671 if (ron->ro_rt == NULL) {
672 rtalloc((struct route *)ron); /* multi path case? */
673 if (ron->ro_rt == NULL ||
674 (ron->ro_rt->rt_flags & RTF_GATEWAY)) {
675 if (ron->ro_rt) {
676 RTFREE(ron->ro_rt);
677 ron->ro_rt = NULL;
678 }
679 error = EHOSTUNREACH;
680 goto done;
681 }
682 }
683 if (!nd6_is_addr_neighbor(sin6_next, ron->ro_rt->rt_ifp)) {
684 RTFREE(ron->ro_rt);
685 ron->ro_rt = NULL;
686 error = EHOSTUNREACH;
687 goto done;
688 }
689 rt = ron->ro_rt;
690 ifp = rt->rt_ifp;
691
692 /*
693 * When cloning is required, try to allocate a route to the
694 * destination so that the caller can store path MTU
695 * information.
696 */
697 if (!clone)
698 goto done;
699 #endif
700 }
701
702 /*
703 * Use a cached route if it exists and is valid, else try to allocate
704 * a new one. Note that we should check the address family of the
705 * cached destination, in case of sharing the cache with IPv4.
706 */
707 if (ro) {
708 if (ro->ro_rt &&
709 (!(ro->ro_rt->rt_flags & RTF_UP) ||
710 ((struct sockaddr *)(&ro->ro_dst))->sa_family != AF_INET6 ||
711 !IN6_ARE_ADDR_EQUAL(&satosin6(&ro->ro_dst)->sin6_addr,
712 dst))) {
713 RTFREE(ro->ro_rt);
714 ro->ro_rt = (struct rtentry *)NULL;
715 }
716 if (ro->ro_rt == (struct rtentry *)NULL) {
717 struct sockaddr_in6 *sa6;
718
719 /* No route yet, so try to acquire one */
720 bzero(&ro->ro_dst, sizeof(struct sockaddr_in6));
721 sa6 = (struct sockaddr_in6 *)&ro->ro_dst;
722 *sa6 = *dstsock;
723 sa6->sin6_scope_id = 0;
724 if (clone) {
725 #ifdef RADIX_MPATH
726 rtalloc_mpath((struct route *)ro,
727 ntohl(sa6->sin6_addr.s6_addr32[3]));
728 #else
729 rtalloc((struct route *)ro);
730 #endif /* RADIX_MPATH */
731 } else {
732 #ifdef RADIX_MPATH
733 rtalloc_mpath((struct route *)ro,
734 ntohl(sa6->sin6_addr.s6_addr32[3]));
735 #else
736 ro->ro_rt = rtalloc1(&((struct route *)ro)
737 ->ro_dst, 0);
738 #endif /* RADIX_MPATH */
739 }
740 }
741
742 /*
743 * do not care about the result if we have the nexthop
744 * explicitly specified.
745 */
746 if (opts && opts->ip6po_nexthop)
747 goto done;
748
749 if (ro->ro_rt) {
750 ifp = ro->ro_rt->rt_ifp;
751
752 if (ifp == NULL) { /* can this really happen? */
753 RTFREE(ro->ro_rt);
754 ro->ro_rt = NULL;
755 }
756 }
757 if (ro->ro_rt == NULL)
758 error = EHOSTUNREACH;
759 rt = ro->ro_rt;
760
761 /*
762 * Check if the outgoing interface conflicts with
763 * the interface specified by ipi6_ifindex (if specified).
764 * Note that loopback interface is always okay.
765 * (this may happen when we are sending a packet to one of
766 * our own addresses.)
767 */
768 if (opts && opts->ip6po_pktinfo &&
769 opts->ip6po_pktinfo->ipi6_ifindex) {
770 if (!(ifp->if_flags & IFF_LOOPBACK) &&
771 ifp->if_index !=
772 opts->ip6po_pktinfo->ipi6_ifindex) {
773 error = EHOSTUNREACH;
774 goto done;
775 }
776 }
777 }
778
779 done:
780 if (ifp == NULL && rt == NULL) {
781 /*
782 * This can happen if the caller did not pass a cached route
783 * nor any other hints. We treat this case an error.
784 */
785 error = EHOSTUNREACH;
786 }
787 if (error == EHOSTUNREACH)
788 ip6stat.ip6s_noroute++;
789
790 if (retifp != NULL)
791 *retifp = ifp;
792 if (retrt != NULL)
793 *retrt = rt; /* rt may be NULL */
794
795 return (error);
796 }
797
798 static int
799 in6_selectif(dstsock, opts, mopts, ro, retifp)
800 struct sockaddr_in6 *dstsock;
801 struct ip6_pktopts *opts;
802 struct ip6_moptions *mopts;
803 struct route_in6 *ro;
804 struct ifnet **retifp;
805 {
806 int error, clone;
807 struct rtentry *rt = NULL;
808
809 clone = IN6_IS_ADDR_MULTICAST(&dstsock->sin6_addr) ? 0 : 1;
810 if ((error = selectroute(dstsock, opts, mopts, ro, retifp,
811 &rt, clone, 1)) != 0) {
812 return (error);
813 }
814
815 /*
816 * do not use a rejected or black hole route.
817 * XXX: this check should be done in the L2 output routine.
818 * However, if we skipped this check here, we'd see the following
819 * scenario:
820 * - install a rejected route for a scoped address prefix
821 * (like fe80::/10)
822 * - send a packet to a destination that matches the scoped prefix,
823 * with ambiguity about the scope zone.
824 * - pick the outgoing interface from the route, and disambiguate the
825 * scope zone with the interface.
826 * - ip6_output() would try to get another route with the "new"
827 * destination, which may be valid.
828 * - we'd see no error on output.
829 * Although this may not be very harmful, it should still be confusing.
830 * We thus reject the case here.
831 */
832 if (rt && (rt->rt_flags & (RTF_REJECT | RTF_BLACKHOLE)))
833 return (rt->rt_flags & RTF_HOST ? EHOSTUNREACH : ENETUNREACH);
834
835 /*
836 * Adjust the "outgoing" interface. If we're going to loop the packet
837 * back to ourselves, the ifp would be the loopback interface.
838 * However, we'd rather know the interface associated to the
839 * destination address (which should probably be one of our own
840 * addresses.)
841 */
842 if (rt && rt->rt_ifa && rt->rt_ifa->ifa_ifp)
843 *retifp = rt->rt_ifa->ifa_ifp;
844
845 return (0);
846 }
847
/*
 * Public wrapper around selectroute(): same arguments, with norouteok
 * fixed to 0.  Returns selectroute()'s result unchanged.
 */
int
in6_selectroute(dstsock, opts, mopts, ro, retifp, retrt, clone)
	struct sockaddr_in6 *dstsock;
	struct ip6_pktopts *opts;
	struct ip6_moptions *mopts;
	struct route_in6 *ro;
	struct ifnet **retifp;
	struct rtentry **retrt;
	int clone;	/* meaningful only for bsdi and freebsd. */
{
	int error;

	error = selectroute(dstsock, opts, mopts, ro, retifp, retrt,
	    clone, 0);
	return (error);
}
861
862 /*
863 * Default hop limit selection. The precedence is as follows:
864 * 1. Hoplimit value specified via ioctl.
865 * 2. (If the outgoing interface is detected) the current
866 * hop limit of the interface specified by router advertisement.
867 * 3. The system default hoplimit.
868 */
869 int
870 in6_selecthlim(in6p, ifp)
871 struct in6pcb *in6p;
872 struct ifnet *ifp;
873 {
874 if (in6p && in6p->in6p_hops >= 0)
875 return (in6p->in6p_hops);
876 else if (ifp)
877 return (ND_IFINFO(ifp)->chlim);
878 else
879 return (ip6_defhlim);
880 }
881
882 /*
883 * Find an empty port and set it to the specified PCB.
884 */
885 int
886 in6_pcbsetport(laddr, in6p, p)
887 struct in6_addr *laddr;
888 struct in6pcb *in6p;
889 struct proc *p;
890 {
891 struct socket *so = in6p->in6p_socket;
892 struct inpcbtable *table = in6p->in6p_table;
893 int cnt;
894 u_int16_t minport, maxport;
895 u_int16_t lport, *lastport;
896 int wild = 0;
897 void *t;
898
899 /* XXX: this is redundant when called from in6_pcbbind */
900 if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) == 0 &&
901 ((so->so_proto->pr_flags & PR_CONNREQUIRED) == 0 ||
902 (so->so_options & SO_ACCEPTCONN) == 0))
903 wild = 1;
904
905 if (in6p->in6p_flags & IN6P_LOWPORT) {
906 #ifndef IPNOPRIVPORTS
907 if (p == 0 || (suser(p->p_ucred, &p->p_acflag) != 0))
908 return (EACCES);
909 #endif
910 minport = ip6_lowportmin;
911 maxport = ip6_lowportmax;
912 lastport = &table->inpt_lastlow;
913 } else {
914 minport = ip6_anonportmin;
915 maxport = ip6_anonportmax;
916 lastport = &table->inpt_lastport;
917 }
918
919 if (minport > maxport) { /* sanity check */
920 u_int16_t swp;
921
922 swp = minport;
923 minport = maxport;
924 maxport = swp;
925 }
926
927 lport = *lastport - 1;
928 for (cnt = maxport - minport + 1; cnt; cnt--, lport--) {
929 if (lport < minport || lport > maxport)
930 lport = maxport;
931 #ifdef INET
932 if (IN6_IS_ADDR_V4MAPPED(laddr)) {
933 t = in_pcblookup_port(table,
934 *(struct in_addr *)&laddr->s6_addr32[3],
935 lport, wild);
936 } else
937 #endif
938 {
939 t = in6_pcblookup_port(table, laddr, lport, wild);
940 }
941 if (t == 0)
942 goto found;
943 }
944
945 return (EAGAIN);
946
947 found:
948 in6p->in6p_flags |= IN6P_ANONPORT;
949 *lastport = lport;
950 in6p->in6p_lport = htons(lport);
951 in6_pcbstate(in6p, IN6P_BOUND);
952 return (0); /* success */
953 }
954
955 void
956 addrsel_policy_init()
957 {
958 init_policy_queue();
959
960 /* initialize the "last resort" policy */
961 bzero(&defaultaddrpolicy, sizeof(defaultaddrpolicy));
962 defaultaddrpolicy.label = ADDR_LABEL_NOTAPP;
963 }
964
965 static struct in6_addrpolicy *
966 lookup_addrsel_policy(key)
967 struct sockaddr_in6 *key;
968 {
969 struct in6_addrpolicy *match = NULL;
970
971 match = match_addrsel_policy(key);
972
973 if (match == NULL)
974 match = &defaultaddrpolicy;
975 else
976 match->use++;
977
978 return (match);
979 }
980
981 /*
982 * Subroutines to manage the address selection policy table via sysctl.
983 */
984 struct walkarg {
985 size_t w_total;
986 size_t w_given;
987 caddr_t w_where;
988 caddr_t w_limit;
989 };
990
991 int
992 in6_src_sysctl(oldp, oldlenp, newp, newlen)
993 void *oldp;
994 size_t *oldlenp;
995 void *newp;
996 size_t newlen;
997 {
998 int error = 0;
999 int s;
1000
1001 s = splsoftnet();
1002
1003 if (newp) {
1004 error = EPERM;
1005 goto end;
1006 }
1007 if (oldp && oldlenp == NULL) {
1008 error = EINVAL;
1009 goto end;
1010 }
1011 if (oldp || oldlenp) {
1012 struct walkarg w;
1013 size_t oldlen = (oldlenp ? *oldlenp : 0);
1014
1015 bzero(&w, sizeof(w));
1016 w.w_given = oldlen;
1017 w.w_where = oldp;
1018 if (oldp)
1019 w.w_limit = (caddr_t)oldp + oldlen;
1020
1021 error = walk_addrsel_policy(dump_addrsel_policyent, &w);
1022
1023 *oldlenp = w.w_total;
1024 if (oldp && w.w_total > oldlen && error == 0)
1025 error = ENOMEM;
1026 }
1027
1028 end:
1029 splx(s);
1030
1031 return (error);
1032 }
1033
1034 int
1035 in6_src_ioctl(cmd, data)
1036 u_long cmd;
1037 caddr_t data;
1038 {
1039 int i;
1040 struct in6_addrpolicy ent0;
1041
1042 if (cmd != SIOCAADDRCTL_POLICY && cmd != SIOCDADDRCTL_POLICY)
1043 return (EOPNOTSUPP); /* check for safety */
1044
1045 ent0 = *(struct in6_addrpolicy *)data;
1046
1047 if (ent0.label == ADDR_LABEL_NOTAPP)
1048 return (EINVAL);
1049 /* check if the prefix mask is consecutive. */
1050 if (in6_mask2len(&ent0.addrmask.sin6_addr, NULL) < 0)
1051 return (EINVAL);
1052 /* clear trailing garbages (if any) of the prefix address. */
1053 for (i = 0; i < 4; i++) {
1054 ent0.addr.sin6_addr.s6_addr32[i] &=
1055 ent0.addrmask.sin6_addr.s6_addr32[i];
1056 }
1057 ent0.use = 0;
1058
1059 switch (cmd) {
1060 case SIOCAADDRCTL_POLICY:
1061 return (add_addrsel_policyent(&ent0));
1062 case SIOCDADDRCTL_POLICY:
1063 return (delete_addrsel_policyent(&ent0));
1064 }
1065
1066 return (0); /* XXX: compromise compilers */
1067 }
1068
1069 /*
1070 * The followings are implementation of the policy table using a
1071 * simple tail queue.
1072 * XXX such details should be hidden.
1073 * XXX implementation using binary tree should be more efficient.
1074 */
1075 struct addrsel_policyent {
1076 TAILQ_ENTRY(addrsel_policyent) ape_entry;
1077 struct in6_addrpolicy ape_policy;
1078 };
1079
1080 TAILQ_HEAD(addrsel_policyhead, addrsel_policyent);
1081
1082 struct addrsel_policyhead addrsel_policytab;
1083
1084 static void
1085 init_policy_queue()
1086 {
1087 TAILQ_INIT(&addrsel_policytab);
1088 }
1089
1090 static int
1091 add_addrsel_policyent(newpolicy)
1092 struct in6_addrpolicy *newpolicy;
1093 {
1094 struct addrsel_policyent *new, *pol;
1095
1096 /* duplication check */
1097 for (pol = TAILQ_FIRST(&addrsel_policytab); pol;
1098 pol = TAILQ_NEXT(pol, ape_entry)) {
1099 if (IN6_ARE_ADDR_EQUAL(&newpolicy->addr.sin6_addr,
1100 &pol->ape_policy.addr.sin6_addr) &&
1101 IN6_ARE_ADDR_EQUAL(&newpolicy->addrmask.sin6_addr,
1102 &pol->ape_policy.addrmask.sin6_addr)) {
1103 return (EEXIST); /* or override it? */
1104 }
1105 }
1106
1107 MALLOC(new, struct addrsel_policyent *, sizeof(*new), M_IFADDR,
1108 M_WAITOK);
1109 bzero(new, sizeof(*new));
1110
1111 /* XXX: should validate entry */
1112 new->ape_policy = *newpolicy;
1113
1114 TAILQ_INSERT_TAIL(&addrsel_policytab, new, ape_entry);
1115
1116 return (0);
1117 }
1118
1119 static int
1120 delete_addrsel_policyent(key)
1121 struct in6_addrpolicy *key;
1122 {
1123 struct addrsel_policyent *pol;
1124
1125 /* search for the entry in the table */
1126 for (pol = TAILQ_FIRST(&addrsel_policytab); pol;
1127 pol = TAILQ_NEXT(pol, ape_entry)) {
1128 if (IN6_ARE_ADDR_EQUAL(&key->addr.sin6_addr,
1129 &pol->ape_policy.addr.sin6_addr) &&
1130 IN6_ARE_ADDR_EQUAL(&key->addrmask.sin6_addr,
1131 &pol->ape_policy.addrmask.sin6_addr)) {
1132 break;
1133 }
1134 }
1135 if (pol == NULL) {
1136 return (ESRCH);
1137 }
1138
1139 TAILQ_REMOVE(&addrsel_policytab, pol, ape_entry);
1140
1141 return (0);
1142 }
1143
1144 static int
1145 walk_addrsel_policy(callback, w)
1146 int (*callback) __P((struct in6_addrpolicy *, void *));
1147 void *w;
1148 {
1149 struct addrsel_policyent *pol;
1150 int error = 0;
1151
1152 for (pol = TAILQ_FIRST(&addrsel_policytab); pol;
1153 pol = TAILQ_NEXT(pol, ape_entry)) {
1154 if ((error = (*callback)(&pol->ape_policy, w)) != 0) {
1155 return (error);
1156 }
1157 }
1158
1159 return (error);
1160 }
1161
1162 static int
1163 dump_addrsel_policyent(pol, arg)
1164 struct in6_addrpolicy *pol;
1165 void *arg;
1166 {
1167 int error = 0;
1168 struct walkarg *w = arg;
1169
1170 if (w->w_where && w->w_where + sizeof(*pol) <= w->w_limit) {
1171 if ((error = copyout(pol, w->w_where, sizeof(*pol))) != 0)
1172 return (error);
1173 w->w_where += sizeof(*pol);
1174 }
1175 w->w_total += sizeof(*pol);
1176
1177 return (error);
1178 }
1179
1180 static struct in6_addrpolicy *
1181 match_addrsel_policy(key)
1182 struct sockaddr_in6 *key;
1183 {
1184 struct addrsel_policyent *pent;
1185 struct in6_addrpolicy *bestpol = NULL, *pol;
1186 int matchlen, bestmatchlen = -1;
1187 u_char *mp, *ep, *k, *p, m;
1188
1189 for (pent = TAILQ_FIRST(&addrsel_policytab); pent;
1190 pent = TAILQ_NEXT(pent, ape_entry)) {
1191 matchlen = 0;
1192
1193 pol = &pent->ape_policy;
1194 mp = (u_char *)&pol->addrmask.sin6_addr;
1195 ep = mp + 16; /* XXX: scope field? */
1196 k = (u_char *)&key->sin6_addr;
1197 p = (u_char *)&pol->addr.sin6_addr;
1198 for (; mp < ep && *mp; mp++, k++, p++) {
1199 m = *mp;
1200 if ((*k & m) != *p)
1201 goto next; /* not match */
1202 if (m == 0xff) /* short cut for a typical case */
1203 matchlen += 8;
1204 else {
1205 while (m >= 0x80) {
1206 matchlen++;
1207 m <<= 1;
1208 }
1209 }
1210 }
1211
1212 /* matched. check if this is better than the current best. */
1213 if (bestpol == NULL ||
1214 matchlen > bestmatchlen) {
1215 bestpol = pol;
1216 bestmatchlen = matchlen;
1217 }
1218
1219 next:
1220 continue;
1221 }
1222
1223 return (bestpol);
1224 }
1225