in6_src.c revision 1.25 1 /* $KAME: in6_src.c,v 1.159 2005/10/19 01:40:32 t-momose Exp $ */
2
3 /*
4 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of the project nor the names of its contributors
16 * may be used to endorse or promote products derived from this software
17 * without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 */
31
32 /*
33 * Copyright (c) 1982, 1986, 1991, 1993
34 * The Regents of the University of California. All rights reserved.
35 *
36 * Redistribution and use in source and binary forms, with or without
37 * modification, are permitted provided that the following conditions
38 * are met:
39 * 1. Redistributions of source code must retain the above copyright
40 * notice, this list of conditions and the following disclaimer.
41 * 2. Redistributions in binary form must reproduce the above copyright
42 * notice, this list of conditions and the following disclaimer in the
43 * documentation and/or other materials provided with the distribution.
44 * 3. All advertising materials mentioning features or use of this software
45 * must display the following acknowledgement:
46 * This product includes software developed by the University of
47 * California, Berkeley and its contributors.
48 * 4. Neither the name of the University nor the names of its contributors
49 * may be used to endorse or promote products derived from this software
50 * without specific prior written permission.
51 *
52 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
53 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
54 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
55 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
56 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
57 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
58 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
59 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
60 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
61 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
62 * SUCH DAMAGE.
63 *
64 * @(#)in_pcb.c 8.2 (Berkeley) 1/4/94
65 */
66
67 #include <sys/cdefs.h>
68 __KERNEL_RCSID(0, "$NetBSD: in6_src.c,v 1.25 2006/05/05 00:03:22 rpaulo Exp $");
69
70 #include "opt_inet.h"
71
72 #include <sys/param.h>
73 #include <sys/systm.h>
74 #include <sys/malloc.h>
75 #include <sys/mbuf.h>
76 #include <sys/protosw.h>
77 #include <sys/socket.h>
78 #include <sys/socketvar.h>
79 #ifndef __FreeBSD__
80 #include <sys/ioctl.h>
81 #else
82 #include <sys/sockio.h>
83 #endif
84 #ifdef __FreeBSD__
85 #include <sys/sysctl.h>
86 #endif
87 #include <sys/errno.h>
88 #include <sys/time.h>
89 #include <sys/kernel.h>
90 #include <sys/proc.h>
91
92 #include <net/if.h>
93 #include <net/if_types.h>
94 #include <net/route.h>
95 #ifdef RADIX_MPATH
96 #include <net/radix_mpath.h>
97 #endif
98
99 #include <netinet/in.h>
100 #include <netinet/in_var.h>
101 #include <netinet/in_systm.h>
102 #include <netinet/ip.h>
103 #include <netinet/in_pcb.h>
104 #include <netinet6/in6_var.h>
105 #include <netinet/ip6.h>
106 #ifndef __OpenBSD__
107 #include <netinet6/in6_pcb.h>
108 #endif
109 #include <netinet6/ip6_var.h>
110 #include <netinet6/nd6.h>
111 #include <netinet6/scope6_var.h>
112
113 #include <net/net_osdep.h>
114
115 #ifdef MIP6
116 #include <netinet6/mip6.h>
117 #include <netinet6/mip6_var.h>
118 #include "mip.h"
119 #if NMIP > 0
120 #include <net/if_mip.h>
121 #endif /* NMIP > 0 */
122 #endif /* MIP6 */
123
124 #ifndef __OpenBSD__
125 #include "loop.h"
126 #endif
127 #ifdef __NetBSD__
128 extern struct ifnet loif[NLOOP];
129 #endif
130
131 #define ADDR_LABEL_NOTAPP (-1)
132 struct in6_addrpolicy defaultaddrpolicy;
133
134 #ifdef notyet /* until introducing ND extensions and address selection */
135 int ip6_prefer_tempaddr = 0;
136 #endif
137
138 static int selectroute __P((struct sockaddr_in6 *, struct ip6_pktopts *,
139 struct ip6_moptions *, struct route_in6 *, struct ifnet **,
140 struct rtentry **, int, int));
141 static int in6_selectif __P((struct sockaddr_in6 *, struct ip6_pktopts *,
142 struct ip6_moptions *, struct route_in6 *, struct ifnet **));
143
144 static struct in6_addrpolicy *lookup_addrsel_policy __P((struct sockaddr_in6 *));
145
146 static void init_policy_queue __P((void));
147 static int add_addrsel_policyent __P((struct in6_addrpolicy *));
148 static int delete_addrsel_policyent __P((struct in6_addrpolicy *));
149 static int walk_addrsel_policy __P((int (*)(struct in6_addrpolicy *, void *),
150 void *));
151 static int dump_addrsel_policyent __P((struct in6_addrpolicy *, void *));
152 static struct in6_addrpolicy *match_addrsel_policy __P((struct sockaddr_in6 *));
153
154 /*
155 * Return an IPv6 address, which is the most appropriate for a given
156 * destination and user specified options.
157 * If necessary, this function lookups the routing table and returns
158 * an entry to the caller for later use.
159 */
160 #if 0 /* diabled ad-hoc */
161 #define REPLACE(r) do {\
162 if ((r) < sizeof(ip6stat.ip6s_sources_rule) / \
163 sizeof(ip6stat.ip6s_sources_rule[0])) /* check for safety */ \
164 ip6stat.ip6s_sources_rule[(r)]++; \
165 /* printf("in6_selectsrc: replace %s with %s by %d\n", ia_best ? ip6_sprintf(&ia_best->ia_addr.sin6_addr) : "none", ip6_sprintf(&ia->ia_addr.sin6_addr), (r)); */ \
166 goto replace; \
167 } while(0)
168 #define NEXT(r) do {\
169 if ((r) < sizeof(ip6stat.ip6s_sources_rule) / \
170 sizeof(ip6stat.ip6s_sources_rule[0])) /* check for safety */ \
171 ip6stat.ip6s_sources_rule[(r)]++; \
172 /* printf("in6_selectsrc: keep %s against %s by %d\n", ia_best ? ip6_sprintf(&ia_best->ia_addr.sin6_addr) : "none", ip6_sprintf(&ia->ia_addr.sin6_addr), (r)); */ \
173 goto next; /* XXX: we can't use 'continue' here */ \
174 } while(0)
175 #define BREAK(r) do { \
176 if ((r) < sizeof(ip6stat.ip6s_sources_rule) / \
177 sizeof(ip6stat.ip6s_sources_rule[0])) /* check for safety */ \
178 ip6stat.ip6s_sources_rule[(r)]++; \
179 goto out; /* XXX: we can't use 'break' here */ \
180 } while(0)
181 #else
182 #define REPLACE(r) goto replace
183 #define NEXT(r) goto next
184 #define BREAK(r) goto out
185 #endif
186
187 struct in6_addr *
188 in6_selectsrc(dstsock, opts, mopts, ro, laddr, ifpp, errorp)
189 struct sockaddr_in6 *dstsock;
190 struct ip6_pktopts *opts;
191 struct ip6_moptions *mopts;
192 struct route_in6 *ro;
193 struct in6_addr *laddr;
194 struct ifnet **ifpp;
195 int *errorp;
196 {
197 struct in6_addr dst;
198 struct ifnet *ifp = NULL;
199 struct in6_ifaddr *ia = NULL, *ia_best = NULL;
200 struct in6_pktinfo *pi = NULL;
201 int dst_scope = -1, best_scope = -1, best_matchlen = -1;
202 struct in6_addrpolicy *dst_policy = NULL, *best_policy = NULL;
203 u_int32_t odstzone;
204 #ifdef notyet /* until introducing ND extensions and address selection */
205 int prefer_tempaddr;
206 #endif
207 #if defined(MIP6) && NMIP > 0
208 u_int8_t ip6po_usecoa = 0;
209 #endif /* MIP6 && NMIP > 0 */
210
211 dst = dstsock->sin6_addr; /* make a copy for local operation */
212 *errorp = 0;
213 if (ifpp)
214 *ifpp = NULL;
215
216 /*
217 * If the source address is explicitly specified by the caller,
218 * check if the requested source address is indeed a unicast address
219 * assigned to the node, and can be used as the packet's source
220 * address. If everything is okay, use the address as source.
221 */
222 if (opts && (pi = opts->ip6po_pktinfo) &&
223 !IN6_IS_ADDR_UNSPECIFIED(&pi->ipi6_addr)) {
224 struct sockaddr_in6 srcsock;
225 struct in6_ifaddr *ia6;
226
227 /* get the outgoing interface */
228 if ((*errorp = in6_selectif(dstsock, opts, mopts, ro, &ifp))
229 != 0) {
230 return (NULL);
231 }
232
233 /*
234 * Determine the appropriate zone id of the source based on
235 * the zone of the destination and the outgoing interface.
236 * If the specified address is ambiguous wrt the scope zone,
237 * the interface must be specified; otherwise, ifa_ifwithaddr()
238 * will fail matching the address.
239 */
240 bzero(&srcsock, sizeof(srcsock));
241 srcsock.sin6_family = AF_INET6;
242 srcsock.sin6_len = sizeof(srcsock);
243 srcsock.sin6_addr = pi->ipi6_addr;
244 if (ifp) {
245 *errorp = in6_setscope(&srcsock.sin6_addr, ifp, NULL);
246 if (*errorp != 0)
247 return (NULL);
248 }
249
250 ia6 = (struct in6_ifaddr *)ifa_ifwithaddr((struct sockaddr *)(&srcsock));
251 if (ia6 == NULL ||
252 (ia6->ia6_flags & (IN6_IFF_ANYCAST | IN6_IFF_NOTREADY))) {
253 *errorp = EADDRNOTAVAIL;
254 return (NULL);
255 }
256 pi->ipi6_addr = srcsock.sin6_addr; /* XXX: this overrides pi */
257 if (ifpp)
258 *ifpp = ifp;
259 return (&ia6->ia_addr.sin6_addr);
260 }
261
262 /*
263 * Otherwise, if the socket has already bound the source, just use it.
264 */
265 if (laddr && !IN6_IS_ADDR_UNSPECIFIED(laddr))
266 return (laddr);
267
268 /*
269 * If the address is not specified, choose the best one based on
270 * the outgoing interface and the destination address.
271 */
272 /* get the outgoing interface */
273 if ((*errorp = in6_selectif(dstsock, opts, mopts, ro, &ifp)) != 0)
274 return (NULL);
275
276 #if defined(MIP6) && NMIP > 0
277 /*
278 * a caller can specify IP6PO_USECOA to not to use a home
279 * address. for example, the case that the neighbour
280 * unreachability detection to the global address.
281 */
282 if (opts != NULL &&
283 (opts->ip6po_flags & IP6PO_USECOA) != 0) {
284 ip6po_usecoa = 1;
285 }
286 #endif /* MIP6 && NMIP > 0 */
287
288 #ifdef DIAGNOSTIC
289 if (ifp == NULL) /* this should not happen */
290 panic("in6_selectsrc: NULL ifp");
291 #endif
292 *errorp = in6_setscope(&dst, ifp, &odstzone);
293 if (*errorp != 0)
294 return (NULL);
295
296 for (ia = in6_ifaddr; ia; ia = ia->ia_next) {
297 int new_scope = -1, new_matchlen = -1;
298 struct in6_addrpolicy *new_policy = NULL;
299 u_int32_t srczone, osrczone, dstzone;
300 struct in6_addr src;
301 struct ifnet *ifp1 = ia->ia_ifp;
302
303 /*
304 * We'll never take an address that breaks the scope zone
305 * of the destination. We also skip an address if its zone
306 * does not contain the outgoing interface.
307 * XXX: we should probably use sin6_scope_id here.
308 */
309 if (in6_setscope(&dst, ifp1, &dstzone) ||
310 odstzone != dstzone) {
311 continue;
312 }
313 src = ia->ia_addr.sin6_addr;
314 if (in6_setscope(&src, ifp, &osrczone) ||
315 in6_setscope(&src, ifp1, &srczone) ||
316 osrczone != srczone) {
317 continue;
318 }
319
320 /* avoid unusable addresses */
321 if ((ia->ia6_flags &
322 (IN6_IFF_NOTREADY | IN6_IFF_ANYCAST | IN6_IFF_DETACHED))) {
323 continue;
324 }
325 if (!ip6_use_deprecated && IFA6_IS_DEPRECATED(ia))
326 continue;
327
328 #if defined(MIP6) && NMIP > 0
329 /* avoid unusable home addresses. */
330 if ((ia->ia6_flags & IN6_IFF_HOME) &&
331 !mip6_ifa6_is_addr_valid_hoa(ia))
332 continue;
333 #endif /* MIP6 && NMIP > 0 */
334
335 /* Rule 1: Prefer same address */
336 if (IN6_ARE_ADDR_EQUAL(&dst, &ia->ia_addr.sin6_addr)) {
337 ia_best = ia;
338 BREAK(1); /* there should be no better candidate */
339 }
340
341 if (ia_best == NULL)
342 REPLACE(0);
343
344 /* Rule 2: Prefer appropriate scope */
345 if (dst_scope < 0)
346 dst_scope = in6_addrscope(&dst);
347 new_scope = in6_addrscope(&ia->ia_addr.sin6_addr);
348 if (IN6_ARE_SCOPE_CMP(best_scope, new_scope) < 0) {
349 if (IN6_ARE_SCOPE_CMP(best_scope, dst_scope) < 0)
350 REPLACE(2);
351 NEXT(2);
352 } else if (IN6_ARE_SCOPE_CMP(new_scope, best_scope) < 0) {
353 if (IN6_ARE_SCOPE_CMP(new_scope, dst_scope) < 0)
354 NEXT(2);
355 REPLACE(2);
356 }
357
358 /*
359 * Rule 3: Avoid deprecated addresses. Note that the case of
360 * !ip6_use_deprecated is already rejected above.
361 */
362 if (!IFA6_IS_DEPRECATED(ia_best) && IFA6_IS_DEPRECATED(ia))
363 NEXT(3);
364 if (IFA6_IS_DEPRECATED(ia_best) && !IFA6_IS_DEPRECATED(ia))
365 REPLACE(3);
366
367 /* Rule 4: Prefer home addresses */
368 #if defined(MIP6) && NMIP > 0
369 if (!MIP6_IS_MN)
370 goto skip_rule4;
371
372 if ((ia_best->ia6_flags & IN6_IFF_HOME) == 0 &&
373 (ia->ia6_flags & IN6_IFF_HOME) == 0) {
374 /* both address are not home addresses. */
375 goto skip_rule4;
376 }
377
378 /*
379 * If SA is simultaneously a home address and care-of
380 * address and SB is not, then prefer SA. Similarly,
381 * if SB is simultaneously a home address and care-of
382 * address and SA is not, then prefer SB.
383 */
384 if (((ia_best->ia6_flags & IN6_IFF_HOME) != 0 &&
385 ia_best->ia_ifp->if_type != IFT_MIP)
386 &&
387 ((ia->ia6_flags & IN6_IFF_HOME) != 0 &&
388 ia->ia_ifp->if_type == IFT_MIP))
389 NEXT(4);
390 if (((ia_best->ia6_flags & IN6_IFF_HOME) != 0 &&
391 ia_best->ia_ifp->if_type == IFT_MIP)
392 &&
393 ((ia->ia6_flags & IN6_IFF_HOME) != 0 &&
394 ia->ia_ifp->if_type != IFT_MIP))
395 REPLACE(4);
396 if (ip6po_usecoa == 0) {
397 /*
398 * If SA is just a home address and SB is just
399 * a care-of address, then prefer
400 * SA. Similarly, if SB is just a home address
401 * and SA is just a care-of address, then
402 * prefer SB.
403 */
404 if ((ia_best->ia6_flags & IN6_IFF_HOME) != 0 &&
405 (ia->ia6_flags & IN6_IFF_HOME) == 0) {
406 NEXT(4);
407 }
408 if ((ia_best->ia6_flags & IN6_IFF_HOME) == 0 &&
409 (ia->ia6_flags & IN6_IFF_HOME) != 0) {
410 REPLACE(4);
411 }
412 } else {
413 /*
414 * a sender don't want to use a home address
415 * because:
416 *
417 * 1) we cannot use. (ex. NS or NA to global
418 * addresses.)
419 *
420 * 2) a user specified not to use.
421 * (ex. mip6control -u)
422 */
423 if ((ia_best->ia6_flags & IN6_IFF_HOME) == 0 &&
424 (ia->ia6_flags & IN6_IFF_HOME) != 0) {
425 /* XXX breaks stat */
426 NEXT(0);
427 }
428 if ((ia_best->ia6_flags & IN6_IFF_HOME) != 0 &&
429 (ia->ia6_flags & IN6_IFF_HOME) == 0) {
430 /* XXX breaks stat */
431 REPLACE(0);
432 }
433 }
434 skip_rule4:
435 #endif /* MIP6 && NMIP > 0 */
436
437 /* Rule 5: Prefer outgoing interface */
438 if (ia_best->ia_ifp == ifp && ia->ia_ifp != ifp)
439 NEXT(5);
440 if (ia_best->ia_ifp != ifp && ia->ia_ifp == ifp)
441 REPLACE(5);
442
443 /*
444 * Rule 6: Prefer matching label
445 * Note that best_policy should be non-NULL here.
446 */
447 if (dst_policy == NULL)
448 dst_policy = lookup_addrsel_policy(dstsock);
449 if (dst_policy->label != ADDR_LABEL_NOTAPP) {
450 new_policy = lookup_addrsel_policy(&ia->ia_addr);
451 if (dst_policy->label == best_policy->label &&
452 dst_policy->label != new_policy->label)
453 NEXT(6);
454 if (dst_policy->label != best_policy->label &&
455 dst_policy->label == new_policy->label)
456 REPLACE(6);
457 }
458
459 /*
460 * Rule 7: Prefer public addresses.
461 * We allow users to reverse the logic by configuring
462 * a sysctl variable, so that privacy conscious users can
463 * always prefer temporary addresses.
464 */
465 #ifdef notyet /* until introducing ND extensions and address selection */
466 if (opts == NULL ||
467 opts->ip6po_prefer_tempaddr == IP6PO_TEMPADDR_SYSTEM) {
468 prefer_tempaddr = ip6_prefer_tempaddr;
469 } else if (opts->ip6po_prefer_tempaddr ==
470 IP6PO_TEMPADDR_NOTPREFER) {
471 prefer_tempaddr = 0;
472 } else
473 prefer_tempaddr = 1;
474 if (!(ia_best->ia6_flags & IN6_IFF_TEMPORARY) &&
475 (ia->ia6_flags & IN6_IFF_TEMPORARY)) {
476 if (prefer_tempaddr)
477 REPLACE(7);
478 else
479 NEXT(7);
480 }
481 if ((ia_best->ia6_flags & IN6_IFF_TEMPORARY) &&
482 !(ia->ia6_flags & IN6_IFF_TEMPORARY)) {
483 if (prefer_tempaddr)
484 NEXT(7);
485 else
486 REPLACE(7);
487 }
488 #endif
489
490 /*
491 * Rule 8: prefer addresses on alive interfaces.
492 * This is a KAME specific rule.
493 */
494 if ((ia_best->ia_ifp->if_flags & IFF_UP) &&
495 !(ia->ia_ifp->if_flags & IFF_UP))
496 NEXT(8);
497 if (!(ia_best->ia_ifp->if_flags & IFF_UP) &&
498 (ia->ia_ifp->if_flags & IFF_UP))
499 REPLACE(8);
500
501 /*
502 * Rule 9: prefer addresses on "preferred" interfaces.
503 * This is a KAME specific rule.
504 */
505 #ifdef notyet /* until introducing address selection */
506 #define NDI_BEST ND_IFINFO(ia_best->ia_ifp)
507 #define NDI_NEW ND_IFINFO(ia->ia_ifp)
508 if ((NDI_BEST->flags & ND6_IFF_PREFER_SOURCE) &&
509 !(NDI_NEW->flags & ND6_IFF_PREFER_SOURCE))
510 NEXT(9);
511 if (!(NDI_BEST->flags & ND6_IFF_PREFER_SOURCE) &&
512 (NDI_NEW->flags & ND6_IFF_PREFER_SOURCE))
513 REPLACE(9);
514 #undef NDI_BEST
515 #undef NDI_NEW
516 #endif
517
518 /*
519 * Rule 14: Use longest matching prefix.
520 * Note: in the address selection draft, this rule is
521 * documented as "Rule 8". However, since it is also
522 * documented that this rule can be overridden, we assign
523 * a large number so that it is easy to assign smaller numbers
524 * to more preferred rules.
525 */
526 new_matchlen = in6_matchlen(&ia->ia_addr.sin6_addr, &dst);
527 if (best_matchlen < new_matchlen)
528 REPLACE(14);
529 if (new_matchlen < best_matchlen)
530 NEXT(14);
531
532 /* Rule 15 is reserved. */
533
534 /*
535 * Last resort: just keep the current candidate.
536 * Or, do we need more rules?
537 */
538 continue;
539
540 replace:
541 ia_best = ia;
542 best_scope = (new_scope >= 0 ? new_scope :
543 in6_addrscope(&ia_best->ia_addr.sin6_addr));
544 best_policy = (new_policy ? new_policy :
545 lookup_addrsel_policy(&ia_best->ia_addr));
546 best_matchlen = (new_matchlen >= 0 ? new_matchlen :
547 in6_matchlen(&ia_best->ia_addr.sin6_addr,
548 &dst));
549
550 next:
551 continue;
552
553 out:
554 break;
555 }
556
557 if ((ia = ia_best) == NULL) {
558 *errorp = EADDRNOTAVAIL;
559 return (NULL);
560 }
561
562 if (ifpp)
563 *ifpp = ifp;
564 return (&ia->ia_addr.sin6_addr);
565 }
566 #undef REPLACE
567 #undef BREAK
568 #undef NEXT
569
570 static int
571 selectroute(dstsock, opts, mopts, ro, retifp, retrt, clone, norouteok)
572 struct sockaddr_in6 *dstsock;
573 struct ip6_pktopts *opts;
574 struct ip6_moptions *mopts;
575 #ifdef NEW_STRUCT_ROUTE
576 struct route *ro;
577 #else
578 struct route_in6 *ro;
579 #endif
580 struct ifnet **retifp;
581 struct rtentry **retrt;
582 int clone;
583 int norouteok;
584 {
585 int error = 0;
586 struct ifnet *ifp = NULL;
587 struct rtentry *rt = NULL;
588 struct sockaddr_in6 *sin6_next;
589 struct in6_pktinfo *pi = NULL;
590 struct in6_addr *dst;
591
592 dst = &dstsock->sin6_addr;
593
594 #if 0
595 if (dstsock->sin6_addr.s6_addr32[0] == 0 &&
596 dstsock->sin6_addr.s6_addr32[1] == 0 &&
597 !IN6_IS_ADDR_LOOPBACK(&dstsock->sin6_addr)) {
598 printf("in6_selectroute: strange destination %s\n",
599 ip6_sprintf(&dstsock->sin6_addr));
600 } else {
601 printf("in6_selectroute: destination = %s%%%d\n",
602 ip6_sprintf(&dstsock->sin6_addr),
603 dstsock->sin6_scope_id); /* for debug */
604 }
605 #endif
606
607 /* If the caller specify the outgoing interface explicitly, use it. */
608 if (opts && (pi = opts->ip6po_pktinfo) != NULL && pi->ipi6_ifindex) {
609 /* XXX boundary check is assumed to be already done. */
610 #ifdef __FreeBSD__
611 ifp = ifnet_byindex(pi->ipi6_ifindex);
612 #else
613 ifp = ifindex2ifnet[pi->ipi6_ifindex];
614 #endif
615 if (ifp != NULL &&
616 (norouteok || retrt == NULL ||
617 IN6_IS_ADDR_MULTICAST(dst))) {
618 /*
619 * we do not have to check or get the route for
620 * multicast.
621 */
622 goto done;
623 } else
624 goto getroute;
625 }
626
627 /*
628 * If the destination address is a multicast address and the outgoing
629 * interface for the address is specified by the caller, use it.
630 */
631 if (IN6_IS_ADDR_MULTICAST(dst) &&
632 mopts != NULL && (ifp = mopts->im6o_multicast_ifp) != NULL) {
633 goto done; /* we do not need a route for multicast. */
634 }
635
636 getroute:
637 /*
638 * If the next hop address for the packet is specified by the caller,
639 * use it as the gateway.
640 */
641 if (opts && opts->ip6po_nexthop) {
642 struct route_in6 *ron;
643
644 sin6_next = satosin6(opts->ip6po_nexthop);
645
646 /* at this moment, we only support AF_INET6 next hops */
647 if (sin6_next->sin6_family != AF_INET6) {
648 error = EAFNOSUPPORT; /* or should we proceed? */
649 goto done;
650 }
651
652 /*
653 * If the next hop is an IPv6 address, then the node identified
654 * by that address must be a neighbor of the sending host.
655 */
656 ron = &opts->ip6po_nextroute;
657 if ((ron->ro_rt &&
658 (ron->ro_rt->rt_flags & (RTF_UP | RTF_GATEWAY)) !=
659 RTF_UP) ||
660 !IN6_ARE_ADDR_EQUAL(&satosin6(&ron->ro_dst)->sin6_addr,
661 &sin6_next->sin6_addr)) {
662 if (ron->ro_rt) {
663 RTFREE(ron->ro_rt);
664 ron->ro_rt = NULL;
665 }
666 *satosin6(&ron->ro_dst) = *sin6_next;
667 }
668 if (ron->ro_rt == NULL) {
669 rtalloc((struct route *)ron); /* multi path case? */
670 if (ron->ro_rt == NULL ||
671 (ron->ro_rt->rt_flags & RTF_GATEWAY)) {
672 if (ron->ro_rt) {
673 RTFREE(ron->ro_rt);
674 ron->ro_rt = NULL;
675 }
676 error = EHOSTUNREACH;
677 goto done;
678 }
679 }
680 if (!nd6_is_addr_neighbor(sin6_next, ron->ro_rt->rt_ifp)) {
681 RTFREE(ron->ro_rt);
682 ron->ro_rt = NULL;
683 error = EHOSTUNREACH;
684 goto done;
685 }
686 rt = ron->ro_rt;
687 ifp = rt->rt_ifp;
688
689 /*
690 * When cloning is required, try to allocate a route to the
691 * destination so that the caller can store path MTU
692 * information.
693 */
694 if (!clone)
695 goto done;
696 }
697
698 /*
699 * Use a cached route if it exists and is valid, else try to allocate
700 * a new one. Note that we should check the address family of the
701 * cached destination, in case of sharing the cache with IPv4.
702 */
703 if (ro) {
704 if (ro->ro_rt &&
705 (!(ro->ro_rt->rt_flags & RTF_UP) ||
706 ((struct sockaddr *)(&ro->ro_dst))->sa_family != AF_INET6 ||
707 !IN6_ARE_ADDR_EQUAL(&satosin6(&ro->ro_dst)->sin6_addr,
708 dst))) {
709 RTFREE(ro->ro_rt);
710 ro->ro_rt = (struct rtentry *)NULL;
711 }
712 if (ro->ro_rt == (struct rtentry *)NULL) {
713 struct sockaddr_in6 *sa6;
714
715 /* No route yet, so try to acquire one */
716 bzero(&ro->ro_dst, sizeof(struct sockaddr_in6));
717 sa6 = (struct sockaddr_in6 *)&ro->ro_dst;
718 *sa6 = *dstsock;
719 sa6->sin6_scope_id = 0;
720 if (clone) {
721 #ifdef RADIX_MPATH
722 rtalloc_mpath((struct route *)ro,
723 ntohl(sa6->sin6_addr.s6_addr32[3]));
724 #else
725 rtalloc((struct route *)ro);
726 #endif /* RADIX_MPATH */
727 } else {
728 #ifdef RADIX_MPATH
729 rtalloc_mpath((struct route *)ro,
730 ntohl(sa6->sin6_addr.s6_addr32[3]));
731 #else
732 ro->ro_rt = rtalloc1(&((struct route *)ro)
733 ->ro_dst, 0);
734 #endif /* RADIX_MPATH */
735 }
736 }
737
738 /*
739 * do not care about the result if we have the nexthop
740 * explicitly specified.
741 */
742 if (opts && opts->ip6po_nexthop)
743 goto done;
744
745 if (ro->ro_rt) {
746 ifp = ro->ro_rt->rt_ifp;
747
748 if (ifp == NULL) { /* can this really happen? */
749 RTFREE(ro->ro_rt);
750 ro->ro_rt = NULL;
751 }
752 }
753 if (ro->ro_rt == NULL)
754 error = EHOSTUNREACH;
755 rt = ro->ro_rt;
756
757 /*
758 * Check if the outgoing interface conflicts with
759 * the interface specified by ipi6_ifindex (if specified).
760 * Note that loopback interface is always okay.
761 * (this may happen when we are sending a packet to one of
762 * our own addresses.)
763 */
764 if (opts && opts->ip6po_pktinfo &&
765 opts->ip6po_pktinfo->ipi6_ifindex) {
766 if (!(ifp->if_flags & IFF_LOOPBACK) &&
767 ifp->if_index !=
768 opts->ip6po_pktinfo->ipi6_ifindex) {
769 error = EHOSTUNREACH;
770 goto done;
771 }
772 }
773 }
774
775 done:
776 if (ifp == NULL && rt == NULL) {
777 /*
778 * This can happen if the caller did not pass a cached route
779 * nor any other hints. We treat this case an error.
780 */
781 error = EHOSTUNREACH;
782 }
783 if (error == EHOSTUNREACH)
784 ip6stat.ip6s_noroute++;
785
786 if (retifp != NULL)
787 *retifp = ifp;
788 if (retrt != NULL)
789 *retrt = rt; /* rt may be NULL */
790
791 return (error);
792 }
793
794 static int
795 in6_selectif(dstsock, opts, mopts, ro, retifp)
796 struct sockaddr_in6 *dstsock;
797 struct ip6_pktopts *opts;
798 struct ip6_moptions *mopts;
799 struct route_in6 *ro;
800 struct ifnet **retifp;
801 {
802 int error, clone;
803 struct rtentry *rt = NULL;
804
805 clone = IN6_IS_ADDR_MULTICAST(&dstsock->sin6_addr) ? 0 : 1;
806 if ((error = selectroute(dstsock, opts, mopts, ro, retifp,
807 &rt, clone, 1)) != 0) {
808 return (error);
809 }
810
811 /*
812 * do not use a rejected or black hole route.
813 * XXX: this check should be done in the L2 output routine.
814 * However, if we skipped this check here, we'd see the following
815 * scenario:
816 * - install a rejected route for a scoped address prefix
817 * (like fe80::/10)
818 * - send a packet to a destination that matches the scoped prefix,
819 * with ambiguity about the scope zone.
820 * - pick the outgoing interface from the route, and disambiguate the
821 * scope zone with the interface.
822 * - ip6_output() would try to get another route with the "new"
823 * destination, which may be valid.
824 * - we'd see no error on output.
825 * Although this may not be very harmful, it should still be confusing.
826 * We thus reject the case here.
827 */
828 if (rt && (rt->rt_flags & (RTF_REJECT | RTF_BLACKHOLE)))
829 return (rt->rt_flags & RTF_HOST ? EHOSTUNREACH : ENETUNREACH);
830
831 /*
832 * Adjust the "outgoing" interface. If we're going to loop the packet
833 * back to ourselves, the ifp would be the loopback interface.
834 * However, we'd rather know the interface associated to the
835 * destination address (which should probably be one of our own
836 * addresses.)
837 */
838 if (rt && rt->rt_ifa && rt->rt_ifa->ifa_ifp)
839 *retifp = rt->rt_ifa->ifa_ifp;
840
841 return (0);
842 }
843
/*
 * in6_selectroute: public entry point to selectroute().
 * Unlike in6_selectif() it does not accept "no route is okay": the
 * norouteok argument passed down is always 0.
 */
int
in6_selectroute(dstsock, opts, mopts, ro, retifp, retrt, clone)
	struct sockaddr_in6 *dstsock;
	struct ip6_pktopts *opts;
	struct ip6_moptions *mopts;
	struct route_in6 *ro;
	struct ifnet **retifp;
	struct rtentry **retrt;
	int clone;		/* meaningful only for bsdi and freebsd. */
{
	int error;

	error = selectroute(dstsock, opts, mopts, ro, retifp, retrt,
	    clone, 0);
	return (error);
}
857
858 /*
859 * Default hop limit selection. The precedence is as follows:
860 * 1. Hoplimit value specified via ioctl.
861 * 2. (If the outgoing interface is detected) the current
862 * hop limit of the interface specified by router advertisement.
863 * 3. The system default hoplimit.
864 */
865 int
866 in6_selecthlim(in6p, ifp)
867 struct in6pcb *in6p;
868 struct ifnet *ifp;
869 {
870 if (in6p && in6p->in6p_hops >= 0)
871 return (in6p->in6p_hops);
872 else if (ifp)
873 return (ND_IFINFO(ifp)->chlim);
874 else
875 return (ip6_defhlim);
876 }
877
878 /*
879 * Find an empty port and set it to the specified PCB.
880 */
881 int
882 in6_pcbsetport(laddr, in6p, p)
883 struct in6_addr *laddr;
884 struct in6pcb *in6p;
885 struct proc *p;
886 {
887 struct socket *so = in6p->in6p_socket;
888 struct inpcbtable *table = in6p->in6p_table;
889 int cnt;
890 u_int16_t minport, maxport;
891 u_int16_t lport, *lastport;
892 int wild = 0;
893 void *t;
894
895 /* XXX: this is redundant when called from in6_pcbbind */
896 if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) == 0 &&
897 ((so->so_proto->pr_flags & PR_CONNREQUIRED) == 0 ||
898 (so->so_options & SO_ACCEPTCONN) == 0))
899 wild = 1;
900
901 if (in6p->in6p_flags & IN6P_LOWPORT) {
902 #ifndef IPNOPRIVPORTS
903 if (p == 0 || (suser(p->p_ucred, &p->p_acflag) != 0))
904 return (EACCES);
905 #endif
906 minport = ip6_lowportmin;
907 maxport = ip6_lowportmax;
908 lastport = &table->inpt_lastlow;
909 } else {
910 minport = ip6_anonportmin;
911 maxport = ip6_anonportmax;
912 lastport = &table->inpt_lastport;
913 }
914
915 if (minport > maxport) { /* sanity check */
916 u_int16_t swp;
917
918 swp = minport;
919 minport = maxport;
920 maxport = swp;
921 }
922
923 lport = *lastport - 1;
924 for (cnt = maxport - minport + 1; cnt; cnt--, lport--) {
925 if (lport < minport || lport > maxport)
926 lport = maxport;
927 #ifdef INET
928 if (IN6_IS_ADDR_V4MAPPED(laddr)) {
929 t = in_pcblookup_port(table,
930 *(struct in_addr *)&laddr->s6_addr32[3],
931 lport, wild);
932 } else
933 #endif
934 {
935 t = in6_pcblookup_port(table, laddr, lport, wild);
936 }
937 if (t == 0)
938 goto found;
939 }
940
941 return (EAGAIN);
942
943 found:
944 in6p->in6p_flags |= IN6P_ANONPORT;
945 *lastport = lport;
946 in6p->in6p_lport = htons(lport);
947 in6_pcbstate(in6p, IN6P_BOUND);
948 return (0); /* success */
949 }
950
951 void
952 addrsel_policy_init()
953 {
954 init_policy_queue();
955
956 /* initialize the "last resort" policy */
957 bzero(&defaultaddrpolicy, sizeof(defaultaddrpolicy));
958 defaultaddrpolicy.label = ADDR_LABEL_NOTAPP;
959 }
960
961 static struct in6_addrpolicy *
962 lookup_addrsel_policy(key)
963 struct sockaddr_in6 *key;
964 {
965 struct in6_addrpolicy *match = NULL;
966
967 match = match_addrsel_policy(key);
968
969 if (match == NULL)
970 match = &defaultaddrpolicy;
971 else
972 match->use++;
973
974 return (match);
975 }
976
977 /*
978 * Subroutines to manage the address selection policy table via sysctl.
979 */
980 struct walkarg {
981 size_t w_total;
982 size_t w_given;
983 caddr_t w_where;
984 caddr_t w_limit;
985 };
986
987 int
988 in6_src_sysctl(oldp, oldlenp, newp, newlen)
989 void *oldp;
990 size_t *oldlenp;
991 void *newp;
992 size_t newlen;
993 {
994 int error = 0;
995 int s;
996
997 s = splsoftnet();
998
999 if (newp) {
1000 error = EPERM;
1001 goto end;
1002 }
1003 if (oldp && oldlenp == NULL) {
1004 error = EINVAL;
1005 goto end;
1006 }
1007 if (oldp || oldlenp) {
1008 struct walkarg w;
1009 size_t oldlen = *oldlenp;
1010
1011 bzero(&w, sizeof(w));
1012 w.w_given = oldlen;
1013 w.w_where = oldp;
1014 if (oldp)
1015 w.w_limit = (caddr_t)oldp + oldlen;
1016
1017 error = walk_addrsel_policy(dump_addrsel_policyent, &w);
1018
1019 *oldlenp = w.w_total;
1020 if (oldp && w.w_total > oldlen && error == 0)
1021 error = ENOMEM;
1022 }
1023
1024 end:
1025 splx(s);
1026
1027 return (error);
1028 }
1029
1030 int
1031 in6_src_ioctl(cmd, data)
1032 u_long cmd;
1033 caddr_t data;
1034 {
1035 int i;
1036 struct in6_addrpolicy ent0;
1037
1038 if (cmd != SIOCAADDRCTL_POLICY && cmd != SIOCDADDRCTL_POLICY)
1039 return (EOPNOTSUPP); /* check for safety */
1040
1041 ent0 = *(struct in6_addrpolicy *)data;
1042
1043 if (ent0.label == ADDR_LABEL_NOTAPP)
1044 return (EINVAL);
1045 /* check if the prefix mask is consecutive. */
1046 if (in6_mask2len(&ent0.addrmask.sin6_addr, NULL) < 0)
1047 return (EINVAL);
1048 /* clear trailing garbages (if any) of the prefix address. */
1049 for (i = 0; i < 4; i++) {
1050 ent0.addr.sin6_addr.s6_addr32[i] &=
1051 ent0.addrmask.sin6_addr.s6_addr32[i];
1052 }
1053 ent0.use = 0;
1054
1055 switch (cmd) {
1056 case SIOCAADDRCTL_POLICY:
1057 return (add_addrsel_policyent(&ent0));
1058 case SIOCDADDRCTL_POLICY:
1059 return (delete_addrsel_policyent(&ent0));
1060 }
1061
1062 return (0); /* XXX: compromise compilers */
1063 }
1064
1065 /*
1066 * The following is an implementation of the policy table using a
1067 * simple tail queue.
1068 * XXX such details should be hidden.
1069 * XXX implementation using binary tree should be more efficient.
1070 */
/* One node of the address selection policy table. */
1071 struct addrsel_policyent {
1072 TAILQ_ENTRY(addrsel_policyent) ape_entry; /* tail queue linkage */
1073 struct in6_addrpolicy ape_policy; /* the stored policy (addr, addrmask, label, use, ...) */
1074 };
1075
1076 TAILQ_HEAD(addrsel_policyhead, addrsel_policyent); /* head type for the policy queue */
1077
1078 struct addrsel_policyhead addrsel_policytab; /* the policy table itself; new entries go at the tail */
1079
1080 static void
1081 init_policy_queue()
1082 {
1083 TAILQ_INIT(&addrsel_policytab);
1084 }
1085
1086 static int
1087 add_addrsel_policyent(newpolicy)
1088 struct in6_addrpolicy *newpolicy;
1089 {
1090 struct addrsel_policyent *new, *pol;
1091
1092 /* duplication check */
1093 for (pol = TAILQ_FIRST(&addrsel_policytab); pol;
1094 pol = TAILQ_NEXT(pol, ape_entry)) {
1095 if (IN6_ARE_ADDR_EQUAL(&newpolicy->addr.sin6_addr,
1096 &pol->ape_policy.addr.sin6_addr) &&
1097 IN6_ARE_ADDR_EQUAL(&newpolicy->addrmask.sin6_addr,
1098 &pol->ape_policy.addrmask.sin6_addr)) {
1099 return (EEXIST); /* or override it? */
1100 }
1101 }
1102
1103 MALLOC(new, struct addrsel_policyent *, sizeof(*new), M_IFADDR,
1104 M_WAITOK);
1105 bzero(new, sizeof(*new));
1106
1107 /* XXX: should validate entry */
1108 new->ape_policy = *newpolicy;
1109
1110 TAILQ_INSERT_TAIL(&addrsel_policytab, new, ape_entry);
1111
1112 return (0);
1113 }
1114
1115 static int
1116 delete_addrsel_policyent(key)
1117 struct in6_addrpolicy *key;
1118 {
1119 struct addrsel_policyent *pol;
1120
1121 /* search for the entry in the table */
1122 for (pol = TAILQ_FIRST(&addrsel_policytab); pol;
1123 pol = TAILQ_NEXT(pol, ape_entry)) {
1124 if (IN6_ARE_ADDR_EQUAL(&key->addr.sin6_addr,
1125 &pol->ape_policy.addr.sin6_addr) &&
1126 IN6_ARE_ADDR_EQUAL(&key->addrmask.sin6_addr,
1127 &pol->ape_policy.addrmask.sin6_addr)) {
1128 break;
1129 }
1130 }
1131 if (pol == NULL) {
1132 return (ESRCH);
1133 }
1134
1135 TAILQ_REMOVE(&addrsel_policytab, pol, ape_entry);
1136
1137 return (0);
1138 }
1139
1140 static int
1141 walk_addrsel_policy(callback, w)
1142 int (*callback) __P((struct in6_addrpolicy *, void *));
1143 void *w;
1144 {
1145 struct addrsel_policyent *pol;
1146 int error = 0;
1147
1148 for (pol = TAILQ_FIRST(&addrsel_policytab); pol;
1149 pol = TAILQ_NEXT(pol, ape_entry)) {
1150 if ((error = (*callback)(&pol->ape_policy, w)) != 0) {
1151 return (error);
1152 }
1153 }
1154
1155 return (error);
1156 }
1157
1158 static int
1159 dump_addrsel_policyent(pol, arg)
1160 struct in6_addrpolicy *pol;
1161 void *arg;
1162 {
1163 int error = 0;
1164 struct walkarg *w = arg;
1165
1166 if (w->w_where && w->w_where + sizeof(*pol) <= w->w_limit) {
1167 if ((error = copyout(pol, w->w_where, sizeof(*pol))) != 0)
1168 return (error);
1169 w->w_where += sizeof(*pol);
1170 }
1171 w->w_total += sizeof(*pol);
1172
1173 return (error);
1174 }
1175
1176 static struct in6_addrpolicy *
1177 match_addrsel_policy(key)
1178 struct sockaddr_in6 *key;
1179 {
1180 struct addrsel_policyent *pent;
1181 struct in6_addrpolicy *bestpol = NULL, *pol;
1182 int matchlen, bestmatchlen = -1;
1183 u_char *mp, *ep, *k, *p, m;
1184
1185 for (pent = TAILQ_FIRST(&addrsel_policytab); pent;
1186 pent = TAILQ_NEXT(pent, ape_entry)) {
1187 matchlen = 0;
1188
1189 pol = &pent->ape_policy;
1190 mp = (u_char *)&pol->addrmask.sin6_addr;
1191 ep = mp + 16; /* XXX: scope field? */
1192 k = (u_char *)&key->sin6_addr;
1193 p = (u_char *)&pol->addr.sin6_addr;
1194 for (; mp < ep && *mp; mp++, k++, p++) {
1195 m = *mp;
1196 if ((*k & m) != *p)
1197 goto next; /* not match */
1198 if (m == 0xff) /* short cut for a typical case */
1199 matchlen += 8;
1200 else {
1201 while (m >= 0x80) {
1202 matchlen++;
1203 m <<= 1;
1204 }
1205 }
1206 }
1207
1208 /* matched. check if this is better than the current best. */
1209 if (bestpol == NULL ||
1210 matchlen > bestmatchlen) {
1211 bestpol = pol;
1212 bestmatchlen = matchlen;
1213 }
1214
1215 next:
1216 continue;
1217 }
1218
1219 return (bestpol);
1220 }
1221