route.c revision 1.176 1 /* $NetBSD: route.c,v 1.176 2016/10/21 03:04:33 ozaki-r Exp $ */
2
3 /*-
4 * Copyright (c) 1998, 2008 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Kevin M. Lahey of the Numerical Aerospace Simulation Facility,
9 * NASA Ames Research Center.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
23 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 * POSSIBILITY OF SUCH DAMAGE.
31 */
32
33 /*
34 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
35 * All rights reserved.
36 *
37 * Redistribution and use in source and binary forms, with or without
38 * modification, are permitted provided that the following conditions
39 * are met:
40 * 1. Redistributions of source code must retain the above copyright
41 * notice, this list of conditions and the following disclaimer.
42 * 2. Redistributions in binary form must reproduce the above copyright
43 * notice, this list of conditions and the following disclaimer in the
44 * documentation and/or other materials provided with the distribution.
45 * 3. Neither the name of the project nor the names of its contributors
46 * may be used to endorse or promote products derived from this software
47 * without specific prior written permission.
48 *
49 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
50 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
53 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59 * SUCH DAMAGE.
60 */
61
62 /*
63 * Copyright (c) 1980, 1986, 1991, 1993
64 * The Regents of the University of California. All rights reserved.
65 *
66 * Redistribution and use in source and binary forms, with or without
67 * modification, are permitted provided that the following conditions
68 * are met:
69 * 1. Redistributions of source code must retain the above copyright
70 * notice, this list of conditions and the following disclaimer.
71 * 2. Redistributions in binary form must reproduce the above copyright
72 * notice, this list of conditions and the following disclaimer in the
73 * documentation and/or other materials provided with the distribution.
74 * 3. Neither the name of the University nor the names of its contributors
75 * may be used to endorse or promote products derived from this software
76 * without specific prior written permission.
77 *
78 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
79 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
80 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
81 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
82 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
83 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
84 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
85 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
86 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
87 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
88 * SUCH DAMAGE.
89 *
90 * @(#)route.c 8.3 (Berkeley) 1/9/95
91 */
92
93 #ifdef _KERNEL_OPT
94 #include "opt_inet.h"
95 #include "opt_route.h"
96 #include "opt_net_mpsafe.h"
97 #endif
98
99 #include <sys/cdefs.h>
100 __KERNEL_RCSID(0, "$NetBSD: route.c,v 1.176 2016/10/21 03:04:33 ozaki-r Exp $");
101
102 #include <sys/param.h>
103 #ifdef RTFLUSH_DEBUG
104 #include <sys/sysctl.h>
105 #endif
106 #include <sys/systm.h>
107 #include <sys/callout.h>
108 #include <sys/proc.h>
109 #include <sys/mbuf.h>
110 #include <sys/socket.h>
111 #include <sys/socketvar.h>
112 #include <sys/domain.h>
113 #include <sys/protosw.h>
114 #include <sys/kernel.h>
115 #include <sys/ioctl.h>
116 #include <sys/pool.h>
117 #include <sys/kauth.h>
118 #include <sys/workqueue.h>
119
120 #include <net/if.h>
121 #include <net/if_dl.h>
122 #include <net/route.h>
123
124 #include <netinet/in.h>
125 #include <netinet/in_var.h>
126
127 #ifdef RTFLUSH_DEBUG
128 #define rtcache_debug() __predict_false(_rtcache_debug)
129 #else /* RTFLUSH_DEBUG */
130 #define rtcache_debug() 0
131 #endif /* RTFLUSH_DEBUG */
132
133 struct rtstat rtstat;
134
135 static int rttrash; /* routes not in table but not freed */
136
137 static struct pool rtentry_pool;
138 static struct pool rttimer_pool;
139
140 static struct callout rt_timer_ch; /* callout for rt_timer_timer() */
141 struct workqueue *rt_timer_wq;
142 struct work rt_timer_wk;
143
144 #ifdef RTFLUSH_DEBUG
145 static int _rtcache_debug = 0;
146 #endif /* RTFLUSH_DEBUG */
147
148 static kauth_listener_t route_listener;
149
150 static int rtdeletemsg(struct rtentry *);
151 static void rtflushall(int);
152
153 static void rt_maskedcopy(const struct sockaddr *,
154 struct sockaddr *, const struct sockaddr *);
155
156 static void rtcache_clear(struct route *);
157 static void rtcache_clear_rtentry(int, struct rtentry *);
158 static void rtcache_invalidate(struct dom_rtlist *);
159
160 #ifdef DDB
161 static void db_print_sa(const struct sockaddr *);
162 static void db_print_ifa(struct ifaddr *);
163 static int db_show_rtentry(struct rtentry *, void *);
164 #endif
165
#ifdef RTFLUSH_DEBUG
static void sysctl_net_rtcache_setup(struct sysctllog **);

/*
 * Create the "rtcache" sysctl node under CTL_NET and, beneath it, a
 * read-write integer "debug" backed by _rtcache_debug.  Failures are
 * silently ignored; if the parent node cannot be created the child is
 * not attempted.
 */
static void
sysctl_net_rtcache_setup(struct sysctllog **clog)
{
	const struct sysctlnode *node;
	int error;

	error = sysctl_createv(clog, 0, NULL, &node, CTLFLAG_PERMANENT,
	    CTLTYPE_NODE,
	    "rtcache", SYSCTL_DESCR("Route cache related settings"),
	    NULL, 0, NULL, 0, CTL_NET, CTL_CREATE, CTL_EOL);
	if (error != 0)
		return;

	/* Nothing follows, so the result of the second creation is unused. */
	(void)sysctl_createv(clog, 0, &node, &node,
	    CTLFLAG_PERMANENT|CTLFLAG_READWRITE, CTLTYPE_INT,
	    "debug", SYSCTL_DESCR("Debug route caches"),
	    NULL, 0, &_rtcache_debug, 0, CTL_CREATE, CTL_EOL);
}
#endif /* RTFLUSH_DEBUG */
185
186 static inline void
187 rt_destroy(struct rtentry *rt)
188 {
189 if (rt->_rt_key != NULL)
190 sockaddr_free(rt->_rt_key);
191 if (rt->rt_gateway != NULL)
192 sockaddr_free(rt->rt_gateway);
193 if (rt_gettag(rt) != NULL)
194 sockaddr_free(rt_gettag(rt));
195 rt->_rt_key = rt->rt_gateway = rt->rt_tag = NULL;
196 }
197
198 static inline const struct sockaddr *
199 rt_setkey(struct rtentry *rt, const struct sockaddr *key, int flags)
200 {
201 if (rt->_rt_key == key)
202 goto out;
203
204 if (rt->_rt_key != NULL)
205 sockaddr_free(rt->_rt_key);
206 rt->_rt_key = sockaddr_dup(key, flags);
207 out:
208 rt->rt_nodes->rn_key = (const char *)rt->_rt_key;
209 return rt->_rt_key;
210 }
211
212 struct ifaddr *
213 rt_get_ifa(struct rtentry *rt)
214 {
215 struct ifaddr *ifa;
216
217 if ((ifa = rt->rt_ifa) == NULL)
218 return ifa;
219 else if (ifa->ifa_getifa == NULL)
220 return ifa;
221 #if 0
222 else if (ifa->ifa_seqno != NULL && *ifa->ifa_seqno == rt->rt_ifa_seqno)
223 return ifa;
224 #endif
225 else {
226 ifa = (*ifa->ifa_getifa)(ifa, rt_getkey(rt));
227 if (ifa == NULL)
228 return NULL;
229 rt_replace_ifa(rt, ifa);
230 return ifa;
231 }
232 }
233
234 static void
235 rt_set_ifa1(struct rtentry *rt, struct ifaddr *ifa)
236 {
237 rt->rt_ifa = ifa;
238 if (ifa->ifa_seqno != NULL)
239 rt->rt_ifa_seqno = *ifa->ifa_seqno;
240 }
241
242 /*
243 * Is this route the connected route for the ifa?
244 */
245 static int
246 rt_ifa_connected(const struct rtentry *rt, const struct ifaddr *ifa)
247 {
248 const struct sockaddr *key, *dst, *odst;
249 struct sockaddr_storage maskeddst;
250
251 key = rt_getkey(rt);
252 dst = rt->rt_flags & RTF_HOST ? ifa->ifa_dstaddr : ifa->ifa_addr;
253 if (dst == NULL ||
254 dst->sa_family != key->sa_family ||
255 dst->sa_len != key->sa_len)
256 return 0;
257 if ((rt->rt_flags & RTF_HOST) == 0 && ifa->ifa_netmask) {
258 odst = dst;
259 dst = (struct sockaddr *)&maskeddst;
260 rt_maskedcopy(odst, (struct sockaddr *)&maskeddst,
261 ifa->ifa_netmask);
262 }
263 return (memcmp(dst, key, dst->sa_len) == 0);
264 }
265
266 void
267 rt_replace_ifa(struct rtentry *rt, struct ifaddr *ifa)
268 {
269 if (rt->rt_ifa &&
270 rt->rt_ifa != ifa &&
271 rt->rt_ifa->ifa_flags & IFA_ROUTE &&
272 rt_ifa_connected(rt, rt->rt_ifa))
273 {
274 RT_DPRINTF("rt->_rt_key = %p, ifa = %p, "
275 "replace deleted IFA_ROUTE\n",
276 (void *)rt->_rt_key, (void *)rt->rt_ifa);
277 rt->rt_ifa->ifa_flags &= ~IFA_ROUTE;
278 if (rt_ifa_connected(rt, ifa)) {
279 RT_DPRINTF("rt->_rt_key = %p, ifa = %p, "
280 "replace added IFA_ROUTE\n",
281 (void *)rt->_rt_key, (void *)ifa);
282 ifa->ifa_flags |= IFA_ROUTE;
283 }
284 }
285
286 ifaref(ifa);
287 ifafree(rt->rt_ifa);
288 rt_set_ifa1(rt, ifa);
289 }
290
/*
 * Attach `ifa' to a route, taking a reference on it.  Any ifa previously
 * stored in the route is overwritten without being released.
 */
static void
rt_set_ifa(struct rtentry *rt, struct ifaddr *ifa)
{

	ifaref(ifa);
	rt_set_ifa1(rt, ifa);
}
297
298 static int
299 route_listener_cb(kauth_cred_t cred, kauth_action_t action, void *cookie,
300 void *arg0, void *arg1, void *arg2, void *arg3)
301 {
302 struct rt_msghdr *rtm;
303 int result;
304
305 result = KAUTH_RESULT_DEFER;
306 rtm = arg1;
307
308 if (action != KAUTH_NETWORK_ROUTE)
309 return result;
310
311 if (rtm->rtm_type == RTM_GET)
312 result = KAUTH_RESULT_ALLOW;
313
314 return result;
315 }
316
317 void
318 rt_init(void)
319 {
320
321 #ifdef RTFLUSH_DEBUG
322 sysctl_net_rtcache_setup(NULL);
323 #endif
324
325 pool_init(&rtentry_pool, sizeof(struct rtentry), 0, 0, 0, "rtentpl",
326 NULL, IPL_SOFTNET);
327 pool_init(&rttimer_pool, sizeof(struct rttimer), 0, 0, 0, "rttmrpl",
328 NULL, IPL_SOFTNET);
329
330 rn_init(); /* initialize all zeroes, all ones, mask table */
331 rtbl_init();
332
333 route_listener = kauth_listen_scope(KAUTH_SCOPE_NETWORK,
334 route_listener_cb, NULL);
335 }
336
337 static void
338 rtflushall(int family)
339 {
340 struct domain *dom;
341
342 if (rtcache_debug())
343 printf("%s: enter\n", __func__);
344
345 if ((dom = pffinddomain(family)) == NULL)
346 return;
347
348 rtcache_invalidate(&dom->dom_rtcache);
349 }
350
351 static void
352 rtcache(struct route *ro)
353 {
354 struct domain *dom;
355
356 rtcache_invariants(ro);
357 KASSERT(ro->_ro_rt != NULL);
358 KASSERT(ro->ro_invalid == false);
359 KASSERT(rtcache_getdst(ro) != NULL);
360
361 if ((dom = pffinddomain(rtcache_getdst(ro)->sa_family)) == NULL)
362 return;
363
364 LIST_INSERT_HEAD(&dom->dom_rtcache, ro, ro_rtcache_next);
365 rtcache_invariants(ro);
366 }
367
#ifdef RT_DEBUG
/*
 * Print a one-line summary of a routing entry (pointer, destination,
 * gateway, flags and interface name) for debugging.
 */
static void
dump_rt(const struct rtentry *rt)
{
	char buf[512];

	aprint_normal("rt: ");
	aprint_normal("p=%p ", rt);
	if (rt->_rt_key == NULL) {
		aprint_normal("dst=(NULL) ");
	} else {
		sockaddr_format(rt->_rt_key, buf, sizeof(buf));
		aprint_normal("dst=%s ", buf);
	}
	if (rt->rt_gateway == NULL) {
		aprint_normal("gw=(NULL) ");
	} else {
		/* Format the gateway itself, not the key a second time. */
		sockaddr_format(rt->rt_gateway, buf, sizeof(buf));
		aprint_normal("gw=%s ", buf);
	}
	aprint_normal("flags=%x ", rt->rt_flags);
	if (rt->rt_ifp == NULL) {
		aprint_normal("if=(NULL) ");
	} else {
		aprint_normal("if=%s ", rt->rt_ifp->if_xname);
	}
	aprint_normal("\n");
}
#endif /* RT_DEBUG */
397
398 /*
399 * Packet routing routines. If success, refcnt of a returned rtentry
400 * will be incremented. The caller has to rtfree it by itself.
401 */
402 struct rtentry *
403 rtalloc1(const struct sockaddr *dst, int report)
404 {
405 rtbl_t *rtbl;
406 struct rtentry *rt;
407 int s;
408
409 s = splsoftnet();
410 rtbl = rt_gettable(dst->sa_family);
411 if (rtbl == NULL)
412 goto miss;
413
414 rt = rt_matchaddr(rtbl, dst);
415 if (rt == NULL)
416 goto miss;
417
418 rt->rt_refcnt++;
419
420 splx(s);
421 return rt;
422 miss:
423 rtstat.rts_unreach++;
424 if (report) {
425 struct rt_addrinfo info;
426
427 memset(&info, 0, sizeof(info));
428 info.rti_info[RTAX_DST] = dst;
429 rt_missmsg(RTM_MISS, &info, 0, 0);
430 }
431 splx(s);
432 return NULL;
433 }
434
#if defined(DEBUG) && !defined(NET_MPSAFE)
/*
 * Debug check over every route cache of the given address family:
 * a cache that holds an rtentry implies that the rtentry's reference
 * count is positive, since the cache itself accounts for one reference.
 */
static void
rtcache_check_rtrefcnt(int family)
{
	struct domain *dom;
	struct route *ro;

	dom = pffinddomain(family);
	if (dom == NULL)
		return;

	LIST_FOREACH(ro, &dom->dom_rtcache, ro_rtcache_next) {
		KDASSERT(ro->_ro_rt == NULL || ro->_ro_rt->rt_refcnt > 0);
	}
}
#endif
454
455 void
456 rtfree(struct rtentry *rt)
457 {
458 struct ifaddr *ifa;
459
460 KASSERT(rt != NULL);
461 KASSERT(rt->rt_refcnt > 0);
462
463 rt->rt_refcnt--;
464 #if defined(DEBUG) && !defined(NET_MPSAFE)
465 if (rt_getkey(rt) != NULL)
466 rtcache_check_rtrefcnt(rt_getkey(rt)->sa_family);
467 #endif
468 if (rt->rt_refcnt == 0 && (rt->rt_flags & RTF_UP) == 0) {
469 rt_assert_inactive(rt);
470 rttrash--;
471 rt_timer_remove_all(rt, 0);
472 ifa = rt->rt_ifa;
473 rt->rt_ifa = NULL;
474 ifafree(ifa);
475 rt->rt_ifp = NULL;
476 rt_destroy(rt);
477 pool_put(&rtentry_pool, rt);
478 }
479 }
480
481 /*
482 * Force a routing table entry to the specified
483 * destination to go through the given gateway.
484 * Normally called as a result of a routing redirect
485 * message from the network layer.
486 *
487 * N.B.: must be called at splsoftnet
488 */
489 void
490 rtredirect(const struct sockaddr *dst, const struct sockaddr *gateway,
491 const struct sockaddr *netmask, int flags, const struct sockaddr *src,
492 struct rtentry **rtp)
493 {
494 struct rtentry *rt;
495 int error = 0;
496 uint64_t *stat = NULL;
497 struct rt_addrinfo info;
498 struct ifaddr *ifa;
499 struct psref psref;
500
501 /* verify the gateway is directly reachable */
502 if ((ifa = ifa_ifwithnet_psref(gateway, &psref)) == NULL) {
503 error = ENETUNREACH;
504 goto out;
505 }
506 rt = rtalloc1(dst, 0);
507 /*
508 * If the redirect isn't from our current router for this dst,
509 * it's either old or wrong. If it redirects us to ourselves,
510 * we have a routing loop, perhaps as a result of an interface
511 * going down recently.
512 */
513 if (!(flags & RTF_DONE) && rt &&
514 (sockaddr_cmp(src, rt->rt_gateway) != 0 || rt->rt_ifa != ifa))
515 error = EINVAL;
516 else {
517 int s = pserialize_read_enter();
518 struct ifaddr *_ifa;
519
520 _ifa = ifa_ifwithaddr(gateway);
521 if (_ifa != NULL)
522 error = EHOSTUNREACH;
523 pserialize_read_exit(s);
524 }
525 if (error)
526 goto done;
527 /*
528 * Create a new entry if we just got back a wildcard entry
529 * or the lookup failed. This is necessary for hosts
530 * which use routing redirects generated by smart gateways
531 * to dynamically build the routing tables.
532 */
533 if (rt == NULL || (rt_mask(rt) && rt_mask(rt)->sa_len < 2))
534 goto create;
535 /*
536 * Don't listen to the redirect if it's
537 * for a route to an interface.
538 */
539 if (rt->rt_flags & RTF_GATEWAY) {
540 if (((rt->rt_flags & RTF_HOST) == 0) && (flags & RTF_HOST)) {
541 /*
542 * Changing from route to net => route to host.
543 * Create new route, rather than smashing route to net.
544 */
545 create:
546 if (rt != NULL)
547 rtfree(rt);
548 flags |= RTF_GATEWAY | RTF_DYNAMIC;
549 memset(&info, 0, sizeof(info));
550 info.rti_info[RTAX_DST] = dst;
551 info.rti_info[RTAX_GATEWAY] = gateway;
552 info.rti_info[RTAX_NETMASK] = netmask;
553 info.rti_ifa = ifa;
554 info.rti_flags = flags;
555 rt = NULL;
556 error = rtrequest1(RTM_ADD, &info, &rt);
557 if (rt != NULL)
558 flags = rt->rt_flags;
559 stat = &rtstat.rts_dynamic;
560 } else {
561 /*
562 * Smash the current notion of the gateway to
563 * this destination. Should check about netmask!!!
564 */
565 error = rt_setgate(rt, gateway);
566 if (error == 0) {
567 rt->rt_flags |= RTF_MODIFIED;
568 flags |= RTF_MODIFIED;
569 }
570 stat = &rtstat.rts_newgateway;
571 }
572 } else
573 error = EHOSTUNREACH;
574 done:
575 if (rt) {
576 if (rtp != NULL && !error)
577 *rtp = rt;
578 else
579 rtfree(rt);
580 }
581 out:
582 if (error)
583 rtstat.rts_badredirect++;
584 else if (stat != NULL)
585 (*stat)++;
586 memset(&info, 0, sizeof(info));
587 info.rti_info[RTAX_DST] = dst;
588 info.rti_info[RTAX_GATEWAY] = gateway;
589 info.rti_info[RTAX_NETMASK] = netmask;
590 info.rti_info[RTAX_AUTHOR] = src;
591 rt_missmsg(RTM_REDIRECT, &info, flags, error);
592 ifa_release(ifa, &psref);
593 }
594
595 /*
596 * Delete a route and generate a message.
597 * It doesn't free a passed rt.
598 */
599 static int
600 rtdeletemsg(struct rtentry *rt)
601 {
602 int error;
603 struct rt_addrinfo info;
604
605 /*
606 * Request the new route so that the entry is not actually
607 * deleted. That will allow the information being reported to
608 * be accurate (and consistent with route_output()).
609 */
610 memset(&info, 0, sizeof(info));
611 info.rti_info[RTAX_DST] = rt_getkey(rt);
612 info.rti_info[RTAX_NETMASK] = rt_mask(rt);
613 info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
614 info.rti_flags = rt->rt_flags;
615 error = rtrequest1(RTM_DELETE, &info, NULL);
616
617 rt_missmsg(RTM_DELETE, &info, info.rti_flags, error);
618
619 return error;
620 }
621
622 struct ifaddr *
623 ifa_ifwithroute_psref(int flags, const struct sockaddr *dst,
624 const struct sockaddr *gateway, struct psref *psref)
625 {
626 struct ifaddr *ifa = NULL;
627
628 if ((flags & RTF_GATEWAY) == 0) {
629 /*
630 * If we are adding a route to an interface,
631 * and the interface is a pt to pt link
632 * we should search for the destination
633 * as our clue to the interface. Otherwise
634 * we can use the local address.
635 */
636 if ((flags & RTF_HOST) && gateway->sa_family != AF_LINK)
637 ifa = ifa_ifwithdstaddr_psref(dst, psref);
638 if (ifa == NULL)
639 ifa = ifa_ifwithaddr_psref(gateway, psref);
640 } else {
641 /*
642 * If we are adding a route to a remote net
643 * or host, the gateway may still be on the
644 * other end of a pt to pt link.
645 */
646 ifa = ifa_ifwithdstaddr_psref(gateway, psref);
647 }
648 if (ifa == NULL)
649 ifa = ifa_ifwithnet_psref(gateway, psref);
650 if (ifa == NULL) {
651 int s;
652 struct rtentry *rt;
653
654 rt = rtalloc1(dst, 0);
655 if (rt == NULL)
656 return NULL;
657 /*
658 * Just in case. May not need to do this workaround.
659 * Revisit when working on rtentry MP-ification.
660 */
661 s = pserialize_read_enter();
662 IFADDR_READER_FOREACH(ifa, rt->rt_ifp) {
663 if (ifa == rt->rt_ifa)
664 break;
665 }
666 if (ifa != NULL)
667 ifa_acquire(ifa, psref);
668 pserialize_read_exit(s);
669 rtfree(rt);
670 if (ifa == NULL)
671 return NULL;
672 }
673 if (ifa->ifa_addr->sa_family != dst->sa_family) {
674 struct ifaddr *nifa;
675 int s;
676
677 s = pserialize_read_enter();
678 nifa = ifaof_ifpforaddr(dst, ifa->ifa_ifp);
679 if (nifa != NULL) {
680 ifa_release(ifa, psref);
681 ifa_acquire(nifa, psref);
682 ifa = nifa;
683 }
684 pserialize_read_exit(s);
685 }
686 return ifa;
687 }
688
689 /*
690 * If it suceeds and ret_nrt isn't NULL, refcnt of ret_nrt is incremented.
691 * The caller has to rtfree it by itself.
692 */
693 int
694 rtrequest(int req, const struct sockaddr *dst, const struct sockaddr *gateway,
695 const struct sockaddr *netmask, int flags, struct rtentry **ret_nrt)
696 {
697 struct rt_addrinfo info;
698
699 memset(&info, 0, sizeof(info));
700 info.rti_flags = flags;
701 info.rti_info[RTAX_DST] = dst;
702 info.rti_info[RTAX_GATEWAY] = gateway;
703 info.rti_info[RTAX_NETMASK] = netmask;
704 return rtrequest1(req, &info, ret_nrt);
705 }
706
707 /*
708 * It's a utility function to add/remove a route to/from the routing table
709 * and tell user processes the addition/removal on success.
710 */
711 int
712 rtrequest_newmsg(const int req, const struct sockaddr *dst,
713 const struct sockaddr *gateway, const struct sockaddr *netmask,
714 const int flags)
715 {
716 int error;
717 struct rtentry *ret_nrt = NULL;
718
719 KASSERT(req == RTM_ADD || req == RTM_DELETE);
720
721 error = rtrequest(req, dst, gateway, netmask, flags, &ret_nrt);
722 if (error != 0)
723 return error;
724
725 KASSERT(ret_nrt != NULL);
726
727 rt_newmsg(req, ret_nrt); /* tell user process */
728 rtfree(ret_nrt);
729
730 return 0;
731 }
732
733 struct ifnet *
734 rt_getifp(struct rt_addrinfo *info, struct psref *psref)
735 {
736 const struct sockaddr *ifpaddr = info->rti_info[RTAX_IFP];
737
738 if (info->rti_ifp != NULL)
739 return NULL;
740 /*
741 * ifp may be specified by sockaddr_dl when protocol address
742 * is ambiguous
743 */
744 if (ifpaddr != NULL && ifpaddr->sa_family == AF_LINK) {
745 struct ifaddr *ifa;
746 int s = pserialize_read_enter();
747
748 ifa = ifa_ifwithnet(ifpaddr);
749 if (ifa != NULL)
750 info->rti_ifp = if_get_byindex(ifa->ifa_ifp->if_index,
751 psref);
752 pserialize_read_exit(s);
753 }
754
755 return info->rti_ifp;
756 }
757
758 struct ifaddr *
759 rt_getifa(struct rt_addrinfo *info, struct psref *psref)
760 {
761 struct ifaddr *ifa = NULL;
762 const struct sockaddr *dst = info->rti_info[RTAX_DST];
763 const struct sockaddr *gateway = info->rti_info[RTAX_GATEWAY];
764 const struct sockaddr *ifaaddr = info->rti_info[RTAX_IFA];
765 int flags = info->rti_flags;
766 const struct sockaddr *sa;
767
768 if (info->rti_ifa == NULL && ifaaddr != NULL) {
769 ifa = ifa_ifwithaddr_psref(ifaaddr, psref);
770 if (ifa != NULL)
771 goto got;
772 }
773
774 sa = ifaaddr != NULL ? ifaaddr :
775 (gateway != NULL ? gateway : dst);
776 if (sa != NULL && info->rti_ifp != NULL)
777 ifa = ifaof_ifpforaddr_psref(sa, info->rti_ifp, psref);
778 else if (dst != NULL && gateway != NULL)
779 ifa = ifa_ifwithroute_psref(flags, dst, gateway, psref);
780 else if (sa != NULL)
781 ifa = ifa_ifwithroute_psref(flags, sa, sa, psref);
782 if (ifa == NULL)
783 return NULL;
784 got:
785 if (ifa->ifa_getifa != NULL) {
786 /* FIXME NOMPSAFE */
787 ifa = (*ifa->ifa_getifa)(ifa, dst);
788 if (ifa == NULL)
789 return NULL;
790 ifa_acquire(ifa, psref);
791 }
792 info->rti_ifa = ifa;
793 if (info->rti_ifp == NULL)
794 info->rti_ifp = ifa->ifa_ifp;
795 return ifa;
796 }
797
798 /*
799 * If it suceeds and ret_nrt isn't NULL, refcnt of ret_nrt is incremented.
800 * The caller has to rtfree it by itself.
801 */
802 int
803 rtrequest1(int req, struct rt_addrinfo *info, struct rtentry **ret_nrt)
804 {
805 int s = splsoftnet(), ss;
806 int error = 0, rc;
807 struct rtentry *rt;
808 rtbl_t *rtbl;
809 struct ifaddr *ifa = NULL, *ifa2 = NULL;
810 struct sockaddr_storage maskeddst;
811 const struct sockaddr *dst = info->rti_info[RTAX_DST];
812 const struct sockaddr *gateway = info->rti_info[RTAX_GATEWAY];
813 const struct sockaddr *netmask = info->rti_info[RTAX_NETMASK];
814 int flags = info->rti_flags;
815 struct psref psref_ifp, psref_ifa;
816 int bound = 0;
817 struct ifnet *ifp = NULL;
818 bool need_to_release_ifa = true;
819 #define senderr(x) { error = x ; goto bad; }
820
821 bound = curlwp_bind();
822 if ((rtbl = rt_gettable(dst->sa_family)) == NULL)
823 senderr(ESRCH);
824 if (flags & RTF_HOST)
825 netmask = NULL;
826 switch (req) {
827 case RTM_DELETE:
828 if (netmask) {
829 rt_maskedcopy(dst, (struct sockaddr *)&maskeddst,
830 netmask);
831 dst = (struct sockaddr *)&maskeddst;
832 }
833 if ((rt = rt_lookup(rtbl, dst, netmask)) == NULL)
834 senderr(ESRCH);
835 if ((rt = rt_deladdr(rtbl, dst, netmask)) == NULL)
836 senderr(ESRCH);
837 rt->rt_flags &= ~RTF_UP;
838 if ((ifa = rt->rt_ifa)) {
839 if (ifa->ifa_flags & IFA_ROUTE &&
840 rt_ifa_connected(rt, ifa)) {
841 RT_DPRINTF("rt->_rt_key = %p, ifa = %p, "
842 "deleted IFA_ROUTE\n",
843 (void *)rt->_rt_key, (void *)ifa);
844 ifa->ifa_flags &= ~IFA_ROUTE;
845 }
846 if (ifa->ifa_rtrequest)
847 ifa->ifa_rtrequest(RTM_DELETE, rt, info);
848 ifa = NULL;
849 }
850 rttrash++;
851 if (ret_nrt) {
852 *ret_nrt = rt;
853 rt->rt_refcnt++;
854 } else if (rt->rt_refcnt <= 0) {
855 /* Adjust the refcount */
856 rt->rt_refcnt++;
857 rtfree(rt);
858 }
859 rtcache_clear_rtentry(dst->sa_family, rt);
860 break;
861
862 case RTM_ADD:
863 if (info->rti_ifa == NULL) {
864 ifp = rt_getifp(info, &psref_ifp);
865 ifa = rt_getifa(info, &psref_ifa);
866 if (ifa == NULL)
867 senderr(ENETUNREACH);
868 } else {
869 /* Caller should have a reference of ifa */
870 ifa = info->rti_ifa;
871 need_to_release_ifa = false;
872 }
873 rt = pool_get(&rtentry_pool, PR_NOWAIT);
874 if (rt == NULL)
875 senderr(ENOBUFS);
876 memset(rt, 0, sizeof(*rt));
877 rt->rt_flags = RTF_UP | flags;
878 LIST_INIT(&rt->rt_timer);
879
880 RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);
881 if (netmask) {
882 rt_maskedcopy(dst, (struct sockaddr *)&maskeddst,
883 netmask);
884 rt_setkey(rt, (struct sockaddr *)&maskeddst, M_NOWAIT);
885 } else {
886 rt_setkey(rt, dst, M_NOWAIT);
887 }
888 RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);
889 if (rt_getkey(rt) == NULL ||
890 rt_setgate(rt, gateway) != 0) {
891 pool_put(&rtentry_pool, rt);
892 senderr(ENOBUFS);
893 }
894
895 rt_set_ifa(rt, ifa);
896 if (info->rti_info[RTAX_TAG] != NULL) {
897 const struct sockaddr *tag;
898 tag = rt_settag(rt, info->rti_info[RTAX_TAG]);
899 if (tag == NULL)
900 senderr(ENOBUFS);
901 }
902 RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);
903
904 ss = pserialize_read_enter();
905 if (info->rti_info[RTAX_IFP] != NULL) {
906 ifa2 = ifa_ifwithnet(info->rti_info[RTAX_IFP]);
907 if (ifa2 != NULL)
908 rt->rt_ifp = ifa2->ifa_ifp;
909 else
910 rt->rt_ifp = ifa->ifa_ifp;
911 } else
912 rt->rt_ifp = ifa->ifa_ifp;
913 pserialize_read_exit(ss);
914
915 RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);
916 rc = rt_addaddr(rtbl, rt, netmask);
917 RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);
918 if (rc != 0) {
919 ifafree(ifa); /* for rt_set_ifa above */
920 rt_destroy(rt);
921 pool_put(&rtentry_pool, rt);
922 senderr(rc);
923 }
924 RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);
925 if (ifa->ifa_rtrequest)
926 ifa->ifa_rtrequest(req, rt, info);
927 if (need_to_release_ifa)
928 ifa_release(ifa, &psref_ifa);
929 ifa = NULL;
930 if_put(ifp, &psref_ifp);
931 ifp = NULL;
932 RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);
933 if (ret_nrt) {
934 *ret_nrt = rt;
935 rt->rt_refcnt++;
936 }
937 rtflushall(dst->sa_family);
938 break;
939 case RTM_GET:
940 if (netmask != NULL) {
941 rt_maskedcopy(dst, (struct sockaddr *)&maskeddst,
942 netmask);
943 dst = (struct sockaddr *)&maskeddst;
944 }
945 if ((rt = rt_lookup(rtbl, dst, netmask)) == NULL)
946 senderr(ESRCH);
947 if (ret_nrt != NULL) {
948 *ret_nrt = rt;
949 rt->rt_refcnt++;
950 }
951 break;
952 }
953 bad:
954 if (need_to_release_ifa)
955 ifa_release(ifa, &psref_ifa);
956 if_put(ifp, &psref_ifp);
957 curlwp_bindx(bound);
958 splx(s);
959 return error;
960 }
961
962 int
963 rt_setgate(struct rtentry *rt, const struct sockaddr *gate)
964 {
965 struct sockaddr *new, *old;
966
967 KASSERT(rt->_rt_key != NULL);
968 RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);
969
970 new = sockaddr_dup(gate, M_ZERO | M_NOWAIT);
971 if (new == NULL)
972 return ENOMEM;
973
974 old = rt->rt_gateway;
975 rt->rt_gateway = new;
976 if (old != NULL)
977 sockaddr_free(old);
978
979 KASSERT(rt->_rt_key != NULL);
980 RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);
981
982 if (rt->rt_flags & RTF_GATEWAY) {
983 struct rtentry *gwrt = rtalloc1(gate, 1);
984 /*
985 * If we switched gateways, grab the MTU from the new
986 * gateway route if the current MTU, if the current MTU is
987 * greater than the MTU of gateway.
988 * Note that, if the MTU of gateway is 0, we will reset the
989 * MTU of the route to run PMTUD again from scratch. XXX
990 */
991 if (gwrt != NULL) {
992 KASSERT(gwrt->_rt_key != NULL);
993 RT_DPRINTF("gwrt->_rt_key = %p\n", gwrt->_rt_key);
994 if ((rt->rt_rmx.rmx_locks & RTV_MTU) == 0 &&
995 rt->rt_rmx.rmx_mtu &&
996 rt->rt_rmx.rmx_mtu > gwrt->rt_rmx.rmx_mtu) {
997 rt->rt_rmx.rmx_mtu = gwrt->rt_rmx.rmx_mtu;
998 }
999 rtfree(gwrt);
1000 }
1001 }
1002 KASSERT(rt->_rt_key != NULL);
1003 RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);
1004 return 0;
1005 }
1006
1007 static void
1008 rt_maskedcopy(const struct sockaddr *src, struct sockaddr *dst,
1009 const struct sockaddr *netmask)
1010 {
1011 const char *netmaskp = &netmask->sa_data[0],
1012 *srcp = &src->sa_data[0];
1013 char *dstp = &dst->sa_data[0];
1014 const char *maskend = (char *)dst + MIN(netmask->sa_len, src->sa_len);
1015 const char *srcend = (char *)dst + src->sa_len;
1016
1017 dst->sa_len = src->sa_len;
1018 dst->sa_family = src->sa_family;
1019
1020 while (dstp < maskend)
1021 *dstp++ = *srcp++ & *netmaskp++;
1022 if (dstp < srcend)
1023 memset(dstp, 0, (size_t)(srcend - dstp));
1024 }
1025
1026 /*
1027 * Inform the routing socket of a route change.
1028 */
1029 void
1030 rt_newmsg(const int cmd, const struct rtentry *rt)
1031 {
1032 struct rt_addrinfo info;
1033
1034 memset((void *)&info, 0, sizeof(info));
1035 info.rti_info[RTAX_DST] = rt_getkey(rt);
1036 info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
1037 info.rti_info[RTAX_NETMASK] = rt_mask(rt);
1038 if (rt->rt_ifp) {
1039 info.rti_info[RTAX_IFP] = rt->rt_ifp->if_dl->ifa_addr;
1040 info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr;
1041 }
1042
1043 rt_missmsg(cmd, &info, rt->rt_flags, 0);
1044 }
1045
1046 /*
1047 * Set up or tear down a routing table entry, normally
1048 * for an interface.
1049 */
1050 int
1051 rtinit(struct ifaddr *ifa, int cmd, int flags)
1052 {
1053 struct rtentry *rt;
1054 struct sockaddr *dst, *odst;
1055 struct sockaddr_storage maskeddst;
1056 struct rtentry *nrt = NULL;
1057 int error;
1058 struct rt_addrinfo info;
1059
1060 dst = flags & RTF_HOST ? ifa->ifa_dstaddr : ifa->ifa_addr;
1061 if (cmd == RTM_DELETE) {
1062 if ((flags & RTF_HOST) == 0 && ifa->ifa_netmask) {
1063 /* Delete subnet route for this interface */
1064 odst = dst;
1065 dst = (struct sockaddr *)&maskeddst;
1066 rt_maskedcopy(odst, dst, ifa->ifa_netmask);
1067 }
1068 if ((rt = rtalloc1(dst, 0)) != NULL) {
1069 if (rt->rt_ifa != ifa) {
1070 rtfree(rt);
1071 return (flags & RTF_HOST) ? EHOSTUNREACH
1072 : ENETUNREACH;
1073 }
1074 rtfree(rt);
1075 }
1076 }
1077 memset(&info, 0, sizeof(info));
1078 info.rti_ifa = ifa;
1079 info.rti_flags = flags | ifa->ifa_flags;
1080 info.rti_info[RTAX_DST] = dst;
1081 info.rti_info[RTAX_GATEWAY] = ifa->ifa_addr;
1082
1083 /*
1084 * XXX here, it seems that we are assuming that ifa_netmask is NULL
1085 * for RTF_HOST. bsdi4 passes NULL explicitly (via intermediate
1086 * variable) when RTF_HOST is 1. still not sure if i can safely
1087 * change it to meet bsdi4 behavior.
1088 */
1089 if (cmd != RTM_LLINFO_UPD)
1090 info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask;
1091 error = rtrequest1((cmd == RTM_LLINFO_UPD) ? RTM_GET : cmd, &info,
1092 &nrt);
1093 if (error != 0)
1094 return error;
1095
1096 rt = nrt;
1097 switch (cmd) {
1098 case RTM_DELETE:
1099 rt_newmsg(cmd, rt);
1100 break;
1101 case RTM_LLINFO_UPD:
1102 if (cmd == RTM_LLINFO_UPD && ifa->ifa_rtrequest != NULL)
1103 ifa->ifa_rtrequest(RTM_LLINFO_UPD, rt, &info);
1104 rt_newmsg(RTM_CHANGE, rt);
1105 break;
1106 case RTM_ADD:
1107 if (rt->rt_ifa != ifa) {
1108 printf("rtinit: wrong ifa (%p) was (%p)\n", ifa,
1109 rt->rt_ifa);
1110 if (rt->rt_ifa->ifa_rtrequest != NULL) {
1111 rt->rt_ifa->ifa_rtrequest(RTM_DELETE, rt,
1112 &info);
1113 }
1114 rt_replace_ifa(rt, ifa);
1115 rt->rt_ifp = ifa->ifa_ifp;
1116 if (ifa->ifa_rtrequest != NULL)
1117 ifa->ifa_rtrequest(RTM_ADD, rt, &info);
1118 }
1119 rt_newmsg(cmd, rt);
1120 break;
1121 }
1122 rtfree(rt);
1123 return error;
1124 }
1125
1126 /*
1127 * Create a local route entry for the address.
1128 * Announce the addition of the address and the route to the routing socket.
1129 */
1130 int
1131 rt_ifa_addlocal(struct ifaddr *ifa)
1132 {
1133 struct rtentry *rt;
1134 int e;
1135
1136 /* If there is no loopback entry, allocate one. */
1137 rt = rtalloc1(ifa->ifa_addr, 0);
1138 #ifdef RT_DEBUG
1139 if (rt != NULL)
1140 dump_rt(rt);
1141 #endif
1142 if (rt == NULL || (rt->rt_flags & RTF_HOST) == 0 ||
1143 (rt->rt_ifp->if_flags & IFF_LOOPBACK) == 0)
1144 {
1145 struct rt_addrinfo info;
1146 struct rtentry *nrt;
1147
1148 memset(&info, 0, sizeof(info));
1149 info.rti_flags = RTF_HOST | RTF_LOCAL;
1150 if (!(ifa->ifa_ifp->if_flags & (IFF_LOOPBACK|IFF_POINTOPOINT)))
1151 info.rti_flags |= RTF_LLDATA;
1152 info.rti_info[RTAX_DST] = ifa->ifa_addr;
1153 info.rti_info[RTAX_GATEWAY] =
1154 (const struct sockaddr *)ifa->ifa_ifp->if_sadl;
1155 info.rti_ifa = ifa;
1156 nrt = NULL;
1157 e = rtrequest1(RTM_ADD, &info, &nrt);
1158 if (nrt && ifa != nrt->rt_ifa)
1159 rt_replace_ifa(nrt, ifa);
1160 rt_newaddrmsg(RTM_ADD, ifa, e, nrt);
1161 if (nrt != NULL) {
1162 #ifdef RT_DEBUG
1163 dump_rt(nrt);
1164 #endif
1165 rtfree(nrt);
1166 }
1167 } else {
1168 e = 0;
1169 rt_newaddrmsg(RTM_NEWADDR, ifa, 0, NULL);
1170 }
1171 if (rt != NULL)
1172 rtfree(rt);
1173 return e;
1174 }
1175
1176 /*
1177 * Remove the local route entry for the address.
1178 * Announce the removal of the address and the route to the routing socket.
1179 */
1180 int
1181 rt_ifa_remlocal(struct ifaddr *ifa, struct ifaddr *alt_ifa)
1182 {
1183 struct rtentry *rt;
1184 int e = 0;
1185
1186 rt = rtalloc1(ifa->ifa_addr, 0);
1187
1188 /*
1189 * Before deleting, check if a corresponding loopbacked
1190 * host route surely exists. With this check, we can avoid
1191 * deleting an interface direct route whose destination is
1192 * the same as the address being removed. This can happen
1193 * when removing a subnet-router anycast address on an
1194 * interface attached to a shared medium.
1195 */
1196 if (rt != NULL &&
1197 (rt->rt_flags & RTF_HOST) &&
1198 (rt->rt_ifp->if_flags & IFF_LOOPBACK))
1199 {
1200 /* If we cannot replace the route's ifaddr with the equivalent
1201 * ifaddr of another interface, I believe it is safest to
1202 * delete the route.
1203 */
1204 if (alt_ifa == NULL) {
1205 e = rtdeletemsg(rt);
1206 rt_newaddrmsg(RTM_DELADDR, ifa, 0, NULL);
1207 } else {
1208 rt_replace_ifa(rt, alt_ifa);
1209 rt_newmsg(RTM_CHANGE, rt);
1210 }
1211 } else
1212 rt_newaddrmsg(RTM_DELADDR, ifa, 0, NULL);
1213 if (rt != NULL)
1214 rtfree(rt);
1215 return e;
1216 }
1217
/*
 * Route timer routines.  These routines allow functions to be called
 * for various routes at any time.  This is useful in supporting
 * path MTU discovery and redirect route deletion.
 *
 * This is similar to some BSDI internal functions, but it provides
 * for multiple queues for efficiency's sake...
 */
1226
1227 LIST_HEAD(, rttimer_queue) rttimer_queue_head;
1228 static int rt_init_done = 0;
1229
1230 /*
1231 * Some subtle order problems with domain initialization mean that
1232 * we cannot count on this being run from rt_init before various
1233 * protocol initializations are done. Therefore, we make sure
1234 * that this is run when the first queue is added...
1235 */
1236
1237 static void rt_timer_work(struct work *, void *);
1238
1239 void
1240 rt_timer_init(void)
1241 {
1242 int error;
1243
1244 assert(rt_init_done == 0);
1245
1246 LIST_INIT(&rttimer_queue_head);
1247 callout_init(&rt_timer_ch, CALLOUT_MPSAFE);
1248 error = workqueue_create(&rt_timer_wq, "rt_timer",
1249 rt_timer_work, NULL, PRI_SOFTNET, IPL_SOFTNET, WQ_MPSAFE);
1250 if (error)
1251 panic("%s: workqueue_create failed (%d)\n", __func__, error);
1252 callout_reset(&rt_timer_ch, hz, rt_timer_timer, NULL);
1253 rt_init_done = 1;
1254 }
1255
1256 struct rttimer_queue *
1257 rt_timer_queue_create(u_int timeout)
1258 {
1259 struct rttimer_queue *rtq;
1260
1261 if (rt_init_done == 0)
1262 rt_timer_init();
1263
1264 R_Malloc(rtq, struct rttimer_queue *, sizeof *rtq);
1265 if (rtq == NULL)
1266 return NULL;
1267 memset(rtq, 0, sizeof(*rtq));
1268
1269 rtq->rtq_timeout = timeout;
1270 TAILQ_INIT(&rtq->rtq_head);
1271 LIST_INSERT_HEAD(&rttimer_queue_head, rtq, rtq_link);
1272
1273 return rtq;
1274 }
1275
1276 void
1277 rt_timer_queue_change(struct rttimer_queue *rtq, long timeout)
1278 {
1279
1280 rtq->rtq_timeout = timeout;
1281 }
1282
1283 void
1284 rt_timer_queue_remove_all(struct rttimer_queue *rtq, int destroy)
1285 {
1286 struct rttimer *r;
1287
1288 while ((r = TAILQ_FIRST(&rtq->rtq_head)) != NULL) {
1289 LIST_REMOVE(r, rtt_link);
1290 TAILQ_REMOVE(&rtq->rtq_head, r, rtt_next);
1291 if (destroy)
1292 (*r->rtt_func)(r->rtt_rt, r);
1293 rtfree(r->rtt_rt);
1294 pool_put(&rttimer_pool, r);
1295 if (rtq->rtq_count > 0)
1296 rtq->rtq_count--;
1297 else
1298 printf("rt_timer_queue_remove_all: "
1299 "rtq_count reached 0\n");
1300 }
1301 }
1302
1303 void
1304 rt_timer_queue_destroy(struct rttimer_queue *rtq, int destroy)
1305 {
1306
1307 rt_timer_queue_remove_all(rtq, destroy);
1308
1309 LIST_REMOVE(rtq, rtq_link);
1310
1311 /*
1312 * Caller is responsible for freeing the rttimer_queue structure.
1313 */
1314 }
1315
1316 unsigned long
1317 rt_timer_count(struct rttimer_queue *rtq)
1318 {
1319 return rtq->rtq_count;
1320 }
1321
1322 void
1323 rt_timer_remove_all(struct rtentry *rt, int destroy)
1324 {
1325 struct rttimer *r;
1326
1327 while ((r = LIST_FIRST(&rt->rt_timer)) != NULL) {
1328 LIST_REMOVE(r, rtt_link);
1329 TAILQ_REMOVE(&r->rtt_queue->rtq_head, r, rtt_next);
1330 if (destroy)
1331 (*r->rtt_func)(r->rtt_rt, r);
1332 if (r->rtt_queue->rtq_count > 0)
1333 r->rtt_queue->rtq_count--;
1334 else
1335 printf("rt_timer_remove_all: rtq_count reached 0\n");
1336 rtfree(r->rtt_rt);
1337 pool_put(&rttimer_pool, r);
1338 }
1339 }
1340
1341 int
1342 rt_timer_add(struct rtentry *rt,
1343 void (*func)(struct rtentry *, struct rttimer *),
1344 struct rttimer_queue *queue)
1345 {
1346 struct rttimer *r;
1347
1348 KASSERT(func != NULL);
1349 /*
1350 * If there's already a timer with this action, destroy it before
1351 * we add a new one.
1352 */
1353 LIST_FOREACH(r, &rt->rt_timer, rtt_link) {
1354 if (r->rtt_func == func)
1355 break;
1356 }
1357 if (r != NULL) {
1358 LIST_REMOVE(r, rtt_link);
1359 TAILQ_REMOVE(&r->rtt_queue->rtq_head, r, rtt_next);
1360 if (r->rtt_queue->rtq_count > 0)
1361 r->rtt_queue->rtq_count--;
1362 else
1363 printf("rt_timer_add: rtq_count reached 0\n");
1364 rtfree(r->rtt_rt);
1365 } else {
1366 r = pool_get(&rttimer_pool, PR_NOWAIT);
1367 if (r == NULL)
1368 return ENOBUFS;
1369 }
1370
1371 memset(r, 0, sizeof(*r));
1372
1373 rt->rt_refcnt++;
1374 r->rtt_rt = rt;
1375 r->rtt_time = time_uptime;
1376 r->rtt_func = func;
1377 r->rtt_queue = queue;
1378 LIST_INSERT_HEAD(&rt->rt_timer, r, rtt_link);
1379 TAILQ_INSERT_TAIL(&queue->rtq_head, r, rtt_next);
1380 r->rtt_queue->rtq_count++;
1381
1382 return 0;
1383 }
1384
1385 static void
1386 rt_timer_work(struct work *wk, void *arg)
1387 {
1388 struct rttimer_queue *rtq;
1389 struct rttimer *r;
1390 int s;
1391
1392 s = splsoftnet();
1393 LIST_FOREACH(rtq, &rttimer_queue_head, rtq_link) {
1394 while ((r = TAILQ_FIRST(&rtq->rtq_head)) != NULL &&
1395 (r->rtt_time + rtq->rtq_timeout) < time_uptime) {
1396 LIST_REMOVE(r, rtt_link);
1397 TAILQ_REMOVE(&rtq->rtq_head, r, rtt_next);
1398 (*r->rtt_func)(r->rtt_rt, r);
1399 rtfree(r->rtt_rt);
1400 pool_put(&rttimer_pool, r);
1401 if (rtq->rtq_count > 0)
1402 rtq->rtq_count--;
1403 else
1404 printf("rt_timer_timer: rtq_count reached 0\n");
1405 }
1406 }
1407 splx(s);
1408
1409 callout_reset(&rt_timer_ch, hz, rt_timer_timer, NULL);
1410 }
1411
1412 void
1413 rt_timer_timer(void *arg)
1414 {
1415
1416 workqueue_enqueue(rt_timer_wq, &rt_timer_wk, NULL);
1417 }
1418
1419 static struct rtentry *
1420 _rtcache_init(struct route *ro, int flag)
1421 {
1422 rtcache_invariants(ro);
1423 KASSERT(ro->_ro_rt == NULL);
1424
1425 if (rtcache_getdst(ro) == NULL)
1426 return NULL;
1427 ro->ro_invalid = false;
1428 if ((ro->_ro_rt = rtalloc1(rtcache_getdst(ro), flag)) != NULL)
1429 rtcache(ro);
1430
1431 rtcache_invariants(ro);
1432 return ro->_ro_rt;
1433 }
1434
/*
 * Populate the route cache via _rtcache_init() with its flag argument
 * set to 1.  Returns the cached rtentry or NULL.
 */
struct rtentry *
rtcache_init(struct route *ro)
{

	return _rtcache_init(ro, 1);
}
1440
/*
 * Populate the route cache via _rtcache_init() with its flag argument
 * set to 0.  Returns the cached rtentry or NULL.
 */
struct rtentry *
rtcache_init_noclone(struct route *ro)
{

	return _rtcache_init(ro, 0);
}
1446
/*
 * Discard whatever route is cached and look the destination up again,
 * passing clone through to _rtcache_init().  Returns the newly cached
 * rtentry or NULL.
 */
struct rtentry *
rtcache_update(struct route *ro, int clone)
{
	struct rtentry *fresh;

	rtcache_clear(ro);
	fresh = _rtcache_init(ro, clone);
	return fresh;
}
1453
1454 void
1455 rtcache_copy(struct route *new_ro, const struct route *old_ro)
1456 {
1457 struct rtentry *rt;
1458
1459 KASSERT(new_ro != old_ro);
1460 rtcache_invariants(new_ro);
1461 rtcache_invariants(old_ro);
1462
1463 if ((rt = rtcache_validate(old_ro)) != NULL)
1464 rt->rt_refcnt++;
1465
1466 if (rtcache_getdst(old_ro) == NULL ||
1467 rtcache_setdst(new_ro, rtcache_getdst(old_ro)) != 0)
1468 return;
1469
1470 new_ro->ro_invalid = false;
1471 if ((new_ro->_ro_rt = rt) != NULL)
1472 rtcache(new_ro);
1473 rtcache_invariants(new_ro);
1474 }
1475
1476 static struct dom_rtlist invalid_routes = LIST_HEAD_INITIALIZER(dom_rtlist);
1477
1478 static void
1479 rtcache_invalidate(struct dom_rtlist *rtlist)
1480 {
1481 struct route *ro;
1482
1483 while ((ro = LIST_FIRST(rtlist)) != NULL) {
1484 rtcache_invariants(ro);
1485 KASSERT(ro->_ro_rt != NULL);
1486 ro->ro_invalid = true;
1487 LIST_REMOVE(ro, ro_rtcache_next);
1488 LIST_INSERT_HEAD(&invalid_routes, ro, ro_rtcache_next);
1489 rtcache_invariants(ro);
1490 }
1491 }
1492
1493 static void
1494 rtcache_clear_rtentry(int family, struct rtentry *rt)
1495 {
1496 struct domain *dom;
1497 struct route *ro, *nro;
1498
1499 if ((dom = pffinddomain(family)) == NULL)
1500 return;
1501
1502 LIST_FOREACH_SAFE(ro, &dom->dom_rtcache, ro_rtcache_next, nro) {
1503 if (ro->_ro_rt == rt)
1504 rtcache_clear(ro);
1505 }
1506 }
1507
1508 static void
1509 rtcache_clear(struct route *ro)
1510 {
1511 rtcache_invariants(ro);
1512 if (ro->_ro_rt == NULL)
1513 return;
1514
1515 LIST_REMOVE(ro, ro_rtcache_next);
1516
1517 rtfree(ro->_ro_rt);
1518 ro->_ro_rt = NULL;
1519 ro->ro_invalid = false;
1520 rtcache_invariants(ro);
1521 }
1522
1523 struct rtentry *
1524 rtcache_lookup2(struct route *ro, const struct sockaddr *dst, int clone,
1525 int *hitp)
1526 {
1527 const struct sockaddr *odst;
1528 struct rtentry *rt = NULL;
1529
1530 odst = rtcache_getdst(ro);
1531 if (odst == NULL)
1532 goto miss;
1533
1534 if (sockaddr_cmp(odst, dst) != 0) {
1535 rtcache_free(ro);
1536 goto miss;
1537 }
1538
1539 rt = rtcache_validate(ro);
1540 if (rt == NULL) {
1541 rtcache_clear(ro);
1542 goto miss;
1543 }
1544
1545 *hitp = 1;
1546 rtcache_invariants(ro);
1547
1548 return rt;
1549 miss:
1550 *hitp = 0;
1551 if (rtcache_setdst(ro, dst) == 0)
1552 rt = _rtcache_init(ro, clone);
1553
1554 rtcache_invariants(ro);
1555
1556 return rt;
1557 }
1558
1559 void
1560 rtcache_free(struct route *ro)
1561 {
1562 rtcache_clear(ro);
1563 if (ro->ro_sa != NULL) {
1564 sockaddr_free(ro->ro_sa);
1565 ro->ro_sa = NULL;
1566 }
1567 rtcache_invariants(ro);
1568 }
1569
1570 int
1571 rtcache_setdst(struct route *ro, const struct sockaddr *sa)
1572 {
1573 KASSERT(sa != NULL);
1574
1575 rtcache_invariants(ro);
1576 if (ro->ro_sa != NULL) {
1577 if (ro->ro_sa->sa_family == sa->sa_family) {
1578 rtcache_clear(ro);
1579 sockaddr_copy(ro->ro_sa, ro->ro_sa->sa_len, sa);
1580 rtcache_invariants(ro);
1581 return 0;
1582 }
1583 /* free ro_sa, wrong family */
1584 rtcache_free(ro);
1585 }
1586
1587 KASSERT(ro->_ro_rt == NULL);
1588
1589 if ((ro->ro_sa = sockaddr_dup(sa, M_ZERO | M_NOWAIT)) == NULL) {
1590 rtcache_invariants(ro);
1591 return ENOMEM;
1592 }
1593 rtcache_invariants(ro);
1594 return 0;
1595 }
1596
1597 const struct sockaddr *
1598 rt_settag(struct rtentry *rt, const struct sockaddr *tag)
1599 {
1600 if (rt->rt_tag != tag) {
1601 if (rt->rt_tag != NULL)
1602 sockaddr_free(rt->rt_tag);
1603 rt->rt_tag = sockaddr_dup(tag, M_ZERO | M_NOWAIT);
1604 }
1605 return rt->rt_tag;
1606 }
1607
1608 struct sockaddr *
1609 rt_gettag(const struct rtentry *rt)
1610 {
1611 return rt->rt_tag;
1612 }
1613
1614 int
1615 rt_check_reject_route(const struct rtentry *rt, const struct ifnet *ifp)
1616 {
1617
1618 if ((rt->rt_flags & RTF_REJECT) != 0) {
1619 /* Mimic looutput */
1620 if (ifp->if_flags & IFF_LOOPBACK)
1621 return (rt->rt_flags & RTF_HOST) ?
1622 EHOSTUNREACH : ENETUNREACH;
1623 else if (rt->rt_rmx.rmx_expire == 0 ||
1624 time_uptime < rt->rt_rmx.rmx_expire)
1625 return (rt->rt_flags & RTF_GATEWAY) ?
1626 EHOSTUNREACH : EHOSTDOWN;
1627 }
1628
1629 return 0;
1630 }
1631
1632 #ifdef DDB
1633
1634 #include <machine/db_machdep.h>
1635 #include <ddb/db_interface.h>
1636 #include <ddb/db_output.h>
1637
1638 #define rt_expire rt_rmx.rmx_expire
1639
1640 static void
1641 db_print_sa(const struct sockaddr *sa)
1642 {
1643 int len;
1644 const u_char *p;
1645
1646 if (sa == NULL) {
1647 db_printf("[NULL]");
1648 return;
1649 }
1650
1651 p = (const u_char *)sa;
1652 len = sa->sa_len;
1653 db_printf("[");
1654 while (len > 0) {
1655 db_printf("%d", *p);
1656 p++; len--;
1657 if (len) db_printf(",");
1658 }
1659 db_printf("]\n");
1660 }
1661
1662 static void
1663 db_print_ifa(struct ifaddr *ifa)
1664 {
1665 if (ifa == NULL)
1666 return;
1667 db_printf(" ifa_addr=");
1668 db_print_sa(ifa->ifa_addr);
1669 db_printf(" ifa_dsta=");
1670 db_print_sa(ifa->ifa_dstaddr);
1671 db_printf(" ifa_mask=");
1672 db_print_sa(ifa->ifa_netmask);
1673 db_printf(" flags=0x%x,refcnt=%d,metric=%d\n",
1674 ifa->ifa_flags,
1675 ifa->ifa_refcnt,
1676 ifa->ifa_metric);
1677 }
1678
1679 /*
1680 * Function to pass to rt_walktree().
1681 * Return non-zero error to abort walk.
1682 */
1683 static int
1684 db_show_rtentry(struct rtentry *rt, void *w)
1685 {
1686 db_printf("rtentry=%p", rt);
1687
1688 db_printf(" flags=0x%x refcnt=%d use=%"PRId64" expire=%"PRId64"\n",
1689 rt->rt_flags, rt->rt_refcnt,
1690 rt->rt_use, (uint64_t)rt->rt_expire);
1691
1692 db_printf(" key="); db_print_sa(rt_getkey(rt));
1693 db_printf(" mask="); db_print_sa(rt_mask(rt));
1694 db_printf(" gw="); db_print_sa(rt->rt_gateway);
1695
1696 db_printf(" ifp=%p ", rt->rt_ifp);
1697 if (rt->rt_ifp)
1698 db_printf("(%s)", rt->rt_ifp->if_xname);
1699 else
1700 db_printf("(NULL)");
1701
1702 db_printf(" ifa=%p\n", rt->rt_ifa);
1703 db_print_ifa(rt->rt_ifa);
1704
1705 db_printf(" gwroute=%p llinfo=%p\n",
1706 rt->rt_gwroute, rt->rt_llinfo);
1707
1708 return 0;
1709 }
1710
1711 /*
1712 * Function to print all the route trees.
1713 * Use this from ddb: "show routes"
1714 */
1715 void
1716 db_show_routes(db_expr_t addr, bool have_addr,
1717 db_expr_t count, const char *modif)
1718 {
1719 rt_walktree(AF_INET, db_show_rtentry, NULL);
1720 }
1721 #endif
1722