route.c revision 1.178 1 /* $NetBSD: route.c,v 1.178 2016/10/21 10:52:47 ozaki-r Exp $ */
2
3 /*-
4 * Copyright (c) 1998, 2008 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Kevin M. Lahey of the Numerical Aerospace Simulation Facility,
9 * NASA Ames Research Center.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
23 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 * POSSIBILITY OF SUCH DAMAGE.
31 */
32
33 /*
34 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
35 * All rights reserved.
36 *
37 * Redistribution and use in source and binary forms, with or without
38 * modification, are permitted provided that the following conditions
39 * are met:
40 * 1. Redistributions of source code must retain the above copyright
41 * notice, this list of conditions and the following disclaimer.
42 * 2. Redistributions in binary form must reproduce the above copyright
43 * notice, this list of conditions and the following disclaimer in the
44 * documentation and/or other materials provided with the distribution.
45 * 3. Neither the name of the project nor the names of its contributors
46 * may be used to endorse or promote products derived from this software
47 * without specific prior written permission.
48 *
49 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
50 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
53 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59 * SUCH DAMAGE.
60 */
61
62 /*
63 * Copyright (c) 1980, 1986, 1991, 1993
64 * The Regents of the University of California. All rights reserved.
65 *
66 * Redistribution and use in source and binary forms, with or without
67 * modification, are permitted provided that the following conditions
68 * are met:
69 * 1. Redistributions of source code must retain the above copyright
70 * notice, this list of conditions and the following disclaimer.
71 * 2. Redistributions in binary form must reproduce the above copyright
72 * notice, this list of conditions and the following disclaimer in the
73 * documentation and/or other materials provided with the distribution.
74 * 3. Neither the name of the University nor the names of its contributors
75 * may be used to endorse or promote products derived from this software
76 * without specific prior written permission.
77 *
78 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
79 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
80 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
81 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
82 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
83 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
84 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
85 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
86 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
87 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
88 * SUCH DAMAGE.
89 *
90 * @(#)route.c 8.3 (Berkeley) 1/9/95
91 */
92
93 #ifdef _KERNEL_OPT
94 #include "opt_inet.h"
95 #include "opt_route.h"
96 #include "opt_net_mpsafe.h"
97 #endif
98
99 #include <sys/cdefs.h>
100 __KERNEL_RCSID(0, "$NetBSD: route.c,v 1.178 2016/10/21 10:52:47 ozaki-r Exp $");
101
102 #include <sys/param.h>
103 #ifdef RTFLUSH_DEBUG
104 #include <sys/sysctl.h>
105 #endif
106 #include <sys/systm.h>
107 #include <sys/callout.h>
108 #include <sys/proc.h>
109 #include <sys/mbuf.h>
110 #include <sys/socket.h>
111 #include <sys/socketvar.h>
112 #include <sys/domain.h>
113 #include <sys/protosw.h>
114 #include <sys/kernel.h>
115 #include <sys/ioctl.h>
116 #include <sys/pool.h>
117 #include <sys/kauth.h>
118 #include <sys/workqueue.h>
119
120 #include <net/if.h>
121 #include <net/if_dl.h>
122 #include <net/route.h>
123
124 #include <netinet/in.h>
125 #include <netinet/in_var.h>
126
127 #ifdef RTFLUSH_DEBUG
128 #define rtcache_debug() __predict_false(_rtcache_debug)
129 #else /* RTFLUSH_DEBUG */
130 #define rtcache_debug() 0
131 #endif /* RTFLUSH_DEBUG */
132
133 struct rtstat rtstat;
134
135 static int rttrash; /* routes not in table but not freed */
136
137 static struct pool rtentry_pool;
138 static struct pool rttimer_pool;
139
140 static struct callout rt_timer_ch; /* callout for rt_timer_timer() */
141 static struct workqueue *rt_timer_wq;
142 static struct work rt_timer_wk;
143
144 static void rt_timer_init(void);
145 static void rt_timer_queue_remove_all(struct rttimer_queue *, int);
146 static void rt_timer_remove_all(struct rtentry *);
147 static void rt_timer_timer(void *);
148
149 #ifdef RTFLUSH_DEBUG
150 static int _rtcache_debug = 0;
151 #endif /* RTFLUSH_DEBUG */
152
153 static kauth_listener_t route_listener;
154
155 static int rtdeletemsg(struct rtentry *);
156 static void rtflushall(int);
157
158 static void rt_maskedcopy(const struct sockaddr *,
159 struct sockaddr *, const struct sockaddr *);
160
161 static void rtcache_clear(struct route *);
162 static void rtcache_clear_rtentry(int, struct rtentry *);
163 static void rtcache_invalidate(struct dom_rtlist *);
164
165 #ifdef DDB
166 static void db_print_sa(const struct sockaddr *);
167 static void db_print_ifa(struct ifaddr *);
168 static int db_show_rtentry(struct rtentry *, void *);
169 #endif
170
171 #ifdef RTFLUSH_DEBUG
172 static void sysctl_net_rtcache_setup(struct sysctllog **);
173 static void
174 sysctl_net_rtcache_setup(struct sysctllog **clog)
175 {
176 const struct sysctlnode *rnode;
177
178 if (sysctl_createv(clog, 0, NULL, &rnode, CTLFLAG_PERMANENT,
179 CTLTYPE_NODE,
180 "rtcache", SYSCTL_DESCR("Route cache related settings"),
181 NULL, 0, NULL, 0, CTL_NET, CTL_CREATE, CTL_EOL) != 0)
182 return;
183 if (sysctl_createv(clog, 0, &rnode, &rnode,
184 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, CTLTYPE_INT,
185 "debug", SYSCTL_DESCR("Debug route caches"),
186 NULL, 0, &_rtcache_debug, 0, CTL_CREATE, CTL_EOL) != 0)
187 return;
188 }
189 #endif /* RTFLUSH_DEBUG */
190
191 static inline void
192 rt_destroy(struct rtentry *rt)
193 {
194 if (rt->_rt_key != NULL)
195 sockaddr_free(rt->_rt_key);
196 if (rt->rt_gateway != NULL)
197 sockaddr_free(rt->rt_gateway);
198 if (rt_gettag(rt) != NULL)
199 sockaddr_free(rt_gettag(rt));
200 rt->_rt_key = rt->rt_gateway = rt->rt_tag = NULL;
201 }
202
203 static inline const struct sockaddr *
204 rt_setkey(struct rtentry *rt, const struct sockaddr *key, int flags)
205 {
206 if (rt->_rt_key == key)
207 goto out;
208
209 if (rt->_rt_key != NULL)
210 sockaddr_free(rt->_rt_key);
211 rt->_rt_key = sockaddr_dup(key, flags);
212 out:
213 rt->rt_nodes->rn_key = (const char *)rt->_rt_key;
214 return rt->_rt_key;
215 }
216
217 struct ifaddr *
218 rt_get_ifa(struct rtentry *rt)
219 {
220 struct ifaddr *ifa;
221
222 if ((ifa = rt->rt_ifa) == NULL)
223 return ifa;
224 else if (ifa->ifa_getifa == NULL)
225 return ifa;
226 #if 0
227 else if (ifa->ifa_seqno != NULL && *ifa->ifa_seqno == rt->rt_ifa_seqno)
228 return ifa;
229 #endif
230 else {
231 ifa = (*ifa->ifa_getifa)(ifa, rt_getkey(rt));
232 if (ifa == NULL)
233 return NULL;
234 rt_replace_ifa(rt, ifa);
235 return ifa;
236 }
237 }
238
239 static void
240 rt_set_ifa1(struct rtentry *rt, struct ifaddr *ifa)
241 {
242 rt->rt_ifa = ifa;
243 if (ifa->ifa_seqno != NULL)
244 rt->rt_ifa_seqno = *ifa->ifa_seqno;
245 }
246
247 /*
248 * Is this route the connected route for the ifa?
249 */
250 static int
251 rt_ifa_connected(const struct rtentry *rt, const struct ifaddr *ifa)
252 {
253 const struct sockaddr *key, *dst, *odst;
254 struct sockaddr_storage maskeddst;
255
256 key = rt_getkey(rt);
257 dst = rt->rt_flags & RTF_HOST ? ifa->ifa_dstaddr : ifa->ifa_addr;
258 if (dst == NULL ||
259 dst->sa_family != key->sa_family ||
260 dst->sa_len != key->sa_len)
261 return 0;
262 if ((rt->rt_flags & RTF_HOST) == 0 && ifa->ifa_netmask) {
263 odst = dst;
264 dst = (struct sockaddr *)&maskeddst;
265 rt_maskedcopy(odst, (struct sockaddr *)&maskeddst,
266 ifa->ifa_netmask);
267 }
268 return (memcmp(dst, key, dst->sa_len) == 0);
269 }
270
271 void
272 rt_replace_ifa(struct rtentry *rt, struct ifaddr *ifa)
273 {
274 if (rt->rt_ifa &&
275 rt->rt_ifa != ifa &&
276 rt->rt_ifa->ifa_flags & IFA_ROUTE &&
277 rt_ifa_connected(rt, rt->rt_ifa))
278 {
279 RT_DPRINTF("rt->_rt_key = %p, ifa = %p, "
280 "replace deleted IFA_ROUTE\n",
281 (void *)rt->_rt_key, (void *)rt->rt_ifa);
282 rt->rt_ifa->ifa_flags &= ~IFA_ROUTE;
283 if (rt_ifa_connected(rt, ifa)) {
284 RT_DPRINTF("rt->_rt_key = %p, ifa = %p, "
285 "replace added IFA_ROUTE\n",
286 (void *)rt->_rt_key, (void *)ifa);
287 ifa->ifa_flags |= IFA_ROUTE;
288 }
289 }
290
291 ifaref(ifa);
292 ifafree(rt->rt_ifa);
293 rt_set_ifa1(rt, ifa);
294 }
295
/*
 * Attach `ifa' to a route, taking a reference on it.  Any ifa the route
 * already points at is neither inspected nor released here.
 */
static void
rt_set_ifa(struct rtentry *rt, struct ifaddr *ifa)
{

	ifaref(ifa);		/* reference held by the route */
	rt_set_ifa1(rt, ifa);
}
302
303 static int
304 route_listener_cb(kauth_cred_t cred, kauth_action_t action, void *cookie,
305 void *arg0, void *arg1, void *arg2, void *arg3)
306 {
307 struct rt_msghdr *rtm;
308 int result;
309
310 result = KAUTH_RESULT_DEFER;
311 rtm = arg1;
312
313 if (action != KAUTH_NETWORK_ROUTE)
314 return result;
315
316 if (rtm->rtm_type == RTM_GET)
317 result = KAUTH_RESULT_ALLOW;
318
319 return result;
320 }
321
322 void
323 rt_init(void)
324 {
325
326 #ifdef RTFLUSH_DEBUG
327 sysctl_net_rtcache_setup(NULL);
328 #endif
329
330 pool_init(&rtentry_pool, sizeof(struct rtentry), 0, 0, 0, "rtentpl",
331 NULL, IPL_SOFTNET);
332 pool_init(&rttimer_pool, sizeof(struct rttimer), 0, 0, 0, "rttmrpl",
333 NULL, IPL_SOFTNET);
334
335 rn_init(); /* initialize all zeroes, all ones, mask table */
336 rtbl_init();
337
338 route_listener = kauth_listen_scope(KAUTH_SCOPE_NETWORK,
339 route_listener_cb, NULL);
340 }
341
342 static void
343 rtflushall(int family)
344 {
345 struct domain *dom;
346
347 if (rtcache_debug())
348 printf("%s: enter\n", __func__);
349
350 if ((dom = pffinddomain(family)) == NULL)
351 return;
352
353 rtcache_invalidate(&dom->dom_rtcache);
354 }
355
356 static void
357 rtcache(struct route *ro)
358 {
359 struct domain *dom;
360
361 rtcache_invariants(ro);
362 KASSERT(ro->_ro_rt != NULL);
363 KASSERT(ro->ro_invalid == false);
364 KASSERT(rtcache_getdst(ro) != NULL);
365
366 if ((dom = pffinddomain(rtcache_getdst(ro)->sa_family)) == NULL)
367 return;
368
369 LIST_INSERT_HEAD(&dom->dom_rtcache, ro, ro_rtcache_next);
370 rtcache_invariants(ro);
371 }
372
373 #ifdef RT_DEBUG
374 static void
375 dump_rt(const struct rtentry *rt)
376 {
377 char buf[512];
378
379 aprint_normal("rt: ");
380 aprint_normal("p=%p ", rt);
381 if (rt->_rt_key == NULL) {
382 aprint_normal("dst=(NULL) ");
383 } else {
384 sockaddr_format(rt->_rt_key, buf, sizeof(buf));
385 aprint_normal("dst=%s ", buf);
386 }
387 if (rt->rt_gateway == NULL) {
388 aprint_normal("gw=(NULL) ");
389 } else {
390 sockaddr_format(rt->_rt_key, buf, sizeof(buf));
391 aprint_normal("gw=%s ", buf);
392 }
393 aprint_normal("flags=%x ", rt->rt_flags);
394 if (rt->rt_ifp == NULL) {
395 aprint_normal("if=(NULL) ");
396 } else {
397 aprint_normal("if=%s ", rt->rt_ifp->if_xname);
398 }
399 aprint_normal("\n");
400 }
401 #endif /* RT_DEBUG */
402
403 /*
404 * Packet routing routines. If success, refcnt of a returned rtentry
405 * will be incremented. The caller has to rtfree it by itself.
406 */
407 struct rtentry *
408 rtalloc1(const struct sockaddr *dst, int report)
409 {
410 rtbl_t *rtbl;
411 struct rtentry *rt;
412 int s;
413
414 s = splsoftnet();
415 rtbl = rt_gettable(dst->sa_family);
416 if (rtbl == NULL)
417 goto miss;
418
419 rt = rt_matchaddr(rtbl, dst);
420 if (rt == NULL)
421 goto miss;
422
423 rt->rt_refcnt++;
424
425 splx(s);
426 return rt;
427 miss:
428 rtstat.rts_unreach++;
429 if (report) {
430 struct rt_addrinfo info;
431
432 memset(&info, 0, sizeof(info));
433 info.rti_info[RTAX_DST] = dst;
434 rt_missmsg(RTM_MISS, &info, 0, 0);
435 }
436 splx(s);
437 return NULL;
438 }
439
440 #if defined(DEBUG) && !defined(NET_MPSAFE)
441 /*
442 * Check the following constraint for each rtcache:
443 * if a rtcache holds a rtentry, the rtentry's refcnt is more than zero,
444 * i.e., the rtentry should be referenced at least by the rtcache.
445 */
446 static void
447 rtcache_check_rtrefcnt(int family)
448 {
449 struct domain *dom = pffinddomain(family);
450 struct route *ro;
451
452 if (dom == NULL)
453 return;
454
455 LIST_FOREACH(ro, &dom->dom_rtcache, ro_rtcache_next)
456 KDASSERT(ro->_ro_rt == NULL || ro->_ro_rt->rt_refcnt > 0);
457 }
458 #endif
459
460 void
461 rtfree(struct rtentry *rt)
462 {
463 struct ifaddr *ifa;
464
465 KASSERT(rt != NULL);
466 KASSERT(rt->rt_refcnt > 0);
467
468 rt->rt_refcnt--;
469 #if defined(DEBUG) && !defined(NET_MPSAFE)
470 if (rt_getkey(rt) != NULL)
471 rtcache_check_rtrefcnt(rt_getkey(rt)->sa_family);
472 #endif
473 if (rt->rt_refcnt == 0 && (rt->rt_flags & RTF_UP) == 0) {
474 rt_assert_inactive(rt);
475 rttrash--;
476 rt_timer_remove_all(rt);
477 ifa = rt->rt_ifa;
478 rt->rt_ifa = NULL;
479 ifafree(ifa);
480 rt->rt_ifp = NULL;
481 rt_destroy(rt);
482 pool_put(&rtentry_pool, rt);
483 }
484 }
485
486 /*
487 * Force a routing table entry to the specified
488 * destination to go through the given gateway.
489 * Normally called as a result of a routing redirect
490 * message from the network layer.
491 *
492 * N.B.: must be called at splsoftnet
493 */
494 void
495 rtredirect(const struct sockaddr *dst, const struct sockaddr *gateway,
496 const struct sockaddr *netmask, int flags, const struct sockaddr *src,
497 struct rtentry **rtp)
498 {
499 struct rtentry *rt;
500 int error = 0;
501 uint64_t *stat = NULL;
502 struct rt_addrinfo info;
503 struct ifaddr *ifa;
504 struct psref psref;
505
506 /* verify the gateway is directly reachable */
507 if ((ifa = ifa_ifwithnet_psref(gateway, &psref)) == NULL) {
508 error = ENETUNREACH;
509 goto out;
510 }
511 rt = rtalloc1(dst, 0);
512 /*
513 * If the redirect isn't from our current router for this dst,
514 * it's either old or wrong. If it redirects us to ourselves,
515 * we have a routing loop, perhaps as a result of an interface
516 * going down recently.
517 */
518 if (!(flags & RTF_DONE) && rt &&
519 (sockaddr_cmp(src, rt->rt_gateway) != 0 || rt->rt_ifa != ifa))
520 error = EINVAL;
521 else {
522 int s = pserialize_read_enter();
523 struct ifaddr *_ifa;
524
525 _ifa = ifa_ifwithaddr(gateway);
526 if (_ifa != NULL)
527 error = EHOSTUNREACH;
528 pserialize_read_exit(s);
529 }
530 if (error)
531 goto done;
532 /*
533 * Create a new entry if we just got back a wildcard entry
534 * or the lookup failed. This is necessary for hosts
535 * which use routing redirects generated by smart gateways
536 * to dynamically build the routing tables.
537 */
538 if (rt == NULL || (rt_mask(rt) && rt_mask(rt)->sa_len < 2))
539 goto create;
540 /*
541 * Don't listen to the redirect if it's
542 * for a route to an interface.
543 */
544 if (rt->rt_flags & RTF_GATEWAY) {
545 if (((rt->rt_flags & RTF_HOST) == 0) && (flags & RTF_HOST)) {
546 /*
547 * Changing from route to net => route to host.
548 * Create new route, rather than smashing route to net.
549 */
550 create:
551 if (rt != NULL)
552 rtfree(rt);
553 flags |= RTF_GATEWAY | RTF_DYNAMIC;
554 memset(&info, 0, sizeof(info));
555 info.rti_info[RTAX_DST] = dst;
556 info.rti_info[RTAX_GATEWAY] = gateway;
557 info.rti_info[RTAX_NETMASK] = netmask;
558 info.rti_ifa = ifa;
559 info.rti_flags = flags;
560 rt = NULL;
561 error = rtrequest1(RTM_ADD, &info, &rt);
562 if (rt != NULL)
563 flags = rt->rt_flags;
564 stat = &rtstat.rts_dynamic;
565 } else {
566 /*
567 * Smash the current notion of the gateway to
568 * this destination. Should check about netmask!!!
569 */
570 error = rt_setgate(rt, gateway);
571 if (error == 0) {
572 rt->rt_flags |= RTF_MODIFIED;
573 flags |= RTF_MODIFIED;
574 }
575 stat = &rtstat.rts_newgateway;
576 }
577 } else
578 error = EHOSTUNREACH;
579 done:
580 if (rt) {
581 if (rtp != NULL && !error)
582 *rtp = rt;
583 else
584 rtfree(rt);
585 }
586 out:
587 if (error)
588 rtstat.rts_badredirect++;
589 else if (stat != NULL)
590 (*stat)++;
591 memset(&info, 0, sizeof(info));
592 info.rti_info[RTAX_DST] = dst;
593 info.rti_info[RTAX_GATEWAY] = gateway;
594 info.rti_info[RTAX_NETMASK] = netmask;
595 info.rti_info[RTAX_AUTHOR] = src;
596 rt_missmsg(RTM_REDIRECT, &info, flags, error);
597 ifa_release(ifa, &psref);
598 }
599
600 /*
601 * Delete a route and generate a message.
602 * It doesn't free a passed rt.
603 */
604 static int
605 rtdeletemsg(struct rtentry *rt)
606 {
607 int error;
608 struct rt_addrinfo info;
609
610 /*
611 * Request the new route so that the entry is not actually
612 * deleted. That will allow the information being reported to
613 * be accurate (and consistent with route_output()).
614 */
615 memset(&info, 0, sizeof(info));
616 info.rti_info[RTAX_DST] = rt_getkey(rt);
617 info.rti_info[RTAX_NETMASK] = rt_mask(rt);
618 info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
619 info.rti_flags = rt->rt_flags;
620 error = rtrequest1(RTM_DELETE, &info, NULL);
621
622 rt_missmsg(RTM_DELETE, &info, info.rti_flags, error);
623
624 return error;
625 }
626
627 struct ifaddr *
628 ifa_ifwithroute_psref(int flags, const struct sockaddr *dst,
629 const struct sockaddr *gateway, struct psref *psref)
630 {
631 struct ifaddr *ifa = NULL;
632
633 if ((flags & RTF_GATEWAY) == 0) {
634 /*
635 * If we are adding a route to an interface,
636 * and the interface is a pt to pt link
637 * we should search for the destination
638 * as our clue to the interface. Otherwise
639 * we can use the local address.
640 */
641 if ((flags & RTF_HOST) && gateway->sa_family != AF_LINK)
642 ifa = ifa_ifwithdstaddr_psref(dst, psref);
643 if (ifa == NULL)
644 ifa = ifa_ifwithaddr_psref(gateway, psref);
645 } else {
646 /*
647 * If we are adding a route to a remote net
648 * or host, the gateway may still be on the
649 * other end of a pt to pt link.
650 */
651 ifa = ifa_ifwithdstaddr_psref(gateway, psref);
652 }
653 if (ifa == NULL)
654 ifa = ifa_ifwithnet_psref(gateway, psref);
655 if (ifa == NULL) {
656 int s;
657 struct rtentry *rt;
658
659 rt = rtalloc1(dst, 0);
660 if (rt == NULL)
661 return NULL;
662 /*
663 * Just in case. May not need to do this workaround.
664 * Revisit when working on rtentry MP-ification.
665 */
666 s = pserialize_read_enter();
667 IFADDR_READER_FOREACH(ifa, rt->rt_ifp) {
668 if (ifa == rt->rt_ifa)
669 break;
670 }
671 if (ifa != NULL)
672 ifa_acquire(ifa, psref);
673 pserialize_read_exit(s);
674 rtfree(rt);
675 if (ifa == NULL)
676 return NULL;
677 }
678 if (ifa->ifa_addr->sa_family != dst->sa_family) {
679 struct ifaddr *nifa;
680 int s;
681
682 s = pserialize_read_enter();
683 nifa = ifaof_ifpforaddr(dst, ifa->ifa_ifp);
684 if (nifa != NULL) {
685 ifa_release(ifa, psref);
686 ifa_acquire(nifa, psref);
687 ifa = nifa;
688 }
689 pserialize_read_exit(s);
690 }
691 return ifa;
692 }
693
/*
 * If it succeeds and ret_nrt isn't NULL, the refcnt of the rtentry
 * returned in *ret_nrt is incremented.  The caller has to rtfree() it.
 */
698 int
699 rtrequest(int req, const struct sockaddr *dst, const struct sockaddr *gateway,
700 const struct sockaddr *netmask, int flags, struct rtentry **ret_nrt)
701 {
702 struct rt_addrinfo info;
703
704 memset(&info, 0, sizeof(info));
705 info.rti_flags = flags;
706 info.rti_info[RTAX_DST] = dst;
707 info.rti_info[RTAX_GATEWAY] = gateway;
708 info.rti_info[RTAX_NETMASK] = netmask;
709 return rtrequest1(req, &info, ret_nrt);
710 }
711
712 /*
713 * It's a utility function to add/remove a route to/from the routing table
714 * and tell user processes the addition/removal on success.
715 */
716 int
717 rtrequest_newmsg(const int req, const struct sockaddr *dst,
718 const struct sockaddr *gateway, const struct sockaddr *netmask,
719 const int flags)
720 {
721 int error;
722 struct rtentry *ret_nrt = NULL;
723
724 KASSERT(req == RTM_ADD || req == RTM_DELETE);
725
726 error = rtrequest(req, dst, gateway, netmask, flags, &ret_nrt);
727 if (error != 0)
728 return error;
729
730 KASSERT(ret_nrt != NULL);
731
732 rt_newmsg(req, ret_nrt); /* tell user process */
733 rtfree(ret_nrt);
734
735 return 0;
736 }
737
738 struct ifnet *
739 rt_getifp(struct rt_addrinfo *info, struct psref *psref)
740 {
741 const struct sockaddr *ifpaddr = info->rti_info[RTAX_IFP];
742
743 if (info->rti_ifp != NULL)
744 return NULL;
745 /*
746 * ifp may be specified by sockaddr_dl when protocol address
747 * is ambiguous
748 */
749 if (ifpaddr != NULL && ifpaddr->sa_family == AF_LINK) {
750 struct ifaddr *ifa;
751 int s = pserialize_read_enter();
752
753 ifa = ifa_ifwithnet(ifpaddr);
754 if (ifa != NULL)
755 info->rti_ifp = if_get_byindex(ifa->ifa_ifp->if_index,
756 psref);
757 pserialize_read_exit(s);
758 }
759
760 return info->rti_ifp;
761 }
762
763 struct ifaddr *
764 rt_getifa(struct rt_addrinfo *info, struct psref *psref)
765 {
766 struct ifaddr *ifa = NULL;
767 const struct sockaddr *dst = info->rti_info[RTAX_DST];
768 const struct sockaddr *gateway = info->rti_info[RTAX_GATEWAY];
769 const struct sockaddr *ifaaddr = info->rti_info[RTAX_IFA];
770 int flags = info->rti_flags;
771 const struct sockaddr *sa;
772
773 if (info->rti_ifa == NULL && ifaaddr != NULL) {
774 ifa = ifa_ifwithaddr_psref(ifaaddr, psref);
775 if (ifa != NULL)
776 goto got;
777 }
778
779 sa = ifaaddr != NULL ? ifaaddr :
780 (gateway != NULL ? gateway : dst);
781 if (sa != NULL && info->rti_ifp != NULL)
782 ifa = ifaof_ifpforaddr_psref(sa, info->rti_ifp, psref);
783 else if (dst != NULL && gateway != NULL)
784 ifa = ifa_ifwithroute_psref(flags, dst, gateway, psref);
785 else if (sa != NULL)
786 ifa = ifa_ifwithroute_psref(flags, sa, sa, psref);
787 if (ifa == NULL)
788 return NULL;
789 got:
790 if (ifa->ifa_getifa != NULL) {
791 /* FIXME NOMPSAFE */
792 ifa = (*ifa->ifa_getifa)(ifa, dst);
793 if (ifa == NULL)
794 return NULL;
795 ifa_acquire(ifa, psref);
796 }
797 info->rti_ifa = ifa;
798 if (info->rti_ifp == NULL)
799 info->rti_ifp = ifa->ifa_ifp;
800 return ifa;
801 }
802
/*
 * If it succeeds and ret_nrt isn't NULL, the refcnt of the rtentry
 * returned in *ret_nrt is incremented.  The caller has to rtfree() it.
 */
807 int
808 rtrequest1(int req, struct rt_addrinfo *info, struct rtentry **ret_nrt)
809 {
810 int s = splsoftnet(), ss;
811 int error = 0, rc;
812 struct rtentry *rt;
813 rtbl_t *rtbl;
814 struct ifaddr *ifa = NULL, *ifa2 = NULL;
815 struct sockaddr_storage maskeddst;
816 const struct sockaddr *dst = info->rti_info[RTAX_DST];
817 const struct sockaddr *gateway = info->rti_info[RTAX_GATEWAY];
818 const struct sockaddr *netmask = info->rti_info[RTAX_NETMASK];
819 int flags = info->rti_flags;
820 struct psref psref_ifp, psref_ifa;
821 int bound = 0;
822 struct ifnet *ifp = NULL;
823 bool need_to_release_ifa = true;
824 #define senderr(x) { error = x ; goto bad; }
825
826 bound = curlwp_bind();
827 if ((rtbl = rt_gettable(dst->sa_family)) == NULL)
828 senderr(ESRCH);
829 if (flags & RTF_HOST)
830 netmask = NULL;
831 switch (req) {
832 case RTM_DELETE:
833 if (netmask) {
834 rt_maskedcopy(dst, (struct sockaddr *)&maskeddst,
835 netmask);
836 dst = (struct sockaddr *)&maskeddst;
837 }
838 if ((rt = rt_lookup(rtbl, dst, netmask)) == NULL)
839 senderr(ESRCH);
840 if ((rt = rt_deladdr(rtbl, dst, netmask)) == NULL)
841 senderr(ESRCH);
842 rt->rt_flags &= ~RTF_UP;
843 if ((ifa = rt->rt_ifa)) {
844 if (ifa->ifa_flags & IFA_ROUTE &&
845 rt_ifa_connected(rt, ifa)) {
846 RT_DPRINTF("rt->_rt_key = %p, ifa = %p, "
847 "deleted IFA_ROUTE\n",
848 (void *)rt->_rt_key, (void *)ifa);
849 ifa->ifa_flags &= ~IFA_ROUTE;
850 }
851 if (ifa->ifa_rtrequest)
852 ifa->ifa_rtrequest(RTM_DELETE, rt, info);
853 ifa = NULL;
854 }
855 rttrash++;
856 if (ret_nrt) {
857 *ret_nrt = rt;
858 rt->rt_refcnt++;
859 } else if (rt->rt_refcnt <= 0) {
860 /* Adjust the refcount */
861 rt->rt_refcnt++;
862 rtfree(rt);
863 }
864 rtcache_clear_rtentry(dst->sa_family, rt);
865 break;
866
867 case RTM_ADD:
868 if (info->rti_ifa == NULL) {
869 ifp = rt_getifp(info, &psref_ifp);
870 ifa = rt_getifa(info, &psref_ifa);
871 if (ifa == NULL)
872 senderr(ENETUNREACH);
873 } else {
874 /* Caller should have a reference of ifa */
875 ifa = info->rti_ifa;
876 need_to_release_ifa = false;
877 }
878 rt = pool_get(&rtentry_pool, PR_NOWAIT);
879 if (rt == NULL)
880 senderr(ENOBUFS);
881 memset(rt, 0, sizeof(*rt));
882 rt->rt_flags = RTF_UP | flags;
883 LIST_INIT(&rt->rt_timer);
884
885 RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);
886 if (netmask) {
887 rt_maskedcopy(dst, (struct sockaddr *)&maskeddst,
888 netmask);
889 rt_setkey(rt, (struct sockaddr *)&maskeddst, M_NOWAIT);
890 } else {
891 rt_setkey(rt, dst, M_NOWAIT);
892 }
893 RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);
894 if (rt_getkey(rt) == NULL ||
895 rt_setgate(rt, gateway) != 0) {
896 pool_put(&rtentry_pool, rt);
897 senderr(ENOBUFS);
898 }
899
900 rt_set_ifa(rt, ifa);
901 if (info->rti_info[RTAX_TAG] != NULL) {
902 const struct sockaddr *tag;
903 tag = rt_settag(rt, info->rti_info[RTAX_TAG]);
904 if (tag == NULL)
905 senderr(ENOBUFS);
906 }
907 RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);
908
909 ss = pserialize_read_enter();
910 if (info->rti_info[RTAX_IFP] != NULL) {
911 ifa2 = ifa_ifwithnet(info->rti_info[RTAX_IFP]);
912 if (ifa2 != NULL)
913 rt->rt_ifp = ifa2->ifa_ifp;
914 else
915 rt->rt_ifp = ifa->ifa_ifp;
916 } else
917 rt->rt_ifp = ifa->ifa_ifp;
918 pserialize_read_exit(ss);
919
920 RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);
921 rc = rt_addaddr(rtbl, rt, netmask);
922 RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);
923 if (rc != 0) {
924 ifafree(ifa); /* for rt_set_ifa above */
925 rt_destroy(rt);
926 pool_put(&rtentry_pool, rt);
927 senderr(rc);
928 }
929 RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);
930 if (ifa->ifa_rtrequest)
931 ifa->ifa_rtrequest(req, rt, info);
932 if (need_to_release_ifa)
933 ifa_release(ifa, &psref_ifa);
934 ifa = NULL;
935 if_put(ifp, &psref_ifp);
936 ifp = NULL;
937 RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);
938 if (ret_nrt) {
939 *ret_nrt = rt;
940 rt->rt_refcnt++;
941 }
942 rtflushall(dst->sa_family);
943 break;
944 case RTM_GET:
945 if (netmask != NULL) {
946 rt_maskedcopy(dst, (struct sockaddr *)&maskeddst,
947 netmask);
948 dst = (struct sockaddr *)&maskeddst;
949 }
950 if ((rt = rt_lookup(rtbl, dst, netmask)) == NULL)
951 senderr(ESRCH);
952 if (ret_nrt != NULL) {
953 *ret_nrt = rt;
954 rt->rt_refcnt++;
955 }
956 break;
957 }
958 bad:
959 if (need_to_release_ifa)
960 ifa_release(ifa, &psref_ifa);
961 if_put(ifp, &psref_ifp);
962 curlwp_bindx(bound);
963 splx(s);
964 return error;
965 }
966
967 int
968 rt_setgate(struct rtentry *rt, const struct sockaddr *gate)
969 {
970 struct sockaddr *new, *old;
971
972 KASSERT(rt->_rt_key != NULL);
973 RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);
974
975 new = sockaddr_dup(gate, M_ZERO | M_NOWAIT);
976 if (new == NULL)
977 return ENOMEM;
978
979 old = rt->rt_gateway;
980 rt->rt_gateway = new;
981 if (old != NULL)
982 sockaddr_free(old);
983
984 KASSERT(rt->_rt_key != NULL);
985 RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);
986
987 if (rt->rt_flags & RTF_GATEWAY) {
988 struct rtentry *gwrt = rtalloc1(gate, 1);
989 /*
990 * If we switched gateways, grab the MTU from the new
991 * gateway route if the current MTU, if the current MTU is
992 * greater than the MTU of gateway.
993 * Note that, if the MTU of gateway is 0, we will reset the
994 * MTU of the route to run PMTUD again from scratch. XXX
995 */
996 if (gwrt != NULL) {
997 KASSERT(gwrt->_rt_key != NULL);
998 RT_DPRINTF("gwrt->_rt_key = %p\n", gwrt->_rt_key);
999 if ((rt->rt_rmx.rmx_locks & RTV_MTU) == 0 &&
1000 rt->rt_rmx.rmx_mtu &&
1001 rt->rt_rmx.rmx_mtu > gwrt->rt_rmx.rmx_mtu) {
1002 rt->rt_rmx.rmx_mtu = gwrt->rt_rmx.rmx_mtu;
1003 }
1004 rtfree(gwrt);
1005 }
1006 }
1007 KASSERT(rt->_rt_key != NULL);
1008 RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);
1009 return 0;
1010 }
1011
1012 static void
1013 rt_maskedcopy(const struct sockaddr *src, struct sockaddr *dst,
1014 const struct sockaddr *netmask)
1015 {
1016 const char *netmaskp = &netmask->sa_data[0],
1017 *srcp = &src->sa_data[0];
1018 char *dstp = &dst->sa_data[0];
1019 const char *maskend = (char *)dst + MIN(netmask->sa_len, src->sa_len);
1020 const char *srcend = (char *)dst + src->sa_len;
1021
1022 dst->sa_len = src->sa_len;
1023 dst->sa_family = src->sa_family;
1024
1025 while (dstp < maskend)
1026 *dstp++ = *srcp++ & *netmaskp++;
1027 if (dstp < srcend)
1028 memset(dstp, 0, (size_t)(srcend - dstp));
1029 }
1030
1031 /*
1032 * Inform the routing socket of a route change.
1033 */
1034 void
1035 rt_newmsg(const int cmd, const struct rtentry *rt)
1036 {
1037 struct rt_addrinfo info;
1038
1039 memset((void *)&info, 0, sizeof(info));
1040 info.rti_info[RTAX_DST] = rt_getkey(rt);
1041 info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
1042 info.rti_info[RTAX_NETMASK] = rt_mask(rt);
1043 if (rt->rt_ifp) {
1044 info.rti_info[RTAX_IFP] = rt->rt_ifp->if_dl->ifa_addr;
1045 info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr;
1046 }
1047
1048 rt_missmsg(cmd, &info, rt->rt_flags, 0);
1049 }
1050
1051 /*
1052 * Set up or tear down a routing table entry, normally
1053 * for an interface.
1054 */
1055 int
1056 rtinit(struct ifaddr *ifa, int cmd, int flags)
1057 {
1058 struct rtentry *rt;
1059 struct sockaddr *dst, *odst;
1060 struct sockaddr_storage maskeddst;
1061 struct rtentry *nrt = NULL;
1062 int error;
1063 struct rt_addrinfo info;
1064
1065 dst = flags & RTF_HOST ? ifa->ifa_dstaddr : ifa->ifa_addr;
1066 if (cmd == RTM_DELETE) {
1067 if ((flags & RTF_HOST) == 0 && ifa->ifa_netmask) {
1068 /* Delete subnet route for this interface */
1069 odst = dst;
1070 dst = (struct sockaddr *)&maskeddst;
1071 rt_maskedcopy(odst, dst, ifa->ifa_netmask);
1072 }
1073 if ((rt = rtalloc1(dst, 0)) != NULL) {
1074 if (rt->rt_ifa != ifa) {
1075 rtfree(rt);
1076 return (flags & RTF_HOST) ? EHOSTUNREACH
1077 : ENETUNREACH;
1078 }
1079 rtfree(rt);
1080 }
1081 }
1082 memset(&info, 0, sizeof(info));
1083 info.rti_ifa = ifa;
1084 info.rti_flags = flags | ifa->ifa_flags;
1085 info.rti_info[RTAX_DST] = dst;
1086 info.rti_info[RTAX_GATEWAY] = ifa->ifa_addr;
1087
1088 /*
1089 * XXX here, it seems that we are assuming that ifa_netmask is NULL
1090 * for RTF_HOST. bsdi4 passes NULL explicitly (via intermediate
1091 * variable) when RTF_HOST is 1. still not sure if i can safely
1092 * change it to meet bsdi4 behavior.
1093 */
1094 if (cmd != RTM_LLINFO_UPD)
1095 info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask;
1096 error = rtrequest1((cmd == RTM_LLINFO_UPD) ? RTM_GET : cmd, &info,
1097 &nrt);
1098 if (error != 0)
1099 return error;
1100
1101 rt = nrt;
1102 switch (cmd) {
1103 case RTM_DELETE:
1104 rt_newmsg(cmd, rt);
1105 break;
1106 case RTM_LLINFO_UPD:
1107 if (cmd == RTM_LLINFO_UPD && ifa->ifa_rtrequest != NULL)
1108 ifa->ifa_rtrequest(RTM_LLINFO_UPD, rt, &info);
1109 rt_newmsg(RTM_CHANGE, rt);
1110 break;
1111 case RTM_ADD:
1112 if (rt->rt_ifa != ifa) {
1113 printf("rtinit: wrong ifa (%p) was (%p)\n", ifa,
1114 rt->rt_ifa);
1115 if (rt->rt_ifa->ifa_rtrequest != NULL) {
1116 rt->rt_ifa->ifa_rtrequest(RTM_DELETE, rt,
1117 &info);
1118 }
1119 rt_replace_ifa(rt, ifa);
1120 rt->rt_ifp = ifa->ifa_ifp;
1121 if (ifa->ifa_rtrequest != NULL)
1122 ifa->ifa_rtrequest(RTM_ADD, rt, &info);
1123 }
1124 rt_newmsg(cmd, rt);
1125 break;
1126 }
1127 rtfree(rt);
1128 return error;
1129 }
1130
1131 /*
1132 * Create a local route entry for the address.
1133 * Announce the addition of the address and the route to the routing socket.
1134 */
1135 int
1136 rt_ifa_addlocal(struct ifaddr *ifa)
1137 {
1138 struct rtentry *rt;
1139 int e;
1140
1141 /* If there is no loopback entry, allocate one. */
1142 rt = rtalloc1(ifa->ifa_addr, 0);
1143 #ifdef RT_DEBUG
1144 if (rt != NULL)
1145 dump_rt(rt);
1146 #endif
1147 if (rt == NULL || (rt->rt_flags & RTF_HOST) == 0 ||
1148 (rt->rt_ifp->if_flags & IFF_LOOPBACK) == 0)
1149 {
1150 struct rt_addrinfo info;
1151 struct rtentry *nrt;
1152
1153 memset(&info, 0, sizeof(info));
1154 info.rti_flags = RTF_HOST | RTF_LOCAL;
1155 if (!(ifa->ifa_ifp->if_flags & (IFF_LOOPBACK|IFF_POINTOPOINT)))
1156 info.rti_flags |= RTF_LLDATA;
1157 info.rti_info[RTAX_DST] = ifa->ifa_addr;
1158 info.rti_info[RTAX_GATEWAY] =
1159 (const struct sockaddr *)ifa->ifa_ifp->if_sadl;
1160 info.rti_ifa = ifa;
1161 nrt = NULL;
1162 e = rtrequest1(RTM_ADD, &info, &nrt);
1163 if (nrt && ifa != nrt->rt_ifa)
1164 rt_replace_ifa(nrt, ifa);
1165 rt_newaddrmsg(RTM_ADD, ifa, e, nrt);
1166 if (nrt != NULL) {
1167 #ifdef RT_DEBUG
1168 dump_rt(nrt);
1169 #endif
1170 rtfree(nrt);
1171 }
1172 } else {
1173 e = 0;
1174 rt_newaddrmsg(RTM_NEWADDR, ifa, 0, NULL);
1175 }
1176 if (rt != NULL)
1177 rtfree(rt);
1178 return e;
1179 }
1180
1181 /*
1182 * Remove the local route entry for the address.
1183 * Announce the removal of the address and the route to the routing socket.
1184 */
1185 int
1186 rt_ifa_remlocal(struct ifaddr *ifa, struct ifaddr *alt_ifa)
1187 {
1188 struct rtentry *rt;
1189 int e = 0;
1190
1191 rt = rtalloc1(ifa->ifa_addr, 0);
1192
1193 /*
1194 * Before deleting, check if a corresponding loopbacked
1195 * host route surely exists. With this check, we can avoid
1196 * deleting an interface direct route whose destination is
1197 * the same as the address being removed. This can happen
1198 * when removing a subnet-router anycast address on an
1199 * interface attached to a shared medium.
1200 */
1201 if (rt != NULL &&
1202 (rt->rt_flags & RTF_HOST) &&
1203 (rt->rt_ifp->if_flags & IFF_LOOPBACK))
1204 {
1205 /* If we cannot replace the route's ifaddr with the equivalent
1206 * ifaddr of another interface, I believe it is safest to
1207 * delete the route.
1208 */
1209 if (alt_ifa == NULL) {
1210 e = rtdeletemsg(rt);
1211 rt_newaddrmsg(RTM_DELADDR, ifa, 0, NULL);
1212 } else {
1213 rt_replace_ifa(rt, alt_ifa);
1214 rt_newmsg(RTM_CHANGE, rt);
1215 }
1216 } else
1217 rt_newaddrmsg(RTM_DELADDR, ifa, 0, NULL);
1218 if (rt != NULL)
1219 rtfree(rt);
1220 return e;
1221 }
1222
/*
 * Route timer routines.  These routines allow functions to be called
 * for various routes at any time.  This is useful in supporting
 * path MTU discovery and redirect route deletion.
 *
 * This is similar to some BSDI internal functions, but it provides
 * for multiple queues for efficiency's sake...
 */
1231
/* Every queue made by rt_timer_queue_create() is linked onto this list. */
LIST_HEAD(, rttimer_queue) rttimer_queue_head;
/* Set to 1 by rt_timer_init() once the timer machinery is set up. */
static int rt_init_done = 0;

/*
 * Some subtle order problems with domain initialization mean that
 * we cannot count on this being run from rt_init before various
 * protocol initializations are done.  Therefore, we make sure
 * that this is run when the first queue is added...
 */

/* Declared ahead of its definition because rt_timer_init() refers to it. */
static void rt_timer_work(struct work *, void *);
1243
1244 static void
1245 rt_timer_init(void)
1246 {
1247 int error;
1248
1249 assert(rt_init_done == 0);
1250
1251 LIST_INIT(&rttimer_queue_head);
1252 callout_init(&rt_timer_ch, CALLOUT_MPSAFE);
1253 error = workqueue_create(&rt_timer_wq, "rt_timer",
1254 rt_timer_work, NULL, PRI_SOFTNET, IPL_SOFTNET, WQ_MPSAFE);
1255 if (error)
1256 panic("%s: workqueue_create failed (%d)\n", __func__, error);
1257 callout_reset(&rt_timer_ch, hz, rt_timer_timer, NULL);
1258 rt_init_done = 1;
1259 }
1260
1261 struct rttimer_queue *
1262 rt_timer_queue_create(u_int timeout)
1263 {
1264 struct rttimer_queue *rtq;
1265
1266 if (rt_init_done == 0)
1267 rt_timer_init();
1268
1269 R_Malloc(rtq, struct rttimer_queue *, sizeof *rtq);
1270 if (rtq == NULL)
1271 return NULL;
1272 memset(rtq, 0, sizeof(*rtq));
1273
1274 rtq->rtq_timeout = timeout;
1275 TAILQ_INIT(&rtq->rtq_head);
1276 LIST_INSERT_HEAD(&rttimer_queue_head, rtq, rtq_link);
1277
1278 return rtq;
1279 }
1280
/*
 * Set the timeout of an existing timer queue.  Timers already on the
 * queue are not touched here; rt_timer_work() reads rtq_timeout each
 * time it tests a timer for expiry.
 */
void
rt_timer_queue_change(struct rttimer_queue *rtq, long timeout)
{

	rtq->rtq_timeout = timeout;
}
1287
1288 static void
1289 rt_timer_queue_remove_all(struct rttimer_queue *rtq, int destroy)
1290 {
1291 struct rttimer *r;
1292
1293 while ((r = TAILQ_FIRST(&rtq->rtq_head)) != NULL) {
1294 LIST_REMOVE(r, rtt_link);
1295 TAILQ_REMOVE(&rtq->rtq_head, r, rtt_next);
1296 if (destroy)
1297 (*r->rtt_func)(r->rtt_rt, r);
1298 rtfree(r->rtt_rt);
1299 pool_put(&rttimer_pool, r);
1300 if (rtq->rtq_count > 0)
1301 rtq->rtq_count--;
1302 else
1303 printf("rt_timer_queue_remove_all: "
1304 "rtq_count reached 0\n");
1305 }
1306 }
1307
/*
 * Empty a timer queue and unlink it from the global list of queues.
 * `destroy' is passed through to rt_timer_queue_remove_all(), which
 * invokes each timer's callback when it is non-zero.
 */
void
rt_timer_queue_destroy(struct rttimer_queue *rtq, int destroy)
{

	rt_timer_queue_remove_all(rtq, destroy);

	LIST_REMOVE(rtq, rtq_link);

	/*
	 * Caller is responsible for freeing the rttimer_queue structure.
	 */
}
1320
/*
 * Return the queue's rtq_count, the counter that is incremented when a
 * timer is added to the queue and decremented when one is removed.
 */
unsigned long
rt_timer_count(struct rttimer_queue *rtq)
{
	return rtq->rtq_count;
}
1326
1327 static void
1328 rt_timer_remove_all(struct rtentry *rt)
1329 {
1330 struct rttimer *r;
1331
1332 while ((r = LIST_FIRST(&rt->rt_timer)) != NULL) {
1333 LIST_REMOVE(r, rtt_link);
1334 TAILQ_REMOVE(&r->rtt_queue->rtq_head, r, rtt_next);
1335 if (r->rtt_queue->rtq_count > 0)
1336 r->rtt_queue->rtq_count--;
1337 else
1338 printf("rt_timer_remove_all: rtq_count reached 0\n");
1339 rtfree(r->rtt_rt);
1340 pool_put(&rttimer_pool, r);
1341 }
1342 }
1343
1344 int
1345 rt_timer_add(struct rtentry *rt,
1346 void (*func)(struct rtentry *, struct rttimer *),
1347 struct rttimer_queue *queue)
1348 {
1349 struct rttimer *r;
1350
1351 KASSERT(func != NULL);
1352 /*
1353 * If there's already a timer with this action, destroy it before
1354 * we add a new one.
1355 */
1356 LIST_FOREACH(r, &rt->rt_timer, rtt_link) {
1357 if (r->rtt_func == func)
1358 break;
1359 }
1360 if (r != NULL) {
1361 LIST_REMOVE(r, rtt_link);
1362 TAILQ_REMOVE(&r->rtt_queue->rtq_head, r, rtt_next);
1363 if (r->rtt_queue->rtq_count > 0)
1364 r->rtt_queue->rtq_count--;
1365 else
1366 printf("rt_timer_add: rtq_count reached 0\n");
1367 rtfree(r->rtt_rt);
1368 } else {
1369 r = pool_get(&rttimer_pool, PR_NOWAIT);
1370 if (r == NULL)
1371 return ENOBUFS;
1372 }
1373
1374 memset(r, 0, sizeof(*r));
1375
1376 rt->rt_refcnt++;
1377 r->rtt_rt = rt;
1378 r->rtt_time = time_uptime;
1379 r->rtt_func = func;
1380 r->rtt_queue = queue;
1381 LIST_INSERT_HEAD(&rt->rt_timer, r, rtt_link);
1382 TAILQ_INSERT_TAIL(&queue->rtq_head, r, rtt_next);
1383 r->rtt_queue->rtq_count++;
1384
1385 return 0;
1386 }
1387
1388 static void
1389 rt_timer_work(struct work *wk, void *arg)
1390 {
1391 struct rttimer_queue *rtq;
1392 struct rttimer *r;
1393 int s;
1394
1395 s = splsoftnet();
1396 LIST_FOREACH(rtq, &rttimer_queue_head, rtq_link) {
1397 while ((r = TAILQ_FIRST(&rtq->rtq_head)) != NULL &&
1398 (r->rtt_time + rtq->rtq_timeout) < time_uptime) {
1399 LIST_REMOVE(r, rtt_link);
1400 TAILQ_REMOVE(&rtq->rtq_head, r, rtt_next);
1401 (*r->rtt_func)(r->rtt_rt, r);
1402 rtfree(r->rtt_rt);
1403 pool_put(&rttimer_pool, r);
1404 if (rtq->rtq_count > 0)
1405 rtq->rtq_count--;
1406 else
1407 printf("rt_timer_timer: rtq_count reached 0\n");
1408 }
1409 }
1410 splx(s);
1411
1412 callout_reset(&rt_timer_ch, hz, rt_timer_timer, NULL);
1413 }
1414
/*
 * Callout handler: defers the actual timer processing by enqueueing
 * rt_timer_wk on the rt_timer_wq workqueue.
 */
static void
rt_timer_timer(void *arg)
{

	workqueue_enqueue(rt_timer_wq, &rt_timer_wk, NULL);
}
1421
1422 static struct rtentry *
1423 _rtcache_init(struct route *ro, int flag)
1424 {
1425 rtcache_invariants(ro);
1426 KASSERT(ro->_ro_rt == NULL);
1427
1428 if (rtcache_getdst(ro) == NULL)
1429 return NULL;
1430 ro->ro_invalid = false;
1431 if ((ro->_ro_rt = rtalloc1(rtcache_getdst(ro), flag)) != NULL)
1432 rtcache(ro);
1433
1434 rtcache_invariants(ro);
1435 return ro->_ro_rt;
1436 }
1437
/*
 * Fill an empty route cache; calls _rtcache_init() with flag 1.
 * Returns the cached route or NULL.
 */
struct rtentry *
rtcache_init(struct route *ro)
{
	return _rtcache_init(ro, 1);
}
1443
/*
 * Same as rtcache_init(), but calls _rtcache_init() with flag 0.
 * Returns the cached route or NULL.
 */
struct rtentry *
rtcache_init_noclone(struct route *ro)
{
	return _rtcache_init(ro, 0);
}
1449
/*
 * Drop whatever route the cache holds and look the destination up
 * again; `clone' is passed to _rtcache_init() as its flag argument.
 * Returns the freshly cached route or NULL.
 */
struct rtentry *
rtcache_update(struct route *ro, int clone)
{
	/* _rtcache_init() requires the cache to hold no route. */
	rtcache_clear(ro);
	return _rtcache_init(ro, clone);
}
1456
1457 void
1458 rtcache_copy(struct route *new_ro, const struct route *old_ro)
1459 {
1460 struct rtentry *rt;
1461
1462 KASSERT(new_ro != old_ro);
1463 rtcache_invariants(new_ro);
1464 rtcache_invariants(old_ro);
1465
1466 if ((rt = rtcache_validate(old_ro)) != NULL)
1467 rt->rt_refcnt++;
1468
1469 if (rtcache_getdst(old_ro) == NULL ||
1470 rtcache_setdst(new_ro, rtcache_getdst(old_ro)) != 0)
1471 return;
1472
1473 new_ro->ro_invalid = false;
1474 if ((new_ro->_ro_rt = rt) != NULL)
1475 rtcache(new_ro);
1476 rtcache_invariants(new_ro);
1477 }
1478
/* Route caches that rtcache_invalidate() has marked invalid are moved here. */
static struct dom_rtlist invalid_routes = LIST_HEAD_INITIALIZER(dom_rtlist);
1480
1481 static void
1482 rtcache_invalidate(struct dom_rtlist *rtlist)
1483 {
1484 struct route *ro;
1485
1486 while ((ro = LIST_FIRST(rtlist)) != NULL) {
1487 rtcache_invariants(ro);
1488 KASSERT(ro->_ro_rt != NULL);
1489 ro->ro_invalid = true;
1490 LIST_REMOVE(ro, ro_rtcache_next);
1491 LIST_INSERT_HEAD(&invalid_routes, ro, ro_rtcache_next);
1492 rtcache_invariants(ro);
1493 }
1494 }
1495
1496 static void
1497 rtcache_clear_rtentry(int family, struct rtentry *rt)
1498 {
1499 struct domain *dom;
1500 struct route *ro, *nro;
1501
1502 if ((dom = pffinddomain(family)) == NULL)
1503 return;
1504
1505 LIST_FOREACH_SAFE(ro, &dom->dom_rtcache, ro_rtcache_next, nro) {
1506 if (ro->_ro_rt == rt)
1507 rtcache_clear(ro);
1508 }
1509 }
1510
1511 static void
1512 rtcache_clear(struct route *ro)
1513 {
1514 rtcache_invariants(ro);
1515 if (ro->_ro_rt == NULL)
1516 return;
1517
1518 LIST_REMOVE(ro, ro_rtcache_next);
1519
1520 rtfree(ro->_ro_rt);
1521 ro->_ro_rt = NULL;
1522 ro->ro_invalid = false;
1523 rtcache_invariants(ro);
1524 }
1525
/*
 * Look up the route to `dst' through route cache `ro'.
 *
 * When the cache already holds `dst' and its route is still valid, that
 * route is returned and *hitp is set to 1.  Otherwise *hitp is set to 0,
 * the cache is re-targeted at `dst' and a fresh lookup is done with
 * _rtcache_init(ro, clone).
 *
 * Returns the route, or NULL when the destination could not be stored
 * or the lookup found nothing.
 */
struct rtentry *
rtcache_lookup2(struct route *ro, const struct sockaddr *dst, int clone,
    int *hitp)
{
	const struct sockaddr *cached_dst;
	struct rtentry *rt = NULL;

	cached_dst = rtcache_getdst(ro);
	if (cached_dst != NULL) {
		if (sockaddr_cmp(cached_dst, dst) != 0) {
			/* Different destination: start over from scratch. */
			rtcache_free(ro);
		} else {
			rt = rtcache_validate(ro);
			if (rt != NULL) {
				*hitp = 1;
				rtcache_invariants(ro);
				return rt;
			}
			/* Same destination, but the route went stale. */
			rtcache_clear(ro);
		}
	}

	/* Cache miss. */
	*hitp = 0;
	if (rtcache_setdst(ro, dst) == 0)
		rt = _rtcache_init(ro, clone);

	rtcache_invariants(ro);

	return rt;
}
1561
1562 void
1563 rtcache_free(struct route *ro)
1564 {
1565 rtcache_clear(ro);
1566 if (ro->ro_sa != NULL) {
1567 sockaddr_free(ro->ro_sa);
1568 ro->ro_sa = NULL;
1569 }
1570 rtcache_invariants(ro);
1571 }
1572
1573 int
1574 rtcache_setdst(struct route *ro, const struct sockaddr *sa)
1575 {
1576 KASSERT(sa != NULL);
1577
1578 rtcache_invariants(ro);
1579 if (ro->ro_sa != NULL) {
1580 if (ro->ro_sa->sa_family == sa->sa_family) {
1581 rtcache_clear(ro);
1582 sockaddr_copy(ro->ro_sa, ro->ro_sa->sa_len, sa);
1583 rtcache_invariants(ro);
1584 return 0;
1585 }
1586 /* free ro_sa, wrong family */
1587 rtcache_free(ro);
1588 }
1589
1590 KASSERT(ro->_ro_rt == NULL);
1591
1592 if ((ro->ro_sa = sockaddr_dup(sa, M_ZERO | M_NOWAIT)) == NULL) {
1593 rtcache_invariants(ro);
1594 return ENOMEM;
1595 }
1596 rtcache_invariants(ro);
1597 return 0;
1598 }
1599
1600 const struct sockaddr *
1601 rt_settag(struct rtentry *rt, const struct sockaddr *tag)
1602 {
1603 if (rt->rt_tag != tag) {
1604 if (rt->rt_tag != NULL)
1605 sockaddr_free(rt->rt_tag);
1606 rt->rt_tag = sockaddr_dup(tag, M_ZERO | M_NOWAIT);
1607 }
1608 return rt->rt_tag;
1609 }
1610
/*
 * Return the tag stored in route `rt' (rt_tag); the pointer is handed
 * out as is, without making a copy.
 */
struct sockaddr *
rt_gettag(const struct rtentry *rt)
{
	return rt->rt_tag;
}
1616
1617 int
1618 rt_check_reject_route(const struct rtentry *rt, const struct ifnet *ifp)
1619 {
1620
1621 if ((rt->rt_flags & RTF_REJECT) != 0) {
1622 /* Mimic looutput */
1623 if (ifp->if_flags & IFF_LOOPBACK)
1624 return (rt->rt_flags & RTF_HOST) ?
1625 EHOSTUNREACH : ENETUNREACH;
1626 else if (rt->rt_rmx.rmx_expire == 0 ||
1627 time_uptime < rt->rt_rmx.rmx_expire)
1628 return (rt->rt_flags & RTF_GATEWAY) ?
1629 EHOSTUNREACH : EHOSTDOWN;
1630 }
1631
1632 return 0;
1633 }
1634
1635 #ifdef DDB
1636
1637 #include <machine/db_machdep.h>
1638 #include <ddb/db_interface.h>
1639 #include <ddb/db_output.h>
1640
/* Shorthand for the expire member of a route's rt_rmx, used by db_show_rtentry(). */
#define rt_expire rt_rmx.rmx_expire
1642
1643 static void
1644 db_print_sa(const struct sockaddr *sa)
1645 {
1646 int len;
1647 const u_char *p;
1648
1649 if (sa == NULL) {
1650 db_printf("[NULL]");
1651 return;
1652 }
1653
1654 p = (const u_char *)sa;
1655 len = sa->sa_len;
1656 db_printf("[");
1657 while (len > 0) {
1658 db_printf("%d", *p);
1659 p++; len--;
1660 if (len) db_printf(",");
1661 }
1662 db_printf("]\n");
1663 }
1664
1665 static void
1666 db_print_ifa(struct ifaddr *ifa)
1667 {
1668 if (ifa == NULL)
1669 return;
1670 db_printf(" ifa_addr=");
1671 db_print_sa(ifa->ifa_addr);
1672 db_printf(" ifa_dsta=");
1673 db_print_sa(ifa->ifa_dstaddr);
1674 db_printf(" ifa_mask=");
1675 db_print_sa(ifa->ifa_netmask);
1676 db_printf(" flags=0x%x,refcnt=%d,metric=%d\n",
1677 ifa->ifa_flags,
1678 ifa->ifa_refcnt,
1679 ifa->ifa_metric);
1680 }
1681
1682 /*
1683 * Function to pass to rt_walktree().
1684 * Return non-zero error to abort walk.
1685 */
1686 static int
1687 db_show_rtentry(struct rtentry *rt, void *w)
1688 {
1689 db_printf("rtentry=%p", rt);
1690
1691 db_printf(" flags=0x%x refcnt=%d use=%"PRId64" expire=%"PRId64"\n",
1692 rt->rt_flags, rt->rt_refcnt,
1693 rt->rt_use, (uint64_t)rt->rt_expire);
1694
1695 db_printf(" key="); db_print_sa(rt_getkey(rt));
1696 db_printf(" mask="); db_print_sa(rt_mask(rt));
1697 db_printf(" gw="); db_print_sa(rt->rt_gateway);
1698
1699 db_printf(" ifp=%p ", rt->rt_ifp);
1700 if (rt->rt_ifp)
1701 db_printf("(%s)", rt->rt_ifp->if_xname);
1702 else
1703 db_printf("(NULL)");
1704
1705 db_printf(" ifa=%p\n", rt->rt_ifa);
1706 db_print_ifa(rt->rt_ifa);
1707
1708 db_printf(" gwroute=%p llinfo=%p\n",
1709 rt->rt_gwroute, rt->rt_llinfo);
1710
1711 return 0;
1712 }
1713
/*
 * Function to print the routing table from the debugger.
 * Use this from ddb: "show routes"
 *
 * Only the AF_INET tree is walked, with db_show_rtentry() as the
 * per-entry callback; none of the ddb arguments (addr, have_addr,
 * count, modif) is used.
 */
void
db_show_routes(db_expr_t addr, bool have_addr,
    db_expr_t count, const char *modif)
{
	rt_walktree(AF_INET, db_show_rtentry, NULL);
}
1724 #endif
1725