route.c revision 1.196 1 /* $NetBSD: route.c,v 1.196 2017/06/22 09:56:48 ozaki-r Exp $ */
2
3 /*-
4 * Copyright (c) 1998, 2008 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Kevin M. Lahey of the Numerical Aerospace Simulation Facility,
9 * NASA Ames Research Center.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
23 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 * POSSIBILITY OF SUCH DAMAGE.
31 */
32
33 /*
34 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
35 * All rights reserved.
36 *
37 * Redistribution and use in source and binary forms, with or without
38 * modification, are permitted provided that the following conditions
39 * are met:
40 * 1. Redistributions of source code must retain the above copyright
41 * notice, this list of conditions and the following disclaimer.
42 * 2. Redistributions in binary form must reproduce the above copyright
43 * notice, this list of conditions and the following disclaimer in the
44 * documentation and/or other materials provided with the distribution.
45 * 3. Neither the name of the project nor the names of its contributors
46 * may be used to endorse or promote products derived from this software
47 * without specific prior written permission.
48 *
49 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
50 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
53 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59 * SUCH DAMAGE.
60 */
61
62 /*
63 * Copyright (c) 1980, 1986, 1991, 1993
64 * The Regents of the University of California. All rights reserved.
65 *
66 * Redistribution and use in source and binary forms, with or without
67 * modification, are permitted provided that the following conditions
68 * are met:
69 * 1. Redistributions of source code must retain the above copyright
70 * notice, this list of conditions and the following disclaimer.
71 * 2. Redistributions in binary form must reproduce the above copyright
72 * notice, this list of conditions and the following disclaimer in the
73 * documentation and/or other materials provided with the distribution.
74 * 3. Neither the name of the University nor the names of its contributors
75 * may be used to endorse or promote products derived from this software
76 * without specific prior written permission.
77 *
78 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
79 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
80 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
81 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
82 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
83 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
84 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
85 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
86 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
87 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
88 * SUCH DAMAGE.
89 *
90 * @(#)route.c 8.3 (Berkeley) 1/9/95
91 */
92
93 #ifdef _KERNEL_OPT
94 #include "opt_inet.h"
95 #include "opt_route.h"
96 #include "opt_net_mpsafe.h"
97 #endif
98
99 #include <sys/cdefs.h>
100 __KERNEL_RCSID(0, "$NetBSD: route.c,v 1.196 2017/06/22 09:56:48 ozaki-r Exp $");
101
102 #include <sys/param.h>
103 #ifdef RTFLUSH_DEBUG
104 #include <sys/sysctl.h>
105 #endif
106 #include <sys/systm.h>
107 #include <sys/callout.h>
108 #include <sys/proc.h>
109 #include <sys/mbuf.h>
110 #include <sys/socket.h>
111 #include <sys/socketvar.h>
112 #include <sys/domain.h>
113 #include <sys/kernel.h>
114 #include <sys/ioctl.h>
115 #include <sys/pool.h>
116 #include <sys/kauth.h>
117 #include <sys/workqueue.h>
118 #include <sys/syslog.h>
119 #include <sys/rwlock.h>
120 #include <sys/mutex.h>
121 #include <sys/cpu.h>
122
123 #include <net/if.h>
124 #include <net/if_dl.h>
125 #include <net/route.h>
126 #if defined(INET) || defined(INET6)
127 #include <net/if_llatbl.h>
128 #endif
129
130 #include <netinet/in.h>
131 #include <netinet/in_var.h>
132
133 #ifdef RTFLUSH_DEBUG
134 #define rtcache_debug() __predict_false(_rtcache_debug)
135 #else /* RTFLUSH_DEBUG */
136 #define rtcache_debug() 0
137 #endif /* RTFLUSH_DEBUG */
138
139 #ifdef RT_DEBUG
140 #define RT_REFCNT_TRACE(rt) printf("%s:%d: rt=%p refcnt=%d\n", \
141 __func__, __LINE__, (rt), (rt)->rt_refcnt)
142 #else
143 #define RT_REFCNT_TRACE(rt) do {} while (0)
144 #endif
145
146 #ifdef DEBUG
147 #define dlog(level, fmt, args...) log(level, fmt, ##args)
148 #else
149 #define dlog(level, fmt, args...) do {} while (0)
150 #endif
151
152 struct rtstat rtstat;
153
154 static int rttrash; /* routes not in table but not freed */
155
156 static struct pool rtentry_pool;
157 static struct pool rttimer_pool;
158
159 static struct callout rt_timer_ch; /* callout for rt_timer_timer() */
160 static struct workqueue *rt_timer_wq;
161 static struct work rt_timer_wk;
162
163 static void rt_timer_init(void);
164 static void rt_timer_queue_remove_all(struct rttimer_queue *);
165 static void rt_timer_remove_all(struct rtentry *);
166 static void rt_timer_timer(void *);
167
168 /*
169 * Locking notes:
170 * - The routing table is protected by a global rwlock
171 * - API: RT_RLOCK and friends
172 * - rtcaches are protected by a global rwlock
173 * - API: RTCACHE_RLOCK and friends
174 * - References to a rtentry is managed by reference counting and psref
 175 * - Reference counting is used for a temporary reference when a rtentry
176 * is fetched from the routing table
177 * - psref is used for temporal reference when a rtentry is fetched
178 * from a rtcache
179 * - struct route (rtcache) has struct psref, so we cannot obtain
180 * a reference twice on the same struct route
 181 * - Before destroying or updating a rtentry, we have to wait for
182 * all references left (see below for details)
183 * - APIs
184 * - An obtained rtentry via rtalloc1 or rtrequest* must be
185 * unreferenced by rt_unref
186 * - An obtained rtentry via rtcache_* must be unreferenced by
187 * rtcache_unref
188 * - TODO: once we get a lockless routing table, we should use only
189 * psref for rtentries
190 * - rtentry destruction
191 * - A rtentry is destroyed (freed) only when we call rtrequest(RTM_DELETE)
192 * - If a caller of rtrequest grabs a reference of a rtentry, the caller
193 * has a responsibility to destroy the rtentry by itself by calling
194 * rt_free
195 * - If not, rtrequest itself does that
196 * - If rt_free is called in softint, the actual destruction routine is
197 * deferred to a workqueue
198 * - rtentry update
199 * - When updating a rtentry, RTF_UPDATING flag is set
200 * - If a rtentry is set RTF_UPDATING, fetching the rtentry from
201 * the routing table or a rtcache results in either of the following
202 * cases:
203 * - if the caller runs in softint, the caller fails to fetch
204 * - otherwise, the caller waits for the update completed and retries
205 * to fetch (probably succeed to fetch for the second time)
206 */
207
208 /*
209 * Global locks for the routing table and rtcaches.
210 * Locking order: rtcache_lock => rt_lock
211 */
212 static krwlock_t rt_lock __cacheline_aligned;
213 #ifdef NET_MPSAFE
214 #define RT_RLOCK() rw_enter(&rt_lock, RW_READER)
215 #define RT_WLOCK() rw_enter(&rt_lock, RW_WRITER)
216 #define RT_UNLOCK() rw_exit(&rt_lock)
217 #define RT_LOCKED() rw_lock_held(&rt_lock)
218 #define RT_ASSERT_WLOCK() KASSERT(rw_write_held(&rt_lock))
219 #else
220 #define RT_RLOCK() do {} while (0)
221 #define RT_WLOCK() do {} while (0)
222 #define RT_UNLOCK() do {} while (0)
223 #define RT_LOCKED() false
224 #define RT_ASSERT_WLOCK() do {} while (0)
225 #endif
226
227 static krwlock_t rtcache_lock __cacheline_aligned;
228 #ifdef NET_MPSAFE
229 #define RTCACHE_RLOCK() rw_enter(&rtcache_lock, RW_READER)
230 #define RTCACHE_WLOCK() rw_enter(&rtcache_lock, RW_WRITER)
231 #define RTCACHE_UNLOCK() rw_exit(&rtcache_lock)
232 #define RTCACHE_ASSERT_WLOCK() KASSERT(rw_write_held(&rtcache_lock))
233 #define RTCACHE_WLOCKED() rw_write_held(&rtcache_lock)
234 #else
235 #define RTCACHE_RLOCK() do {} while (0)
236 #define RTCACHE_WLOCK() do {} while (0)
237 #define RTCACHE_UNLOCK() do {} while (0)
238 #define RTCACHE_ASSERT_WLOCK() do {} while (0)
239 #define RTCACHE_WLOCKED() false
240 #endif
241
242 /*
243 * mutex and cv that are used to wait for references to a rtentry left
244 * before updating the rtentry.
245 */
246 static struct {
247 kmutex_t lock;
248 kcondvar_t cv;
249 bool ongoing;
250 const struct lwp *lwp;
251 } rt_update_global __cacheline_aligned;
252
253 /*
254 * A workqueue and stuff that are used to defer the destruction routine
255 * of rtentries.
256 */
257 static struct {
258 struct workqueue *wq;
259 struct work wk;
260 kmutex_t lock;
261 struct rtentry *queue[10];
262 } rt_free_global __cacheline_aligned;
263
264 /* psref for rtentry */
265 static struct psref_class *rt_psref_class __read_mostly;
266
267 #ifdef RTFLUSH_DEBUG
268 static int _rtcache_debug = 0;
269 #endif /* RTFLUSH_DEBUG */
270
271 static kauth_listener_t route_listener;
272
273 static int rtdeletemsg(struct rtentry *);
274 static void rtflushall(int);
275
276 static void rt_maskedcopy(const struct sockaddr *,
277 struct sockaddr *, const struct sockaddr *);
278
279 static void rtcache_clear(struct route *);
280 static void rtcache_clear_rtentry(int, struct rtentry *);
281 static void rtcache_invalidate(struct dom_rtlist *);
282
283 static void rt_ref(struct rtentry *);
284
285 static struct rtentry *
286 rtalloc1_locked(const struct sockaddr *, int, bool, bool);
287 static struct rtentry *
288 rtcache_validate_locked(struct route *);
289 static void rtcache_free_locked(struct route *);
290 static int rtcache_setdst_locked(struct route *, const struct sockaddr *);
291
292 static void rtcache_ref(struct rtentry *, struct route *);
293
294 #ifdef NET_MPSAFE
295 static void rt_update_wait(void);
296 #endif
297
298 static bool rt_wait_ok(void);
299 static void rt_wait_refcnt(const char *, struct rtentry *, int);
300 static void rt_wait_psref(struct rtentry *);
301
302 #ifdef DDB
303 static void db_print_sa(const struct sockaddr *);
304 static void db_print_ifa(struct ifaddr *);
305 static int db_show_rtentry(struct rtentry *, void *);
306 #endif
307
#ifdef RTFLUSH_DEBUG
static void sysctl_net_rtcache_setup(struct sysctllog **);
/*
 * Create the net.rtcache sysctl subtree with a single read-write
 * "debug" knob backed by _rtcache_debug.
 */
static void
sysctl_net_rtcache_setup(struct sysctllog **clog)
{
	const struct sysctlnode *node = NULL;
	int rv;

	rv = sysctl_createv(clog, 0, NULL, &node, CTLFLAG_PERMANENT,
	    CTLTYPE_NODE,
	    "rtcache", SYSCTL_DESCR("Route cache related settings"),
	    NULL, 0, NULL, 0, CTL_NET, CTL_CREATE, CTL_EOL);
	if (rv != 0)
		return;

	(void)sysctl_createv(clog, 0, &node, &node,
	    CTLFLAG_PERMANENT|CTLFLAG_READWRITE, CTLTYPE_INT,
	    "debug", SYSCTL_DESCR("Debug route caches"),
	    NULL, 0, &_rtcache_debug, 0, CTL_CREATE, CTL_EOL);
}
#endif /* RTFLUSH_DEBUG */
327
328 static inline void
329 rt_destroy(struct rtentry *rt)
330 {
331 if (rt->_rt_key != NULL)
332 sockaddr_free(rt->_rt_key);
333 if (rt->rt_gateway != NULL)
334 sockaddr_free(rt->rt_gateway);
335 if (rt_gettag(rt) != NULL)
336 sockaddr_free(rt_gettag(rt));
337 rt->_rt_key = rt->rt_gateway = rt->rt_tag = NULL;
338 }
339
340 static inline const struct sockaddr *
341 rt_setkey(struct rtentry *rt, const struct sockaddr *key, int flags)
342 {
343 if (rt->_rt_key == key)
344 goto out;
345
346 if (rt->_rt_key != NULL)
347 sockaddr_free(rt->_rt_key);
348 rt->_rt_key = sockaddr_dup(key, flags);
349 out:
350 rt->rt_nodes->rn_key = (const char *)rt->_rt_key;
351 return rt->_rt_key;
352 }
353
354 struct ifaddr *
355 rt_get_ifa(struct rtentry *rt)
356 {
357 struct ifaddr *ifa;
358
359 if ((ifa = rt->rt_ifa) == NULL)
360 return ifa;
361 else if (ifa->ifa_getifa == NULL)
362 return ifa;
363 #if 0
364 else if (ifa->ifa_seqno != NULL && *ifa->ifa_seqno == rt->rt_ifa_seqno)
365 return ifa;
366 #endif
367 else {
368 ifa = (*ifa->ifa_getifa)(ifa, rt_getkey(rt));
369 if (ifa == NULL)
370 return NULL;
371 rt_replace_ifa(rt, ifa);
372 return ifa;
373 }
374 }
375
/*
 * Install ifa as rt->rt_ifa and snapshot the address's sequence
 * number, if it exports one.  Takes no reference; callers
 * (rt_set_ifa, rt_replace_ifa) manage refcounts themselves.
 */
static void
rt_set_ifa1(struct rtentry *rt, struct ifaddr *ifa)
{
	rt->rt_ifa = ifa;
	if (ifa->ifa_seqno != NULL)
		rt->rt_ifa_seqno = *ifa->ifa_seqno;
}
383
384 /*
385 * Is this route the connected route for the ifa?
386 */
387 static int
388 rt_ifa_connected(const struct rtentry *rt, const struct ifaddr *ifa)
389 {
390 const struct sockaddr *key, *dst, *odst;
391 struct sockaddr_storage maskeddst;
392
393 key = rt_getkey(rt);
394 dst = rt->rt_flags & RTF_HOST ? ifa->ifa_dstaddr : ifa->ifa_addr;
395 if (dst == NULL ||
396 dst->sa_family != key->sa_family ||
397 dst->sa_len != key->sa_len)
398 return 0;
399 if ((rt->rt_flags & RTF_HOST) == 0 && ifa->ifa_netmask) {
400 odst = dst;
401 dst = (struct sockaddr *)&maskeddst;
402 rt_maskedcopy(odst, (struct sockaddr *)&maskeddst,
403 ifa->ifa_netmask);
404 }
405 return (memcmp(dst, key, dst->sa_len) == 0);
406 }
407
/*
 * Replace rt->rt_ifa with ifa.  If the route was the connected
 * route of the old address, move the IFA_ROUTE marker from the old
 * address to the new one (when the route is also connected for the
 * new address).  Takes a reference on ifa and drops the one held
 * on the previous rt_ifa.
 */
void
rt_replace_ifa(struct rtentry *rt, struct ifaddr *ifa)
{
	if (rt->rt_ifa &&
	    rt->rt_ifa != ifa &&
	    rt->rt_ifa->ifa_flags & IFA_ROUTE &&
	    rt_ifa_connected(rt, rt->rt_ifa))
	{
		RT_DPRINTF("rt->_rt_key = %p, ifa = %p, "
		    "replace deleted IFA_ROUTE\n",
		    (void *)rt->_rt_key, (void *)rt->rt_ifa);
		rt->rt_ifa->ifa_flags &= ~IFA_ROUTE;
		if (rt_ifa_connected(rt, ifa)) {
			RT_DPRINTF("rt->_rt_key = %p, ifa = %p, "
			    "replace added IFA_ROUTE\n",
			    (void *)rt->_rt_key, (void *)ifa);
			ifa->ifa_flags |= IFA_ROUTE;
		}
	}

	/* Reference the new address before releasing the old one,
	 * in case they are the same ifaddr. */
	ifaref(ifa);
	ifafree(rt->rt_ifa);
	rt_set_ifa1(rt, ifa);
}
432
/*
 * Install ifa on an rtentry that holds no previous rt_ifa:
 * take a reference, then set it.
 */
static void
rt_set_ifa(struct rtentry *rt, struct ifaddr *ifa)
{
	ifaref(ifa);
	rt_set_ifa1(rt, ifa);
}
439
440 static int
441 route_listener_cb(kauth_cred_t cred, kauth_action_t action, void *cookie,
442 void *arg0, void *arg1, void *arg2, void *arg3)
443 {
444 struct rt_msghdr *rtm;
445 int result;
446
447 result = KAUTH_RESULT_DEFER;
448 rtm = arg1;
449
450 if (action != KAUTH_NETWORK_ROUTE)
451 return result;
452
453 if (rtm->rtm_type == RTM_GET)
454 result = KAUTH_RESULT_ALLOW;
455
456 return result;
457 }
458
459 static void rt_free_work(struct work *, void *);
460
/*
 * One-time initialization of the routing code: locks, the psref
 * class for rtentries, the deferred-free workqueue, the rtentry and
 * rttimer pools, the radix-tree machinery and the kauth listener
 * used for route(4) permission checks.
 */
void
rt_init(void)
{
	int error;

#ifdef RTFLUSH_DEBUG
	sysctl_net_rtcache_setup(NULL);
#endif

	mutex_init(&rt_free_global.lock, MUTEX_DEFAULT, IPL_SOFTNET);
	rt_psref_class = psref_class_create("rtentry", IPL_SOFTNET);

	/* Workqueue used by rt_free when called from softint context. */
	error = workqueue_create(&rt_free_global.wq, "rt_free",
	    rt_free_work, NULL, PRI_SOFTNET, IPL_SOFTNET, WQ_MPSAFE);
	if (error)
		panic("%s: workqueue_create failed (%d)\n", __func__, error);

	mutex_init(&rt_update_global.lock, MUTEX_DEFAULT, IPL_SOFTNET);
	cv_init(&rt_update_global.cv, "rt_update");

	pool_init(&rtentry_pool, sizeof(struct rtentry), 0, 0, 0, "rtentpl",
	    NULL, IPL_SOFTNET);
	pool_init(&rttimer_pool, sizeof(struct rttimer), 0, 0, 0, "rttmrpl",
	    NULL, IPL_SOFTNET);

	rn_init();	/* initialize all zeroes, all ones, mask table */
	rtbl_init();

	route_listener = kauth_listen_scope(KAUTH_SCOPE_NETWORK,
	    route_listener_cb, NULL);
}
492
493 static void
494 rtflushall(int family)
495 {
496 struct domain *dom;
497
498 if (rtcache_debug())
499 printf("%s: enter\n", __func__);
500
501 if ((dom = pffinddomain(family)) == NULL)
502 return;
503
504 RTCACHE_WLOCK();
505 rtcache_invalidate(&dom->dom_rtcache);
506 RTCACHE_UNLOCK();
507 }
508
509 static void
510 rtcache(struct route *ro)
511 {
512 struct domain *dom;
513
514 RTCACHE_ASSERT_WLOCK();
515
516 rtcache_invariants(ro);
517 KASSERT(ro->_ro_rt != NULL);
518 KASSERT(ro->ro_invalid == false);
519 KASSERT(rtcache_getdst(ro) != NULL);
520
521 if ((dom = pffinddomain(rtcache_getdst(ro)->sa_family)) == NULL)
522 return;
523
524 LIST_INSERT_HEAD(&dom->dom_rtcache, ro, ro_rtcache_next);
525 rtcache_invariants(ro);
526 }
527
#ifdef RT_DEBUG
/*
 * Dump an rtentry's destination, gateway, flags and interface to
 * the console.  Debug aid only (RT_DEBUG).
 */
static void
dump_rt(const struct rtentry *rt)
{
	char buf[512];

	aprint_normal("rt: ");
	aprint_normal("p=%p ", rt);
	if (rt->_rt_key == NULL) {
		aprint_normal("dst=(NULL) ");
	} else {
		sockaddr_format(rt->_rt_key, buf, sizeof(buf));
		aprint_normal("dst=%s ", buf);
	}
	if (rt->rt_gateway == NULL) {
		aprint_normal("gw=(NULL) ");
	} else {
		/*
		 * Fix: format rt_gateway here; the original formatted
		 * _rt_key again (copy-paste bug), printing the
		 * destination under the "gw=" label.
		 */
		sockaddr_format(rt->rt_gateway, buf, sizeof(buf));
		aprint_normal("gw=%s ", buf);
	}
	aprint_normal("flags=%x ", rt->rt_flags);
	if (rt->rt_ifp == NULL) {
		aprint_normal("if=(NULL) ");
	} else {
		aprint_normal("if=%s ", rt->rt_ifp->if_xname);
	}
	aprint_normal("\n");
}
#endif /* RT_DEBUG */
557
/*
 * Packet routing routines. If success, refcnt of a returned rtentry
 * will be incremented. The caller has to rtfree it by itself.
 *
 * The routing table lock must be held (reader if wlock is false,
 * writer if true); the flavor is needed so the lock can be
 * reacquired correctly after waiting for a concurrent update.
 */
struct rtentry *
rtalloc1_locked(const struct sockaddr *dst, int report, bool wait_ok,
    bool wlock)
{
	rtbl_t *rtbl;
	struct rtentry *rt;
	int s;

#ifdef NET_MPSAFE
retry:
#endif
	s = splsoftnet();
	rtbl = rt_gettable(dst->sa_family);
	if (rtbl == NULL)
		goto miss;

	rt = rt_matchaddr(rtbl, dst);
	if (rt == NULL)
		goto miss;

	/* An entry being destroyed does not count as a match. */
	if (!ISSET(rt->rt_flags, RTF_UP))
		goto miss;

#ifdef NET_MPSAFE
	if (ISSET(rt->rt_flags, RTF_UPDATING) &&
	    /* XXX updater should be always able to acquire */
	    curlwp != rt_update_global.lwp) {
		bool need_lock = false;
		/* Softint context cannot sleep for the update to end. */
		if (!wait_ok || !rt_wait_ok())
			goto miss;
		/* Drop all locks before sleeping in rt_update_wait. */
		RT_UNLOCK();
		splx(s);

		/* XXX need more proper solution */
		if (RTCACHE_WLOCKED()) {
			RTCACHE_UNLOCK();
			need_lock = true;
		}

		/* We can wait until the update is complete */
		rt_update_wait();

		/* Reacquire in lock order: rtcache_lock => rt_lock. */
		if (need_lock)
			RTCACHE_WLOCK();
		if (wlock)
			RT_WLOCK();
		else
			RT_RLOCK();
		goto retry;
	}
#endif /* NET_MPSAFE */

	rt_ref(rt);
	RT_REFCNT_TRACE(rt);

	splx(s);
	return rt;
miss:
	rtstat.rts_unreach++;
	if (report) {
		struct rt_addrinfo info;

		/* Tell routing listeners about the failed lookup. */
		memset(&info, 0, sizeof(info));
		info.rti_info[RTAX_DST] = dst;
		rt_missmsg(RTM_MISS, &info, 0, 0);
	}
	splx(s);
	return NULL;
}
631
632 struct rtentry *
633 rtalloc1(const struct sockaddr *dst, int report)
634 {
635 struct rtentry *rt;
636
637 RT_RLOCK();
638 rt = rtalloc1_locked(dst, report, true, false);
639 RT_UNLOCK();
640
641 return rt;
642 }
643
/*
 * Take a reference on rt.  Callers must already hold the routing
 * table lock or an existing reference.
 */
static void
rt_ref(struct rtentry *rt)
{

	KASSERT(rt->rt_refcnt >= 0);
	atomic_inc_uint(&rt->rt_refcnt);
}
651
/*
 * Drop a reference on rt.  If the entry is being destroyed
 * (!RTF_UP) or updated (RTF_UPDATING), wake up any thread sleeping
 * in rt_wait_refcnt for the count to drain.
 */
void
rt_unref(struct rtentry *rt)
{

	KASSERT(rt != NULL);
	KASSERTMSG(rt->rt_refcnt > 0, "refcnt=%d", rt->rt_refcnt);

	atomic_dec_uint(&rt->rt_refcnt);
	if (!ISSET(rt->rt_flags, RTF_UP) || ISSET(rt->rt_flags, RTF_UPDATING)) {
		/* The waiter's cv is protected by rt_free_global.lock. */
		mutex_enter(&rt_free_global.lock);
		cv_broadcast(&rt->rt_cv);
		mutex_exit(&rt_free_global.lock);
	}
}
666
/*
 * Return true if the current context may sleep: never in hard
 * interrupt (asserted), and only outside soft interrupt.
 */
static bool
rt_wait_ok(void)
{

	KASSERT(!cpu_intr_p());
	return !cpu_softintr_p();
}
674
/*
 * Sleep until rt's reference count drops to cnt, paired with the
 * cv_broadcast in rt_unref.  "title" is used only for debug logs.
 */
void
rt_wait_refcnt(const char *title, struct rtentry *rt, int cnt)
{
	mutex_enter(&rt_free_global.lock);
	while (rt->rt_refcnt > cnt) {
		dlog(LOG_DEBUG, "%s: %s waiting (refcnt=%d)\n",
		    __func__, title, rt->rt_refcnt);
		cv_wait(&rt->rt_cv, &rt_free_global.lock);
		dlog(LOG_DEBUG, "%s: %s waited (refcnt=%d)\n",
		    __func__, title, rt->rt_refcnt);
	}
	mutex_exit(&rt_free_global.lock);
}
688
/*
 * Wait for all psref readers of rt to finish, then re-initialize
 * the psref target so the rtentry can be handed out again.
 */
void
rt_wait_psref(struct rtentry *rt)
{

	psref_target_destroy(&rt->rt_psref, rt_psref_class);
	psref_target_init(&rt->rt_psref, rt_psref_class);
}
696
/*
 * Actually destroy a rtentry: wait for any in-flight update and all
 * remaining references/psrefs to drain, release held resources and
 * return the entry to the pool.  Must run in a sleepable context;
 * rt_free defers to a workqueue otherwise.
 */
static void
_rt_free(struct rtentry *rt)
{
	struct ifaddr *ifa;

	/*
	 * Need to avoid a deadlock on rt_wait_refcnt of update
	 * and a conflict on psref_target_destroy of update.
	 */
#ifdef NET_MPSAFE
	rt_update_wait();
#endif

	RT_REFCNT_TRACE(rt);
	KASSERTMSG(rt->rt_refcnt >= 0, "refcnt=%d", rt->rt_refcnt);
	/* Sleep until every temporary reference has been dropped. */
	rt_wait_refcnt("free", rt, 0);
#ifdef NET_MPSAFE
	psref_target_destroy(&rt->rt_psref, rt_psref_class);
#endif

	rt_assert_inactive(rt);
	rttrash--;
	/* Drop our hold on the interface address. */
	ifa = rt->rt_ifa;
	rt->rt_ifa = NULL;
	ifafree(ifa);
	rt->rt_ifp = NULL;
	cv_destroy(&rt->rt_cv);
	rt_destroy(rt);
	pool_put(&rtentry_pool, rt);
}
727
728 static void
729 rt_free_work(struct work *wk, void *arg)
730 {
731 int i;
732 struct rtentry *rt;
733
734 restart:
735 mutex_enter(&rt_free_global.lock);
736 for (i = 0; i < sizeof(rt_free_global.queue); i++) {
737 if (rt_free_global.queue[i] == NULL)
738 continue;
739 rt = rt_free_global.queue[i];
740 rt_free_global.queue[i] = NULL;
741 mutex_exit(&rt_free_global.lock);
742
743 atomic_dec_uint(&rt->rt_refcnt);
744 _rt_free(rt);
745 goto restart;
746 }
747 mutex_exit(&rt_free_global.lock);
748 }
749
750 void
751 rt_free(struct rtentry *rt)
752 {
753
754 KASSERT(rt->rt_refcnt > 0);
755 if (!rt_wait_ok()) {
756 int i;
757 mutex_enter(&rt_free_global.lock);
758 for (i = 0; i < sizeof(rt_free_global.queue); i++) {
759 if (rt_free_global.queue[i] == NULL) {
760 rt_free_global.queue[i] = rt;
761 break;
762 }
763 }
764 KASSERT(i < sizeof(rt_free_global.queue));
765 rt_ref(rt);
766 mutex_exit(&rt_free_global.lock);
767 workqueue_enqueue(rt_free_global.wq, &rt_free_global.wk, NULL);
768 } else {
769 atomic_dec_uint(&rt->rt_refcnt);
770 _rt_free(rt);
771 }
772 }
773
#ifdef NET_MPSAFE
/*
 * Sleep until no rtentry update (rt_update_prepare/rt_update_finish
 * section) is in progress.  Woken by the cv_broadcast in
 * rt_update_finish.
 */
static void
rt_update_wait(void)
{

	mutex_enter(&rt_update_global.lock);
	while (rt_update_global.ongoing) {
		dlog(LOG_DEBUG, "%s: waiting lwp=%p\n", __func__, curlwp);
		cv_wait(&rt_update_global.cv, &rt_update_global.lock);
		dlog(LOG_DEBUG, "%s: waited lwp=%p\n", __func__, curlwp);
	}
	mutex_exit(&rt_update_global.lock);
}
#endif
788
/*
 * Begin updating rt: mark it RTF_UPDATING so that new lookups block
 * (or fail in softint), become the single global updater, then wait
 * for all other references and psrefs to drain.  Returns -1 if the
 * entry is already being destroyed, 0 on success.  Must be paired
 * with rt_update_finish.
 */
int
rt_update_prepare(struct rtentry *rt)
{

	dlog(LOG_DEBUG, "%s: updating rt=%p lwp=%p\n", __func__, rt, curlwp);

	RTCACHE_WLOCK();
	RT_WLOCK();
	/* If the entry is being destroyed, don't proceed the update. */
	if (!ISSET(rt->rt_flags, RTF_UP)) {
		RT_UNLOCK();
		RTCACHE_UNLOCK();
		return -1;
	}
	rt->rt_flags |= RTF_UPDATING;
	RT_UNLOCK();
	RTCACHE_UNLOCK();

	mutex_enter(&rt_update_global.lock);
	/* Only one update may be in flight at a time. */
	while (rt_update_global.ongoing) {
		dlog(LOG_DEBUG, "%s: waiting ongoing updating rt=%p lwp=%p\n",
		    __func__, rt, curlwp);
		cv_wait(&rt_update_global.cv, &rt_update_global.lock);
		dlog(LOG_DEBUG, "%s: waited ongoing updating rt=%p lwp=%p\n",
		    __func__, rt, curlwp);
	}
	rt_update_global.ongoing = true;
	/* XXX need it to avoid rt_update_wait by updater itself. */
	rt_update_global.lwp = curlwp;
	mutex_exit(&rt_update_global.lock);

	/* The caller holds one reference itself, hence cnt = 1. */
	rt_wait_refcnt("update", rt, 1);
	rt_wait_psref(rt);

	return 0;
}
825
/*
 * End an update started with rt_update_prepare: clear RTF_UPDATING,
 * release the global updater slot and wake all threads blocked in
 * rt_update_wait or rt_update_prepare.
 */
void
rt_update_finish(struct rtentry *rt)
{

	RTCACHE_WLOCK();
	RT_WLOCK();
	rt->rt_flags &= ~RTF_UPDATING;
	RT_UNLOCK();
	RTCACHE_UNLOCK();

	mutex_enter(&rt_update_global.lock);
	rt_update_global.ongoing = false;
	rt_update_global.lwp = NULL;
	cv_broadcast(&rt_update_global.cv);
	mutex_exit(&rt_update_global.lock);

	dlog(LOG_DEBUG, "%s: updated rt=%p lwp=%p\n", __func__, rt, curlwp);
}
844
/*
 * Force a routing table entry to the specified
 * destination to go through the given gateway.
 * Normally called as a result of a routing redirect
 * message from the network layer.
 *
 * N.B.: must be called at splsoftnet
 */
void
rtredirect(const struct sockaddr *dst, const struct sockaddr *gateway,
    const struct sockaddr *netmask, int flags, const struct sockaddr *src,
    struct rtentry **rtp)
{
	struct rtentry *rt;
	int error = 0;
	uint64_t *stat = NULL;
	struct rt_addrinfo info;
	struct ifaddr *ifa;
	struct psref psref;

	/* verify the gateway is directly reachable */
	if ((ifa = ifa_ifwithnet_psref(gateway, &psref)) == NULL) {
		error = ENETUNREACH;
		goto out;
	}
	rt = rtalloc1(dst, 0);
	/*
	 * If the redirect isn't from our current router for this dst,
	 * it's either old or wrong. If it redirects us to ourselves,
	 * we have a routing loop, perhaps as a result of an interface
	 * going down recently.
	 */
	if (!(flags & RTF_DONE) && rt &&
	    (sockaddr_cmp(src, rt->rt_gateway) != 0 || rt->rt_ifa != ifa))
		error = EINVAL;
	else {
		int s = pserialize_read_enter();
		struct ifaddr *_ifa;

		/* Redirecting to one of our own addresses is a loop. */
		_ifa = ifa_ifwithaddr(gateway);
		if (_ifa != NULL)
			error = EHOSTUNREACH;
		pserialize_read_exit(s);
	}
	if (error)
		goto done;
	/*
	 * Create a new entry if we just got back a wildcard entry
	 * or the lookup failed. This is necessary for hosts
	 * which use routing redirects generated by smart gateways
	 * to dynamically build the routing tables.
	 */
	if (rt == NULL || (rt_mask(rt) && rt_mask(rt)->sa_len < 2))
		goto create;
	/*
	 * Don't listen to the redirect if it's
	 * for a route to an interface.
	 */
	if (rt->rt_flags & RTF_GATEWAY) {
		if (((rt->rt_flags & RTF_HOST) == 0) && (flags & RTF_HOST)) {
			/*
			 * Changing from route to net => route to host.
			 * Create new route, rather than smashing route to net.
			 */
		create:
			if (rt != NULL)
				rt_unref(rt);
			flags |= RTF_GATEWAY | RTF_DYNAMIC;
			memset(&info, 0, sizeof(info));
			info.rti_info[RTAX_DST] = dst;
			info.rti_info[RTAX_GATEWAY] = gateway;
			info.rti_info[RTAX_NETMASK] = netmask;
			info.rti_ifa = ifa;
			info.rti_flags = flags;
			rt = NULL;
			error = rtrequest1(RTM_ADD, &info, &rt);
			if (rt != NULL)
				flags = rt->rt_flags;
			stat = &rtstat.rts_dynamic;
		} else {
			/*
			 * Smash the current notion of the gateway to
			 * this destination. Should check about netmask!!!
			 */
#ifdef NET_MPSAFE
			KASSERT(!cpu_softintr_p());

			error = rt_update_prepare(rt);
			if (error == 0) {
#endif
				error = rt_setgate(rt, gateway);
				if (error == 0) {
					rt->rt_flags |= RTF_MODIFIED;
					flags |= RTF_MODIFIED;
				}
#ifdef NET_MPSAFE
				rt_update_finish(rt);
			} else {
				/*
				 * If error != 0, the rtentry is being
				 * destroyed, so doing nothing doesn't
				 * matter.
				 */
			}
#endif
			stat = &rtstat.rts_newgateway;
		}
	} else
		error = EHOSTUNREACH;
done:
	if (rt) {
		/* Hand the referenced entry to the caller on success. */
		if (rtp != NULL && !error)
			*rtp = rt;
		else
			rt_unref(rt);
	}
out:
	if (error)
		rtstat.rts_badredirect++;
	else if (stat != NULL)
		(*stat)++;
	/* Always report the redirect (and any error) to listeners. */
	memset(&info, 0, sizeof(info));
	info.rti_info[RTAX_DST] = dst;
	info.rti_info[RTAX_GATEWAY] = gateway;
	info.rti_info[RTAX_NETMASK] = netmask;
	info.rti_info[RTAX_AUTHOR] = src;
	rt_missmsg(RTM_REDIRECT, &info, flags, error);
	ifa_release(ifa, &psref);
}
974
975 /*
976 * Delete a route and generate a message.
977 * It doesn't free a passed rt.
978 */
979 static int
980 rtdeletemsg(struct rtentry *rt)
981 {
982 int error;
983 struct rt_addrinfo info;
984 struct rtentry *retrt;
985
986 /*
987 * Request the new route so that the entry is not actually
988 * deleted. That will allow the information being reported to
989 * be accurate (and consistent with route_output()).
990 */
991 memset(&info, 0, sizeof(info));
992 info.rti_info[RTAX_DST] = rt_getkey(rt);
993 info.rti_info[RTAX_NETMASK] = rt_mask(rt);
994 info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
995 info.rti_flags = rt->rt_flags;
996 error = rtrequest1(RTM_DELETE, &info, &retrt);
997
998 rt_missmsg(RTM_DELETE, &info, info.rti_flags, error);
999
1000 return error;
1001 }
1002
/*
 * Pick the interface address to attach to a route to dst via
 * gateway, taking a psref on the result.  Returns NULL if nothing
 * suitable is found; otherwise the caller must drop the reference
 * with ifa_release.
 */
struct ifaddr *
ifa_ifwithroute_psref(int flags, const struct sockaddr *dst,
    const struct sockaddr *gateway, struct psref *psref)
{
	struct ifaddr *ifa = NULL;

	if ((flags & RTF_GATEWAY) == 0) {
		/*
		 * If we are adding a route to an interface,
		 * and the interface is a pt to pt link
		 * we should search for the destination
		 * as our clue to the interface. Otherwise
		 * we can use the local address.
		 */
		if ((flags & RTF_HOST) && gateway->sa_family != AF_LINK)
			ifa = ifa_ifwithdstaddr_psref(dst, psref);
		if (ifa == NULL)
			ifa = ifa_ifwithaddr_psref(gateway, psref);
	} else {
		/*
		 * If we are adding a route to a remote net
		 * or host, the gateway may still be on the
		 * other end of a pt to pt link.
		 */
		ifa = ifa_ifwithdstaddr_psref(gateway, psref);
	}
	if (ifa == NULL)
		ifa = ifa_ifwithnet_psref(gateway, psref);
	if (ifa == NULL) {
		/* Last resort: take the address of our route to the
		 * gateway, provided that route is not itself indirect. */
		int s;
		struct rtentry *rt;

		/* XXX we cannot call rtalloc1 if holding the rt lock */
		if (RT_LOCKED())
			rt = rtalloc1_locked(gateway, 0, true, true);
		else
			rt = rtalloc1(gateway, 0);
		if (rt == NULL)
			return NULL;
		if (rt->rt_flags & RTF_GATEWAY) {
			rt_unref(rt);
			return NULL;
		}
		/*
		 * Just in case. May not need to do this workaround.
		 * Revisit when working on rtentry MP-ification.
		 */
		s = pserialize_read_enter();
		IFADDR_READER_FOREACH(ifa, rt->rt_ifp) {
			if (ifa == rt->rt_ifa)
				break;
		}
		if (ifa != NULL)
			ifa_acquire(ifa, psref);
		pserialize_read_exit(s);
		rt_unref(rt);
		if (ifa == NULL)
			return NULL;
	}
	if (ifa->ifa_addr->sa_family != dst->sa_family) {
		/* Prefer an address of the destination's family on
		 * the same interface, if one exists. */
		struct ifaddr *nifa;
		int s;

		s = pserialize_read_enter();
		nifa = ifaof_ifpforaddr(dst, ifa->ifa_ifp);
		if (nifa != NULL) {
			ifa_release(ifa, psref);
			ifa_acquire(nifa, psref);
			ifa = nifa;
		}
		pserialize_read_exit(s);
	}
	return ifa;
}
1077
1078 /*
1079 * If it suceeds and ret_nrt isn't NULL, refcnt of ret_nrt is incremented.
1080 * The caller has to rtfree it by itself.
1081 */
1082 int
1083 rtrequest(int req, const struct sockaddr *dst, const struct sockaddr *gateway,
1084 const struct sockaddr *netmask, int flags, struct rtentry **ret_nrt)
1085 {
1086 struct rt_addrinfo info;
1087
1088 memset(&info, 0, sizeof(info));
1089 info.rti_flags = flags;
1090 info.rti_info[RTAX_DST] = dst;
1091 info.rti_info[RTAX_GATEWAY] = gateway;
1092 info.rti_info[RTAX_NETMASK] = netmask;
1093 return rtrequest1(req, &info, ret_nrt);
1094 }
1095
1096 /*
1097 * It's a utility function to add/remove a route to/from the routing table
1098 * and tell user processes the addition/removal on success.
1099 */
1100 int
1101 rtrequest_newmsg(const int req, const struct sockaddr *dst,
1102 const struct sockaddr *gateway, const struct sockaddr *netmask,
1103 const int flags)
1104 {
1105 int error;
1106 struct rtentry *ret_nrt = NULL;
1107
1108 KASSERT(req == RTM_ADD || req == RTM_DELETE);
1109
1110 error = rtrequest(req, dst, gateway, netmask, flags, &ret_nrt);
1111 if (error != 0)
1112 return error;
1113
1114 KASSERT(ret_nrt != NULL);
1115
1116 rt_newmsg(req, ret_nrt); /* tell user process */
1117 if (req == RTM_DELETE)
1118 rt_free(ret_nrt);
1119 else
1120 rt_unref(ret_nrt);
1121
1122 return 0;
1123 }
1124
1125 struct ifnet *
1126 rt_getifp(struct rt_addrinfo *info, struct psref *psref)
1127 {
1128 const struct sockaddr *ifpaddr = info->rti_info[RTAX_IFP];
1129
1130 if (info->rti_ifp != NULL)
1131 return NULL;
1132 /*
1133 * ifp may be specified by sockaddr_dl when protocol address
1134 * is ambiguous
1135 */
1136 if (ifpaddr != NULL && ifpaddr->sa_family == AF_LINK) {
1137 struct ifaddr *ifa;
1138 int s = pserialize_read_enter();
1139
1140 ifa = ifa_ifwithnet(ifpaddr);
1141 if (ifa != NULL)
1142 info->rti_ifp = if_get_byindex(ifa->ifa_ifp->if_index,
1143 psref);
1144 pserialize_read_exit(s);
1145 }
1146
1147 return info->rti_ifp;
1148 }
1149
/*
 * Look up the ifaddr to attach to a route described by info.
 * Preference order: an exact match on RTAX_IFA; an address on the
 * explicitly requested interface (rti_ifp); otherwise a route-based
 * search via ifa_ifwithroute_psref().
 *
 * On success the returned ifaddr is referenced via psref and also
 * stored in info->rti_ifa; info->rti_ifp is filled in if it was NULL.
 */
struct ifaddr *
rt_getifa(struct rt_addrinfo *info, struct psref *psref)
{
	struct ifaddr *ifa = NULL;
	const struct sockaddr *dst = info->rti_info[RTAX_DST];
	const struct sockaddr *gateway = info->rti_info[RTAX_GATEWAY];
	const struct sockaddr *ifaaddr = info->rti_info[RTAX_IFA];
	int flags = info->rti_flags;
	const struct sockaddr *sa;

	if (info->rti_ifa == NULL && ifaaddr != NULL) {
		ifa = ifa_ifwithaddr_psref(ifaaddr, psref);
		if (ifa != NULL)
			goto got;
	}

	/* Pick the most specific address available as the search key. */
	sa = ifaaddr != NULL ? ifaaddr :
	    (gateway != NULL ? gateway : dst);
	if (sa != NULL && info->rti_ifp != NULL)
		ifa = ifaof_ifpforaddr_psref(sa, info->rti_ifp, psref);
	else if (dst != NULL && gateway != NULL)
		ifa = ifa_ifwithroute_psref(flags, dst, gateway, psref);
	else if (sa != NULL)
		ifa = ifa_ifwithroute_psref(flags, sa, sa, psref);
	if (ifa == NULL)
		return NULL;
got:
	if (ifa->ifa_getifa != NULL) {
		/* FIXME ifa_getifa is NOMPSAFE */
		ifa = (*ifa->ifa_getifa)(ifa, dst);
		if (ifa == NULL)
			return NULL;
		/* Reference the replacement ifaddr for the caller. */
		ifa_acquire(ifa, psref);
	}
	info->rti_ifa = ifa;
	if (info->rti_ifp == NULL)
		info->rti_ifp = ifa->ifa_ifp;
	return ifa;
}
1189
1190 /*
1191 * If it suceeds and ret_nrt isn't NULL, refcnt of ret_nrt is incremented.
1192 * The caller has to rtfree it by itself.
1193 */
1194 int
1195 rtrequest1(int req, struct rt_addrinfo *info, struct rtentry **ret_nrt)
1196 {
1197 int s = splsoftnet(), ss;
1198 int error = 0, rc;
1199 struct rtentry *rt;
1200 rtbl_t *rtbl;
1201 struct ifaddr *ifa = NULL;
1202 struct sockaddr_storage maskeddst;
1203 const struct sockaddr *dst = info->rti_info[RTAX_DST];
1204 const struct sockaddr *gateway = info->rti_info[RTAX_GATEWAY];
1205 const struct sockaddr *netmask = info->rti_info[RTAX_NETMASK];
1206 int flags = info->rti_flags;
1207 struct psref psref_ifp, psref_ifa;
1208 int bound = 0;
1209 struct ifnet *ifp = NULL;
1210 bool need_to_release_ifa = true;
1211 bool need_unlock = true;
1212 #define senderr(x) { error = x ; goto bad; }
1213
1214 RT_WLOCK();
1215
1216 bound = curlwp_bind();
1217 if ((rtbl = rt_gettable(dst->sa_family)) == NULL)
1218 senderr(ESRCH);
1219 if (flags & RTF_HOST)
1220 netmask = NULL;
1221 switch (req) {
1222 case RTM_DELETE:
1223 if (netmask) {
1224 rt_maskedcopy(dst, (struct sockaddr *)&maskeddst,
1225 netmask);
1226 dst = (struct sockaddr *)&maskeddst;
1227 }
1228 if ((rt = rt_lookup(rtbl, dst, netmask)) == NULL)
1229 senderr(ESRCH);
1230 if ((rt = rt_deladdr(rtbl, dst, netmask)) == NULL)
1231 senderr(ESRCH);
1232 rt->rt_flags &= ~RTF_UP;
1233 if ((ifa = rt->rt_ifa)) {
1234 if (ifa->ifa_flags & IFA_ROUTE &&
1235 rt_ifa_connected(rt, ifa)) {
1236 RT_DPRINTF("rt->_rt_key = %p, ifa = %p, "
1237 "deleted IFA_ROUTE\n",
1238 (void *)rt->_rt_key, (void *)ifa);
1239 ifa->ifa_flags &= ~IFA_ROUTE;
1240 }
1241 if (ifa->ifa_rtrequest)
1242 ifa->ifa_rtrequest(RTM_DELETE, rt, info);
1243 ifa = NULL;
1244 }
1245 rttrash++;
1246 if (ret_nrt) {
1247 *ret_nrt = rt;
1248 rt_ref(rt);
1249 RT_REFCNT_TRACE(rt);
1250 }
1251 RT_UNLOCK();
1252 need_unlock = false;
1253 rt_timer_remove_all(rt);
1254 rtcache_clear_rtentry(dst->sa_family, rt);
1255 #if defined(INET) || defined(INET6)
1256 if (netmask != NULL)
1257 lltable_prefix_free(dst->sa_family, dst, netmask, 0);
1258 #endif
1259 if (ret_nrt == NULL) {
1260 /* Adjust the refcount */
1261 rt_ref(rt);
1262 RT_REFCNT_TRACE(rt);
1263 rt_free(rt);
1264 }
1265 break;
1266
1267 case RTM_ADD:
1268 if (info->rti_ifa == NULL) {
1269 ifp = rt_getifp(info, &psref_ifp);
1270 ifa = rt_getifa(info, &psref_ifa);
1271 if (ifa == NULL)
1272 senderr(ENETUNREACH);
1273 } else {
1274 /* Caller should have a reference of ifa */
1275 ifa = info->rti_ifa;
1276 need_to_release_ifa = false;
1277 }
1278 rt = pool_get(&rtentry_pool, PR_NOWAIT);
1279 if (rt == NULL)
1280 senderr(ENOBUFS);
1281 memset(rt, 0, sizeof(*rt));
1282 rt->rt_flags = RTF_UP | flags;
1283 LIST_INIT(&rt->rt_timer);
1284
1285 RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);
1286 if (netmask) {
1287 rt_maskedcopy(dst, (struct sockaddr *)&maskeddst,
1288 netmask);
1289 rt_setkey(rt, (struct sockaddr *)&maskeddst, M_NOWAIT);
1290 } else {
1291 rt_setkey(rt, dst, M_NOWAIT);
1292 }
1293 RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);
1294 if (rt_getkey(rt) == NULL ||
1295 rt_setgate(rt, gateway) != 0) {
1296 pool_put(&rtentry_pool, rt);
1297 senderr(ENOBUFS);
1298 }
1299
1300 rt_set_ifa(rt, ifa);
1301 if (info->rti_info[RTAX_TAG] != NULL) {
1302 const struct sockaddr *tag;
1303 tag = rt_settag(rt, info->rti_info[RTAX_TAG]);
1304 if (tag == NULL)
1305 senderr(ENOBUFS);
1306 }
1307 RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);
1308
1309 ss = pserialize_read_enter();
1310 if (info->rti_info[RTAX_IFP] != NULL) {
1311 struct ifaddr *ifa2;
1312 ifa2 = ifa_ifwithnet(info->rti_info[RTAX_IFP]);
1313 if (ifa2 != NULL)
1314 rt->rt_ifp = ifa2->ifa_ifp;
1315 else
1316 rt->rt_ifp = ifa->ifa_ifp;
1317 } else
1318 rt->rt_ifp = ifa->ifa_ifp;
1319 pserialize_read_exit(ss);
1320 cv_init(&rt->rt_cv, "rtentry");
1321 psref_target_init(&rt->rt_psref, rt_psref_class);
1322
1323 RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);
1324 rc = rt_addaddr(rtbl, rt, netmask);
1325 RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);
1326 if (rc != 0) {
1327 ifafree(ifa); /* for rt_set_ifa above */
1328 cv_destroy(&rt->rt_cv);
1329 rt_destroy(rt);
1330 pool_put(&rtentry_pool, rt);
1331 senderr(rc);
1332 }
1333 RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);
1334 if (ifa->ifa_rtrequest)
1335 ifa->ifa_rtrequest(req, rt, info);
1336 if (need_to_release_ifa)
1337 ifa_release(ifa, &psref_ifa);
1338 ifa = NULL;
1339 if_put(ifp, &psref_ifp);
1340 ifp = NULL;
1341 RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);
1342 if (ret_nrt) {
1343 *ret_nrt = rt;
1344 rt_ref(rt);
1345 RT_REFCNT_TRACE(rt);
1346 }
1347 RT_UNLOCK();
1348 need_unlock = false;
1349 rtflushall(dst->sa_family);
1350 break;
1351 case RTM_GET:
1352 if (netmask != NULL) {
1353 rt_maskedcopy(dst, (struct sockaddr *)&maskeddst,
1354 netmask);
1355 dst = (struct sockaddr *)&maskeddst;
1356 }
1357 if ((rt = rt_lookup(rtbl, dst, netmask)) == NULL)
1358 senderr(ESRCH);
1359 if (ret_nrt != NULL) {
1360 *ret_nrt = rt;
1361 rt_ref(rt);
1362 RT_REFCNT_TRACE(rt);
1363 }
1364 break;
1365 }
1366 bad:
1367 if (need_to_release_ifa)
1368 ifa_release(ifa, &psref_ifa);
1369 if_put(ifp, &psref_ifp);
1370 curlwp_bindx(bound);
1371 if (need_unlock)
1372 RT_UNLOCK();
1373 splx(s);
1374 return error;
1375 }
1376
/*
 * Replace the gateway address of rt with a private copy of gate.
 * For indirect routes (RTF_GATEWAY) also consult the route towards the
 * gateway to clamp the cached MTU.  Returns 0 or ENOMEM.
 */
int
rt_setgate(struct rtentry *rt, const struct sockaddr *gate)
{
	struct sockaddr *new, *old;

	KASSERT(rt->_rt_key != NULL);
	RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);

	new = sockaddr_dup(gate, M_ZERO | M_NOWAIT);
	if (new == NULL)
		return ENOMEM;

	old = rt->rt_gateway;
	rt->rt_gateway = new;
	if (old != NULL)
		sockaddr_free(old);

	KASSERT(rt->_rt_key != NULL);
	RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);

	if (rt->rt_flags & RTF_GATEWAY) {
		struct rtentry *gwrt;

		/* XXX we cannot call rtalloc1 if holding the rt lock */
		if (RT_LOCKED())
			gwrt = rtalloc1_locked(gate, 1, false, true);
		else
			gwrt = rtalloc1(gate, 1);
		/*
		 * If we switched gateways, grab the MTU from the new
		 * gateway route if the current MTU is greater than the
		 * MTU of the gateway.
		 * Note that, if the MTU of gateway is 0, we will reset the
		 * MTU of the route to run PMTUD again from scratch. XXX
		 */
		if (gwrt != NULL) {
			KASSERT(gwrt->_rt_key != NULL);
			RT_DPRINTF("gwrt->_rt_key = %p\n", gwrt->_rt_key);
			/* Don't touch an administratively locked MTU. */
			if ((rt->rt_rmx.rmx_locks & RTV_MTU) == 0 &&
			    rt->rt_rmx.rmx_mtu &&
			    rt->rt_rmx.rmx_mtu > gwrt->rt_rmx.rmx_mtu) {
				rt->rt_rmx.rmx_mtu = gwrt->rt_rmx.rmx_mtu;
			}
			rt_unref(gwrt);
		}
	}
	KASSERT(rt->_rt_key != NULL);
	RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);
	return 0;
}
1427
1428 static void
1429 rt_maskedcopy(const struct sockaddr *src, struct sockaddr *dst,
1430 const struct sockaddr *netmask)
1431 {
1432 const char *netmaskp = &netmask->sa_data[0],
1433 *srcp = &src->sa_data[0];
1434 char *dstp = &dst->sa_data[0];
1435 const char *maskend = (char *)dst + MIN(netmask->sa_len, src->sa_len);
1436 const char *srcend = (char *)dst + src->sa_len;
1437
1438 dst->sa_len = src->sa_len;
1439 dst->sa_family = src->sa_family;
1440
1441 while (dstp < maskend)
1442 *dstp++ = *srcp++ & *netmaskp++;
1443 if (dstp < srcend)
1444 memset(dstp, 0, (size_t)(srcend - dstp));
1445 }
1446
1447 /*
1448 * Inform the routing socket of a route change.
1449 */
1450 void
1451 rt_newmsg(const int cmd, const struct rtentry *rt)
1452 {
1453 struct rt_addrinfo info;
1454
1455 memset((void *)&info, 0, sizeof(info));
1456 info.rti_info[RTAX_DST] = rt_getkey(rt);
1457 info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
1458 info.rti_info[RTAX_NETMASK] = rt_mask(rt);
1459 if (rt->rt_ifp) {
1460 info.rti_info[RTAX_IFP] = rt->rt_ifp->if_dl->ifa_addr;
1461 info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr;
1462 }
1463
1464 rt_missmsg(cmd, &info, rt->rt_flags, 0);
1465 }
1466
1467 /*
1468 * Set up or tear down a routing table entry, normally
1469 * for an interface.
1470 */
1471 int
1472 rtinit(struct ifaddr *ifa, int cmd, int flags)
1473 {
1474 struct rtentry *rt;
1475 struct sockaddr *dst, *odst;
1476 struct sockaddr_storage maskeddst;
1477 struct rtentry *nrt = NULL;
1478 int error;
1479 struct rt_addrinfo info;
1480
1481 dst = flags & RTF_HOST ? ifa->ifa_dstaddr : ifa->ifa_addr;
1482 if (cmd == RTM_DELETE) {
1483 if ((flags & RTF_HOST) == 0 && ifa->ifa_netmask) {
1484 /* Delete subnet route for this interface */
1485 odst = dst;
1486 dst = (struct sockaddr *)&maskeddst;
1487 rt_maskedcopy(odst, dst, ifa->ifa_netmask);
1488 }
1489 if ((rt = rtalloc1(dst, 0)) != NULL) {
1490 if (rt->rt_ifa != ifa) {
1491 rt_unref(rt);
1492 return (flags & RTF_HOST) ? EHOSTUNREACH
1493 : ENETUNREACH;
1494 }
1495 rt_unref(rt);
1496 }
1497 }
1498 memset(&info, 0, sizeof(info));
1499 info.rti_ifa = ifa;
1500 info.rti_flags = flags | ifa->ifa_flags;
1501 info.rti_info[RTAX_DST] = dst;
1502 info.rti_info[RTAX_GATEWAY] = ifa->ifa_addr;
1503
1504 /*
1505 * XXX here, it seems that we are assuming that ifa_netmask is NULL
1506 * for RTF_HOST. bsdi4 passes NULL explicitly (via intermediate
1507 * variable) when RTF_HOST is 1. still not sure if i can safely
1508 * change it to meet bsdi4 behavior.
1509 */
1510 if (cmd != RTM_LLINFO_UPD)
1511 info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask;
1512 error = rtrequest1((cmd == RTM_LLINFO_UPD) ? RTM_GET : cmd, &info,
1513 &nrt);
1514 if (error != 0)
1515 return error;
1516
1517 rt = nrt;
1518 RT_REFCNT_TRACE(rt);
1519 switch (cmd) {
1520 case RTM_DELETE:
1521 rt_newmsg(cmd, rt);
1522 rt_free(rt);
1523 break;
1524 case RTM_LLINFO_UPD:
1525 if (cmd == RTM_LLINFO_UPD && ifa->ifa_rtrequest != NULL)
1526 ifa->ifa_rtrequest(RTM_LLINFO_UPD, rt, &info);
1527 rt_newmsg(RTM_CHANGE, rt);
1528 rt_unref(rt);
1529 break;
1530 case RTM_ADD:
1531 /*
1532 * XXX it looks just reverting rt_ifa replaced by ifa_rtrequest
1533 * called via rtrequest1. Can we just prevent the replacement
1534 * somehow and remove the following code? And also doesn't
1535 * calling ifa_rtrequest(RTM_ADD) replace rt_ifa again?
1536 */
1537 if (rt->rt_ifa != ifa) {
1538 printf("rtinit: wrong ifa (%p) was (%p)\n", ifa,
1539 rt->rt_ifa);
1540 #ifdef NET_MPSAFE
1541 KASSERT(!cpu_softintr_p());
1542
1543 error = rt_update_prepare(rt);
1544 if (error == 0) {
1545 #endif
1546 if (rt->rt_ifa->ifa_rtrequest != NULL) {
1547 rt->rt_ifa->ifa_rtrequest(RTM_DELETE,
1548 rt, &info);
1549 }
1550 rt_replace_ifa(rt, ifa);
1551 rt->rt_ifp = ifa->ifa_ifp;
1552 if (ifa->ifa_rtrequest != NULL)
1553 ifa->ifa_rtrequest(RTM_ADD, rt, &info);
1554 #ifdef NET_MPSAFE
1555 rt_update_finish(rt);
1556 } else {
1557 /*
1558 * If error != 0, the rtentry is being
1559 * destroyed, so doing nothing doesn't
1560 * matter.
1561 */
1562 }
1563 #endif
1564 }
1565 rt_newmsg(cmd, rt);
1566 rt_unref(rt);
1567 RT_REFCNT_TRACE(rt);
1568 break;
1569 }
1570 return error;
1571 }
1572
1573 /*
1574 * Create a local route entry for the address.
1575 * Announce the addition of the address and the route to the routing socket.
1576 */
1577 int
1578 rt_ifa_addlocal(struct ifaddr *ifa)
1579 {
1580 struct rtentry *rt;
1581 int e;
1582
1583 /* If there is no loopback entry, allocate one. */
1584 rt = rtalloc1(ifa->ifa_addr, 0);
1585 #ifdef RT_DEBUG
1586 if (rt != NULL)
1587 dump_rt(rt);
1588 #endif
1589 if (rt == NULL || (rt->rt_flags & RTF_HOST) == 0 ||
1590 (rt->rt_ifp->if_flags & IFF_LOOPBACK) == 0)
1591 {
1592 struct rt_addrinfo info;
1593 struct rtentry *nrt;
1594
1595 memset(&info, 0, sizeof(info));
1596 info.rti_flags = RTF_HOST | RTF_LOCAL;
1597 if (!(ifa->ifa_ifp->if_flags & (IFF_LOOPBACK|IFF_POINTOPOINT)))
1598 info.rti_flags |= RTF_LLDATA;
1599 info.rti_info[RTAX_DST] = ifa->ifa_addr;
1600 info.rti_info[RTAX_GATEWAY] =
1601 (const struct sockaddr *)ifa->ifa_ifp->if_sadl;
1602 info.rti_ifa = ifa;
1603 nrt = NULL;
1604 e = rtrequest1(RTM_ADD, &info, &nrt);
1605 if (nrt && ifa != nrt->rt_ifa)
1606 rt_replace_ifa(nrt, ifa);
1607 rt_newaddrmsg(RTM_ADD, ifa, e, nrt);
1608 if (nrt != NULL) {
1609 #ifdef RT_DEBUG
1610 dump_rt(nrt);
1611 #endif
1612 rt_unref(nrt);
1613 RT_REFCNT_TRACE(nrt);
1614 }
1615 } else {
1616 e = 0;
1617 rt_newaddrmsg(RTM_NEWADDR, ifa, 0, NULL);
1618 }
1619 if (rt != NULL)
1620 rt_unref(rt);
1621 return e;
1622 }
1623
1624 /*
1625 * Remove the local route entry for the address.
1626 * Announce the removal of the address and the route to the routing socket.
1627 */
1628 int
1629 rt_ifa_remlocal(struct ifaddr *ifa, struct ifaddr *alt_ifa)
1630 {
1631 struct rtentry *rt;
1632 int e = 0;
1633
1634 rt = rtalloc1(ifa->ifa_addr, 0);
1635
1636 /*
1637 * Before deleting, check if a corresponding loopbacked
1638 * host route surely exists. With this check, we can avoid
1639 * deleting an interface direct route whose destination is
1640 * the same as the address being removed. This can happen
1641 * when removing a subnet-router anycast address on an
1642 * interface attached to a shared medium.
1643 */
1644 if (rt != NULL &&
1645 (rt->rt_flags & RTF_HOST) &&
1646 (rt->rt_ifp->if_flags & IFF_LOOPBACK))
1647 {
1648 /* If we cannot replace the route's ifaddr with the equivalent
1649 * ifaddr of another interface, I believe it is safest to
1650 * delete the route.
1651 */
1652 if (alt_ifa == NULL) {
1653 e = rtdeletemsg(rt);
1654 if (e == 0) {
1655 rt_unref(rt);
1656 rt_free(rt);
1657 rt = NULL;
1658 }
1659 rt_newaddrmsg(RTM_DELADDR, ifa, 0, NULL);
1660 } else {
1661 rt_replace_ifa(rt, alt_ifa);
1662 rt_newmsg(RTM_CHANGE, rt);
1663 }
1664 } else
1665 rt_newaddrmsg(RTM_DELADDR, ifa, 0, NULL);
1666 if (rt != NULL)
1667 rt_unref(rt);
1668 return e;
1669 }
1670
1671 /*
1672 * Route timer routines. These routes allow functions to be called
1673 * for various routes at any time. This is useful in supporting
1674 * path MTU discovery and redirect route deletion.
1675 *
1676 * This is similar to some BSDI internal functions, but it provides
1677 * for multiple queues for efficiency's sake...
1678 */
1679
1680 LIST_HEAD(, rttimer_queue) rttimer_queue_head;
1681 static int rt_init_done = 0;
1682
1683 /*
1684 * Some subtle order problems with domain initialization mean that
1685 * we cannot count on this being run from rt_init before various
1686 * protocol initializations are done. Therefore, we make sure
1687 * that this is run when the first queue is added...
1688 */
1689
1690 static void rt_timer_work(struct work *, void *);
1691
/*
 * One-time initialization of the route timer machinery: locks, the
 * queue list, the periodic callout and its backing workqueue.
 * Called lazily from rt_timer_queue_create(); see the comment above.
 */
static void
rt_timer_init(void)
{
	int error;

	assert(rt_init_done == 0);

	/* XXX should be in rt_init */
	rw_init(&rt_lock);
	rw_init(&rtcache_lock);

	LIST_INIT(&rttimer_queue_head);
	callout_init(&rt_timer_ch, CALLOUT_MPSAFE);
	/* Timer callbacks run from a workqueue so they may sleep. */
	error = workqueue_create(&rt_timer_wq, "rt_timer",
	    rt_timer_work, NULL, PRI_SOFTNET, IPL_SOFTNET, WQ_MPSAFE);
	if (error)
		panic("%s: workqueue_create failed (%d)\n", __func__, error);
	callout_reset(&rt_timer_ch, hz, rt_timer_timer, NULL);
	rt_init_done = 1;
}
1712
1713 struct rttimer_queue *
1714 rt_timer_queue_create(u_int timeout)
1715 {
1716 struct rttimer_queue *rtq;
1717
1718 if (rt_init_done == 0)
1719 rt_timer_init();
1720
1721 R_Malloc(rtq, struct rttimer_queue *, sizeof *rtq);
1722 if (rtq == NULL)
1723 return NULL;
1724 memset(rtq, 0, sizeof(*rtq));
1725
1726 rtq->rtq_timeout = timeout;
1727 TAILQ_INIT(&rtq->rtq_head);
1728 RT_WLOCK();
1729 LIST_INSERT_HEAD(&rttimer_queue_head, rtq, rtq_link);
1730 RT_UNLOCK();
1731
1732 return rtq;
1733 }
1734
/*
 * Change the timeout (seconds) of an existing timer queue.  Existing
 * entries are re-evaluated against the new timeout on the next tick.
 */
void
rt_timer_queue_change(struct rttimer_queue *rtq, long timeout)
{

	rtq->rtq_timeout = timeout;
}
1741
/*
 * Fire and remove every timer on the queue.  Called with RT_WLOCK held;
 * the lock is dropped around each callback (which may sleep) and then
 * reacquired, so the queue is re-read from the head each iteration.
 */
static void
rt_timer_queue_remove_all(struct rttimer_queue *rtq)
{
	struct rttimer *r;

	RT_ASSERT_WLOCK();

	while ((r = TAILQ_FIRST(&rtq->rtq_head)) != NULL) {
		LIST_REMOVE(r, rtt_link);
		TAILQ_REMOVE(&rtq->rtq_head, r, rtt_next);
		rt_ref(r->rtt_rt); /* XXX */
		RT_REFCNT_TRACE(r->rtt_rt);
		RT_UNLOCK();
		(*r->rtt_func)(r->rtt_rt, r);
		pool_put(&rttimer_pool, r);
		RT_WLOCK();
		if (rtq->rtq_count > 0)
			rtq->rtq_count--;
		else
			printf("rt_timer_queue_remove_all: "
			    "rtq_count reached 0\n");
	}
}
1765
/*
 * Drain a timer queue and unlink it from the global queue list.
 */
void
rt_timer_queue_destroy(struct rttimer_queue *rtq)
{

	RT_WLOCK();
	rt_timer_queue_remove_all(rtq);
	LIST_REMOVE(rtq, rtq_link);
	RT_UNLOCK();

	/*
	 * Caller is responsible for freeing the rttimer_queue structure.
	 */
}
1779
/*
 * Return the number of timers currently on the queue (unlocked read).
 */
unsigned long
rt_timer_count(struct rttimer_queue *rtq)
{
	return rtq->rtq_count;
}
1785
/*
 * Remove (without firing) all timers attached to the given route,
 * e.g. when the route is being deleted.
 */
static void
rt_timer_remove_all(struct rtentry *rt)
{
	struct rttimer *r;

	RT_WLOCK();
	while ((r = LIST_FIRST(&rt->rt_timer)) != NULL) {
		LIST_REMOVE(r, rtt_link);
		TAILQ_REMOVE(&r->rtt_queue->rtq_head, r, rtt_next);
		if (r->rtt_queue->rtq_count > 0)
			r->rtt_queue->rtq_count--;
		else
			printf("rt_timer_remove_all: rtq_count reached 0\n");
		pool_put(&rttimer_pool, r);
	}
	RT_UNLOCK();
}
1803
/*
 * Arm a timer on the route: when it expires, func is invoked with the
 * route and the rttimer.  Only one timer per (route, func) pair may be
 * pending; an existing one is recycled.  Returns 0 or ENOBUFS.
 */
int
rt_timer_add(struct rtentry *rt,
	void (*func)(struct rtentry *, struct rttimer *),
	struct rttimer_queue *queue)
{
	struct rttimer *r;

	KASSERT(func != NULL);
	RT_WLOCK();
	/*
	 * If there's already a timer with this action, destroy it before
	 * we add a new one.
	 */
	LIST_FOREACH(r, &rt->rt_timer, rtt_link) {
		if (r->rtt_func == func)
			break;
	}
	if (r != NULL) {
		/* Recycle the existing entry after unlinking it. */
		LIST_REMOVE(r, rtt_link);
		TAILQ_REMOVE(&r->rtt_queue->rtq_head, r, rtt_next);
		if (r->rtt_queue->rtq_count > 0)
			r->rtt_queue->rtq_count--;
		else
			printf("rt_timer_add: rtq_count reached 0\n");
	} else {
		r = pool_get(&rttimer_pool, PR_NOWAIT);
		if (r == NULL) {
			RT_UNLOCK();
			return ENOBUFS;
		}
	}

	memset(r, 0, sizeof(*r));

	r->rtt_rt = rt;
	r->rtt_time = time_uptime;
	r->rtt_func = func;
	r->rtt_queue = queue;
	LIST_INSERT_HEAD(&rt->rt_timer, r, rtt_link);
	TAILQ_INSERT_TAIL(&queue->rtq_head, r, rtt_next);
	r->rtt_queue->rtq_count++;

	RT_UNLOCK();

	return 0;
}
1850
/*
 * Workqueue handler behind rt_timer_timer(): walk every timer queue and
 * fire entries whose timeout has elapsed.  The lock is dropped around
 * each callback (which may sleep), so queues are re-read from the head.
 * Re-arms the callout before returning.
 */
static void
rt_timer_work(struct work *wk, void *arg)
{
	struct rttimer_queue *rtq;
	struct rttimer *r;

	RT_WLOCK();
	LIST_FOREACH(rtq, &rttimer_queue_head, rtq_link) {
		while ((r = TAILQ_FIRST(&rtq->rtq_head)) != NULL &&
		    (r->rtt_time + rtq->rtq_timeout) < time_uptime) {
			LIST_REMOVE(r, rtt_link);
			TAILQ_REMOVE(&rtq->rtq_head, r, rtt_next);
			rt_ref(r->rtt_rt); /* XXX */
			RT_REFCNT_TRACE(r->rtt_rt);
			RT_UNLOCK();
			(*r->rtt_func)(r->rtt_rt, r);
			pool_put(&rttimer_pool, r);
			RT_WLOCK();
			if (rtq->rtq_count > 0)
				rtq->rtq_count--;
			else
				printf("rt_timer_timer: rtq_count reached 0\n");
		}
	}
	RT_UNLOCK();

	callout_reset(&rt_timer_ch, hz, rt_timer_timer, NULL);
}
1879
/*
 * Callout handler: defer the actual timer processing to the workqueue
 * so the callbacks may sleep.
 */
static void
rt_timer_timer(void *arg)
{

	workqueue_enqueue(rt_timer_wq, &rt_timer_wk, NULL);
}
1886
/*
 * Populate an empty route cache by looking up its destination; flag is
 * passed to rtalloc1() (nonzero permits cloning).  Called with the
 * rtcache write lock held.  Returns the cached rtentry or NULL.
 */
static struct rtentry *
_rtcache_init(struct route *ro, int flag)
{
	struct rtentry *rt;

	rtcache_invariants(ro);
	KASSERT(ro->_ro_rt == NULL);
	RTCACHE_ASSERT_WLOCK();

	if (rtcache_getdst(ro) == NULL)
		return NULL;
	ro->ro_invalid = false;
	rt = rtalloc1(rtcache_getdst(ro), flag);
	if (rt != NULL && ISSET(rt->rt_flags, RTF_UP)) {
		ro->_ro_rt = rt;
		KASSERT(!ISSET(rt->rt_flags, RTF_UPDATING));
		/* Hold a psref for the cache, then drop the lookup ref. */
		rtcache_ref(rt, ro);
		rt_unref(rt);
		rtcache(ro);
	} else if (rt != NULL)
		rt_unref(rt);

	rtcache_invariants(ro);
	return ro->_ro_rt;
}
1912
/*
 * Initialize a route cache, permitting route cloning (flag = 1).
 */
struct rtentry *
rtcache_init(struct route *ro)
{
	struct rtentry *rt;

	RTCACHE_WLOCK();
	rt = _rtcache_init(ro, 1);
	RTCACHE_UNLOCK();

	return rt;
}
1922
/*
 * Initialize a route cache without permitting route cloning (flag = 0).
 */
struct rtentry *
rtcache_init_noclone(struct route *ro)
{
	struct rtentry *rt;

	RTCACHE_WLOCK();
	rt = _rtcache_init(ro, 0);
	RTCACHE_UNLOCK();

	return rt;
}
1932
/*
 * Discard the cached route and look it up again; clone is passed
 * through to rtalloc1().
 */
struct rtentry *
rtcache_update(struct route *ro, int clone)
{
	struct rtentry *rt;

	RTCACHE_WLOCK();
	rtcache_clear(ro);
	rt = _rtcache_init(ro, clone);
	RTCACHE_UNLOCK();

	return rt;
}
1943
/*
 * Copy one route cache into another: duplicate the destination sockaddr
 * and share the validated rtentry.  The psref taken by rtcache_validate()
 * is handed over to new_ro and released from old_ro on the way out.
 */
void
rtcache_copy(struct route *new_ro, struct route *old_ro)
{
	struct rtentry *rt;
	int ret;

	KASSERT(new_ro != old_ro);
	rtcache_invariants(new_ro);
	rtcache_invariants(old_ro);

	rt = rtcache_validate(old_ro);

	if (rtcache_getdst(old_ro) == NULL)
		goto out;
	ret = rtcache_setdst(new_ro, rtcache_getdst(old_ro));
	if (ret != 0)
		goto out;

	RTCACHE_WLOCK();
	new_ro->ro_invalid = false;
	if ((new_ro->_ro_rt = rt) != NULL)
		rtcache(new_ro);
	rtcache_invariants(new_ro);
	RTCACHE_UNLOCK();
out:
	rtcache_unref(rt, old_ro);
	return;
}
1972
1973 static struct dom_rtlist invalid_routes = LIST_HEAD_INITIALIZER(dom_rtlist);
1974
#if defined(RT_DEBUG) && defined(NET_MPSAFE)
/*
 * Debug helper: log a psref acquire/release event for a cached route,
 * including the destination, current CPU, lwp and psref addresses.
 */
static void
rtcache_trace(const char *func, struct rtentry *rt, struct route *ro)
{
	char dst[64];

	sockaddr_format(ro->ro_sa, dst, 64);
	printf("trace: %s:\tdst=%s cpu=%d lwp=%p psref=%p target=%p\n", func, dst,
	    cpu_index(curcpu()), curlwp, &ro->ro_psref, &rt->rt_psref);
}
#define RTCACHE_PSREF_TRACE(rt, ro) rtcache_trace(__func__, (rt), (ro))
#else
#define RTCACHE_PSREF_TRACE(rt, ro) do {} while (0)
#endif
1989
/*
 * Take a psref on the rtentry on behalf of the route cache (MP-safe
 * kernels only; otherwise a no-op).  Binds the lwp to its CPU for the
 * lifetime of the reference.
 */
static void
rtcache_ref(struct rtentry *rt, struct route *ro)
{

	KASSERT(rt != NULL);

#ifdef NET_MPSAFE
	RTCACHE_PSREF_TRACE(rt, ro);
	ro->ro_bound = curlwp_bind();
	psref_acquire(&ro->ro_psref, &rt->rt_psref, rt_psref_class);
#endif
}
2002
/*
 * Release the psref taken by rtcache_ref() and unbind the lwp.
 * Safe to call with rt == NULL.
 */
void
rtcache_unref(struct rtentry *rt, struct route *ro)
{

	if (rt == NULL)
		return;

#ifdef NET_MPSAFE
	psref_release(&ro->ro_psref, &rt->rt_psref, rt_psref_class);
	curlwp_bindx(ro->ro_bound);
	RTCACHE_PSREF_TRACE(rt, ro);
#endif
}
2016
/*
 * Check that the cached route is still usable (present, not invalidated,
 * RTF_UP, has an interface) and take a reference on it.  Called with the
 * rtcache read lock held.  On an MP-safe kernel, an RTF_UPDATING entry
 * makes us drop both locks, wait for the update and retry (when waiting
 * is allowed).  Returns the referenced rtentry or NULL.
 */
static struct rtentry *
rtcache_validate_locked(struct route *ro)
{
	struct rtentry *rt = NULL;

#ifdef NET_MPSAFE
retry:
#endif
	rt = ro->_ro_rt;
	rtcache_invariants(ro);

	if (ro->ro_invalid) {
		rt = NULL;
		goto out;
	}

	RT_RLOCK();
	if (rt != NULL && (rt->rt_flags & RTF_UP) != 0 && rt->rt_ifp != NULL) {
#ifdef NET_MPSAFE
		if (ISSET(rt->rt_flags, RTF_UPDATING)) {
			if (rt_wait_ok()) {
				RT_UNLOCK();
				RTCACHE_UNLOCK();
				/* We can wait until the update is complete */
				rt_update_wait();
				RTCACHE_RLOCK();
				goto retry;
			} else {
				rt = NULL;
			}
		} else
#endif
			rtcache_ref(rt, ro);
	} else
		rt = NULL;
	RT_UNLOCK();
out:
	return rt;
}
2056
/*
 * Locked wrapper around rtcache_validate_locked(): returns the cached,
 * referenced rtentry if it is still usable, otherwise NULL.
 */
struct rtentry *
rtcache_validate(struct route *ro)
{
	struct rtentry *rt;

	RTCACHE_RLOCK();
	rt = rtcache_validate_locked(ro);
	RTCACHE_UNLOCK();

	return rt;
}
2067
/*
 * Mark every cache on the list invalid and move it to the global
 * invalid_routes list.  Called with the rtcache write lock held.
 */
static void
rtcache_invalidate(struct dom_rtlist *rtlist)
{
	struct route *ro;

	RTCACHE_ASSERT_WLOCK();

	while ((ro = LIST_FIRST(rtlist)) != NULL) {
		rtcache_invariants(ro);
		KASSERT(ro->_ro_rt != NULL);
		ro->ro_invalid = true;
		LIST_REMOVE(ro, ro_rtcache_next);
		LIST_INSERT_HEAD(&invalid_routes, ro, ro_rtcache_next);
		rtcache_invariants(ro);
	}
}
2084
2085 static void
2086 rtcache_clear_rtentry(int family, struct rtentry *rt)
2087 {
2088 struct domain *dom;
2089 struct route *ro, *nro;
2090
2091 if ((dom = pffinddomain(family)) == NULL)
2092 return;
2093
2094 RTCACHE_WLOCK();
2095 LIST_FOREACH_SAFE(ro, &dom->dom_rtcache, ro_rtcache_next, nro) {
2096 if (ro->_ro_rt == rt)
2097 rtcache_clear(ro);
2098 }
2099 RTCACHE_UNLOCK();
2100 }
2101
/*
 * Detach the cached rtentry from the route cache (the destination
 * sockaddr is kept).  Called with the rtcache write lock held.
 */
static void
rtcache_clear(struct route *ro)
{

	RTCACHE_ASSERT_WLOCK();

	rtcache_invariants(ro);
	if (ro->_ro_rt == NULL)
		return;

	/* Unlink from the per-domain (or invalid) cache list. */
	LIST_REMOVE(ro, ro_rtcache_next);

	ro->_ro_rt = NULL;
	ro->ro_invalid = false;
	rtcache_invariants(ro);
}
2118
/*
 * Look dst up through the route cache.  On a hit (same destination,
 * cache still valid) the cached entry is returned with *hitp = 1.
 * Otherwise the cache is reset to dst and repopulated via
 * _rtcache_init() with *hitp = 0.  Note the read lock is traded for
 * the write lock on every miss path before the cache is modified.
 */
struct rtentry *
rtcache_lookup2(struct route *ro, const struct sockaddr *dst,
	int clone, int *hitp)
{
	const struct sockaddr *odst;
	struct rtentry *rt = NULL;

	RTCACHE_RLOCK();
	odst = rtcache_getdst(ro);
	if (odst == NULL) {
		/* Empty cache: upgrade to the write lock and fill it. */
		RTCACHE_UNLOCK();
		RTCACHE_WLOCK();
		goto miss;
	}

	if (sockaddr_cmp(odst, dst) != 0) {
		/* Cached destination differs: discard and refill. */
		RTCACHE_UNLOCK();
		RTCACHE_WLOCK();
		rtcache_free_locked(ro);
		goto miss;
	}

	rt = rtcache_validate_locked(ro);
	if (rt == NULL) {
		/* The cache is expired: refresh it. */
		RTCACHE_UNLOCK();
		RTCACHE_WLOCK();
		rtcache_clear(ro);
		goto miss;
	}

	rtcache_invariants(ro);

	RTCACHE_UNLOCK();
	if (hitp != NULL)
		*hitp = 1;
	return rt;
miss:
	if (hitp != NULL)
		*hitp = 0;
	if (rtcache_setdst_locked(ro, dst) == 0)
		rt = _rtcache_init(ro, clone);

	rtcache_invariants(ro);

	RTCACHE_UNLOCK();
	return rt;
}
2166
/*
 * Fully empty a route cache: detach the rtentry and free the stored
 * destination sockaddr.  Called with the rtcache write lock held.
 */
static void
rtcache_free_locked(struct route *ro)
{

	RTCACHE_ASSERT_WLOCK();
	rtcache_clear(ro);
	if (ro->ro_sa != NULL) {
		sockaddr_free(ro->ro_sa);
		ro->ro_sa = NULL;
	}
	rtcache_invariants(ro);
}
2179
/*
 * Locked wrapper around rtcache_free_locked().
 */
void
rtcache_free(struct route *ro)
{

	RTCACHE_WLOCK();
	rtcache_free_locked(ro);
	RTCACHE_UNLOCK();
}
2188
/*
 * Store a copy of sa as the cache's destination, reusing the existing
 * sockaddr when the family matches.  Any cached rtentry is detached.
 * Called with the rtcache write lock held.  Returns 0 or ENOMEM.
 */
static int
rtcache_setdst_locked(struct route *ro, const struct sockaddr *sa)
{
	KASSERT(sa != NULL);

	RTCACHE_ASSERT_WLOCK();

	rtcache_invariants(ro);
	if (ro->ro_sa != NULL) {
		if (ro->ro_sa->sa_family == sa->sa_family) {
			/* Same family: reuse the buffer in place. */
			rtcache_clear(ro);
			sockaddr_copy(ro->ro_sa, ro->ro_sa->sa_len, sa);
			rtcache_invariants(ro);
			return 0;
		}
		/* free ro_sa, wrong family */
		rtcache_free_locked(ro);
	}

	KASSERT(ro->_ro_rt == NULL);

	if ((ro->ro_sa = sockaddr_dup(sa, M_ZERO | M_NOWAIT)) == NULL) {
		rtcache_invariants(ro);
		return ENOMEM;
	}
	rtcache_invariants(ro);
	return 0;
}
2217
/*
 * Locked wrapper around rtcache_setdst_locked().  Returns 0 or ENOMEM.
 */
int
rtcache_setdst(struct route *ro, const struct sockaddr *sa)
{
	int rv;

	RTCACHE_WLOCK();
	rv = rtcache_setdst_locked(ro, sa);
	RTCACHE_UNLOCK();

	return rv;
}
2229
2230 const struct sockaddr *
2231 rt_settag(struct rtentry *rt, const struct sockaddr *tag)
2232 {
2233 if (rt->rt_tag != tag) {
2234 if (rt->rt_tag != NULL)
2235 sockaddr_free(rt->rt_tag);
2236 rt->rt_tag = sockaddr_dup(tag, M_ZERO | M_NOWAIT);
2237 }
2238 return rt->rt_tag;
2239 }
2240
/*
 * Return the tag attached to the route, or NULL if none is set.
 */
struct sockaddr *
rt_gettag(const struct rtentry *rt)
{
	return rt->rt_tag;
}
2246
2247 int
2248 rt_check_reject_route(const struct rtentry *rt, const struct ifnet *ifp)
2249 {
2250
2251 if ((rt->rt_flags & RTF_REJECT) != 0) {
2252 /* Mimic looutput */
2253 if (ifp->if_flags & IFF_LOOPBACK)
2254 return (rt->rt_flags & RTF_HOST) ?
2255 EHOSTUNREACH : ENETUNREACH;
2256 else if (rt->rt_rmx.rmx_expire == 0 ||
2257 time_uptime < rt->rt_rmx.rmx_expire)
2258 return (rt->rt_flags & RTF_GATEWAY) ?
2259 EHOSTUNREACH : EHOSTDOWN;
2260 }
2261
2262 return 0;
2263 }
2264
/*
 * Repeatedly search the routing table of the family for an entry
 * matched by predicate f and delete it, until no match remains.
 * Each match is referenced before the table lock is dropped so it
 * stays valid across the rtrequest(RTM_DELETE) call.
 */
void
rt_delete_matched_entries(sa_family_t family, int (*f)(struct rtentry *, void *),
    void *v)
{

	for (;;) {
		int s;
		int error;
		struct rtentry *rt, *retrt = NULL;

		RT_RLOCK();
		s = splsoftnet();
		rt = rtbl_search_matched_entry(family, f, v);
		if (rt == NULL) {
			splx(s);
			RT_UNLOCK();
			return;
		}
		/* Pin the entry before dropping the lock. */
		rt->rt_refcnt++;
		splx(s);
		RT_UNLOCK();

		error = rtrequest(RTM_DELETE, rt_getkey(rt), rt->rt_gateway,
		    rt_mask(rt), rt->rt_flags, &retrt);
		if (error == 0) {
			KASSERT(retrt == rt);
			KASSERT((retrt->rt_flags & RTF_UP) == 0);
			retrt->rt_ifp = NULL;
			rt_unref(rt);
			rt_free(retrt);
		} else if (error == ESRCH) {
			/* Someone deleted the entry already. */
			rt_unref(rt);
		} else {
			log(LOG_ERR, "%s: unable to delete rtentry @ %p, "
			    "error = %d\n", rt->rt_ifp->if_xname, rt, error);
			/* XXX how to treat this case? */
		}
	}
}
2305
2306 int
2307 rt_walktree(sa_family_t family, int (*f)(struct rtentry *, void *), void *v)
2308 {
2309 int error;
2310
2311 RT_RLOCK();
2312 error = rtbl_walktree(family, f, v);
2313 RT_UNLOCK();
2314
2315 return error;
2316 }
2317
2318 #ifdef DDB
2319
2320 #include <machine/db_machdep.h>
2321 #include <ddb/db_interface.h>
2322 #include <ddb/db_output.h>
2323
2324 #define rt_expire rt_rmx.rmx_expire
2325
2326 static void
2327 db_print_sa(const struct sockaddr *sa)
2328 {
2329 int len;
2330 const u_char *p;
2331
2332 if (sa == NULL) {
2333 db_printf("[NULL]");
2334 return;
2335 }
2336
2337 p = (const u_char *)sa;
2338 len = sa->sa_len;
2339 db_printf("[");
2340 while (len > 0) {
2341 db_printf("%d", *p);
2342 p++; len--;
2343 if (len) db_printf(",");
2344 }
2345 db_printf("]\n");
2346 }
2347
/*
 * Dump the interesting fields of an ifaddr from ddb: its address,
 * destination/broadcast address, netmask, and the flags/refcnt/metric
 * triple.  A NULL ifa prints nothing.
 */
static void
db_print_ifa(struct ifaddr *ifa)
{
	if (ifa == NULL)
		return;
	db_printf(" ifa_addr=");
	db_print_sa(ifa->ifa_addr);
	db_printf(" ifa_dsta=");
	db_print_sa(ifa->ifa_dstaddr);
	db_printf(" ifa_mask=");
	db_print_sa(ifa->ifa_netmask);
	db_printf(" flags=0x%x,refcnt=%d,metric=%d\n",
	    ifa->ifa_flags,
	    ifa->ifa_refcnt,
	    ifa->ifa_metric);
}
2364
2365 /*
2366 * Function to pass to rt_walktree().
2367 * Return non-zero error to abort walk.
2368 */
static int
db_show_rtentry(struct rtentry *rt, void *w)
{
	/* One rtentry per call: header line with flags/refcnt/use/expire. */
	db_printf("rtentry=%p", rt);

	db_printf(" flags=0x%x refcnt=%d use=%"PRId64" expire=%"PRId64"\n",
	    rt->rt_flags, rt->rt_refcnt,
	    rt->rt_use, (uint64_t)rt->rt_expire);

	/* Destination key, netmask, and gateway sockaddrs. */
	db_printf(" key="); db_print_sa(rt_getkey(rt));
	db_printf(" mask="); db_print_sa(rt_mask(rt));
	db_printf(" gw="); db_print_sa(rt->rt_gateway);

	/* Outgoing interface; may be NULL for a detached route. */
	db_printf(" ifp=%p ", rt->rt_ifp);
	if (rt->rt_ifp)
		db_printf("(%s)", rt->rt_ifp->if_xname);
	else
		db_printf("(NULL)");

	db_printf(" ifa=%p\n", rt->rt_ifa);
	db_print_ifa(rt->rt_ifa);

	db_printf(" gwroute=%p llinfo=%p\n",
	    rt->rt_gwroute, rt->rt_llinfo);

	/* Always 0: never abort the rt_walktree() walk. */
	return 0;
}
2396
/*
 * Function to print the AF_INET route tree.
 * Use this from ddb: "show routes"
 */
void
db_show_routes(db_expr_t addr, bool have_addr,
    db_expr_t count, const char *modif)
{
	/*
	 * NOTE(review): only the AF_INET tree is walked here, even
	 * though the ddb command is "show routes"; other families
	 * (e.g. AF_INET6) are not displayed — confirm if intentional.
	 */
	rt_walktree(AF_INET, db_show_rtentry, NULL);
}
2407 #endif
2408