route.c revision 1.198 1 /* $NetBSD: route.c,v 1.198 2017/09/21 04:44:32 ozaki-r Exp $ */
2
3 /*-
4 * Copyright (c) 1998, 2008 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Kevin M. Lahey of the Numerical Aerospace Simulation Facility,
9 * NASA Ames Research Center.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
23 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 * POSSIBILITY OF SUCH DAMAGE.
31 */
32
33 /*
34 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
35 * All rights reserved.
36 *
37 * Redistribution and use in source and binary forms, with or without
38 * modification, are permitted provided that the following conditions
39 * are met:
40 * 1. Redistributions of source code must retain the above copyright
41 * notice, this list of conditions and the following disclaimer.
42 * 2. Redistributions in binary form must reproduce the above copyright
43 * notice, this list of conditions and the following disclaimer in the
44 * documentation and/or other materials provided with the distribution.
45 * 3. Neither the name of the project nor the names of its contributors
46 * may be used to endorse or promote products derived from this software
47 * without specific prior written permission.
48 *
49 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
50 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
53 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59 * SUCH DAMAGE.
60 */
61
62 /*
63 * Copyright (c) 1980, 1986, 1991, 1993
64 * The Regents of the University of California. All rights reserved.
65 *
66 * Redistribution and use in source and binary forms, with or without
67 * modification, are permitted provided that the following conditions
68 * are met:
69 * 1. Redistributions of source code must retain the above copyright
70 * notice, this list of conditions and the following disclaimer.
71 * 2. Redistributions in binary form must reproduce the above copyright
72 * notice, this list of conditions and the following disclaimer in the
73 * documentation and/or other materials provided with the distribution.
74 * 3. Neither the name of the University nor the names of its contributors
75 * may be used to endorse or promote products derived from this software
76 * without specific prior written permission.
77 *
78 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
79 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
80 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
81 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
82 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
83 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
84 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
85 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
86 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
87 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
88 * SUCH DAMAGE.
89 *
90 * @(#)route.c 8.3 (Berkeley) 1/9/95
91 */
92
93 #ifdef _KERNEL_OPT
94 #include "opt_inet.h"
95 #include "opt_route.h"
96 #include "opt_net_mpsafe.h"
97 #endif
98
99 #include <sys/cdefs.h>
100 __KERNEL_RCSID(0, "$NetBSD: route.c,v 1.198 2017/09/21 04:44:32 ozaki-r Exp $");
101
102 #include <sys/param.h>
103 #ifdef RTFLUSH_DEBUG
104 #include <sys/sysctl.h>
105 #endif
106 #include <sys/systm.h>
107 #include <sys/callout.h>
108 #include <sys/proc.h>
109 #include <sys/mbuf.h>
110 #include <sys/socket.h>
111 #include <sys/socketvar.h>
112 #include <sys/domain.h>
113 #include <sys/kernel.h>
114 #include <sys/ioctl.h>
115 #include <sys/pool.h>
116 #include <sys/kauth.h>
117 #include <sys/workqueue.h>
118 #include <sys/syslog.h>
119 #include <sys/rwlock.h>
120 #include <sys/mutex.h>
121 #include <sys/cpu.h>
122
123 #include <net/if.h>
124 #include <net/if_dl.h>
125 #include <net/route.h>
126 #if defined(INET) || defined(INET6)
127 #include <net/if_llatbl.h>
128 #endif
129
130 #include <netinet/in.h>
131 #include <netinet/in_var.h>
132
133 #ifdef RTFLUSH_DEBUG
134 #define rtcache_debug() __predict_false(_rtcache_debug)
135 #else /* RTFLUSH_DEBUG */
136 #define rtcache_debug() 0
137 #endif /* RTFLUSH_DEBUG */
138
139 #ifdef RT_DEBUG
140 #define RT_REFCNT_TRACE(rt) printf("%s:%d: rt=%p refcnt=%d\n", \
141 __func__, __LINE__, (rt), (rt)->rt_refcnt)
142 #else
143 #define RT_REFCNT_TRACE(rt) do {} while (0)
144 #endif
145
146 #ifdef DEBUG
147 #define dlog(level, fmt, args...) log(level, fmt, ##args)
148 #else
149 #define dlog(level, fmt, args...) do {} while (0)
150 #endif
151
152 struct rtstat rtstat;
153
154 static int rttrash; /* routes not in table but not freed */
155
156 static struct pool rtentry_pool;
157 static struct pool rttimer_pool;
158
159 static struct callout rt_timer_ch; /* callout for rt_timer_timer() */
160 static struct workqueue *rt_timer_wq;
161 static struct work rt_timer_wk;
162
163 static void rt_timer_init(void);
164 static void rt_timer_queue_remove_all(struct rttimer_queue *);
165 static void rt_timer_remove_all(struct rtentry *);
166 static void rt_timer_timer(void *);
167
168 /*
169 * Locking notes:
170 * - The routing table is protected by a global rwlock
171 * - API: RT_RLOCK and friends
172 * - rtcaches are protected by a global rwlock
173 * - API: RTCACHE_RLOCK and friends
 * - References to a rtentry are managed by reference counting and psref
 *   - Reference counting is used for temporal reference when a rtentry
 *     is fetched from the routing table
 *   - psref is used for temporal reference when a rtentry is fetched
 *     from a rtcache
 *     - struct route (rtcache) has struct psref, so we cannot obtain
 *       a reference twice on the same struct route
 *   - Before destroying or updating a rtentry, we have to wait for
 *     all references left (see below for details)
183 * - APIs
184 * - An obtained rtentry via rtalloc1 or rtrequest* must be
185 * unreferenced by rt_unref
186 * - An obtained rtentry via rtcache_* must be unreferenced by
187 * rtcache_unref
188 * - TODO: once we get a lockless routing table, we should use only
189 * psref for rtentries
190 * - rtentry destruction
191 * - A rtentry is destroyed (freed) only when we call rtrequest(RTM_DELETE)
192 * - If a caller of rtrequest grabs a reference of a rtentry, the caller
193 * has a responsibility to destroy the rtentry by itself by calling
194 * rt_free
195 * - If not, rtrequest itself does that
196 * - If rt_free is called in softint, the actual destruction routine is
197 * deferred to a workqueue
198 * - rtentry update
199 * - When updating a rtentry, RTF_UPDATING flag is set
200 * - If a rtentry is set RTF_UPDATING, fetching the rtentry from
201 * the routing table or a rtcache results in either of the following
202 * cases:
203 * - if the caller runs in softint, the caller fails to fetch
204 * - otherwise, the caller waits for the update completed and retries
205 * to fetch (probably succeed to fetch for the second time)
206 */
207
208 /*
209 * Global locks for the routing table and rtcaches.
210 * Locking order: rtcache_lock => rt_lock
211 */
212 static krwlock_t rt_lock __cacheline_aligned;
213 #ifdef NET_MPSAFE
214 #define RT_RLOCK() rw_enter(&rt_lock, RW_READER)
215 #define RT_WLOCK() rw_enter(&rt_lock, RW_WRITER)
216 #define RT_UNLOCK() rw_exit(&rt_lock)
217 #define RT_LOCKED() rw_lock_held(&rt_lock)
218 #define RT_ASSERT_WLOCK() KASSERT(rw_write_held(&rt_lock))
219 #else
220 #define RT_RLOCK() do {} while (0)
221 #define RT_WLOCK() do {} while (0)
222 #define RT_UNLOCK() do {} while (0)
223 #define RT_LOCKED() false
224 #define RT_ASSERT_WLOCK() do {} while (0)
225 #endif
226
227 static krwlock_t rtcache_lock __cacheline_aligned;
228 #ifdef NET_MPSAFE
229 #define RTCACHE_RLOCK() rw_enter(&rtcache_lock, RW_READER)
230 #define RTCACHE_WLOCK() rw_enter(&rtcache_lock, RW_WRITER)
231 #define RTCACHE_UNLOCK() rw_exit(&rtcache_lock)
232 #define RTCACHE_ASSERT_WLOCK() KASSERT(rw_write_held(&rtcache_lock))
233 #define RTCACHE_WLOCKED() rw_write_held(&rtcache_lock)
234 #else
235 #define RTCACHE_RLOCK() do {} while (0)
236 #define RTCACHE_WLOCK() do {} while (0)
237 #define RTCACHE_UNLOCK() do {} while (0)
238 #define RTCACHE_ASSERT_WLOCK() do {} while (0)
239 #define RTCACHE_WLOCKED() false
240 #endif
241
242 /*
243 * mutex and cv that are used to wait for references to a rtentry left
244 * before updating the rtentry.
245 */
246 static struct {
247 kmutex_t lock;
248 kcondvar_t cv;
249 bool ongoing;
250 const struct lwp *lwp;
251 } rt_update_global __cacheline_aligned;
252
253 /*
254 * A workqueue and stuff that are used to defer the destruction routine
255 * of rtentries.
256 */
257 static struct {
258 struct workqueue *wq;
259 struct work wk;
260 kmutex_t lock;
261 struct rtentry *queue[10];
262 } rt_free_global __cacheline_aligned;
263
264 /* psref for rtentry */
265 static struct psref_class *rt_psref_class __read_mostly;
266
267 #ifdef RTFLUSH_DEBUG
268 static int _rtcache_debug = 0;
269 #endif /* RTFLUSH_DEBUG */
270
271 static kauth_listener_t route_listener;
272
273 static int rtdeletemsg(struct rtentry *);
274 static void rtflushall(int);
275
276 static void rt_maskedcopy(const struct sockaddr *,
277 struct sockaddr *, const struct sockaddr *);
278
279 static void rtcache_clear(struct route *);
280 static void rtcache_clear_rtentry(int, struct rtentry *);
281 static void rtcache_invalidate(struct dom_rtlist *);
282
283 static void rt_ref(struct rtentry *);
284
285 static struct rtentry *
286 rtalloc1_locked(const struct sockaddr *, int, bool, bool);
287 static struct rtentry *
288 rtcache_validate_locked(struct route *);
289 static void rtcache_free_locked(struct route *);
290 static int rtcache_setdst_locked(struct route *, const struct sockaddr *);
291
292 static void rtcache_ref(struct rtentry *, struct route *);
293
294 #ifdef NET_MPSAFE
295 static void rt_update_wait(void);
296 #endif
297
298 static bool rt_wait_ok(void);
299 static void rt_wait_refcnt(const char *, struct rtentry *, int);
300 static void rt_wait_psref(struct rtentry *);
301
302 #ifdef DDB
303 static void db_print_sa(const struct sockaddr *);
304 static void db_print_ifa(struct ifaddr *);
305 static int db_show_rtentry(struct rtentry *, void *);
306 #endif
307
308 #ifdef RTFLUSH_DEBUG
309 static void sysctl_net_rtcache_setup(struct sysctllog **);
310 static void
311 sysctl_net_rtcache_setup(struct sysctllog **clog)
312 {
313 const struct sysctlnode *rnode;
314
315 if (sysctl_createv(clog, 0, NULL, &rnode, CTLFLAG_PERMANENT,
316 CTLTYPE_NODE,
317 "rtcache", SYSCTL_DESCR("Route cache related settings"),
318 NULL, 0, NULL, 0, CTL_NET, CTL_CREATE, CTL_EOL) != 0)
319 return;
320 if (sysctl_createv(clog, 0, &rnode, &rnode,
321 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, CTLTYPE_INT,
322 "debug", SYSCTL_DESCR("Debug route caches"),
323 NULL, 0, &_rtcache_debug, 0, CTL_CREATE, CTL_EOL) != 0)
324 return;
325 }
326 #endif /* RTFLUSH_DEBUG */
327
328 static inline void
329 rt_destroy(struct rtentry *rt)
330 {
331 if (rt->_rt_key != NULL)
332 sockaddr_free(rt->_rt_key);
333 if (rt->rt_gateway != NULL)
334 sockaddr_free(rt->rt_gateway);
335 if (rt_gettag(rt) != NULL)
336 sockaddr_free(rt_gettag(rt));
337 rt->_rt_key = rt->rt_gateway = rt->rt_tag = NULL;
338 }
339
340 static inline const struct sockaddr *
341 rt_setkey(struct rtentry *rt, const struct sockaddr *key, int flags)
342 {
343 if (rt->_rt_key == key)
344 goto out;
345
346 if (rt->_rt_key != NULL)
347 sockaddr_free(rt->_rt_key);
348 rt->_rt_key = sockaddr_dup(key, flags);
349 out:
350 rt->rt_nodes->rn_key = (const char *)rt->_rt_key;
351 return rt->_rt_key;
352 }
353
354 struct ifaddr *
355 rt_get_ifa(struct rtentry *rt)
356 {
357 struct ifaddr *ifa;
358
359 if ((ifa = rt->rt_ifa) == NULL)
360 return ifa;
361 else if (ifa->ifa_getifa == NULL)
362 return ifa;
363 #if 0
364 else if (ifa->ifa_seqno != NULL && *ifa->ifa_seqno == rt->rt_ifa_seqno)
365 return ifa;
366 #endif
367 else {
368 ifa = (*ifa->ifa_getifa)(ifa, rt_getkey(rt));
369 if (ifa == NULL)
370 return NULL;
371 rt_replace_ifa(rt, ifa);
372 return ifa;
373 }
374 }
375
376 static void
377 rt_set_ifa1(struct rtentry *rt, struct ifaddr *ifa)
378 {
379 rt->rt_ifa = ifa;
380 if (ifa->ifa_seqno != NULL)
381 rt->rt_ifa_seqno = *ifa->ifa_seqno;
382 }
383
384 /*
385 * Is this route the connected route for the ifa?
386 */
387 static int
388 rt_ifa_connected(const struct rtentry *rt, const struct ifaddr *ifa)
389 {
390 const struct sockaddr *key, *dst, *odst;
391 struct sockaddr_storage maskeddst;
392
393 key = rt_getkey(rt);
394 dst = rt->rt_flags & RTF_HOST ? ifa->ifa_dstaddr : ifa->ifa_addr;
395 if (dst == NULL ||
396 dst->sa_family != key->sa_family ||
397 dst->sa_len != key->sa_len)
398 return 0;
399 if ((rt->rt_flags & RTF_HOST) == 0 && ifa->ifa_netmask) {
400 odst = dst;
401 dst = (struct sockaddr *)&maskeddst;
402 rt_maskedcopy(odst, (struct sockaddr *)&maskeddst,
403 ifa->ifa_netmask);
404 }
405 return (memcmp(dst, key, dst->sa_len) == 0);
406 }
407
/*
 * Replace rt's ifa with a new one.  When rt is the old ifa's connected
 * route, the IFA_ROUTE flag is migrated from the old ifa to the new
 * one (if rt is also the new ifa's connected route).  A reference is
 * taken on the new ifa and the reference on the old one is dropped.
 */
void
rt_replace_ifa(struct rtentry *rt, struct ifaddr *ifa)
{
	if (rt->rt_ifa &&
	    rt->rt_ifa != ifa &&
	    rt->rt_ifa->ifa_flags & IFA_ROUTE &&
	    rt_ifa_connected(rt, rt->rt_ifa))
	{
		RT_DPRINTF("rt->_rt_key = %p, ifa = %p, "
		    "replace deleted IFA_ROUTE\n",
		    (void *)rt->_rt_key, (void *)rt->rt_ifa);
		rt->rt_ifa->ifa_flags &= ~IFA_ROUTE;
		if (rt_ifa_connected(rt, ifa)) {
			RT_DPRINTF("rt->_rt_key = %p, ifa = %p, "
			    "replace added IFA_ROUTE\n",
			    (void *)rt->_rt_key, (void *)ifa);
			ifa->ifa_flags |= IFA_ROUTE;
		}
	}

	/* Reference the new ifa before freeing the old (they may alias). */
	ifaref(ifa);
	ifafree(rt->rt_ifa);
	rt_set_ifa1(rt, ifa);
}
432
/* Set rt's ifa, taking a reference on it. */
static void
rt_set_ifa(struct rtentry *rt, struct ifaddr *ifa)
{
	ifaref(ifa);
	rt_set_ifa1(rt, ifa);
}
439
440 static int
441 route_listener_cb(kauth_cred_t cred, kauth_action_t action, void *cookie,
442 void *arg0, void *arg1, void *arg2, void *arg3)
443 {
444 struct rt_msghdr *rtm;
445 int result;
446
447 result = KAUTH_RESULT_DEFER;
448 rtm = arg1;
449
450 if (action != KAUTH_NETWORK_ROUTE)
451 return result;
452
453 if (rtm->rtm_type == RTM_GET)
454 result = KAUTH_RESULT_ALLOW;
455
456 return result;
457 }
458
459 static void rt_free_work(struct work *, void *);
460
/*
 * One-time initialization of the routing subsystem: locks, the psref
 * class for rtentries, the deferred-free workqueue, the rtentry and
 * rttimer pools, the radix-trie code, and the kauth listener for
 * routing-socket requests.
 */
void
rt_init(void)
{
	int error;

#ifdef RTFLUSH_DEBUG
	sysctl_net_rtcache_setup(NULL);
#endif

	mutex_init(&rt_free_global.lock, MUTEX_DEFAULT, IPL_SOFTNET);
	rt_psref_class = psref_class_create("rtentry", IPL_SOFTNET);

	/* Workqueue used by rt_free() when called from softint context. */
	error = workqueue_create(&rt_free_global.wq, "rt_free",
	    rt_free_work, NULL, PRI_SOFTNET, IPL_SOFTNET, WQ_MPSAFE);
	if (error)
		panic("%s: workqueue_create failed (%d)\n", __func__, error);

	mutex_init(&rt_update_global.lock, MUTEX_DEFAULT, IPL_SOFTNET);
	cv_init(&rt_update_global.cv, "rt_update");

	pool_init(&rtentry_pool, sizeof(struct rtentry), 0, 0, 0, "rtentpl",
	    NULL, IPL_SOFTNET);
	pool_init(&rttimer_pool, sizeof(struct rttimer), 0, 0, 0, "rttmrpl",
	    NULL, IPL_SOFTNET);

	rn_init();	/* initialize all zeroes, all ones, mask table */
	rtbl_init();

	route_listener = kauth_listen_scope(KAUTH_SCOPE_NETWORK,
	    route_listener_cb, NULL);
}
492
493 static void
494 rtflushall(int family)
495 {
496 struct domain *dom;
497
498 if (rtcache_debug())
499 printf("%s: enter\n", __func__);
500
501 if ((dom = pffinddomain(family)) == NULL)
502 return;
503
504 RTCACHE_WLOCK();
505 rtcache_invalidate(&dom->dom_rtcache);
506 RTCACHE_UNLOCK();
507 }
508
/*
 * Link ro into its destination family's per-domain rtcache list so it
 * can later be invalidated by rtcache_invalidate().  The caller must
 * hold the rtcache write lock and ro must hold a valid rtentry.
 */
static void
rtcache(struct route *ro)
{
	struct domain *dom;

	RTCACHE_ASSERT_WLOCK();

	rtcache_invariants(ro);
	KASSERT(ro->_ro_rt != NULL);
	KASSERT(ro->ro_invalid == false);
	KASSERT(rtcache_getdst(ro) != NULL);

	/* No domain for this family: the cache simply isn't tracked. */
	if ((dom = pffinddomain(rtcache_getdst(ro)->sa_family)) == NULL)
		return;

	LIST_INSERT_HEAD(&dom->dom_rtcache, ro, ro_rtcache_next);
	rtcache_invariants(ro);
}
527
528 #ifdef RT_DEBUG
529 static void
530 dump_rt(const struct rtentry *rt)
531 {
532 char buf[512];
533
534 aprint_normal("rt: ");
535 aprint_normal("p=%p ", rt);
536 if (rt->_rt_key == NULL) {
537 aprint_normal("dst=(NULL) ");
538 } else {
539 sockaddr_format(rt->_rt_key, buf, sizeof(buf));
540 aprint_normal("dst=%s ", buf);
541 }
542 if (rt->rt_gateway == NULL) {
543 aprint_normal("gw=(NULL) ");
544 } else {
545 sockaddr_format(rt->_rt_key, buf, sizeof(buf));
546 aprint_normal("gw=%s ", buf);
547 }
548 aprint_normal("flags=%x ", rt->rt_flags);
549 if (rt->rt_ifp == NULL) {
550 aprint_normal("if=(NULL) ");
551 } else {
552 aprint_normal("if=%s ", rt->rt_ifp->if_xname);
553 }
554 aprint_normal("\n");
555 }
556 #endif /* RT_DEBUG */
557
558 /*
559 * Packet routing routines. If success, refcnt of a returned rtentry
560 * will be incremented. The caller has to rtfree it by itself.
561 */
/*
 * Look up dst in its address family's routing table.  The caller must
 * hold the routing table lock; wlock says whether it is held as writer,
 * so the lock can be reacquired in the same mode after waiting for an
 * in-progress update.  On success the returned rtentry's refcnt has
 * been incremented and the caller must release it with rt_unref (or
 * rt_free if it takes over destruction).
 */
struct rtentry *
rtalloc1_locked(const struct sockaddr *dst, int report, bool wait_ok,
    bool wlock)
{
	rtbl_t *rtbl;
	struct rtentry *rt;
	int s;

#ifdef NET_MPSAFE
retry:
#endif
	s = splsoftnet();
	rtbl = rt_gettable(dst->sa_family);
	if (rtbl == NULL)
		goto miss;

	rt = rt_matchaddr(rtbl, dst);
	if (rt == NULL)
		goto miss;

	/* An entry being destroyed must not be handed out. */
	if (!ISSET(rt->rt_flags, RTF_UP))
		goto miss;

#ifdef NET_MPSAFE
	if (ISSET(rt->rt_flags, RTF_UPDATING) &&
	    /* XXX updater should be always able to acquire */
	    curlwp != rt_update_global.lwp) {
		bool need_lock = false;
		/* Cannot sleep (e.g. in softint context): treat as a miss. */
		if (!wait_ok || !rt_wait_ok())
			goto miss;
		RT_UNLOCK();
		splx(s);

		/* XXX need more proper solution */
		if (RTCACHE_WLOCKED()) {
			RTCACHE_UNLOCK();
			need_lock = true;
		}

		/* We can wait until the update is complete */
		rt_update_wait();

		/* Reacquire locks in the caller's original modes and retry. */
		if (need_lock)
			RTCACHE_WLOCK();
		if (wlock)
			RT_WLOCK();
		else
			RT_RLOCK();
		goto retry;
	}
#endif /* NET_MPSAFE */

	rt_ref(rt);
	RT_REFCNT_TRACE(rt);

	splx(s);
	return rt;
miss:
	rtstat.rts_unreach++;
	/* Optionally report the failed lookup on the routing socket. */
	if (report) {
		struct rt_addrinfo info;

		memset(&info, 0, sizeof(info));
		info.rti_info[RTAX_DST] = dst;
		rt_missmsg(RTM_MISS, &info, 0, 0);
	}
	splx(s);
	return NULL;
}
631
632 struct rtentry *
633 rtalloc1(const struct sockaddr *dst, int report)
634 {
635 struct rtentry *rt;
636
637 RT_RLOCK();
638 rt = rtalloc1_locked(dst, report, true, false);
639 RT_UNLOCK();
640
641 return rt;
642 }
643
/* Acquire a reference on rt (refcnt is manipulated atomically). */
static void
rt_ref(struct rtentry *rt)
{

	KASSERT(rt->rt_refcnt >= 0);
	atomic_inc_uint(&rt->rt_refcnt);
}
651
/*
 * Release a reference on rt.  If the entry is being destroyed or
 * updated, wake up anyone sleeping on rt->rt_cv (rt_wait_refcnt).
 */
void
rt_unref(struct rtentry *rt)
{

	KASSERT(rt != NULL);
	KASSERTMSG(rt->rt_refcnt > 0, "refcnt=%d", rt->rt_refcnt);

	atomic_dec_uint(&rt->rt_refcnt);
	if (!ISSET(rt->rt_flags, RTF_UP) || ISSET(rt->rt_flags, RTF_UPDATING)) {
		/* Take the lock to serialize the wakeup with the waiter. */
		mutex_enter(&rt_free_global.lock);
		cv_broadcast(&rt->rt_cv);
		mutex_exit(&rt_free_global.lock);
	}
}
666
/*
 * Return true if the current context may sleep.  Hard interrupt
 * context is forbidden outright; softint context must not sleep.
 */
static bool
rt_wait_ok(void)
{

	KASSERT(!cpu_intr_p());
	return !cpu_softintr_p();
}
674
/*
 * Sleep until rt's reference count drops to cnt.  rt_unref()
 * broadcasts rt->rt_cv under rt_free_global.lock when the entry is
 * dying or updating.  title is only used for debug logging.
 */
void
rt_wait_refcnt(const char *title, struct rtentry *rt, int cnt)
{
	mutex_enter(&rt_free_global.lock);
	while (rt->rt_refcnt > cnt) {
		dlog(LOG_DEBUG, "%s: %s waiting (refcnt=%d)\n",
		    __func__, title, rt->rt_refcnt);
		cv_wait(&rt->rt_cv, &rt_free_global.lock);
		dlog(LOG_DEBUG, "%s: %s waited (refcnt=%d)\n",
		    __func__, title, rt->rt_refcnt);
	}
	mutex_exit(&rt_free_global.lock);
}
688
/*
 * Wait for all psref references to rt to drain (psref_target_destroy
 * blocks until then), then re-arm the target so the entry can be
 * handed out again after an update.
 */
void
rt_wait_psref(struct rtentry *rt)
{

	psref_target_destroy(&rt->rt_psref, rt_psref_class);
	psref_target_init(&rt->rt_psref, rt_psref_class);
}
696
/*
 * Final destruction of an rtentry: wait for all remaining references
 * (refcnt and psref) to drain, release everything the entry owns, and
 * return it to the pool.  Must run in a context that may sleep.
 */
static void
_rt_free(struct rtentry *rt)
{
	struct ifaddr *ifa;

	/*
	 * Need to avoid a deadlock on rt_wait_refcnt of update
	 * and a conflict on psref_target_destroy of update.
	 */
#ifdef NET_MPSAFE
	rt_update_wait();
#endif

	RT_REFCNT_TRACE(rt);
	KASSERTMSG(rt->rt_refcnt >= 0, "refcnt=%d", rt->rt_refcnt);
	rt_wait_refcnt("free", rt, 0);
#ifdef NET_MPSAFE
	psref_target_destroy(&rt->rt_psref, rt_psref_class);
#endif

	rt_assert_inactive(rt);
	rttrash--;
	/* Drop the ifa reference held since rt_set_ifa/rt_replace_ifa. */
	ifa = rt->rt_ifa;
	rt->rt_ifa = NULL;
	ifafree(ifa);
	rt->rt_ifp = NULL;
	cv_destroy(&rt->rt_cv);
	rt_destroy(rt);
	pool_put(&rtentry_pool, rt);
}
727
728 static void
729 rt_free_work(struct work *wk, void *arg)
730 {
731 int i;
732 struct rtentry *rt;
733
734 restart:
735 mutex_enter(&rt_free_global.lock);
736 for (i = 0; i < sizeof(rt_free_global.queue); i++) {
737 if (rt_free_global.queue[i] == NULL)
738 continue;
739 rt = rt_free_global.queue[i];
740 rt_free_global.queue[i] = NULL;
741 mutex_exit(&rt_free_global.lock);
742
743 atomic_dec_uint(&rt->rt_refcnt);
744 _rt_free(rt);
745 goto restart;
746 }
747 mutex_exit(&rt_free_global.lock);
748 }
749
750 void
751 rt_free(struct rtentry *rt)
752 {
753
754 KASSERT(rt->rt_refcnt > 0);
755 if (!rt_wait_ok()) {
756 int i;
757 mutex_enter(&rt_free_global.lock);
758 for (i = 0; i < sizeof(rt_free_global.queue); i++) {
759 if (rt_free_global.queue[i] == NULL) {
760 rt_free_global.queue[i] = rt;
761 break;
762 }
763 }
764 KASSERT(i < sizeof(rt_free_global.queue));
765 rt_ref(rt);
766 mutex_exit(&rt_free_global.lock);
767 workqueue_enqueue(rt_free_global.wq, &rt_free_global.wk, NULL);
768 } else {
769 atomic_dec_uint(&rt->rt_refcnt);
770 _rt_free(rt);
771 }
772 }
773
774 #ifdef NET_MPSAFE
/*
 * Block until no rtentry update (rt_update_prepare/rt_update_finish
 * section) is in progress.  Must not be called by the updating lwp
 * itself; rtalloc1_locked checks rt_update_global.lwp to avoid that.
 */
static void
rt_update_wait(void)
{

	mutex_enter(&rt_update_global.lock);
	while (rt_update_global.ongoing) {
		dlog(LOG_DEBUG, "%s: waiting lwp=%p\n", __func__, curlwp);
		cv_wait(&rt_update_global.cv, &rt_update_global.lock);
		dlog(LOG_DEBUG, "%s: waited lwp=%p\n", __func__, curlwp);
	}
	mutex_exit(&rt_update_global.lock);
}
787 #endif
788
/*
 * Begin updating rt: mark it RTF_UPDATING so new lookups wait (or fail
 * in softint), claim the single global update slot, and wait for the
 * entry's remaining references to drain.  Returns -1 if the entry is
 * already being destroyed, 0 on success; on success the caller must
 * finish with rt_update_finish().  The caller is expected to hold one
 * reference on rt, hence waiting for refcnt to drop to 1.
 */
int
rt_update_prepare(struct rtentry *rt)
{

	dlog(LOG_DEBUG, "%s: updating rt=%p lwp=%p\n", __func__, rt, curlwp);

	RTCACHE_WLOCK();
	RT_WLOCK();
	/* If the entry is being destroyed, don't proceed the update. */
	if (!ISSET(rt->rt_flags, RTF_UP)) {
		RT_UNLOCK();
		RTCACHE_UNLOCK();
		return -1;
	}
	rt->rt_flags |= RTF_UPDATING;
	RT_UNLOCK();
	RTCACHE_UNLOCK();

	/* Only one update may be in flight at a time; queue behind it. */
	mutex_enter(&rt_update_global.lock);
	while (rt_update_global.ongoing) {
		dlog(LOG_DEBUG, "%s: waiting ongoing updating rt=%p lwp=%p\n",
		    __func__, rt, curlwp);
		cv_wait(&rt_update_global.cv, &rt_update_global.lock);
		dlog(LOG_DEBUG, "%s: waited ongoing updating rt=%p lwp=%p\n",
		    __func__, rt, curlwp);
	}
	rt_update_global.ongoing = true;
	/* XXX need it to avoid rt_update_wait by updater itself. */
	rt_update_global.lwp = curlwp;
	mutex_exit(&rt_update_global.lock);

	rt_wait_refcnt("update", rt, 1);
	rt_wait_psref(rt);

	return 0;
}
825
/*
 * End an update started with rt_update_prepare(): clear RTF_UPDATING
 * so lookups may use the entry again, then release the global update
 * slot and wake any waiting updaters/lookups.
 */
void
rt_update_finish(struct rtentry *rt)
{

	RTCACHE_WLOCK();
	RT_WLOCK();
	rt->rt_flags &= ~RTF_UPDATING;
	RT_UNLOCK();
	RTCACHE_UNLOCK();

	mutex_enter(&rt_update_global.lock);
	rt_update_global.ongoing = false;
	rt_update_global.lwp = NULL;
	cv_broadcast(&rt_update_global.cv);
	mutex_exit(&rt_update_global.lock);

	dlog(LOG_DEBUG, "%s: updated rt=%p lwp=%p\n", __func__, rt, curlwp);
}
844
845 /*
846 * Force a routing table entry to the specified
847 * destination to go through the given gateway.
848 * Normally called as a result of a routing redirect
849 * message from the network layer.
850 *
851 * N.B.: must be called at splsoftnet
852 */
void
rtredirect(const struct sockaddr *dst, const struct sockaddr *gateway,
    const struct sockaddr *netmask, int flags, const struct sockaddr *src,
    struct rtentry **rtp)
{
	struct rtentry *rt;
	int error = 0;
	uint64_t *stat = NULL;
	struct rt_addrinfo info;
	struct ifaddr *ifa;
	struct psref psref;

	/* verify the gateway is directly reachable */
	if ((ifa = ifa_ifwithnet_psref(gateway, &psref)) == NULL) {
		error = ENETUNREACH;
		goto out;
	}
	rt = rtalloc1(dst, 0);
	/*
	 * If the redirect isn't from our current router for this dst,
	 * it's either old or wrong.  If it redirects us to ourselves,
	 * we have a routing loop, perhaps as a result of an interface
	 * going down recently.
	 */
	if (!(flags & RTF_DONE) && rt &&
	    (sockaddr_cmp(src, rt->rt_gateway) != 0 || rt->rt_ifa != ifa))
		error = EINVAL;
	else {
		/* Redirecting to one of our own addresses means a loop. */
		int s = pserialize_read_enter();
		struct ifaddr *_ifa;

		_ifa = ifa_ifwithaddr(gateway);
		if (_ifa != NULL)
			error = EHOSTUNREACH;
		pserialize_read_exit(s);
	}
	if (error)
		goto done;
	/*
	 * Create a new entry if we just got back a wildcard entry
	 * or the lookup failed.  This is necessary for hosts
	 * which use routing redirects generated by smart gateways
	 * to dynamically build the routing tables.
	 */
	if (rt == NULL || (rt_mask(rt) && rt_mask(rt)->sa_len < 2))
		goto create;
	/*
	 * Don't listen to the redirect if it's
	 * for a route to an interface.
	 */
	if (rt->rt_flags & RTF_GATEWAY) {
		if (((rt->rt_flags & RTF_HOST) == 0) && (flags & RTF_HOST)) {
			/*
			 * Changing from route to net => route to host.
			 * Create new route, rather than smashing route to net.
			 */
		create:
			if (rt != NULL)
				rt_unref(rt);
			flags |= RTF_GATEWAY | RTF_DYNAMIC;
			memset(&info, 0, sizeof(info));
			info.rti_info[RTAX_DST] = dst;
			info.rti_info[RTAX_GATEWAY] = gateway;
			info.rti_info[RTAX_NETMASK] = netmask;
			info.rti_ifa = ifa;
			info.rti_flags = flags;
			rt = NULL;
			error = rtrequest1(RTM_ADD, &info, &rt);
			if (rt != NULL)
				flags = rt->rt_flags;
			stat = &rtstat.rts_dynamic;
		} else {
			/*
			 * Smash the current notion of the gateway to
			 * this destination.  Should check about netmask!!!
			 */
#ifdef NET_MPSAFE
			KASSERT(!cpu_softintr_p());

			/* Serialize against other readers of the entry. */
			error = rt_update_prepare(rt);
			if (error == 0) {
#endif
				error = rt_setgate(rt, gateway);
				if (error == 0) {
					rt->rt_flags |= RTF_MODIFIED;
					flags |= RTF_MODIFIED;
				}
#ifdef NET_MPSAFE
				rt_update_finish(rt);
			} else {
				/*
				 * If error != 0, the rtentry is being
				 * destroyed, so doing nothing doesn't
				 * matter.
				 */
			}
#endif
			stat = &rtstat.rts_newgateway;
		}
	} else
		error = EHOSTUNREACH;
done:
	if (rt) {
		/* Hand the entry to the caller if requested, else drop it. */
		if (rtp != NULL && !error)
			*rtp = rt;
		else
			rt_unref(rt);
	}
out:
	if (error)
		rtstat.rts_badredirect++;
	else if (stat != NULL)
		(*stat)++;
	/* Always announce the redirect on the routing socket. */
	memset(&info, 0, sizeof(info));
	info.rti_info[RTAX_DST] = dst;
	info.rti_info[RTAX_GATEWAY] = gateway;
	info.rti_info[RTAX_NETMASK] = netmask;
	info.rti_info[RTAX_AUTHOR] = src;
	rt_missmsg(RTM_REDIRECT, &info, flags, error);
	ifa_release(ifa, &psref);
}
974
975 /*
976 * Delete a route and generate a message.
977 * It doesn't free a passed rt.
978 */
979 static int
980 rtdeletemsg(struct rtentry *rt)
981 {
982 int error;
983 struct rt_addrinfo info;
984 struct rtentry *retrt;
985
986 /*
987 * Request the new route so that the entry is not actually
988 * deleted. That will allow the information being reported to
989 * be accurate (and consistent with route_output()).
990 */
991 memset(&info, 0, sizeof(info));
992 info.rti_info[RTAX_DST] = rt_getkey(rt);
993 info.rti_info[RTAX_NETMASK] = rt_mask(rt);
994 info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
995 info.rti_flags = rt->rt_flags;
996 error = rtrequest1(RTM_DELETE, &info, &retrt);
997
998 rt_missmsg(RTM_DELETE, &info, info.rti_flags, error);
999
1000 return error;
1001 }
1002
/*
 * Find the ifaddr to associate with a route to dst via gateway.  On
 * success a psref reference is held on the returned ifa through psref;
 * the caller must release it with ifa_release().  Returns NULL when no
 * suitable ifa can be found.
 */
struct ifaddr *
ifa_ifwithroute_psref(int flags, const struct sockaddr *dst,
    const struct sockaddr *gateway, struct psref *psref)
{
	struct ifaddr *ifa = NULL;

	if ((flags & RTF_GATEWAY) == 0) {
		/*
		 * If we are adding a route to an interface,
		 * and the interface is a pt to pt link
		 * we should search for the destination
		 * as our clue to the interface.  Otherwise
		 * we can use the local address.
		 */
		if ((flags & RTF_HOST) && gateway->sa_family != AF_LINK)
			ifa = ifa_ifwithdstaddr_psref(dst, psref);
		if (ifa == NULL)
			ifa = ifa_ifwithaddr_psref(gateway, psref);
	} else {
		/*
		 * If we are adding a route to a remote net
		 * or host, the gateway may still be on the
		 * other end of a pt to pt link.
		 */
		ifa = ifa_ifwithdstaddr_psref(gateway, psref);
	}
	if (ifa == NULL)
		ifa = ifa_ifwithnet_psref(gateway, psref);
	if (ifa == NULL) {
		/* Last resort: route to the gateway and use its ifa. */
		int s;
		struct rtentry *rt;

		/* XXX we cannot call rtalloc1 if holding the rt lock */
		if (RT_LOCKED())
			rt = rtalloc1_locked(gateway, 0, true, true);
		else
			rt = rtalloc1(gateway, 0);
		if (rt == NULL)
			return NULL;
		/* A gateway that itself needs a gateway is unusable here. */
		if (rt->rt_flags & RTF_GATEWAY) {
			rt_unref(rt);
			return NULL;
		}
		/*
		 * Just in case. May not need to do this workaround.
		 * Revisit when working on rtentry MP-ification.
		 */
		s = pserialize_read_enter();
		IFADDR_READER_FOREACH(ifa, rt->rt_ifp) {
			if (ifa == rt->rt_ifa)
				break;
		}
		if (ifa != NULL)
			ifa_acquire(ifa, psref);
		pserialize_read_exit(s);
		rt_unref(rt);
		if (ifa == NULL)
			return NULL;
	}
	if (ifa->ifa_addr->sa_family != dst->sa_family) {
		/* Prefer an address of dst's family on the same interface. */
		struct ifaddr *nifa;
		int s;

		s = pserialize_read_enter();
		nifa = ifaof_ifpforaddr(dst, ifa->ifa_ifp);
		if (nifa != NULL) {
			ifa_release(ifa, psref);
			ifa_acquire(nifa, psref);
			ifa = nifa;
		}
		pserialize_read_exit(s);
	}
	return ifa;
}
1077
/*
 * If it succeeds and ret_nrt isn't NULL, refcnt of ret_nrt is incremented.
 * The caller has to rtfree it by itself.
 */
1082 int
1083 rtrequest(int req, const struct sockaddr *dst, const struct sockaddr *gateway,
1084 const struct sockaddr *netmask, int flags, struct rtentry **ret_nrt)
1085 {
1086 struct rt_addrinfo info;
1087
1088 memset(&info, 0, sizeof(info));
1089 info.rti_flags = flags;
1090 info.rti_info[RTAX_DST] = dst;
1091 info.rti_info[RTAX_GATEWAY] = gateway;
1092 info.rti_info[RTAX_NETMASK] = netmask;
1093 return rtrequest1(req, &info, ret_nrt);
1094 }
1095
1096 /*
1097 * It's a utility function to add/remove a route to/from the routing table
1098 * and tell user processes the addition/removal on success.
1099 */
1100 int
1101 rtrequest_newmsg(const int req, const struct sockaddr *dst,
1102 const struct sockaddr *gateway, const struct sockaddr *netmask,
1103 const int flags)
1104 {
1105 int error;
1106 struct rtentry *ret_nrt = NULL;
1107
1108 KASSERT(req == RTM_ADD || req == RTM_DELETE);
1109
1110 error = rtrequest(req, dst, gateway, netmask, flags, &ret_nrt);
1111 if (error != 0)
1112 return error;
1113
1114 KASSERT(ret_nrt != NULL);
1115
1116 rt_newmsg(req, ret_nrt); /* tell user process */
1117 if (req == RTM_DELETE)
1118 rt_free(ret_nrt);
1119 else
1120 rt_unref(ret_nrt);
1121
1122 return 0;
1123 }
1124
/*
 * Resolve the interface for a route request.  If info->rti_ifp is already
 * set, do nothing and return NULL (no new reference is taken).  Otherwise,
 * when the caller supplied an AF_LINK sockaddr in RTAX_IFP, look up the
 * matching interface and store a psref-held pointer in info->rti_ifp.
 *
 * A non-NULL result set here holds a reference through psref that the
 * caller must drop with if_put().
 */
struct ifnet *
rt_getifp(struct rt_addrinfo *info, struct psref *psref)
{
	const struct sockaddr *ifpaddr = info->rti_info[RTAX_IFP];

	if (info->rti_ifp != NULL)
		return NULL;
	/*
	 * ifp may be specified by sockaddr_dl when protocol address
	 * is ambiguous
	 */
	if (ifpaddr != NULL && ifpaddr->sa_family == AF_LINK) {
		struct ifaddr *ifa;
		int s = pserialize_read_enter();

		/* ifa is only stable inside the pserialize section, so
		 * convert it into a psref-held ifnet reference before exit. */
		ifa = ifa_ifwithnet(ifpaddr);
		if (ifa != NULL)
			info->rti_ifp = if_get_byindex(ifa->ifa_ifp->if_index,
			    psref);
		pserialize_read_exit(s);
	}

	return info->rti_ifp;
}
1149
/*
 * Choose the ifaddr a route described by info should be associated with.
 * Tries, in order: an exact match on RTAX_IFA, an address lookup on the
 * caller-provided interface, and finally ifa_ifwithroute_psref() on the
 * dst/gateway pair.  On success the ifa is psref-referenced (caller must
 * ifa_release), info->rti_ifa is set, and info->rti_ifp is filled in if
 * it was still NULL.  Returns NULL if no suitable ifaddr is found.
 */
struct ifaddr *
rt_getifa(struct rt_addrinfo *info, struct psref *psref)
{
	struct ifaddr *ifa = NULL;
	const struct sockaddr *dst = info->rti_info[RTAX_DST];
	const struct sockaddr *gateway = info->rti_info[RTAX_GATEWAY];
	const struct sockaddr *ifaaddr = info->rti_info[RTAX_IFA];
	int flags = info->rti_flags;
	const struct sockaddr *sa;

	/* First try an exact match on the specified interface address. */
	if (info->rti_ifa == NULL && ifaaddr != NULL) {
		ifa = ifa_ifwithaddr_psref(ifaaddr, psref);
		if (ifa != NULL)
			goto got;
	}

	/* Otherwise derive it from the ifa address, gateway or dst. */
	sa = ifaaddr != NULL ? ifaaddr :
	    (gateway != NULL ? gateway : dst);
	if (sa != NULL && info->rti_ifp != NULL)
		ifa = ifaof_ifpforaddr_psref(sa, info->rti_ifp, psref);
	else if (dst != NULL && gateway != NULL)
		ifa = ifa_ifwithroute_psref(flags, dst, gateway, psref);
	else if (sa != NULL)
		ifa = ifa_ifwithroute_psref(flags, sa, sa, psref);
	if (ifa == NULL)
		return NULL;
got:
	if (ifa->ifa_getifa != NULL) {
		/* FIXME ifa_getifa is NOMPSAFE */
		/*
		 * NOTE(review): the psref on the previous ifa is not
		 * released before acquiring the replacement — confirm
		 * against ifa_getifa's reference contract.
		 */
		ifa = (*ifa->ifa_getifa)(ifa, dst);
		if (ifa == NULL)
			return NULL;
		ifa_acquire(ifa, psref);
	}
	info->rti_ifa = ifa;
	if (info->rti_ifp == NULL)
		info->rti_ifp = ifa->ifa_ifp;
	return ifa;
}
1189
/*
 * If it succeeds and ret_nrt isn't NULL, refcnt of ret_nrt is incremented.
 * The caller has to rtfree it by itself.
 */
1194 int
1195 rtrequest1(int req, struct rt_addrinfo *info, struct rtentry **ret_nrt)
1196 {
1197 int s = splsoftnet(), ss;
1198 int error = 0, rc;
1199 struct rtentry *rt;
1200 rtbl_t *rtbl;
1201 struct ifaddr *ifa = NULL;
1202 struct sockaddr_storage maskeddst;
1203 const struct sockaddr *dst = info->rti_info[RTAX_DST];
1204 const struct sockaddr *gateway = info->rti_info[RTAX_GATEWAY];
1205 const struct sockaddr *netmask = info->rti_info[RTAX_NETMASK];
1206 int flags = info->rti_flags;
1207 struct psref psref_ifp, psref_ifa;
1208 int bound = 0;
1209 struct ifnet *ifp = NULL;
1210 bool need_to_release_ifa = true;
1211 bool need_unlock = true;
1212 #define senderr(x) { error = x ; goto bad; }
1213
1214 RT_WLOCK();
1215
1216 bound = curlwp_bind();
1217 if ((rtbl = rt_gettable(dst->sa_family)) == NULL)
1218 senderr(ESRCH);
1219 if (flags & RTF_HOST)
1220 netmask = NULL;
1221 switch (req) {
1222 case RTM_DELETE:
1223 if (netmask) {
1224 rt_maskedcopy(dst, (struct sockaddr *)&maskeddst,
1225 netmask);
1226 dst = (struct sockaddr *)&maskeddst;
1227 }
1228 if ((rt = rt_lookup(rtbl, dst, netmask)) == NULL)
1229 senderr(ESRCH);
1230 if ((rt = rt_deladdr(rtbl, dst, netmask)) == NULL)
1231 senderr(ESRCH);
1232 rt->rt_flags &= ~RTF_UP;
1233 if ((ifa = rt->rt_ifa)) {
1234 if (ifa->ifa_flags & IFA_ROUTE &&
1235 rt_ifa_connected(rt, ifa)) {
1236 RT_DPRINTF("rt->_rt_key = %p, ifa = %p, "
1237 "deleted IFA_ROUTE\n",
1238 (void *)rt->_rt_key, (void *)ifa);
1239 ifa->ifa_flags &= ~IFA_ROUTE;
1240 }
1241 if (ifa->ifa_rtrequest)
1242 ifa->ifa_rtrequest(RTM_DELETE, rt, info);
1243 ifa = NULL;
1244 }
1245 rttrash++;
1246 if (ret_nrt) {
1247 *ret_nrt = rt;
1248 rt_ref(rt);
1249 RT_REFCNT_TRACE(rt);
1250 }
1251 RT_UNLOCK();
1252 need_unlock = false;
1253 rt_timer_remove_all(rt);
1254 rtcache_clear_rtentry(dst->sa_family, rt);
1255 #if defined(INET) || defined(INET6)
1256 if (netmask != NULL)
1257 lltable_prefix_free(dst->sa_family, dst, netmask, 0);
1258 #endif
1259 if (ret_nrt == NULL) {
1260 /* Adjust the refcount */
1261 rt_ref(rt);
1262 RT_REFCNT_TRACE(rt);
1263 rt_free(rt);
1264 }
1265 break;
1266
1267 case RTM_ADD:
1268 if (info->rti_ifa == NULL) {
1269 ifp = rt_getifp(info, &psref_ifp);
1270 ifa = rt_getifa(info, &psref_ifa);
1271 if (ifa == NULL)
1272 senderr(ENETUNREACH);
1273 } else {
1274 /* Caller should have a reference of ifa */
1275 ifa = info->rti_ifa;
1276 need_to_release_ifa = false;
1277 }
1278 rt = pool_get(&rtentry_pool, PR_NOWAIT);
1279 if (rt == NULL)
1280 senderr(ENOBUFS);
1281 memset(rt, 0, sizeof(*rt));
1282 rt->rt_flags = RTF_UP | flags;
1283 LIST_INIT(&rt->rt_timer);
1284
1285 RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);
1286 if (netmask) {
1287 rt_maskedcopy(dst, (struct sockaddr *)&maskeddst,
1288 netmask);
1289 rt_setkey(rt, (struct sockaddr *)&maskeddst, M_NOWAIT);
1290 } else {
1291 rt_setkey(rt, dst, M_NOWAIT);
1292 }
1293 RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);
1294 if (rt_getkey(rt) == NULL ||
1295 rt_setgate(rt, gateway) != 0) {
1296 pool_put(&rtentry_pool, rt);
1297 senderr(ENOBUFS);
1298 }
1299
1300 rt_set_ifa(rt, ifa);
1301 if (info->rti_info[RTAX_TAG] != NULL) {
1302 const struct sockaddr *tag;
1303 tag = rt_settag(rt, info->rti_info[RTAX_TAG]);
1304 if (tag == NULL)
1305 senderr(ENOBUFS);
1306 }
1307 RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);
1308
1309 ss = pserialize_read_enter();
1310 if (info->rti_info[RTAX_IFP] != NULL) {
1311 struct ifaddr *ifa2;
1312 ifa2 = ifa_ifwithnet(info->rti_info[RTAX_IFP]);
1313 if (ifa2 != NULL)
1314 rt->rt_ifp = ifa2->ifa_ifp;
1315 else
1316 rt->rt_ifp = ifa->ifa_ifp;
1317 } else
1318 rt->rt_ifp = ifa->ifa_ifp;
1319 pserialize_read_exit(ss);
1320 cv_init(&rt->rt_cv, "rtentry");
1321 psref_target_init(&rt->rt_psref, rt_psref_class);
1322
1323 RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);
1324 rc = rt_addaddr(rtbl, rt, netmask);
1325 RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);
1326 if (rc != 0) {
1327 ifafree(ifa); /* for rt_set_ifa above */
1328 cv_destroy(&rt->rt_cv);
1329 rt_destroy(rt);
1330 pool_put(&rtentry_pool, rt);
1331 senderr(rc);
1332 }
1333 RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);
1334 if (ifa->ifa_rtrequest)
1335 ifa->ifa_rtrequest(req, rt, info);
1336 if (need_to_release_ifa)
1337 ifa_release(ifa, &psref_ifa);
1338 ifa = NULL;
1339 if_put(ifp, &psref_ifp);
1340 ifp = NULL;
1341 RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);
1342 if (ret_nrt) {
1343 *ret_nrt = rt;
1344 rt_ref(rt);
1345 RT_REFCNT_TRACE(rt);
1346 }
1347 RT_UNLOCK();
1348 need_unlock = false;
1349 rtflushall(dst->sa_family);
1350 break;
1351 case RTM_GET:
1352 if (netmask != NULL) {
1353 rt_maskedcopy(dst, (struct sockaddr *)&maskeddst,
1354 netmask);
1355 dst = (struct sockaddr *)&maskeddst;
1356 }
1357 if ((rt = rt_lookup(rtbl, dst, netmask)) == NULL)
1358 senderr(ESRCH);
1359 if (ret_nrt != NULL) {
1360 *ret_nrt = rt;
1361 rt_ref(rt);
1362 RT_REFCNT_TRACE(rt);
1363 }
1364 break;
1365 }
1366 bad:
1367 if (need_to_release_ifa)
1368 ifa_release(ifa, &psref_ifa);
1369 if_put(ifp, &psref_ifp);
1370 curlwp_bindx(bound);
1371 if (need_unlock)
1372 RT_UNLOCK();
1373 splx(s);
1374 return error;
1375 }
1376
/*
 * Replace the gateway sockaddr of rt with a copy of gate.  For
 * RTF_GATEWAY routes, also clamp the route's MTU to the MTU of the
 * route toward the new gateway.  Returns 0 or ENOMEM.
 */
int
rt_setgate(struct rtentry *rt, const struct sockaddr *gate)
{
	struct sockaddr *new, *old;

	KASSERT(rt->_rt_key != NULL);
	RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);

	new = sockaddr_dup(gate, M_ZERO | M_NOWAIT);
	if (new == NULL)
		return ENOMEM;

	/* Swap in the new gateway before freeing the old one. */
	old = rt->rt_gateway;
	rt->rt_gateway = new;
	if (old != NULL)
		sockaddr_free(old);

	KASSERT(rt->_rt_key != NULL);
	RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);

	if (rt->rt_flags & RTF_GATEWAY) {
		struct rtentry *gwrt;

		/* XXX we cannot call rtalloc1 if holding the rt lock */
		if (RT_LOCKED())
			gwrt = rtalloc1_locked(gate, 1, false, true);
		else
			gwrt = rtalloc1(gate, 1);
		/*
		 * If we switched gateways, grab the MTU from the new
		 * gateway route if the current MTU is greater than the
		 * MTU of the gateway.
		 * Note that, if the MTU of gateway is 0, we will reset the
		 * MTU of the route to run PMTUD again from scratch. XXX
		 */
		if (gwrt != NULL) {
			KASSERT(gwrt->_rt_key != NULL);
			RT_DPRINTF("gwrt->_rt_key = %p\n", gwrt->_rt_key);
			/* Respect an operator lock on the MTU metric. */
			if ((rt->rt_rmx.rmx_locks & RTV_MTU) == 0 &&
			    rt->rt_rmx.rmx_mtu &&
			    rt->rt_rmx.rmx_mtu > gwrt->rt_rmx.rmx_mtu) {
				rt->rt_rmx.rmx_mtu = gwrt->rt_rmx.rmx_mtu;
			}
			rt_unref(gwrt);
		}
	}
	KASSERT(rt->_rt_key != NULL);
	RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);
	return 0;
}
1427
/*
 * Copy src into dst, AND-ing the address bytes with netmask.  Note the
 * historical BSD pointer arithmetic: the data pointers start at sa_data,
 * but maskend/srcend are computed from the base of dst using sa_len
 * (which counts the sockaddr header too), so the masked region is
 * effectively sa_len minus the header size.  Bytes past the mask, up to
 * src's length, are zeroed.
 */
static void
rt_maskedcopy(const struct sockaddr *src, struct sockaddr *dst,
	const struct sockaddr *netmask)
{
	const char *netmaskp = &netmask->sa_data[0],
	    *srcp = &src->sa_data[0];
	char *dstp = &dst->sa_data[0];
	const char *maskend = (char *)dst + MIN(netmask->sa_len, src->sa_len);
	const char *srcend = (char *)dst + src->sa_len;

	dst->sa_len = src->sa_len;
	dst->sa_family = src->sa_family;

	while (dstp < maskend)
		*dstp++ = *srcp++ & *netmaskp++;
	if (dstp < srcend)
		memset(dstp, 0, (size_t)(srcend - dstp));
}
1446
1447 /*
1448 * Inform the routing socket of a route change.
1449 */
1450 void
1451 rt_newmsg(const int cmd, const struct rtentry *rt)
1452 {
1453 struct rt_addrinfo info;
1454
1455 memset((void *)&info, 0, sizeof(info));
1456 info.rti_info[RTAX_DST] = rt_getkey(rt);
1457 info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
1458 info.rti_info[RTAX_NETMASK] = rt_mask(rt);
1459 if (rt->rt_ifp) {
1460 info.rti_info[RTAX_IFP] = rt->rt_ifp->if_dl->ifa_addr;
1461 info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr;
1462 }
1463
1464 rt_missmsg(cmd, &info, rt->rt_flags, 0);
1465 }
1466
1467 /*
1468 * Set up or tear down a routing table entry, normally
1469 * for an interface.
1470 */
1471 int
1472 rtinit(struct ifaddr *ifa, int cmd, int flags)
1473 {
1474 struct rtentry *rt;
1475 struct sockaddr *dst, *odst;
1476 struct sockaddr_storage maskeddst;
1477 struct rtentry *nrt = NULL;
1478 int error;
1479 struct rt_addrinfo info;
1480
1481 dst = flags & RTF_HOST ? ifa->ifa_dstaddr : ifa->ifa_addr;
1482 if (cmd == RTM_DELETE) {
1483 if ((flags & RTF_HOST) == 0 && ifa->ifa_netmask) {
1484 /* Delete subnet route for this interface */
1485 odst = dst;
1486 dst = (struct sockaddr *)&maskeddst;
1487 rt_maskedcopy(odst, dst, ifa->ifa_netmask);
1488 }
1489 if ((rt = rtalloc1(dst, 0)) != NULL) {
1490 if (rt->rt_ifa != ifa) {
1491 rt_unref(rt);
1492 return (flags & RTF_HOST) ? EHOSTUNREACH
1493 : ENETUNREACH;
1494 }
1495 rt_unref(rt);
1496 }
1497 }
1498 memset(&info, 0, sizeof(info));
1499 info.rti_ifa = ifa;
1500 info.rti_flags = flags | ifa->ifa_flags;
1501 info.rti_info[RTAX_DST] = dst;
1502 info.rti_info[RTAX_GATEWAY] = ifa->ifa_addr;
1503
1504 /*
1505 * XXX here, it seems that we are assuming that ifa_netmask is NULL
1506 * for RTF_HOST. bsdi4 passes NULL explicitly (via intermediate
1507 * variable) when RTF_HOST is 1. still not sure if i can safely
1508 * change it to meet bsdi4 behavior.
1509 */
1510 if (cmd != RTM_LLINFO_UPD)
1511 info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask;
1512 error = rtrequest1((cmd == RTM_LLINFO_UPD) ? RTM_GET : cmd, &info,
1513 &nrt);
1514 if (error != 0)
1515 return error;
1516
1517 rt = nrt;
1518 RT_REFCNT_TRACE(rt);
1519 switch (cmd) {
1520 case RTM_DELETE:
1521 rt_newmsg(cmd, rt);
1522 rt_free(rt);
1523 break;
1524 case RTM_LLINFO_UPD:
1525 if (cmd == RTM_LLINFO_UPD && ifa->ifa_rtrequest != NULL)
1526 ifa->ifa_rtrequest(RTM_LLINFO_UPD, rt, &info);
1527 rt_newmsg(RTM_CHANGE, rt);
1528 rt_unref(rt);
1529 break;
1530 case RTM_ADD:
1531 /*
1532 * XXX it looks just reverting rt_ifa replaced by ifa_rtrequest
1533 * called via rtrequest1. Can we just prevent the replacement
1534 * somehow and remove the following code? And also doesn't
1535 * calling ifa_rtrequest(RTM_ADD) replace rt_ifa again?
1536 */
1537 if (rt->rt_ifa != ifa) {
1538 printf("rtinit: wrong ifa (%p) was (%p)\n", ifa,
1539 rt->rt_ifa);
1540 #ifdef NET_MPSAFE
1541 KASSERT(!cpu_softintr_p());
1542
1543 error = rt_update_prepare(rt);
1544 if (error == 0) {
1545 #endif
1546 if (rt->rt_ifa->ifa_rtrequest != NULL) {
1547 rt->rt_ifa->ifa_rtrequest(RTM_DELETE,
1548 rt, &info);
1549 }
1550 rt_replace_ifa(rt, ifa);
1551 rt->rt_ifp = ifa->ifa_ifp;
1552 if (ifa->ifa_rtrequest != NULL)
1553 ifa->ifa_rtrequest(RTM_ADD, rt, &info);
1554 #ifdef NET_MPSAFE
1555 rt_update_finish(rt);
1556 } else {
1557 /*
1558 * If error != 0, the rtentry is being
1559 * destroyed, so doing nothing doesn't
1560 * matter.
1561 */
1562 }
1563 #endif
1564 }
1565 rt_newmsg(cmd, rt);
1566 rt_unref(rt);
1567 RT_REFCNT_TRACE(rt);
1568 break;
1569 }
1570 return error;
1571 }
1572
1573 /*
1574 * Create a local route entry for the address.
1575 * Announce the addition of the address and the route to the routing socket.
1576 */
/*
 * Create a local (loopback) host route for ifa's address unless a
 * suitable one already exists, and announce the new address (and route)
 * on the routing socket.  Returns 0 or an error from rtrequest1().
 */
int
rt_ifa_addlocal(struct ifaddr *ifa)
{
	struct rtentry *rt;
	int e;

	/* If there is no loopback entry, allocate one. */
	rt = rtalloc1(ifa->ifa_addr, 0);
#ifdef RT_DEBUG
	if (rt != NULL)
		dump_rt(rt);
#endif
	if (rt == NULL || (rt->rt_flags & RTF_HOST) == 0 ||
	    (rt->rt_ifp->if_flags & IFF_LOOPBACK) == 0)
	{
		struct rt_addrinfo info;
		struct rtentry *nrt;

		memset(&info, 0, sizeof(info));
		info.rti_flags = RTF_HOST | RTF_LOCAL;
		info.rti_info[RTAX_DST] = ifa->ifa_addr;
		info.rti_info[RTAX_GATEWAY] =
		    (const struct sockaddr *)ifa->ifa_ifp->if_sadl;
		info.rti_ifa = ifa;
		nrt = NULL;
		e = rtrequest1(RTM_ADD, &info, &nrt);
		/* Make sure the new route really points at our ifa. */
		if (nrt && ifa != nrt->rt_ifa)
			rt_replace_ifa(nrt, ifa);
		/* Announce the address together with the add result. */
		rt_newaddrmsg(RTM_ADD, ifa, e, nrt);
		if (nrt != NULL) {
#ifdef RT_DEBUG
			dump_rt(nrt);
#endif
			rt_unref(nrt);
			RT_REFCNT_TRACE(nrt);
		}
	} else {
		/* A loopback host route exists; just announce the address. */
		e = 0;
		rt_newaddrmsg(RTM_NEWADDR, ifa, 0, NULL);
	}
	if (rt != NULL)
		rt_unref(rt);
	return e;
}
1621
1622 /*
1623 * Remove the local route entry for the address.
1624 * Announce the removal of the address and the route to the routing socket.
1625 */
/*
 * Remove the local (loopback) host route for ifa's address, or hand it
 * over to alt_ifa when one is supplied, and announce the change on the
 * routing socket.  Returns 0 or the error from rtdeletemsg().
 */
int
rt_ifa_remlocal(struct ifaddr *ifa, struct ifaddr *alt_ifa)
{
	struct rtentry *rt;
	int e = 0;

	rt = rtalloc1(ifa->ifa_addr, 0);

	/*
	 * Before deleting, check if a corresponding loopbacked
	 * host route surely exists.  With this check, we can avoid
	 * deleting an interface direct route whose destination is
	 * the same as the address being removed.  This can happen
	 * when removing a subnet-router anycast address on an
	 * interface attached to a shared medium.
	 */
	if (rt != NULL &&
	    (rt->rt_flags & RTF_HOST) &&
	    (rt->rt_ifp->if_flags & IFF_LOOPBACK))
	{
		/* If we cannot replace the route's ifaddr with the equivalent
		 * ifaddr of another interface, I believe it is safest to
		 * delete the route.
		 */
		if (alt_ifa == NULL) {
			e = rtdeletemsg(rt);
			if (e == 0) {
				/* Drop our lookup reference, then destroy. */
				rt_unref(rt);
				rt_free(rt);
				rt = NULL;
			}
			rt_newaddrmsg(RTM_DELADDR, ifa, 0, NULL);
		} else {
			/* Keep the route alive under the alternate ifaddr. */
			rt_replace_ifa(rt, alt_ifa);
			rt_newmsg(RTM_CHANGE, rt);
		}
	} else
		rt_newaddrmsg(RTM_DELADDR, ifa, 0, NULL);
	if (rt != NULL)
		rt_unref(rt);
	return e;
}
1668
/*
 * Route timer routines.  These routines allow functions to be called
 * for various routes at any time.  This is useful in supporting
 * path MTU discovery and redirect route deletion.
 *
 * This is similar to some BSDI internal functions, but it provides
 * for multiple queues for efficiency's sake...
 */
1677
/* All active rttimer queues; manipulated under RT_WLOCK. */
LIST_HEAD(, rttimer_queue) rttimer_queue_head;
/* Set once rt_timer_init() has run; checked by rt_timer_queue_create(). */
static int rt_init_done = 0;
1680
1681 /*
1682 * Some subtle order problems with domain initialization mean that
1683 * we cannot count on this being run from rt_init before various
1684 * protocol initializations are done. Therefore, we make sure
1685 * that this is run when the first queue is added...
1686 */
1687
1688 static void rt_timer_work(struct work *, void *);
1689
/*
 * One-time initialization of the route-timer machinery: the rt and
 * rtcache locks (XXX should be in rt_init), the queue list, the
 * periodic callout and the workqueue that services it.  Runs lazily
 * from the first rt_timer_queue_create(); panics if the workqueue
 * cannot be created.
 */
static void
rt_timer_init(void)
{
	int error;

	assert(rt_init_done == 0);

	/* XXX should be in rt_init */
	rw_init(&rt_lock);
	rw_init(&rtcache_lock);

	LIST_INIT(&rttimer_queue_head);
	callout_init(&rt_timer_ch, CALLOUT_MPSAFE);
	error = workqueue_create(&rt_timer_wq, "rt_timer",
	    rt_timer_work, NULL, PRI_SOFTNET, IPL_SOFTNET, WQ_MPSAFE);
	if (error)
		panic("%s: workqueue_create failed (%d)\n", __func__, error);
	/* Kick off the once-per-second timer tick. */
	callout_reset(&rt_timer_ch, hz, rt_timer_timer, NULL);
	rt_init_done = 1;
}
1710
/*
 * Allocate a new timer queue with the given per-entry timeout (compared
 * against time_uptime by rt_timer_work) and link it onto the global
 * queue list.  Lazily initializes the timer machinery on first use.
 * Returns NULL if allocation fails.
 */
struct rttimer_queue *
rt_timer_queue_create(u_int timeout)
{
	struct rttimer_queue *rtq;

	if (rt_init_done == 0)
		rt_timer_init();

	R_Malloc(rtq, struct rttimer_queue *, sizeof *rtq);
	if (rtq == NULL)
		return NULL;
	memset(rtq, 0, sizeof(*rtq));

	rtq->rtq_timeout = timeout;
	TAILQ_INIT(&rtq->rtq_head);
	RT_WLOCK();
	LIST_INSERT_HEAD(&rttimer_queue_head, rtq, rtq_link);
	RT_UNLOCK();

	return rtq;
}
1732
/*
 * Change the timeout applied to entries of rtq; takes effect the next
 * time rt_timer_work scans the queue.
 */
void
rt_timer_queue_change(struct rttimer_queue *rtq, long timeout)
{

	rtq->rtq_timeout = timeout;
}
1739
/*
 * Fire and remove every timer on rtq.  Called with the rt write lock
 * held; the lock is dropped around each callback (which may take it
 * again) and re-taken afterwards.
 */
static void
rt_timer_queue_remove_all(struct rttimer_queue *rtq)
{
	struct rttimer *r;

	RT_ASSERT_WLOCK();

	while ((r = TAILQ_FIRST(&rtq->rtq_head)) != NULL) {
		LIST_REMOVE(r, rtt_link);
		TAILQ_REMOVE(&rtq->rtq_head, r, rtt_next);
		/* Keep the rtentry alive across the unlocked callback. */
		rt_ref(r->rtt_rt); /* XXX */
		RT_REFCNT_TRACE(r->rtt_rt);
		RT_UNLOCK();
		(*r->rtt_func)(r->rtt_rt, r);
		pool_put(&rttimer_pool, r);
		RT_WLOCK();
		if (rtq->rtq_count > 0)
			rtq->rtq_count--;
		else
			printf("rt_timer_queue_remove_all: "
			    "rtq_count reached 0\n");
	}
}
1763
/*
 * Fire all remaining timers on rtq and unlink it from the global list.
 */
void
rt_timer_queue_destroy(struct rttimer_queue *rtq)
{

	RT_WLOCK();
	rt_timer_queue_remove_all(rtq);
	LIST_REMOVE(rtq, rtq_link);
	RT_UNLOCK();

	/*
	 * Caller is responsible for freeing the rttimer_queue structure.
	 */
}
1777
/* Return the number of timers currently queued on rtq. */
unsigned long
rt_timer_count(struct rttimer_queue *rtq)
{
	return rtq->rtq_count;
}
1783
/*
 * Remove (without firing) every timer attached to rt, e.g. when the
 * route is being deleted.
 */
static void
rt_timer_remove_all(struct rtentry *rt)
{
	struct rttimer *r;

	RT_WLOCK();
	while ((r = LIST_FIRST(&rt->rt_timer)) != NULL) {
		LIST_REMOVE(r, rtt_link);
		TAILQ_REMOVE(&r->rtt_queue->rtq_head, r, rtt_next);
		if (r->rtt_queue->rtq_count > 0)
			r->rtt_queue->rtq_count--;
		else
			printf("rt_timer_remove_all: rtq_count reached 0\n");
		pool_put(&rttimer_pool, r);
	}
	RT_UNLOCK();
}
1801
/*
 * Arm a timer on rt that will invoke func after queue's timeout has
 * elapsed.  If rt already carries a timer with the same callback, it is
 * recycled (re-timestamped and possibly moved to another queue) rather
 * than duplicated.  Returns 0 or ENOBUFS.
 */
int
rt_timer_add(struct rtentry *rt,
	void (*func)(struct rtentry *, struct rttimer *),
	struct rttimer_queue *queue)
{
	struct rttimer *r;

	KASSERT(func != NULL);
	RT_WLOCK();
	/*
	 * If there's already a timer with this action, destroy it before
	 * we add a new one.
	 */
	LIST_FOREACH(r, &rt->rt_timer, rtt_link) {
		if (r->rtt_func == func)
			break;
	}
	if (r != NULL) {
		/* Detach the old timer so its struct can be reused below. */
		LIST_REMOVE(r, rtt_link);
		TAILQ_REMOVE(&r->rtt_queue->rtq_head, r, rtt_next);
		if (r->rtt_queue->rtq_count > 0)
			r->rtt_queue->rtq_count--;
		else
			printf("rt_timer_add: rtq_count reached 0\n");
	} else {
		r = pool_get(&rttimer_pool, PR_NOWAIT);
		if (r == NULL) {
			RT_UNLOCK();
			return ENOBUFS;
		}
	}

	memset(r, 0, sizeof(*r));

	r->rtt_rt = rt;
	r->rtt_time = time_uptime;
	r->rtt_func = func;
	r->rtt_queue = queue;
	LIST_INSERT_HEAD(&rt->rt_timer, r, rtt_link);
	TAILQ_INSERT_TAIL(&queue->rtq_head, r, rtt_next);
	r->rtt_queue->rtq_count++;

	RT_UNLOCK();

	return 0;
}
1848
/*
 * Workqueue handler: scan every timer queue and fire the entries whose
 * timeout has expired (queues are TAILQs in insertion order, so the
 * scan stops at the first unexpired entry).  The rt lock is dropped
 * around each callback.  Re-arms the periodic callout when done.
 */
static void
rt_timer_work(struct work *wk, void *arg)
{
	struct rttimer_queue *rtq;
	struct rttimer *r;

	RT_WLOCK();
	LIST_FOREACH(rtq, &rttimer_queue_head, rtq_link) {
		while ((r = TAILQ_FIRST(&rtq->rtq_head)) != NULL &&
		    (r->rtt_time + rtq->rtq_timeout) < time_uptime) {
			LIST_REMOVE(r, rtt_link);
			TAILQ_REMOVE(&rtq->rtq_head, r, rtt_next);
			/* Hold the rtentry across the unlocked callback. */
			rt_ref(r->rtt_rt); /* XXX */
			RT_REFCNT_TRACE(r->rtt_rt);
			RT_UNLOCK();
			(*r->rtt_func)(r->rtt_rt, r);
			pool_put(&rttimer_pool, r);
			RT_WLOCK();
			if (rtq->rtq_count > 0)
				rtq->rtq_count--;
			else
				printf("rt_timer_timer: rtq_count reached 0\n");
		}
	}
	RT_UNLOCK();

	callout_reset(&rt_timer_ch, hz, rt_timer_timer, NULL);
}
1877
/*
 * Callout handler: defer the actual timer scan to the workqueue so it
 * runs in thread context (rt_timer_work may sleep on locks).
 */
static void
rt_timer_timer(void *arg)
{

	workqueue_enqueue(rt_timer_wq, &rt_timer_wk, NULL);
}
1884
/*
 * Look up the route for ro's destination via rtalloc1 (flag is passed
 * through to the lookup) and install it in the cache if it is RTF_UP.
 * Must be called with the rtcache write lock held and no route cached.
 * Returns the newly cached rtentry, or NULL.
 */
static struct rtentry *
_rtcache_init(struct route *ro, int flag)
{
	struct rtentry *rt;

	rtcache_invariants(ro);
	KASSERT(ro->_ro_rt == NULL);
	RTCACHE_ASSERT_WLOCK();

	if (rtcache_getdst(ro) == NULL)
		return NULL;
	ro->ro_invalid = false;
	rt = rtalloc1(rtcache_getdst(ro), flag);
	if (rt != NULL && ISSET(rt->rt_flags, RTF_UP)) {
		ro->_ro_rt = rt;
		KASSERT(!ISSET(rt->rt_flags, RTF_UPDATING));
		/* Swap the rtalloc1 reference for a cache (psref) one. */
		rtcache_ref(rt, ro);
		rt_unref(rt);
		/* Register ro on its domain's cache list. */
		rtcache(ro);
	} else if (rt != NULL)
		rt_unref(rt);

	rtcache_invariants(ro);
	return ro->_ro_rt;
}
1910
/*
 * Populate the route cache ro for its destination, with the lookup
 * flag set (cf. rtcache_init_noclone).  Returns the cached route or
 * NULL.
 */
struct rtentry *
rtcache_init(struct route *ro)
{
	struct rtentry *ret;

	RTCACHE_WLOCK();
	ret = _rtcache_init(ro, 1);
	RTCACHE_UNLOCK();

	return ret;
}
1920
/*
 * Like rtcache_init(), but with the lookup flag cleared.  Returns the
 * cached route or NULL.
 */
struct rtentry *
rtcache_init_noclone(struct route *ro)
{
	struct rtentry *ret;

	RTCACHE_WLOCK();
	ret = _rtcache_init(ro, 0);
	RTCACHE_UNLOCK();

	return ret;
}
1930
/*
 * Discard whatever route ro currently caches and redo the lookup with
 * the given clone flag.  Returns the freshly cached route or NULL.
 */
struct rtentry *
rtcache_update(struct route *ro, int clone)
{
	struct rtentry *ret;

	RTCACHE_WLOCK();
	rtcache_clear(ro);
	ret = _rtcache_init(ro, clone);
	RTCACHE_UNLOCK();

	return ret;
}
1941
/*
 * Copy the route cache old_ro into new_ro: duplicate the destination
 * sockaddr and, when old_ro holds a currently valid route, install the
 * same rtentry in new_ro and register it on the cache list.  The
 * temporary reference taken by rtcache_validate() is dropped before
 * returning.
 */
void
rtcache_copy(struct route *new_ro, struct route *old_ro)
{
	struct rtentry *rt;
	int ret;

	KASSERT(new_ro != old_ro);
	rtcache_invariants(new_ro);
	rtcache_invariants(old_ro);

	rt = rtcache_validate(old_ro);

	if (rtcache_getdst(old_ro) == NULL)
		goto out;
	ret = rtcache_setdst(new_ro, rtcache_getdst(old_ro));
	if (ret != 0)
		goto out;

	RTCACHE_WLOCK();
	new_ro->ro_invalid = false;
	if ((new_ro->_ro_rt = rt) != NULL)
		rtcache(new_ro);
	rtcache_invariants(new_ro);
	RTCACHE_UNLOCK();
out:
	rtcache_unref(rt, old_ro);
	return;
}
1970
/* Caches whose routes have been invalidated; see rtcache_invalidate(). */
static struct dom_rtlist invalid_routes = LIST_HEAD_INITIALIZER(dom_rtlist);
1972
#if defined(RT_DEBUG) && defined(NET_MPSAFE)
/* Debug aid: log psref acquire/release events on a cached route. */
static void
rtcache_trace(const char *func, struct rtentry *rt, struct route *ro)
{
	char dst[64];

	sockaddr_format(ro->ro_sa, dst, 64);
	printf("trace: %s:\tdst=%s cpu=%d lwp=%p psref=%p target=%p\n", func, dst,
	    cpu_index(curcpu()), curlwp, &ro->ro_psref, &rt->rt_psref);
}
#define RTCACHE_PSREF_TRACE(rt, ro)	rtcache_trace(__func__, (rt), (ro))
#else
#define RTCACHE_PSREF_TRACE(rt, ro)	do {} while (0)
#endif
1987
/*
 * Take a passive reference on rt on behalf of the cache ro.  Under
 * NET_MPSAFE this binds the LWP to its CPU and acquires a psref; the
 * matching release is rtcache_unref().
 */
static void
rtcache_ref(struct rtentry *rt, struct route *ro)
{

	KASSERT(rt != NULL);

#ifdef NET_MPSAFE
	RTCACHE_PSREF_TRACE(rt, ro);
	ro->ro_bound = curlwp_bind();
	psref_acquire(&ro->ro_psref, &rt->rt_psref, rt_psref_class);
#endif
}
2000
/*
 * Release the passive reference taken by rtcache_ref()/rtcache_validate().
 * Safe to call with rt == NULL (no-op).
 */
void
rtcache_unref(struct rtentry *rt, struct route *ro)
{

	if (rt == NULL)
		return;

#ifdef NET_MPSAFE
	psref_release(&ro->ro_psref, &rt->rt_psref, rt_psref_class);
	curlwp_bindx(ro->ro_bound);
	RTCACHE_PSREF_TRACE(rt, ro);
#endif
}
2014
/*
 * Return the cached route of ro if it is still usable (cached, not
 * invalidated, RTF_UP), with a reference taken via rtcache_ref();
 * otherwise NULL.  Called with the rtcache read lock held.  Under
 * NET_MPSAFE, if the route is being updated (RTF_UPDATING) and we may
 * sleep, both locks are dropped to wait for the update and the whole
 * check is retried.
 */
static struct rtentry *
rtcache_validate_locked(struct route *ro)
{
	struct rtentry *rt = NULL;

#ifdef NET_MPSAFE
retry:
#endif
	rt = ro->_ro_rt;
	rtcache_invariants(ro);

	if (ro->ro_invalid) {
		rt = NULL;
		goto out;
	}

	RT_RLOCK();
	if (rt != NULL && (rt->rt_flags & RTF_UP) != 0) {
#ifdef NET_MPSAFE
		if (ISSET(rt->rt_flags, RTF_UPDATING)) {
			if (rt_wait_ok()) {
				RT_UNLOCK();
				RTCACHE_UNLOCK();
				/* We can wait until the update is complete */
				rt_update_wait();
				RTCACHE_RLOCK();
				goto retry;
			} else {
				/* Cannot sleep here; treat as a miss. */
				rt = NULL;
			}
		} else
#endif
			rtcache_ref(rt, ro);
	} else
		rt = NULL;
	RT_UNLOCK();
out:
	return rt;
}
2054
/*
 * Locked wrapper for rtcache_validate_locked(): return ro's cached
 * route with a reference if it is still valid, else NULL.  The caller
 * drops the reference with rtcache_unref().
 */
struct rtentry *
rtcache_validate(struct route *ro)
{
	struct rtentry *ret;

	RTCACHE_RLOCK();
	ret = rtcache_validate_locked(ro);
	RTCACHE_UNLOCK();

	return ret;
}
2065
/*
 * Mark every cache on rtlist invalid and move it to the invalid_routes
 * list; subsequent rtcache_validate() calls on those caches will miss.
 * Called with the rtcache write lock held.
 */
static void
rtcache_invalidate(struct dom_rtlist *rtlist)
{
	struct route *ro;

	RTCACHE_ASSERT_WLOCK();

	while ((ro = LIST_FIRST(rtlist)) != NULL) {
		rtcache_invariants(ro);
		KASSERT(ro->_ro_rt != NULL);
		ro->ro_invalid = true;
		LIST_REMOVE(ro, ro_rtcache_next);
		LIST_INSERT_HEAD(&invalid_routes, ro, ro_rtcache_next);
		rtcache_invariants(ro);
	}
}
2082
/*
 * Purge rt from every route cache of the given address family, e.g.
 * when the route is being deleted.
 */
static void
rtcache_clear_rtentry(int family, struct rtentry *rt)
{
	struct domain *dom;
	struct route *ro, *nro;

	if ((dom = pffinddomain(family)) == NULL)
		return;

	RTCACHE_WLOCK();
	LIST_FOREACH_SAFE(ro, &dom->dom_rtcache, ro_rtcache_next, nro) {
		if (ro->_ro_rt == rt)
			rtcache_clear(ro);
	}
	RTCACHE_UNLOCK();
}
2099
/*
 * Detach ro from its cache list and forget its cached route (the
 * destination sockaddr is kept; see rtcache_free_locked for a full
 * teardown).  Called with the rtcache write lock held.
 */
static void
rtcache_clear(struct route *ro)
{

	RTCACHE_ASSERT_WLOCK();

	rtcache_invariants(ro);
	if (ro->_ro_rt == NULL)
		return;

	LIST_REMOVE(ro, ro_rtcache_next);

	ro->_ro_rt = NULL;
	ro->ro_invalid = false;
	rtcache_invariants(ro);
}
2116
/*
 * Look up dst through the route cache ro.  On a hit (same destination,
 * still-valid route) the cached route is returned with a reference and
 * *hitp is set to 1.  On any miss the read lock is upgraded to a write
 * lock (by drop-and-retake), the stale state is cleared, the new
 * destination is recorded and a fresh lookup is done; *hitp is 0.
 */
struct rtentry *
rtcache_lookup2(struct route *ro, const struct sockaddr *dst,
	int clone, int *hitp)
{
	const struct sockaddr *odst;
	struct rtentry *rt = NULL;

	RTCACHE_RLOCK();
	odst = rtcache_getdst(ro);
	if (odst == NULL) {
		/* No destination cached yet: miss. */
		RTCACHE_UNLOCK();
		RTCACHE_WLOCK();
		goto miss;
	}

	if (sockaddr_cmp(odst, dst) != 0) {
		/* Cached destination differs: discard and miss. */
		RTCACHE_UNLOCK();
		RTCACHE_WLOCK();
		rtcache_free_locked(ro);
		goto miss;
	}

	rt = rtcache_validate_locked(ro);
	if (rt == NULL) {
		/* Route went away: clear the stale cache and miss. */
		RTCACHE_UNLOCK();
		RTCACHE_WLOCK();
		rtcache_clear(ro);
		goto miss;
	}

	rtcache_invariants(ro);

	RTCACHE_UNLOCK();
	if (hitp != NULL)
		*hitp = 1;
	return rt;
miss:
	if (hitp != NULL)
		*hitp = 0;
	if (rtcache_setdst_locked(ro, dst) == 0)
		rt = _rtcache_init(ro, clone);

	rtcache_invariants(ro);

	RTCACHE_UNLOCK();
	return rt;
}
2164
/*
 * Fully tear down the cache ro: drop the cached route and free the
 * destination sockaddr.  Called with the rtcache write lock held.
 */
static void
rtcache_free_locked(struct route *ro)
{

	RTCACHE_ASSERT_WLOCK();
	rtcache_clear(ro);
	if (ro->ro_sa != NULL) {
		sockaddr_free(ro->ro_sa);
		ro->ro_sa = NULL;
	}
	rtcache_invariants(ro);
}
2177
/* Locked wrapper for rtcache_free_locked(). */
void
rtcache_free(struct route *ro)
{

	RTCACHE_WLOCK();
	rtcache_free_locked(ro);
	RTCACHE_UNLOCK();
}
2186
/*
 * Record sa as the destination of the cache ro, clearing any cached
 * route.  The existing sockaddr storage is reused when the family
 * matches; otherwise it is freed and a fresh copy allocated.  Returns
 * 0 or ENOMEM.  Called with the rtcache write lock held.
 */
static int
rtcache_setdst_locked(struct route *ro, const struct sockaddr *sa)
{
	KASSERT(sa != NULL);

	RTCACHE_ASSERT_WLOCK();

	rtcache_invariants(ro);
	if (ro->ro_sa != NULL) {
		if (ro->ro_sa->sa_family == sa->sa_family) {
			/* Same family: reuse the existing storage. */
			rtcache_clear(ro);
			sockaddr_copy(ro->ro_sa, ro->ro_sa->sa_len, sa);
			rtcache_invariants(ro);
			return 0;
		}
		/* free ro_sa, wrong family */
		rtcache_free_locked(ro);
	}

	KASSERT(ro->_ro_rt == NULL);

	if ((ro->ro_sa = sockaddr_dup(sa, M_ZERO | M_NOWAIT)) == NULL) {
		rtcache_invariants(ro);
		return ENOMEM;
	}
	rtcache_invariants(ro);
	return 0;
}
2215
/*
 * Locked wrapper for rtcache_setdst_locked(); returns 0 or ENOMEM.
 */
int
rtcache_setdst(struct route *ro, const struct sockaddr *sa)
{
	int rv;

	RTCACHE_WLOCK();
	rv = rtcache_setdst_locked(ro, sa);
	RTCACHE_UNLOCK();

	return rv;
}
2227
2228 const struct sockaddr *
2229 rt_settag(struct rtentry *rt, const struct sockaddr *tag)
2230 {
2231 if (rt->rt_tag != tag) {
2232 if (rt->rt_tag != NULL)
2233 sockaddr_free(rt->rt_tag);
2234 rt->rt_tag = sockaddr_dup(tag, M_ZERO | M_NOWAIT);
2235 }
2236 return rt->rt_tag;
2237 }
2238
/* Return the tag attached to rt by rt_settag(), or NULL. */
struct sockaddr *
rt_gettag(const struct rtentry *rt)
{
	return rt->rt_tag;
}
2244
2245 int
2246 rt_check_reject_route(const struct rtentry *rt, const struct ifnet *ifp)
2247 {
2248
2249 if ((rt->rt_flags & RTF_REJECT) != 0) {
2250 /* Mimic looutput */
2251 if (ifp->if_flags & IFF_LOOPBACK)
2252 return (rt->rt_flags & RTF_HOST) ?
2253 EHOSTUNREACH : ENETUNREACH;
2254 else if (rt->rt_rmx.rmx_expire == 0 ||
2255 time_uptime < rt->rt_rmx.rmx_expire)
2256 return (rt->rt_flags & RTF_GATEWAY) ?
2257 EHOSTUNREACH : EHOSTDOWN;
2258 }
2259
2260 return 0;
2261 }
2262
/*
 * Repeatedly find (under the rt lock) a route of the given family for
 * which f returns true, then delete it with the lock released.  Loops
 * until no matching entry remains.  ESRCH from the delete means someone
 * raced us and removed the entry first; that is tolerated.
 */
void
rt_delete_matched_entries(sa_family_t family, int (*f)(struct rtentry *, void *),
	void *v)
{

	for (;;) {
		int s;
		int error;
		struct rtentry *rt, *retrt = NULL;

		RT_RLOCK();
		s = splsoftnet();
		rt = rtbl_search_matched_entry(family, f, v);
		if (rt == NULL) {
			splx(s);
			RT_UNLOCK();
			return;
		}
		/* Pin the entry before dropping the lock. */
		rt->rt_refcnt++;
		splx(s);
		RT_UNLOCK();

		error = rtrequest(RTM_DELETE, rt_getkey(rt), rt->rt_gateway,
		    rt_mask(rt), rt->rt_flags, &retrt);
		if (error == 0) {
			KASSERT(retrt == rt);
			KASSERT((retrt->rt_flags & RTF_UP) == 0);
			retrt->rt_ifp = NULL;
			rt_unref(rt);
			rt_free(retrt);
		} else if (error == ESRCH) {
			/* Someone deleted the entry already. */
			rt_unref(rt);
		} else {
			log(LOG_ERR, "%s: unable to delete rtentry @ %p, "
			    "error = %d\n", rt->rt_ifp->if_xname, rt, error);
			/* XXX how to treat this case? */
		}
	}
}
2303
2304 int
2305 rt_walktree(sa_family_t family, int (*f)(struct rtentry *, void *), void *v)
2306 {
2307 int error;
2308
2309 RT_RLOCK();
2310 error = rtbl_walktree(family, f, v);
2311 RT_UNLOCK();
2312
2313 return error;
2314 }
2315
2316 #ifdef DDB
2317
2318 #include <machine/db_machdep.h>
2319 #include <ddb/db_interface.h>
2320 #include <ddb/db_output.h>
2321
2322 #define rt_expire rt_rmx.rmx_expire
2323
2324 static void
2325 db_print_sa(const struct sockaddr *sa)
2326 {
2327 int len;
2328 const u_char *p;
2329
2330 if (sa == NULL) {
2331 db_printf("[NULL]");
2332 return;
2333 }
2334
2335 p = (const u_char *)sa;
2336 len = sa->sa_len;
2337 db_printf("[");
2338 while (len > 0) {
2339 db_printf("%d", *p);
2340 p++; len--;
2341 if (len) db_printf(",");
2342 }
2343 db_printf("]\n");
2344 }
2345
2346 static void
2347 db_print_ifa(struct ifaddr *ifa)
2348 {
2349 if (ifa == NULL)
2350 return;
2351 db_printf(" ifa_addr=");
2352 db_print_sa(ifa->ifa_addr);
2353 db_printf(" ifa_dsta=");
2354 db_print_sa(ifa->ifa_dstaddr);
2355 db_printf(" ifa_mask=");
2356 db_print_sa(ifa->ifa_netmask);
2357 db_printf(" flags=0x%x,refcnt=%d,metric=%d\n",
2358 ifa->ifa_flags,
2359 ifa->ifa_refcnt,
2360 ifa->ifa_metric);
2361 }
2362
2363 /*
2364 * Function to pass to rt_walktree().
2365 * Return non-zero error to abort walk.
2366 */
static int
db_show_rtentry(struct rtentry *rt, void *w)
{
	/* 'w' is the unused opaque argument passed through rt_walktree(). */
	db_printf("rtentry=%p", rt);

	/* rt_expire is #defined above to rt_rmx.rmx_expire. */
	db_printf(" flags=0x%x refcnt=%d use=%"PRId64" expire=%"PRId64"\n",
		rt->rt_flags, rt->rt_refcnt,
		rt->rt_use, (uint64_t)rt->rt_expire);

	/* Destination key, netmask and gateway as raw sockaddr bytes. */
	db_printf(" key="); db_print_sa(rt_getkey(rt));
	db_printf(" mask="); db_print_sa(rt_mask(rt));
	db_printf(" gw="); db_print_sa(rt->rt_gateway);

	db_printf(" ifp=%p ", rt->rt_ifp);
	if (rt->rt_ifp)
		db_printf("(%s)", rt->rt_ifp->if_xname);
	else
		db_printf("(NULL)");

	db_printf(" ifa=%p\n", rt->rt_ifa);
	db_print_ifa(rt->rt_ifa);

	db_printf(" gwroute=%p llinfo=%p\n",
		rt->rt_gwroute, rt->rt_llinfo);

	/* Always 0: never abort the tree walk. */
	return 0;
}
2394
2395 /*
2396 * Function to print all the route trees.
2397 * Use this from ddb: "show routes"
2398 */
void
db_show_routes(db_expr_t addr, bool have_addr,
    db_expr_t count, const char *modif)
{
	/*
	 * Standard ddb command arguments; all unused here.
	 * NOTE(review): only the AF_INET tree is printed — other
	 * families (e.g. AF_INET6) are not walked.
	 */
	rt_walktree(AF_INET, db_show_rtentry, NULL);
}
2405 #endif
2406