route.c revision 1.184 1 /* $NetBSD: route.c,v 1.184 2016/12/21 00:33:49 ozaki-r Exp $ */
2
3 /*-
4 * Copyright (c) 1998, 2008 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Kevin M. Lahey of the Numerical Aerospace Simulation Facility,
9 * NASA Ames Research Center.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
23 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 * POSSIBILITY OF SUCH DAMAGE.
31 */
32
33 /*
34 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
35 * All rights reserved.
36 *
37 * Redistribution and use in source and binary forms, with or without
38 * modification, are permitted provided that the following conditions
39 * are met:
40 * 1. Redistributions of source code must retain the above copyright
41 * notice, this list of conditions and the following disclaimer.
42 * 2. Redistributions in binary form must reproduce the above copyright
43 * notice, this list of conditions and the following disclaimer in the
44 * documentation and/or other materials provided with the distribution.
45 * 3. Neither the name of the project nor the names of its contributors
46 * may be used to endorse or promote products derived from this software
47 * without specific prior written permission.
48 *
49 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
50 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
53 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59 * SUCH DAMAGE.
60 */
61
62 /*
63 * Copyright (c) 1980, 1986, 1991, 1993
64 * The Regents of the University of California. All rights reserved.
65 *
66 * Redistribution and use in source and binary forms, with or without
67 * modification, are permitted provided that the following conditions
68 * are met:
69 * 1. Redistributions of source code must retain the above copyright
70 * notice, this list of conditions and the following disclaimer.
71 * 2. Redistributions in binary form must reproduce the above copyright
72 * notice, this list of conditions and the following disclaimer in the
73 * documentation and/or other materials provided with the distribution.
74 * 3. Neither the name of the University nor the names of its contributors
75 * may be used to endorse or promote products derived from this software
76 * without specific prior written permission.
77 *
78 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
79 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
80 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
81 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
82 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
83 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
84 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
85 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
86 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
87 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
88 * SUCH DAMAGE.
89 *
90 * @(#)route.c 8.3 (Berkeley) 1/9/95
91 */
92
93 #ifdef _KERNEL_OPT
94 #include "opt_inet.h"
95 #include "opt_route.h"
96 #include "opt_net_mpsafe.h"
97 #endif
98
99 #include <sys/cdefs.h>
100 __KERNEL_RCSID(0, "$NetBSD: route.c,v 1.184 2016/12/21 00:33:49 ozaki-r Exp $");
101
102 #include <sys/param.h>
103 #ifdef RTFLUSH_DEBUG
104 #include <sys/sysctl.h>
105 #endif
106 #include <sys/systm.h>
107 #include <sys/callout.h>
108 #include <sys/proc.h>
109 #include <sys/mbuf.h>
110 #include <sys/socket.h>
111 #include <sys/socketvar.h>
112 #include <sys/domain.h>
113 #include <sys/protosw.h>
114 #include <sys/kernel.h>
115 #include <sys/ioctl.h>
116 #include <sys/pool.h>
117 #include <sys/kauth.h>
118 #include <sys/workqueue.h>
119 #include <sys/syslog.h>
120 #include <sys/rwlock.h>
121 #include <sys/mutex.h>
122 #include <sys/cpu.h>
123
124 #include <net/if.h>
125 #include <net/if_dl.h>
126 #include <net/route.h>
127
128 #include <netinet/in.h>
129 #include <netinet/in_var.h>
130
131 #ifdef RTFLUSH_DEBUG
132 #define rtcache_debug() __predict_false(_rtcache_debug)
133 #else /* RTFLUSH_DEBUG */
134 #define rtcache_debug() 0
135 #endif /* RTFLUSH_DEBUG */
136
137 #ifdef RT_DEBUG
138 #define RT_REFCNT_TRACE(rt) printf("%s:%d: rt=%p refcnt=%d\n", \
139 __func__, __LINE__, (rt), (rt)->rt_refcnt)
140 #else
141 #define RT_REFCNT_TRACE(rt) do {} while (0)
142 #endif
143
144 #ifdef DEBUG
145 #define dlog(level, fmt, args...) log(level, fmt, ##args)
146 #else
147 #define dlog(level, fmt, args...) do {} while (0)
148 #endif
149
150 struct rtstat rtstat;
151
152 static int rttrash; /* routes not in table but not freed */
153
154 static struct pool rtentry_pool;
155 static struct pool rttimer_pool;
156
157 static struct callout rt_timer_ch; /* callout for rt_timer_timer() */
158 static struct workqueue *rt_timer_wq;
159 static struct work rt_timer_wk;
160
161 static void rt_timer_init(void);
162 static void rt_timer_queue_remove_all(struct rttimer_queue *);
163 static void rt_timer_remove_all(struct rtentry *);
164 static void rt_timer_timer(void *);
165
166 /*
167 * Locking notes:
168 * - The routing table is protected by a global rwlock
169 * - API: RT_RLOCK and friends
170 * - rtcaches are protected by a global rwlock
171 * - API: RTCACHE_RLOCK and friends
 * - References to a rtentry are managed by reference counting and psref
 * - Reference counting is used for temporary references when a rtentry
 * is fetched from the routing table
175 * - psref is used for temporal reference when a rtentry is fetched
176 * from a rtcache
177 * - struct route (rtcache) has struct psref, so we cannot obtain
178 * a reference twice on the same struct route
 * - Before destroying or updating a rtentry, we have to wait for
 * all remaining references to drain (see below for details)
181 * - APIs
182 * - An obtained rtentry via rtalloc1 or rtrequest* must be
183 * unreferenced by rt_unref
184 * - An obtained rtentry via rtcache_* must be unreferenced by
185 * rtcache_unref
186 * - TODO: once we get a lockless routing table, we should use only
187 * psref for rtentries
188 * - rtentry destruction
189 * - A rtentry is destroyed (freed) only when we call rtrequest(RTM_DELETE)
190 * - If a caller of rtrequest grabs a reference of a rtentry, the caller
191 * has a responsibility to destroy the rtentry by itself by calling
192 * rt_free
193 * - If not, rtrequest itself does that
194 * - If rt_free is called in softint, the actual destruction routine is
195 * deferred to a workqueue
196 * - rtentry update
197 * - When updating a rtentry, RTF_UPDATING flag is set
198 * - If a rtentry is set RTF_UPDATING, fetching the rtentry from
199 * the routing table or a rtcache results in either of the following
200 * cases:
201 * - if the caller runs in softint, the caller fails to fetch
202 * - otherwise, the caller waits for the update completed and retries
203 * to fetch (probably succeed to fetch for the second time)
204 */
205
206 /*
207 * Global locks for the routing table and rtcaches.
208 * Locking order: rtcache_lock => rt_lock
209 */
210 static krwlock_t rt_lock __cacheline_aligned;
211 #ifdef NET_MPSAFE
212 #define RT_RLOCK() rw_enter(&rt_lock, RW_READER)
213 #define RT_WLOCK() rw_enter(&rt_lock, RW_WRITER)
214 #define RT_UNLOCK() rw_exit(&rt_lock)
215 #define RT_LOCKED() rw_lock_held(&rt_lock)
216 #define RT_ASSERT_WLOCK() KASSERT(rw_write_held(&rt_lock))
217 #else
218 #define RT_RLOCK() do {} while (0)
219 #define RT_WLOCK() do {} while (0)
220 #define RT_UNLOCK() do {} while (0)
221 #define RT_LOCKED() false
222 #define RT_ASSERT_WLOCK() do {} while (0)
223 #endif
224
225 static krwlock_t rtcache_lock __cacheline_aligned;
226 #ifdef NET_MPSAFE
227 #define RTCACHE_RLOCK() rw_enter(&rtcache_lock, RW_READER)
228 #define RTCACHE_WLOCK() rw_enter(&rtcache_lock, RW_WRITER)
229 #define RTCACHE_UNLOCK() rw_exit(&rtcache_lock)
230 #define RTCACHE_ASSERT_WLOCK() KASSERT(rw_write_held(&rtcache_lock))
231 #define RTCACHE_WLOCKED() rw_write_held(&rtcache_lock)
232 #else
233 #define RTCACHE_RLOCK() do {} while (0)
234 #define RTCACHE_WLOCK() do {} while (0)
235 #define RTCACHE_UNLOCK() do {} while (0)
236 #define RTCACHE_ASSERT_WLOCK() do {} while (0)
237 #define RTCACHE_WLOCKED() false
238 #endif
239
240 /*
241 * mutex and cv that are used to wait for references to a rtentry left
242 * before updating the rtentry.
243 */
244 static struct {
245 kmutex_t lock;
246 kcondvar_t cv;
247 bool ongoing;
248 const struct lwp *lwp;
249 } rt_update_global __cacheline_aligned;
250
251 /*
252 * A workqueue and stuff that are used to defer the destruction routine
253 * of rtentries.
254 */
255 static struct {
256 struct workqueue *wq;
257 struct work wk;
258 kmutex_t lock;
259 struct rtentry *queue[10];
260 } rt_free_global __cacheline_aligned;
261
262 /* psref for rtentry */
263 static struct psref_class *rt_psref_class __read_mostly;
264
265 #ifdef RTFLUSH_DEBUG
266 static int _rtcache_debug = 0;
267 #endif /* RTFLUSH_DEBUG */
268
269 static kauth_listener_t route_listener;
270
271 static int rtdeletemsg(struct rtentry *);
272 static void rtflushall(int);
273
274 static void rt_maskedcopy(const struct sockaddr *,
275 struct sockaddr *, const struct sockaddr *);
276
277 static void rtcache_clear(struct route *);
278 static void rtcache_clear_rtentry(int, struct rtentry *);
279 static void rtcache_invalidate(struct dom_rtlist *);
280
281 static void rt_ref(struct rtentry *);
282
283 static struct rtentry *
284 rtalloc1_locked(const struct sockaddr *, int, bool);
285 static struct rtentry *
286 rtcache_validate_locked(struct route *);
287 static void rtcache_free_locked(struct route *);
288 static int rtcache_setdst_locked(struct route *, const struct sockaddr *);
289
290 static void rtcache_ref(struct rtentry *, struct route *);
291
292 static void rt_update_wait(void);
293
294 static bool rt_wait_ok(void);
295 static void rt_wait_refcnt(const char *, struct rtentry *, int);
296 static void rt_wait_psref(struct rtentry *);
297
298 #ifdef DDB
299 static void db_print_sa(const struct sockaddr *);
300 static void db_print_ifa(struct ifaddr *);
301 static int db_show_rtentry(struct rtentry *, void *);
302 #endif
303
304 #ifdef RTFLUSH_DEBUG
305 static void sysctl_net_rtcache_setup(struct sysctllog **);
306 static void
307 sysctl_net_rtcache_setup(struct sysctllog **clog)
308 {
309 const struct sysctlnode *rnode;
310
311 if (sysctl_createv(clog, 0, NULL, &rnode, CTLFLAG_PERMANENT,
312 CTLTYPE_NODE,
313 "rtcache", SYSCTL_DESCR("Route cache related settings"),
314 NULL, 0, NULL, 0, CTL_NET, CTL_CREATE, CTL_EOL) != 0)
315 return;
316 if (sysctl_createv(clog, 0, &rnode, &rnode,
317 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, CTLTYPE_INT,
318 "debug", SYSCTL_DESCR("Debug route caches"),
319 NULL, 0, &_rtcache_debug, 0, CTL_CREATE, CTL_EOL) != 0)
320 return;
321 }
322 #endif /* RTFLUSH_DEBUG */
323
324 static inline void
325 rt_destroy(struct rtentry *rt)
326 {
327 if (rt->_rt_key != NULL)
328 sockaddr_free(rt->_rt_key);
329 if (rt->rt_gateway != NULL)
330 sockaddr_free(rt->rt_gateway);
331 if (rt_gettag(rt) != NULL)
332 sockaddr_free(rt_gettag(rt));
333 rt->_rt_key = rt->rt_gateway = rt->rt_tag = NULL;
334 }
335
336 static inline const struct sockaddr *
337 rt_setkey(struct rtentry *rt, const struct sockaddr *key, int flags)
338 {
339 if (rt->_rt_key == key)
340 goto out;
341
342 if (rt->_rt_key != NULL)
343 sockaddr_free(rt->_rt_key);
344 rt->_rt_key = sockaddr_dup(key, flags);
345 out:
346 rt->rt_nodes->rn_key = (const char *)rt->_rt_key;
347 return rt->_rt_key;
348 }
349
/*
 * Return the ifaddr associated with the route, letting the address
 * family's ifa_getifa hook substitute a preferred address for the
 * route's key.  May replace rt->rt_ifa as a side effect; returns
 * NULL only when the hook fails to produce an address.
 */
struct ifaddr *
rt_get_ifa(struct rtentry *rt)
{
	struct ifaddr *ifa;

	if ((ifa = rt->rt_ifa) == NULL)
		return ifa;
	else if (ifa->ifa_getifa == NULL)
		return ifa;
#if 0
	else if (ifa->ifa_seqno != NULL && *ifa->ifa_seqno == rt->rt_ifa_seqno)
		return ifa;
#endif
	else {
		/* Ask the protocol for the ifaddr it prefers for this key. */
		ifa = (*ifa->ifa_getifa)(ifa, rt_getkey(rt));
		if (ifa == NULL)
			return NULL;
		rt_replace_ifa(rt, ifa);
		return ifa;
	}
}
371
372 static void
373 rt_set_ifa1(struct rtentry *rt, struct ifaddr *ifa)
374 {
375 rt->rt_ifa = ifa;
376 if (ifa->ifa_seqno != NULL)
377 rt->rt_ifa_seqno = *ifa->ifa_seqno;
378 }
379
380 /*
381 * Is this route the connected route for the ifa?
382 */
383 static int
384 rt_ifa_connected(const struct rtentry *rt, const struct ifaddr *ifa)
385 {
386 const struct sockaddr *key, *dst, *odst;
387 struct sockaddr_storage maskeddst;
388
389 key = rt_getkey(rt);
390 dst = rt->rt_flags & RTF_HOST ? ifa->ifa_dstaddr : ifa->ifa_addr;
391 if (dst == NULL ||
392 dst->sa_family != key->sa_family ||
393 dst->sa_len != key->sa_len)
394 return 0;
395 if ((rt->rt_flags & RTF_HOST) == 0 && ifa->ifa_netmask) {
396 odst = dst;
397 dst = (struct sockaddr *)&maskeddst;
398 rt_maskedcopy(odst, (struct sockaddr *)&maskeddst,
399 ifa->ifa_netmask);
400 }
401 return (memcmp(dst, key, dst->sa_len) == 0);
402 }
403
/*
 * Replace the route's ifaddr with "ifa".  When this route is the
 * connected route of the old ifaddr, the IFA_ROUTE marker (recording
 * which ifaddr owns a connected route) is migrated to the new ifaddr
 * if the route is also connected for it.  Takes a reference on the
 * new ifaddr and drops the old one's.
 */
void
rt_replace_ifa(struct rtentry *rt, struct ifaddr *ifa)
{
	if (rt->rt_ifa &&
	    rt->rt_ifa != ifa &&
	    rt->rt_ifa->ifa_flags & IFA_ROUTE &&
	    rt_ifa_connected(rt, rt->rt_ifa))
	{
		RT_DPRINTF("rt->_rt_key = %p, ifa = %p, "
		    "replace deleted IFA_ROUTE\n",
		    (void *)rt->_rt_key, (void *)rt->rt_ifa);
		rt->rt_ifa->ifa_flags &= ~IFA_ROUTE;
		if (rt_ifa_connected(rt, ifa)) {
			RT_DPRINTF("rt->_rt_key = %p, ifa = %p, "
			    "replace added IFA_ROUTE\n",
			    (void *)rt->_rt_key, (void *)ifa);
			ifa->ifa_flags |= IFA_ROUTE;
		}
	}

	/* Reference the new ifaddr before releasing the old one. */
	ifaref(ifa);
	ifafree(rt->rt_ifa);
	rt_set_ifa1(rt, ifa);
}
428
/* Attach "ifa" to a route, taking a new reference on it first. */
static void
rt_set_ifa(struct rtentry *rt, struct ifaddr *ifa)
{

	ifaref(ifa);
	rt_set_ifa1(rt, ifa);
}
435
436 static int
437 route_listener_cb(kauth_cred_t cred, kauth_action_t action, void *cookie,
438 void *arg0, void *arg1, void *arg2, void *arg3)
439 {
440 struct rt_msghdr *rtm;
441 int result;
442
443 result = KAUTH_RESULT_DEFER;
444 rtm = arg1;
445
446 if (action != KAUTH_NETWORK_ROUTE)
447 return result;
448
449 if (rtm->rtm_type == RTM_GET)
450 result = KAUTH_RESULT_ALLOW;
451
452 return result;
453 }
454
455 static void rt_free_work(struct work *, void *);
456
/*
 * Initialize the routing subsystem: locks, the psref class and
 * workqueue used for deferred rtentry destruction, the update
 * synchronization state, the rtentry/rttimer pools, the radix and
 * routing tables, and the kauth listener for routing requests.
 */
void
rt_init(void)
{
	int error;

#ifdef RTFLUSH_DEBUG
	sysctl_net_rtcache_setup(NULL);
#endif

	/* Deferred-destruction machinery used by rt_free() from softint. */
	mutex_init(&rt_free_global.lock, MUTEX_DEFAULT, IPL_SOFTNET);
	rt_psref_class = psref_class_create("rtentry", IPL_SOFTNET);

	error = workqueue_create(&rt_free_global.wq, "rt_free",
	    rt_free_work, NULL, PRI_SOFTNET, IPL_SOFTNET, WQ_MPSAFE);
	if (error)
		panic("%s: workqueue_create failed (%d)\n", __func__, error);

	/* Serialization of in-place updates (rt_update_prepare/finish). */
	mutex_init(&rt_update_global.lock, MUTEX_DEFAULT, IPL_SOFTNET);
	cv_init(&rt_update_global.cv, "rt_update");

	pool_init(&rtentry_pool, sizeof(struct rtentry), 0, 0, 0, "rtentpl",
	    NULL, IPL_SOFTNET);
	pool_init(&rttimer_pool, sizeof(struct rttimer), 0, 0, 0, "rttmrpl",
	    NULL, IPL_SOFTNET);

	rn_init();	/* initialize all zeroes, all ones, mask table */
	rtbl_init();

	route_listener = kauth_listen_scope(KAUTH_SCOPE_NETWORK,
	    route_listener_cb, NULL);
}
488
489 static void
490 rtflushall(int family)
491 {
492 struct domain *dom;
493
494 if (rtcache_debug())
495 printf("%s: enter\n", __func__);
496
497 if ((dom = pffinddomain(family)) == NULL)
498 return;
499
500 RTCACHE_WLOCK();
501 rtcache_invalidate(&dom->dom_rtcache);
502 RTCACHE_UNLOCK();
503 }
504
/*
 * Link a filled-in, valid rtcache onto its domain's cache list so
 * rtflushall()/rtcache_invalidate() can find it later.  The cache
 * must already hold a route and a destination; the caller must hold
 * the rtcache write lock.
 */
static void
rtcache(struct route *ro)
{
	struct domain *dom;

	RTCACHE_ASSERT_WLOCK();

	rtcache_invariants(ro);
	KASSERT(ro->_ro_rt != NULL);
	KASSERT(ro->ro_invalid == false);
	KASSERT(rtcache_getdst(ro) != NULL);

	/* No domain for this family: nothing to register the cache on. */
	if ((dom = pffinddomain(rtcache_getdst(ro)->sa_family)) == NULL)
		return;

	LIST_INSERT_HEAD(&dom->dom_rtcache, ro, ro_rtcache_next);
	rtcache_invariants(ro);
}
523
524 #ifdef RT_DEBUG
525 static void
526 dump_rt(const struct rtentry *rt)
527 {
528 char buf[512];
529
530 aprint_normal("rt: ");
531 aprint_normal("p=%p ", rt);
532 if (rt->_rt_key == NULL) {
533 aprint_normal("dst=(NULL) ");
534 } else {
535 sockaddr_format(rt->_rt_key, buf, sizeof(buf));
536 aprint_normal("dst=%s ", buf);
537 }
538 if (rt->rt_gateway == NULL) {
539 aprint_normal("gw=(NULL) ");
540 } else {
541 sockaddr_format(rt->_rt_key, buf, sizeof(buf));
542 aprint_normal("gw=%s ", buf);
543 }
544 aprint_normal("flags=%x ", rt->rt_flags);
545 if (rt->rt_ifp == NULL) {
546 aprint_normal("if=(NULL) ");
547 } else {
548 aprint_normal("if=%s ", rt->rt_ifp->if_xname);
549 }
550 aprint_normal("\n");
551 }
552 #endif /* RT_DEBUG */
553
554 /*
555 * Packet routing routines. If success, refcnt of a returned rtentry
556 * will be incremented. The caller has to rtfree it by itself.
557 */
/*
 * Look up "dst" in the routing table and return the matched entry
 * with its refcnt incremented, or NULL on a miss.  The caller must
 * hold rt_lock (see rtalloc1()); when the matched entry is being
 * updated and wait_ok permits, the locks are dropped while waiting
 * for the update to finish and the lookup is retried.
 */
struct rtentry *
rtalloc1_locked(const struct sockaddr *dst, int report, bool wait_ok)
{
	rtbl_t *rtbl;
	struct rtentry *rt;
	int s;

retry:
	s = splsoftnet();
	rtbl = rt_gettable(dst->sa_family);
	if (rtbl == NULL)
		goto miss;

	rt = rt_matchaddr(rtbl, dst);
	if (rt == NULL)
		goto miss;

	/* A downed route counts as a miss. */
	if (!ISSET(rt->rt_flags, RTF_UP))
		goto miss;

	if (ISSET(rt->rt_flags, RTF_UPDATING) &&
	    /* XXX updater should be always able to acquire */
	    curlwp != rt_update_global.lwp) {
		bool need_lock = false;
		/* Softint context cannot sleep, so fail instead. */
		if (!wait_ok || !rt_wait_ok())
			goto miss;
		RT_UNLOCK();
		splx(s);

		/* XXX need more proper solution */
		if (RTCACHE_WLOCKED()) {
			RTCACHE_UNLOCK();
			need_lock = true;
		}

		/* We can wait until the update is complete */
		rt_update_wait();

		if (need_lock)
			RTCACHE_WLOCK();
		goto retry;
	}

	rt_ref(rt);
	RT_REFCNT_TRACE(rt);

	splx(s);
	return rt;
miss:
	rtstat.rts_unreach++;
	if (report) {
		struct rt_addrinfo info;

		/* Tell routing-socket listeners about the failed lookup. */
		memset(&info, 0, sizeof(info));
		info.rti_info[RTAX_DST] = dst;
		rt_missmsg(RTM_MISS, &info, 0, 0);
	}
	splx(s);
	return NULL;
}
618
619 struct rtentry *
620 rtalloc1(const struct sockaddr *dst, int report)
621 {
622 struct rtentry *rt;
623
624 RT_RLOCK();
625 rt = rtalloc1_locked(dst, report, true);
626 RT_UNLOCK();
627
628 return rt;
629 }
630
/* Take a reference on a route entry (atomic; no lock required). */
static void
rt_ref(struct rtentry *rt)
{

	KASSERT(rt->rt_refcnt >= 0);
	atomic_inc_uint(&rt->rt_refcnt);
}
638
/*
 * Drop a reference on a route entry.  If the entry is dead (!RTF_UP)
 * or being updated, wake threads blocked in rt_wait_refcnt() waiting
 * for the references to drain.
 */
void
rt_unref(struct rtentry *rt)
{

	KASSERT(rt != NULL);
	KASSERTMSG(rt->rt_refcnt > 0, "refcnt=%d", rt->rt_refcnt);

	atomic_dec_uint(&rt->rt_refcnt);
	if (!ISSET(rt->rt_flags, RTF_UP) || ISSET(rt->rt_flags, RTF_UPDATING)) {
		/* The lock pairs with the cv_wait in rt_wait_refcnt(). */
		mutex_enter(&rt_free_global.lock);
		cv_broadcast(&rt->rt_cv);
		mutex_exit(&rt_free_global.lock);
	}
}
653
654 static bool
655 rt_wait_ok(void)
656 {
657
658 KASSERT(!cpu_intr_p());
659 return !cpu_softintr_p();
660 }
661
/*
 * Block until the entry's reference count drops to "cnt".
 * Wakeups come from rt_unref() via rt->rt_cv; "title" only tags the
 * debug log output.
 */
void
rt_wait_refcnt(const char *title, struct rtentry *rt, int cnt)
{
	mutex_enter(&rt_free_global.lock);
	while (rt->rt_refcnt > cnt) {
		dlog(LOG_DEBUG, "%s: %s waiting (refcnt=%d)\n",
		    __func__, title, rt->rt_refcnt);
		cv_wait(&rt->rt_cv, &rt_free_global.lock);
		dlog(LOG_DEBUG, "%s: %s waited (refcnt=%d)\n",
		    __func__, title, rt->rt_refcnt);
	}
	mutex_exit(&rt_free_global.lock);
}
675
/*
 * Wait for all psref readers of the entry to leave, then re-arm the
 * psref target so the entry can be published again afterwards.
 */
void
rt_wait_psref(struct rtentry *rt)
{

	psref_target_destroy(&rt->rt_psref, rt_psref_class);
	psref_target_init(&rt->rt_psref, rt_psref_class);
}
683
/*
 * Actually destroy a route entry: wait out any in-progress update
 * and all remaining references, tear down the psref target, release
 * the ifaddr and sockaddrs, and return the memory to the pool.
 * Must run in a sleepable context; softint callers go through the
 * rt_free workqueue instead (see rt_free()).
 */
static void
_rt_free(struct rtentry *rt)
{
	struct ifaddr *ifa;

	/*
	 * Need to avoid a deadlock on rt_wait_refcnt of update
	 * and a conflict on psref_target_destroy of update.
	 */
	rt_update_wait();

	RT_REFCNT_TRACE(rt);
	KASSERTMSG(rt->rt_refcnt >= 0, "refcnt=%d", rt->rt_refcnt);
	rt_wait_refcnt("free", rt, 0);
	psref_target_destroy(&rt->rt_psref, rt_psref_class);

	rt_assert_inactive(rt);
	rttrash--;
	ifa = rt->rt_ifa;
	rt->rt_ifa = NULL;
	ifafree(ifa);
	rt->rt_ifp = NULL;
	cv_destroy(&rt->rt_cv);
	rt_destroy(rt);
	pool_put(&rtentry_pool, rt);
}
710
711 static void
712 rt_free_work(struct work *wk, void *arg)
713 {
714 int i;
715 struct rtentry *rt;
716
717 restart:
718 mutex_enter(&rt_free_global.lock);
719 for (i = 0; i < sizeof(rt_free_global.queue); i++) {
720 if (rt_free_global.queue[i] == NULL)
721 continue;
722 rt = rt_free_global.queue[i];
723 rt_free_global.queue[i] = NULL;
724 mutex_exit(&rt_free_global.lock);
725
726 atomic_dec_uint(&rt->rt_refcnt);
727 _rt_free(rt);
728 goto restart;
729 }
730 mutex_exit(&rt_free_global.lock);
731 }
732
733 void
734 rt_free(struct rtentry *rt)
735 {
736
737 KASSERT(rt->rt_refcnt > 0);
738 if (!rt_wait_ok()) {
739 int i;
740 mutex_enter(&rt_free_global.lock);
741 for (i = 0; i < sizeof(rt_free_global.queue); i++) {
742 if (rt_free_global.queue[i] == NULL) {
743 rt_free_global.queue[i] = rt;
744 break;
745 }
746 }
747 KASSERT(i < sizeof(rt_free_global.queue));
748 rt_ref(rt);
749 mutex_exit(&rt_free_global.lock);
750 workqueue_enqueue(rt_free_global.wq, &rt_free_global.wk, NULL);
751 } else {
752 atomic_dec_uint(&rt->rt_refcnt);
753 _rt_free(rt);
754 }
755 }
756
/*
 * Block until no in-place rtentry update (the section between
 * rt_update_prepare() and rt_update_finish()) is in progress.
 * Must be called from a sleepable context.
 */
static void
rt_update_wait(void)
{

	mutex_enter(&rt_update_global.lock);
	while (rt_update_global.ongoing) {
		dlog(LOG_DEBUG, "%s: waiting lwp=%p\n", __func__, curlwp);
		cv_wait(&rt_update_global.cv, &rt_update_global.lock);
		dlog(LOG_DEBUG, "%s: waited lwp=%p\n", __func__, curlwp);
	}
	mutex_exit(&rt_update_global.lock);
}
769
/*
 * Begin an in-place update of a route entry.  Sets RTF_UPDATING so
 * lookups either wait or fail (see rtalloc1_locked), serializes
 * against other updaters via rt_update_global, and waits until the
 * caller's reference is the only one left and all psref readers are
 * gone.  Returns -1 without side effects when the entry is already
 * being destroyed; on success the caller must eventually call
 * rt_update_finish().
 */
int
rt_update_prepare(struct rtentry *rt)
{

	dlog(LOG_DEBUG, "%s: updating rt=%p lwp=%p\n", __func__, rt, curlwp);

	RTCACHE_WLOCK();
	RT_WLOCK();
	/* If the entry is being destroyed, don't proceed the update. */
	if (!ISSET(rt->rt_flags, RTF_UP)) {
		RT_UNLOCK();
		RTCACHE_UNLOCK();
		return -1;
	}
	rt->rt_flags |= RTF_UPDATING;
	RT_UNLOCK();
	RTCACHE_UNLOCK();

	/* Become the single global updater. */
	mutex_enter(&rt_update_global.lock);
	while (rt_update_global.ongoing) {
		dlog(LOG_DEBUG, "%s: waiting ongoing updating rt=%p lwp=%p\n",
		    __func__, rt, curlwp);
		cv_wait(&rt_update_global.cv, &rt_update_global.lock);
		dlog(LOG_DEBUG, "%s: waited ongoing updating rt=%p lwp=%p\n",
		    __func__, rt, curlwp);
	}
	rt_update_global.ongoing = true;
	/* XXX need it to avoid rt_update_wait by updater itself. */
	rt_update_global.lwp = curlwp;
	mutex_exit(&rt_update_global.lock);

	/* Drain references down to ours and wait out psref readers. */
	rt_wait_refcnt("update", rt, 1);
	rt_wait_psref(rt);

	return 0;
}
806
/*
 * Finish an in-place update started by rt_update_prepare(): clear
 * RTF_UPDATING, give up the global-updater role, and wake every
 * thread blocked in rt_update_wait().
 */
void
rt_update_finish(struct rtentry *rt)
{

	RTCACHE_WLOCK();
	RT_WLOCK();
	rt->rt_flags &= ~RTF_UPDATING;
	RT_UNLOCK();
	RTCACHE_UNLOCK();

	mutex_enter(&rt_update_global.lock);
	rt_update_global.ongoing = false;
	rt_update_global.lwp = NULL;
	cv_broadcast(&rt_update_global.cv);
	mutex_exit(&rt_update_global.lock);

	dlog(LOG_DEBUG, "%s: updated rt=%p lwp=%p\n", __func__, rt, curlwp);
}
825
826 /*
827 * Force a routing table entry to the specified
828 * destination to go through the given gateway.
829 * Normally called as a result of a routing redirect
830 * message from the network layer.
831 *
832 * N.B.: must be called at splsoftnet
833 */
void
rtredirect(const struct sockaddr *dst, const struct sockaddr *gateway,
    const struct sockaddr *netmask, int flags, const struct sockaddr *src,
    struct rtentry **rtp)
{
	struct rtentry *rt;
	int error = 0;
	uint64_t *stat = NULL;
	struct rt_addrinfo info;
	struct ifaddr *ifa;
	struct psref psref;

	/* verify the gateway is directly reachable */
	if ((ifa = ifa_ifwithnet_psref(gateway, &psref)) == NULL) {
		error = ENETUNREACH;
		goto out;
	}
	rt = rtalloc1(dst, 0);
	/*
	 * If the redirect isn't from our current router for this dst,
	 * it's either old or wrong. If it redirects us to ourselves,
	 * we have a routing loop, perhaps as a result of an interface
	 * going down recently.
	 */
	if (!(flags & RTF_DONE) && rt &&
	    (sockaddr_cmp(src, rt->rt_gateway) != 0 || rt->rt_ifa != ifa))
		error = EINVAL;
	else {
		int s = pserialize_read_enter();
		struct ifaddr *_ifa;

		/* A redirect to one of our own addresses is a loop. */
		_ifa = ifa_ifwithaddr(gateway);
		if (_ifa != NULL)
			error = EHOSTUNREACH;
		pserialize_read_exit(s);
	}
	if (error)
		goto done;
	/*
	 * Create a new entry if we just got back a wildcard entry
	 * or the lookup failed. This is necessary for hosts
	 * which use routing redirects generated by smart gateways
	 * to dynamically build the routing tables.
	 */
	if (rt == NULL || (rt_mask(rt) && rt_mask(rt)->sa_len < 2))
		goto create;
	/*
	 * Don't listen to the redirect if it's
	 * for a route to an interface.
	 */
	if (rt->rt_flags & RTF_GATEWAY) {
		if (((rt->rt_flags & RTF_HOST) == 0) && (flags & RTF_HOST)) {
			/*
			 * Changing from route to net => route to host.
			 * Create new route, rather than smashing route to net.
			 */
create:
			if (rt != NULL)
				rt_unref(rt);
			flags |= RTF_GATEWAY | RTF_DYNAMIC;
			memset(&info, 0, sizeof(info));
			info.rti_info[RTAX_DST] = dst;
			info.rti_info[RTAX_GATEWAY] = gateway;
			info.rti_info[RTAX_NETMASK] = netmask;
			info.rti_ifa = ifa;
			info.rti_flags = flags;
			rt = NULL;
			error = rtrequest1(RTM_ADD, &info, &rt);
			if (rt != NULL)
				flags = rt->rt_flags;
			stat = &rtstat.rts_dynamic;
		} else {
			/*
			 * Smash the current notion of the gateway to
			 * this destination. Should check about netmask!!!
			 */
			/*
			 * FIXME NOMPSAFE: the rtentry is updated while
			 * references to it may still exist.
			 */
			error = rt_setgate(rt, gateway);
			if (error == 0) {
				rt->rt_flags |= RTF_MODIFIED;
				flags |= RTF_MODIFIED;
			}
			stat = &rtstat.rts_newgateway;
		}
	} else
		error = EHOSTUNREACH;
done:
	if (rt) {
		/* Pass the route to the caller if asked for, else drop it. */
		if (rtp != NULL && !error)
			*rtp = rt;
		else
			rt_unref(rt);
	}
out:
	if (error)
		rtstat.rts_badredirect++;
	else if (stat != NULL)
		(*stat)++;
	/* Report the redirect and its outcome on the routing socket. */
	memset(&info, 0, sizeof(info));
	info.rti_info[RTAX_DST] = dst;
	info.rti_info[RTAX_GATEWAY] = gateway;
	info.rti_info[RTAX_NETMASK] = netmask;
	info.rti_info[RTAX_AUTHOR] = src;
	rt_missmsg(RTM_REDIRECT, &info, flags, error);
	ifa_release(ifa, &psref);
}
943
944 /*
945 * Delete a route and generate a message.
946 * It doesn't free a passed rt.
947 */
static int
rtdeletemsg(struct rtentry *rt)
{
	int error;
	struct rt_addrinfo info;
	struct rtentry *retrt;

	/*
	 * Request the new route so that the entry is not actually
	 * deleted. That will allow the information being reported to
	 * be accurate (and consistent with route_output()).
	 */
	memset(&info, 0, sizeof(info));
	info.rti_info[RTAX_DST] = rt_getkey(rt);
	info.rti_info[RTAX_NETMASK] = rt_mask(rt);
	info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
	info.rti_flags = rt->rt_flags;
	error = rtrequest1(RTM_DELETE, &info, &retrt);

	/* Announce the deletion on the routing socket. */
	rt_missmsg(RTM_DELETE, &info, info.rti_flags, error);

	return error;
}
971
/*
 * Pick the ifaddr (and thus interface) to use for a route to "dst"
 * via "gateway" with the given flags.  On success the returned
 * ifaddr is psref-referenced through "psref" and must be released
 * with ifa_release(); returns NULL when no suitable address exists.
 */
struct ifaddr *
ifa_ifwithroute_psref(int flags, const struct sockaddr *dst,
    const struct sockaddr *gateway, struct psref *psref)
{
	struct ifaddr *ifa = NULL;

	if ((flags & RTF_GATEWAY) == 0) {
		/*
		 * If we are adding a route to an interface,
		 * and the interface is a pt to pt link
		 * we should search for the destination
		 * as our clue to the interface. Otherwise
		 * we can use the local address.
		 */
		if ((flags & RTF_HOST) && gateway->sa_family != AF_LINK)
			ifa = ifa_ifwithdstaddr_psref(dst, psref);
		if (ifa == NULL)
			ifa = ifa_ifwithaddr_psref(gateway, psref);
	} else {
		/*
		 * If we are adding a route to a remote net
		 * or host, the gateway may still be on the
		 * other end of a pt to pt link.
		 */
		ifa = ifa_ifwithdstaddr_psref(gateway, psref);
	}
	if (ifa == NULL)
		ifa = ifa_ifwithnet_psref(gateway, psref);
	if (ifa == NULL) {
		int s;
		struct rtentry *rt;

		/* Last resort: borrow the ifa of an existing route to dst. */
		rt = rtalloc1(dst, 0);
		if (rt == NULL)
			return NULL;
		/*
		 * Just in case. May not need to do this workaround.
		 * Revisit when working on rtentry MP-ification.
		 */
		s = pserialize_read_enter();
		IFADDR_READER_FOREACH(ifa, rt->rt_ifp) {
			if (ifa == rt->rt_ifa)
				break;
		}
		if (ifa != NULL)
			ifa_acquire(ifa, psref);
		pserialize_read_exit(s);
		rt_unref(rt);
		if (ifa == NULL)
			return NULL;
	}
	if (ifa->ifa_addr->sa_family != dst->sa_family) {
		struct ifaddr *nifa;
		int s;

		/* Prefer an address of dst's family on the same interface. */
		s = pserialize_read_enter();
		nifa = ifaof_ifpforaddr(dst, ifa->ifa_ifp);
		if (nifa != NULL) {
			ifa_release(ifa, psref);
			ifa_acquire(nifa, psref);
			ifa = nifa;
		}
		pserialize_read_exit(s);
	}
	return ifa;
}
1038
/*
 * If it succeeds and ret_nrt isn't NULL, refcnt of *ret_nrt is incremented.
 * The caller has to rtfree it by itself.
 */
1043 int
1044 rtrequest(int req, const struct sockaddr *dst, const struct sockaddr *gateway,
1045 const struct sockaddr *netmask, int flags, struct rtentry **ret_nrt)
1046 {
1047 struct rt_addrinfo info;
1048
1049 memset(&info, 0, sizeof(info));
1050 info.rti_flags = flags;
1051 info.rti_info[RTAX_DST] = dst;
1052 info.rti_info[RTAX_GATEWAY] = gateway;
1053 info.rti_info[RTAX_NETMASK] = netmask;
1054 return rtrequest1(req, &info, ret_nrt);
1055 }
1056
1057 /*
1058 * It's a utility function to add/remove a route to/from the routing table
1059 * and tell user processes the addition/removal on success.
1060 */
1061 int
1062 rtrequest_newmsg(const int req, const struct sockaddr *dst,
1063 const struct sockaddr *gateway, const struct sockaddr *netmask,
1064 const int flags)
1065 {
1066 int error;
1067 struct rtentry *ret_nrt = NULL;
1068
1069 KASSERT(req == RTM_ADD || req == RTM_DELETE);
1070
1071 error = rtrequest(req, dst, gateway, netmask, flags, &ret_nrt);
1072 if (error != 0)
1073 return error;
1074
1075 KASSERT(ret_nrt != NULL);
1076
1077 rt_newmsg(req, ret_nrt); /* tell user process */
1078 if (req == RTM_DELETE)
1079 rt_free(ret_nrt);
1080 else
1081 rt_unref(ret_nrt);
1082
1083 return 0;
1084 }
1085
/*
 * Resolve the ifnet for a route request when one was not set by the
 * caller.  Returns NULL if info->rti_ifp is already set or no interface
 * can be determined from RTAX_IFP; otherwise stores the interface in
 * info->rti_ifp (with a psref reference held) and returns it.
 */
struct ifnet *
rt_getifp(struct rt_addrinfo *info, struct psref *psref)
{
	const struct sockaddr *ifpaddr = info->rti_info[RTAX_IFP];

	/* Nothing to do if the caller already resolved the interface. */
	if (info->rti_ifp != NULL)
		return NULL;
	/*
	 * ifp may be specified by sockaddr_dl when protocol address
	 * is ambiguous
	 */
	if (ifpaddr != NULL && ifpaddr->sa_family == AF_LINK) {
		struct ifaddr *ifa;
		int s = pserialize_read_enter();

		/* ifa is only dereferenced inside this read section. */
		ifa = ifa_ifwithnet(ifpaddr);
		if (ifa != NULL)
			info->rti_ifp = if_get_byindex(ifa->ifa_ifp->if_index,
			    psref);
		pserialize_read_exit(s);
	}

	return info->rti_ifp;
}
1110
/*
 * Choose the ifaddr to attach to a route described by info.
 * Preference order: an exact match on RTAX_IFA; an address on
 * info->rti_ifp; otherwise a route-based lookup via
 * ifa_ifwithroute_psref().  On success the ifaddr is returned with a
 * psref reference held and stored in info->rti_ifa; info->rti_ifp is
 * filled in from the ifaddr if still unset.
 */
struct ifaddr *
rt_getifa(struct rt_addrinfo *info, struct psref *psref)
{
	struct ifaddr *ifa = NULL;
	const struct sockaddr *dst = info->rti_info[RTAX_DST];
	const struct sockaddr *gateway = info->rti_info[RTAX_GATEWAY];
	const struct sockaddr *ifaaddr = info->rti_info[RTAX_IFA];
	int flags = info->rti_flags;
	const struct sockaddr *sa;

	if (info->rti_ifa == NULL && ifaaddr != NULL) {
		ifa = ifa_ifwithaddr_psref(ifaaddr, psref);
		if (ifa != NULL)
			goto got;
	}

	/* Fall back to the most specific address available. */
	sa = ifaaddr != NULL ? ifaaddr :
	    (gateway != NULL ? gateway : dst);
	if (sa != NULL && info->rti_ifp != NULL)
		ifa = ifaof_ifpforaddr_psref(sa, info->rti_ifp, psref);
	else if (dst != NULL && gateway != NULL)
		ifa = ifa_ifwithroute_psref(flags, dst, gateway, psref);
	else if (sa != NULL)
		ifa = ifa_ifwithroute_psref(flags, sa, sa, psref);
	if (ifa == NULL)
		return NULL;
got:
	if (ifa->ifa_getifa != NULL) {
		/* FIXME NOMPSAFE */
		ifa = (*ifa->ifa_getifa)(ifa, dst);
		if (ifa == NULL)
			return NULL;
		/* Hold a reference on the ifaddr the hook handed back. */
		ifa_acquire(ifa, psref);
	}
	info->rti_ifa = ifa;
	if (info->rti_ifp == NULL)
		info->rti_ifp = ifa->ifa_ifp;
	return ifa;
}
1150
1151 /*
1152 * If it suceeds and ret_nrt isn't NULL, refcnt of ret_nrt is incremented.
1153 * The caller has to rtfree it by itself.
1154 */
1155 int
1156 rtrequest1(int req, struct rt_addrinfo *info, struct rtentry **ret_nrt)
1157 {
1158 int s = splsoftnet(), ss;
1159 int error = 0, rc;
1160 struct rtentry *rt;
1161 rtbl_t *rtbl;
1162 struct ifaddr *ifa = NULL, *ifa2 = NULL;
1163 struct sockaddr_storage maskeddst;
1164 const struct sockaddr *dst = info->rti_info[RTAX_DST];
1165 const struct sockaddr *gateway = info->rti_info[RTAX_GATEWAY];
1166 const struct sockaddr *netmask = info->rti_info[RTAX_NETMASK];
1167 int flags = info->rti_flags;
1168 struct psref psref_ifp, psref_ifa;
1169 int bound = 0;
1170 struct ifnet *ifp = NULL;
1171 bool need_to_release_ifa = true;
1172 bool need_unlock = true;
1173 #define senderr(x) { error = x ; goto bad; }
1174
1175 RT_WLOCK();
1176
1177 bound = curlwp_bind();
1178 if ((rtbl = rt_gettable(dst->sa_family)) == NULL)
1179 senderr(ESRCH);
1180 if (flags & RTF_HOST)
1181 netmask = NULL;
1182 switch (req) {
1183 case RTM_DELETE:
1184 if (netmask) {
1185 rt_maskedcopy(dst, (struct sockaddr *)&maskeddst,
1186 netmask);
1187 dst = (struct sockaddr *)&maskeddst;
1188 }
1189 if ((rt = rt_lookup(rtbl, dst, netmask)) == NULL)
1190 senderr(ESRCH);
1191 if ((rt = rt_deladdr(rtbl, dst, netmask)) == NULL)
1192 senderr(ESRCH);
1193 rt->rt_flags &= ~RTF_UP;
1194 if ((ifa = rt->rt_ifa)) {
1195 if (ifa->ifa_flags & IFA_ROUTE &&
1196 rt_ifa_connected(rt, ifa)) {
1197 RT_DPRINTF("rt->_rt_key = %p, ifa = %p, "
1198 "deleted IFA_ROUTE\n",
1199 (void *)rt->_rt_key, (void *)ifa);
1200 ifa->ifa_flags &= ~IFA_ROUTE;
1201 }
1202 if (ifa->ifa_rtrequest)
1203 ifa->ifa_rtrequest(RTM_DELETE, rt, info);
1204 ifa = NULL;
1205 }
1206 rttrash++;
1207 if (ret_nrt) {
1208 *ret_nrt = rt;
1209 rt_ref(rt);
1210 RT_REFCNT_TRACE(rt);
1211 }
1212 RT_UNLOCK();
1213 need_unlock = false;
1214 rt_timer_remove_all(rt);
1215 rtcache_clear_rtentry(dst->sa_family, rt);
1216 if (ret_nrt == NULL) {
1217 /* Adjust the refcount */
1218 rt_ref(rt);
1219 RT_REFCNT_TRACE(rt);
1220 rt_free(rt);
1221 }
1222 break;
1223
1224 case RTM_ADD:
1225 if (info->rti_ifa == NULL) {
1226 ifp = rt_getifp(info, &psref_ifp);
1227 ifa = rt_getifa(info, &psref_ifa);
1228 if (ifa == NULL)
1229 senderr(ENETUNREACH);
1230 } else {
1231 /* Caller should have a reference of ifa */
1232 ifa = info->rti_ifa;
1233 need_to_release_ifa = false;
1234 }
1235 rt = pool_get(&rtentry_pool, PR_NOWAIT);
1236 if (rt == NULL)
1237 senderr(ENOBUFS);
1238 memset(rt, 0, sizeof(*rt));
1239 rt->rt_flags = RTF_UP | flags;
1240 LIST_INIT(&rt->rt_timer);
1241
1242 RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);
1243 if (netmask) {
1244 rt_maskedcopy(dst, (struct sockaddr *)&maskeddst,
1245 netmask);
1246 rt_setkey(rt, (struct sockaddr *)&maskeddst, M_NOWAIT);
1247 } else {
1248 rt_setkey(rt, dst, M_NOWAIT);
1249 }
1250 RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);
1251 if (rt_getkey(rt) == NULL ||
1252 rt_setgate(rt, gateway) != 0) {
1253 pool_put(&rtentry_pool, rt);
1254 senderr(ENOBUFS);
1255 }
1256
1257 rt_set_ifa(rt, ifa);
1258 if (info->rti_info[RTAX_TAG] != NULL) {
1259 const struct sockaddr *tag;
1260 tag = rt_settag(rt, info->rti_info[RTAX_TAG]);
1261 if (tag == NULL)
1262 senderr(ENOBUFS);
1263 }
1264 RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);
1265
1266 ss = pserialize_read_enter();
1267 if (info->rti_info[RTAX_IFP] != NULL) {
1268 ifa2 = ifa_ifwithnet(info->rti_info[RTAX_IFP]);
1269 if (ifa2 != NULL)
1270 rt->rt_ifp = ifa2->ifa_ifp;
1271 else
1272 rt->rt_ifp = ifa->ifa_ifp;
1273 } else
1274 rt->rt_ifp = ifa->ifa_ifp;
1275 pserialize_read_exit(ss);
1276 cv_init(&rt->rt_cv, "rtentry");
1277 psref_target_init(&rt->rt_psref, rt_psref_class);
1278
1279 RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);
1280 rc = rt_addaddr(rtbl, rt, netmask);
1281 RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);
1282 if (rc != 0) {
1283 ifafree(ifa); /* for rt_set_ifa above */
1284 cv_destroy(&rt->rt_cv);
1285 rt_destroy(rt);
1286 pool_put(&rtentry_pool, rt);
1287 senderr(rc);
1288 }
1289 RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);
1290 if (ifa->ifa_rtrequest)
1291 ifa->ifa_rtrequest(req, rt, info);
1292 if (need_to_release_ifa)
1293 ifa_release(ifa, &psref_ifa);
1294 ifa = NULL;
1295 if_put(ifp, &psref_ifp);
1296 ifp = NULL;
1297 RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);
1298 if (ret_nrt) {
1299 *ret_nrt = rt;
1300 rt_ref(rt);
1301 RT_REFCNT_TRACE(rt);
1302 }
1303 RT_UNLOCK();
1304 need_unlock = false;
1305 rtflushall(dst->sa_family);
1306 break;
1307 case RTM_GET:
1308 if (netmask != NULL) {
1309 rt_maskedcopy(dst, (struct sockaddr *)&maskeddst,
1310 netmask);
1311 dst = (struct sockaddr *)&maskeddst;
1312 }
1313 if ((rt = rt_lookup(rtbl, dst, netmask)) == NULL)
1314 senderr(ESRCH);
1315 if (ret_nrt != NULL) {
1316 *ret_nrt = rt;
1317 rt_ref(rt);
1318 RT_REFCNT_TRACE(rt);
1319 }
1320 break;
1321 }
1322 bad:
1323 if (need_to_release_ifa)
1324 ifa_release(ifa, &psref_ifa);
1325 if_put(ifp, &psref_ifp);
1326 curlwp_bindx(bound);
1327 if (need_unlock)
1328 RT_UNLOCK();
1329 splx(s);
1330 return error;
1331 }
1332
/*
 * Replace the gateway sockaddr of a route, freeing the old one.
 * For RTF_GATEWAY routes the route's MTU may be clamped to the MTU of
 * the route to the new gateway.  Returns 0 or ENOMEM.
 */
int
rt_setgate(struct rtentry *rt, const struct sockaddr *gate)
{
	struct sockaddr *new, *old;

	KASSERT(rt->_rt_key != NULL);
	RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);

	new = sockaddr_dup(gate, M_ZERO | M_NOWAIT);
	if (new == NULL)
		return ENOMEM;

	old = rt->rt_gateway;
	rt->rt_gateway = new;
	if (old != NULL)
		sockaddr_free(old);

	KASSERT(rt->_rt_key != NULL);
	RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);

	if (rt->rt_flags & RTF_GATEWAY) {
		struct rtentry *gwrt;

		/* XXX we cannot call rtalloc1 if holding the rt lock */
		if (RT_LOCKED())
			gwrt = rtalloc1_locked(gate, 1, false);
		else
			gwrt = rtalloc1(gate, 1);
		/*
		 * If we switched gateways, grab the MTU from the new
		 * gateway route if the current MTU is greater than the
		 * MTU of the gateway.
		 * Note that, if the MTU of gateway is 0, we will reset the
		 * MTU of the route to run PMTUD again from scratch. XXX
		 */
		if (gwrt != NULL) {
			KASSERT(gwrt->_rt_key != NULL);
			RT_DPRINTF("gwrt->_rt_key = %p\n", gwrt->_rt_key);
			/* Respect an administratively locked MTU. */
			if ((rt->rt_rmx.rmx_locks & RTV_MTU) == 0 &&
			    rt->rt_rmx.rmx_mtu &&
			    rt->rt_rmx.rmx_mtu > gwrt->rt_rmx.rmx_mtu) {
				rt->rt_rmx.rmx_mtu = gwrt->rt_rmx.rmx_mtu;
			}
			rt_unref(gwrt);
		}
	}
	KASSERT(rt->_rt_key != NULL);
	RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);
	return 0;
}
1383
1384 static void
1385 rt_maskedcopy(const struct sockaddr *src, struct sockaddr *dst,
1386 const struct sockaddr *netmask)
1387 {
1388 const char *netmaskp = &netmask->sa_data[0],
1389 *srcp = &src->sa_data[0];
1390 char *dstp = &dst->sa_data[0];
1391 const char *maskend = (char *)dst + MIN(netmask->sa_len, src->sa_len);
1392 const char *srcend = (char *)dst + src->sa_len;
1393
1394 dst->sa_len = src->sa_len;
1395 dst->sa_family = src->sa_family;
1396
1397 while (dstp < maskend)
1398 *dstp++ = *srcp++ & *netmaskp++;
1399 if (dstp < srcend)
1400 memset(dstp, 0, (size_t)(srcend - dstp));
1401 }
1402
1403 /*
1404 * Inform the routing socket of a route change.
1405 */
1406 void
1407 rt_newmsg(const int cmd, const struct rtentry *rt)
1408 {
1409 struct rt_addrinfo info;
1410
1411 memset((void *)&info, 0, sizeof(info));
1412 info.rti_info[RTAX_DST] = rt_getkey(rt);
1413 info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
1414 info.rti_info[RTAX_NETMASK] = rt_mask(rt);
1415 if (rt->rt_ifp) {
1416 info.rti_info[RTAX_IFP] = rt->rt_ifp->if_dl->ifa_addr;
1417 info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr;
1418 }
1419
1420 rt_missmsg(cmd, &info, rt->rt_flags, 0);
1421 }
1422
1423 /*
1424 * Set up or tear down a routing table entry, normally
1425 * for an interface.
1426 */
1427 int
1428 rtinit(struct ifaddr *ifa, int cmd, int flags)
1429 {
1430 struct rtentry *rt;
1431 struct sockaddr *dst, *odst;
1432 struct sockaddr_storage maskeddst;
1433 struct rtentry *nrt = NULL;
1434 int error;
1435 struct rt_addrinfo info;
1436
1437 dst = flags & RTF_HOST ? ifa->ifa_dstaddr : ifa->ifa_addr;
1438 if (cmd == RTM_DELETE) {
1439 if ((flags & RTF_HOST) == 0 && ifa->ifa_netmask) {
1440 /* Delete subnet route for this interface */
1441 odst = dst;
1442 dst = (struct sockaddr *)&maskeddst;
1443 rt_maskedcopy(odst, dst, ifa->ifa_netmask);
1444 }
1445 if ((rt = rtalloc1(dst, 0)) != NULL) {
1446 if (rt->rt_ifa != ifa) {
1447 rt_unref(rt);
1448 return (flags & RTF_HOST) ? EHOSTUNREACH
1449 : ENETUNREACH;
1450 }
1451 rt_unref(rt);
1452 }
1453 }
1454 memset(&info, 0, sizeof(info));
1455 info.rti_ifa = ifa;
1456 info.rti_flags = flags | ifa->ifa_flags;
1457 info.rti_info[RTAX_DST] = dst;
1458 info.rti_info[RTAX_GATEWAY] = ifa->ifa_addr;
1459
1460 /*
1461 * XXX here, it seems that we are assuming that ifa_netmask is NULL
1462 * for RTF_HOST. bsdi4 passes NULL explicitly (via intermediate
1463 * variable) when RTF_HOST is 1. still not sure if i can safely
1464 * change it to meet bsdi4 behavior.
1465 */
1466 if (cmd != RTM_LLINFO_UPD)
1467 info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask;
1468 error = rtrequest1((cmd == RTM_LLINFO_UPD) ? RTM_GET : cmd, &info,
1469 &nrt);
1470 if (error != 0)
1471 return error;
1472
1473 rt = nrt;
1474 RT_REFCNT_TRACE(rt);
1475 switch (cmd) {
1476 case RTM_DELETE:
1477 rt_newmsg(cmd, rt);
1478 rt_free(rt);
1479 break;
1480 case RTM_LLINFO_UPD:
1481 if (cmd == RTM_LLINFO_UPD && ifa->ifa_rtrequest != NULL)
1482 ifa->ifa_rtrequest(RTM_LLINFO_UPD, rt, &info);
1483 rt_newmsg(RTM_CHANGE, rt);
1484 rt_unref(rt);
1485 break;
1486 case RTM_ADD:
1487 /*
1488 * FIXME NOMPAFE: the rtentry is updated with the existence
1489 * of refeferences of it.
1490 */
1491 /*
1492 * XXX it looks just reverting rt_ifa replaced by ifa_rtrequest
1493 * called via rtrequest1. Can we just prevent the replacement
1494 * somehow and remove the following code? And also doesn't
1495 * calling ifa_rtrequest(RTM_ADD) replace rt_ifa again?
1496 */
1497 if (rt->rt_ifa != ifa) {
1498 printf("rtinit: wrong ifa (%p) was (%p)\n", ifa,
1499 rt->rt_ifa);
1500 if (rt->rt_ifa->ifa_rtrequest != NULL) {
1501 rt->rt_ifa->ifa_rtrequest(RTM_DELETE, rt,
1502 &info);
1503 }
1504 rt_replace_ifa(rt, ifa);
1505 rt->rt_ifp = ifa->ifa_ifp;
1506 if (ifa->ifa_rtrequest != NULL)
1507 ifa->ifa_rtrequest(RTM_ADD, rt, &info);
1508 }
1509 rt_newmsg(cmd, rt);
1510 rt_unref(rt);
1511 RT_REFCNT_TRACE(rt);
1512 break;
1513 }
1514 return error;
1515 }
1516
1517 /*
1518 * Create a local route entry for the address.
1519 * Announce the addition of the address and the route to the routing socket.
1520 */
1521 int
1522 rt_ifa_addlocal(struct ifaddr *ifa)
1523 {
1524 struct rtentry *rt;
1525 int e;
1526
1527 /* If there is no loopback entry, allocate one. */
1528 rt = rtalloc1(ifa->ifa_addr, 0);
1529 #ifdef RT_DEBUG
1530 if (rt != NULL)
1531 dump_rt(rt);
1532 #endif
1533 if (rt == NULL || (rt->rt_flags & RTF_HOST) == 0 ||
1534 (rt->rt_ifp->if_flags & IFF_LOOPBACK) == 0)
1535 {
1536 struct rt_addrinfo info;
1537 struct rtentry *nrt;
1538
1539 memset(&info, 0, sizeof(info));
1540 info.rti_flags = RTF_HOST | RTF_LOCAL;
1541 if (!(ifa->ifa_ifp->if_flags & (IFF_LOOPBACK|IFF_POINTOPOINT)))
1542 info.rti_flags |= RTF_LLDATA;
1543 info.rti_info[RTAX_DST] = ifa->ifa_addr;
1544 info.rti_info[RTAX_GATEWAY] =
1545 (const struct sockaddr *)ifa->ifa_ifp->if_sadl;
1546 info.rti_ifa = ifa;
1547 nrt = NULL;
1548 e = rtrequest1(RTM_ADD, &info, &nrt);
1549 if (nrt && ifa != nrt->rt_ifa)
1550 rt_replace_ifa(nrt, ifa);
1551 rt_newaddrmsg(RTM_ADD, ifa, e, nrt);
1552 if (nrt != NULL) {
1553 #ifdef RT_DEBUG
1554 dump_rt(nrt);
1555 #endif
1556 rt_unref(nrt);
1557 RT_REFCNT_TRACE(nrt);
1558 }
1559 } else {
1560 e = 0;
1561 rt_newaddrmsg(RTM_NEWADDR, ifa, 0, NULL);
1562 }
1563 if (rt != NULL)
1564 rt_unref(rt);
1565 return e;
1566 }
1567
1568 /*
1569 * Remove the local route entry for the address.
1570 * Announce the removal of the address and the route to the routing socket.
1571 */
1572 int
1573 rt_ifa_remlocal(struct ifaddr *ifa, struct ifaddr *alt_ifa)
1574 {
1575 struct rtentry *rt;
1576 int e = 0;
1577
1578 rt = rtalloc1(ifa->ifa_addr, 0);
1579
1580 /*
1581 * Before deleting, check if a corresponding loopbacked
1582 * host route surely exists. With this check, we can avoid
1583 * deleting an interface direct route whose destination is
1584 * the same as the address being removed. This can happen
1585 * when removing a subnet-router anycast address on an
1586 * interface attached to a shared medium.
1587 */
1588 if (rt != NULL &&
1589 (rt->rt_flags & RTF_HOST) &&
1590 (rt->rt_ifp->if_flags & IFF_LOOPBACK))
1591 {
1592 /* If we cannot replace the route's ifaddr with the equivalent
1593 * ifaddr of another interface, I believe it is safest to
1594 * delete the route.
1595 */
1596 if (alt_ifa == NULL) {
1597 e = rtdeletemsg(rt);
1598 if (e == 0) {
1599 rt_unref(rt);
1600 rt_free(rt);
1601 rt = NULL;
1602 }
1603 rt_newaddrmsg(RTM_DELADDR, ifa, 0, NULL);
1604 } else {
1605 rt_replace_ifa(rt, alt_ifa);
1606 rt_newmsg(RTM_CHANGE, rt);
1607 }
1608 } else
1609 rt_newaddrmsg(RTM_DELADDR, ifa, 0, NULL);
1610 if (rt != NULL)
1611 rt_unref(rt);
1612 return e;
1613 }
1614
1615 /*
1616 * Route timer routines. These routes allow functions to be called
1617 * for various routes at any time. This is useful in supporting
1618 * path MTU discovery and redirect route deletion.
1619 *
1620 * This is similar to some BSDI internal functions, but it provides
1621 * for multiple queues for efficiency's sake...
1622 */
1623
1624 LIST_HEAD(, rttimer_queue) rttimer_queue_head;
1625 static int rt_init_done = 0;
1626
1627 /*
1628 * Some subtle order problems with domain initialization mean that
1629 * we cannot count on this being run from rt_init before various
1630 * protocol initializations are done. Therefore, we make sure
1631 * that this is run when the first queue is added...
1632 */
1633
1634 static void rt_timer_work(struct work *, void *);
1635
1636 static void
1637 rt_timer_init(void)
1638 {
1639 int error;
1640
1641 assert(rt_init_done == 0);
1642
1643 /* XXX should be in rt_init */
1644 rw_init(&rt_lock);
1645 rw_init(&rtcache_lock);
1646
1647 LIST_INIT(&rttimer_queue_head);
1648 callout_init(&rt_timer_ch, CALLOUT_MPSAFE);
1649 error = workqueue_create(&rt_timer_wq, "rt_timer",
1650 rt_timer_work, NULL, PRI_SOFTNET, IPL_SOFTNET, WQ_MPSAFE);
1651 if (error)
1652 panic("%s: workqueue_create failed (%d)\n", __func__, error);
1653 callout_reset(&rt_timer_ch, hz, rt_timer_timer, NULL);
1654 rt_init_done = 1;
1655 }
1656
1657 struct rttimer_queue *
1658 rt_timer_queue_create(u_int timeout)
1659 {
1660 struct rttimer_queue *rtq;
1661
1662 if (rt_init_done == 0)
1663 rt_timer_init();
1664
1665 R_Malloc(rtq, struct rttimer_queue *, sizeof *rtq);
1666 if (rtq == NULL)
1667 return NULL;
1668 memset(rtq, 0, sizeof(*rtq));
1669
1670 rtq->rtq_timeout = timeout;
1671 TAILQ_INIT(&rtq->rtq_head);
1672 RT_WLOCK();
1673 LIST_INSERT_HEAD(&rttimer_queue_head, rtq, rtq_link);
1674 RT_UNLOCK();
1675
1676 return rtq;
1677 }
1678
/*
 * Change the timeout (seconds) applied to entries of the given queue.
 * Takes effect the next time the queue is scanned.
 */
void
rt_timer_queue_change(struct rttimer_queue *rtq, long timeout)
{

	rtq->rtq_timeout = timeout;
}
1685
/*
 * Fire and discard every timer on the queue.  Called with the routing
 * write lock held; the lock is dropped around each callback (which may
 * sleep or take the lock itself) and reacquired afterwards.
 */
static void
rt_timer_queue_remove_all(struct rttimer_queue *rtq)
{
	struct rttimer *r;

	RT_ASSERT_WLOCK();

	while ((r = TAILQ_FIRST(&rtq->rtq_head)) != NULL) {
		LIST_REMOVE(r, rtt_link);
		TAILQ_REMOVE(&rtq->rtq_head, r, rtt_next);
		/* Keep the rtentry alive across the unlocked callback. */
		rt_ref(r->rtt_rt); /* XXX */
		RT_REFCNT_TRACE(r->rtt_rt);
		RT_UNLOCK();
		(*r->rtt_func)(r->rtt_rt, r);
		pool_put(&rttimer_pool, r);
		RT_WLOCK();
		if (rtq->rtq_count > 0)
			rtq->rtq_count--;
		else
			printf("rt_timer_queue_remove_all: "
			    "rtq_count reached 0\n");
	}
}
1709
/*
 * Drain a timer queue and unlink it from the global list.
 */
void
rt_timer_queue_destroy(struct rttimer_queue *rtq)
{

	RT_WLOCK();
	rt_timer_queue_remove_all(rtq);
	LIST_REMOVE(rtq, rtq_link);
	RT_UNLOCK();

	/*
	 * Caller is responsible for freeing the rttimer_queue structure.
	 */
}
1723
/*
 * Return the number of timers currently on the queue (unlocked read).
 */
unsigned long
rt_timer_count(struct rttimer_queue *rtq)
{
	return rtq->rtq_count;
}
1729
1730 static void
1731 rt_timer_remove_all(struct rtentry *rt)
1732 {
1733 struct rttimer *r;
1734
1735 RT_WLOCK();
1736 while ((r = LIST_FIRST(&rt->rt_timer)) != NULL) {
1737 LIST_REMOVE(r, rtt_link);
1738 TAILQ_REMOVE(&r->rtt_queue->rtq_head, r, rtt_next);
1739 if (r->rtt_queue->rtq_count > 0)
1740 r->rtt_queue->rtq_count--;
1741 else
1742 printf("rt_timer_remove_all: rtq_count reached 0\n");
1743 pool_put(&rttimer_pool, r);
1744 }
1745 RT_UNLOCK();
1746 }
1747
1748 int
1749 rt_timer_add(struct rtentry *rt,
1750 void (*func)(struct rtentry *, struct rttimer *),
1751 struct rttimer_queue *queue)
1752 {
1753 struct rttimer *r;
1754
1755 KASSERT(func != NULL);
1756 RT_WLOCK();
1757 /*
1758 * If there's already a timer with this action, destroy it before
1759 * we add a new one.
1760 */
1761 LIST_FOREACH(r, &rt->rt_timer, rtt_link) {
1762 if (r->rtt_func == func)
1763 break;
1764 }
1765 if (r != NULL) {
1766 LIST_REMOVE(r, rtt_link);
1767 TAILQ_REMOVE(&r->rtt_queue->rtq_head, r, rtt_next);
1768 if (r->rtt_queue->rtq_count > 0)
1769 r->rtt_queue->rtq_count--;
1770 else
1771 printf("rt_timer_add: rtq_count reached 0\n");
1772 } else {
1773 r = pool_get(&rttimer_pool, PR_NOWAIT);
1774 if (r == NULL) {
1775 RT_UNLOCK();
1776 return ENOBUFS;
1777 }
1778 }
1779
1780 memset(r, 0, sizeof(*r));
1781
1782 r->rtt_rt = rt;
1783 r->rtt_time = time_uptime;
1784 r->rtt_func = func;
1785 r->rtt_queue = queue;
1786 LIST_INSERT_HEAD(&rt->rt_timer, r, rtt_link);
1787 TAILQ_INSERT_TAIL(&queue->rtq_head, r, rtt_next);
1788 r->rtt_queue->rtq_count++;
1789
1790 RT_UNLOCK();
1791
1792 return 0;
1793 }
1794
/*
 * Workqueue handler: scan every timer queue and fire the timers whose
 * timeout has expired.  The routing lock is dropped around each
 * callback and reacquired afterwards; finally the periodic callout is
 * rearmed.
 */
static void
rt_timer_work(struct work *wk, void *arg)
{
	struct rttimer_queue *rtq;
	struct rttimer *r;

	RT_WLOCK();
	LIST_FOREACH(rtq, &rttimer_queue_head, rtq_link) {
		/* Entries are in arrival order, so the head expires first. */
		while ((r = TAILQ_FIRST(&rtq->rtq_head)) != NULL &&
		    (r->rtt_time + rtq->rtq_timeout) < time_uptime) {
			LIST_REMOVE(r, rtt_link);
			TAILQ_REMOVE(&rtq->rtq_head, r, rtt_next);
			/* Keep the rtentry alive across the callback. */
			rt_ref(r->rtt_rt); /* XXX */
			RT_REFCNT_TRACE(r->rtt_rt);
			RT_UNLOCK();
			(*r->rtt_func)(r->rtt_rt, r);
			pool_put(&rttimer_pool, r);
			RT_WLOCK();
			if (rtq->rtq_count > 0)
				rtq->rtq_count--;
			else
				printf("rt_timer_timer: rtq_count reached 0\n");
		}
	}
	RT_UNLOCK();

	callout_reset(&rt_timer_ch, hz, rt_timer_timer, NULL);
}
1823
/*
 * Callout handler: defer the actual timer scan to the workqueue so it
 * runs in thread context.
 */
static void
rt_timer_timer(void *arg)
{

	workqueue_enqueue(rt_timer_wq, &rt_timer_wk, NULL);
}
1830
/*
 * Populate an empty route cache entry by looking up its destination in
 * the routing table.  flag is passed to rtalloc1 (non-zero requests
 * cloning).  Called with the rtcache write lock held.  Returns the
 * cached rtentry or NULL.
 */
static struct rtentry *
_rtcache_init(struct route *ro, int flag)
{
	struct rtentry *rt;

	rtcache_invariants(ro);
	KASSERT(ro->_ro_rt == NULL);
	RTCACHE_ASSERT_WLOCK();

	if (rtcache_getdst(ro) == NULL)
		return NULL;
	ro->ro_invalid = false;
	rt = rtalloc1(rtcache_getdst(ro), flag);
	if (rt != NULL && ISSET(rt->rt_flags, RTF_UP)) {
		ro->_ro_rt = rt;
		KASSERT(!ISSET(rt->rt_flags, RTF_UPDATING));
		/* Swap the rtalloc1 reference for a cache (psref) one. */
		rtcache_ref(rt, ro);
		rt_unref(rt);
		/* Hook the cache onto the domain's invalidation list. */
		rtcache(ro);
	} else if (rt != NULL)
		rt_unref(rt);

	rtcache_invariants(ro);
	return ro->_ro_rt;
}
1856
/*
 * Initialize a route cache with cloning enabled (locked wrapper around
 * _rtcache_init).
 */
struct rtentry *
rtcache_init(struct route *ro)
{
	struct rtentry *rt;
	RTCACHE_WLOCK();
	rt = _rtcache_init(ro, 1);
	RTCACHE_UNLOCK();
	return rt;
}
1866
/*
 * Initialize a route cache without cloning (locked wrapper around
 * _rtcache_init).
 */
struct rtentry *
rtcache_init_noclone(struct route *ro)
{
	struct rtentry *rt;
	RTCACHE_WLOCK();
	rt = _rtcache_init(ro, 0);
	RTCACHE_UNLOCK();
	return rt;
}
1876
/*
 * Discard the cached route and re-resolve the destination; clone
 * controls whether the new lookup may clone routes.
 */
struct rtentry *
rtcache_update(struct route *ro, int clone)
{
	struct rtentry *rt;
	RTCACHE_WLOCK();
	rtcache_clear(ro);
	rt = _rtcache_init(ro, clone);
	RTCACHE_UNLOCK();
	return rt;
}
1887
/*
 * Duplicate old_ro into new_ro: copy the destination sockaddr and, if
 * the old cache holds a valid route, share that rtentry (the reference
 * taken by rtcache_validate is handed to new_ro).
 */
void
rtcache_copy(struct route *new_ro, struct route *old_ro)
{
	struct rtentry *rt;
	int ret;

	KASSERT(new_ro != old_ro);
	rtcache_invariants(new_ro);
	rtcache_invariants(old_ro);

	/* Takes a reference on rt that new_ro inherits below. */
	rt = rtcache_validate(old_ro);

	if (rtcache_getdst(old_ro) == NULL)
		goto out;
	ret = rtcache_setdst(new_ro, rtcache_getdst(old_ro));
	if (ret != 0)
		goto out;

	RTCACHE_WLOCK();
	new_ro->ro_invalid = false;
	if ((new_ro->_ro_rt = rt) != NULL)
		rtcache(new_ro);
	rtcache_invariants(new_ro);
	RTCACHE_UNLOCK();
out:
	rtcache_unref(rt, old_ro);
	return;
}
1916
1917 static struct dom_rtlist invalid_routes = LIST_HEAD_INITIALIZER(dom_rtlist);
1918
#if defined(RT_DEBUG) && defined(NET_MPSAFE)
/*
 * Debug helper: log which CPU/LWP is taking or dropping a psref on a
 * cached route.  Compiled in only for RT_DEBUG && NET_MPSAFE kernels.
 */
static void
rtcache_trace(const char *func, struct rtentry *rt, struct route *ro)
{
	char dst[64];

	sockaddr_format(ro->ro_sa, dst, 64);
	printf("trace: %s:\tdst=%s cpu=%d lwp=%p psref=%p target=%p\n", func, dst,
	    cpu_index(curcpu()), curlwp, &ro->ro_psref, &rt->rt_psref);
}
#define RTCACHE_PSREF_TRACE(rt, ro)	rtcache_trace(__func__, (rt), (ro))
#else
/* Expands to nothing on non-debug kernels. */
#define RTCACHE_PSREF_TRACE(rt, ro)	do {} while (0)
#endif
1933
/*
 * Take a psref reference on a cached route on behalf of ro.  On
 * NET_MPSAFE kernels this binds the LWP to the CPU for the lifetime of
 * the reference; otherwise it is a no-op.
 */
static void
rtcache_ref(struct rtentry *rt, struct route *ro)
{

	KASSERT(rt != NULL);

#ifdef NET_MPSAFE
	RTCACHE_PSREF_TRACE(rt, ro);
	ro->ro_bound = curlwp_bind();
	psref_acquire(&ro->ro_psref, &rt->rt_psref, rt_psref_class);
#endif
}
1946
/*
 * Release the psref reference taken by rtcache_ref (no-op when rt is
 * NULL or on non-NET_MPSAFE kernels).
 */
void
rtcache_unref(struct rtentry *rt, struct route *ro)
{

	if (rt == NULL)
		return;

#ifdef NET_MPSAFE
	psref_release(&ro->ro_psref, &rt->rt_psref, rt_psref_class);
	curlwp_bindx(ro->ro_bound);
	RTCACHE_PSREF_TRACE(rt, ro);
#endif
}
1960
/*
 * Return the cached route if it is still usable (cache not invalidated,
 * route up and attached to an interface), with a reference held via
 * rtcache_ref; otherwise NULL.  Called with the rtcache read lock held.
 * If the route is being updated and we may sleep, all locks are
 * dropped, the update is waited out, and the check is retried.
 */
static struct rtentry *
rtcache_validate_locked(struct route *ro)
{
	struct rtentry *rt = NULL;

 retry:
	rt = ro->_ro_rt;
	rtcache_invariants(ro);

	if (ro->ro_invalid) {
		rt = NULL;
		goto out;
	}

	RT_RLOCK();
	if (rt != NULL && (rt->rt_flags & RTF_UP) != 0 && rt->rt_ifp != NULL) {
		if (ISSET(rt->rt_flags, RTF_UPDATING)) {
			if (rt_wait_ok()) {
				RT_UNLOCK();
				RTCACHE_UNLOCK();
				/* We can wait until the update is complete */
				rt_update_wait();
				RTCACHE_RLOCK();
				goto retry;
			} else {
				/* Can't sleep here: treat as a miss. */
				rt = NULL;
			}
		} else
			rtcache_ref(rt, ro);
	} else
		rt = NULL;
	RT_UNLOCK();
 out:
	return rt;
}
1996
/*
 * Locked wrapper around rtcache_validate_locked().  Returns the cached
 * route with a reference held, or NULL.
 */
struct rtentry *
rtcache_validate(struct route *ro)
{
	struct rtentry *rt;

	RTCACHE_RLOCK();
	rt = rtcache_validate_locked(ro);
	RTCACHE_UNLOCK();
	return rt;
}
2007
2008 static void
2009 rtcache_invalidate(struct dom_rtlist *rtlist)
2010 {
2011 struct route *ro;
2012
2013 RTCACHE_ASSERT_WLOCK();
2014
2015 while ((ro = LIST_FIRST(rtlist)) != NULL) {
2016 rtcache_invariants(ro);
2017 KASSERT(ro->_ro_rt != NULL);
2018 ro->ro_invalid = true;
2019 LIST_REMOVE(ro, ro_rtcache_next);
2020 LIST_INSERT_HEAD(&invalid_routes, ro, ro_rtcache_next);
2021 rtcache_invariants(ro);
2022 }
2023 }
2024
2025 static void
2026 rtcache_clear_rtentry(int family, struct rtentry *rt)
2027 {
2028 struct domain *dom;
2029 struct route *ro, *nro;
2030
2031 if ((dom = pffinddomain(family)) == NULL)
2032 return;
2033
2034 RTCACHE_WLOCK();
2035 LIST_FOREACH_SAFE(ro, &dom->dom_rtcache, ro_rtcache_next, nro) {
2036 if (ro->_ro_rt == rt)
2037 rtcache_clear(ro);
2038 }
2039 RTCACHE_UNLOCK();
2040 }
2041
/*
 * Drop the cached rtentry (if any) from ro and unhook ro from its
 * domain's cache list.  The destination sockaddr is kept.  Caller
 * holds the rtcache write lock.
 */
static void
rtcache_clear(struct route *ro)
{

	RTCACHE_ASSERT_WLOCK();

	rtcache_invariants(ro);
	if (ro->_ro_rt == NULL)
		return;

	LIST_REMOVE(ro, ro_rtcache_next);

	ro->_ro_rt = NULL;
	ro->ro_invalid = false;
	rtcache_invariants(ro);
}
2058
/*
 * Look up dst through the route cache.  On a hit (same destination,
 * valid route) the cached route is returned with a reference held and
 * *hitp is set to 1.  On any miss the read lock is upgraded to a write
 * lock (by drop-and-retake; the entry is re-derived afterwards), the
 * cache is reset to dst, and a fresh lookup is performed with *hitp
 * set to 0.
 */
struct rtentry *
rtcache_lookup2(struct route *ro, const struct sockaddr *dst,
    int clone, int *hitp)
{
	const struct sockaddr *odst;
	struct rtentry *rt = NULL;

	RTCACHE_RLOCK();
	odst = rtcache_getdst(ro);
	if (odst == NULL) {
		/* No destination cached yet: take the write lock and miss. */
		RTCACHE_UNLOCK();
		RTCACHE_WLOCK();
		goto miss;
	}

	if (sockaddr_cmp(odst, dst) != 0) {
		/* Cached destination differs: discard and redo. */
		RTCACHE_UNLOCK();
		RTCACHE_WLOCK();
		rtcache_free_locked(ro);
		goto miss;
	}

	rt = rtcache_validate_locked(ro);
	if (rt == NULL) {
		/* Stale route: clear it and redo the lookup. */
		RTCACHE_UNLOCK();
		RTCACHE_WLOCK();
		rtcache_clear(ro);
		goto miss;
	}

	rtcache_invariants(ro);

	RTCACHE_UNLOCK();
	if (hitp != NULL)
		*hitp = 1;
	return rt;
miss:
	if (hitp != NULL)
		*hitp = 0;
	if (rtcache_setdst_locked(ro, dst) == 0)
		rt = _rtcache_init(ro, clone);

	rtcache_invariants(ro);

	RTCACHE_UNLOCK();
	return rt;
}
2106
/*
 * Discard both the cached route and the destination sockaddr.  Caller
 * holds the rtcache write lock.
 */
static void
rtcache_free_locked(struct route *ro)
{

	RTCACHE_ASSERT_WLOCK();
	rtcache_clear(ro);
	if (ro->ro_sa != NULL) {
		sockaddr_free(ro->ro_sa);
		ro->ro_sa = NULL;
	}
	rtcache_invariants(ro);
}
2119
/*
 * Locked wrapper around rtcache_free_locked().
 */
void
rtcache_free(struct route *ro)
{

	RTCACHE_WLOCK();
	rtcache_free_locked(ro);
	RTCACHE_UNLOCK();
}
2128
2129 static int
2130 rtcache_setdst_locked(struct route *ro, const struct sockaddr *sa)
2131 {
2132 KASSERT(sa != NULL);
2133
2134 RTCACHE_ASSERT_WLOCK();
2135
2136 rtcache_invariants(ro);
2137 if (ro->ro_sa != NULL) {
2138 if (ro->ro_sa->sa_family == sa->sa_family) {
2139 rtcache_clear(ro);
2140 sockaddr_copy(ro->ro_sa, ro->ro_sa->sa_len, sa);
2141 rtcache_invariants(ro);
2142 return 0;
2143 }
2144 /* free ro_sa, wrong family */
2145 rtcache_free_locked(ro);
2146 }
2147
2148 KASSERT(ro->_ro_rt == NULL);
2149
2150 if ((ro->ro_sa = sockaddr_dup(sa, M_ZERO | M_NOWAIT)) == NULL) {
2151 rtcache_invariants(ro);
2152 return ENOMEM;
2153 }
2154 rtcache_invariants(ro);
2155 return 0;
2156 }
2157
/*
 * Locked wrapper around rtcache_setdst_locked().  Returns 0 or ENOMEM.
 */
int
rtcache_setdst(struct route *ro, const struct sockaddr *sa)
{
	int error;

	RTCACHE_WLOCK();
	error = rtcache_setdst_locked(ro, sa);
	RTCACHE_UNLOCK();

	return error;
}
2169
2170 const struct sockaddr *
2171 rt_settag(struct rtentry *rt, const struct sockaddr *tag)
2172 {
2173 if (rt->rt_tag != tag) {
2174 if (rt->rt_tag != NULL)
2175 sockaddr_free(rt->rt_tag);
2176 rt->rt_tag = sockaddr_dup(tag, M_ZERO | M_NOWAIT);
2177 }
2178 return rt->rt_tag;
2179 }
2180
/*
 * Return the tag attached to the route, or NULL.
 */
struct sockaddr *
rt_gettag(const struct rtentry *rt)
{
	return rt->rt_tag;
}
2186
2187 int
2188 rt_check_reject_route(const struct rtentry *rt, const struct ifnet *ifp)
2189 {
2190
2191 if ((rt->rt_flags & RTF_REJECT) != 0) {
2192 /* Mimic looutput */
2193 if (ifp->if_flags & IFF_LOOPBACK)
2194 return (rt->rt_flags & RTF_HOST) ?
2195 EHOSTUNREACH : ENETUNREACH;
2196 else if (rt->rt_rmx.rmx_expire == 0 ||
2197 time_uptime < rt->rt_rmx.rmx_expire)
2198 return (rt->rt_flags & RTF_GATEWAY) ?
2199 EHOSTUNREACH : EHOSTDOWN;
2200 }
2201
2202 return 0;
2203 }
2204
/*
 * Repeatedly search the routing table of the given family for an entry
 * matching predicate f and delete it, until no match remains.  Each
 * candidate is pinned with an extra refcnt before the lock is dropped
 * so it cannot disappear before the rtrequest(RTM_DELETE).
 */
void
rt_delete_matched_entries(sa_family_t family, int (*f)(struct rtentry *, void *),
    void *v)
{

	for (;;) {
		int s;
		int error;
		struct rtentry *rt, *retrt = NULL;

		RT_RLOCK();
		s = splsoftnet();
		rt = rtbl_search_matched_entry(family, f, v);
		if (rt == NULL) {
			splx(s);
			RT_UNLOCK();
			return;
		}
		/* Pin the entry before dropping the lock. */
		rt->rt_refcnt++;
		splx(s);
		RT_UNLOCK();

		error = rtrequest(RTM_DELETE, rt_getkey(rt), rt->rt_gateway,
		    rt_mask(rt), rt->rt_flags, &retrt);
		if (error == 0) {
			KASSERT(retrt == rt);
			KASSERT((retrt->rt_flags & RTF_UP) == 0);
			retrt->rt_ifp = NULL;
			rt_unref(rt);
			rt_free(retrt);
		} else if (error == ESRCH) {
			/* Someone deleted the entry already. */
			rt_unref(rt);
		} else {
			log(LOG_ERR, "%s: unable to delete rtentry @ %p, "
			    "error = %d\n", rt->rt_ifp->if_xname, rt, error);
			/* XXX how to treat this case? */
		}
	}
}
2245
/*
 * Walk the routing table of the given family, calling f on each entry
 * under the routing read lock.  A non-zero return from f aborts the
 * walk and is propagated.
 */
int
rt_walktree(sa_family_t family, int (*f)(struct rtentry *, void *), void *v)
{
	int error;

	RT_RLOCK();
	error = rtbl_walktree(family, f, v);
	RT_UNLOCK();

	return error;
}
2257
2258 #ifdef DDB
2259
2260 #include <machine/db_machdep.h>
2261 #include <ddb/db_interface.h>
2262 #include <ddb/db_output.h>
2263
2264 #define rt_expire rt_rmx.rmx_expire
2265
2266 static void
2267 db_print_sa(const struct sockaddr *sa)
2268 {
2269 int len;
2270 const u_char *p;
2271
2272 if (sa == NULL) {
2273 db_printf("[NULL]");
2274 return;
2275 }
2276
2277 p = (const u_char *)sa;
2278 len = sa->sa_len;
2279 db_printf("[");
2280 while (len > 0) {
2281 db_printf("%d", *p);
2282 p++; len--;
2283 if (len) db_printf(",");
2284 }
2285 db_printf("]\n");
2286 }
2287
/*
 * DDB helper: dump the addresses and bookkeeping fields of an ifaddr.
 */
static void
db_print_ifa(struct ifaddr *ifa)
{
	if (ifa == NULL)
		return;
	db_printf("  ifa_addr=");
	db_print_sa(ifa->ifa_addr);
	db_printf("  ifa_dsta=");
	db_print_sa(ifa->ifa_dstaddr);
	db_printf("  ifa_mask=");
	db_print_sa(ifa->ifa_netmask);
	db_printf("  flags=0x%x,refcnt=%d,metric=%d\n",
	    ifa->ifa_flags,
	    ifa->ifa_refcnt,
	    ifa->ifa_metric);
}
2304
2305 /*
2306 * Function to pass to rt_walktree().
2307 * Return non-zero error to abort walk.
2308 */
2309 static int
2310 db_show_rtentry(struct rtentry *rt, void *w)
2311 {
2312 db_printf("rtentry=%p", rt);
2313
2314 db_printf(" flags=0x%x refcnt=%d use=%"PRId64" expire=%"PRId64"\n",
2315 rt->rt_flags, rt->rt_refcnt,
2316 rt->rt_use, (uint64_t)rt->rt_expire);
2317
2318 db_printf(" key="); db_print_sa(rt_getkey(rt));
2319 db_printf(" mask="); db_print_sa(rt_mask(rt));
2320 db_printf(" gw="); db_print_sa(rt->rt_gateway);
2321
2322 db_printf(" ifp=%p ", rt->rt_ifp);
2323 if (rt->rt_ifp)
2324 db_printf("(%s)", rt->rt_ifp->if_xname);
2325 else
2326 db_printf("(NULL)");
2327
2328 db_printf(" ifa=%p\n", rt->rt_ifa);
2329 db_print_ifa(rt->rt_ifa);
2330
2331 db_printf(" gwroute=%p llinfo=%p\n",
2332 rt->rt_gwroute, rt->rt_llinfo);
2333
2334 return 0;
2335 }
2336
2337 /*
2338 * Function to print all the route trees.
2339 * Use this from ddb: "show routes"
2340 */
2341 void
2342 db_show_routes(db_expr_t addr, bool have_addr,
2343 db_expr_t count, const char *modif)
2344 {
2345 rt_walktree(AF_INET, db_show_rtentry, NULL);
2346 }
2347 #endif
2348