route.c revision 1.187 1 /* $NetBSD: route.c,v 1.187 2017/01/17 07:53:06 ozaki-r Exp $ */
2
3 /*-
4 * Copyright (c) 1998, 2008 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Kevin M. Lahey of the Numerical Aerospace Simulation Facility,
9 * NASA Ames Research Center.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
23 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 * POSSIBILITY OF SUCH DAMAGE.
31 */
32
33 /*
34 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
35 * All rights reserved.
36 *
37 * Redistribution and use in source and binary forms, with or without
38 * modification, are permitted provided that the following conditions
39 * are met:
40 * 1. Redistributions of source code must retain the above copyright
41 * notice, this list of conditions and the following disclaimer.
42 * 2. Redistributions in binary form must reproduce the above copyright
43 * notice, this list of conditions and the following disclaimer in the
44 * documentation and/or other materials provided with the distribution.
45 * 3. Neither the name of the project nor the names of its contributors
46 * may be used to endorse or promote products derived from this software
47 * without specific prior written permission.
48 *
49 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
50 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
53 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59 * SUCH DAMAGE.
60 */
61
62 /*
63 * Copyright (c) 1980, 1986, 1991, 1993
64 * The Regents of the University of California. All rights reserved.
65 *
66 * Redistribution and use in source and binary forms, with or without
67 * modification, are permitted provided that the following conditions
68 * are met:
69 * 1. Redistributions of source code must retain the above copyright
70 * notice, this list of conditions and the following disclaimer.
71 * 2. Redistributions in binary form must reproduce the above copyright
72 * notice, this list of conditions and the following disclaimer in the
73 * documentation and/or other materials provided with the distribution.
74 * 3. Neither the name of the University nor the names of its contributors
75 * may be used to endorse or promote products derived from this software
76 * without specific prior written permission.
77 *
78 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
79 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
80 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
81 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
82 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
83 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
84 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
85 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
86 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
87 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
88 * SUCH DAMAGE.
89 *
90 * @(#)route.c 8.3 (Berkeley) 1/9/95
91 */
92
93 #ifdef _KERNEL_OPT
94 #include "opt_inet.h"
95 #include "opt_route.h"
96 #include "opt_net_mpsafe.h"
97 #endif
98
99 #include <sys/cdefs.h>
100 __KERNEL_RCSID(0, "$NetBSD: route.c,v 1.187 2017/01/17 07:53:06 ozaki-r Exp $");
101
102 #include <sys/param.h>
103 #ifdef RTFLUSH_DEBUG
104 #include <sys/sysctl.h>
105 #endif
106 #include <sys/systm.h>
107 #include <sys/callout.h>
108 #include <sys/proc.h>
109 #include <sys/mbuf.h>
110 #include <sys/socket.h>
111 #include <sys/socketvar.h>
112 #include <sys/domain.h>
113 #include <sys/kernel.h>
114 #include <sys/ioctl.h>
115 #include <sys/pool.h>
116 #include <sys/kauth.h>
117 #include <sys/workqueue.h>
118 #include <sys/syslog.h>
119 #include <sys/rwlock.h>
120 #include <sys/mutex.h>
121 #include <sys/cpu.h>
122
123 #include <net/if.h>
124 #include <net/if_dl.h>
125 #include <net/route.h>
126
127 #include <netinet/in.h>
128 #include <netinet/in_var.h>
129
130 #ifdef RTFLUSH_DEBUG
131 #define rtcache_debug() __predict_false(_rtcache_debug)
132 #else /* RTFLUSH_DEBUG */
133 #define rtcache_debug() 0
134 #endif /* RTFLUSH_DEBUG */
135
136 #ifdef RT_DEBUG
137 #define RT_REFCNT_TRACE(rt) printf("%s:%d: rt=%p refcnt=%d\n", \
138 __func__, __LINE__, (rt), (rt)->rt_refcnt)
139 #else
140 #define RT_REFCNT_TRACE(rt) do {} while (0)
141 #endif
142
143 #ifdef DEBUG
144 #define dlog(level, fmt, args...) log(level, fmt, ##args)
145 #else
146 #define dlog(level, fmt, args...) do {} while (0)
147 #endif
148
149 struct rtstat rtstat;
150
151 static int rttrash; /* routes not in table but not freed */
152
153 static struct pool rtentry_pool;
154 static struct pool rttimer_pool;
155
156 static struct callout rt_timer_ch; /* callout for rt_timer_timer() */
157 static struct workqueue *rt_timer_wq;
158 static struct work rt_timer_wk;
159
160 static void rt_timer_init(void);
161 static void rt_timer_queue_remove_all(struct rttimer_queue *);
162 static void rt_timer_remove_all(struct rtentry *);
163 static void rt_timer_timer(void *);
164
165 /*
166 * Locking notes:
167 * - The routing table is protected by a global rwlock
168 * - API: RT_RLOCK and friends
169 * - rtcaches are protected by a global rwlock
170 * - API: RTCACHE_RLOCK and friends
171 * - References to a rtentry is managed by reference counting and psref
172 * - Reference couting is used for temporal reference when a rtentry
173 * is fetched from the routing table
174 * - psref is used for temporal reference when a rtentry is fetched
175 * from a rtcache
176 * - struct route (rtcache) has struct psref, so we cannot obtain
177 * a reference twice on the same struct route
178 * - Befere destroying or updating a rtentry, we have to wait for
179 * all references left (see below for details)
180 * - APIs
181 * - An obtained rtentry via rtalloc1 or rtrequest* must be
182 * unreferenced by rt_unref
183 * - An obtained rtentry via rtcache_* must be unreferenced by
184 * rtcache_unref
185 * - TODO: once we get a lockless routing table, we should use only
186 * psref for rtentries
187 * - rtentry destruction
188 * - A rtentry is destroyed (freed) only when we call rtrequest(RTM_DELETE)
189 * - If a caller of rtrequest grabs a reference of a rtentry, the caller
190 * has a responsibility to destroy the rtentry by itself by calling
191 * rt_free
192 * - If not, rtrequest itself does that
193 * - If rt_free is called in softint, the actual destruction routine is
194 * deferred to a workqueue
195 * - rtentry update
196 * - When updating a rtentry, RTF_UPDATING flag is set
197 * - If a rtentry is set RTF_UPDATING, fetching the rtentry from
198 * the routing table or a rtcache results in either of the following
199 * cases:
200 * - if the caller runs in softint, the caller fails to fetch
201 * - otherwise, the caller waits for the update completed and retries
202 * to fetch (probably succeed to fetch for the second time)
203 */
204
205 /*
206 * Global locks for the routing table and rtcaches.
207 * Locking order: rtcache_lock => rt_lock
208 */
209 static krwlock_t rt_lock __cacheline_aligned;
210 #ifdef NET_MPSAFE
211 #define RT_RLOCK() rw_enter(&rt_lock, RW_READER)
212 #define RT_WLOCK() rw_enter(&rt_lock, RW_WRITER)
213 #define RT_UNLOCK() rw_exit(&rt_lock)
214 #define RT_LOCKED() rw_lock_held(&rt_lock)
215 #define RT_ASSERT_WLOCK() KASSERT(rw_write_held(&rt_lock))
216 #else
217 #define RT_RLOCK() do {} while (0)
218 #define RT_WLOCK() do {} while (0)
219 #define RT_UNLOCK() do {} while (0)
220 #define RT_LOCKED() false
221 #define RT_ASSERT_WLOCK() do {} while (0)
222 #endif
223
224 static krwlock_t rtcache_lock __cacheline_aligned;
225 #ifdef NET_MPSAFE
226 #define RTCACHE_RLOCK() rw_enter(&rtcache_lock, RW_READER)
227 #define RTCACHE_WLOCK() rw_enter(&rtcache_lock, RW_WRITER)
228 #define RTCACHE_UNLOCK() rw_exit(&rtcache_lock)
229 #define RTCACHE_ASSERT_WLOCK() KASSERT(rw_write_held(&rtcache_lock))
230 #define RTCACHE_WLOCKED() rw_write_held(&rtcache_lock)
231 #else
232 #define RTCACHE_RLOCK() do {} while (0)
233 #define RTCACHE_WLOCK() do {} while (0)
234 #define RTCACHE_UNLOCK() do {} while (0)
235 #define RTCACHE_ASSERT_WLOCK() do {} while (0)
236 #define RTCACHE_WLOCKED() false
237 #endif
238
239 /*
240 * mutex and cv that are used to wait for references to a rtentry left
241 * before updating the rtentry.
242 */
243 static struct {
244 kmutex_t lock;
245 kcondvar_t cv;
246 bool ongoing;
247 const struct lwp *lwp;
248 } rt_update_global __cacheline_aligned;
249
250 /*
251 * A workqueue and stuff that are used to defer the destruction routine
252 * of rtentries.
253 */
254 static struct {
255 struct workqueue *wq;
256 struct work wk;
257 kmutex_t lock;
258 struct rtentry *queue[10];
259 } rt_free_global __cacheline_aligned;
260
261 /* psref for rtentry */
262 static struct psref_class *rt_psref_class __read_mostly;
263
264 #ifdef RTFLUSH_DEBUG
265 static int _rtcache_debug = 0;
266 #endif /* RTFLUSH_DEBUG */
267
268 static kauth_listener_t route_listener;
269
270 static int rtdeletemsg(struct rtentry *);
271 static void rtflushall(int);
272
273 static void rt_maskedcopy(const struct sockaddr *,
274 struct sockaddr *, const struct sockaddr *);
275
276 static void rtcache_clear(struct route *);
277 static void rtcache_clear_rtentry(int, struct rtentry *);
278 static void rtcache_invalidate(struct dom_rtlist *);
279
280 static void rt_ref(struct rtentry *);
281
282 static struct rtentry *
283 rtalloc1_locked(const struct sockaddr *, int, bool);
284 static struct rtentry *
285 rtcache_validate_locked(struct route *);
286 static void rtcache_free_locked(struct route *);
287 static int rtcache_setdst_locked(struct route *, const struct sockaddr *);
288
289 static void rtcache_ref(struct rtentry *, struct route *);
290
291 static void rt_update_wait(void);
292
293 static bool rt_wait_ok(void);
294 static void rt_wait_refcnt(const char *, struct rtentry *, int);
295 static void rt_wait_psref(struct rtentry *);
296
297 #ifdef DDB
298 static void db_print_sa(const struct sockaddr *);
299 static void db_print_ifa(struct ifaddr *);
300 static int db_show_rtentry(struct rtentry *, void *);
301 #endif
302
303 #ifdef RTFLUSH_DEBUG
static void sysctl_net_rtcache_setup(struct sysctllog **);
/*
 * Create the net.rtcache sysctl subtree.  Currently exposes a single
 * read-write integer, net.rtcache.debug, backed by _rtcache_debug
 * (consulted via the rtcache_debug() macro).  Errors are ignored:
 * failure just means the debug knob is unavailable.
 */
static void
sysctl_net_rtcache_setup(struct sysctllog **clog)
{
	const struct sysctlnode *rnode;

	/* net.rtcache node; bail out silently on failure. */
	if (sysctl_createv(clog, 0, NULL, &rnode, CTLFLAG_PERMANENT,
	    CTLTYPE_NODE,
	    "rtcache", SYSCTL_DESCR("Route cache related settings"),
	    NULL, 0, NULL, 0, CTL_NET, CTL_CREATE, CTL_EOL) != 0)
		return;
	/* net.rtcache.debug: toggles rtcache debug printfs at runtime. */
	if (sysctl_createv(clog, 0, &rnode, &rnode,
	    CTLFLAG_PERMANENT|CTLFLAG_READWRITE, CTLTYPE_INT,
	    "debug", SYSCTL_DESCR("Debug route caches"),
	    NULL, 0, &_rtcache_debug, 0, CTL_CREATE, CTL_EOL) != 0)
		return;
}
321 #endif /* RTFLUSH_DEBUG */
322
323 static inline void
324 rt_destroy(struct rtentry *rt)
325 {
326 if (rt->_rt_key != NULL)
327 sockaddr_free(rt->_rt_key);
328 if (rt->rt_gateway != NULL)
329 sockaddr_free(rt->rt_gateway);
330 if (rt_gettag(rt) != NULL)
331 sockaddr_free(rt_gettag(rt));
332 rt->_rt_key = rt->rt_gateway = rt->rt_tag = NULL;
333 }
334
335 static inline const struct sockaddr *
336 rt_setkey(struct rtentry *rt, const struct sockaddr *key, int flags)
337 {
338 if (rt->_rt_key == key)
339 goto out;
340
341 if (rt->_rt_key != NULL)
342 sockaddr_free(rt->_rt_key);
343 rt->_rt_key = sockaddr_dup(key, flags);
344 out:
345 rt->rt_nodes->rn_key = (const char *)rt->_rt_key;
346 return rt->_rt_key;
347 }
348
/*
 * Return the ifaddr associated with a rtentry.  If the ifaddr has an
 * ifa_getifa hook, the hook is consulted with the route's key and may
 * substitute a different ifaddr, which is then installed on the route
 * via rt_replace_ifa.  Returns NULL only when the hook returns NULL.
 */
struct ifaddr *
rt_get_ifa(struct rtentry *rt)
{
	struct ifaddr *ifa;

	if ((ifa = rt->rt_ifa) == NULL)
		return ifa;
	else if (ifa->ifa_getifa == NULL)
		return ifa;
#if 0
	/* Disabled: would short-circuit when the ifaddr seqno is current. */
	else if (ifa->ifa_seqno != NULL && *ifa->ifa_seqno == rt->rt_ifa_seqno)
		return ifa;
#endif
	else {
		ifa = (*ifa->ifa_getifa)(ifa, rt_getkey(rt));
		if (ifa == NULL)
			return NULL;
		rt_replace_ifa(rt, ifa);
		return ifa;
	}
}
370
/*
 * Install `ifa' as the route's ifaddr and snapshot the ifaddr's
 * sequence number, when it has one (compared in rt_get_ifa, though
 * that check is currently disabled).  Does NOT take a reference;
 * callers (rt_set_ifa, rt_replace_ifa) handle refcounting.
 */
static void
rt_set_ifa1(struct rtentry *rt, struct ifaddr *ifa)
{
	rt->rt_ifa = ifa;
	if (ifa->ifa_seqno != NULL)
		rt->rt_ifa_seqno = *ifa->ifa_seqno;
}
378
379 /*
380 * Is this route the connected route for the ifa?
381 */
382 static int
383 rt_ifa_connected(const struct rtentry *rt, const struct ifaddr *ifa)
384 {
385 const struct sockaddr *key, *dst, *odst;
386 struct sockaddr_storage maskeddst;
387
388 key = rt_getkey(rt);
389 dst = rt->rt_flags & RTF_HOST ? ifa->ifa_dstaddr : ifa->ifa_addr;
390 if (dst == NULL ||
391 dst->sa_family != key->sa_family ||
392 dst->sa_len != key->sa_len)
393 return 0;
394 if ((rt->rt_flags & RTF_HOST) == 0 && ifa->ifa_netmask) {
395 odst = dst;
396 dst = (struct sockaddr *)&maskeddst;
397 rt_maskedcopy(odst, (struct sockaddr *)&maskeddst,
398 ifa->ifa_netmask);
399 }
400 return (memcmp(dst, key, dst->sa_len) == 0);
401 }
402
/*
 * Replace the route's ifaddr with `ifa', migrating the IFA_ROUTE flag
 * from the old ifaddr to the new one when the old ifaddr owned the
 * connected route.  Takes a reference to the new ifaddr and drops the
 * reference to the old one.
 */
void
rt_replace_ifa(struct rtentry *rt, struct ifaddr *ifa)
{
	if (rt->rt_ifa &&
	    rt->rt_ifa != ifa &&
	    rt->rt_ifa->ifa_flags & IFA_ROUTE &&
	    rt_ifa_connected(rt, rt->rt_ifa))
	{
		RT_DPRINTF("rt->_rt_key = %p, ifa = %p, "
		    "replace deleted IFA_ROUTE\n",
		    (void *)rt->_rt_key, (void *)rt->rt_ifa);
		rt->rt_ifa->ifa_flags &= ~IFA_ROUTE;
		if (rt_ifa_connected(rt, ifa)) {
			RT_DPRINTF("rt->_rt_key = %p, ifa = %p, "
			    "replace added IFA_ROUTE\n",
			    (void *)rt->_rt_key, (void *)ifa);
			ifa->ifa_flags |= IFA_ROUTE;
		}
	}

	/* Reference the new ifaddr before releasing the old one. */
	ifaref(ifa);
	ifafree(rt->rt_ifa);
	rt_set_ifa1(rt, ifa);
}
427
/*
 * Set the route's ifaddr for the first time: take a reference and
 * install it.  Unlike rt_replace_ifa, no previous ifaddr is released.
 */
static void
rt_set_ifa(struct rtentry *rt, struct ifaddr *ifa)
{
	ifaref(ifa);
	rt_set_ifa1(rt, ifa);
}
434
435 static int
436 route_listener_cb(kauth_cred_t cred, kauth_action_t action, void *cookie,
437 void *arg0, void *arg1, void *arg2, void *arg3)
438 {
439 struct rt_msghdr *rtm;
440 int result;
441
442 result = KAUTH_RESULT_DEFER;
443 rtm = arg1;
444
445 if (action != KAUTH_NETWORK_ROUTE)
446 return result;
447
448 if (rtm->rtm_type == RTM_GET)
449 result = KAUTH_RESULT_ALLOW;
450
451 return result;
452 }
453
454 static void rt_free_work(struct work *, void *);
455
void
rt_init(void)
{
	int error;

#ifdef RTFLUSH_DEBUG
	sysctl_net_rtcache_setup(NULL);
#endif

	/* Locks, psref class and workqueue used by deferred rt_free. */
	mutex_init(&rt_free_global.lock, MUTEX_DEFAULT, IPL_SOFTNET);
	rt_psref_class = psref_class_create("rtentry", IPL_SOFTNET);

	error = workqueue_create(&rt_free_global.wq, "rt_free",
	    rt_free_work, NULL, PRI_SOFTNET, IPL_SOFTNET, WQ_MPSAFE);
	if (error)
		panic("%s: workqueue_create failed (%d)\n", __func__, error);

	/* Synchronization state for rt_update_prepare/finish. */
	mutex_init(&rt_update_global.lock, MUTEX_DEFAULT, IPL_SOFTNET);
	cv_init(&rt_update_global.cv, "rt_update");

	pool_init(&rtentry_pool, sizeof(struct rtentry), 0, 0, 0, "rtentpl",
	    NULL, IPL_SOFTNET);
	pool_init(&rttimer_pool, sizeof(struct rttimer), 0, 0, 0, "rttmrpl",
	    NULL, IPL_SOFTNET);

	rn_init();	/* initialize all zeroes, all ones, mask table */
	rtbl_init();

	route_listener = kauth_listen_scope(KAUTH_SCOPE_NETWORK,
	    route_listener_cb, NULL);
}
487
488 static void
489 rtflushall(int family)
490 {
491 struct domain *dom;
492
493 if (rtcache_debug())
494 printf("%s: enter\n", __func__);
495
496 if ((dom = pffinddomain(family)) == NULL)
497 return;
498
499 RTCACHE_WLOCK();
500 rtcache_invalidate(&dom->dom_rtcache);
501 RTCACHE_UNLOCK();
502 }
503
/*
 * Link a filled-in route cache entry onto its domain's rtcache list so
 * that rtflushall/rtcache_invalidate can find it.  The caller must
 * hold the rtcache write lock and `ro' must already hold a valid
 * rtentry and destination.
 */
static void
rtcache(struct route *ro)
{
	struct domain *dom;

	RTCACHE_ASSERT_WLOCK();

	rtcache_invariants(ro);
	KASSERT(ro->_ro_rt != NULL);
	KASSERT(ro->ro_invalid == false);
	KASSERT(rtcache_getdst(ro) != NULL);

	/* No domain for this family: nothing to hook onto. */
	if ((dom = pffinddomain(rtcache_getdst(ro)->sa_family)) == NULL)
		return;

	LIST_INSERT_HEAD(&dom->dom_rtcache, ro, ro_rtcache_next);
	rtcache_invariants(ro);
}
522
523 #ifdef RT_DEBUG
524 static void
525 dump_rt(const struct rtentry *rt)
526 {
527 char buf[512];
528
529 aprint_normal("rt: ");
530 aprint_normal("p=%p ", rt);
531 if (rt->_rt_key == NULL) {
532 aprint_normal("dst=(NULL) ");
533 } else {
534 sockaddr_format(rt->_rt_key, buf, sizeof(buf));
535 aprint_normal("dst=%s ", buf);
536 }
537 if (rt->rt_gateway == NULL) {
538 aprint_normal("gw=(NULL) ");
539 } else {
540 sockaddr_format(rt->_rt_key, buf, sizeof(buf));
541 aprint_normal("gw=%s ", buf);
542 }
543 aprint_normal("flags=%x ", rt->rt_flags);
544 if (rt->rt_ifp == NULL) {
545 aprint_normal("if=(NULL) ");
546 } else {
547 aprint_normal("if=%s ", rt->rt_ifp->if_xname);
548 }
549 aprint_normal("\n");
550 }
551 #endif /* RT_DEBUG */
552
553 /*
554 * Packet routing routines. If success, refcnt of a returned rtentry
555 * will be incremented. The caller has to rtfree it by itself.
556 */
struct rtentry *
rtalloc1_locked(const struct sockaddr *dst, int report, bool wait_ok)
{
	rtbl_t *rtbl;
	struct rtentry *rt;
	int s;

retry:
	s = splsoftnet();
	rtbl = rt_gettable(dst->sa_family);
	if (rtbl == NULL)
		goto miss;

	rt = rt_matchaddr(rtbl, dst);
	if (rt == NULL)
		goto miss;

	/* Entry being torn down: treat as a miss. */
	if (!ISSET(rt->rt_flags, RTF_UP))
		goto miss;

	/*
	 * The entry is being updated by another LWP.  The updater itself
	 * may re-enter here and must not block on its own update.
	 */
	if (ISSET(rt->rt_flags, RTF_UPDATING) &&
	    /* XXX updater should be always able to acquire */
	    curlwp != rt_update_global.lwp) {
		bool need_lock = false;
		/* In softint (or caller forbids waiting) we cannot sleep. */
		if (!wait_ok || !rt_wait_ok())
			goto miss;
		/* Drop the table lock before sleeping to avoid deadlock. */
		RT_UNLOCK();
		splx(s);

		/* XXX need more proper solution */
		if (RTCACHE_WLOCKED()) {
			RTCACHE_UNLOCK();
			need_lock = true;
		}

		/* We can wait until the update is complete */
		rt_update_wait();

		if (need_lock)
			RTCACHE_WLOCK();
		goto retry;
	}

	/* Success: hand the caller a referenced entry. */
	rt_ref(rt);
	RT_REFCNT_TRACE(rt);

	splx(s);
	return rt;
miss:
	rtstat.rts_unreach++;
	if (report) {
		/* Tell routing-socket listeners about the lookup failure. */
		struct rt_addrinfo info;

		memset(&info, 0, sizeof(info));
		info.rti_info[RTAX_DST] = dst;
		rt_missmsg(RTM_MISS, &info, 0, 0);
	}
	splx(s);
	return NULL;
}
617
/*
 * Locking wrapper around rtalloc1_locked: look up `dst' under the
 * routing-table read lock, allowing the lookup to sleep on RTF_UPDATING
 * entries.  On success the returned rtentry is referenced and must be
 * released with rt_unref.
 */
struct rtentry *
rtalloc1(const struct sockaddr *dst, int report)
{
	struct rtentry *rt;

	RT_RLOCK();
	rt = rtalloc1_locked(dst, report, true);
	RT_UNLOCK();

	return rt;
}
629
/*
 * Take a reference to a rtentry.  Atomic, so no lock is required.
 */
static void
rt_ref(struct rtentry *rt)
{

	KASSERT(rt->rt_refcnt >= 0);
	atomic_inc_uint(&rt->rt_refcnt);
}
637
/*
 * Release a reference to a rtentry.  If the entry is going away
 * (!RTF_UP) or being updated (RTF_UPDATING), wake up any thread
 * sleeping in rt_wait_refcnt; the wakeup is issued under
 * rt_free_global.lock to pair with the cv_wait there.
 */
void
rt_unref(struct rtentry *rt)
{

	KASSERT(rt != NULL);
	KASSERTMSG(rt->rt_refcnt > 0, "refcnt=%d", rt->rt_refcnt);

	atomic_dec_uint(&rt->rt_refcnt);
	if (!ISSET(rt->rt_flags, RTF_UP) || ISSET(rt->rt_flags, RTF_UPDATING)) {
		mutex_enter(&rt_free_global.lock);
		cv_broadcast(&rt->rt_cv);
		mutex_exit(&rt_free_global.lock);
	}
}
652
/*
 * May the current context sleep?  True unless we are running in a
 * software interrupt; hard interrupt context is asserted against.
 */
static bool
rt_wait_ok(void)
{

	KASSERT(!cpu_intr_p());
	return !cpu_softintr_p();
}
660
/*
 * Sleep until the rtentry's reference count drops to `cnt'.
 * Woken by the cv_broadcast in rt_unref; `title' only labels the
 * debug log output.  Must not be called from a context that cannot
 * sleep (see rt_wait_ok).
 */
void
rt_wait_refcnt(const char *title, struct rtentry *rt, int cnt)
{
	mutex_enter(&rt_free_global.lock);
	while (rt->rt_refcnt > cnt) {
		dlog(LOG_DEBUG, "%s: %s waiting (refcnt=%d)\n",
		    __func__, title, rt->rt_refcnt);
		cv_wait(&rt->rt_cv, &rt_free_global.lock);
		dlog(LOG_DEBUG, "%s: %s waited (refcnt=%d)\n",
		    __func__, title, rt->rt_refcnt);
	}
	mutex_exit(&rt_free_global.lock);
}
674
/*
 * Wait for all psref holders of the rtentry to drain, then re-arm the
 * psref target so the entry can be published again (used while
 * updating an entry in place, see rt_update_prepare).
 */
void
rt_wait_psref(struct rtentry *rt)
{

	psref_target_destroy(&rt->rt_psref, rt_psref_class);
	psref_target_init(&rt->rt_psref, rt_psref_class);
}
682
/*
 * Actually destroy a rtentry: wait out any in-flight update and all
 * remaining references/psrefs, release the ifaddr, free the owned
 * sockaddrs and return the entry to the pool.  Runs in a sleepable
 * context only (directly from rt_free, or via the rt_free workqueue).
 */
static void
_rt_free(struct rtentry *rt)
{
	struct ifaddr *ifa;

	/*
	 * Need to avoid a deadlock on rt_wait_refcnt of update
	 * and a conflict on psref_target_destroy of update.
	 */
	rt_update_wait();

	RT_REFCNT_TRACE(rt);
	KASSERTMSG(rt->rt_refcnt >= 0, "refcnt=%d", rt->rt_refcnt);
	rt_wait_refcnt("free", rt, 0);
#ifdef NET_MPSAFE
	psref_target_destroy(&rt->rt_psref, rt_psref_class);
#endif

	rt_assert_inactive(rt);
	rttrash--;
	ifa = rt->rt_ifa;
	rt->rt_ifa = NULL;
	ifafree(ifa);
	rt->rt_ifp = NULL;
	cv_destroy(&rt->rt_cv);
	rt_destroy(rt);
	pool_put(&rtentry_pool, rt);
}
711
712 static void
713 rt_free_work(struct work *wk, void *arg)
714 {
715 int i;
716 struct rtentry *rt;
717
718 restart:
719 mutex_enter(&rt_free_global.lock);
720 for (i = 0; i < sizeof(rt_free_global.queue); i++) {
721 if (rt_free_global.queue[i] == NULL)
722 continue;
723 rt = rt_free_global.queue[i];
724 rt_free_global.queue[i] = NULL;
725 mutex_exit(&rt_free_global.lock);
726
727 atomic_dec_uint(&rt->rt_refcnt);
728 _rt_free(rt);
729 goto restart;
730 }
731 mutex_exit(&rt_free_global.lock);
732 }
733
734 void
735 rt_free(struct rtentry *rt)
736 {
737
738 KASSERT(rt->rt_refcnt > 0);
739 if (!rt_wait_ok()) {
740 int i;
741 mutex_enter(&rt_free_global.lock);
742 for (i = 0; i < sizeof(rt_free_global.queue); i++) {
743 if (rt_free_global.queue[i] == NULL) {
744 rt_free_global.queue[i] = rt;
745 break;
746 }
747 }
748 KASSERT(i < sizeof(rt_free_global.queue));
749 rt_ref(rt);
750 mutex_exit(&rt_free_global.lock);
751 workqueue_enqueue(rt_free_global.wq, &rt_free_global.wk, NULL);
752 } else {
753 atomic_dec_uint(&rt->rt_refcnt);
754 _rt_free(rt);
755 }
756 }
757
/*
 * Block until no rtentry update (rt_update_prepare/rt_update_finish)
 * is in progress.  Callers must be able to sleep; the updater LWP
 * itself must not call this (see the rt_update_global.lwp check in
 * rtalloc1_locked).
 */
static void
rt_update_wait(void)
{

	mutex_enter(&rt_update_global.lock);
	while (rt_update_global.ongoing) {
		dlog(LOG_DEBUG, "%s: waiting lwp=%p\n", __func__, curlwp);
		cv_wait(&rt_update_global.cv, &rt_update_global.lock);
		dlog(LOG_DEBUG, "%s: waited lwp=%p\n", __func__, curlwp);
	}
	mutex_exit(&rt_update_global.lock);
}
770
/*
 * Begin updating a rtentry in place: mark it RTF_UPDATING (under both
 * the rtcache and routing-table write locks, in that order), become the
 * single global updater, then wait for other references and psrefs to
 * drain.  Returns -1 without side effects if the entry is already being
 * destroyed (!RTF_UP); returns 0 when the caller may modify the entry
 * and must later call rt_update_finish.
 */
int
rt_update_prepare(struct rtentry *rt)
{

	dlog(LOG_DEBUG, "%s: updating rt=%p lwp=%p\n", __func__, rt, curlwp);

	/* Lock order: rtcache_lock => rt_lock (see locking notes above). */
	RTCACHE_WLOCK();
	RT_WLOCK();
	/* If the entry is being destroyed, don't proceed the update. */
	if (!ISSET(rt->rt_flags, RTF_UP)) {
		RT_UNLOCK();
		RTCACHE_UNLOCK();
		return -1;
	}
	rt->rt_flags |= RTF_UPDATING;
	RT_UNLOCK();
	RTCACHE_UNLOCK();

	/* Only one update may be in flight at a time, globally. */
	mutex_enter(&rt_update_global.lock);
	while (rt_update_global.ongoing) {
		dlog(LOG_DEBUG, "%s: waiting ongoing updating rt=%p lwp=%p\n",
		    __func__, rt, curlwp);
		cv_wait(&rt_update_global.cv, &rt_update_global.lock);
		dlog(LOG_DEBUG, "%s: waited ongoing updating rt=%p lwp=%p\n",
		    __func__, rt, curlwp);
	}
	rt_update_global.ongoing = true;
	/* XXX need it to avoid rt_update_wait by updater itself. */
	rt_update_global.lwp = curlwp;
	mutex_exit(&rt_update_global.lock);

	/* Wait out everyone else (we hold one reference ourselves). */
	rt_wait_refcnt("update", rt, 1);
	rt_wait_psref(rt);

	return 0;
}
807
/*
 * End an in-place rtentry update started with rt_update_prepare:
 * clear RTF_UPDATING (under both write locks) and wake every thread
 * blocked in rt_update_wait.
 */
void
rt_update_finish(struct rtentry *rt)
{

	RTCACHE_WLOCK();
	RT_WLOCK();
	rt->rt_flags &= ~RTF_UPDATING;
	RT_UNLOCK();
	RTCACHE_UNLOCK();

	mutex_enter(&rt_update_global.lock);
	rt_update_global.ongoing = false;
	rt_update_global.lwp = NULL;
	cv_broadcast(&rt_update_global.cv);
	mutex_exit(&rt_update_global.lock);

	dlog(LOG_DEBUG, "%s: updated rt=%p lwp=%p\n", __func__, rt, curlwp);
}
826
827 /*
828 * Force a routing table entry to the specified
829 * destination to go through the given gateway.
830 * Normally called as a result of a routing redirect
831 * message from the network layer.
832 *
833 * N.B.: must be called at splsoftnet
834 */
void
rtredirect(const struct sockaddr *dst, const struct sockaddr *gateway,
	const struct sockaddr *netmask, int flags, const struct sockaddr *src,
	struct rtentry **rtp)
{
	struct rtentry *rt;
	int error = 0;
	uint64_t *stat = NULL;
	struct rt_addrinfo info;
	struct ifaddr *ifa;
	struct psref psref;

	/* verify the gateway is directly reachable */
	if ((ifa = ifa_ifwithnet_psref(gateway, &psref)) == NULL) {
		error = ENETUNREACH;
		goto out;
	}
	rt = rtalloc1(dst, 0);
	/*
	 * If the redirect isn't from our current router for this dst,
	 * it's either old or wrong.  If it redirects us to ourselves,
	 * we have a routing loop, perhaps as a result of an interface
	 * going down recently.
	 */
	if (!(flags & RTF_DONE) && rt &&
	    (sockaddr_cmp(src, rt->rt_gateway) != 0 || rt->rt_ifa != ifa))
		error = EINVAL;
	else {
		/* Redirect pointing at one of our own addresses: loop. */
		int s = pserialize_read_enter();
		struct ifaddr *_ifa;

		_ifa = ifa_ifwithaddr(gateway);
		if (_ifa != NULL)
			error = EHOSTUNREACH;
		pserialize_read_exit(s);
	}
	if (error)
		goto done;
	/*
	 * Create a new entry if we just got back a wildcard entry
	 * or the lookup failed.  This is necessary for hosts
	 * which use routing redirects generated by smart gateways
	 * to dynamically build the routing tables.
	 */
	if (rt == NULL || (rt_mask(rt) && rt_mask(rt)->sa_len < 2))
		goto create;
	/*
	 * Don't listen to the redirect if it's
	 * for a route to an interface.
	 */
	if (rt->rt_flags & RTF_GATEWAY) {
		if (((rt->rt_flags & RTF_HOST) == 0) && (flags & RTF_HOST)) {
			/*
			 * Changing from route to net => route to host.
			 * Create new route, rather than smashing route to net.
			 */
		create:
			if (rt != NULL)
				rt_unref(rt);
			flags |= RTF_GATEWAY | RTF_DYNAMIC;
			memset(&info, 0, sizeof(info));
			info.rti_info[RTAX_DST] = dst;
			info.rti_info[RTAX_GATEWAY] = gateway;
			info.rti_info[RTAX_NETMASK] = netmask;
			info.rti_ifa = ifa;
			info.rti_flags = flags;
			rt = NULL;
			error = rtrequest1(RTM_ADD, &info, &rt);
			if (rt != NULL)
				flags = rt->rt_flags;
			stat = &rtstat.rts_dynamic;
		} else {
			/*
			 * Smash the current notion of the gateway to
			 * this destination.  Should check about netmask!!!
			 */
			/*
			 * FIXME NOMPSAFE: the rtentry is updated with the existence
			 * of refeferences of it.
			 */
			error = rt_setgate(rt, gateway);
			if (error == 0) {
				rt->rt_flags |= RTF_MODIFIED;
				flags |= RTF_MODIFIED;
			}
			stat = &rtstat.rts_newgateway;
		}
	} else
		error = EHOSTUNREACH;
done:
	if (rt) {
		/* Hand the referenced entry to the caller, or drop it. */
		if (rtp != NULL && !error)
			*rtp = rt;
		else
			rt_unref(rt);
	}
out:
	if (error)
		rtstat.rts_badredirect++;
	else if (stat != NULL)
		(*stat)++;
	/* Report the redirect (and any error) on the routing socket. */
	memset(&info, 0, sizeof(info));
	info.rti_info[RTAX_DST] = dst;
	info.rti_info[RTAX_GATEWAY] = gateway;
	info.rti_info[RTAX_NETMASK] = netmask;
	info.rti_info[RTAX_AUTHOR] = src;
	rt_missmsg(RTM_REDIRECT, &info, flags, error);
	ifa_release(ifa, &psref);
}
944
945 /*
946 * Delete a route and generate a message.
947 * It doesn't free a passed rt.
948 */
949 static int
950 rtdeletemsg(struct rtentry *rt)
951 {
952 int error;
953 struct rt_addrinfo info;
954 struct rtentry *retrt;
955
956 /*
957 * Request the new route so that the entry is not actually
958 * deleted. That will allow the information being reported to
959 * be accurate (and consistent with route_output()).
960 */
961 memset(&info, 0, sizeof(info));
962 info.rti_info[RTAX_DST] = rt_getkey(rt);
963 info.rti_info[RTAX_NETMASK] = rt_mask(rt);
964 info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
965 info.rti_flags = rt->rt_flags;
966 error = rtrequest1(RTM_DELETE, &info, &retrt);
967
968 rt_missmsg(RTM_DELETE, &info, info.rti_flags, error);
969
970 return error;
971 }
972
/*
 * Find the ifaddr to associate with a route toward `dst' via `gateway'
 * given the route flags, taking a psref on the result.  Falls back to
 * the ifaddr of an existing route to `dst', and finally prefers an
 * ifaddr of the matching address family on the chosen interface.
 * Returns NULL if no suitable ifaddr is found; the caller must
 * ifa_release a non-NULL result.
 */
struct ifaddr *
ifa_ifwithroute_psref(int flags, const struct sockaddr *dst,
    const struct sockaddr *gateway, struct psref *psref)
{
	struct ifaddr *ifa = NULL;

	if ((flags & RTF_GATEWAY) == 0) {
		/*
		 * If we are adding a route to an interface,
		 * and the interface is a pt to pt link
		 * we should search for the destination
		 * as our clue to the interface.  Otherwise
		 * we can use the local address.
		 */
		if ((flags & RTF_HOST) && gateway->sa_family != AF_LINK)
			ifa = ifa_ifwithdstaddr_psref(dst, psref);
		if (ifa == NULL)
			ifa = ifa_ifwithaddr_psref(gateway, psref);
	} else {
		/*
		 * If we are adding a route to a remote net
		 * or host, the gateway may still be on the
		 * other end of a pt to pt link.
		 */
		ifa = ifa_ifwithdstaddr_psref(gateway, psref);
	}
	if (ifa == NULL)
		ifa = ifa_ifwithnet_psref(gateway, psref);
	if (ifa == NULL) {
		/* Last resort: borrow the ifaddr of an existing route. */
		int s;
		struct rtentry *rt;

		rt = rtalloc1(dst, 0);
		if (rt == NULL)
			return NULL;
		/*
		 * Just in case. May not need to do this workaround.
		 * Revisit when working on rtentry MP-ification.
		 */
		s = pserialize_read_enter();
		IFADDR_READER_FOREACH(ifa, rt->rt_ifp) {
			if (ifa == rt->rt_ifa)
				break;
		}
		if (ifa != NULL)
			ifa_acquire(ifa, psref);
		pserialize_read_exit(s);
		rt_unref(rt);
		if (ifa == NULL)
			return NULL;
	}
	if (ifa->ifa_addr->sa_family != dst->sa_family) {
		/* Prefer an address of dst's family on the same interface. */
		struct ifaddr *nifa;
		int s;

		s = pserialize_read_enter();
		nifa = ifaof_ifpforaddr(dst, ifa->ifa_ifp);
		if (nifa != NULL) {
			ifa_release(ifa, psref);
			ifa_acquire(nifa, psref);
			ifa = nifa;
		}
		pserialize_read_exit(s);
	}
	return ifa;
}
1039
1040 /*
1041 * If it suceeds and ret_nrt isn't NULL, refcnt of ret_nrt is incremented.
1042 * The caller has to rtfree it by itself.
1043 */
1044 int
1045 rtrequest(int req, const struct sockaddr *dst, const struct sockaddr *gateway,
1046 const struct sockaddr *netmask, int flags, struct rtentry **ret_nrt)
1047 {
1048 struct rt_addrinfo info;
1049
1050 memset(&info, 0, sizeof(info));
1051 info.rti_flags = flags;
1052 info.rti_info[RTAX_DST] = dst;
1053 info.rti_info[RTAX_GATEWAY] = gateway;
1054 info.rti_info[RTAX_NETMASK] = netmask;
1055 return rtrequest1(req, &info, ret_nrt);
1056 }
1057
1058 /*
1059 * It's a utility function to add/remove a route to/from the routing table
1060 * and tell user processes the addition/removal on success.
1061 */
1062 int
1063 rtrequest_newmsg(const int req, const struct sockaddr *dst,
1064 const struct sockaddr *gateway, const struct sockaddr *netmask,
1065 const int flags)
1066 {
1067 int error;
1068 struct rtentry *ret_nrt = NULL;
1069
1070 KASSERT(req == RTM_ADD || req == RTM_DELETE);
1071
1072 error = rtrequest(req, dst, gateway, netmask, flags, &ret_nrt);
1073 if (error != 0)
1074 return error;
1075
1076 KASSERT(ret_nrt != NULL);
1077
1078 rt_newmsg(req, ret_nrt); /* tell user process */
1079 if (req == RTM_DELETE)
1080 rt_free(ret_nrt);
1081 else
1082 rt_unref(ret_nrt);
1083
1084 return 0;
1085 }
1086
1087 struct ifnet *
1088 rt_getifp(struct rt_addrinfo *info, struct psref *psref)
1089 {
1090 const struct sockaddr *ifpaddr = info->rti_info[RTAX_IFP];
1091
1092 if (info->rti_ifp != NULL)
1093 return NULL;
1094 /*
1095 * ifp may be specified by sockaddr_dl when protocol address
1096 * is ambiguous
1097 */
1098 if (ifpaddr != NULL && ifpaddr->sa_family == AF_LINK) {
1099 struct ifaddr *ifa;
1100 int s = pserialize_read_enter();
1101
1102 ifa = ifa_ifwithnet(ifpaddr);
1103 if (ifa != NULL)
1104 info->rti_ifp = if_get_byindex(ifa->ifa_ifp->if_index,
1105 psref);
1106 pserialize_read_exit(s);
1107 }
1108
1109 return info->rti_ifp;
1110 }
1111
/*
 * Resolve and psref-acquire the ifaddr to attach to a route, chosen
 * from the addresses in @info (RTAX_IFA first, then gateway, then dst).
 * On success the ifaddr is stored in info->rti_ifa and returned, and
 * info->rti_ifp is filled in from the ifaddr if not already set.
 * Returns NULL if no suitable ifaddr is found.
 */
struct ifaddr *
rt_getifa(struct rt_addrinfo *info, struct psref *psref)
{
	struct ifaddr *ifa = NULL;
	const struct sockaddr *dst = info->rti_info[RTAX_DST];
	const struct sockaddr *gateway = info->rti_info[RTAX_GATEWAY];
	const struct sockaddr *ifaaddr = info->rti_info[RTAX_IFA];
	int flags = info->rti_flags;
	const struct sockaddr *sa;

	/* An exact interface-address match takes precedence. */
	if (info->rti_ifa == NULL && ifaaddr != NULL) {
		ifa = ifa_ifwithaddr_psref(ifaaddr, psref);
		if (ifa != NULL)
			goto got;
	}

	/* Fall back to the most specific address we were given. */
	sa = ifaaddr != NULL ? ifaaddr :
	    (gateway != NULL ? gateway : dst);
	if (sa != NULL && info->rti_ifp != NULL)
		ifa = ifaof_ifpforaddr_psref(sa, info->rti_ifp, psref);
	else if (dst != NULL && gateway != NULL)
		ifa = ifa_ifwithroute_psref(flags, dst, gateway, psref);
	else if (sa != NULL)
		ifa = ifa_ifwithroute_psref(flags, sa, sa, psref);
	if (ifa == NULL)
		return NULL;
got:
	if (ifa->ifa_getifa != NULL) {
		/* FIXME NOMPSAFE */
		/*
		 * NOTE(review): if ifa_getifa returns a different ifaddr,
		 * the psref held on the original is not released before a
		 * new one is acquired -- confirm against psref rules.
		 */
		ifa = (*ifa->ifa_getifa)(ifa, dst);
		if (ifa == NULL)
			return NULL;
		ifa_acquire(ifa, psref);
	}
	info->rti_ifa = ifa;
	if (info->rti_ifp == NULL)
		info->rti_ifp = ifa->ifa_ifp;
	return ifa;
}
1151
1152 /*
1153 * If it suceeds and ret_nrt isn't NULL, refcnt of ret_nrt is incremented.
1154 * The caller has to rtfree it by itself.
1155 */
/*
 * Core routing-table request: add (RTM_ADD), delete (RTM_DELETE) or
 * look up (RTM_GET) a route described by @info in the table of the
 * destination's address family.
 */
int
rtrequest1(int req, struct rt_addrinfo *info, struct rtentry **ret_nrt)
{
	int s = splsoftnet(), ss;
	int error = 0, rc;
	struct rtentry *rt;
	rtbl_t *rtbl;
	struct ifaddr *ifa = NULL, *ifa2 = NULL;
	struct sockaddr_storage maskeddst;
	const struct sockaddr *dst = info->rti_info[RTAX_DST];
	const struct sockaddr *gateway = info->rti_info[RTAX_GATEWAY];
	const struct sockaddr *netmask = info->rti_info[RTAX_NETMASK];
	int flags = info->rti_flags;
	struct psref psref_ifp, psref_ifa;
	int bound = 0;
	struct ifnet *ifp = NULL;
	bool need_to_release_ifa = true;
	bool need_unlock = true;
#define senderr(x) { error = x ; goto bad; }

	RT_WLOCK();

	bound = curlwp_bind();
	if ((rtbl = rt_gettable(dst->sa_family)) == NULL)
		senderr(ESRCH);
	/* Host routes never carry a netmask. */
	if (flags & RTF_HOST)
		netmask = NULL;
	switch (req) {
	case RTM_DELETE:
		/* Mask the destination so it matches the stored key. */
		if (netmask) {
			rt_maskedcopy(dst, (struct sockaddr *)&maskeddst,
			    netmask);
			dst = (struct sockaddr *)&maskeddst;
		}
		if ((rt = rt_lookup(rtbl, dst, netmask)) == NULL)
			senderr(ESRCH);
		if ((rt = rt_deladdr(rtbl, dst, netmask)) == NULL)
			senderr(ESRCH);
		rt->rt_flags &= ~RTF_UP;
		if ((ifa = rt->rt_ifa)) {
			/* Clear IFA_ROUTE if this was the connected route. */
			if (ifa->ifa_flags & IFA_ROUTE &&
			    rt_ifa_connected(rt, ifa)) {
				RT_DPRINTF("rt->_rt_key = %p, ifa = %p, "
				    "deleted IFA_ROUTE\n",
				    (void *)rt->_rt_key, (void *)ifa);
				ifa->ifa_flags &= ~IFA_ROUTE;
			}
			/* Let the address family clean up its state. */
			if (ifa->ifa_rtrequest)
				ifa->ifa_rtrequest(RTM_DELETE, rt, info);
			ifa = NULL;
		}
		rttrash++;
		if (ret_nrt) {
			/* Hand the detached entry back to the caller. */
			*ret_nrt = rt;
			rt_ref(rt);
			RT_REFCNT_TRACE(rt);
		}
		/* Drop the lock before work that may block. */
		RT_UNLOCK();
		need_unlock = false;
		rt_timer_remove_all(rt);
		rtcache_clear_rtentry(dst->sa_family, rt);
		if (ret_nrt == NULL) {
			/* Adjust the refcount */
			rt_ref(rt);
			RT_REFCNT_TRACE(rt);
			rt_free(rt);
		}
		break;

	case RTM_ADD:
		if (info->rti_ifa == NULL) {
			/* Resolve the interface and address ourselves. */
			ifp = rt_getifp(info, &psref_ifp);
			ifa = rt_getifa(info, &psref_ifa);
			if (ifa == NULL)
				senderr(ENETUNREACH);
		} else {
			/* Caller should have a reference of ifa */
			ifa = info->rti_ifa;
			need_to_release_ifa = false;
		}
		rt = pool_get(&rtentry_pool, PR_NOWAIT);
		if (rt == NULL)
			senderr(ENOBUFS);
		memset(rt, 0, sizeof(*rt));
		rt->rt_flags = RTF_UP | flags;
		LIST_INIT(&rt->rt_timer);

		RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);
		/* The stored key is the masked destination. */
		if (netmask) {
			rt_maskedcopy(dst, (struct sockaddr *)&maskeddst,
			    netmask);
			rt_setkey(rt, (struct sockaddr *)&maskeddst, M_NOWAIT);
		} else {
			rt_setkey(rt, dst, M_NOWAIT);
		}
		RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);
		if (rt_getkey(rt) == NULL ||
		    rt_setgate(rt, gateway) != 0) {
			pool_put(&rtentry_pool, rt);
			senderr(ENOBUFS);
		}

		rt_set_ifa(rt, ifa);
		if (info->rti_info[RTAX_TAG] != NULL) {
			const struct sockaddr *tag;
			tag = rt_settag(rt, info->rti_info[RTAX_TAG]);
			if (tag == NULL)
				senderr(ENOBUFS);
		}
		RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);

		/*
		 * The output interface may be overridden by an RTAX_IFP
		 * sockaddr; otherwise it comes from the chosen ifaddr.
		 */
		ss = pserialize_read_enter();
		if (info->rti_info[RTAX_IFP] != NULL) {
			ifa2 = ifa_ifwithnet(info->rti_info[RTAX_IFP]);
			if (ifa2 != NULL)
				rt->rt_ifp = ifa2->ifa_ifp;
			else
				rt->rt_ifp = ifa->ifa_ifp;
		} else
			rt->rt_ifp = ifa->ifa_ifp;
		pserialize_read_exit(ss);
		cv_init(&rt->rt_cv, "rtentry");
		psref_target_init(&rt->rt_psref, rt_psref_class);

		RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);
		rc = rt_addaddr(rtbl, rt, netmask);
		RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);
		if (rc != 0) {
			/* Insertion failed; undo everything. */
			ifafree(ifa); /* for rt_set_ifa above */
			cv_destroy(&rt->rt_cv);
			rt_destroy(rt);
			pool_put(&rtentry_pool, rt);
			senderr(rc);
		}
		RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);
		/* Let the address family initialize the new entry. */
		if (ifa->ifa_rtrequest)
			ifa->ifa_rtrequest(req, rt, info);
		if (need_to_release_ifa)
			ifa_release(ifa, &psref_ifa);
		ifa = NULL;
		if_put(ifp, &psref_ifp);
		ifp = NULL;
		RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);
		if (ret_nrt) {
			*ret_nrt = rt;
			rt_ref(rt);
			RT_REFCNT_TRACE(rt);
		}
		RT_UNLOCK();
		need_unlock = false;
		/* Invalidate caches that may hold a now-shadowed route. */
		rtflushall(dst->sa_family);
		break;
	case RTM_GET:
		/* Mask the destination so it matches the stored key. */
		if (netmask != NULL) {
			rt_maskedcopy(dst, (struct sockaddr *)&maskeddst,
			    netmask);
			dst = (struct sockaddr *)&maskeddst;
		}
		if ((rt = rt_lookup(rtbl, dst, netmask)) == NULL)
			senderr(ESRCH);
		if (ret_nrt != NULL) {
			*ret_nrt = rt;
			rt_ref(rt);
			RT_REFCNT_TRACE(rt);
		}
		break;
	}
bad:
	if (need_to_release_ifa)
		ifa_release(ifa, &psref_ifa);
	if_put(ifp, &psref_ifp);
	curlwp_bindx(bound);
	if (need_unlock)
		RT_UNLOCK();
	splx(s);
	return error;
}
1333
/*
 * Replace the gateway address of @rt with a copy of @gate.  Returns
 * ENOMEM if the new sockaddr cannot be allocated, else 0.  For
 * RTF_GATEWAY routes the MTU may additionally be clamped to that of
 * the route to the new gateway.
 */
int
rt_setgate(struct rtentry *rt, const struct sockaddr *gate)
{
	struct sockaddr *new, *old;

	KASSERT(rt->_rt_key != NULL);
	RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);

	new = sockaddr_dup(gate, M_ZERO | M_NOWAIT);
	if (new == NULL)
		return ENOMEM;

	/* Install the new gateway before freeing the old one. */
	old = rt->rt_gateway;
	rt->rt_gateway = new;
	if (old != NULL)
		sockaddr_free(old);

	KASSERT(rt->_rt_key != NULL);
	RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);

	if (rt->rt_flags & RTF_GATEWAY) {
		struct rtentry *gwrt;

		/* XXX we cannot call rtalloc1 if holding the rt lock */
		if (RT_LOCKED())
			gwrt = rtalloc1_locked(gate, 1, false);
		else
			gwrt = rtalloc1(gate, 1);
		/*
		 * If we switched gateways, grab the MTU from the new
		 * gateway route if the current MTU is greater than the
		 * MTU of the gateway.
		 * Note that, if the MTU of gateway is 0, we will reset the
		 * MTU of the route to run PMTUD again from scratch. XXX
		 */
		if (gwrt != NULL) {
			KASSERT(gwrt->_rt_key != NULL);
			RT_DPRINTF("gwrt->_rt_key = %p\n", gwrt->_rt_key);
			if ((rt->rt_rmx.rmx_locks & RTV_MTU) == 0 &&
			    rt->rt_rmx.rmx_mtu &&
			    rt->rt_rmx.rmx_mtu > gwrt->rt_rmx.rmx_mtu) {
				rt->rt_rmx.rmx_mtu = gwrt->rt_rmx.rmx_mtu;
			}
			rt_unref(gwrt);
		}
	}
	KASSERT(rt->_rt_key != NULL);
	RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);
	return 0;
}
1384
1385 static void
1386 rt_maskedcopy(const struct sockaddr *src, struct sockaddr *dst,
1387 const struct sockaddr *netmask)
1388 {
1389 const char *netmaskp = &netmask->sa_data[0],
1390 *srcp = &src->sa_data[0];
1391 char *dstp = &dst->sa_data[0];
1392 const char *maskend = (char *)dst + MIN(netmask->sa_len, src->sa_len);
1393 const char *srcend = (char *)dst + src->sa_len;
1394
1395 dst->sa_len = src->sa_len;
1396 dst->sa_family = src->sa_family;
1397
1398 while (dstp < maskend)
1399 *dstp++ = *srcp++ & *netmaskp++;
1400 if (dstp < srcend)
1401 memset(dstp, 0, (size_t)(srcend - dstp));
1402 }
1403
1404 /*
1405 * Inform the routing socket of a route change.
1406 */
1407 void
1408 rt_newmsg(const int cmd, const struct rtentry *rt)
1409 {
1410 struct rt_addrinfo info;
1411
1412 memset((void *)&info, 0, sizeof(info));
1413 info.rti_info[RTAX_DST] = rt_getkey(rt);
1414 info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
1415 info.rti_info[RTAX_NETMASK] = rt_mask(rt);
1416 if (rt->rt_ifp) {
1417 info.rti_info[RTAX_IFP] = rt->rt_ifp->if_dl->ifa_addr;
1418 info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr;
1419 }
1420
1421 rt_missmsg(cmd, &info, rt->rt_flags, 0);
1422 }
1423
1424 /*
1425 * Set up or tear down a routing table entry, normally
1426 * for an interface.
1427 */
1428 int
1429 rtinit(struct ifaddr *ifa, int cmd, int flags)
1430 {
1431 struct rtentry *rt;
1432 struct sockaddr *dst, *odst;
1433 struct sockaddr_storage maskeddst;
1434 struct rtentry *nrt = NULL;
1435 int error;
1436 struct rt_addrinfo info;
1437
1438 dst = flags & RTF_HOST ? ifa->ifa_dstaddr : ifa->ifa_addr;
1439 if (cmd == RTM_DELETE) {
1440 if ((flags & RTF_HOST) == 0 && ifa->ifa_netmask) {
1441 /* Delete subnet route for this interface */
1442 odst = dst;
1443 dst = (struct sockaddr *)&maskeddst;
1444 rt_maskedcopy(odst, dst, ifa->ifa_netmask);
1445 }
1446 if ((rt = rtalloc1(dst, 0)) != NULL) {
1447 if (rt->rt_ifa != ifa) {
1448 rt_unref(rt);
1449 return (flags & RTF_HOST) ? EHOSTUNREACH
1450 : ENETUNREACH;
1451 }
1452 rt_unref(rt);
1453 }
1454 }
1455 memset(&info, 0, sizeof(info));
1456 info.rti_ifa = ifa;
1457 info.rti_flags = flags | ifa->ifa_flags;
1458 info.rti_info[RTAX_DST] = dst;
1459 info.rti_info[RTAX_GATEWAY] = ifa->ifa_addr;
1460
1461 /*
1462 * XXX here, it seems that we are assuming that ifa_netmask is NULL
1463 * for RTF_HOST. bsdi4 passes NULL explicitly (via intermediate
1464 * variable) when RTF_HOST is 1. still not sure if i can safely
1465 * change it to meet bsdi4 behavior.
1466 */
1467 if (cmd != RTM_LLINFO_UPD)
1468 info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask;
1469 error = rtrequest1((cmd == RTM_LLINFO_UPD) ? RTM_GET : cmd, &info,
1470 &nrt);
1471 if (error != 0)
1472 return error;
1473
1474 rt = nrt;
1475 RT_REFCNT_TRACE(rt);
1476 switch (cmd) {
1477 case RTM_DELETE:
1478 rt_newmsg(cmd, rt);
1479 rt_free(rt);
1480 break;
1481 case RTM_LLINFO_UPD:
1482 if (cmd == RTM_LLINFO_UPD && ifa->ifa_rtrequest != NULL)
1483 ifa->ifa_rtrequest(RTM_LLINFO_UPD, rt, &info);
1484 rt_newmsg(RTM_CHANGE, rt);
1485 rt_unref(rt);
1486 break;
1487 case RTM_ADD:
1488 /*
1489 * FIXME NOMPSAFE: the rtentry is updated with the existence
1490 * of refeferences of it.
1491 */
1492 /*
1493 * XXX it looks just reverting rt_ifa replaced by ifa_rtrequest
1494 * called via rtrequest1. Can we just prevent the replacement
1495 * somehow and remove the following code? And also doesn't
1496 * calling ifa_rtrequest(RTM_ADD) replace rt_ifa again?
1497 */
1498 if (rt->rt_ifa != ifa) {
1499 printf("rtinit: wrong ifa (%p) was (%p)\n", ifa,
1500 rt->rt_ifa);
1501 if (rt->rt_ifa->ifa_rtrequest != NULL) {
1502 rt->rt_ifa->ifa_rtrequest(RTM_DELETE, rt,
1503 &info);
1504 }
1505 rt_replace_ifa(rt, ifa);
1506 rt->rt_ifp = ifa->ifa_ifp;
1507 if (ifa->ifa_rtrequest != NULL)
1508 ifa->ifa_rtrequest(RTM_ADD, rt, &info);
1509 }
1510 rt_newmsg(cmd, rt);
1511 rt_unref(rt);
1512 RT_REFCNT_TRACE(rt);
1513 break;
1514 }
1515 return error;
1516 }
1517
1518 /*
1519 * Create a local route entry for the address.
1520 * Announce the addition of the address and the route to the routing socket.
1521 */
int
rt_ifa_addlocal(struct ifaddr *ifa)
{
	struct rtentry *rt;
	int e;

	/* If there is no loopback entry, allocate one. */
	rt = rtalloc1(ifa->ifa_addr, 0);
#ifdef RT_DEBUG
	if (rt != NULL)
		dump_rt(rt);
#endif
	/* Add a route only if no loopbacked host route exists yet. */
	if (rt == NULL || (rt->rt_flags & RTF_HOST) == 0 ||
	    (rt->rt_ifp->if_flags & IFF_LOOPBACK) == 0)
	{
		struct rt_addrinfo info;
		struct rtentry *nrt;

		memset(&info, 0, sizeof(info));
		info.rti_flags = RTF_HOST | RTF_LOCAL;
		/* RTF_LLDATA unless the link is loopback or point-to-point. */
		if (!(ifa->ifa_ifp->if_flags & (IFF_LOOPBACK|IFF_POINTOPOINT)))
			info.rti_flags |= RTF_LLDATA;
		info.rti_info[RTAX_DST] = ifa->ifa_addr;
		info.rti_info[RTAX_GATEWAY] =
		    (const struct sockaddr *)ifa->ifa_ifp->if_sadl;
		info.rti_ifa = ifa;
		nrt = NULL;
		e = rtrequest1(RTM_ADD, &info, &nrt);
		/* Ensure the new route actually points at our ifaddr. */
		if (nrt && ifa != nrt->rt_ifa)
			rt_replace_ifa(nrt, ifa);
		rt_newaddrmsg(RTM_ADD, ifa, e, nrt);
		if (nrt != NULL) {
#ifdef RT_DEBUG
			dump_rt(nrt);
#endif
			rt_unref(nrt);
			RT_REFCNT_TRACE(nrt);
		}
	} else {
		/* Suitable route already present; just announce the address. */
		e = 0;
		rt_newaddrmsg(RTM_NEWADDR, ifa, 0, NULL);
	}
	if (rt != NULL)
		rt_unref(rt);
	return e;
}
1568
1569 /*
1570 * Remove the local route entry for the address.
1571 * Announce the removal of the address and the route to the routing socket.
1572 */
int
rt_ifa_remlocal(struct ifaddr *ifa, struct ifaddr *alt_ifa)
{
	struct rtentry *rt;
	int e = 0;

	rt = rtalloc1(ifa->ifa_addr, 0);

	/*
	 * Before deleting, check if a corresponding loopbacked
	 * host route surely exists.  With this check, we can avoid
	 * deleting an interface direct route whose destination is
	 * the same as the address being removed.  This can happen
	 * when removing a subnet-router anycast address on an
	 * interface attached to a shared medium.
	 */
	if (rt != NULL &&
	    (rt->rt_flags & RTF_HOST) &&
	    (rt->rt_ifp->if_flags & IFF_LOOPBACK))
	{
		/* If we cannot replace the route's ifaddr with the equivalent
		 * ifaddr of another interface, I believe it is safest to
		 * delete the route.
		 */
		if (alt_ifa == NULL) {
			e = rtdeletemsg(rt);
			if (e == 0) {
				/* Drop the lookup reference, then destroy. */
				rt_unref(rt);
				rt_free(rt);
				rt = NULL;
			}
			rt_newaddrmsg(RTM_DELADDR, ifa, 0, NULL);
		} else {
			/* Re-home the route instead of deleting it. */
			rt_replace_ifa(rt, alt_ifa);
			rt_newmsg(RTM_CHANGE, rt);
		}
	} else
		rt_newaddrmsg(RTM_DELADDR, ifa, 0, NULL);
	if (rt != NULL)
		rt_unref(rt);
	return e;
}
1615
1616 /*
1617 * Route timer routines. These routes allow functions to be called
1618 * for various routes at any time. This is useful in supporting
1619 * path MTU discovery and redirect route deletion.
1620 *
1621 * This is similar to some BSDI internal functions, but it provides
1622 * for multiple queues for efficiency's sake...
1623 */
1624
/* All live timer queues, scanned periodically by rt_timer_work(). */
LIST_HEAD(, rttimer_queue) rttimer_queue_head;
/* Non-zero once rt_timer_init() has run; enables lazy initialization. */
static int rt_init_done = 0;

/*
 * Some subtle order problems with domain initialization mean that
 * we cannot count on this being run from rt_init before various
 * protocol initializations are done. Therefore, we make sure
 * that this is run when the first queue is added...
 */

static void rt_timer_work(struct work *, void *);
1636
/*
 * One-time setup of the route-timer machinery: locks, the queue list,
 * the periodic callout and the workqueue that services it.  Invoked
 * lazily from rt_timer_queue_create().
 */
static void
rt_timer_init(void)
{
	int error;

	assert(rt_init_done == 0);

	/* XXX should be in rt_init */
	rw_init(&rt_lock);
	rw_init(&rtcache_lock);

	LIST_INIT(&rttimer_queue_head);
	callout_init(&rt_timer_ch, CALLOUT_MPSAFE);
	error = workqueue_create(&rt_timer_wq, "rt_timer",
	    rt_timer_work, NULL, PRI_SOFTNET, IPL_SOFTNET, WQ_MPSAFE);
	if (error)
		panic("%s: workqueue_create failed (%d)\n", __func__, error);
	/* Kick off the once-per-second timer scan. */
	callout_reset(&rt_timer_ch, hz, rt_timer_timer, NULL);
	rt_init_done = 1;
}
1657
/*
 * Allocate a timer queue whose entries fire after @timeout seconds and
 * link it onto the global list.  Returns NULL on allocation failure.
 */
struct rttimer_queue *
rt_timer_queue_create(u_int timeout)
{
	struct rttimer_queue *rtq;

	/* Lazily initialize the timer machinery on first use. */
	if (rt_init_done == 0)
		rt_timer_init();

	R_Malloc(rtq, struct rttimer_queue *, sizeof *rtq);
	if (rtq == NULL)
		return NULL;
	memset(rtq, 0, sizeof(*rtq));

	rtq->rtq_timeout = timeout;
	TAILQ_INIT(&rtq->rtq_head);
	RT_WLOCK();
	LIST_INSERT_HEAD(&rttimer_queue_head, rtq, rtq_link);
	RT_UNLOCK();

	return rtq;
}
1679
/*
 * Change the timeout (in seconds) applied to entries of @rtq; existing
 * entries are judged against the new value on the next scan.
 */
void
rt_timer_queue_change(struct rttimer_queue *rtq, long timeout)
{

	rtq->rtq_timeout = timeout;
}
1686
/*
 * Fire and free every timer on @rtq.  Called with the rt write lock
 * held; the lock is dropped around each callback and re-acquired
 * afterwards.
 */
static void
rt_timer_queue_remove_all(struct rttimer_queue *rtq)
{
	struct rttimer *r;

	RT_ASSERT_WLOCK();

	while ((r = TAILQ_FIRST(&rtq->rtq_head)) != NULL) {
		LIST_REMOVE(r, rtt_link);
		TAILQ_REMOVE(&rtq->rtq_head, r, rtt_next);
		/* Hold the rtentry across the unlocked callback. */
		rt_ref(r->rtt_rt); /* XXX */
		RT_REFCNT_TRACE(r->rtt_rt);
		RT_UNLOCK();
		(*r->rtt_func)(r->rtt_rt, r);
		pool_put(&rttimer_pool, r);
		RT_WLOCK();
		if (rtq->rtq_count > 0)
			rtq->rtq_count--;
		else
			printf("rt_timer_queue_remove_all: "
			    "rtq_count reached 0\n");
	}
}
1710
/*
 * Drain @rtq and unlink it from the global queue list.  The structure
 * itself is not freed here.
 */
void
rt_timer_queue_destroy(struct rttimer_queue *rtq)
{

	RT_WLOCK();
	rt_timer_queue_remove_all(rtq);
	LIST_REMOVE(rtq, rtq_link);
	RT_UNLOCK();

	/*
	 * Caller is responsible for freeing the rttimer_queue structure.
	 */
}
1724
/* Return the number of timers currently on @rtq (unlocked snapshot). */
unsigned long
rt_timer_count(struct rttimer_queue *rtq)
{
	return rtq->rtq_count;
}
1730
/*
 * Cancel and free all timers attached to @rt without running their
 * callbacks.  Used when the route itself is being deleted.
 */
static void
rt_timer_remove_all(struct rtentry *rt)
{
	struct rttimer *r;

	RT_WLOCK();
	while ((r = LIST_FIRST(&rt->rt_timer)) != NULL) {
		LIST_REMOVE(r, rtt_link);
		TAILQ_REMOVE(&r->rtt_queue->rtq_head, r, rtt_next);
		if (r->rtt_queue->rtq_count > 0)
			r->rtt_queue->rtq_count--;
		else
			printf("rt_timer_remove_all: rtq_count reached 0\n");
		pool_put(&rttimer_pool, r);
	}
	RT_UNLOCK();
}
1748
/*
 * Arm (or re-arm) a timer on @rt that fires @func once the entry has
 * sat on @queue longer than the queue's timeout.  Only one timer per
 * (route, func) pair is kept; an existing one is recycled.  Returns
 * ENOBUFS if a new rttimer cannot be allocated, else 0.
 */
int
rt_timer_add(struct rtentry *rt,
	void (*func)(struct rtentry *, struct rttimer *),
	struct rttimer_queue *queue)
{
	struct rttimer *r;

	KASSERT(func != NULL);
	RT_WLOCK();
	/*
	 * If there's already a timer with this action, destroy it before
	 * we add a new one.
	 */
	LIST_FOREACH(r, &rt->rt_timer, rtt_link) {
		if (r->rtt_func == func)
			break;
	}
	if (r != NULL) {
		/* Recycle the existing entry after unlinking it. */
		LIST_REMOVE(r, rtt_link);
		TAILQ_REMOVE(&r->rtt_queue->rtq_head, r, rtt_next);
		if (r->rtt_queue->rtq_count > 0)
			r->rtt_queue->rtq_count--;
		else
			printf("rt_timer_add: rtq_count reached 0\n");
	} else {
		r = pool_get(&rttimer_pool, PR_NOWAIT);
		if (r == NULL) {
			RT_UNLOCK();
			return ENOBUFS;
		}
	}

	memset(r, 0, sizeof(*r));

	/* Timestamp now; expiry is evaluated by the periodic scan. */
	r->rtt_rt = rt;
	r->rtt_time = time_uptime;
	r->rtt_func = func;
	r->rtt_queue = queue;
	LIST_INSERT_HEAD(&rt->rt_timer, r, rtt_link);
	TAILQ_INSERT_TAIL(&queue->rtq_head, r, rtt_next);
	r->rtt_queue->rtq_count++;

	RT_UNLOCK();

	return 0;
}
1795
1796 static void
1797 rt_timer_work(struct work *wk, void *arg)
1798 {
1799 struct rttimer_queue *rtq;
1800 struct rttimer *r;
1801
1802 RT_WLOCK();
1803 LIST_FOREACH(rtq, &rttimer_queue_head, rtq_link) {
1804 while ((r = TAILQ_FIRST(&rtq->rtq_head)) != NULL &&
1805 (r->rtt_time + rtq->rtq_timeout) < time_uptime) {
1806 LIST_REMOVE(r, rtt_link);
1807 TAILQ_REMOVE(&rtq->rtq_head, r, rtt_next);
1808 rt_ref(r->rtt_rt); /* XXX */
1809 RT_REFCNT_TRACE(r->rtt_rt);
1810 RT_UNLOCK();
1811 (*r->rtt_func)(r->rtt_rt, r);
1812 pool_put(&rttimer_pool, r);
1813 RT_WLOCK();
1814 if (rtq->rtq_count > 0)
1815 rtq->rtq_count--;
1816 else
1817 printf("rt_timer_timer: rtq_count reached 0\n");
1818 }
1819 }
1820 RT_UNLOCK();
1821
1822 callout_reset(&rt_timer_ch, hz, rt_timer_timer, NULL);
1823 }
1824
/* Callout handler: defer the actual timer scan to the workqueue. */
static void
rt_timer_timer(void *arg)
{

	workqueue_enqueue(rt_timer_wq, &rt_timer_wk, NULL);
}
1831
/*
 * Resolve the cached destination of @ro via the routing table and store
 * the result in the cache.  @flag is passed to rtalloc1().  Returns the
 * cached rtentry, or NULL if no destination is set or the lookup fails.
 * Called with the rtcache write lock held.
 */
static struct rtentry *
_rtcache_init(struct route *ro, int flag)
{
	struct rtentry *rt;

	rtcache_invariants(ro);
	KASSERT(ro->_ro_rt == NULL);
	RTCACHE_ASSERT_WLOCK();

	if (rtcache_getdst(ro) == NULL)
		return NULL;
	ro->ro_invalid = false;
	rt = rtalloc1(rtcache_getdst(ro), flag);
	if (rt != NULL && ISSET(rt->rt_flags, RTF_UP)) {
		ro->_ro_rt = rt;
		KASSERT(!ISSET(rt->rt_flags, RTF_UPDATING));
		/* Hold via the cache reference, drop rtalloc1's reference. */
		rtcache_ref(rt, ro);
		rt_unref(rt);
		/* Register this cache for later invalidation. */
		rtcache(ro);
	} else if (rt != NULL)
		rt_unref(rt);

	rtcache_invariants(ro);
	return ro->_ro_rt;
}
1857
/* Populate cache @ro (cloning allowed) under the rtcache write lock. */
struct rtentry *
rtcache_init(struct route *ro)
{
	struct rtentry *rt;
	RTCACHE_WLOCK();
	rt = _rtcache_init(ro, 1);
	RTCACHE_UNLOCK();
	return rt;
}
1867
/* As rtcache_init(), but with route cloning disabled (flag 0). */
struct rtentry *
rtcache_init_noclone(struct route *ro)
{
	struct rtentry *rt;
	RTCACHE_WLOCK();
	rt = _rtcache_init(ro, 0);
	RTCACHE_UNLOCK();
	return rt;
}
1877
/*
 * Discard the cached route of @ro and redo the lookup with the same
 * destination; @clone is passed through to the lookup.
 */
struct rtentry *
rtcache_update(struct route *ro, int clone)
{
	struct rtentry *rt;
	RTCACHE_WLOCK();
	rtcache_clear(ro);
	rt = _rtcache_init(ro, clone);
	RTCACHE_UNLOCK();
	return rt;
}
1888
/*
 * Copy the cached destination and route of @old_ro into @new_ro.
 * @new_ro must be a distinct cache from @old_ro.
 */
void
rtcache_copy(struct route *new_ro, struct route *old_ro)
{
	struct rtentry *rt;
	int ret;

	KASSERT(new_ro != old_ro);
	rtcache_invariants(new_ro);
	rtcache_invariants(old_ro);

	/* Validate (and reference) the source cache's route first. */
	rt = rtcache_validate(old_ro);

	if (rtcache_getdst(old_ro) == NULL)
		goto out;
	ret = rtcache_setdst(new_ro, rtcache_getdst(old_ro));
	if (ret != 0)
		goto out;

	RTCACHE_WLOCK();
	new_ro->ro_invalid = false;
	if ((new_ro->_ro_rt = rt) != NULL)
		rtcache(new_ro);
	rtcache_invariants(new_ro);
	RTCACHE_UNLOCK();
out:
	/*
	 * NOTE(review): the reference taken by rtcache_validate() above is
	 * released here even though new_ro now stores rt -- verify the
	 * reference ownership rules for cached routes.
	 */
	rtcache_unref(rt, old_ro);
	return;
}
1917
/* Caches whose routes were invalidated; see rtcache_invalidate(). */
static struct dom_rtlist invalid_routes = LIST_HEAD_INITIALIZER(dom_rtlist);
1919
#if defined(RT_DEBUG) && defined(NET_MPSAFE)
/* Debug helper: log psref acquire/release activity for a cached route. */
static void
rtcache_trace(const char *func, struct rtentry *rt, struct route *ro)
{
	char dst[64];

	sockaddr_format(ro->ro_sa, dst, 64);
	printf("trace: %s:\tdst=%s cpu=%d lwp=%p psref=%p target=%p\n", func, dst,
	    cpu_index(curcpu()), curlwp, &ro->ro_psref, &rt->rt_psref);
}
#define RTCACHE_PSREF_TRACE(rt, ro) rtcache_trace(__func__, (rt), (ro))
#else
#define RTCACHE_PSREF_TRACE(rt, ro) do {} while (0)
#endif
1934
/*
 * Take the cache's reference on @rt.  Under NET_MPSAFE this is a psref
 * acquisition (binding the current LWP); otherwise a no-op.
 */
static void
rtcache_ref(struct rtentry *rt, struct route *ro)
{

	KASSERT(rt != NULL);

#ifdef NET_MPSAFE
	RTCACHE_PSREF_TRACE(rt, ro);
	ro->ro_bound = curlwp_bind();
	psref_acquire(&ro->ro_psref, &rt->rt_psref, rt_psref_class);
#endif
}
1947
/*
 * Release the cache's reference on @rt taken by rtcache_ref().
 * Safe to call with rt == NULL.
 */
void
rtcache_unref(struct rtentry *rt, struct route *ro)
{

	if (rt == NULL)
		return;

#ifdef NET_MPSAFE
	psref_release(&ro->ro_psref, &rt->rt_psref, rt_psref_class);
	curlwp_bindx(ro->ro_bound);
	RTCACHE_PSREF_TRACE(rt, ro);
#endif
}
1961
/*
 * Return the cached rtentry of @ro if still usable (present, not
 * invalidated, RTF_UP, has an interface), taking a reference on it;
 * otherwise NULL.  Called with the rtcache lock held; the lock is
 * dropped and re-taken while waiting out an RTF_UPDATING route.
 */
static struct rtentry *
rtcache_validate_locked(struct route *ro)
{
	struct rtentry *rt = NULL;

retry:
	rt = ro->_ro_rt;
	rtcache_invariants(ro);

	if (ro->ro_invalid) {
		rt = NULL;
		goto out;
	}

	RT_RLOCK();
	if (rt != NULL && (rt->rt_flags & RTF_UP) != 0 && rt->rt_ifp != NULL) {
		if (ISSET(rt->rt_flags, RTF_UPDATING)) {
			if (rt_wait_ok()) {
				RT_UNLOCK();
				RTCACHE_UNLOCK();
				/* We can wait until the update is complete */
				rt_update_wait();
				RTCACHE_RLOCK();
				goto retry;
			} else {
				/* Cannot block here; treat as a miss. */
				rt = NULL;
			}
		} else
			rtcache_ref(rt, ro);
	} else
		rt = NULL;
	RT_UNLOCK();
out:
	return rt;
}
1997
/*
 * Lock wrapper around rtcache_validate_locked(): return the referenced,
 * still-valid cached route of @ro, or NULL.
 */
struct rtentry *
rtcache_validate(struct route *ro)
{
	struct rtentry *rt;

	RTCACHE_RLOCK();
	rt = rtcache_validate_locked(ro);
	RTCACHE_UNLOCK();
	return rt;
}
2008
/*
 * Mark every cache on @rtlist invalid, moving each onto the global
 * invalid_routes list; invalid caches fail validation until they are
 * re-initialized.  Called with the rtcache write lock held.
 */
static void
rtcache_invalidate(struct dom_rtlist *rtlist)
{
	struct route *ro;

	RTCACHE_ASSERT_WLOCK();

	while ((ro = LIST_FIRST(rtlist)) != NULL) {
		rtcache_invariants(ro);
		KASSERT(ro->_ro_rt != NULL);
		ro->ro_invalid = true;
		LIST_REMOVE(ro, ro_rtcache_next);
		LIST_INSERT_HEAD(&invalid_routes, ro, ro_rtcache_next);
		rtcache_invariants(ro);
	}
}
2025
/*
 * Purge @rt from every route cache of address family @family, e.g.
 * after the route has been removed from the table.
 */
static void
rtcache_clear_rtentry(int family, struct rtentry *rt)
{
	struct domain *dom;
	struct route *ro, *nro;

	if ((dom = pffinddomain(family)) == NULL)
		return;

	RTCACHE_WLOCK();
	LIST_FOREACH_SAFE(ro, &dom->dom_rtcache, ro_rtcache_next, nro) {
		if (ro->_ro_rt == rt)
			rtcache_clear(ro);
	}
	RTCACHE_UNLOCK();
}
2042
/*
 * Detach the cached route (if any) from @ro and unlink the cache from
 * its list.  The stored destination (ro_sa) is preserved.
 */
static void
rtcache_clear(struct route *ro)
{

	RTCACHE_ASSERT_WLOCK();

	rtcache_invariants(ro);
	if (ro->_ro_rt == NULL)
		return;

	LIST_REMOVE(ro, ro_rtcache_next);

	ro->_ro_rt = NULL;
	ro->ro_invalid = false;
	rtcache_invariants(ro);
}
2059
/*
 * Look up @dst through the cache @ro.  On a hit (same destination and a
 * still-valid route) the referenced route is returned and *hitp is set
 * to 1.  On a miss the cache is re-pointed at @dst, a fresh lookup is
 * done (@clone passed through), and *hitp is set to 0.
 */
struct rtentry *
rtcache_lookup2(struct route *ro, const struct sockaddr *dst,
	int clone, int *hitp)
{
	const struct sockaddr *odst;
	struct rtentry *rt = NULL;

	RTCACHE_RLOCK();
	odst = rtcache_getdst(ro);
	if (odst == NULL) {
		/*
		 * NOTE(review): here and below the read lock is dropped
		 * before the write lock is taken, so the cache may change
		 * in between -- presumably benign; confirm.
		 */
		RTCACHE_UNLOCK();
		RTCACHE_WLOCK();
		goto miss;
	}

	if (sockaddr_cmp(odst, dst) != 0) {
		/* Destination changed: drop the old cache entirely. */
		RTCACHE_UNLOCK();
		RTCACHE_WLOCK();
		rtcache_free_locked(ro);
		goto miss;
	}

	rt = rtcache_validate_locked(ro);
	if (rt == NULL) {
		/* Cached route went stale; clear it and redo the lookup. */
		RTCACHE_UNLOCK();
		RTCACHE_WLOCK();
		rtcache_clear(ro);
		goto miss;
	}

	rtcache_invariants(ro);

	RTCACHE_UNLOCK();
	if (hitp != NULL)
		*hitp = 1;
	return rt;
miss:
	if (hitp != NULL)
		*hitp = 0;
	if (rtcache_setdst_locked(ro, dst) == 0)
		rt = _rtcache_init(ro, clone);

	rtcache_invariants(ro);

	RTCACHE_UNLOCK();
	return rt;
}
2107
/*
 * Clear the cached route and free the stored destination sockaddr.
 * Called with the rtcache write lock held.
 */
static void
rtcache_free_locked(struct route *ro)
{

	RTCACHE_ASSERT_WLOCK();
	rtcache_clear(ro);
	if (ro->ro_sa != NULL) {
		sockaddr_free(ro->ro_sa);
		ro->ro_sa = NULL;
	}
	rtcache_invariants(ro);
}
2120
/* Release everything held by cache @ro: route and destination. */
void
rtcache_free(struct route *ro)
{

	RTCACHE_WLOCK();
	rtcache_free_locked(ro);
	RTCACHE_UNLOCK();
}
2129
/*
 * Point cache @ro at a copy of destination @sa, clearing any cached
 * route.  An existing ro_sa of the same family is overwritten in
 * place; otherwise it is freed and @sa is duplicated.  Returns 0 or
 * ENOMEM.  Called with the rtcache write lock held.
 */
static int
rtcache_setdst_locked(struct route *ro, const struct sockaddr *sa)
{
	KASSERT(sa != NULL);

	RTCACHE_ASSERT_WLOCK();

	rtcache_invariants(ro);
	if (ro->ro_sa != NULL) {
		if (ro->ro_sa->sa_family == sa->sa_family) {
			rtcache_clear(ro);
			/*
			 * NOTE(review): copies at most the old sa_len bytes;
			 * assumes same-family sockaddrs are the same size --
			 * confirm.
			 */
			sockaddr_copy(ro->ro_sa, ro->ro_sa->sa_len, sa);
			rtcache_invariants(ro);
			return 0;
		}
		/* free ro_sa, wrong family */
		rtcache_free_locked(ro);
	}

	KASSERT(ro->_ro_rt == NULL);

	if ((ro->ro_sa = sockaddr_dup(sa, M_ZERO | M_NOWAIT)) == NULL) {
		rtcache_invariants(ro);
		return ENOMEM;
	}
	rtcache_invariants(ro);
	return 0;
}
2158
/* Lock wrapper around rtcache_setdst_locked(). */
int
rtcache_setdst(struct route *ro, const struct sockaddr *sa)
{
	int error;

	RTCACHE_WLOCK();
	error = rtcache_setdst_locked(ro, sa);
	RTCACHE_UNLOCK();

	return error;
}
2170
/*
 * Store a copy of @tag on @rt, freeing any different previous tag.
 * Returns the stored tag, or NULL if duplication failed (in which
 * case the old tag has already been freed).
 */
const struct sockaddr *
rt_settag(struct rtentry *rt, const struct sockaddr *tag)
{
	if (rt->rt_tag != tag) {
		if (rt->rt_tag != NULL)
			sockaddr_free(rt->rt_tag);
		rt->rt_tag = sockaddr_dup(tag, M_ZERO | M_NOWAIT);
	}
	return rt->rt_tag;
}
2181
/* Return the tag associated with @rt, or NULL if none is set. */
struct sockaddr *
rt_gettag(const struct rtentry *rt)
{
	return rt->rt_tag;
}
2187
2188 int
2189 rt_check_reject_route(const struct rtentry *rt, const struct ifnet *ifp)
2190 {
2191
2192 if ((rt->rt_flags & RTF_REJECT) != 0) {
2193 /* Mimic looutput */
2194 if (ifp->if_flags & IFF_LOOPBACK)
2195 return (rt->rt_flags & RTF_HOST) ?
2196 EHOSTUNREACH : ENETUNREACH;
2197 else if (rt->rt_rmx.rmx_expire == 0 ||
2198 time_uptime < rt->rt_rmx.rmx_expire)
2199 return (rt->rt_flags & RTF_GATEWAY) ?
2200 EHOSTUNREACH : EHOSTDOWN;
2201 }
2202
2203 return 0;
2204 }
2205
/*
 * Delete every route of @family matched by the predicate @f (called
 * with @v).  Each match is referenced under the lock, then deleted via
 * rtrequest() with the lock dropped; a concurrent deletion (ESRCH) is
 * tolerated.
 */
void
rt_delete_matched_entries(sa_family_t family, int (*f)(struct rtentry *, void *),
	void *v)
{

	for (;;) {
		int s;
		int error;
		struct rtentry *rt, *retrt = NULL;

		RT_RLOCK();
		s = splsoftnet();
		rt = rtbl_search_matched_entry(family, f, v);
		if (rt == NULL) {
			/* No more matches; done. */
			splx(s);
			RT_UNLOCK();
			return;
		}
		/* Keep the entry alive while the lock is dropped. */
		rt->rt_refcnt++;
		splx(s);
		RT_UNLOCK();

		error = rtrequest(RTM_DELETE, rt_getkey(rt), rt->rt_gateway,
		    rt_mask(rt), rt->rt_flags, &retrt);
		if (error == 0) {
			KASSERT(retrt == rt);
			KASSERT((retrt->rt_flags & RTF_UP) == 0);
			retrt->rt_ifp = NULL;
			rt_unref(rt);
			rt_free(retrt);
		} else if (error == ESRCH) {
			/* Someone deleted the entry already. */
			rt_unref(rt);
		} else {
			log(LOG_ERR, "%s: unable to delete rtentry @ %p, "
			    "error = %d\n", rt->rt_ifp->if_xname, rt, error);
			/* XXX how to treat this case? */
		}
	}
}
2246
/*
 * Walk all routes of @family, applying @f (with @v) to each, holding
 * the rt read lock for the duration of the walk.
 */
int
rt_walktree(sa_family_t family, int (*f)(struct rtentry *, void *), void *v)
{
	int error;

	RT_RLOCK();
	error = rtbl_walktree(family, f, v);
	RT_UNLOCK();

	return error;
}
2258
2259 #ifdef DDB
2260
2261 #include <machine/db_machdep.h>
2262 #include <ddb/db_interface.h>
2263 #include <ddb/db_output.h>
2264
2265 #define rt_expire rt_rmx.rmx_expire
2266
2267 static void
2268 db_print_sa(const struct sockaddr *sa)
2269 {
2270 int len;
2271 const u_char *p;
2272
2273 if (sa == NULL) {
2274 db_printf("[NULL]");
2275 return;
2276 }
2277
2278 p = (const u_char *)sa;
2279 len = sa->sa_len;
2280 db_printf("[");
2281 while (len > 0) {
2282 db_printf("%d", *p);
2283 p++; len--;
2284 if (len) db_printf(",");
2285 }
2286 db_printf("]\n");
2287 }
2288
/* ddb helper: dump an ifaddr's addresses, flags, refcount and metric. */
static void
db_print_ifa(struct ifaddr *ifa)
{
	if (ifa == NULL)
		return;
	db_printf(" ifa_addr=");
	db_print_sa(ifa->ifa_addr);
	db_printf(" ifa_dsta=");
	db_print_sa(ifa->ifa_dstaddr);
	db_printf(" ifa_mask=");
	db_print_sa(ifa->ifa_netmask);
	db_printf(" flags=0x%x,refcnt=%d,metric=%d\n",
	    ifa->ifa_flags,
	    ifa->ifa_refcnt,
	    ifa->ifa_metric);
}
2305
2306 /*
2307 * Function to pass to rt_walktree().
2308 * Return non-zero error to abort walk.
2309 */
/* ddb walk callback: dump one rtentry in human-readable form. */
static int
db_show_rtentry(struct rtentry *rt, void *w)
{
	db_printf("rtentry=%p", rt);

	db_printf(" flags=0x%x refcnt=%d use=%"PRId64" expire=%"PRId64"\n",
	    rt->rt_flags, rt->rt_refcnt,
	    rt->rt_use, (uint64_t)rt->rt_expire);

	db_printf(" key="); db_print_sa(rt_getkey(rt));
	db_printf(" mask="); db_print_sa(rt_mask(rt));
	db_printf(" gw="); db_print_sa(rt->rt_gateway);

	db_printf(" ifp=%p ", rt->rt_ifp);
	if (rt->rt_ifp)
		db_printf("(%s)", rt->rt_ifp->if_xname);
	else
		db_printf("(NULL)");

	db_printf(" ifa=%p\n", rt->rt_ifa);
	db_print_ifa(rt->rt_ifa);

	db_printf(" gwroute=%p llinfo=%p\n",
	    rt->rt_gwroute, rt->rt_llinfo);

	return 0;
}
2337
2338 /*
2339 * Function to print all the route trees.
2340 * Use this from ddb: "show routes"
2341 */
void
db_show_routes(db_expr_t addr, bool have_addr,
	db_expr_t count, const char *modif)
{
	/* Arguments are unused; only the AF_INET tree is dumped. */
	rt_walktree(AF_INET, db_show_rtentry, NULL);
}
2348 #endif
2349