route.c revision 1.189 1 /* $NetBSD: route.c,v 1.189 2017/02/10 13:44:47 ozaki-r Exp $ */
2
3 /*-
4 * Copyright (c) 1998, 2008 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Kevin M. Lahey of the Numerical Aerospace Simulation Facility,
9 * NASA Ames Research Center.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
23 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 * POSSIBILITY OF SUCH DAMAGE.
31 */
32
33 /*
34 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
35 * All rights reserved.
36 *
37 * Redistribution and use in source and binary forms, with or without
38 * modification, are permitted provided that the following conditions
39 * are met:
40 * 1. Redistributions of source code must retain the above copyright
41 * notice, this list of conditions and the following disclaimer.
42 * 2. Redistributions in binary form must reproduce the above copyright
43 * notice, this list of conditions and the following disclaimer in the
44 * documentation and/or other materials provided with the distribution.
45 * 3. Neither the name of the project nor the names of its contributors
46 * may be used to endorse or promote products derived from this software
47 * without specific prior written permission.
48 *
49 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
50 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
53 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59 * SUCH DAMAGE.
60 */
61
62 /*
63 * Copyright (c) 1980, 1986, 1991, 1993
64 * The Regents of the University of California. All rights reserved.
65 *
66 * Redistribution and use in source and binary forms, with or without
67 * modification, are permitted provided that the following conditions
68 * are met:
69 * 1. Redistributions of source code must retain the above copyright
70 * notice, this list of conditions and the following disclaimer.
71 * 2. Redistributions in binary form must reproduce the above copyright
72 * notice, this list of conditions and the following disclaimer in the
73 * documentation and/or other materials provided with the distribution.
74 * 3. Neither the name of the University nor the names of its contributors
75 * may be used to endorse or promote products derived from this software
76 * without specific prior written permission.
77 *
78 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
79 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
80 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
81 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
82 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
83 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
84 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
85 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
86 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
87 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
88 * SUCH DAMAGE.
89 *
90 * @(#)route.c 8.3 (Berkeley) 1/9/95
91 */
92
93 #ifdef _KERNEL_OPT
94 #include "opt_inet.h"
95 #include "opt_route.h"
96 #include "opt_net_mpsafe.h"
97 #endif
98
99 #include <sys/cdefs.h>
100 __KERNEL_RCSID(0, "$NetBSD: route.c,v 1.189 2017/02/10 13:44:47 ozaki-r Exp $");
101
102 #include <sys/param.h>
103 #ifdef RTFLUSH_DEBUG
104 #include <sys/sysctl.h>
105 #endif
106 #include <sys/systm.h>
107 #include <sys/callout.h>
108 #include <sys/proc.h>
109 #include <sys/mbuf.h>
110 #include <sys/socket.h>
111 #include <sys/socketvar.h>
112 #include <sys/domain.h>
113 #include <sys/kernel.h>
114 #include <sys/ioctl.h>
115 #include <sys/pool.h>
116 #include <sys/kauth.h>
117 #include <sys/workqueue.h>
118 #include <sys/syslog.h>
119 #include <sys/rwlock.h>
120 #include <sys/mutex.h>
121 #include <sys/cpu.h>
122
123 #include <net/if.h>
124 #include <net/if_dl.h>
125 #include <net/route.h>
126
127 #include <netinet/in.h>
128 #include <netinet/in_var.h>
129
/*
 * rtcache_debug() evaluates to non-zero when route-cache debug output is
 * wanted.  It reads the _rtcache_debug knob only when the kernel is built
 * with RTFLUSH_DEBUG; otherwise it is the constant 0.
 */
#ifndef RTFLUSH_DEBUG
#define	rtcache_debug()	0
#else /* RTFLUSH_DEBUG */
#define	rtcache_debug()	__predict_false(_rtcache_debug)
#endif /* RTFLUSH_DEBUG */

/*
 * RT_REFCNT_TRACE(rt) prints the calling function, line, rtentry pointer
 * and its current reference count when RT_DEBUG is defined; it is a no-op
 * statement otherwise.
 */
#ifndef RT_DEBUG
#define	RT_REFCNT_TRACE(rt)	do {} while (0)
#else
#define	RT_REFCNT_TRACE(rt)	printf("%s:%d: rt=%p refcnt=%d\n", \
    __func__, __LINE__, (rt), (rt)->rt_refcnt)
#endif

/*
 * dlog() forwards to log() on DEBUG kernels and expands to an empty
 * statement (arguments are not evaluated) on all others.
 */
#ifndef DEBUG
#define	dlog(level, fmt, args...)	do {} while (0)
#else
#define	dlog(level, fmt, args...)	log(level, fmt, ##args)
#endif
148
149 struct rtstat rtstat;
150
151 static int rttrash; /* routes not in table but not freed */
152
153 static struct pool rtentry_pool;
154 static struct pool rttimer_pool;
155
156 static struct callout rt_timer_ch; /* callout for rt_timer_timer() */
157 static struct workqueue *rt_timer_wq;
158 static struct work rt_timer_wk;
159
160 static void rt_timer_init(void);
161 static void rt_timer_queue_remove_all(struct rttimer_queue *);
162 static void rt_timer_remove_all(struct rtentry *);
163 static void rt_timer_timer(void *);
164
165 /*
166 * Locking notes:
167 * - The routing table is protected by a global rwlock
168 * - API: RT_RLOCK and friends
169 * - rtcaches are protected by a global rwlock
170 * - API: RTCACHE_RLOCK and friends
171 * - References to a rtentry are managed by reference counting and psref
172 * - Reference counting is used for temporal reference when a rtentry
173 * is fetched from the routing table
174 * - psref is used for temporal reference when a rtentry is fetched
175 * from a rtcache
176 * - struct route (rtcache) has struct psref, so we cannot obtain
177 * a reference twice on the same struct route
178 * - Before destroying or updating a rtentry, we have to wait for
179 * all references to be released (see below for details)
180 * - APIs
181 * - An obtained rtentry via rtalloc1 or rtrequest* must be
182 * unreferenced by rt_unref
183 * - An obtained rtentry via rtcache_* must be unreferenced by
184 * rtcache_unref
185 * - TODO: once we get a lockless routing table, we should use only
186 * psref for rtentries
187 * - rtentry destruction
188 * - A rtentry is destroyed (freed) only when we call rtrequest(RTM_DELETE)
189 * - If a caller of rtrequest grabs a reference of a rtentry, the caller
190 * has a responsibility to destroy the rtentry by itself by calling
191 * rt_free
192 * - If not, rtrequest itself does that
193 * - If rt_free is called in softint, the actual destruction routine is
194 * deferred to a workqueue
195 * - rtentry update
196 * - When updating a rtentry, RTF_UPDATING flag is set
197 * - If a rtentry is set RTF_UPDATING, fetching the rtentry from
198 * the routing table or a rtcache results in either of the following
199 * cases:
200 * - if the caller runs in softint, the caller fails to fetch
201 * - otherwise, the caller waits for the update completed and retries
202 * to fetch (probably succeed to fetch for the second time)
203 */
204
205 /*
206 * Global locks for the routing table and rtcaches.
207 * Locking order: rtcache_lock => rt_lock
208 */
209 static krwlock_t rt_lock __cacheline_aligned;
210 #ifdef NET_MPSAFE
211 #define RT_RLOCK() rw_enter(&rt_lock, RW_READER)
212 #define RT_WLOCK() rw_enter(&rt_lock, RW_WRITER)
213 #define RT_UNLOCK() rw_exit(&rt_lock)
214 #define RT_LOCKED() rw_lock_held(&rt_lock)
215 #define RT_ASSERT_WLOCK() KASSERT(rw_write_held(&rt_lock))
216 #else
217 #define RT_RLOCK() do {} while (0)
218 #define RT_WLOCK() do {} while (0)
219 #define RT_UNLOCK() do {} while (0)
220 #define RT_LOCKED() false
221 #define RT_ASSERT_WLOCK() do {} while (0)
222 #endif
223
224 static krwlock_t rtcache_lock __cacheline_aligned;
225 #ifdef NET_MPSAFE
226 #define RTCACHE_RLOCK() rw_enter(&rtcache_lock, RW_READER)
227 #define RTCACHE_WLOCK() rw_enter(&rtcache_lock, RW_WRITER)
228 #define RTCACHE_UNLOCK() rw_exit(&rtcache_lock)
229 #define RTCACHE_ASSERT_WLOCK() KASSERT(rw_write_held(&rtcache_lock))
230 #define RTCACHE_WLOCKED() rw_write_held(&rtcache_lock)
231 #else
232 #define RTCACHE_RLOCK() do {} while (0)
233 #define RTCACHE_WLOCK() do {} while (0)
234 #define RTCACHE_UNLOCK() do {} while (0)
235 #define RTCACHE_ASSERT_WLOCK() do {} while (0)
236 #define RTCACHE_WLOCKED() false
237 #endif
238
239 /*
240 * mutex and cv that are used to wait for references to a rtentry left
241 * before updating the rtentry.
242 */
243 static struct {
244 kmutex_t lock;
245 kcondvar_t cv;
246 bool ongoing;
247 const struct lwp *lwp;
248 } rt_update_global __cacheline_aligned;
249
250 /*
251 * A workqueue and stuff that are used to defer the destruction routine
252 * of rtentries.
253 */
254 static struct {
255 struct workqueue *wq;
256 struct work wk;
257 kmutex_t lock;
258 struct rtentry *queue[10];
259 } rt_free_global __cacheline_aligned;
260
261 /* psref for rtentry */
262 static struct psref_class *rt_psref_class __read_mostly;
263
264 #ifdef RTFLUSH_DEBUG
265 static int _rtcache_debug = 0;
266 #endif /* RTFLUSH_DEBUG */
267
268 static kauth_listener_t route_listener;
269
270 static int rtdeletemsg(struct rtentry *);
271 static void rtflushall(int);
272
273 static void rt_maskedcopy(const struct sockaddr *,
274 struct sockaddr *, const struct sockaddr *);
275
276 static void rtcache_clear(struct route *);
277 static void rtcache_clear_rtentry(int, struct rtentry *);
278 static void rtcache_invalidate(struct dom_rtlist *);
279
280 static void rt_ref(struct rtentry *);
281
282 static struct rtentry *
283 rtalloc1_locked(const struct sockaddr *, int, bool);
284 static struct rtentry *
285 rtcache_validate_locked(struct route *);
286 static void rtcache_free_locked(struct route *);
287 static int rtcache_setdst_locked(struct route *, const struct sockaddr *);
288
289 static void rtcache_ref(struct rtentry *, struct route *);
290
291 #ifdef NET_MPSAFE
292 static void rt_update_wait(void);
293 #endif
294
295 static bool rt_wait_ok(void);
296 static void rt_wait_refcnt(const char *, struct rtentry *, int);
297 static void rt_wait_psref(struct rtentry *);
298
299 #ifdef DDB
300 static void db_print_sa(const struct sockaddr *);
301 static void db_print_ifa(struct ifaddr *);
302 static int db_show_rtentry(struct rtentry *, void *);
303 #endif
304
#ifdef RTFLUSH_DEBUG
static void sysctl_net_rtcache_setup(struct sysctllog **);

/*
 * Create the "rtcache" sysctl node under CTL_NET and, beneath it, the
 * read-write integer "debug" that is backed by _rtcache_debug.  Failures
 * are silently ignored; if the parent node cannot be created the leaf is
 * not attempted.
 */
static void
sysctl_net_rtcache_setup(struct sysctllog **clog)
{
	const struct sysctlnode *rnode;
	int error;

	error = sysctl_createv(clog, 0, NULL, &rnode, CTLFLAG_PERMANENT,
	    CTLTYPE_NODE,
	    "rtcache", SYSCTL_DESCR("Route cache related settings"),
	    NULL, 0, NULL, 0, CTL_NET, CTL_CREATE, CTL_EOL);
	if (error != 0)
		return;

	/* Nothing more to do whether or not the leaf could be created. */
	(void)sysctl_createv(clog, 0, &rnode, &rnode,
	    CTLFLAG_PERMANENT|CTLFLAG_READWRITE, CTLTYPE_INT,
	    "debug", SYSCTL_DESCR("Debug route caches"),
	    NULL, 0, &_rtcache_debug, 0, CTL_CREATE, CTL_EOL);
}
#endif /* RTFLUSH_DEBUG */
324
325 static inline void
326 rt_destroy(struct rtentry *rt)
327 {
328 if (rt->_rt_key != NULL)
329 sockaddr_free(rt->_rt_key);
330 if (rt->rt_gateway != NULL)
331 sockaddr_free(rt->rt_gateway);
332 if (rt_gettag(rt) != NULL)
333 sockaddr_free(rt_gettag(rt));
334 rt->_rt_key = rt->rt_gateway = rt->rt_tag = NULL;
335 }
336
337 static inline const struct sockaddr *
338 rt_setkey(struct rtentry *rt, const struct sockaddr *key, int flags)
339 {
340 if (rt->_rt_key == key)
341 goto out;
342
343 if (rt->_rt_key != NULL)
344 sockaddr_free(rt->_rt_key);
345 rt->_rt_key = sockaddr_dup(key, flags);
346 out:
347 rt->rt_nodes->rn_key = (const char *)rt->_rt_key;
348 return rt->_rt_key;
349 }
350
351 struct ifaddr *
352 rt_get_ifa(struct rtentry *rt)
353 {
354 struct ifaddr *ifa;
355
356 if ((ifa = rt->rt_ifa) == NULL)
357 return ifa;
358 else if (ifa->ifa_getifa == NULL)
359 return ifa;
360 #if 0
361 else if (ifa->ifa_seqno != NULL && *ifa->ifa_seqno == rt->rt_ifa_seqno)
362 return ifa;
363 #endif
364 else {
365 ifa = (*ifa->ifa_getifa)(ifa, rt_getkey(rt));
366 if (ifa == NULL)
367 return NULL;
368 rt_replace_ifa(rt, ifa);
369 return ifa;
370 }
371 }
372
373 static void
374 rt_set_ifa1(struct rtentry *rt, struct ifaddr *ifa)
375 {
376 rt->rt_ifa = ifa;
377 if (ifa->ifa_seqno != NULL)
378 rt->rt_ifa_seqno = *ifa->ifa_seqno;
379 }
380
381 /*
382 * Is this route the connected route for the ifa?
383 */
384 static int
385 rt_ifa_connected(const struct rtentry *rt, const struct ifaddr *ifa)
386 {
387 const struct sockaddr *key, *dst, *odst;
388 struct sockaddr_storage maskeddst;
389
390 key = rt_getkey(rt);
391 dst = rt->rt_flags & RTF_HOST ? ifa->ifa_dstaddr : ifa->ifa_addr;
392 if (dst == NULL ||
393 dst->sa_family != key->sa_family ||
394 dst->sa_len != key->sa_len)
395 return 0;
396 if ((rt->rt_flags & RTF_HOST) == 0 && ifa->ifa_netmask) {
397 odst = dst;
398 dst = (struct sockaddr *)&maskeddst;
399 rt_maskedcopy(odst, (struct sockaddr *)&maskeddst,
400 ifa->ifa_netmask);
401 }
402 return (memcmp(dst, key, dst->sa_len) == 0);
403 }
404
405 void
406 rt_replace_ifa(struct rtentry *rt, struct ifaddr *ifa)
407 {
408 if (rt->rt_ifa &&
409 rt->rt_ifa != ifa &&
410 rt->rt_ifa->ifa_flags & IFA_ROUTE &&
411 rt_ifa_connected(rt, rt->rt_ifa))
412 {
413 RT_DPRINTF("rt->_rt_key = %p, ifa = %p, "
414 "replace deleted IFA_ROUTE\n",
415 (void *)rt->_rt_key, (void *)rt->rt_ifa);
416 rt->rt_ifa->ifa_flags &= ~IFA_ROUTE;
417 if (rt_ifa_connected(rt, ifa)) {
418 RT_DPRINTF("rt->_rt_key = %p, ifa = %p, "
419 "replace added IFA_ROUTE\n",
420 (void *)rt->_rt_key, (void *)ifa);
421 ifa->ifa_flags |= IFA_ROUTE;
422 }
423 }
424
425 ifaref(ifa);
426 ifafree(rt->rt_ifa);
427 rt_set_ifa1(rt, ifa);
428 }
429
/*
 * Attach `ifa' to a route: take a reference on the ifa, then record it
 * (and its sequence number, if any) in the rtentry.  Any ifa previously
 * stored in the route is not released here.
 */
static void
rt_set_ifa(struct rtentry *rt, struct ifaddr *ifa)
{

	ifaref(ifa);		/* the route now holds a reference */
	rt_set_ifa1(rt, ifa);
}
436
437 static int
438 route_listener_cb(kauth_cred_t cred, kauth_action_t action, void *cookie,
439 void *arg0, void *arg1, void *arg2, void *arg3)
440 {
441 struct rt_msghdr *rtm;
442 int result;
443
444 result = KAUTH_RESULT_DEFER;
445 rtm = arg1;
446
447 if (action != KAUTH_NETWORK_ROUTE)
448 return result;
449
450 if (rtm->rtm_type == RTM_GET)
451 result = KAUTH_RESULT_ALLOW;
452
453 return result;
454 }
455
456 static void rt_free_work(struct work *, void *);
457
458 void
459 rt_init(void)
460 {
461 int error;
462
463 #ifdef RTFLUSH_DEBUG
464 sysctl_net_rtcache_setup(NULL);
465 #endif
466
467 mutex_init(&rt_free_global.lock, MUTEX_DEFAULT, IPL_SOFTNET);
468 rt_psref_class = psref_class_create("rtentry", IPL_SOFTNET);
469
470 error = workqueue_create(&rt_free_global.wq, "rt_free",
471 rt_free_work, NULL, PRI_SOFTNET, IPL_SOFTNET, WQ_MPSAFE);
472 if (error)
473 panic("%s: workqueue_create failed (%d)\n", __func__, error);
474
475 mutex_init(&rt_update_global.lock, MUTEX_DEFAULT, IPL_SOFTNET);
476 cv_init(&rt_update_global.cv, "rt_update");
477
478 pool_init(&rtentry_pool, sizeof(struct rtentry), 0, 0, 0, "rtentpl",
479 NULL, IPL_SOFTNET);
480 pool_init(&rttimer_pool, sizeof(struct rttimer), 0, 0, 0, "rttmrpl",
481 NULL, IPL_SOFTNET);
482
483 rn_init(); /* initialize all zeroes, all ones, mask table */
484 rtbl_init();
485
486 route_listener = kauth_listen_scope(KAUTH_SCOPE_NETWORK,
487 route_listener_cb, NULL);
488 }
489
490 static void
491 rtflushall(int family)
492 {
493 struct domain *dom;
494
495 if (rtcache_debug())
496 printf("%s: enter\n", __func__);
497
498 if ((dom = pffinddomain(family)) == NULL)
499 return;
500
501 RTCACHE_WLOCK();
502 rtcache_invalidate(&dom->dom_rtcache);
503 RTCACHE_UNLOCK();
504 }
505
506 static void
507 rtcache(struct route *ro)
508 {
509 struct domain *dom;
510
511 RTCACHE_ASSERT_WLOCK();
512
513 rtcache_invariants(ro);
514 KASSERT(ro->_ro_rt != NULL);
515 KASSERT(ro->ro_invalid == false);
516 KASSERT(rtcache_getdst(ro) != NULL);
517
518 if ((dom = pffinddomain(rtcache_getdst(ro)->sa_family)) == NULL)
519 return;
520
521 LIST_INSERT_HEAD(&dom->dom_rtcache, ro, ro_rtcache_next);
522 rtcache_invariants(ro);
523 }
524
#ifdef RT_DEBUG
/*
 * Print a one-line summary of a rtentry (pointer, destination, gateway,
 * flags and interface name) with aprint_normal().  Missing fields are
 * shown as "(NULL)".
 */
static void
dump_rt(const struct rtentry *rt)
{
	char buf[512];

	aprint_normal("rt: ");
	aprint_normal("p=%p ", rt);

	if (rt->_rt_key == NULL) {
		aprint_normal("dst=(NULL) ");
	} else {
		sockaddr_format(rt->_rt_key, buf, sizeof(buf));
		aprint_normal("dst=%s ", buf);
	}

	if (rt->rt_gateway == NULL) {
		aprint_normal("gw=(NULL) ");
	} else {
		/* Format the gateway itself, not the key again. */
		sockaddr_format(rt->rt_gateway, buf, sizeof(buf));
		aprint_normal("gw=%s ", buf);
	}

	aprint_normal("flags=%x ", rt->rt_flags);

	if (rt->rt_ifp == NULL) {
		aprint_normal("if=(NULL) ");
	} else {
		aprint_normal("if=%s ", rt->rt_ifp->if_xname);
	}
	aprint_normal("\n");
}
#endif /* RT_DEBUG */
554
555 /*
556 * Packet routing routines. If success, refcnt of a returned rtentry
557 * will be incremented. The caller has to rtfree it by itself.
558 */
559 struct rtentry *
560 rtalloc1_locked(const struct sockaddr *dst, int report, bool wait_ok)
561 {
562 rtbl_t *rtbl;
563 struct rtentry *rt;
564 int s;
565
566 #ifdef NET_MPSAFE
567 retry:
568 #endif
569 s = splsoftnet();
570 rtbl = rt_gettable(dst->sa_family);
571 if (rtbl == NULL)
572 goto miss;
573
574 rt = rt_matchaddr(rtbl, dst);
575 if (rt == NULL)
576 goto miss;
577
578 if (!ISSET(rt->rt_flags, RTF_UP))
579 goto miss;
580
581 #ifdef NET_MPSAFE
582 if (ISSET(rt->rt_flags, RTF_UPDATING) &&
583 /* XXX updater should be always able to acquire */
584 curlwp != rt_update_global.lwp) {
585 bool need_lock = false;
586 if (!wait_ok || !rt_wait_ok())
587 goto miss;
588 RT_UNLOCK();
589 splx(s);
590
591 /* XXX need more proper solution */
592 if (RTCACHE_WLOCKED()) {
593 RTCACHE_UNLOCK();
594 need_lock = true;
595 }
596
597 /* We can wait until the update is complete */
598 rt_update_wait();
599
600 if (need_lock)
601 RTCACHE_WLOCK();
602 goto retry;
603 }
604 #endif /* NET_MPSAFE */
605
606 rt_ref(rt);
607 RT_REFCNT_TRACE(rt);
608
609 splx(s);
610 return rt;
611 miss:
612 rtstat.rts_unreach++;
613 if (report) {
614 struct rt_addrinfo info;
615
616 memset(&info, 0, sizeof(info));
617 info.rti_info[RTAX_DST] = dst;
618 rt_missmsg(RTM_MISS, &info, 0, 0);
619 }
620 splx(s);
621 return NULL;
622 }
623
624 struct rtentry *
625 rtalloc1(const struct sockaddr *dst, int report)
626 {
627 struct rtentry *rt;
628
629 RT_RLOCK();
630 rt = rtalloc1_locked(dst, report, true);
631 RT_UNLOCK();
632
633 return rt;
634 }
635
636 static void
637 rt_ref(struct rtentry *rt)
638 {
639
640 KASSERT(rt->rt_refcnt >= 0);
641 atomic_inc_uint(&rt->rt_refcnt);
642 }
643
644 void
645 rt_unref(struct rtentry *rt)
646 {
647
648 KASSERT(rt != NULL);
649 KASSERTMSG(rt->rt_refcnt > 0, "refcnt=%d", rt->rt_refcnt);
650
651 atomic_dec_uint(&rt->rt_refcnt);
652 if (!ISSET(rt->rt_flags, RTF_UP) || ISSET(rt->rt_flags, RTF_UPDATING)) {
653 mutex_enter(&rt_free_global.lock);
654 cv_broadcast(&rt->rt_cv);
655 mutex_exit(&rt_free_global.lock);
656 }
657 }
658
/*
 * Report whether the caller may wait: true unless running in soft
 * interrupt context.  Must not be called from hard interrupt context.
 */
static bool
rt_wait_ok(void)
{

	KASSERT(!cpu_intr_p());

	if (cpu_softintr_p())
		return false;
	return true;
}
666
667 void
668 rt_wait_refcnt(const char *title, struct rtentry *rt, int cnt)
669 {
670 mutex_enter(&rt_free_global.lock);
671 while (rt->rt_refcnt > cnt) {
672 dlog(LOG_DEBUG, "%s: %s waiting (refcnt=%d)\n",
673 __func__, title, rt->rt_refcnt);
674 cv_wait(&rt->rt_cv, &rt_free_global.lock);
675 dlog(LOG_DEBUG, "%s: %s waited (refcnt=%d)\n",
676 __func__, title, rt->rt_refcnt);
677 }
678 mutex_exit(&rt_free_global.lock);
679 }
680
681 void
682 rt_wait_psref(struct rtentry *rt)
683 {
684
685 psref_target_destroy(&rt->rt_psref, rt_psref_class);
686 psref_target_init(&rt->rt_psref, rt_psref_class);
687 }
688
689 static void
690 _rt_free(struct rtentry *rt)
691 {
692 struct ifaddr *ifa;
693
694 /*
695 * Need to avoid a deadlock on rt_wait_refcnt of update
696 * and a conflict on psref_target_destroy of update.
697 */
698 #ifdef NET_MPSAFE
699 rt_update_wait();
700 #endif
701
702 RT_REFCNT_TRACE(rt);
703 KASSERTMSG(rt->rt_refcnt >= 0, "refcnt=%d", rt->rt_refcnt);
704 rt_wait_refcnt("free", rt, 0);
705 #ifdef NET_MPSAFE
706 psref_target_destroy(&rt->rt_psref, rt_psref_class);
707 #endif
708
709 rt_assert_inactive(rt);
710 rttrash--;
711 ifa = rt->rt_ifa;
712 rt->rt_ifa = NULL;
713 ifafree(ifa);
714 rt->rt_ifp = NULL;
715 cv_destroy(&rt->rt_cv);
716 rt_destroy(rt);
717 pool_put(&rtentry_pool, rt);
718 }
719
720 static void
721 rt_free_work(struct work *wk, void *arg)
722 {
723 int i;
724 struct rtentry *rt;
725
726 restart:
727 mutex_enter(&rt_free_global.lock);
728 for (i = 0; i < sizeof(rt_free_global.queue); i++) {
729 if (rt_free_global.queue[i] == NULL)
730 continue;
731 rt = rt_free_global.queue[i];
732 rt_free_global.queue[i] = NULL;
733 mutex_exit(&rt_free_global.lock);
734
735 atomic_dec_uint(&rt->rt_refcnt);
736 _rt_free(rt);
737 goto restart;
738 }
739 mutex_exit(&rt_free_global.lock);
740 }
741
742 void
743 rt_free(struct rtentry *rt)
744 {
745
746 KASSERT(rt->rt_refcnt > 0);
747 if (!rt_wait_ok()) {
748 int i;
749 mutex_enter(&rt_free_global.lock);
750 for (i = 0; i < sizeof(rt_free_global.queue); i++) {
751 if (rt_free_global.queue[i] == NULL) {
752 rt_free_global.queue[i] = rt;
753 break;
754 }
755 }
756 KASSERT(i < sizeof(rt_free_global.queue));
757 rt_ref(rt);
758 mutex_exit(&rt_free_global.lock);
759 workqueue_enqueue(rt_free_global.wq, &rt_free_global.wk, NULL);
760 } else {
761 atomic_dec_uint(&rt->rt_refcnt);
762 _rt_free(rt);
763 }
764 }
765
#ifdef NET_MPSAFE
/*
 * Block until no rtentry update is in progress, i.e. until
 * rt_update_global.ongoing is false.  rt_update_finish() broadcasts the
 * condition variable waited on here.
 */
static void
rt_update_wait(void)
{

	mutex_enter(&rt_update_global.lock);
	for (;;) {
		if (!rt_update_global.ongoing)
			break;

		dlog(LOG_DEBUG, "%s: waiting lwp=%p\n", __func__, curlwp);
		cv_wait(&rt_update_global.cv, &rt_update_global.lock);
		dlog(LOG_DEBUG, "%s: waited lwp=%p\n", __func__, curlwp);
	}
	mutex_exit(&rt_update_global.lock);
}
#endif
780
781 int
782 rt_update_prepare(struct rtentry *rt)
783 {
784
785 dlog(LOG_DEBUG, "%s: updating rt=%p lwp=%p\n", __func__, rt, curlwp);
786
787 RTCACHE_WLOCK();
788 RT_WLOCK();
789 /* If the entry is being destroyed, don't proceed the update. */
790 if (!ISSET(rt->rt_flags, RTF_UP)) {
791 RT_UNLOCK();
792 RTCACHE_UNLOCK();
793 return -1;
794 }
795 rt->rt_flags |= RTF_UPDATING;
796 RT_UNLOCK();
797 RTCACHE_UNLOCK();
798
799 mutex_enter(&rt_update_global.lock);
800 while (rt_update_global.ongoing) {
801 dlog(LOG_DEBUG, "%s: waiting ongoing updating rt=%p lwp=%p\n",
802 __func__, rt, curlwp);
803 cv_wait(&rt_update_global.cv, &rt_update_global.lock);
804 dlog(LOG_DEBUG, "%s: waited ongoing updating rt=%p lwp=%p\n",
805 __func__, rt, curlwp);
806 }
807 rt_update_global.ongoing = true;
808 /* XXX need it to avoid rt_update_wait by updater itself. */
809 rt_update_global.lwp = curlwp;
810 mutex_exit(&rt_update_global.lock);
811
812 rt_wait_refcnt("update", rt, 1);
813 rt_wait_psref(rt);
814
815 return 0;
816 }
817
818 void
819 rt_update_finish(struct rtentry *rt)
820 {
821
822 RTCACHE_WLOCK();
823 RT_WLOCK();
824 rt->rt_flags &= ~RTF_UPDATING;
825 RT_UNLOCK();
826 RTCACHE_UNLOCK();
827
828 mutex_enter(&rt_update_global.lock);
829 rt_update_global.ongoing = false;
830 rt_update_global.lwp = NULL;
831 cv_broadcast(&rt_update_global.cv);
832 mutex_exit(&rt_update_global.lock);
833
834 dlog(LOG_DEBUG, "%s: updated rt=%p lwp=%p\n", __func__, rt, curlwp);
835 }
836
837 /*
838 * Force a routing table entry to the specified
839 * destination to go through the given gateway.
840 * Normally called as a result of a routing redirect
841 * message from the network layer.
842 *
843 * N.B.: must be called at splsoftnet
844 */
845 void
846 rtredirect(const struct sockaddr *dst, const struct sockaddr *gateway,
847 const struct sockaddr *netmask, int flags, const struct sockaddr *src,
848 struct rtentry **rtp)
849 {
850 struct rtentry *rt;
851 int error = 0;
852 uint64_t *stat = NULL;
853 struct rt_addrinfo info;
854 struct ifaddr *ifa;
855 struct psref psref;
856
857 /* verify the gateway is directly reachable */
858 if ((ifa = ifa_ifwithnet_psref(gateway, &psref)) == NULL) {
859 error = ENETUNREACH;
860 goto out;
861 }
862 rt = rtalloc1(dst, 0);
863 /*
864 * If the redirect isn't from our current router for this dst,
865 * it's either old or wrong. If it redirects us to ourselves,
866 * we have a routing loop, perhaps as a result of an interface
867 * going down recently.
868 */
869 if (!(flags & RTF_DONE) && rt &&
870 (sockaddr_cmp(src, rt->rt_gateway) != 0 || rt->rt_ifa != ifa))
871 error = EINVAL;
872 else {
873 int s = pserialize_read_enter();
874 struct ifaddr *_ifa;
875
876 _ifa = ifa_ifwithaddr(gateway);
877 if (_ifa != NULL)
878 error = EHOSTUNREACH;
879 pserialize_read_exit(s);
880 }
881 if (error)
882 goto done;
883 /*
884 * Create a new entry if we just got back a wildcard entry
885 * or the lookup failed. This is necessary for hosts
886 * which use routing redirects generated by smart gateways
887 * to dynamically build the routing tables.
888 */
889 if (rt == NULL || (rt_mask(rt) && rt_mask(rt)->sa_len < 2))
890 goto create;
891 /*
892 * Don't listen to the redirect if it's
893 * for a route to an interface.
894 */
895 if (rt->rt_flags & RTF_GATEWAY) {
896 if (((rt->rt_flags & RTF_HOST) == 0) && (flags & RTF_HOST)) {
897 /*
898 * Changing from route to net => route to host.
899 * Create new route, rather than smashing route to net.
900 */
901 create:
902 if (rt != NULL)
903 rt_unref(rt);
904 flags |= RTF_GATEWAY | RTF_DYNAMIC;
905 memset(&info, 0, sizeof(info));
906 info.rti_info[RTAX_DST] = dst;
907 info.rti_info[RTAX_GATEWAY] = gateway;
908 info.rti_info[RTAX_NETMASK] = netmask;
909 info.rti_ifa = ifa;
910 info.rti_flags = flags;
911 rt = NULL;
912 error = rtrequest1(RTM_ADD, &info, &rt);
913 if (rt != NULL)
914 flags = rt->rt_flags;
915 stat = &rtstat.rts_dynamic;
916 } else {
917 /*
918 * Smash the current notion of the gateway to
919 * this destination. Should check about netmask!!!
920 */
921 /*
922 * FIXME NOMPSAFE: the rtentry is updated with the existence
923 * of refeferences of it.
924 */
925 error = rt_setgate(rt, gateway);
926 if (error == 0) {
927 rt->rt_flags |= RTF_MODIFIED;
928 flags |= RTF_MODIFIED;
929 }
930 stat = &rtstat.rts_newgateway;
931 }
932 } else
933 error = EHOSTUNREACH;
934 done:
935 if (rt) {
936 if (rtp != NULL && !error)
937 *rtp = rt;
938 else
939 rt_unref(rt);
940 }
941 out:
942 if (error)
943 rtstat.rts_badredirect++;
944 else if (stat != NULL)
945 (*stat)++;
946 memset(&info, 0, sizeof(info));
947 info.rti_info[RTAX_DST] = dst;
948 info.rti_info[RTAX_GATEWAY] = gateway;
949 info.rti_info[RTAX_NETMASK] = netmask;
950 info.rti_info[RTAX_AUTHOR] = src;
951 rt_missmsg(RTM_REDIRECT, &info, flags, error);
952 ifa_release(ifa, &psref);
953 }
954
955 /*
956 * Delete a route and generate a message.
957 * It doesn't free a passed rt.
958 */
959 static int
960 rtdeletemsg(struct rtentry *rt)
961 {
962 int error;
963 struct rt_addrinfo info;
964 struct rtentry *retrt;
965
966 /*
967 * Request the new route so that the entry is not actually
968 * deleted. That will allow the information being reported to
969 * be accurate (and consistent with route_output()).
970 */
971 memset(&info, 0, sizeof(info));
972 info.rti_info[RTAX_DST] = rt_getkey(rt);
973 info.rti_info[RTAX_NETMASK] = rt_mask(rt);
974 info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
975 info.rti_flags = rt->rt_flags;
976 error = rtrequest1(RTM_DELETE, &info, &retrt);
977
978 rt_missmsg(RTM_DELETE, &info, info.rti_flags, error);
979
980 return error;
981 }
982
983 struct ifaddr *
984 ifa_ifwithroute_psref(int flags, const struct sockaddr *dst,
985 const struct sockaddr *gateway, struct psref *psref)
986 {
987 struct ifaddr *ifa = NULL;
988
989 if ((flags & RTF_GATEWAY) == 0) {
990 /*
991 * If we are adding a route to an interface,
992 * and the interface is a pt to pt link
993 * we should search for the destination
994 * as our clue to the interface. Otherwise
995 * we can use the local address.
996 */
997 if ((flags & RTF_HOST) && gateway->sa_family != AF_LINK)
998 ifa = ifa_ifwithdstaddr_psref(dst, psref);
999 if (ifa == NULL)
1000 ifa = ifa_ifwithaddr_psref(gateway, psref);
1001 } else {
1002 /*
1003 * If we are adding a route to a remote net
1004 * or host, the gateway may still be on the
1005 * other end of a pt to pt link.
1006 */
1007 ifa = ifa_ifwithdstaddr_psref(gateway, psref);
1008 }
1009 if (ifa == NULL)
1010 ifa = ifa_ifwithnet_psref(gateway, psref);
1011 if (ifa == NULL) {
1012 int s;
1013 struct rtentry *rt;
1014
1015 /* XXX we cannot call rtalloc1 if holding the rt lock */
1016 if (RT_LOCKED())
1017 rt = rtalloc1_locked(dst, 0, true);
1018 else
1019 rt = rtalloc1(dst, 0);
1020 if (rt == NULL)
1021 return NULL;
1022 /*
1023 * Just in case. May not need to do this workaround.
1024 * Revisit when working on rtentry MP-ification.
1025 */
1026 s = pserialize_read_enter();
1027 IFADDR_READER_FOREACH(ifa, rt->rt_ifp) {
1028 if (ifa == rt->rt_ifa)
1029 break;
1030 }
1031 if (ifa != NULL)
1032 ifa_acquire(ifa, psref);
1033 pserialize_read_exit(s);
1034 rt_unref(rt);
1035 if (ifa == NULL)
1036 return NULL;
1037 }
1038 if (ifa->ifa_addr->sa_family != dst->sa_family) {
1039 struct ifaddr *nifa;
1040 int s;
1041
1042 s = pserialize_read_enter();
1043 nifa = ifaof_ifpforaddr(dst, ifa->ifa_ifp);
1044 if (nifa != NULL) {
1045 ifa_release(ifa, psref);
1046 ifa_acquire(nifa, psref);
1047 ifa = nifa;
1048 }
1049 pserialize_read_exit(s);
1050 }
1051 return ifa;
1052 }
1053
1054 /*
1055 * If it suceeds and ret_nrt isn't NULL, refcnt of ret_nrt is incremented.
1056 * The caller has to rtfree it by itself.
1057 */
1058 int
1059 rtrequest(int req, const struct sockaddr *dst, const struct sockaddr *gateway,
1060 const struct sockaddr *netmask, int flags, struct rtentry **ret_nrt)
1061 {
1062 struct rt_addrinfo info;
1063
1064 memset(&info, 0, sizeof(info));
1065 info.rti_flags = flags;
1066 info.rti_info[RTAX_DST] = dst;
1067 info.rti_info[RTAX_GATEWAY] = gateway;
1068 info.rti_info[RTAX_NETMASK] = netmask;
1069 return rtrequest1(req, &info, ret_nrt);
1070 }
1071
1072 /*
1073 * It's a utility function to add/remove a route to/from the routing table
1074 * and tell user processes the addition/removal on success.
1075 */
1076 int
1077 rtrequest_newmsg(const int req, const struct sockaddr *dst,
1078 const struct sockaddr *gateway, const struct sockaddr *netmask,
1079 const int flags)
1080 {
1081 int error;
1082 struct rtentry *ret_nrt = NULL;
1083
1084 KASSERT(req == RTM_ADD || req == RTM_DELETE);
1085
1086 error = rtrequest(req, dst, gateway, netmask, flags, &ret_nrt);
1087 if (error != 0)
1088 return error;
1089
1090 KASSERT(ret_nrt != NULL);
1091
1092 rt_newmsg(req, ret_nrt); /* tell user process */
1093 if (req == RTM_DELETE)
1094 rt_free(ret_nrt);
1095 else
1096 rt_unref(ret_nrt);
1097
1098 return 0;
1099 }
1100
1101 struct ifnet *
1102 rt_getifp(struct rt_addrinfo *info, struct psref *psref)
1103 {
1104 const struct sockaddr *ifpaddr = info->rti_info[RTAX_IFP];
1105
1106 if (info->rti_ifp != NULL)
1107 return NULL;
1108 /*
1109 * ifp may be specified by sockaddr_dl when protocol address
1110 * is ambiguous
1111 */
1112 if (ifpaddr != NULL && ifpaddr->sa_family == AF_LINK) {
1113 struct ifaddr *ifa;
1114 int s = pserialize_read_enter();
1115
1116 ifa = ifa_ifwithnet(ifpaddr);
1117 if (ifa != NULL)
1118 info->rti_ifp = if_get_byindex(ifa->ifa_ifp->if_index,
1119 psref);
1120 pserialize_read_exit(s);
1121 }
1122
1123 return info->rti_ifp;
1124 }
1125
1126 struct ifaddr *
1127 rt_getifa(struct rt_addrinfo *info, struct psref *psref)
1128 {
1129 struct ifaddr *ifa = NULL;
1130 const struct sockaddr *dst = info->rti_info[RTAX_DST];
1131 const struct sockaddr *gateway = info->rti_info[RTAX_GATEWAY];
1132 const struct sockaddr *ifaaddr = info->rti_info[RTAX_IFA];
1133 int flags = info->rti_flags;
1134 const struct sockaddr *sa;
1135
1136 if (info->rti_ifa == NULL && ifaaddr != NULL) {
1137 ifa = ifa_ifwithaddr_psref(ifaaddr, psref);
1138 if (ifa != NULL)
1139 goto got;
1140 }
1141
1142 sa = ifaaddr != NULL ? ifaaddr :
1143 (gateway != NULL ? gateway : dst);
1144 if (sa != NULL && info->rti_ifp != NULL)
1145 ifa = ifaof_ifpforaddr_psref(sa, info->rti_ifp, psref);
1146 else if (dst != NULL && gateway != NULL)
1147 ifa = ifa_ifwithroute_psref(flags, dst, gateway, psref);
1148 else if (sa != NULL)
1149 ifa = ifa_ifwithroute_psref(flags, sa, sa, psref);
1150 if (ifa == NULL)
1151 return NULL;
1152 got:
1153 if (ifa->ifa_getifa != NULL) {
1154 /* FIXME NOMPSAFE */
1155 ifa = (*ifa->ifa_getifa)(ifa, dst);
1156 if (ifa == NULL)
1157 return NULL;
1158 ifa_acquire(ifa, psref);
1159 }
1160 info->rti_ifa = ifa;
1161 if (info->rti_ifp == NULL)
1162 info->rti_ifp = ifa->ifa_ifp;
1163 return ifa;
1164 }
1165
1166 /*
1167 * If it suceeds and ret_nrt isn't NULL, refcnt of ret_nrt is incremented.
1168 * The caller has to rtfree it by itself.
1169 */
1170 int
1171 rtrequest1(int req, struct rt_addrinfo *info, struct rtentry **ret_nrt)
1172 {
1173 int s = splsoftnet(), ss;
1174 int error = 0, rc;
1175 struct rtentry *rt;
1176 rtbl_t *rtbl;
1177 struct ifaddr *ifa = NULL, *ifa2 = NULL;
1178 struct sockaddr_storage maskeddst;
1179 const struct sockaddr *dst = info->rti_info[RTAX_DST];
1180 const struct sockaddr *gateway = info->rti_info[RTAX_GATEWAY];
1181 const struct sockaddr *netmask = info->rti_info[RTAX_NETMASK];
1182 int flags = info->rti_flags;
1183 struct psref psref_ifp, psref_ifa;
1184 int bound = 0;
1185 struct ifnet *ifp = NULL;
1186 bool need_to_release_ifa = true;
1187 bool need_unlock = true;
1188 #define senderr(x) { error = x ; goto bad; }
1189
1190 RT_WLOCK();
1191
1192 bound = curlwp_bind();
1193 if ((rtbl = rt_gettable(dst->sa_family)) == NULL)
1194 senderr(ESRCH);
1195 if (flags & RTF_HOST)
1196 netmask = NULL;
1197 switch (req) {
1198 case RTM_DELETE:
1199 if (netmask) {
1200 rt_maskedcopy(dst, (struct sockaddr *)&maskeddst,
1201 netmask);
1202 dst = (struct sockaddr *)&maskeddst;
1203 }
1204 if ((rt = rt_lookup(rtbl, dst, netmask)) == NULL)
1205 senderr(ESRCH);
1206 if ((rt = rt_deladdr(rtbl, dst, netmask)) == NULL)
1207 senderr(ESRCH);
1208 rt->rt_flags &= ~RTF_UP;
1209 if ((ifa = rt->rt_ifa)) {
1210 if (ifa->ifa_flags & IFA_ROUTE &&
1211 rt_ifa_connected(rt, ifa)) {
1212 RT_DPRINTF("rt->_rt_key = %p, ifa = %p, "
1213 "deleted IFA_ROUTE\n",
1214 (void *)rt->_rt_key, (void *)ifa);
1215 ifa->ifa_flags &= ~IFA_ROUTE;
1216 }
1217 if (ifa->ifa_rtrequest)
1218 ifa->ifa_rtrequest(RTM_DELETE, rt, info);
1219 ifa = NULL;
1220 }
1221 rttrash++;
1222 if (ret_nrt) {
1223 *ret_nrt = rt;
1224 rt_ref(rt);
1225 RT_REFCNT_TRACE(rt);
1226 }
1227 RT_UNLOCK();
1228 need_unlock = false;
1229 rt_timer_remove_all(rt);
1230 rtcache_clear_rtentry(dst->sa_family, rt);
1231 if (ret_nrt == NULL) {
1232 /* Adjust the refcount */
1233 rt_ref(rt);
1234 RT_REFCNT_TRACE(rt);
1235 rt_free(rt);
1236 }
1237 break;
1238
1239 case RTM_ADD:
1240 if (info->rti_ifa == NULL) {
1241 ifp = rt_getifp(info, &psref_ifp);
1242 ifa = rt_getifa(info, &psref_ifa);
1243 if (ifa == NULL)
1244 senderr(ENETUNREACH);
1245 } else {
1246 /* Caller should have a reference of ifa */
1247 ifa = info->rti_ifa;
1248 need_to_release_ifa = false;
1249 }
1250 rt = pool_get(&rtentry_pool, PR_NOWAIT);
1251 if (rt == NULL)
1252 senderr(ENOBUFS);
1253 memset(rt, 0, sizeof(*rt));
1254 rt->rt_flags = RTF_UP | flags;
1255 LIST_INIT(&rt->rt_timer);
1256
1257 RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);
1258 if (netmask) {
1259 rt_maskedcopy(dst, (struct sockaddr *)&maskeddst,
1260 netmask);
1261 rt_setkey(rt, (struct sockaddr *)&maskeddst, M_NOWAIT);
1262 } else {
1263 rt_setkey(rt, dst, M_NOWAIT);
1264 }
1265 RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);
1266 if (rt_getkey(rt) == NULL ||
1267 rt_setgate(rt, gateway) != 0) {
1268 pool_put(&rtentry_pool, rt);
1269 senderr(ENOBUFS);
1270 }
1271
1272 rt_set_ifa(rt, ifa);
1273 if (info->rti_info[RTAX_TAG] != NULL) {
1274 const struct sockaddr *tag;
1275 tag = rt_settag(rt, info->rti_info[RTAX_TAG]);
1276 if (tag == NULL)
1277 senderr(ENOBUFS);
1278 }
1279 RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);
1280
1281 ss = pserialize_read_enter();
1282 if (info->rti_info[RTAX_IFP] != NULL) {
1283 ifa2 = ifa_ifwithnet(info->rti_info[RTAX_IFP]);
1284 if (ifa2 != NULL)
1285 rt->rt_ifp = ifa2->ifa_ifp;
1286 else
1287 rt->rt_ifp = ifa->ifa_ifp;
1288 } else
1289 rt->rt_ifp = ifa->ifa_ifp;
1290 pserialize_read_exit(ss);
1291 cv_init(&rt->rt_cv, "rtentry");
1292 psref_target_init(&rt->rt_psref, rt_psref_class);
1293
1294 RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);
1295 rc = rt_addaddr(rtbl, rt, netmask);
1296 RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);
1297 if (rc != 0) {
1298 ifafree(ifa); /* for rt_set_ifa above */
1299 cv_destroy(&rt->rt_cv);
1300 rt_destroy(rt);
1301 pool_put(&rtentry_pool, rt);
1302 senderr(rc);
1303 }
1304 RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);
1305 if (ifa->ifa_rtrequest)
1306 ifa->ifa_rtrequest(req, rt, info);
1307 if (need_to_release_ifa)
1308 ifa_release(ifa, &psref_ifa);
1309 ifa = NULL;
1310 if_put(ifp, &psref_ifp);
1311 ifp = NULL;
1312 RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);
1313 if (ret_nrt) {
1314 *ret_nrt = rt;
1315 rt_ref(rt);
1316 RT_REFCNT_TRACE(rt);
1317 }
1318 RT_UNLOCK();
1319 need_unlock = false;
1320 rtflushall(dst->sa_family);
1321 break;
1322 case RTM_GET:
1323 if (netmask != NULL) {
1324 rt_maskedcopy(dst, (struct sockaddr *)&maskeddst,
1325 netmask);
1326 dst = (struct sockaddr *)&maskeddst;
1327 }
1328 if ((rt = rt_lookup(rtbl, dst, netmask)) == NULL)
1329 senderr(ESRCH);
1330 if (ret_nrt != NULL) {
1331 *ret_nrt = rt;
1332 rt_ref(rt);
1333 RT_REFCNT_TRACE(rt);
1334 }
1335 break;
1336 }
1337 bad:
1338 if (need_to_release_ifa)
1339 ifa_release(ifa, &psref_ifa);
1340 if_put(ifp, &psref_ifp);
1341 curlwp_bindx(bound);
1342 if (need_unlock)
1343 RT_UNLOCK();
1344 splx(s);
1345 return error;
1346 }
1347
1348 int
1349 rt_setgate(struct rtentry *rt, const struct sockaddr *gate)
1350 {
1351 struct sockaddr *new, *old;
1352
1353 KASSERT(rt->_rt_key != NULL);
1354 RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);
1355
1356 new = sockaddr_dup(gate, M_ZERO | M_NOWAIT);
1357 if (new == NULL)
1358 return ENOMEM;
1359
1360 old = rt->rt_gateway;
1361 rt->rt_gateway = new;
1362 if (old != NULL)
1363 sockaddr_free(old);
1364
1365 KASSERT(rt->_rt_key != NULL);
1366 RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);
1367
1368 if (rt->rt_flags & RTF_GATEWAY) {
1369 struct rtentry *gwrt;
1370
1371 /* XXX we cannot call rtalloc1 if holding the rt lock */
1372 if (RT_LOCKED())
1373 gwrt = rtalloc1_locked(gate, 1, false);
1374 else
1375 gwrt = rtalloc1(gate, 1);
1376 /*
1377 * If we switched gateways, grab the MTU from the new
1378 * gateway route if the current MTU, if the current MTU is
1379 * greater than the MTU of gateway.
1380 * Note that, if the MTU of gateway is 0, we will reset the
1381 * MTU of the route to run PMTUD again from scratch. XXX
1382 */
1383 if (gwrt != NULL) {
1384 KASSERT(gwrt->_rt_key != NULL);
1385 RT_DPRINTF("gwrt->_rt_key = %p\n", gwrt->_rt_key);
1386 if ((rt->rt_rmx.rmx_locks & RTV_MTU) == 0 &&
1387 rt->rt_rmx.rmx_mtu &&
1388 rt->rt_rmx.rmx_mtu > gwrt->rt_rmx.rmx_mtu) {
1389 rt->rt_rmx.rmx_mtu = gwrt->rt_rmx.rmx_mtu;
1390 }
1391 rt_unref(gwrt);
1392 }
1393 }
1394 KASSERT(rt->_rt_key != NULL);
1395 RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);
1396 return 0;
1397 }
1398
1399 static void
1400 rt_maskedcopy(const struct sockaddr *src, struct sockaddr *dst,
1401 const struct sockaddr *netmask)
1402 {
1403 const char *netmaskp = &netmask->sa_data[0],
1404 *srcp = &src->sa_data[0];
1405 char *dstp = &dst->sa_data[0];
1406 const char *maskend = (char *)dst + MIN(netmask->sa_len, src->sa_len);
1407 const char *srcend = (char *)dst + src->sa_len;
1408
1409 dst->sa_len = src->sa_len;
1410 dst->sa_family = src->sa_family;
1411
1412 while (dstp < maskend)
1413 *dstp++ = *srcp++ & *netmaskp++;
1414 if (dstp < srcend)
1415 memset(dstp, 0, (size_t)(srcend - dstp));
1416 }
1417
1418 /*
1419 * Inform the routing socket of a route change.
1420 */
1421 void
1422 rt_newmsg(const int cmd, const struct rtentry *rt)
1423 {
1424 struct rt_addrinfo info;
1425
1426 memset((void *)&info, 0, sizeof(info));
1427 info.rti_info[RTAX_DST] = rt_getkey(rt);
1428 info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
1429 info.rti_info[RTAX_NETMASK] = rt_mask(rt);
1430 if (rt->rt_ifp) {
1431 info.rti_info[RTAX_IFP] = rt->rt_ifp->if_dl->ifa_addr;
1432 info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr;
1433 }
1434
1435 rt_missmsg(cmd, &info, rt->rt_flags, 0);
1436 }
1437
1438 /*
1439 * Set up or tear down a routing table entry, normally
1440 * for an interface.
1441 */
1442 int
1443 rtinit(struct ifaddr *ifa, int cmd, int flags)
1444 {
1445 struct rtentry *rt;
1446 struct sockaddr *dst, *odst;
1447 struct sockaddr_storage maskeddst;
1448 struct rtentry *nrt = NULL;
1449 int error;
1450 struct rt_addrinfo info;
1451
1452 dst = flags & RTF_HOST ? ifa->ifa_dstaddr : ifa->ifa_addr;
1453 if (cmd == RTM_DELETE) {
1454 if ((flags & RTF_HOST) == 0 && ifa->ifa_netmask) {
1455 /* Delete subnet route for this interface */
1456 odst = dst;
1457 dst = (struct sockaddr *)&maskeddst;
1458 rt_maskedcopy(odst, dst, ifa->ifa_netmask);
1459 }
1460 if ((rt = rtalloc1(dst, 0)) != NULL) {
1461 if (rt->rt_ifa != ifa) {
1462 rt_unref(rt);
1463 return (flags & RTF_HOST) ? EHOSTUNREACH
1464 : ENETUNREACH;
1465 }
1466 rt_unref(rt);
1467 }
1468 }
1469 memset(&info, 0, sizeof(info));
1470 info.rti_ifa = ifa;
1471 info.rti_flags = flags | ifa->ifa_flags;
1472 info.rti_info[RTAX_DST] = dst;
1473 info.rti_info[RTAX_GATEWAY] = ifa->ifa_addr;
1474
1475 /*
1476 * XXX here, it seems that we are assuming that ifa_netmask is NULL
1477 * for RTF_HOST. bsdi4 passes NULL explicitly (via intermediate
1478 * variable) when RTF_HOST is 1. still not sure if i can safely
1479 * change it to meet bsdi4 behavior.
1480 */
1481 if (cmd != RTM_LLINFO_UPD)
1482 info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask;
1483 error = rtrequest1((cmd == RTM_LLINFO_UPD) ? RTM_GET : cmd, &info,
1484 &nrt);
1485 if (error != 0)
1486 return error;
1487
1488 rt = nrt;
1489 RT_REFCNT_TRACE(rt);
1490 switch (cmd) {
1491 case RTM_DELETE:
1492 rt_newmsg(cmd, rt);
1493 rt_free(rt);
1494 break;
1495 case RTM_LLINFO_UPD:
1496 if (cmd == RTM_LLINFO_UPD && ifa->ifa_rtrequest != NULL)
1497 ifa->ifa_rtrequest(RTM_LLINFO_UPD, rt, &info);
1498 rt_newmsg(RTM_CHANGE, rt);
1499 rt_unref(rt);
1500 break;
1501 case RTM_ADD:
1502 /*
1503 * FIXME NOMPSAFE: the rtentry is updated with the existence
1504 * of refeferences of it.
1505 */
1506 /*
1507 * XXX it looks just reverting rt_ifa replaced by ifa_rtrequest
1508 * called via rtrequest1. Can we just prevent the replacement
1509 * somehow and remove the following code? And also doesn't
1510 * calling ifa_rtrequest(RTM_ADD) replace rt_ifa again?
1511 */
1512 if (rt->rt_ifa != ifa) {
1513 printf("rtinit: wrong ifa (%p) was (%p)\n", ifa,
1514 rt->rt_ifa);
1515 if (rt->rt_ifa->ifa_rtrequest != NULL) {
1516 rt->rt_ifa->ifa_rtrequest(RTM_DELETE, rt,
1517 &info);
1518 }
1519 rt_replace_ifa(rt, ifa);
1520 rt->rt_ifp = ifa->ifa_ifp;
1521 if (ifa->ifa_rtrequest != NULL)
1522 ifa->ifa_rtrequest(RTM_ADD, rt, &info);
1523 }
1524 rt_newmsg(cmd, rt);
1525 rt_unref(rt);
1526 RT_REFCNT_TRACE(rt);
1527 break;
1528 }
1529 return error;
1530 }
1531
1532 /*
1533 * Create a local route entry for the address.
1534 * Announce the addition of the address and the route to the routing socket.
1535 */
1536 int
1537 rt_ifa_addlocal(struct ifaddr *ifa)
1538 {
1539 struct rtentry *rt;
1540 int e;
1541
1542 /* If there is no loopback entry, allocate one. */
1543 rt = rtalloc1(ifa->ifa_addr, 0);
1544 #ifdef RT_DEBUG
1545 if (rt != NULL)
1546 dump_rt(rt);
1547 #endif
1548 if (rt == NULL || (rt->rt_flags & RTF_HOST) == 0 ||
1549 (rt->rt_ifp->if_flags & IFF_LOOPBACK) == 0)
1550 {
1551 struct rt_addrinfo info;
1552 struct rtentry *nrt;
1553
1554 memset(&info, 0, sizeof(info));
1555 info.rti_flags = RTF_HOST | RTF_LOCAL;
1556 if (!(ifa->ifa_ifp->if_flags & (IFF_LOOPBACK|IFF_POINTOPOINT)))
1557 info.rti_flags |= RTF_LLDATA;
1558 info.rti_info[RTAX_DST] = ifa->ifa_addr;
1559 info.rti_info[RTAX_GATEWAY] =
1560 (const struct sockaddr *)ifa->ifa_ifp->if_sadl;
1561 info.rti_ifa = ifa;
1562 nrt = NULL;
1563 e = rtrequest1(RTM_ADD, &info, &nrt);
1564 if (nrt && ifa != nrt->rt_ifa)
1565 rt_replace_ifa(nrt, ifa);
1566 rt_newaddrmsg(RTM_ADD, ifa, e, nrt);
1567 if (nrt != NULL) {
1568 #ifdef RT_DEBUG
1569 dump_rt(nrt);
1570 #endif
1571 rt_unref(nrt);
1572 RT_REFCNT_TRACE(nrt);
1573 }
1574 } else {
1575 e = 0;
1576 rt_newaddrmsg(RTM_NEWADDR, ifa, 0, NULL);
1577 }
1578 if (rt != NULL)
1579 rt_unref(rt);
1580 return e;
1581 }
1582
1583 /*
1584 * Remove the local route entry for the address.
1585 * Announce the removal of the address and the route to the routing socket.
1586 */
1587 int
1588 rt_ifa_remlocal(struct ifaddr *ifa, struct ifaddr *alt_ifa)
1589 {
1590 struct rtentry *rt;
1591 int e = 0;
1592
1593 rt = rtalloc1(ifa->ifa_addr, 0);
1594
1595 /*
1596 * Before deleting, check if a corresponding loopbacked
1597 * host route surely exists. With this check, we can avoid
1598 * deleting an interface direct route whose destination is
1599 * the same as the address being removed. This can happen
1600 * when removing a subnet-router anycast address on an
1601 * interface attached to a shared medium.
1602 */
1603 if (rt != NULL &&
1604 (rt->rt_flags & RTF_HOST) &&
1605 (rt->rt_ifp->if_flags & IFF_LOOPBACK))
1606 {
1607 /* If we cannot replace the route's ifaddr with the equivalent
1608 * ifaddr of another interface, I believe it is safest to
1609 * delete the route.
1610 */
1611 if (alt_ifa == NULL) {
1612 e = rtdeletemsg(rt);
1613 if (e == 0) {
1614 rt_unref(rt);
1615 rt_free(rt);
1616 rt = NULL;
1617 }
1618 rt_newaddrmsg(RTM_DELADDR, ifa, 0, NULL);
1619 } else {
1620 rt_replace_ifa(rt, alt_ifa);
1621 rt_newmsg(RTM_CHANGE, rt);
1622 }
1623 } else
1624 rt_newaddrmsg(RTM_DELADDR, ifa, 0, NULL);
1625 if (rt != NULL)
1626 rt_unref(rt);
1627 return e;
1628 }
1629
/*
 * Route timer routines.  These routines allow functions to be called
 * for various routes at any time.  This is useful in supporting
 * path MTU discovery and redirect route deletion.
 *
 * This is similar to some BSDI internal functions, but it provides
 * for multiple queues for efficiency's sake...
 */
1638
1639 LIST_HEAD(, rttimer_queue) rttimer_queue_head;
1640 static int rt_init_done = 0;
1641
1642 /*
1643 * Some subtle order problems with domain initialization mean that
1644 * we cannot count on this being run from rt_init before various
1645 * protocol initializations are done. Therefore, we make sure
1646 * that this is run when the first queue is added...
1647 */
1648
1649 static void rt_timer_work(struct work *, void *);
1650
1651 static void
1652 rt_timer_init(void)
1653 {
1654 int error;
1655
1656 assert(rt_init_done == 0);
1657
1658 /* XXX should be in rt_init */
1659 rw_init(&rt_lock);
1660 rw_init(&rtcache_lock);
1661
1662 LIST_INIT(&rttimer_queue_head);
1663 callout_init(&rt_timer_ch, CALLOUT_MPSAFE);
1664 error = workqueue_create(&rt_timer_wq, "rt_timer",
1665 rt_timer_work, NULL, PRI_SOFTNET, IPL_SOFTNET, WQ_MPSAFE);
1666 if (error)
1667 panic("%s: workqueue_create failed (%d)\n", __func__, error);
1668 callout_reset(&rt_timer_ch, hz, rt_timer_timer, NULL);
1669 rt_init_done = 1;
1670 }
1671
1672 struct rttimer_queue *
1673 rt_timer_queue_create(u_int timeout)
1674 {
1675 struct rttimer_queue *rtq;
1676
1677 if (rt_init_done == 0)
1678 rt_timer_init();
1679
1680 R_Malloc(rtq, struct rttimer_queue *, sizeof *rtq);
1681 if (rtq == NULL)
1682 return NULL;
1683 memset(rtq, 0, sizeof(*rtq));
1684
1685 rtq->rtq_timeout = timeout;
1686 TAILQ_INIT(&rtq->rtq_head);
1687 RT_WLOCK();
1688 LIST_INSERT_HEAD(&rttimer_queue_head, rtq, rtq_link);
1689 RT_UNLOCK();
1690
1691 return rtq;
1692 }
1693
1694 void
1695 rt_timer_queue_change(struct rttimer_queue *rtq, long timeout)
1696 {
1697
1698 rtq->rtq_timeout = timeout;
1699 }
1700
1701 static void
1702 rt_timer_queue_remove_all(struct rttimer_queue *rtq)
1703 {
1704 struct rttimer *r;
1705
1706 RT_ASSERT_WLOCK();
1707
1708 while ((r = TAILQ_FIRST(&rtq->rtq_head)) != NULL) {
1709 LIST_REMOVE(r, rtt_link);
1710 TAILQ_REMOVE(&rtq->rtq_head, r, rtt_next);
1711 rt_ref(r->rtt_rt); /* XXX */
1712 RT_REFCNT_TRACE(r->rtt_rt);
1713 RT_UNLOCK();
1714 (*r->rtt_func)(r->rtt_rt, r);
1715 pool_put(&rttimer_pool, r);
1716 RT_WLOCK();
1717 if (rtq->rtq_count > 0)
1718 rtq->rtq_count--;
1719 else
1720 printf("rt_timer_queue_remove_all: "
1721 "rtq_count reached 0\n");
1722 }
1723 }
1724
1725 void
1726 rt_timer_queue_destroy(struct rttimer_queue *rtq)
1727 {
1728
1729 RT_WLOCK();
1730 rt_timer_queue_remove_all(rtq);
1731 LIST_REMOVE(rtq, rtq_link);
1732 RT_UNLOCK();
1733
1734 /*
1735 * Caller is responsible for freeing the rttimer_queue structure.
1736 */
1737 }
1738
1739 unsigned long
1740 rt_timer_count(struct rttimer_queue *rtq)
1741 {
1742 return rtq->rtq_count;
1743 }
1744
1745 static void
1746 rt_timer_remove_all(struct rtentry *rt)
1747 {
1748 struct rttimer *r;
1749
1750 RT_WLOCK();
1751 while ((r = LIST_FIRST(&rt->rt_timer)) != NULL) {
1752 LIST_REMOVE(r, rtt_link);
1753 TAILQ_REMOVE(&r->rtt_queue->rtq_head, r, rtt_next);
1754 if (r->rtt_queue->rtq_count > 0)
1755 r->rtt_queue->rtq_count--;
1756 else
1757 printf("rt_timer_remove_all: rtq_count reached 0\n");
1758 pool_put(&rttimer_pool, r);
1759 }
1760 RT_UNLOCK();
1761 }
1762
1763 int
1764 rt_timer_add(struct rtentry *rt,
1765 void (*func)(struct rtentry *, struct rttimer *),
1766 struct rttimer_queue *queue)
1767 {
1768 struct rttimer *r;
1769
1770 KASSERT(func != NULL);
1771 RT_WLOCK();
1772 /*
1773 * If there's already a timer with this action, destroy it before
1774 * we add a new one.
1775 */
1776 LIST_FOREACH(r, &rt->rt_timer, rtt_link) {
1777 if (r->rtt_func == func)
1778 break;
1779 }
1780 if (r != NULL) {
1781 LIST_REMOVE(r, rtt_link);
1782 TAILQ_REMOVE(&r->rtt_queue->rtq_head, r, rtt_next);
1783 if (r->rtt_queue->rtq_count > 0)
1784 r->rtt_queue->rtq_count--;
1785 else
1786 printf("rt_timer_add: rtq_count reached 0\n");
1787 } else {
1788 r = pool_get(&rttimer_pool, PR_NOWAIT);
1789 if (r == NULL) {
1790 RT_UNLOCK();
1791 return ENOBUFS;
1792 }
1793 }
1794
1795 memset(r, 0, sizeof(*r));
1796
1797 r->rtt_rt = rt;
1798 r->rtt_time = time_uptime;
1799 r->rtt_func = func;
1800 r->rtt_queue = queue;
1801 LIST_INSERT_HEAD(&rt->rt_timer, r, rtt_link);
1802 TAILQ_INSERT_TAIL(&queue->rtq_head, r, rtt_next);
1803 r->rtt_queue->rtq_count++;
1804
1805 RT_UNLOCK();
1806
1807 return 0;
1808 }
1809
1810 static void
1811 rt_timer_work(struct work *wk, void *arg)
1812 {
1813 struct rttimer_queue *rtq;
1814 struct rttimer *r;
1815
1816 RT_WLOCK();
1817 LIST_FOREACH(rtq, &rttimer_queue_head, rtq_link) {
1818 while ((r = TAILQ_FIRST(&rtq->rtq_head)) != NULL &&
1819 (r->rtt_time + rtq->rtq_timeout) < time_uptime) {
1820 LIST_REMOVE(r, rtt_link);
1821 TAILQ_REMOVE(&rtq->rtq_head, r, rtt_next);
1822 rt_ref(r->rtt_rt); /* XXX */
1823 RT_REFCNT_TRACE(r->rtt_rt);
1824 RT_UNLOCK();
1825 (*r->rtt_func)(r->rtt_rt, r);
1826 pool_put(&rttimer_pool, r);
1827 RT_WLOCK();
1828 if (rtq->rtq_count > 0)
1829 rtq->rtq_count--;
1830 else
1831 printf("rt_timer_timer: rtq_count reached 0\n");
1832 }
1833 }
1834 RT_UNLOCK();
1835
1836 callout_reset(&rt_timer_ch, hz, rt_timer_timer, NULL);
1837 }
1838
1839 static void
1840 rt_timer_timer(void *arg)
1841 {
1842
1843 workqueue_enqueue(rt_timer_wq, &rt_timer_wk, NULL);
1844 }
1845
1846 static struct rtentry *
1847 _rtcache_init(struct route *ro, int flag)
1848 {
1849 struct rtentry *rt;
1850
1851 rtcache_invariants(ro);
1852 KASSERT(ro->_ro_rt == NULL);
1853 RTCACHE_ASSERT_WLOCK();
1854
1855 if (rtcache_getdst(ro) == NULL)
1856 return NULL;
1857 ro->ro_invalid = false;
1858 rt = rtalloc1(rtcache_getdst(ro), flag);
1859 if (rt != NULL && ISSET(rt->rt_flags, RTF_UP)) {
1860 ro->_ro_rt = rt;
1861 KASSERT(!ISSET(rt->rt_flags, RTF_UPDATING));
1862 rtcache_ref(rt, ro);
1863 rt_unref(rt);
1864 rtcache(ro);
1865 } else if (rt != NULL)
1866 rt_unref(rt);
1867
1868 rtcache_invariants(ro);
1869 return ro->_ro_rt;
1870 }
1871
/*
 * Initialize the route cache ro under the route cache write lock, passing
 * 1 as the rtalloc1() flag.  Returns the cached route or NULL.
 */
struct rtentry *
rtcache_init(struct route *ro)
{
	struct rtentry *cached;

	RTCACHE_WLOCK();
	cached = _rtcache_init(ro, 1);
	RTCACHE_UNLOCK();

	return cached;
}
1881
/*
 * Same as rtcache_init() but passes 0 as the rtalloc1() flag.
 * Returns the cached route or NULL.
 */
struct rtentry *
rtcache_init_noclone(struct route *ro)
{
	struct rtentry *cached;

	RTCACHE_WLOCK();
	cached = _rtcache_init(ro, 0);
	RTCACHE_UNLOCK();

	return cached;
}
1891
/*
 * Drop whatever route ro currently caches and look its destination up
 * again, all under the route cache write lock.  clone is passed through
 * as the rtalloc1() flag.  Returns the newly cached route or NULL.
 */
struct rtentry *
rtcache_update(struct route *ro, int clone)
{
	struct rtentry *cached;

	RTCACHE_WLOCK();
	rtcache_clear(ro);
	cached = _rtcache_init(ro, clone);
	RTCACHE_UNLOCK();

	return cached;
}
1902
1903 void
1904 rtcache_copy(struct route *new_ro, struct route *old_ro)
1905 {
1906 struct rtentry *rt;
1907 int ret;
1908
1909 KASSERT(new_ro != old_ro);
1910 rtcache_invariants(new_ro);
1911 rtcache_invariants(old_ro);
1912
1913 rt = rtcache_validate(old_ro);
1914
1915 if (rtcache_getdst(old_ro) == NULL)
1916 goto out;
1917 ret = rtcache_setdst(new_ro, rtcache_getdst(old_ro));
1918 if (ret != 0)
1919 goto out;
1920
1921 RTCACHE_WLOCK();
1922 new_ro->ro_invalid = false;
1923 if ((new_ro->_ro_rt = rt) != NULL)
1924 rtcache(new_ro);
1925 rtcache_invariants(new_ro);
1926 RTCACHE_UNLOCK();
1927 out:
1928 rtcache_unref(rt, old_ro);
1929 return;
1930 }
1931
1932 static struct dom_rtlist invalid_routes = LIST_HEAD_INITIALIZER(dom_rtlist);
1933
1934 #if defined(RT_DEBUG) && defined(NET_MPSAFE)
1935 static void
1936 rtcache_trace(const char *func, struct rtentry *rt, struct route *ro)
1937 {
1938 char dst[64];
1939
1940 sockaddr_format(ro->ro_sa, dst, 64);
1941 printf("trace: %s:\tdst=%s cpu=%d lwp=%p psref=%p target=%p\n", func, dst,
1942 cpu_index(curcpu()), curlwp, &ro->ro_psref, &rt->rt_psref);
1943 }
1944 #define RTCACHE_PSREF_TRACE(rt, ro) rtcache_trace(__func__, (rt), (ro))
1945 #else
1946 #define RTCACHE_PSREF_TRACE(rt, ro) do {} while (0)
1947 #endif
1948
1949 static void
1950 rtcache_ref(struct rtentry *rt, struct route *ro)
1951 {
1952
1953 KASSERT(rt != NULL);
1954
1955 #ifdef NET_MPSAFE
1956 RTCACHE_PSREF_TRACE(rt, ro);
1957 ro->ro_bound = curlwp_bind();
1958 psref_acquire(&ro->ro_psref, &rt->rt_psref, rt_psref_class);
1959 #endif
1960 }
1961
1962 void
1963 rtcache_unref(struct rtentry *rt, struct route *ro)
1964 {
1965
1966 if (rt == NULL)
1967 return;
1968
1969 #ifdef NET_MPSAFE
1970 psref_release(&ro->ro_psref, &rt->rt_psref, rt_psref_class);
1971 curlwp_bindx(ro->ro_bound);
1972 RTCACHE_PSREF_TRACE(rt, ro);
1973 #endif
1974 }
1975
1976 static struct rtentry *
1977 rtcache_validate_locked(struct route *ro)
1978 {
1979 struct rtentry *rt = NULL;
1980
1981 #ifdef NET_MPSAFE
1982 retry:
1983 #endif
1984 rt = ro->_ro_rt;
1985 rtcache_invariants(ro);
1986
1987 if (ro->ro_invalid) {
1988 rt = NULL;
1989 goto out;
1990 }
1991
1992 RT_RLOCK();
1993 if (rt != NULL && (rt->rt_flags & RTF_UP) != 0 && rt->rt_ifp != NULL) {
1994 #ifdef NET_MPSAFE
1995 if (ISSET(rt->rt_flags, RTF_UPDATING)) {
1996 if (rt_wait_ok()) {
1997 RT_UNLOCK();
1998 RTCACHE_UNLOCK();
1999 /* We can wait until the update is complete */
2000 rt_update_wait();
2001 RTCACHE_RLOCK();
2002 goto retry;
2003 } else {
2004 rt = NULL;
2005 }
2006 } else
2007 #endif
2008 rtcache_ref(rt, ro);
2009 } else
2010 rt = NULL;
2011 RT_UNLOCK();
2012 out:
2013 return rt;
2014 }
2015
/*
 * Locked wrapper around rtcache_validate_locked(): takes the route cache
 * read lock, validates ro, and returns the referenced route or NULL.
 */
struct rtentry *
rtcache_validate(struct route *ro)
{
	struct rtentry *valid;

	RTCACHE_RLOCK();
	valid = rtcache_validate_locked(ro);
	RTCACHE_UNLOCK();

	return valid;
}
2026
2027 static void
2028 rtcache_invalidate(struct dom_rtlist *rtlist)
2029 {
2030 struct route *ro;
2031
2032 RTCACHE_ASSERT_WLOCK();
2033
2034 while ((ro = LIST_FIRST(rtlist)) != NULL) {
2035 rtcache_invariants(ro);
2036 KASSERT(ro->_ro_rt != NULL);
2037 ro->ro_invalid = true;
2038 LIST_REMOVE(ro, ro_rtcache_next);
2039 LIST_INSERT_HEAD(&invalid_routes, ro, ro_rtcache_next);
2040 rtcache_invariants(ro);
2041 }
2042 }
2043
2044 static void
2045 rtcache_clear_rtentry(int family, struct rtentry *rt)
2046 {
2047 struct domain *dom;
2048 struct route *ro, *nro;
2049
2050 if ((dom = pffinddomain(family)) == NULL)
2051 return;
2052
2053 RTCACHE_WLOCK();
2054 LIST_FOREACH_SAFE(ro, &dom->dom_rtcache, ro_rtcache_next, nro) {
2055 if (ro->_ro_rt == rt)
2056 rtcache_clear(ro);
2057 }
2058 RTCACHE_UNLOCK();
2059 }
2060
2061 static void
2062 rtcache_clear(struct route *ro)
2063 {
2064
2065 RTCACHE_ASSERT_WLOCK();
2066
2067 rtcache_invariants(ro);
2068 if (ro->_ro_rt == NULL)
2069 return;
2070
2071 LIST_REMOVE(ro, ro_rtcache_next);
2072
2073 ro->_ro_rt = NULL;
2074 ro->ro_invalid = false;
2075 rtcache_invariants(ro);
2076 }
2077
2078 struct rtentry *
2079 rtcache_lookup2(struct route *ro, const struct sockaddr *dst,
2080 int clone, int *hitp)
2081 {
2082 const struct sockaddr *odst;
2083 struct rtentry *rt = NULL;
2084
2085 RTCACHE_RLOCK();
2086 odst = rtcache_getdst(ro);
2087 if (odst == NULL) {
2088 RTCACHE_UNLOCK();
2089 RTCACHE_WLOCK();
2090 goto miss;
2091 }
2092
2093 if (sockaddr_cmp(odst, dst) != 0) {
2094 RTCACHE_UNLOCK();
2095 RTCACHE_WLOCK();
2096 rtcache_free_locked(ro);
2097 goto miss;
2098 }
2099
2100 rt = rtcache_validate_locked(ro);
2101 if (rt == NULL) {
2102 RTCACHE_UNLOCK();
2103 RTCACHE_WLOCK();
2104 rtcache_clear(ro);
2105 goto miss;
2106 }
2107
2108 rtcache_invariants(ro);
2109
2110 RTCACHE_UNLOCK();
2111 if (hitp != NULL)
2112 *hitp = 1;
2113 return rt;
2114 miss:
2115 if (hitp != NULL)
2116 *hitp = 0;
2117 if (rtcache_setdst_locked(ro, dst) == 0)
2118 rt = _rtcache_init(ro, clone);
2119
2120 rtcache_invariants(ro);
2121
2122 RTCACHE_UNLOCK();
2123 return rt;
2124 }
2125
2126 static void
2127 rtcache_free_locked(struct route *ro)
2128 {
2129
2130 RTCACHE_ASSERT_WLOCK();
2131 rtcache_clear(ro);
2132 if (ro->ro_sa != NULL) {
2133 sockaddr_free(ro->ro_sa);
2134 ro->ro_sa = NULL;
2135 }
2136 rtcache_invariants(ro);
2137 }
2138
/*
 * Locked wrapper around rtcache_free_locked(): releases the cached route
 * and destination of ro under the route cache write lock.
 */
void
rtcache_free(struct route *ro)
{

	RTCACHE_WLOCK();
	rtcache_free_locked(ro);
	RTCACHE_UNLOCK();
}
2147
2148 static int
2149 rtcache_setdst_locked(struct route *ro, const struct sockaddr *sa)
2150 {
2151 KASSERT(sa != NULL);
2152
2153 RTCACHE_ASSERT_WLOCK();
2154
2155 rtcache_invariants(ro);
2156 if (ro->ro_sa != NULL) {
2157 if (ro->ro_sa->sa_family == sa->sa_family) {
2158 rtcache_clear(ro);
2159 sockaddr_copy(ro->ro_sa, ro->ro_sa->sa_len, sa);
2160 rtcache_invariants(ro);
2161 return 0;
2162 }
2163 /* free ro_sa, wrong family */
2164 rtcache_free_locked(ro);
2165 }
2166
2167 KASSERT(ro->_ro_rt == NULL);
2168
2169 if ((ro->ro_sa = sockaddr_dup(sa, M_ZERO | M_NOWAIT)) == NULL) {
2170 rtcache_invariants(ro);
2171 return ENOMEM;
2172 }
2173 rtcache_invariants(ro);
2174 return 0;
2175 }
2176
/*
 * Locked wrapper around rtcache_setdst_locked(): sets the destination of
 * ro to sa under the route cache write lock and returns its result.
 */
int
rtcache_setdst(struct route *ro, const struct sockaddr *sa)
{
	int rc;

	RTCACHE_WLOCK();
	rc = rtcache_setdst_locked(ro, sa);
	RTCACHE_UNLOCK();

	return rc;
}
2188
2189 const struct sockaddr *
2190 rt_settag(struct rtentry *rt, const struct sockaddr *tag)
2191 {
2192 if (rt->rt_tag != tag) {
2193 if (rt->rt_tag != NULL)
2194 sockaddr_free(rt->rt_tag);
2195 rt->rt_tag = sockaddr_dup(tag, M_ZERO | M_NOWAIT);
2196 }
2197 return rt->rt_tag;
2198 }
2199
2200 struct sockaddr *
2201 rt_gettag(const struct rtentry *rt)
2202 {
2203 return rt->rt_tag;
2204 }
2205
2206 int
2207 rt_check_reject_route(const struct rtentry *rt, const struct ifnet *ifp)
2208 {
2209
2210 if ((rt->rt_flags & RTF_REJECT) != 0) {
2211 /* Mimic looutput */
2212 if (ifp->if_flags & IFF_LOOPBACK)
2213 return (rt->rt_flags & RTF_HOST) ?
2214 EHOSTUNREACH : ENETUNREACH;
2215 else if (rt->rt_rmx.rmx_expire == 0 ||
2216 time_uptime < rt->rt_rmx.rmx_expire)
2217 return (rt->rt_flags & RTF_GATEWAY) ?
2218 EHOSTUNREACH : EHOSTDOWN;
2219 }
2220
2221 return 0;
2222 }
2223
2224 void
2225 rt_delete_matched_entries(sa_family_t family, int (*f)(struct rtentry *, void *),
2226 void *v)
2227 {
2228
2229 for (;;) {
2230 int s;
2231 int error;
2232 struct rtentry *rt, *retrt = NULL;
2233
2234 RT_RLOCK();
2235 s = splsoftnet();
2236 rt = rtbl_search_matched_entry(family, f, v);
2237 if (rt == NULL) {
2238 splx(s);
2239 RT_UNLOCK();
2240 return;
2241 }
2242 rt->rt_refcnt++;
2243 splx(s);
2244 RT_UNLOCK();
2245
2246 error = rtrequest(RTM_DELETE, rt_getkey(rt), rt->rt_gateway,
2247 rt_mask(rt), rt->rt_flags, &retrt);
2248 if (error == 0) {
2249 KASSERT(retrt == rt);
2250 KASSERT((retrt->rt_flags & RTF_UP) == 0);
2251 retrt->rt_ifp = NULL;
2252 rt_unref(rt);
2253 rt_free(retrt);
2254 } else if (error == ESRCH) {
2255 /* Someone deleted the entry already. */
2256 rt_unref(rt);
2257 } else {
2258 log(LOG_ERR, "%s: unable to delete rtentry @ %p, "
2259 "error = %d\n", rt->rt_ifp->if_xname, rt, error);
2260 /* XXX how to treat this case? */
2261 }
2262 }
2263 }
2264
2265 int
2266 rt_walktree(sa_family_t family, int (*f)(struct rtentry *, void *), void *v)
2267 {
2268 int error;
2269
2270 RT_RLOCK();
2271 error = rtbl_walktree(family, f, v);
2272 RT_UNLOCK();
2273
2274 return error;
2275 }
2276
2277 #ifdef DDB
2278
2279 #include <machine/db_machdep.h>
2280 #include <ddb/db_interface.h>
2281 #include <ddb/db_output.h>
2282
2283 #define rt_expire rt_rmx.rmx_expire
2284
2285 static void
2286 db_print_sa(const struct sockaddr *sa)
2287 {
2288 int len;
2289 const u_char *p;
2290
2291 if (sa == NULL) {
2292 db_printf("[NULL]");
2293 return;
2294 }
2295
2296 p = (const u_char *)sa;
2297 len = sa->sa_len;
2298 db_printf("[");
2299 while (len > 0) {
2300 db_printf("%d", *p);
2301 p++; len--;
2302 if (len) db_printf(",");
2303 }
2304 db_printf("]\n");
2305 }
2306
2307 static void
2308 db_print_ifa(struct ifaddr *ifa)
2309 {
2310 if (ifa == NULL)
2311 return;
2312 db_printf(" ifa_addr=");
2313 db_print_sa(ifa->ifa_addr);
2314 db_printf(" ifa_dsta=");
2315 db_print_sa(ifa->ifa_dstaddr);
2316 db_printf(" ifa_mask=");
2317 db_print_sa(ifa->ifa_netmask);
2318 db_printf(" flags=0x%x,refcnt=%d,metric=%d\n",
2319 ifa->ifa_flags,
2320 ifa->ifa_refcnt,
2321 ifa->ifa_metric);
2322 }
2323
2324 /*
2325 * Function to pass to rt_walktree().
2326 * Return non-zero error to abort walk.
2327 */
2328 static int
2329 db_show_rtentry(struct rtentry *rt, void *w)
2330 {
2331 db_printf("rtentry=%p", rt);
2332
2333 db_printf(" flags=0x%x refcnt=%d use=%"PRId64" expire=%"PRId64"\n",
2334 rt->rt_flags, rt->rt_refcnt,
2335 rt->rt_use, (uint64_t)rt->rt_expire);
2336
2337 db_printf(" key="); db_print_sa(rt_getkey(rt));
2338 db_printf(" mask="); db_print_sa(rt_mask(rt));
2339 db_printf(" gw="); db_print_sa(rt->rt_gateway);
2340
2341 db_printf(" ifp=%p ", rt->rt_ifp);
2342 if (rt->rt_ifp)
2343 db_printf("(%s)", rt->rt_ifp->if_xname);
2344 else
2345 db_printf("(NULL)");
2346
2347 db_printf(" ifa=%p\n", rt->rt_ifa);
2348 db_print_ifa(rt->rt_ifa);
2349
2350 db_printf(" gwroute=%p llinfo=%p\n",
2351 rt->rt_gwroute, rt->rt_llinfo);
2352
2353 return 0;
2354 }
2355
2356 /*
2357 * Function to print all the route trees.
2358 * Use this from ddb: "show routes"
2359 */
2360 void
2361 db_show_routes(db_expr_t addr, bool have_addr,
2362 db_expr_t count, const char *modif)
2363 {
2364 rt_walktree(AF_INET, db_show_rtentry, NULL);
2365 }
2366 #endif
2367