route.c revision 1.195 1 /* $NetBSD: route.c,v 1.195 2017/06/22 08:31:54 ozaki-r Exp $ */
2
3 /*-
4 * Copyright (c) 1998, 2008 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Kevin M. Lahey of the Numerical Aerospace Simulation Facility,
9 * NASA Ames Research Center.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
23 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 * POSSIBILITY OF SUCH DAMAGE.
31 */
32
33 /*
34 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
35 * All rights reserved.
36 *
37 * Redistribution and use in source and binary forms, with or without
38 * modification, are permitted provided that the following conditions
39 * are met:
40 * 1. Redistributions of source code must retain the above copyright
41 * notice, this list of conditions and the following disclaimer.
42 * 2. Redistributions in binary form must reproduce the above copyright
43 * notice, this list of conditions and the following disclaimer in the
44 * documentation and/or other materials provided with the distribution.
45 * 3. Neither the name of the project nor the names of its contributors
46 * may be used to endorse or promote products derived from this software
47 * without specific prior written permission.
48 *
49 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
50 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
53 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59 * SUCH DAMAGE.
60 */
61
62 /*
63 * Copyright (c) 1980, 1986, 1991, 1993
64 * The Regents of the University of California. All rights reserved.
65 *
66 * Redistribution and use in source and binary forms, with or without
67 * modification, are permitted provided that the following conditions
68 * are met:
69 * 1. Redistributions of source code must retain the above copyright
70 * notice, this list of conditions and the following disclaimer.
71 * 2. Redistributions in binary form must reproduce the above copyright
72 * notice, this list of conditions and the following disclaimer in the
73 * documentation and/or other materials provided with the distribution.
74 * 3. Neither the name of the University nor the names of its contributors
75 * may be used to endorse or promote products derived from this software
76 * without specific prior written permission.
77 *
78 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
79 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
80 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
81 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
82 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
83 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
84 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
85 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
86 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
87 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
88 * SUCH DAMAGE.
89 *
90 * @(#)route.c 8.3 (Berkeley) 1/9/95
91 */
92
93 #ifdef _KERNEL_OPT
94 #include "opt_inet.h"
95 #include "opt_route.h"
96 #include "opt_net_mpsafe.h"
97 #endif
98
99 #include <sys/cdefs.h>
100 __KERNEL_RCSID(0, "$NetBSD: route.c,v 1.195 2017/06/22 08:31:54 ozaki-r Exp $");
101
102 #include <sys/param.h>
103 #ifdef RTFLUSH_DEBUG
104 #include <sys/sysctl.h>
105 #endif
106 #include <sys/systm.h>
107 #include <sys/callout.h>
108 #include <sys/proc.h>
109 #include <sys/mbuf.h>
110 #include <sys/socket.h>
111 #include <sys/socketvar.h>
112 #include <sys/domain.h>
113 #include <sys/kernel.h>
114 #include <sys/ioctl.h>
115 #include <sys/pool.h>
116 #include <sys/kauth.h>
117 #include <sys/workqueue.h>
118 #include <sys/syslog.h>
119 #include <sys/rwlock.h>
120 #include <sys/mutex.h>
121 #include <sys/cpu.h>
122
123 #include <net/if.h>
124 #include <net/if_dl.h>
125 #include <net/route.h>
126
127 #include <netinet/in.h>
128 #include <netinet/in_var.h>
129
/*
 * rtcache_debug() gates the route cache debug printf()s.  It reads the
 * _rtcache_debug knob when RTFLUSH_DEBUG is defined and is the constant 0
 * otherwise.
 */
#ifdef RTFLUSH_DEBUG
#define	rtcache_debug() __predict_false(_rtcache_debug)
#else /* RTFLUSH_DEBUG */
#define	rtcache_debug() 0
#endif /* RTFLUSH_DEBUG */

/*
 * RT_REFCNT_TRACE(rt) prints the calling function, the line number and the
 * current rt_refcnt of rt when RT_DEBUG is defined; otherwise it expands to
 * an empty statement.
 */
#ifdef RT_DEBUG
#define RT_REFCNT_TRACE(rt)	printf("%s:%d: rt=%p refcnt=%d\n", \
    __func__, __LINE__, (rt), (rt)->rt_refcnt)
#else
#define RT_REFCNT_TRACE(rt)	do {} while (0)
#endif

/*
 * dlog() forwards its arguments to log() when DEBUG is defined and is an
 * empty statement otherwise (its arguments are then not evaluated).
 */
#ifdef DEBUG
#define dlog(level, fmt, args...)	log(level, fmt, ##args)
#else
#define dlog(level, fmt, args...)	do {} while (0)
#endif
148
/*
 * Routing statistics.  The code in this file bumps rts_unreach,
 * rts_dynamic, rts_newgateway and rts_badredirect.
 */
struct rtstat rtstat;

static int rttrash;		/* routes not in table but not freed */

/* Pools for struct rtentry and struct rttimer; set up by rt_init(). */
static struct pool rtentry_pool;
static struct pool rttimer_pool;

static struct callout rt_timer_ch; /* callout for rt_timer_timer() */
/*
 * NOTE(review): presumably the workqueue/work pair that rt_timer_timer()
 * defers to; their users are not in the visible part of the file.
 */
static struct workqueue	*rt_timer_wq;
static struct work	rt_timer_wk;

/* Route timer helpers (definitions are expected later in this file). */
static void rt_timer_init(void);
static void rt_timer_queue_remove_all(struct rttimer_queue *);
static void rt_timer_remove_all(struct rtentry *);
static void rt_timer_timer(void *);
164
/*
 * Locking notes:
 * - The routing table is protected by a global rwlock
 *   - API: RT_RLOCK and friends
 * - rtcaches are protected by a global rwlock
 *   - API: RTCACHE_RLOCK and friends
 * - References to a rtentry are managed by reference counting and psref
 *   - Reference counting is used for a temporary reference when a rtentry
 *     is fetched from the routing table
 *   - psref is used for a temporary reference when a rtentry is fetched
 *     from a rtcache
 *     - struct route (rtcache) has struct psref, so we cannot obtain
 *       a reference twice on the same struct route
 *   - Before destroying or updating a rtentry, we have to wait for
 *     all references to be released (see below for details)
 *   - APIs
 *     - A rtentry obtained via rtalloc1 or rtrequest* must be
 *       unreferenced by rt_unref
 *     - A rtentry obtained via rtcache_* must be unreferenced by
 *       rtcache_unref
 *   - TODO: once we get a lockless routing table, we should use only
 *           psref for rtentries
 * - rtentry destruction
 *   - A rtentry is destroyed (freed) only when we call rtrequest(RTM_DELETE)
 *   - If a caller of rtrequest grabs a reference to a rtentry, the caller
 *     is responsible for destroying the rtentry itself by calling
 *     rt_free
 *     - If not, rtrequest itself does that
 *   - If rt_free is called in softint, the actual destruction routine is
 *     deferred to a workqueue
 * - rtentry update
 *   - When updating a rtentry, the RTF_UPDATING flag is set
 *   - If a rtentry has RTF_UPDATING set, fetching the rtentry from
 *     the routing table or a rtcache results in either of the following
 *     cases:
 *     - if the caller runs in softint, the caller fails to fetch
 *     - otherwise, the caller waits for the update to complete and retries
 *       the fetch (which will probably succeed the second time)
 */
204
/*
 * Global locks for the routing table and rtcaches.
 * Locking order: rtcache_lock => rt_lock
 *
 * The RT_* and RTCACHE_* wrappers below operate on the rwlocks only when
 * NET_MPSAFE is defined.  Without NET_MPSAFE the lock/unlock/assert
 * wrappers are empty statements and the "is it held?" predicates
 * (RT_LOCKED(), RTCACHE_WLOCKED()) are the constant false.
 */
static krwlock_t		rt_lock __cacheline_aligned;
#ifdef NET_MPSAFE
#define RT_RLOCK()		rw_enter(&rt_lock, RW_READER)
#define RT_WLOCK()		rw_enter(&rt_lock, RW_WRITER)
#define RT_UNLOCK()		rw_exit(&rt_lock)
#define RT_LOCKED()		rw_lock_held(&rt_lock)
#define	RT_ASSERT_WLOCK()	KASSERT(rw_write_held(&rt_lock))
#else
#define RT_RLOCK()		do {} while (0)
#define RT_WLOCK()		do {} while (0)
#define RT_UNLOCK()		do {} while (0)
#define RT_LOCKED()		false
#define	RT_ASSERT_WLOCK()	do {} while (0)
#endif

static krwlock_t		rtcache_lock __cacheline_aligned;
#ifdef NET_MPSAFE
#define RTCACHE_RLOCK()		rw_enter(&rtcache_lock, RW_READER)
#define RTCACHE_WLOCK()		rw_enter(&rtcache_lock, RW_WRITER)
#define RTCACHE_UNLOCK()	rw_exit(&rtcache_lock)
#define	RTCACHE_ASSERT_WLOCK()	KASSERT(rw_write_held(&rtcache_lock))
#define	RTCACHE_WLOCKED()	rw_write_held(&rtcache_lock)
#else
#define RTCACHE_RLOCK()		do {} while (0)
#define RTCACHE_WLOCK()		do {} while (0)
#define RTCACHE_UNLOCK()	do {} while (0)
#define	RTCACHE_ASSERT_WLOCK()	do {} while (0)
#define	RTCACHE_WLOCKED()	false
#endif
238
/*
 * mutex and cv that are used to wait for references to a rtentry to be
 * released before updating the rtentry.
 *
 * rt_update_prepare() sets `ongoing' and records the updating lwp;
 * rt_update_finish() clears both and broadcasts on `cv'.  Only one update
 * is in progress at a time: a second updater sleeps on `cv' while
 * `ongoing' is true.
 */
static struct {
	kmutex_t		lock;	/* protects ongoing and lwp */
	kcondvar_t		cv;	/* signalled when an update finishes */
	bool			ongoing; /* true while an update is in progress */
	const struct lwp	*lwp;	/* the updater, NULL when idle */
} rt_update_global __cacheline_aligned;
249
/*
 * A workqueue and stuff that are used to defer the destruction routine
 * of rtentries.
 *
 * rt_free() parks an rtentry in a free (NULL) slot of `queue' when it
 * cannot sleep, and rt_free_work() drains the slots from the workqueue.
 * The queue has a fixed capacity of 10 entries.
 */
static struct {
	struct workqueue	*wq;	/* created in rt_init() */
	struct work		wk;	/* the single work item handed to wq */
	kmutex_t		lock;	/* protects queue[]; also paired with rt_cv */
	struct rtentry		*queue[10]; /* pending entries, NULL == free slot */
} rt_free_global __cacheline_aligned;
260
/* psref for rtentry; created in rt_init() */
static struct psref_class *rt_psref_class __read_mostly;

#ifdef RTFLUSH_DEBUG
/* Backing variable of rtcache_debug(); exported via the "debug" sysctl. */
static int _rtcache_debug = 0;
#endif /* RTFLUSH_DEBUG */

/* kauth(9) listener handle registered by rt_init(). */
static kauth_listener_t route_listener;

/* Forward declarations of file-local helpers. */
static int rtdeletemsg(struct rtentry *);
static void rtflushall(int);

static void rt_maskedcopy(const struct sockaddr *,
    struct sockaddr *, const struct sockaddr *);

static void rtcache_clear(struct route *);
static void rtcache_clear_rtentry(int, struct rtentry *);
static void rtcache_invalidate(struct dom_rtlist *);

static void rt_ref(struct rtentry *);

static struct rtentry *
    rtalloc1_locked(const struct sockaddr *, int, bool, bool);
static struct rtentry *
    rtcache_validate_locked(struct route *);
static void rtcache_free_locked(struct route *);
static int rtcache_setdst_locked(struct route *, const struct sockaddr *);

static void rtcache_ref(struct rtentry *, struct route *);

#ifdef NET_MPSAFE
static void rt_update_wait(void);
#endif

static bool rt_wait_ok(void);
static void rt_wait_refcnt(const char *, struct rtentry *, int);
static void rt_wait_psref(struct rtentry *);

#ifdef DDB
/* Kernel debugger (DDB) helpers. */
static void db_print_sa(const struct sockaddr *);
static void db_print_ifa(struct ifaddr *);
static int db_show_rtentry(struct rtentry *, void *);
#endif
304
#ifdef RTFLUSH_DEBUG
static void sysctl_net_rtcache_setup(struct sysctllog **);

/*
 * Create the "rtcache" sysctl node under CTL_NET and, below it, the
 * read/write integer "debug" that is backed by _rtcache_debug.
 * A failure to create either node is silently ignored.
 */
static void
sysctl_net_rtcache_setup(struct sysctllog **clog)
{
	const struct sysctlnode *rnode;
	int error;

	error = sysctl_createv(clog, 0, NULL, &rnode, CTLFLAG_PERMANENT,
	    CTLTYPE_NODE,
	    "rtcache", SYSCTL_DESCR("Route cache related settings"),
	    NULL, 0, NULL, 0, CTL_NET, CTL_CREATE, CTL_EOL);
	if (error != 0)
		return;

	/* Nothing to undo or report if the leaf cannot be created. */
	(void)sysctl_createv(clog, 0, &rnode, &rnode,
	    CTLFLAG_PERMANENT|CTLFLAG_READWRITE, CTLTYPE_INT,
	    "debug", SYSCTL_DESCR("Debug route caches"),
	    NULL, 0, &_rtcache_debug, 0, CTL_CREATE, CTL_EOL);
}
#endif /* RTFLUSH_DEBUG */
324
325 static inline void
326 rt_destroy(struct rtentry *rt)
327 {
328 if (rt->_rt_key != NULL)
329 sockaddr_free(rt->_rt_key);
330 if (rt->rt_gateway != NULL)
331 sockaddr_free(rt->rt_gateway);
332 if (rt_gettag(rt) != NULL)
333 sockaddr_free(rt_gettag(rt));
334 rt->_rt_key = rt->rt_gateway = rt->rt_tag = NULL;
335 }
336
337 static inline const struct sockaddr *
338 rt_setkey(struct rtentry *rt, const struct sockaddr *key, int flags)
339 {
340 if (rt->_rt_key == key)
341 goto out;
342
343 if (rt->_rt_key != NULL)
344 sockaddr_free(rt->_rt_key);
345 rt->_rt_key = sockaddr_dup(key, flags);
346 out:
347 rt->rt_nodes->rn_key = (const char *)rt->_rt_key;
348 return rt->_rt_key;
349 }
350
351 struct ifaddr *
352 rt_get_ifa(struct rtentry *rt)
353 {
354 struct ifaddr *ifa;
355
356 if ((ifa = rt->rt_ifa) == NULL)
357 return ifa;
358 else if (ifa->ifa_getifa == NULL)
359 return ifa;
360 #if 0
361 else if (ifa->ifa_seqno != NULL && *ifa->ifa_seqno == rt->rt_ifa_seqno)
362 return ifa;
363 #endif
364 else {
365 ifa = (*ifa->ifa_getifa)(ifa, rt_getkey(rt));
366 if (ifa == NULL)
367 return NULL;
368 rt_replace_ifa(rt, ifa);
369 return ifa;
370 }
371 }
372
373 static void
374 rt_set_ifa1(struct rtentry *rt, struct ifaddr *ifa)
375 {
376 rt->rt_ifa = ifa;
377 if (ifa->ifa_seqno != NULL)
378 rt->rt_ifa_seqno = *ifa->ifa_seqno;
379 }
380
381 /*
382 * Is this route the connected route for the ifa?
383 */
384 static int
385 rt_ifa_connected(const struct rtentry *rt, const struct ifaddr *ifa)
386 {
387 const struct sockaddr *key, *dst, *odst;
388 struct sockaddr_storage maskeddst;
389
390 key = rt_getkey(rt);
391 dst = rt->rt_flags & RTF_HOST ? ifa->ifa_dstaddr : ifa->ifa_addr;
392 if (dst == NULL ||
393 dst->sa_family != key->sa_family ||
394 dst->sa_len != key->sa_len)
395 return 0;
396 if ((rt->rt_flags & RTF_HOST) == 0 && ifa->ifa_netmask) {
397 odst = dst;
398 dst = (struct sockaddr *)&maskeddst;
399 rt_maskedcopy(odst, (struct sockaddr *)&maskeddst,
400 ifa->ifa_netmask);
401 }
402 return (memcmp(dst, key, dst->sa_len) == 0);
403 }
404
405 void
406 rt_replace_ifa(struct rtentry *rt, struct ifaddr *ifa)
407 {
408 if (rt->rt_ifa &&
409 rt->rt_ifa != ifa &&
410 rt->rt_ifa->ifa_flags & IFA_ROUTE &&
411 rt_ifa_connected(rt, rt->rt_ifa))
412 {
413 RT_DPRINTF("rt->_rt_key = %p, ifa = %p, "
414 "replace deleted IFA_ROUTE\n",
415 (void *)rt->_rt_key, (void *)rt->rt_ifa);
416 rt->rt_ifa->ifa_flags &= ~IFA_ROUTE;
417 if (rt_ifa_connected(rt, ifa)) {
418 RT_DPRINTF("rt->_rt_key = %p, ifa = %p, "
419 "replace added IFA_ROUTE\n",
420 (void *)rt->_rt_key, (void *)ifa);
421 ifa->ifa_flags |= IFA_ROUTE;
422 }
423 }
424
425 ifaref(ifa);
426 ifafree(rt->rt_ifa);
427 rt_set_ifa1(rt, ifa);
428 }
429
/*
 * Attach ifa to rt for the first time: take a reference on the ifa and
 * record it in the route.  Unlike rt_replace_ifa() no previous ifa is
 * released.
 */
static void
rt_set_ifa(struct rtentry *rt, struct ifaddr *ifa)
{

	ifaref(ifa);
	rt_set_ifa1(rt, ifa);
}
436
437 static int
438 route_listener_cb(kauth_cred_t cred, kauth_action_t action, void *cookie,
439 void *arg0, void *arg1, void *arg2, void *arg3)
440 {
441 struct rt_msghdr *rtm;
442 int result;
443
444 result = KAUTH_RESULT_DEFER;
445 rtm = arg1;
446
447 if (action != KAUTH_NETWORK_ROUTE)
448 return result;
449
450 if (rtm->rtm_type == RTM_GET)
451 result = KAUTH_RESULT_ALLOW;
452
453 return result;
454 }
455
static void rt_free_work(struct work *, void *);

/*
 * Set up the state owned by this file: the deferred-free machinery
 * (mutex, psref class, "rt_free" workqueue), the update mutex/condvar,
 * the rtentry and rttimer pools, the radix and routing table code, and
 * the kauth listener for the network scope.
 *
 * Panics if the workqueue cannot be created.
 */
void
rt_init(void)
{
	int error;

#ifdef RTFLUSH_DEBUG
	sysctl_net_rtcache_setup(NULL);
#endif

	/* Deferred destruction of rtentries, see rt_free(). */
	mutex_init(&rt_free_global.lock, MUTEX_DEFAULT, IPL_SOFTNET);
	rt_psref_class = psref_class_create("rtentry", IPL_SOFTNET);

	error = workqueue_create(&rt_free_global.wq, "rt_free",
	    rt_free_work, NULL, PRI_SOFTNET, IPL_SOFTNET, WQ_MPSAFE);
	if (error)
		panic("%s: workqueue_create failed (%d)\n", __func__, error);

	/* Serialization of rtentry updates, see rt_update_prepare(). */
	mutex_init(&rt_update_global.lock, MUTEX_DEFAULT, IPL_SOFTNET);
	cv_init(&rt_update_global.cv, "rt_update");

	pool_init(&rtentry_pool, sizeof(struct rtentry), 0, 0, 0, "rtentpl",
	    NULL, IPL_SOFTNET);
	pool_init(&rttimer_pool, sizeof(struct rttimer), 0, 0, 0, "rttmrpl",
	    NULL, IPL_SOFTNET);

	rn_init();	/* initialize all zeroes, all ones, mask table */
	rtbl_init();

	route_listener = kauth_listen_scope(KAUTH_SCOPE_NETWORK,
	    route_listener_cb, NULL);
}
489
490 static void
491 rtflushall(int family)
492 {
493 struct domain *dom;
494
495 if (rtcache_debug())
496 printf("%s: enter\n", __func__);
497
498 if ((dom = pffinddomain(family)) == NULL)
499 return;
500
501 RTCACHE_WLOCK();
502 rtcache_invalidate(&dom->dom_rtcache);
503 RTCACHE_UNLOCK();
504 }
505
506 static void
507 rtcache(struct route *ro)
508 {
509 struct domain *dom;
510
511 RTCACHE_ASSERT_WLOCK();
512
513 rtcache_invariants(ro);
514 KASSERT(ro->_ro_rt != NULL);
515 KASSERT(ro->ro_invalid == false);
516 KASSERT(rtcache_getdst(ro) != NULL);
517
518 if ((dom = pffinddomain(rtcache_getdst(ro)->sa_family)) == NULL)
519 return;
520
521 LIST_INSERT_HEAD(&dom->dom_rtcache, ro, ro_rtcache_next);
522 rtcache_invariants(ro);
523 }
524
#ifdef RT_DEBUG
/*
 * Debug helper: print one line describing rt, giving its address,
 * destination key, gateway, flags and interface name.  Missing pieces are
 * printed as "(NULL)".
 */
static void
dump_rt(const struct rtentry *rt)
{
	char buf[512];

	aprint_normal("rt: ");
	aprint_normal("p=%p ", rt);
	if (rt->_rt_key == NULL) {
		aprint_normal("dst=(NULL) ");
	} else {
		sockaddr_format(rt->_rt_key, buf, sizeof(buf));
		aprint_normal("dst=%s ", buf);
	}
	if (rt->rt_gateway == NULL) {
		aprint_normal("gw=(NULL) ");
	} else {
		/* Format the gateway itself, not the key a second time. */
		sockaddr_format(rt->rt_gateway, buf, sizeof(buf));
		aprint_normal("gw=%s ", buf);
	}
	aprint_normal("flags=%x ", rt->rt_flags);
	if (rt->rt_ifp == NULL) {
		aprint_normal("if=(NULL) ");
	} else {
		aprint_normal("if=%s ", rt->rt_ifp->if_xname);
	}
	aprint_normal("\n");
}
#endif /* RT_DEBUG */
554
/*
 * Packet routing routines.  On success the refcnt of the returned rtentry
 * has been incremented; the caller has to drop it with rt_unref().
 *
 * Looks up dst in the routing table of its address family.  Returns NULL
 * (a "miss") when there is no table, no matching entry, or the entry is
 * not RTF_UP; on a miss rts_unreach is bumped and, if `report' is
 * non-zero, an RTM_MISS message is sent.
 *
 * Called with the routing table lock held.  Under NET_MPSAFE, when the
 * entry is being updated by another lwp:
 *  - if `wait_ok' is false or we may not sleep (rt_wait_ok()), it is a miss;
 *  - otherwise the routing table lock (and the rtcache write lock, if
 *    held) is dropped, we sleep until the update is over, the locks are
 *    taken again -- the routing table lock as writer iff `wlock' -- and
 *    the lookup is retried.
 */
struct rtentry *
rtalloc1_locked(const struct sockaddr *dst, int report, bool wait_ok,
    bool wlock)
{
	rtbl_t *rtbl;
	struct rtentry *rt;
	int s;

#ifdef NET_MPSAFE
retry:
#endif
	s = splsoftnet();
	rtbl = rt_gettable(dst->sa_family);
	if (rtbl == NULL)
		goto miss;

	rt = rt_matchaddr(rtbl, dst);
	if (rt == NULL)
		goto miss;

	if (!ISSET(rt->rt_flags, RTF_UP))
		goto miss;

#ifdef NET_MPSAFE
	if (ISSET(rt->rt_flags, RTF_UPDATING) &&
	    /* XXX updater should be always able to acquire */
	    curlwp != rt_update_global.lwp) {
		bool need_lock = false;
		if (!wait_ok || !rt_wait_ok())
			goto miss;
		/* Drop everything we hold before going to sleep. */
		RT_UNLOCK();
		splx(s);

		/* XXX need more proper solution */
		if (RTCACHE_WLOCKED()) {
			RTCACHE_UNLOCK();
			need_lock = true;
		}

		/* We can wait until the update is complete */
		rt_update_wait();

		/* Retake in lock order: rtcache_lock first, then rt_lock. */
		if (need_lock)
			RTCACHE_WLOCK();
		if (wlock)
			RT_WLOCK();
		else
			RT_RLOCK();
		goto retry;
	}
#endif /* NET_MPSAFE */

	rt_ref(rt);
	RT_REFCNT_TRACE(rt);

	splx(s);
	return rt;
miss:
	rtstat.rts_unreach++;
	if (report) {
		struct rt_addrinfo info;

		memset(&info, 0, sizeof(info));
		info.rti_info[RTAX_DST] = dst;
		rt_missmsg(RTM_MISS, &info, 0, 0);
	}
	splx(s);
	return NULL;
}
628
/*
 * Look up the route for dst while holding the routing table lock as
 * reader; waiting for an in-progress update is permitted.  `report' is
 * passed through to rtalloc1_locked().
 *
 * Returns a referenced rtentry (release it with rt_unref()) or NULL.
 */
struct rtentry *
rtalloc1(const struct sockaddr *dst, int report)
{
	struct rtentry *found;

	RT_RLOCK();
	found = rtalloc1_locked(dst, report, true, false);
	RT_UNLOCK();

	return found;
}
640
641 static void
642 rt_ref(struct rtentry *rt)
643 {
644
645 KASSERT(rt->rt_refcnt >= 0);
646 atomic_inc_uint(&rt->rt_refcnt);
647 }
648
649 void
650 rt_unref(struct rtentry *rt)
651 {
652
653 KASSERT(rt != NULL);
654 KASSERTMSG(rt->rt_refcnt > 0, "refcnt=%d", rt->rt_refcnt);
655
656 atomic_dec_uint(&rt->rt_refcnt);
657 if (!ISSET(rt->rt_flags, RTF_UP) || ISSET(rt->rt_flags, RTF_UPDATING)) {
658 mutex_enter(&rt_free_global.lock);
659 cv_broadcast(&rt->rt_cv);
660 mutex_exit(&rt_free_global.lock);
661 }
662 }
663
/*
 * May the caller wait?  True unless we are running in a soft interrupt.
 * Must not be called from a hard interrupt.
 */
static bool
rt_wait_ok(void)
{

	KASSERT(!cpu_intr_p());

	if (cpu_softintr_p())
		return false;
	return true;
}
671
672 void
673 rt_wait_refcnt(const char *title, struct rtentry *rt, int cnt)
674 {
675 mutex_enter(&rt_free_global.lock);
676 while (rt->rt_refcnt > cnt) {
677 dlog(LOG_DEBUG, "%s: %s waiting (refcnt=%d)\n",
678 __func__, title, rt->rt_refcnt);
679 cv_wait(&rt->rt_cv, &rt_free_global.lock);
680 dlog(LOG_DEBUG, "%s: %s waited (refcnt=%d)\n",
681 __func__, title, rt->rt_refcnt);
682 }
683 mutex_exit(&rt_free_global.lock);
684 }
685
686 void
687 rt_wait_psref(struct rtentry *rt)
688 {
689
690 psref_target_destroy(&rt->rt_psref, rt_psref_class);
691 psref_target_init(&rt->rt_psref, rt_psref_class);
692 }
693
/*
 * Actually destroy rt: wait until nobody references it any more, release
 * its ifa, condvar and sockaddrs, and return it to rtentry_pool.
 *
 * Sleeps, so it is run either directly from rt_free() when waiting is
 * allowed or from the rt_free workqueue.
 */
static void
_rt_free(struct rtentry *rt)
{
	struct ifaddr *ifa;

	/*
	 * Need to avoid a deadlock on rt_wait_refcnt of update
	 * and a conflict on psref_target_destroy of update.
	 */
#ifdef NET_MPSAFE
	rt_update_wait();
#endif

	RT_REFCNT_TRACE(rt);
	KASSERTMSG(rt->rt_refcnt >= 0, "refcnt=%d", rt->rt_refcnt);
	/* Wait for the reference count to drop to zero. */
	rt_wait_refcnt("free", rt, 0);
#ifdef NET_MPSAFE
	psref_target_destroy(&rt->rt_psref, rt_psref_class);
#endif

	rt_assert_inactive(rt);
	rttrash--;
	ifa = rt->rt_ifa;
	rt->rt_ifa = NULL;
	ifafree(ifa);
	rt->rt_ifp = NULL;
	cv_destroy(&rt->rt_cv);
	rt_destroy(rt);
	pool_put(&rtentry_pool, rt);
}
724
725 static void
726 rt_free_work(struct work *wk, void *arg)
727 {
728 int i;
729 struct rtentry *rt;
730
731 restart:
732 mutex_enter(&rt_free_global.lock);
733 for (i = 0; i < sizeof(rt_free_global.queue); i++) {
734 if (rt_free_global.queue[i] == NULL)
735 continue;
736 rt = rt_free_global.queue[i];
737 rt_free_global.queue[i] = NULL;
738 mutex_exit(&rt_free_global.lock);
739
740 atomic_dec_uint(&rt->rt_refcnt);
741 _rt_free(rt);
742 goto restart;
743 }
744 mutex_exit(&rt_free_global.lock);
745 }
746
747 void
748 rt_free(struct rtentry *rt)
749 {
750
751 KASSERT(rt->rt_refcnt > 0);
752 if (!rt_wait_ok()) {
753 int i;
754 mutex_enter(&rt_free_global.lock);
755 for (i = 0; i < sizeof(rt_free_global.queue); i++) {
756 if (rt_free_global.queue[i] == NULL) {
757 rt_free_global.queue[i] = rt;
758 break;
759 }
760 }
761 KASSERT(i < sizeof(rt_free_global.queue));
762 rt_ref(rt);
763 mutex_exit(&rt_free_global.lock);
764 workqueue_enqueue(rt_free_global.wq, &rt_free_global.wk, NULL);
765 } else {
766 atomic_dec_uint(&rt->rt_refcnt);
767 _rt_free(rt);
768 }
769 }
770
#ifdef NET_MPSAFE
/*
 * Sleep until no rtentry update is in progress, i.e. until
 * rt_update_finish() has cleared rt_update_global.ongoing.
 */
static void
rt_update_wait(void)
{

	mutex_enter(&rt_update_global.lock);
	for (;;) {
		if (!rt_update_global.ongoing)
			break;
		dlog(LOG_DEBUG, "%s: waiting lwp=%p\n", __func__, curlwp);
		cv_wait(&rt_update_global.cv, &rt_update_global.lock);
		dlog(LOG_DEBUG, "%s: waited lwp=%p\n", __func__, curlwp);
	}
	mutex_exit(&rt_update_global.lock);
}
#endif
785
/*
 * Get rt ready for being modified in place.
 *
 * Marks rt RTF_UPDATING under both global locks, becomes the (single)
 * updater -- waiting for any other update to finish first -- and then
 * waits until the caller's own reference is the only one left and all
 * psref references are gone.
 *
 * Returns 0 on success; the caller must call rt_update_finish() when done.
 * Returns -1 without changing anything if rt is not RTF_UP, i.e. it is
 * being destroyed.
 */
int
rt_update_prepare(struct rtentry *rt)
{

	dlog(LOG_DEBUG, "%s: updating rt=%p lwp=%p\n", __func__, rt, curlwp);

	RTCACHE_WLOCK();
	RT_WLOCK();
	/* If the entry is being destroyed, don't proceed the update. */
	if (!ISSET(rt->rt_flags, RTF_UP)) {
		RT_UNLOCK();
		RTCACHE_UNLOCK();
		return -1;
	}
	rt->rt_flags |= RTF_UPDATING;
	RT_UNLOCK();
	RTCACHE_UNLOCK();

	mutex_enter(&rt_update_global.lock);
	while (rt_update_global.ongoing) {
		dlog(LOG_DEBUG, "%s: waiting ongoing updating rt=%p lwp=%p\n",
		    __func__, rt, curlwp);
		cv_wait(&rt_update_global.cv, &rt_update_global.lock);
		dlog(LOG_DEBUG, "%s: waited ongoing updating rt=%p lwp=%p\n",
		    __func__, rt, curlwp);
	}
	rt_update_global.ongoing = true;
	/* XXX need it to avoid rt_update_wait by updater itself. */
	rt_update_global.lwp = curlwp;
	mutex_exit(&rt_update_global.lock);

	/* 1 == the reference held by the caller. */
	rt_wait_refcnt("update", rt, 1);
	rt_wait_psref(rt);

	return 0;
}
822
/*
 * End an update started with a successful rt_update_prepare(): clear
 * RTF_UPDATING on rt under both global locks, give up the updater role
 * and wake everybody sleeping on the update condvar.
 */
void
rt_update_finish(struct rtentry *rt)
{

	RTCACHE_WLOCK();
	RT_WLOCK();
	rt->rt_flags &= ~RTF_UPDATING;
	RT_UNLOCK();
	RTCACHE_UNLOCK();

	mutex_enter(&rt_update_global.lock);
	rt_update_global.ongoing = false;
	rt_update_global.lwp = NULL;
	cv_broadcast(&rt_update_global.cv);
	mutex_exit(&rt_update_global.lock);

	dlog(LOG_DEBUG, "%s: updated rt=%p lwp=%p\n", __func__, rt, curlwp);
}
841
/*
 * Force a routing table entry to the specified
 * destination to go through the given gateway.
 * Normally called as a result of a routing redirect
 * message from the network layer.
 *
 * dst/gateway/netmask describe the redirect, src is the router that sent
 * it and flags are RTF_* flags for the resulting route.  If rtp is not
 * NULL and the redirect was applied, *rtp receives the affected rtentry
 * together with the reference held on it; in every other case the
 * reference is dropped here.
 *
 * An RTM_REDIRECT routing message carrying the outcome is always sent,
 * and rtstat is updated (rts_badredirect on error, otherwise rts_dynamic
 * or rts_newgateway).
 *
 * N.B.: must be called at splsoftnet
 */
void
rtredirect(const struct sockaddr *dst, const struct sockaddr *gateway,
	const struct sockaddr *netmask, int flags, const struct sockaddr *src,
	struct rtentry **rtp)
{
	struct rtentry *rt;
	int error = 0;
	uint64_t *stat = NULL;
	struct rt_addrinfo info;
	struct ifaddr *ifa;
	struct psref psref;

	/* verify the gateway is directly reachable */
	if ((ifa = ifa_ifwithnet_psref(gateway, &psref)) == NULL) {
		error = ENETUNREACH;
		goto out;
	}
	rt = rtalloc1(dst, 0);
	/*
	 * If the redirect isn't from our current router for this dst,
	 * it's either old or wrong.  If it redirects us to ourselves,
	 * we have a routing loop, perhaps as a result of an interface
	 * going down recently.
	 */
	if (!(flags & RTF_DONE) && rt &&
	     (sockaddr_cmp(src, rt->rt_gateway) != 0 || rt->rt_ifa != ifa))
		error = EINVAL;
	else {
		int s = pserialize_read_enter();
		struct ifaddr *_ifa;

		/* The gateway is one of our own addresses. */
		_ifa = ifa_ifwithaddr(gateway);
		if (_ifa != NULL)
			error = EHOSTUNREACH;
		pserialize_read_exit(s);
	}
	if (error)
		goto done;
	/*
	 * Create a new entry if we just got back a wildcard entry
	 * or the lookup failed.  This is necessary for hosts
	 * which use routing redirects generated by smart gateways
	 * to dynamically build the routing tables.
	 */
	if (rt == NULL || (rt_mask(rt) && rt_mask(rt)->sa_len < 2))
		goto create;
	/*
	 * Don't listen to the redirect if it's
	 * for a route to an interface.
	 */
	if (rt->rt_flags & RTF_GATEWAY) {
		if (((rt->rt_flags & RTF_HOST) == 0) && (flags & RTF_HOST)) {
			/*
			 * Changing from route to net => route to host.
			 * Create new route, rather than smashing route to net.
			 */
		create:
			/* The looked-up route is not needed any more. */
			if (rt != NULL)
				rt_unref(rt);
			flags |=  RTF_GATEWAY | RTF_DYNAMIC;
			memset(&info, 0, sizeof(info));
			info.rti_info[RTAX_DST] = dst;
			info.rti_info[RTAX_GATEWAY] = gateway;
			info.rti_info[RTAX_NETMASK] = netmask;
			info.rti_ifa = ifa;
			info.rti_flags = flags;
			rt = NULL;
			error = rtrequest1(RTM_ADD, &info, &rt);
			if (rt != NULL)
				flags = rt->rt_flags;
			stat = &rtstat.rts_dynamic;
		} else {
			/*
			 * Smash the current notion of the gateway to
			 * this destination.  Should check about netmask!!!
			 */
#ifdef NET_MPSAFE
			KASSERT(!cpu_softintr_p());

			error = rt_update_prepare(rt);
			if (error == 0) {
#endif
				error = rt_setgate(rt, gateway);
				if (error == 0) {
					rt->rt_flags |= RTF_MODIFIED;
					flags |= RTF_MODIFIED;
				}
#ifdef NET_MPSAFE
				rt_update_finish(rt);
			} else {
				/*
				 * If error != 0, the rtentry is being
				 * destroyed, so doing nothing doesn't
				 * matter.
				 */
			}
#endif
			stat = &rtstat.rts_newgateway;
		}
	} else
		error = EHOSTUNREACH;
done:
	/* Hand the reference to the caller on success, else drop it. */
	if (rt) {
		if (rtp != NULL && !error)
			*rtp = rt;
		else
			rt_unref(rt);
	}
out:
	if (error)
		rtstat.rts_badredirect++;
	else if (stat != NULL)
		(*stat)++;
	memset(&info, 0, sizeof(info));
	info.rti_info[RTAX_DST] = dst;
	info.rti_info[RTAX_GATEWAY] = gateway;
	info.rti_info[RTAX_NETMASK] = netmask;
	info.rti_info[RTAX_AUTHOR] = src;
	rt_missmsg(RTM_REDIRECT, &info, flags, error);
	/*
	 * NOTE(review): ifa is NULL here when the initial lookup failed;
	 * presumably ifa_release() tolerates that -- confirm.
	 */
	ifa_release(ifa, &psref);
}
971
972 /*
973 * Delete a route and generate a message.
974 * It doesn't free a passed rt.
975 */
976 static int
977 rtdeletemsg(struct rtentry *rt)
978 {
979 int error;
980 struct rt_addrinfo info;
981 struct rtentry *retrt;
982
983 /*
984 * Request the new route so that the entry is not actually
985 * deleted. That will allow the information being reported to
986 * be accurate (and consistent with route_output()).
987 */
988 memset(&info, 0, sizeof(info));
989 info.rti_info[RTAX_DST] = rt_getkey(rt);
990 info.rti_info[RTAX_NETMASK] = rt_mask(rt);
991 info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
992 info.rti_flags = rt->rt_flags;
993 error = rtrequest1(RTM_DELETE, &info, &retrt);
994
995 rt_missmsg(RTM_DELETE, &info, info.rti_flags, error);
996
997 return error;
998 }
999
/*
 * Find the interface address through which a route to dst via gateway
 * (with the given RTF_* flags) should go.
 *
 * The candidates are tried in this order: a point-to-point destination
 * address or local address match (non-gateway routes), or a
 * point-to-point destination address match on the gateway (gateway
 * routes); an address on the gateway's network; and finally the ifa of an
 * existing non-gateway route to the gateway.  If the address found is of
 * a different family than dst, an address of dst's family on the same
 * interface is preferred when there is one.
 *
 * Returns the ifa with a psref held in *psref, or NULL if none was found.
 */
struct ifaddr *
ifa_ifwithroute_psref(int flags, const struct sockaddr *dst,
	const struct sockaddr *gateway, struct psref *psref)
{
	struct ifaddr *ifa = NULL;

	if ((flags & RTF_GATEWAY) == 0) {
		/*
		 * If we are adding a route to an interface,
		 * and the interface is a pt to pt link
		 * we should search for the destination
		 * as our clue to the interface.  Otherwise
		 * we can use the local address.
		 */
		if ((flags & RTF_HOST) && gateway->sa_family != AF_LINK)
			ifa = ifa_ifwithdstaddr_psref(dst, psref);
		if (ifa == NULL)
			ifa = ifa_ifwithaddr_psref(gateway, psref);
	} else {
		/*
		 * If we are adding a route to a remote net
		 * or host, the gateway may still be on the
		 * other end of a pt to pt link.
		 */
		ifa = ifa_ifwithdstaddr_psref(gateway, psref);
	}
	if (ifa == NULL)
		ifa = ifa_ifwithnet_psref(gateway, psref);
	if (ifa == NULL) {
		int s;
		struct rtentry *rt;

		/* XXX we cannot call rtalloc1 if holding the rt lock */
		if (RT_LOCKED())
			rt = rtalloc1_locked(gateway, 0, true, true);
		else
			rt = rtalloc1(gateway, 0);
		if (rt == NULL)
			return NULL;
		/* A route that itself goes through a gateway is of no use. */
		if (rt->rt_flags & RTF_GATEWAY) {
			rt_unref(rt);
			return NULL;
		}
		/*
		 * Just in case. May not need to do this workaround.
		 * Revisit when working on rtentry MP-ification.
		 */
		s = pserialize_read_enter();
		IFADDR_READER_FOREACH(ifa, rt->rt_ifp) {
			if (ifa == rt->rt_ifa)
				break;
		}
		if (ifa != NULL)
			ifa_acquire(ifa, psref);
		pserialize_read_exit(s);
		rt_unref(rt);
		if (ifa == NULL)
			return NULL;
	}
	if (ifa->ifa_addr->sa_family != dst->sa_family) {
		struct ifaddr *nifa;
		int s;

		s = pserialize_read_enter();
		nifa = ifaof_ifpforaddr(dst, ifa->ifa_ifp);
		if (nifa != NULL) {
			/* Swap the psref over to the better match. */
			ifa_release(ifa, psref);
			ifa_acquire(nifa, psref);
			ifa = nifa;
		}
		pserialize_read_exit(s);
	}
	return ifa;
}
1074
1075 /*
1076 * If it suceeds and ret_nrt isn't NULL, refcnt of ret_nrt is incremented.
1077 * The caller has to rtfree it by itself.
1078 */
1079 int
1080 rtrequest(int req, const struct sockaddr *dst, const struct sockaddr *gateway,
1081 const struct sockaddr *netmask, int flags, struct rtentry **ret_nrt)
1082 {
1083 struct rt_addrinfo info;
1084
1085 memset(&info, 0, sizeof(info));
1086 info.rti_flags = flags;
1087 info.rti_info[RTAX_DST] = dst;
1088 info.rti_info[RTAX_GATEWAY] = gateway;
1089 info.rti_info[RTAX_NETMASK] = netmask;
1090 return rtrequest1(req, &info, ret_nrt);
1091 }
1092
1093 /*
1094 * It's a utility function to add/remove a route to/from the routing table
1095 * and tell user processes the addition/removal on success.
1096 */
1097 int
1098 rtrequest_newmsg(const int req, const struct sockaddr *dst,
1099 const struct sockaddr *gateway, const struct sockaddr *netmask,
1100 const int flags)
1101 {
1102 int error;
1103 struct rtentry *ret_nrt = NULL;
1104
1105 KASSERT(req == RTM_ADD || req == RTM_DELETE);
1106
1107 error = rtrequest(req, dst, gateway, netmask, flags, &ret_nrt);
1108 if (error != 0)
1109 return error;
1110
1111 KASSERT(ret_nrt != NULL);
1112
1113 rt_newmsg(req, ret_nrt); /* tell user process */
1114 if (req == RTM_DELETE)
1115 rt_free(ret_nrt);
1116 else
1117 rt_unref(ret_nrt);
1118
1119 return 0;
1120 }
1121
1122 struct ifnet *
1123 rt_getifp(struct rt_addrinfo *info, struct psref *psref)
1124 {
1125 const struct sockaddr *ifpaddr = info->rti_info[RTAX_IFP];
1126
1127 if (info->rti_ifp != NULL)
1128 return NULL;
1129 /*
1130 * ifp may be specified by sockaddr_dl when protocol address
1131 * is ambiguous
1132 */
1133 if (ifpaddr != NULL && ifpaddr->sa_family == AF_LINK) {
1134 struct ifaddr *ifa;
1135 int s = pserialize_read_enter();
1136
1137 ifa = ifa_ifwithnet(ifpaddr);
1138 if (ifa != NULL)
1139 info->rti_ifp = if_get_byindex(ifa->ifa_ifp->if_index,
1140 psref);
1141 pserialize_read_exit(s);
1142 }
1143
1144 return info->rti_ifp;
1145 }
1146
1147 struct ifaddr *
1148 rt_getifa(struct rt_addrinfo *info, struct psref *psref)
1149 {
1150 struct ifaddr *ifa = NULL;
1151 const struct sockaddr *dst = info->rti_info[RTAX_DST];
1152 const struct sockaddr *gateway = info->rti_info[RTAX_GATEWAY];
1153 const struct sockaddr *ifaaddr = info->rti_info[RTAX_IFA];
1154 int flags = info->rti_flags;
1155 const struct sockaddr *sa;
1156
1157 if (info->rti_ifa == NULL && ifaaddr != NULL) {
1158 ifa = ifa_ifwithaddr_psref(ifaaddr, psref);
1159 if (ifa != NULL)
1160 goto got;
1161 }
1162
1163 sa = ifaaddr != NULL ? ifaaddr :
1164 (gateway != NULL ? gateway : dst);
1165 if (sa != NULL && info->rti_ifp != NULL)
1166 ifa = ifaof_ifpforaddr_psref(sa, info->rti_ifp, psref);
1167 else if (dst != NULL && gateway != NULL)
1168 ifa = ifa_ifwithroute_psref(flags, dst, gateway, psref);
1169 else if (sa != NULL)
1170 ifa = ifa_ifwithroute_psref(flags, sa, sa, psref);
1171 if (ifa == NULL)
1172 return NULL;
1173 got:
1174 if (ifa->ifa_getifa != NULL) {
1175 /* FIXME ifa_getifa is NOMPSAFE */
1176 ifa = (*ifa->ifa_getifa)(ifa, dst);
1177 if (ifa == NULL)
1178 return NULL;
1179 ifa_acquire(ifa, psref);
1180 }
1181 info->rti_ifa = ifa;
1182 if (info->rti_ifp == NULL)
1183 info->rti_ifp = ifa->ifa_ifp;
1184 return ifa;
1185 }
1186
1187 /*
1188 * If it suceeds and ret_nrt isn't NULL, refcnt of ret_nrt is incremented.
1189 * The caller has to rtfree it by itself.
1190 */
1191 int
1192 rtrequest1(int req, struct rt_addrinfo *info, struct rtentry **ret_nrt)
1193 {
1194 int s = splsoftnet(), ss;
1195 int error = 0, rc;
1196 struct rtentry *rt;
1197 rtbl_t *rtbl;
1198 struct ifaddr *ifa = NULL;
1199 struct sockaddr_storage maskeddst;
1200 const struct sockaddr *dst = info->rti_info[RTAX_DST];
1201 const struct sockaddr *gateway = info->rti_info[RTAX_GATEWAY];
1202 const struct sockaddr *netmask = info->rti_info[RTAX_NETMASK];
1203 int flags = info->rti_flags;
1204 struct psref psref_ifp, psref_ifa;
1205 int bound = 0;
1206 struct ifnet *ifp = NULL;
1207 bool need_to_release_ifa = true;
1208 bool need_unlock = true;
1209 #define senderr(x) { error = x ; goto bad; }
1210
1211 RT_WLOCK();
1212
1213 bound = curlwp_bind();
1214 if ((rtbl = rt_gettable(dst->sa_family)) == NULL)
1215 senderr(ESRCH);
1216 if (flags & RTF_HOST)
1217 netmask = NULL;
1218 switch (req) {
1219 case RTM_DELETE:
1220 if (netmask) {
1221 rt_maskedcopy(dst, (struct sockaddr *)&maskeddst,
1222 netmask);
1223 dst = (struct sockaddr *)&maskeddst;
1224 }
1225 if ((rt = rt_lookup(rtbl, dst, netmask)) == NULL)
1226 senderr(ESRCH);
1227 if ((rt = rt_deladdr(rtbl, dst, netmask)) == NULL)
1228 senderr(ESRCH);
1229 rt->rt_flags &= ~RTF_UP;
1230 if ((ifa = rt->rt_ifa)) {
1231 if (ifa->ifa_flags & IFA_ROUTE &&
1232 rt_ifa_connected(rt, ifa)) {
1233 RT_DPRINTF("rt->_rt_key = %p, ifa = %p, "
1234 "deleted IFA_ROUTE\n",
1235 (void *)rt->_rt_key, (void *)ifa);
1236 ifa->ifa_flags &= ~IFA_ROUTE;
1237 }
1238 if (ifa->ifa_rtrequest)
1239 ifa->ifa_rtrequest(RTM_DELETE, rt, info);
1240 ifa = NULL;
1241 }
1242 rttrash++;
1243 if (ret_nrt) {
1244 *ret_nrt = rt;
1245 rt_ref(rt);
1246 RT_REFCNT_TRACE(rt);
1247 }
1248 RT_UNLOCK();
1249 need_unlock = false;
1250 rt_timer_remove_all(rt);
1251 rtcache_clear_rtentry(dst->sa_family, rt);
1252 if (ret_nrt == NULL) {
1253 /* Adjust the refcount */
1254 rt_ref(rt);
1255 RT_REFCNT_TRACE(rt);
1256 rt_free(rt);
1257 }
1258 break;
1259
1260 case RTM_ADD:
1261 if (info->rti_ifa == NULL) {
1262 ifp = rt_getifp(info, &psref_ifp);
1263 ifa = rt_getifa(info, &psref_ifa);
1264 if (ifa == NULL)
1265 senderr(ENETUNREACH);
1266 } else {
1267 /* Caller should have a reference of ifa */
1268 ifa = info->rti_ifa;
1269 need_to_release_ifa = false;
1270 }
1271 rt = pool_get(&rtentry_pool, PR_NOWAIT);
1272 if (rt == NULL)
1273 senderr(ENOBUFS);
1274 memset(rt, 0, sizeof(*rt));
1275 rt->rt_flags = RTF_UP | flags;
1276 LIST_INIT(&rt->rt_timer);
1277
1278 RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);
1279 if (netmask) {
1280 rt_maskedcopy(dst, (struct sockaddr *)&maskeddst,
1281 netmask);
1282 rt_setkey(rt, (struct sockaddr *)&maskeddst, M_NOWAIT);
1283 } else {
1284 rt_setkey(rt, dst, M_NOWAIT);
1285 }
1286 RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);
1287 if (rt_getkey(rt) == NULL ||
1288 rt_setgate(rt, gateway) != 0) {
1289 pool_put(&rtentry_pool, rt);
1290 senderr(ENOBUFS);
1291 }
1292
1293 rt_set_ifa(rt, ifa);
1294 if (info->rti_info[RTAX_TAG] != NULL) {
1295 const struct sockaddr *tag;
1296 tag = rt_settag(rt, info->rti_info[RTAX_TAG]);
1297 if (tag == NULL)
1298 senderr(ENOBUFS);
1299 }
1300 RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);
1301
1302 ss = pserialize_read_enter();
1303 if (info->rti_info[RTAX_IFP] != NULL) {
1304 struct ifaddr *ifa2;
1305 ifa2 = ifa_ifwithnet(info->rti_info[RTAX_IFP]);
1306 if (ifa2 != NULL)
1307 rt->rt_ifp = ifa2->ifa_ifp;
1308 else
1309 rt->rt_ifp = ifa->ifa_ifp;
1310 } else
1311 rt->rt_ifp = ifa->ifa_ifp;
1312 pserialize_read_exit(ss);
1313 cv_init(&rt->rt_cv, "rtentry");
1314 psref_target_init(&rt->rt_psref, rt_psref_class);
1315
1316 RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);
1317 rc = rt_addaddr(rtbl, rt, netmask);
1318 RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);
1319 if (rc != 0) {
1320 ifafree(ifa); /* for rt_set_ifa above */
1321 cv_destroy(&rt->rt_cv);
1322 rt_destroy(rt);
1323 pool_put(&rtentry_pool, rt);
1324 senderr(rc);
1325 }
1326 RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);
1327 if (ifa->ifa_rtrequest)
1328 ifa->ifa_rtrequest(req, rt, info);
1329 if (need_to_release_ifa)
1330 ifa_release(ifa, &psref_ifa);
1331 ifa = NULL;
1332 if_put(ifp, &psref_ifp);
1333 ifp = NULL;
1334 RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);
1335 if (ret_nrt) {
1336 *ret_nrt = rt;
1337 rt_ref(rt);
1338 RT_REFCNT_TRACE(rt);
1339 }
1340 RT_UNLOCK();
1341 need_unlock = false;
1342 rtflushall(dst->sa_family);
1343 break;
1344 case RTM_GET:
1345 if (netmask != NULL) {
1346 rt_maskedcopy(dst, (struct sockaddr *)&maskeddst,
1347 netmask);
1348 dst = (struct sockaddr *)&maskeddst;
1349 }
1350 if ((rt = rt_lookup(rtbl, dst, netmask)) == NULL)
1351 senderr(ESRCH);
1352 if (ret_nrt != NULL) {
1353 *ret_nrt = rt;
1354 rt_ref(rt);
1355 RT_REFCNT_TRACE(rt);
1356 }
1357 break;
1358 }
1359 bad:
1360 if (need_to_release_ifa)
1361 ifa_release(ifa, &psref_ifa);
1362 if_put(ifp, &psref_ifp);
1363 curlwp_bindx(bound);
1364 if (need_unlock)
1365 RT_UNLOCK();
1366 splx(s);
1367 return error;
1368 }
1369
1370 int
1371 rt_setgate(struct rtentry *rt, const struct sockaddr *gate)
1372 {
1373 struct sockaddr *new, *old;
1374
1375 KASSERT(rt->_rt_key != NULL);
1376 RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);
1377
1378 new = sockaddr_dup(gate, M_ZERO | M_NOWAIT);
1379 if (new == NULL)
1380 return ENOMEM;
1381
1382 old = rt->rt_gateway;
1383 rt->rt_gateway = new;
1384 if (old != NULL)
1385 sockaddr_free(old);
1386
1387 KASSERT(rt->_rt_key != NULL);
1388 RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);
1389
1390 if (rt->rt_flags & RTF_GATEWAY) {
1391 struct rtentry *gwrt;
1392
1393 /* XXX we cannot call rtalloc1 if holding the rt lock */
1394 if (RT_LOCKED())
1395 gwrt = rtalloc1_locked(gate, 1, false, true);
1396 else
1397 gwrt = rtalloc1(gate, 1);
1398 /*
1399 * If we switched gateways, grab the MTU from the new
1400 * gateway route if the current MTU, if the current MTU is
1401 * greater than the MTU of gateway.
1402 * Note that, if the MTU of gateway is 0, we will reset the
1403 * MTU of the route to run PMTUD again from scratch. XXX
1404 */
1405 if (gwrt != NULL) {
1406 KASSERT(gwrt->_rt_key != NULL);
1407 RT_DPRINTF("gwrt->_rt_key = %p\n", gwrt->_rt_key);
1408 if ((rt->rt_rmx.rmx_locks & RTV_MTU) == 0 &&
1409 rt->rt_rmx.rmx_mtu &&
1410 rt->rt_rmx.rmx_mtu > gwrt->rt_rmx.rmx_mtu) {
1411 rt->rt_rmx.rmx_mtu = gwrt->rt_rmx.rmx_mtu;
1412 }
1413 rt_unref(gwrt);
1414 }
1415 }
1416 KASSERT(rt->_rt_key != NULL);
1417 RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);
1418 return 0;
1419 }
1420
1421 static void
1422 rt_maskedcopy(const struct sockaddr *src, struct sockaddr *dst,
1423 const struct sockaddr *netmask)
1424 {
1425 const char *netmaskp = &netmask->sa_data[0],
1426 *srcp = &src->sa_data[0];
1427 char *dstp = &dst->sa_data[0];
1428 const char *maskend = (char *)dst + MIN(netmask->sa_len, src->sa_len);
1429 const char *srcend = (char *)dst + src->sa_len;
1430
1431 dst->sa_len = src->sa_len;
1432 dst->sa_family = src->sa_family;
1433
1434 while (dstp < maskend)
1435 *dstp++ = *srcp++ & *netmaskp++;
1436 if (dstp < srcend)
1437 memset(dstp, 0, (size_t)(srcend - dstp));
1438 }
1439
1440 /*
1441 * Inform the routing socket of a route change.
1442 */
1443 void
1444 rt_newmsg(const int cmd, const struct rtentry *rt)
1445 {
1446 struct rt_addrinfo info;
1447
1448 memset((void *)&info, 0, sizeof(info));
1449 info.rti_info[RTAX_DST] = rt_getkey(rt);
1450 info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
1451 info.rti_info[RTAX_NETMASK] = rt_mask(rt);
1452 if (rt->rt_ifp) {
1453 info.rti_info[RTAX_IFP] = rt->rt_ifp->if_dl->ifa_addr;
1454 info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr;
1455 }
1456
1457 rt_missmsg(cmd, &info, rt->rt_flags, 0);
1458 }
1459
/*
 * Set up or tear down a routing table entry, normally
 * for an interface.
 *
 * ifa   - interface address the route belongs to.
 * cmd   - RTM_ADD, RTM_DELETE or RTM_LLINFO_UPD (the latter is issued to
 *         rtrequest1() as RTM_GET, without a netmask).
 * flags - route flags; RTF_HOST selects ifa_dstaddr as the destination
 *         instead of ifa_addr.
 *
 * On success a routing-socket message is sent via rt_newmsg().
 * Returns 0 on success or an errno value.
 */
int
rtinit(struct ifaddr *ifa, int cmd, int flags)
{
	struct rtentry *rt;
	struct sockaddr *dst, *odst;
	struct sockaddr_storage maskeddst;
	struct rtentry *nrt = NULL;
	int error;
	struct rt_addrinfo info;

	dst = flags & RTF_HOST ? ifa->ifa_dstaddr : ifa->ifa_addr;
	if (cmd == RTM_DELETE) {
		if ((flags & RTF_HOST) == 0 && ifa->ifa_netmask) {
			/* Delete subnet route for this interface */
			odst = dst;
			dst = (struct sockaddr *)&maskeddst;
			rt_maskedcopy(odst, dst, ifa->ifa_netmask);
		}
		/*
		 * Refuse to delete when the route currently found for dst
		 * is bound to a different ifaddr.
		 */
		if ((rt = rtalloc1(dst, 0)) != NULL) {
			if (rt->rt_ifa != ifa) {
				rt_unref(rt);
				return (flags & RTF_HOST) ? EHOSTUNREACH
							: ENETUNREACH;
			}
			rt_unref(rt);
		}
	}
	memset(&info, 0, sizeof(info));
	info.rti_ifa = ifa;
	info.rti_flags = flags | ifa->ifa_flags;
	info.rti_info[RTAX_DST] = dst;
	info.rti_info[RTAX_GATEWAY] = ifa->ifa_addr;

	/*
	 * XXX here, it seems that we are assuming that ifa_netmask is NULL
	 * for RTF_HOST.  bsdi4 passes NULL explicitly (via intermediate
	 * variable) when RTF_HOST is 1.  still not sure if i can safely
	 * change it to meet bsdi4 behavior.
	 */
	if (cmd != RTM_LLINFO_UPD)
		info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask;
	error = rtrequest1((cmd == RTM_LLINFO_UPD) ? RTM_GET : cmd, &info,
	    &nrt);
	if (error != 0)
		return error;

	/* rtrequest1() succeeded: nrt carries a reference we must drop. */
	rt = nrt;
	RT_REFCNT_TRACE(rt);
	switch (cmd) {
	case RTM_DELETE:
		rt_newmsg(cmd, rt);
		rt_free(rt);
		break;
	case RTM_LLINFO_UPD:
		if (cmd == RTM_LLINFO_UPD && ifa->ifa_rtrequest != NULL)
			ifa->ifa_rtrequest(RTM_LLINFO_UPD, rt, &info);
		/* The update is announced to listeners as RTM_CHANGE. */
		rt_newmsg(RTM_CHANGE, rt);
		rt_unref(rt);
		break;
	case RTM_ADD:
		/*
		 * XXX it looks just reverting rt_ifa replaced by ifa_rtrequest
		 * called via rtrequest1. Can we just prevent the replacement
		 * somehow and remove the following code? And also doesn't
		 * calling ifa_rtrequest(RTM_ADD) replace rt_ifa again?
		 */
		if (rt->rt_ifa != ifa) {
			printf("rtinit: wrong ifa (%p) was (%p)\n", ifa,
				rt->rt_ifa);
#ifdef NET_MPSAFE
			KASSERT(!cpu_softintr_p());

			error = rt_update_prepare(rt);
			if (error == 0) {
#endif
				/*
				 * Detach the route from its current ifaddr
				 * and re-attach it to ifa.
				 */
				if (rt->rt_ifa->ifa_rtrequest != NULL) {
					rt->rt_ifa->ifa_rtrequest(RTM_DELETE,
					    rt, &info);
				}
				rt_replace_ifa(rt, ifa);
				rt->rt_ifp = ifa->ifa_ifp;
				if (ifa->ifa_rtrequest != NULL)
					ifa->ifa_rtrequest(RTM_ADD, rt, &info);
#ifdef NET_MPSAFE
				rt_update_finish(rt);
			} else {
				/*
				 * If error != 0, the rtentry is being
				 * destroyed, so doing nothing doesn't
				 * matter.
				 */
			}
#endif
		}
		rt_newmsg(cmd, rt);
		rt_unref(rt);
		RT_REFCNT_TRACE(rt);
		break;
	}
	/*
	 * NOTE(review): with NET_MPSAFE a non-zero rt_update_prepare()
	 * result is what gets returned for RTM_ADD -- confirm intended.
	 */
	return error;
}
1565
1566 /*
1567 * Create a local route entry for the address.
1568 * Announce the addition of the address and the route to the routing socket.
1569 */
1570 int
1571 rt_ifa_addlocal(struct ifaddr *ifa)
1572 {
1573 struct rtentry *rt;
1574 int e;
1575
1576 /* If there is no loopback entry, allocate one. */
1577 rt = rtalloc1(ifa->ifa_addr, 0);
1578 #ifdef RT_DEBUG
1579 if (rt != NULL)
1580 dump_rt(rt);
1581 #endif
1582 if (rt == NULL || (rt->rt_flags & RTF_HOST) == 0 ||
1583 (rt->rt_ifp->if_flags & IFF_LOOPBACK) == 0)
1584 {
1585 struct rt_addrinfo info;
1586 struct rtentry *nrt;
1587
1588 memset(&info, 0, sizeof(info));
1589 info.rti_flags = RTF_HOST | RTF_LOCAL;
1590 if (!(ifa->ifa_ifp->if_flags & (IFF_LOOPBACK|IFF_POINTOPOINT)))
1591 info.rti_flags |= RTF_LLDATA;
1592 info.rti_info[RTAX_DST] = ifa->ifa_addr;
1593 info.rti_info[RTAX_GATEWAY] =
1594 (const struct sockaddr *)ifa->ifa_ifp->if_sadl;
1595 info.rti_ifa = ifa;
1596 nrt = NULL;
1597 e = rtrequest1(RTM_ADD, &info, &nrt);
1598 if (nrt && ifa != nrt->rt_ifa)
1599 rt_replace_ifa(nrt, ifa);
1600 rt_newaddrmsg(RTM_ADD, ifa, e, nrt);
1601 if (nrt != NULL) {
1602 #ifdef RT_DEBUG
1603 dump_rt(nrt);
1604 #endif
1605 rt_unref(nrt);
1606 RT_REFCNT_TRACE(nrt);
1607 }
1608 } else {
1609 e = 0;
1610 rt_newaddrmsg(RTM_NEWADDR, ifa, 0, NULL);
1611 }
1612 if (rt != NULL)
1613 rt_unref(rt);
1614 return e;
1615 }
1616
1617 /*
1618 * Remove the local route entry for the address.
1619 * Announce the removal of the address and the route to the routing socket.
1620 */
1621 int
1622 rt_ifa_remlocal(struct ifaddr *ifa, struct ifaddr *alt_ifa)
1623 {
1624 struct rtentry *rt;
1625 int e = 0;
1626
1627 rt = rtalloc1(ifa->ifa_addr, 0);
1628
1629 /*
1630 * Before deleting, check if a corresponding loopbacked
1631 * host route surely exists. With this check, we can avoid
1632 * deleting an interface direct route whose destination is
1633 * the same as the address being removed. This can happen
1634 * when removing a subnet-router anycast address on an
1635 * interface attached to a shared medium.
1636 */
1637 if (rt != NULL &&
1638 (rt->rt_flags & RTF_HOST) &&
1639 (rt->rt_ifp->if_flags & IFF_LOOPBACK))
1640 {
1641 /* If we cannot replace the route's ifaddr with the equivalent
1642 * ifaddr of another interface, I believe it is safest to
1643 * delete the route.
1644 */
1645 if (alt_ifa == NULL) {
1646 e = rtdeletemsg(rt);
1647 if (e == 0) {
1648 rt_unref(rt);
1649 rt_free(rt);
1650 rt = NULL;
1651 }
1652 rt_newaddrmsg(RTM_DELADDR, ifa, 0, NULL);
1653 } else {
1654 rt_replace_ifa(rt, alt_ifa);
1655 rt_newmsg(RTM_CHANGE, rt);
1656 }
1657 } else
1658 rt_newaddrmsg(RTM_DELADDR, ifa, 0, NULL);
1659 if (rt != NULL)
1660 rt_unref(rt);
1661 return e;
1662 }
1663
/*
 * Route timer routines.  These routines allow functions to be called
 * for various routes at any time.  This is useful in supporting
 * path MTU discovery and redirect route deletion.
 *
 * This is similar to some BSDI internal functions, but it provides
 * for multiple queues for efficiency's sake...
 */

/* List of all timer queues; rt_timer_work() walks it to expire timers. */
LIST_HEAD(, rttimer_queue) rttimer_queue_head;
/* Set to 1 by rt_timer_init() once the timer machinery has been set up. */
static int rt_init_done = 0;

/*
 * Some subtle order problems with domain initialization mean that
 * we cannot count on this being run from rt_init before various
 * protocol initializations are done.  Therefore, we make sure
 * that this is run when the first queue is added...
 */

/* Forward declaration: needed by workqueue_create() in rt_timer_init(). */
static void rt_timer_work(struct work *, void *);
1684
1685 static void
1686 rt_timer_init(void)
1687 {
1688 int error;
1689
1690 assert(rt_init_done == 0);
1691
1692 /* XXX should be in rt_init */
1693 rw_init(&rt_lock);
1694 rw_init(&rtcache_lock);
1695
1696 LIST_INIT(&rttimer_queue_head);
1697 callout_init(&rt_timer_ch, CALLOUT_MPSAFE);
1698 error = workqueue_create(&rt_timer_wq, "rt_timer",
1699 rt_timer_work, NULL, PRI_SOFTNET, IPL_SOFTNET, WQ_MPSAFE);
1700 if (error)
1701 panic("%s: workqueue_create failed (%d)\n", __func__, error);
1702 callout_reset(&rt_timer_ch, hz, rt_timer_timer, NULL);
1703 rt_init_done = 1;
1704 }
1705
1706 struct rttimer_queue *
1707 rt_timer_queue_create(u_int timeout)
1708 {
1709 struct rttimer_queue *rtq;
1710
1711 if (rt_init_done == 0)
1712 rt_timer_init();
1713
1714 R_Malloc(rtq, struct rttimer_queue *, sizeof *rtq);
1715 if (rtq == NULL)
1716 return NULL;
1717 memset(rtq, 0, sizeof(*rtq));
1718
1719 rtq->rtq_timeout = timeout;
1720 TAILQ_INIT(&rtq->rtq_head);
1721 RT_WLOCK();
1722 LIST_INSERT_HEAD(&rttimer_queue_head, rtq, rtq_link);
1723 RT_UNLOCK();
1724
1725 return rtq;
1726 }
1727
1728 void
1729 rt_timer_queue_change(struct rttimer_queue *rtq, long timeout)
1730 {
1731
1732 rtq->rtq_timeout = timeout;
1733 }
1734
1735 static void
1736 rt_timer_queue_remove_all(struct rttimer_queue *rtq)
1737 {
1738 struct rttimer *r;
1739
1740 RT_ASSERT_WLOCK();
1741
1742 while ((r = TAILQ_FIRST(&rtq->rtq_head)) != NULL) {
1743 LIST_REMOVE(r, rtt_link);
1744 TAILQ_REMOVE(&rtq->rtq_head, r, rtt_next);
1745 rt_ref(r->rtt_rt); /* XXX */
1746 RT_REFCNT_TRACE(r->rtt_rt);
1747 RT_UNLOCK();
1748 (*r->rtt_func)(r->rtt_rt, r);
1749 pool_put(&rttimer_pool, r);
1750 RT_WLOCK();
1751 if (rtq->rtq_count > 0)
1752 rtq->rtq_count--;
1753 else
1754 printf("rt_timer_queue_remove_all: "
1755 "rtq_count reached 0\n");
1756 }
1757 }
1758
1759 void
1760 rt_timer_queue_destroy(struct rttimer_queue *rtq)
1761 {
1762
1763 RT_WLOCK();
1764 rt_timer_queue_remove_all(rtq);
1765 LIST_REMOVE(rtq, rtq_link);
1766 RT_UNLOCK();
1767
1768 /*
1769 * Caller is responsible for freeing the rttimer_queue structure.
1770 */
1771 }
1772
1773 unsigned long
1774 rt_timer_count(struct rttimer_queue *rtq)
1775 {
1776 return rtq->rtq_count;
1777 }
1778
1779 static void
1780 rt_timer_remove_all(struct rtentry *rt)
1781 {
1782 struct rttimer *r;
1783
1784 RT_WLOCK();
1785 while ((r = LIST_FIRST(&rt->rt_timer)) != NULL) {
1786 LIST_REMOVE(r, rtt_link);
1787 TAILQ_REMOVE(&r->rtt_queue->rtq_head, r, rtt_next);
1788 if (r->rtt_queue->rtq_count > 0)
1789 r->rtt_queue->rtq_count--;
1790 else
1791 printf("rt_timer_remove_all: rtq_count reached 0\n");
1792 pool_put(&rttimer_pool, r);
1793 }
1794 RT_UNLOCK();
1795 }
1796
1797 int
1798 rt_timer_add(struct rtentry *rt,
1799 void (*func)(struct rtentry *, struct rttimer *),
1800 struct rttimer_queue *queue)
1801 {
1802 struct rttimer *r;
1803
1804 KASSERT(func != NULL);
1805 RT_WLOCK();
1806 /*
1807 * If there's already a timer with this action, destroy it before
1808 * we add a new one.
1809 */
1810 LIST_FOREACH(r, &rt->rt_timer, rtt_link) {
1811 if (r->rtt_func == func)
1812 break;
1813 }
1814 if (r != NULL) {
1815 LIST_REMOVE(r, rtt_link);
1816 TAILQ_REMOVE(&r->rtt_queue->rtq_head, r, rtt_next);
1817 if (r->rtt_queue->rtq_count > 0)
1818 r->rtt_queue->rtq_count--;
1819 else
1820 printf("rt_timer_add: rtq_count reached 0\n");
1821 } else {
1822 r = pool_get(&rttimer_pool, PR_NOWAIT);
1823 if (r == NULL) {
1824 RT_UNLOCK();
1825 return ENOBUFS;
1826 }
1827 }
1828
1829 memset(r, 0, sizeof(*r));
1830
1831 r->rtt_rt = rt;
1832 r->rtt_time = time_uptime;
1833 r->rtt_func = func;
1834 r->rtt_queue = queue;
1835 LIST_INSERT_HEAD(&rt->rt_timer, r, rtt_link);
1836 TAILQ_INSERT_TAIL(&queue->rtq_head, r, rtt_next);
1837 r->rtt_queue->rtq_count++;
1838
1839 RT_UNLOCK();
1840
1841 return 0;
1842 }
1843
1844 static void
1845 rt_timer_work(struct work *wk, void *arg)
1846 {
1847 struct rttimer_queue *rtq;
1848 struct rttimer *r;
1849
1850 RT_WLOCK();
1851 LIST_FOREACH(rtq, &rttimer_queue_head, rtq_link) {
1852 while ((r = TAILQ_FIRST(&rtq->rtq_head)) != NULL &&
1853 (r->rtt_time + rtq->rtq_timeout) < time_uptime) {
1854 LIST_REMOVE(r, rtt_link);
1855 TAILQ_REMOVE(&rtq->rtq_head, r, rtt_next);
1856 rt_ref(r->rtt_rt); /* XXX */
1857 RT_REFCNT_TRACE(r->rtt_rt);
1858 RT_UNLOCK();
1859 (*r->rtt_func)(r->rtt_rt, r);
1860 pool_put(&rttimer_pool, r);
1861 RT_WLOCK();
1862 if (rtq->rtq_count > 0)
1863 rtq->rtq_count--;
1864 else
1865 printf("rt_timer_timer: rtq_count reached 0\n");
1866 }
1867 }
1868 RT_UNLOCK();
1869
1870 callout_reset(&rt_timer_ch, hz, rt_timer_timer, NULL);
1871 }
1872
1873 static void
1874 rt_timer_timer(void *arg)
1875 {
1876
1877 workqueue_enqueue(rt_timer_wq, &rt_timer_wk, NULL);
1878 }
1879
1880 static struct rtentry *
1881 _rtcache_init(struct route *ro, int flag)
1882 {
1883 struct rtentry *rt;
1884
1885 rtcache_invariants(ro);
1886 KASSERT(ro->_ro_rt == NULL);
1887 RTCACHE_ASSERT_WLOCK();
1888
1889 if (rtcache_getdst(ro) == NULL)
1890 return NULL;
1891 ro->ro_invalid = false;
1892 rt = rtalloc1(rtcache_getdst(ro), flag);
1893 if (rt != NULL && ISSET(rt->rt_flags, RTF_UP)) {
1894 ro->_ro_rt = rt;
1895 KASSERT(!ISSET(rt->rt_flags, RTF_UPDATING));
1896 rtcache_ref(rt, ro);
1897 rt_unref(rt);
1898 rtcache(ro);
1899 } else if (rt != NULL)
1900 rt_unref(rt);
1901
1902 rtcache_invariants(ro);
1903 return ro->_ro_rt;
1904 }
1905
/*
 * Populate the route cache under the rtcache write lock, passing 1 as the
 * lookup flag to _rtcache_init().  Returns the cached rtentry or NULL.
 */
struct rtentry *
rtcache_init(struct route *ro)
{
	struct rtentry *cached;

	RTCACHE_WLOCK();
	cached = _rtcache_init(ro, 1);
	RTCACHE_UNLOCK();

	return cached;
}
1915
/*
 * Same as rtcache_init() but passes 0 as the lookup flag to
 * _rtcache_init().  Returns the cached rtentry or NULL.
 */
struct rtentry *
rtcache_init_noclone(struct route *ro)
{
	struct rtentry *cached;

	RTCACHE_WLOCK();
	cached = _rtcache_init(ro, 0);
	RTCACHE_UNLOCK();

	return cached;
}
1925
/*
 * Discard whatever the cache currently holds and look the destination up
 * again, all under the rtcache write lock.  clone is forwarded as the
 * lookup flag.  Returns the newly cached rtentry or NULL.
 */
struct rtentry *
rtcache_update(struct route *ro, int clone)
{
	struct rtentry *cached;

	RTCACHE_WLOCK();
	rtcache_clear(ro);
	cached = _rtcache_init(ro, clone);
	RTCACHE_UNLOCK();

	return cached;
}
1936
1937 void
1938 rtcache_copy(struct route *new_ro, struct route *old_ro)
1939 {
1940 struct rtentry *rt;
1941 int ret;
1942
1943 KASSERT(new_ro != old_ro);
1944 rtcache_invariants(new_ro);
1945 rtcache_invariants(old_ro);
1946
1947 rt = rtcache_validate(old_ro);
1948
1949 if (rtcache_getdst(old_ro) == NULL)
1950 goto out;
1951 ret = rtcache_setdst(new_ro, rtcache_getdst(old_ro));
1952 if (ret != 0)
1953 goto out;
1954
1955 RTCACHE_WLOCK();
1956 new_ro->ro_invalid = false;
1957 if ((new_ro->_ro_rt = rt) != NULL)
1958 rtcache(new_ro);
1959 rtcache_invariants(new_ro);
1960 RTCACHE_UNLOCK();
1961 out:
1962 rtcache_unref(rt, old_ro);
1963 return;
1964 }
1965
1966 static struct dom_rtlist invalid_routes = LIST_HEAD_INITIALIZER(dom_rtlist);
1967
1968 #if defined(RT_DEBUG) && defined(NET_MPSAFE)
1969 static void
1970 rtcache_trace(const char *func, struct rtentry *rt, struct route *ro)
1971 {
1972 char dst[64];
1973
1974 sockaddr_format(ro->ro_sa, dst, 64);
1975 printf("trace: %s:\tdst=%s cpu=%d lwp=%p psref=%p target=%p\n", func, dst,
1976 cpu_index(curcpu()), curlwp, &ro->ro_psref, &rt->rt_psref);
1977 }
1978 #define RTCACHE_PSREF_TRACE(rt, ro) rtcache_trace(__func__, (rt), (ro))
1979 #else
1980 #define RTCACHE_PSREF_TRACE(rt, ro) do {} while (0)
1981 #endif
1982
1983 static void
1984 rtcache_ref(struct rtentry *rt, struct route *ro)
1985 {
1986
1987 KASSERT(rt != NULL);
1988
1989 #ifdef NET_MPSAFE
1990 RTCACHE_PSREF_TRACE(rt, ro);
1991 ro->ro_bound = curlwp_bind();
1992 psref_acquire(&ro->ro_psref, &rt->rt_psref, rt_psref_class);
1993 #endif
1994 }
1995
1996 void
1997 rtcache_unref(struct rtentry *rt, struct route *ro)
1998 {
1999
2000 if (rt == NULL)
2001 return;
2002
2003 #ifdef NET_MPSAFE
2004 psref_release(&ro->ro_psref, &rt->rt_psref, rt_psref_class);
2005 curlwp_bindx(ro->ro_bound);
2006 RTCACHE_PSREF_TRACE(rt, ro);
2007 #endif
2008 }
2009
/*
 * Check whether the rtentry cached in ro is still usable.
 *
 * Both callers in this file hold the rtcache read lock.  Returns the
 * cached rtentry after calling rtcache_ref() on it when the cache is not
 * marked invalid and the entry is RTF_UP with a non-NULL rt_ifp;
 * otherwise returns NULL.
 *
 * Under NET_MPSAFE an entry that is RTF_UPDATING is either waited for
 * (both locks are dropped, then the rtcache read lock is re-taken and the
 * check restarts) or, when waiting is not allowed, treated as unusable.
 */
static struct rtentry *
rtcache_validate_locked(struct route *ro)
{
	struct rtentry *rt = NULL;

#ifdef NET_MPSAFE
retry:
#endif
	rt = ro->_ro_rt;
	rtcache_invariants(ro);

	if (ro->ro_invalid) {
		rt = NULL;
		goto out;
	}

	RT_RLOCK();
	if (rt != NULL && (rt->rt_flags & RTF_UP) != 0 && rt->rt_ifp != NULL) {
#ifdef NET_MPSAFE
		if (ISSET(rt->rt_flags, RTF_UPDATING)) {
			if (rt_wait_ok()) {
				RT_UNLOCK();
				RTCACHE_UNLOCK();
				/* We can wait until the update is complete */
				rt_update_wait();
				RTCACHE_RLOCK();
				goto retry;
			} else {
				rt = NULL;
			}
		} else
#endif
			/* Usable entry: take the cache reference. */
			rtcache_ref(rt, ro);
	} else
		rt = NULL;
	RT_UNLOCK();
out:
	return rt;
}
2049
/*
 * Locked wrapper around rtcache_validate_locked(): takes the rtcache read
 * lock, validates the cached route and returns it (or NULL).
 */
struct rtentry *
rtcache_validate(struct route *ro)
{
	struct rtentry *valid;

	RTCACHE_RLOCK();
	valid = rtcache_validate_locked(ro);
	RTCACHE_UNLOCK();

	return valid;
}
2060
2061 static void
2062 rtcache_invalidate(struct dom_rtlist *rtlist)
2063 {
2064 struct route *ro;
2065
2066 RTCACHE_ASSERT_WLOCK();
2067
2068 while ((ro = LIST_FIRST(rtlist)) != NULL) {
2069 rtcache_invariants(ro);
2070 KASSERT(ro->_ro_rt != NULL);
2071 ro->ro_invalid = true;
2072 LIST_REMOVE(ro, ro_rtcache_next);
2073 LIST_INSERT_HEAD(&invalid_routes, ro, ro_rtcache_next);
2074 rtcache_invariants(ro);
2075 }
2076 }
2077
2078 static void
2079 rtcache_clear_rtentry(int family, struct rtentry *rt)
2080 {
2081 struct domain *dom;
2082 struct route *ro, *nro;
2083
2084 if ((dom = pffinddomain(family)) == NULL)
2085 return;
2086
2087 RTCACHE_WLOCK();
2088 LIST_FOREACH_SAFE(ro, &dom->dom_rtcache, ro_rtcache_next, nro) {
2089 if (ro->_ro_rt == rt)
2090 rtcache_clear(ro);
2091 }
2092 RTCACHE_UNLOCK();
2093 }
2094
2095 static void
2096 rtcache_clear(struct route *ro)
2097 {
2098
2099 RTCACHE_ASSERT_WLOCK();
2100
2101 rtcache_invariants(ro);
2102 if (ro->_ro_rt == NULL)
2103 return;
2104
2105 LIST_REMOVE(ro, ro_rtcache_next);
2106
2107 ro->_ro_rt = NULL;
2108 ro->ro_invalid = false;
2109 rtcache_invariants(ro);
2110 }
2111
/*
 * Look up dst through the route cache ro.
 *
 * Hit: the cache already holds dst and its rtentry validates; *hitp (if
 * non-NULL) is set to 1 and the rtentry is returned.
 * Miss: the cache has no destination, a different destination, or an
 * unusable rtentry; *hitp (if non-NULL) is set to 0, the cache is
 * re-targeted at dst and refilled via _rtcache_init(ro, clone).
 *
 * Returns the rtentry or NULL.  The hit path runs under the rtcache read
 * lock; every miss path drops it and takes the write lock before
 * modifying the cache.
 */
struct rtentry *
rtcache_lookup2(struct route *ro, const struct sockaddr *dst,
    int clone, int *hitp)
{
	const struct sockaddr *odst;
	struct rtentry *rt = NULL;

	RTCACHE_RLOCK();
	odst = rtcache_getdst(ro);
	if (odst == NULL) {
		/* Nothing cached yet. */
		RTCACHE_UNLOCK();
		RTCACHE_WLOCK();
		goto miss;
	}

	if (sockaddr_cmp(odst, dst) != 0) {
		/* Cached for a different destination: drop it entirely. */
		RTCACHE_UNLOCK();
		RTCACHE_WLOCK();
		rtcache_free_locked(ro);
		goto miss;
	}

	rt = rtcache_validate_locked(ro);
	if (rt == NULL) {
		/* Same destination, but the cached route is unusable. */
		RTCACHE_UNLOCK();
		RTCACHE_WLOCK();
		rtcache_clear(ro);
		goto miss;
	}

	rtcache_invariants(ro);

	RTCACHE_UNLOCK();
	if (hitp != NULL)
		*hitp = 1;
	return rt;
miss:
	/* Reached with the rtcache write lock held. */
	if (hitp != NULL)
		*hitp = 0;
	if (rtcache_setdst_locked(ro, dst) == 0)
		rt = _rtcache_init(ro, clone);

	rtcache_invariants(ro);

	RTCACHE_UNLOCK();
	return rt;
}
2159
2160 static void
2161 rtcache_free_locked(struct route *ro)
2162 {
2163
2164 RTCACHE_ASSERT_WLOCK();
2165 rtcache_clear(ro);
2166 if (ro->ro_sa != NULL) {
2167 sockaddr_free(ro->ro_sa);
2168 ro->ro_sa = NULL;
2169 }
2170 rtcache_invariants(ro);
2171 }
2172
/*
 * Locked wrapper around rtcache_free_locked(): resets ro while holding
 * the rtcache write lock.
 */
void
rtcache_free(struct route *ro)
{

	RTCACHE_WLOCK();
	rtcache_free_locked(ro);
	RTCACHE_UNLOCK();
}
2181
2182 static int
2183 rtcache_setdst_locked(struct route *ro, const struct sockaddr *sa)
2184 {
2185 KASSERT(sa != NULL);
2186
2187 RTCACHE_ASSERT_WLOCK();
2188
2189 rtcache_invariants(ro);
2190 if (ro->ro_sa != NULL) {
2191 if (ro->ro_sa->sa_family == sa->sa_family) {
2192 rtcache_clear(ro);
2193 sockaddr_copy(ro->ro_sa, ro->ro_sa->sa_len, sa);
2194 rtcache_invariants(ro);
2195 return 0;
2196 }
2197 /* free ro_sa, wrong family */
2198 rtcache_free_locked(ro);
2199 }
2200
2201 KASSERT(ro->_ro_rt == NULL);
2202
2203 if ((ro->ro_sa = sockaddr_dup(sa, M_ZERO | M_NOWAIT)) == NULL) {
2204 rtcache_invariants(ro);
2205 return ENOMEM;
2206 }
2207 rtcache_invariants(ro);
2208 return 0;
2209 }
2210
/*
 * Locked wrapper around rtcache_setdst_locked().  Returns its result:
 * 0 on success or ENOMEM.
 */
int
rtcache_setdst(struct route *ro, const struct sockaddr *sa)
{
	int rc;

	RTCACHE_WLOCK();
	rc = rtcache_setdst_locked(ro, sa);
	RTCACHE_UNLOCK();

	return rc;
}
2222
2223 const struct sockaddr *
2224 rt_settag(struct rtentry *rt, const struct sockaddr *tag)
2225 {
2226 if (rt->rt_tag != tag) {
2227 if (rt->rt_tag != NULL)
2228 sockaddr_free(rt->rt_tag);
2229 rt->rt_tag = sockaddr_dup(tag, M_ZERO | M_NOWAIT);
2230 }
2231 return rt->rt_tag;
2232 }
2233
2234 struct sockaddr *
2235 rt_gettag(const struct rtentry *rt)
2236 {
2237 return rt->rt_tag;
2238 }
2239
2240 int
2241 rt_check_reject_route(const struct rtentry *rt, const struct ifnet *ifp)
2242 {
2243
2244 if ((rt->rt_flags & RTF_REJECT) != 0) {
2245 /* Mimic looutput */
2246 if (ifp->if_flags & IFF_LOOPBACK)
2247 return (rt->rt_flags & RTF_HOST) ?
2248 EHOSTUNREACH : ENETUNREACH;
2249 else if (rt->rt_rmx.rmx_expire == 0 ||
2250 time_uptime < rt->rt_rmx.rmx_expire)
2251 return (rt->rt_flags & RTF_GATEWAY) ?
2252 EHOSTUNREACH : EHOSTDOWN;
2253 }
2254
2255 return 0;
2256 }
2257
2258 void
2259 rt_delete_matched_entries(sa_family_t family, int (*f)(struct rtentry *, void *),
2260 void *v)
2261 {
2262
2263 for (;;) {
2264 int s;
2265 int error;
2266 struct rtentry *rt, *retrt = NULL;
2267
2268 RT_RLOCK();
2269 s = splsoftnet();
2270 rt = rtbl_search_matched_entry(family, f, v);
2271 if (rt == NULL) {
2272 splx(s);
2273 RT_UNLOCK();
2274 return;
2275 }
2276 rt->rt_refcnt++;
2277 splx(s);
2278 RT_UNLOCK();
2279
2280 error = rtrequest(RTM_DELETE, rt_getkey(rt), rt->rt_gateway,
2281 rt_mask(rt), rt->rt_flags, &retrt);
2282 if (error == 0) {
2283 KASSERT(retrt == rt);
2284 KASSERT((retrt->rt_flags & RTF_UP) == 0);
2285 retrt->rt_ifp = NULL;
2286 rt_unref(rt);
2287 rt_free(retrt);
2288 } else if (error == ESRCH) {
2289 /* Someone deleted the entry already. */
2290 rt_unref(rt);
2291 } else {
2292 log(LOG_ERR, "%s: unable to delete rtentry @ %p, "
2293 "error = %d\n", rt->rt_ifp->if_xname, rt, error);
2294 /* XXX how to treat this case? */
2295 }
2296 }
2297 }
2298
2299 int
2300 rt_walktree(sa_family_t family, int (*f)(struct rtentry *, void *), void *v)
2301 {
2302 int error;
2303
2304 RT_RLOCK();
2305 error = rtbl_walktree(family, f, v);
2306 RT_UNLOCK();
2307
2308 return error;
2309 }
2310
2311 #ifdef DDB
2312
2313 #include <machine/db_machdep.h>
2314 #include <ddb/db_interface.h>
2315 #include <ddb/db_output.h>
2316
/* Shorthand for the expiry field of the route metrics, used by db_show_rtentry(). */
#define rt_expire rt_rmx.rmx_expire
2318
2319 static void
2320 db_print_sa(const struct sockaddr *sa)
2321 {
2322 int len;
2323 const u_char *p;
2324
2325 if (sa == NULL) {
2326 db_printf("[NULL]");
2327 return;
2328 }
2329
2330 p = (const u_char *)sa;
2331 len = sa->sa_len;
2332 db_printf("[");
2333 while (len > 0) {
2334 db_printf("%d", *p);
2335 p++; len--;
2336 if (len) db_printf(",");
2337 }
2338 db_printf("]\n");
2339 }
2340
2341 static void
2342 db_print_ifa(struct ifaddr *ifa)
2343 {
2344 if (ifa == NULL)
2345 return;
2346 db_printf(" ifa_addr=");
2347 db_print_sa(ifa->ifa_addr);
2348 db_printf(" ifa_dsta=");
2349 db_print_sa(ifa->ifa_dstaddr);
2350 db_printf(" ifa_mask=");
2351 db_print_sa(ifa->ifa_netmask);
2352 db_printf(" flags=0x%x,refcnt=%d,metric=%d\n",
2353 ifa->ifa_flags,
2354 ifa->ifa_refcnt,
2355 ifa->ifa_metric);
2356 }
2357
2358 /*
2359 * Function to pass to rt_walktree().
2360 * Return non-zero error to abort walk.
2361 */
2362 static int
2363 db_show_rtentry(struct rtentry *rt, void *w)
2364 {
2365 db_printf("rtentry=%p", rt);
2366
2367 db_printf(" flags=0x%x refcnt=%d use=%"PRId64" expire=%"PRId64"\n",
2368 rt->rt_flags, rt->rt_refcnt,
2369 rt->rt_use, (uint64_t)rt->rt_expire);
2370
2371 db_printf(" key="); db_print_sa(rt_getkey(rt));
2372 db_printf(" mask="); db_print_sa(rt_mask(rt));
2373 db_printf(" gw="); db_print_sa(rt->rt_gateway);
2374
2375 db_printf(" ifp=%p ", rt->rt_ifp);
2376 if (rt->rt_ifp)
2377 db_printf("(%s)", rt->rt_ifp->if_xname);
2378 else
2379 db_printf("(NULL)");
2380
2381 db_printf(" ifa=%p\n", rt->rt_ifa);
2382 db_print_ifa(rt->rt_ifa);
2383
2384 db_printf(" gwroute=%p llinfo=%p\n",
2385 rt->rt_gwroute, rt->rt_llinfo);
2386
2387 return 0;
2388 }
2389
2390 /*
2391 * Function to print all the route trees.
2392 * Use this from ddb: "show routes"
2393 */
2394 void
2395 db_show_routes(db_expr_t addr, bool have_addr,
2396 db_expr_t count, const char *modif)
2397 {
2398 rt_walktree(AF_INET, db_show_rtentry, NULL);
2399 }
2400 #endif
2401