rtsock.c revision 1.97 1 /* $NetBSD: rtsock.c,v 1.97 2007/12/20 19:53:31 dyoung Exp $ */
2
3 /*
4 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of the project nor the names of its contributors
16 * may be used to endorse or promote products derived from this software
17 * without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 */
31
32 /*
33 * Copyright (c) 1988, 1991, 1993
34 * The Regents of the University of California. All rights reserved.
35 *
36 * Redistribution and use in source and binary forms, with or without
37 * modification, are permitted provided that the following conditions
38 * are met:
39 * 1. Redistributions of source code must retain the above copyright
40 * notice, this list of conditions and the following disclaimer.
41 * 2. Redistributions in binary form must reproduce the above copyright
42 * notice, this list of conditions and the following disclaimer in the
43 * documentation and/or other materials provided with the distribution.
44 * 3. Neither the name of the University nor the names of its contributors
45 * may be used to endorse or promote products derived from this software
46 * without specific prior written permission.
47 *
48 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58 * SUCH DAMAGE.
59 *
60 * @(#)rtsock.c 8.7 (Berkeley) 10/12/95
61 */
62
63 #include <sys/cdefs.h>
64 __KERNEL_RCSID(0, "$NetBSD: rtsock.c,v 1.97 2007/12/20 19:53:31 dyoung Exp $");
65
66 #include "opt_inet.h"
67
68 #include <sys/param.h>
69 #include <sys/systm.h>
70 #include <sys/proc.h>
71 #include <sys/mbuf.h>
72 #include <sys/socket.h>
73 #include <sys/socketvar.h>
74 #include <sys/domain.h>
75 #include <sys/protosw.h>
76 #include <sys/sysctl.h>
77 #include <sys/kauth.h>
78 #ifdef RTSOCK_DEBUG
79 #include <netinet/in.h>
80 #endif /* RTSOCK_DEBUG */
81
82 #include <net/if.h>
83 #include <net/route.h>
84 #include <net/raw_cb.h>
85
86 #include <machine/stdarg.h>
87
88 DOMAIN_DEFINE(routedomain); /* forward declare and add to link set */
89
/*
 * Fixed source/destination addresses and protocol descriptor that this
 * file passes to raw_input() for every routing message it delivers.
 * Both sockaddrs consist of nothing but the two-byte length/family header.
 */
struct sockaddr route_dst = { .sa_len = 2, .sa_family = PF_ROUTE, };
struct sockaddr route_src = { .sa_len = 2, .sa_family = PF_ROUTE, };
/* sp_protocol is rewritten by the senders below before each delivery. */
struct sockproto route_proto = { .sp_family = PF_ROUTE, };
93
/*
 * State for one sysctl_rtable() request; filled in there and updated by
 * rt_msg2() and the dump/iflist walkers as messages are produced.
 */
struct walkarg {
	int	w_op;		/* sysctl operation, taken from name[1] */
	int	w_arg;		/* argument for the operation, from name[2] */
	int	w_given;	/* size of the caller's buffer (*oldlenp) */
	int	w_needed;	/* starts at -w_given, grows by each message length */
	void *	w_where;	/* next copyout position in the caller's buffer */
	int	w_tmemsize;	/* current size of w_tmem */
	int	w_tmemneeded;	/* size to retry with after an M_NOWAIT failure */
	void *	w_tmem;		/* scratch buffer in which a message is built */
};
104
105 static struct mbuf *rt_msg1(int, struct rt_addrinfo *, void *, int);
106 static int rt_msg2(int, struct rt_addrinfo *, void *, struct walkarg *, int *);
107 static int rt_xaddrs(u_char, const char *, const char *, struct rt_addrinfo *);
108 static struct mbuf *rt_makeifannouncemsg(struct ifnet *, int, int,
109 struct rt_addrinfo *);
110 static int sysctl_dumpentry(struct rtentry *, void *);
111 static int sysctl_iflist(int, struct walkarg *, int);
112 static int sysctl_rtable(SYSCTLFN_PROTO);
113 static inline void rt_adjustcount(int, int);
114
/*
 * Sleazy use of local variables throughout file, warning!!!!
 *
 * Each shorthand below expands to one slot of a variable named `info'
 * (a struct rt_addrinfo), so it is only usable inside a function that
 * has such a local in scope.
 */
#define dst	info.rti_info[RTAX_DST]
#define gate	info.rti_info[RTAX_GATEWAY]
#define netmask	info.rti_info[RTAX_NETMASK]
#define ifpaddr	info.rti_info[RTAX_IFP]
#define ifaaddr	info.rti_info[RTAX_IFA]
#define brdaddr	info.rti_info[RTAX_BRD]
122
123 static inline void
124 rt_adjustcount(int af, int cnt)
125 {
126 route_cb.any_count += cnt;
127 switch (af) {
128 case AF_INET:
129 route_cb.ip_count += cnt;
130 return;
131 #ifdef INET6
132 case AF_INET6:
133 route_cb.ip6_count += cnt;
134 return;
135 #endif
136 case AF_IPX:
137 route_cb.ipx_count += cnt;
138 return;
139 case AF_NS:
140 route_cb.ns_count += cnt;
141 return;
142 case AF_ISO:
143 route_cb.iso_count += cnt;
144 return;
145 }
146 }
147
148 /*ARGSUSED*/
149 int
150 route_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *nam,
151 struct mbuf *control, struct lwp *l)
152 {
153 int error = 0;
154 struct rawcb *rp = sotorawcb(so);
155 int s;
156
157 if (req == PRU_ATTACH) {
158 MALLOC(rp, struct rawcb *, sizeof(*rp), M_PCB, M_WAITOK);
159 if ((so->so_pcb = rp) != NULL)
160 memset(so->so_pcb, 0, sizeof(*rp));
161
162 }
163 if (req == PRU_DETACH && rp)
164 rt_adjustcount(rp->rcb_proto.sp_protocol, -1);
165 s = splsoftnet();
166
167 /*
168 * Don't call raw_usrreq() in the attach case, because
169 * we want to allow non-privileged processes to listen on
170 * and send "safe" commands to the routing socket.
171 */
172 if (req == PRU_ATTACH) {
173 if (l == NULL)
174 error = EACCES;
175 else
176 error = raw_attach(so, (int)(long)nam);
177 } else
178 error = raw_usrreq(so, req, m, nam, control, l);
179
180 rp = sotorawcb(so);
181 if (req == PRU_ATTACH && rp) {
182 if (error) {
183 free((void *)rp, M_PCB);
184 splx(s);
185 return error;
186 }
187 rt_adjustcount(rp->rcb_proto.sp_protocol, 1);
188 rp->rcb_laddr = &route_src;
189 rp->rcb_faddr = &route_dst;
190 soisconnected(so);
191 so->so_options |= SO_USELOOPBACK;
192 }
193 splx(s);
194 return error;
195 }
196
197 static const struct sockaddr *
198 intern_netmask(const struct sockaddr *mask)
199 {
200 struct radix_node *rn;
201 extern struct radix_node_head *mask_rnhead;
202
203 if (mask != NULL &&
204 (rn = rn_search(mask, mask_rnhead->rnh_treetop)))
205 mask = (const struct sockaddr *)rn->rn_key;
206
207 return mask;
208 }
209
210 /*ARGSUSED*/
211 int
212 route_output(struct mbuf *m, ...)
213 {
214 struct rt_msghdr *rtm = NULL;
215 struct rtentry *rt = NULL;
216 struct rtentry *saved_nrt = NULL;
217 struct rt_addrinfo info;
218 int len, error = 0;
219 struct ifnet *ifp = NULL;
220 struct ifaddr *ifa = NULL;
221 struct socket *so;
222 va_list ap;
223 sa_family_t family;
224
225 va_start(ap, m);
226 so = va_arg(ap, struct socket *);
227 va_end(ap);
228
229 #define senderr(e) do { error = e; goto flush;} while (/*CONSTCOND*/ 0)
230 if (m == NULL || ((m->m_len < sizeof(int32_t)) &&
231 (m = m_pullup(m, sizeof(int32_t))) == NULL))
232 return ENOBUFS;
233 if ((m->m_flags & M_PKTHDR) == 0)
234 panic("route_output");
235 len = m->m_pkthdr.len;
236 if (len < sizeof(*rtm) ||
237 len != mtod(m, struct rt_msghdr *)->rtm_msglen) {
238 dst = NULL;
239 senderr(EINVAL);
240 }
241 R_Malloc(rtm, struct rt_msghdr *, len);
242 if (rtm == NULL) {
243 dst = NULL;
244 senderr(ENOBUFS);
245 }
246 m_copydata(m, 0, len, (void *)rtm);
247 if (rtm->rtm_version != RTM_VERSION) {
248 dst = NULL;
249 senderr(EPROTONOSUPPORT);
250 }
251 rtm->rtm_pid = curproc->p_pid;
252 memset(&info, 0, sizeof(info));
253 info.rti_addrs = rtm->rtm_addrs;
254 if (rt_xaddrs(rtm->rtm_type, (void *)(rtm + 1), len + (char *)rtm, &info))
255 senderr(EINVAL);
256 info.rti_flags = rtm->rtm_flags;
257 #ifdef RTSOCK_DEBUG
258 if (dst->sa_family == AF_INET) {
259 printf("%s: extracted dst %s\n", __func__,
260 inet_ntoa(((const struct sockaddr_in *)dst)->sin_addr));
261 }
262 #endif /* RTSOCK_DEBUG */
263 if (dst == NULL || (dst->sa_family >= AF_MAX))
264 senderr(EINVAL);
265 if (gate != NULL && (gate->sa_family >= AF_MAX))
266 senderr(EINVAL);
267
268 /*
269 * Verify that the caller has the appropriate privilege; RTM_GET
270 * is the only operation the non-superuser is allowed.
271 */
272 if (kauth_authorize_network(curlwp->l_cred, KAUTH_NETWORK_ROUTE,
273 0, rtm, NULL, NULL) != 0)
274 senderr(EACCES);
275
276 switch (rtm->rtm_type) {
277
278 case RTM_ADD:
279 if (gate == NULL)
280 senderr(EINVAL);
281 error = rtrequest1(rtm->rtm_type, &info, &saved_nrt);
282 if (error == 0 && saved_nrt) {
283 rt_setmetrics(rtm->rtm_inits,
284 &rtm->rtm_rmx, &saved_nrt->rt_rmx);
285 saved_nrt->rt_refcnt--;
286 }
287 break;
288
289 case RTM_DELETE:
290 error = rtrequest1(rtm->rtm_type, &info, &saved_nrt);
291 if (error == 0) {
292 (rt = saved_nrt)->rt_refcnt++;
293 goto report;
294 }
295 break;
296
297 case RTM_GET:
298 case RTM_CHANGE:
299 case RTM_LOCK:
300 /* XXX This will mask dst with netmask before
301 * searching. It did not used to do that. --dyoung
302 */
303 error = rtrequest(RTM_GET, dst, gate, netmask, 0, &rt);
304 if (error != 0)
305 senderr(error);
306 if (rtm->rtm_type != RTM_GET) {/* XXX: too grotty */
307 struct radix_node *rn;
308
309 if (memcmp(dst, rt_getkey(rt), dst->sa_len) != 0)
310 senderr(ESRCH);
311 netmask = intern_netmask(netmask);
312 for (rn = rt->rt_nodes; rn; rn = rn->rn_dupedkey)
313 if (netmask == (const struct sockaddr *)rn->rn_mask)
314 break;
315 if (rn == NULL)
316 senderr(ETOOMANYREFS);
317 rt = (struct rtentry *)rn;
318 }
319
320 switch (rtm->rtm_type) {
321 case RTM_GET:
322 report:
323 dst = rt_getkey(rt);
324 gate = rt->rt_gateway;
325 netmask = rt_mask(rt);
326 if ((rtm->rtm_addrs & (RTA_IFP | RTA_IFA)) == 0)
327 ;
328 else if ((ifp = rt->rt_ifp) != NULL) {
329 const struct ifaddr *rtifa;
330 ifpaddr = ifp->if_dl->ifa_addr;
331 /* rtifa used to be simply rt->rt_ifa.
332 * If rt->rt_ifa != NULL, then
333 * rt_get_ifa() != NULL. So this
334 * ought to still be safe. --dyoung
335 */
336 rtifa = rt_get_ifa(rt);
337 ifaaddr = rtifa->ifa_addr;
338 #ifdef RTSOCK_DEBUG
339 if (ifaaddr->sa_family == AF_INET) {
340 printf("%s: copying out RTAX_IFA %s ",
341 __func__,
342 inet_ntoa(((const struct sockaddr_in *)ifaaddr)->sin_addr));
343 printf("for dst %s ifa_getifa %p ifa_seqno %p\n",
344 inet_ntoa(((const struct sockaddr_in *)dst)->sin_addr),
345 (void *)rtifa->ifa_getifa, rtifa->ifa_seqno);
346 }
347 #endif /* RTSOCK_DEBUG */
348 if (ifp->if_flags & IFF_POINTOPOINT)
349 brdaddr = rtifa->ifa_dstaddr;
350 else
351 brdaddr = NULL;
352 rtm->rtm_index = ifp->if_index;
353 } else {
354 ifpaddr = NULL;
355 ifaaddr = NULL;
356 }
357 (void)rt_msg2(rtm->rtm_type, &info, NULL, NULL, &len);
358 if (len > rtm->rtm_msglen) {
359 struct rt_msghdr *new_rtm;
360 R_Malloc(new_rtm, struct rt_msghdr *, len);
361 if (new_rtm == NULL)
362 senderr(ENOBUFS);
363 Bcopy(rtm, new_rtm, rtm->rtm_msglen);
364 Free(rtm); rtm = new_rtm;
365 }
366 (void)rt_msg2(rtm->rtm_type, &info, (void *)rtm,
367 NULL, 0);
368 rtm->rtm_flags = rt->rt_flags;
369 rtm->rtm_rmx = rt->rt_rmx;
370 rtm->rtm_addrs = info.rti_addrs;
371 break;
372
373 case RTM_CHANGE:
374 /*
375 * new gateway could require new ifaddr, ifp;
376 * flags may also be different; ifp may be specified
377 * by ll sockaddr when protocol address is ambiguous
378 */
379 if ((error = rt_getifa(&info)) != 0)
380 senderr(error);
381 if (gate && rt_setgate(rt, gate))
382 senderr(EDQUOT);
383 /* new gateway could require new ifaddr, ifp;
384 flags may also be different; ifp may be specified
385 by ll sockaddr when protocol address is ambiguous */
386 if (ifpaddr && (ifa = ifa_ifwithnet(ifpaddr)) &&
387 (ifp = ifa->ifa_ifp) && (ifaaddr || gate))
388 ifa = ifaof_ifpforaddr(ifaaddr ? ifaaddr : gate,
389 ifp);
390 else if ((ifaaddr && (ifa = ifa_ifwithaddr(ifaaddr))) ||
391 (gate && (ifa = ifa_ifwithroute(rt->rt_flags,
392 rt_getkey(rt), gate))))
393 ifp = ifa->ifa_ifp;
394 if (ifa) {
395 struct ifaddr *oifa = rt->rt_ifa;
396 if (oifa != ifa) {
397 if (oifa && oifa->ifa_rtrequest) {
398 oifa->ifa_rtrequest(RTM_DELETE,
399 rt, &info);
400 }
401 rt_replace_ifa(rt, ifa);
402 rt->rt_ifp = ifp;
403 }
404 }
405 rt_setmetrics(rtm->rtm_inits, &rtm->rtm_rmx,
406 &rt->rt_rmx);
407 if (rt->rt_ifa && rt->rt_ifa->ifa_rtrequest)
408 rt->rt_ifa->ifa_rtrequest(RTM_ADD, rt, &info);
409 /*
410 * Fall into
411 */
412 case RTM_LOCK:
413 rt->rt_rmx.rmx_locks &= ~(rtm->rtm_inits);
414 rt->rt_rmx.rmx_locks |=
415 (rtm->rtm_inits & rtm->rtm_rmx.rmx_locks);
416 break;
417 }
418 break;
419
420 default:
421 senderr(EOPNOTSUPP);
422 }
423
424 flush:
425 if (rtm) {
426 if (error)
427 rtm->rtm_errno = error;
428 else
429 rtm->rtm_flags |= RTF_DONE;
430 }
431 family = dst ? dst->sa_family : 0;
432 if (rt)
433 rtfree(rt);
434 {
435 struct rawcb *rp = NULL;
436 /*
437 * Check to see if we don't want our own messages.
438 */
439 if ((so->so_options & SO_USELOOPBACK) == 0) {
440 if (route_cb.any_count <= 1) {
441 if (rtm)
442 Free(rtm);
443 m_freem(m);
444 return error;
445 }
446 /* There is another listener, so construct message */
447 rp = sotorawcb(so);
448 }
449 if (rtm) {
450 m_copyback(m, 0, rtm->rtm_msglen, (void *)rtm);
451 if (m->m_pkthdr.len < rtm->rtm_msglen) {
452 m_freem(m);
453 m = NULL;
454 } else if (m->m_pkthdr.len > rtm->rtm_msglen)
455 m_adj(m, rtm->rtm_msglen - m->m_pkthdr.len);
456 Free(rtm);
457 }
458 if (rp)
459 rp->rcb_proto.sp_family = 0; /* Avoid us */
460 if (family)
461 route_proto.sp_protocol = family;
462 if (m)
463 raw_input(m, &route_proto, &route_src, &route_dst);
464 if (rp)
465 rp->rcb_proto.sp_family = PF_ROUTE;
466 }
467 return error;
468 }
469
470 void
471 rt_setmetrics(u_long which, const struct rt_metrics *in, struct rt_metrics *out)
472 {
473 #define metric(f, e) if (which & (f)) out->e = in->e;
474 metric(RTV_RPIPE, rmx_recvpipe);
475 metric(RTV_SPIPE, rmx_sendpipe);
476 metric(RTV_SSTHRESH, rmx_ssthresh);
477 metric(RTV_RTT, rmx_rtt);
478 metric(RTV_RTTVAR, rmx_rttvar);
479 metric(RTV_HOPCOUNT, rmx_hopcount);
480 metric(RTV_MTU, rmx_mtu);
481 metric(RTV_EXPIRE, rmx_expire);
482 #undef metric
483 }
484
/*
 * ROUNDUP(a): round `a' up to a multiple of sizeof(long); a value that
 * is zero or negative yields sizeof(long).
 * ADVANCE(x, n): step pointer `x' past sockaddr `n', using the rounded
 * sa_len.  Note that ROUNDUP evaluates its argument more than once.
 */
#define ROUNDUP(a) \
	((a) > 0 ? (1 + (((a) - 1) | (sizeof(long) - 1))) : sizeof(long))
#define ADVANCE(x, n) (x += ROUNDUP((n)->sa_len))
488
489 static int
490 rt_xaddrs(u_char rtmtype, const char *cp, const char *cplim, struct rt_addrinfo *rtinfo)
491 {
492 const struct sockaddr *sa = NULL; /* Quell compiler warning */
493 int i;
494
495 for (i = 0; (i < RTAX_MAX) && (cp < cplim); i++) {
496 if ((rtinfo->rti_addrs & (1 << i)) == 0)
497 continue;
498 rtinfo->rti_info[i] = sa = (const struct sockaddr *)cp;
499 ADVANCE(cp, sa);
500 }
501
502 /* Check for extra addresses specified, except RTM_GET asking for interface info. */
503 if (rtmtype == RTM_GET) {
504 if (((rtinfo->rti_addrs & (~((1 << RTAX_IFP) | (1 << RTAX_IFA)))) & (~0 << i)) != 0)
505 return 1;
506 } else {
507 if ((rtinfo->rti_addrs & (~0 << i)) != 0)
508 return 1;
509 }
510 /* Check for bad data length. */
511 if (cp != cplim) {
512 if (i == RTAX_NETMASK + 1 && sa &&
513 cp - ROUNDUP(sa->sa_len) + sa->sa_len == cplim)
514 /*
515 * The last sockaddr was netmask.
516 * We accept this for now for the sake of old
517 * binaries or third party softwares.
518 */
519 ;
520 else
521 return 1;
522 }
523 return 0;
524 }
525
526 static struct mbuf *
527 rt_msg1(int type, struct rt_addrinfo *rtinfo, void *data, int datalen)
528 {
529 struct rt_msghdr *rtm;
530 struct mbuf *m;
531 int i;
532 const struct sockaddr *sa;
533 int len, dlen;
534
535 m = m_gethdr(M_DONTWAIT, MT_DATA);
536 if (m == NULL)
537 return m;
538 MCLAIM(m, &routedomain.dom_mowner);
539 switch (type) {
540
541 case RTM_DELADDR:
542 case RTM_NEWADDR:
543 len = sizeof(struct ifa_msghdr);
544 break;
545
546 #ifdef COMPAT_14
547 case RTM_OIFINFO:
548 len = sizeof(struct if_msghdr14);
549 break;
550 #endif
551
552 case RTM_IFINFO:
553 len = sizeof(struct if_msghdr);
554 break;
555
556 case RTM_IFANNOUNCE:
557 case RTM_IEEE80211:
558 len = sizeof(struct if_announcemsghdr);
559 break;
560
561 default:
562 len = sizeof(struct rt_msghdr);
563 }
564 if (len > MHLEN + MLEN)
565 panic("rt_msg1: message too long");
566 else if (len > MHLEN) {
567 m->m_next = m_get(M_DONTWAIT, MT_DATA);
568 if (m->m_next == NULL) {
569 m_freem(m);
570 return NULL;
571 }
572 MCLAIM(m->m_next, m->m_owner);
573 m->m_pkthdr.len = len;
574 m->m_len = MHLEN;
575 m->m_next->m_len = len - MHLEN;
576 } else {
577 m->m_pkthdr.len = m->m_len = len;
578 }
579 m->m_pkthdr.rcvif = NULL;
580 m_copyback(m, 0, datalen, data);
581 rtm = mtod(m, struct rt_msghdr *);
582 for (i = 0; i < RTAX_MAX; i++) {
583 if ((sa = rtinfo->rti_info[i]) == NULL)
584 continue;
585 rtinfo->rti_addrs |= (1 << i);
586 dlen = ROUNDUP(sa->sa_len);
587 m_copyback(m, len, dlen, sa);
588 len += dlen;
589 }
590 if (m->m_pkthdr.len != len) {
591 m_freem(m);
592 return NULL;
593 }
594 rtm->rtm_msglen = len;
595 rtm->rtm_version = RTM_VERSION;
596 rtm->rtm_type = type;
597 return m;
598 }
599
600 /*
601 * rt_msg2
602 *
603 * fills 'cp' or 'w'.w_tmem with the routing socket message and
604 * returns the length of the message in 'lenp'.
605 *
606 * if walkarg is 0, cp is expected to be 0 or a buffer large enough to hold
607 * the message
608 * otherwise walkarg's w_needed is updated and if the user buffer is
609 * specified and w_needed indicates space exists the information is copied
610 * into the temp space (w_tmem). w_tmem is [re]allocated if necessary,
611 * if the allocation fails ENOBUFS is returned.
612 */
613 static int
614 rt_msg2(int type, struct rt_addrinfo *rtinfo, void *cpv, struct walkarg *w,
615 int *lenp)
616 {
617 int i;
618 int len, dlen, second_time = 0;
619 char *cp0, *cp = cpv;
620
621 rtinfo->rti_addrs = 0;
622 again:
623 switch (type) {
624
625 case RTM_DELADDR:
626 case RTM_NEWADDR:
627 len = sizeof(struct ifa_msghdr);
628 break;
629 #ifdef COMPAT_14
630 case RTM_OIFINFO:
631 len = sizeof(struct if_msghdr14);
632 break;
633 #endif
634
635 case RTM_IFINFO:
636 len = sizeof(struct if_msghdr);
637 break;
638
639 default:
640 len = sizeof(struct rt_msghdr);
641 }
642 if ((cp0 = cp) != NULL)
643 cp += len;
644 for (i = 0; i < RTAX_MAX; i++) {
645 const struct sockaddr *sa;
646
647 if ((sa = rtinfo->rti_info[i]) == NULL)
648 continue;
649 rtinfo->rti_addrs |= (1 << i);
650 dlen = ROUNDUP(sa->sa_len);
651 if (cp) {
652 bcopy(sa, cp, (unsigned)dlen);
653 cp += dlen;
654 }
655 len += dlen;
656 }
657 if (cp == NULL && w != NULL && !second_time) {
658 struct walkarg *rw = w;
659
660 rw->w_needed += len;
661 if (rw->w_needed <= 0 && rw->w_where) {
662 if (rw->w_tmemsize < len) {
663 if (rw->w_tmem)
664 free(rw->w_tmem, M_RTABLE);
665 rw->w_tmem = (void *) malloc(len, M_RTABLE,
666 M_NOWAIT);
667 if (rw->w_tmem)
668 rw->w_tmemsize = len;
669 }
670 if (rw->w_tmem) {
671 cp = rw->w_tmem;
672 second_time = 1;
673 goto again;
674 } else {
675 rw->w_tmemneeded = len;
676 return ENOBUFS;
677 }
678 }
679 }
680 if (cp) {
681 struct rt_msghdr *rtm = (struct rt_msghdr *)cp0;
682
683 rtm->rtm_version = RTM_VERSION;
684 rtm->rtm_type = type;
685 rtm->rtm_msglen = len;
686 }
687 if (lenp)
688 *lenp = len;
689 return 0;
690 }
691
692 /*
693 * This routine is called to generate a message from the routing
694 * socket indicating that a redirect has occurred, a routing lookup
695 * has failed, or that a protocol has detected timeouts to a particular
696 * destination.
697 */
698 void
699 rt_missmsg(int type, struct rt_addrinfo *rtinfo, int flags, int error)
700 {
701 struct rt_msghdr rtm;
702 struct mbuf *m;
703 const struct sockaddr *sa = rtinfo->rti_info[RTAX_DST];
704
705 if (route_cb.any_count == 0)
706 return;
707 memset(&rtm, 0, sizeof(rtm));
708 rtm.rtm_flags = RTF_DONE | flags;
709 rtm.rtm_errno = error;
710 m = rt_msg1(type, rtinfo, (void *)&rtm, sizeof(rtm));
711 if (m == NULL)
712 return;
713 mtod(m, struct rt_msghdr *)->rtm_addrs = rtinfo->rti_addrs;
714 route_proto.sp_protocol = sa ? sa->sa_family : 0;
715 raw_input(m, &route_proto, &route_src, &route_dst);
716 }
717
718 /*
719 * This routine is called to generate a message from the routing
720 * socket indicating that the status of a network interface has changed.
721 */
722 void
723 rt_ifmsg(struct ifnet *ifp)
724 {
725 struct if_msghdr ifm;
726 #ifdef COMPAT_14
727 struct if_msghdr14 oifm;
728 #endif
729 struct mbuf *m;
730 struct rt_addrinfo info;
731
732 if (route_cb.any_count == 0)
733 return;
734 memset(&info, 0, sizeof(info));
735 memset(&ifm, 0, sizeof(ifm));
736 ifm.ifm_index = ifp->if_index;
737 ifm.ifm_flags = ifp->if_flags;
738 ifm.ifm_data = ifp->if_data;
739 ifm.ifm_addrs = 0;
740 m = rt_msg1(RTM_IFINFO, &info, (void *)&ifm, sizeof(ifm));
741 if (m == NULL)
742 return;
743 route_proto.sp_protocol = 0;
744 raw_input(m, &route_proto, &route_src, &route_dst);
745 #ifdef COMPAT_14
746 memset(&info, 0, sizeof(info));
747 memset(&oifm, 0, sizeof(oifm));
748 oifm.ifm_index = ifp->if_index;
749 oifm.ifm_flags = ifp->if_flags;
750 oifm.ifm_data.ifi_type = ifp->if_data.ifi_type;
751 oifm.ifm_data.ifi_addrlen = ifp->if_data.ifi_addrlen;
752 oifm.ifm_data.ifi_hdrlen = ifp->if_data.ifi_hdrlen;
753 oifm.ifm_data.ifi_mtu = ifp->if_data.ifi_mtu;
754 oifm.ifm_data.ifi_metric = ifp->if_data.ifi_metric;
755 oifm.ifm_data.ifi_baudrate = ifp->if_data.ifi_baudrate;
756 oifm.ifm_data.ifi_ipackets = ifp->if_data.ifi_ipackets;
757 oifm.ifm_data.ifi_ierrors = ifp->if_data.ifi_ierrors;
758 oifm.ifm_data.ifi_opackets = ifp->if_data.ifi_opackets;
759 oifm.ifm_data.ifi_oerrors = ifp->if_data.ifi_oerrors;
760 oifm.ifm_data.ifi_collisions = ifp->if_data.ifi_collisions;
761 oifm.ifm_data.ifi_ibytes = ifp->if_data.ifi_ibytes;
762 oifm.ifm_data.ifi_obytes = ifp->if_data.ifi_obytes;
763 oifm.ifm_data.ifi_imcasts = ifp->if_data.ifi_imcasts;
764 oifm.ifm_data.ifi_omcasts = ifp->if_data.ifi_omcasts;
765 oifm.ifm_data.ifi_iqdrops = ifp->if_data.ifi_iqdrops;
766 oifm.ifm_data.ifi_noproto = ifp->if_data.ifi_noproto;
767 oifm.ifm_data.ifi_lastchange = ifp->if_data.ifi_lastchange;
768 oifm.ifm_addrs = 0;
769 m = rt_msg1(RTM_OIFINFO, &info, (void *)&oifm, sizeof(oifm));
770 if (m == NULL)
771 return;
772 route_proto.sp_protocol = 0;
773 raw_input(m, &route_proto, &route_src, &route_dst);
774 #endif
775 }
776
777 /*
778 * This is called to generate messages from the routing socket
779 * indicating a network interface has had addresses associated with it.
780 * if we ever reverse the logic and replace messages TO the routing
781 * socket indicate a request to configure interfaces, then it will
782 * be unnecessary as the routing socket will automatically generate
783 * copies of it.
784 */
785 void
786 rt_newaddrmsg(int cmd, struct ifaddr *ifa, int error, struct rtentry *rt)
787 {
788 struct rt_addrinfo info;
789 const struct sockaddr *sa = NULL;
790 int pass;
791 struct mbuf *m = NULL;
792 struct ifnet *ifp = ifa->ifa_ifp;
793
794 if (route_cb.any_count == 0)
795 return;
796 for (pass = 1; pass < 3; pass++) {
797 memset(&info, 0, sizeof(info));
798 if ((cmd == RTM_ADD && pass == 1) ||
799 (cmd == RTM_DELETE && pass == 2)) {
800 struct ifa_msghdr ifam;
801 int ncmd = cmd == RTM_ADD ? RTM_NEWADDR : RTM_DELADDR;
802
803 ifaaddr = sa = ifa->ifa_addr;
804 ifpaddr = ifp->if_dl->ifa_addr;
805 netmask = ifa->ifa_netmask;
806 brdaddr = ifa->ifa_dstaddr;
807 memset(&ifam, 0, sizeof(ifam));
808 ifam.ifam_index = ifp->if_index;
809 ifam.ifam_metric = ifa->ifa_metric;
810 ifam.ifam_flags = ifa->ifa_flags;
811 m = rt_msg1(ncmd, &info, (void *)&ifam, sizeof(ifam));
812 if (m == NULL)
813 continue;
814 mtod(m, struct ifa_msghdr *)->ifam_addrs =
815 info.rti_addrs;
816 }
817 if ((cmd == RTM_ADD && pass == 2) ||
818 (cmd == RTM_DELETE && pass == 1)) {
819 struct rt_msghdr rtm;
820
821 if (rt == NULL)
822 continue;
823 netmask = rt_mask(rt);
824 dst = sa = rt_getkey(rt);
825 gate = rt->rt_gateway;
826 memset(&rtm, 0, sizeof(rtm));
827 rtm.rtm_index = ifp->if_index;
828 rtm.rtm_flags |= rt->rt_flags;
829 rtm.rtm_errno = error;
830 m = rt_msg1(cmd, &info, (void *)&rtm, sizeof(rtm));
831 if (m == NULL)
832 continue;
833 mtod(m, struct rt_msghdr *)->rtm_addrs = info.rti_addrs;
834 }
835 route_proto.sp_protocol = sa ? sa->sa_family : 0;
836 raw_input(m, &route_proto, &route_src, &route_dst);
837 }
838 }
839
840 static struct mbuf *
841 rt_makeifannouncemsg(struct ifnet *ifp, int type, int what,
842 struct rt_addrinfo *info)
843 {
844 struct if_announcemsghdr ifan;
845
846 memset(info, 0, sizeof(*info));
847 memset(&ifan, 0, sizeof(ifan));
848 ifan.ifan_index = ifp->if_index;
849 strlcpy(ifan.ifan_name, ifp->if_xname, sizeof(ifan.ifan_name));
850 ifan.ifan_what = what;
851 return rt_msg1(type, info, (void *)&ifan, sizeof(ifan));
852 }
853
854 /*
855 * This is called to generate routing socket messages indicating
856 * network interface arrival and departure.
857 */
858 void
859 rt_ifannouncemsg(struct ifnet *ifp, int what)
860 {
861 struct mbuf *m;
862 struct rt_addrinfo info;
863
864 if (route_cb.any_count == 0)
865 return;
866 m = rt_makeifannouncemsg(ifp, RTM_IFANNOUNCE, what, &info);
867 if (m == NULL)
868 return;
869 route_proto.sp_protocol = 0;
870 raw_input(m, &route_proto, &route_src, &route_dst);
871 }
872
873 /*
874 * This is called to generate routing socket messages indicating
875 * IEEE80211 wireless events.
876 * XXX we piggyback on the RTM_IFANNOUNCE msg format in a clumsy way.
877 */
878 void
879 rt_ieee80211msg(struct ifnet *ifp, int what, void *data, size_t data_len)
880 {
881 struct mbuf *m;
882 struct rt_addrinfo info;
883
884 if (route_cb.any_count == 0)
885 return;
886 m = rt_makeifannouncemsg(ifp, RTM_IEEE80211, what, &info);
887 if (m == NULL)
888 return;
889 /*
890 * Append the ieee80211 data. Try to stick it in the
891 * mbuf containing the ifannounce msg; otherwise allocate
892 * a new mbuf and append.
893 *
894 * NB: we assume m is a single mbuf.
895 */
896 if (data_len > M_TRAILINGSPACE(m)) {
897 struct mbuf *n = m_get(M_NOWAIT, MT_DATA);
898 if (n == NULL) {
899 m_freem(m);
900 return;
901 }
902 (void)memcpy(mtod(n, void *), data, data_len);
903 n->m_len = data_len;
904 m->m_next = n;
905 } else if (data_len > 0) {
906 (void)memcpy(mtod(m, u_int8_t *) + m->m_len, data, data_len);
907 m->m_len += data_len;
908 }
909 if (m->m_flags & M_PKTHDR)
910 m->m_pkthdr.len += data_len;
911 mtod(m, struct if_announcemsghdr *)->ifan_msglen += data_len;
912 route_proto.sp_protocol = 0;
913 raw_input(m, &route_proto, &route_src, &route_dst);
914 }
915
916 /*
917 * This is used in dumping the kernel table via sysctl().
918 */
919 static int
920 sysctl_dumpentry(struct rtentry *rt, void *v)
921 {
922 struct walkarg *w = v;
923 int error = 0, size;
924 struct rt_addrinfo info;
925
926 if (w->w_op == NET_RT_FLAGS && !(rt->rt_flags & w->w_arg))
927 return 0;
928 memset(&info, 0, sizeof(info));
929 dst = rt_getkey(rt);
930 gate = rt->rt_gateway;
931 netmask = rt_mask(rt);
932 if (rt->rt_ifp) {
933 const struct ifaddr *rtifa;
934 ifpaddr = rt->rt_ifp->if_dl->ifa_addr;
935 /* rtifa used to be simply rt->rt_ifa. If rt->rt_ifa != NULL,
936 * then rt_get_ifa() != NULL. So this ought to still be safe.
937 * --dyoung
938 */
939 rtifa = rt_get_ifa(rt);
940 ifaaddr = rtifa->ifa_addr;
941 if (rt->rt_ifp->if_flags & IFF_POINTOPOINT)
942 brdaddr = rtifa->ifa_dstaddr;
943 }
944 if ((error = rt_msg2(RTM_GET, &info, 0, w, &size)))
945 return error;
946 if (w->w_where && w->w_tmem && w->w_needed <= 0) {
947 struct rt_msghdr *rtm = (struct rt_msghdr *)w->w_tmem;
948
949 rtm->rtm_flags = rt->rt_flags;
950 rtm->rtm_use = rt->rt_use;
951 rtm->rtm_rmx = rt->rt_rmx;
952 KASSERT(rt->rt_ifp != NULL);
953 rtm->rtm_index = rt->rt_ifp->if_index;
954 rtm->rtm_errno = rtm->rtm_pid = rtm->rtm_seq = 0;
955 rtm->rtm_addrs = info.rti_addrs;
956 if ((error = copyout(rtm, w->w_where, size)) != 0)
957 w->w_where = NULL;
958 else
959 w->w_where = (char *)w->w_where + size;
960 }
961 return error;
962 }
963
964 static int
965 sysctl_iflist(int af, struct walkarg *w, int type)
966 {
967 struct ifnet *ifp;
968 struct ifaddr *ifa;
969 struct rt_addrinfo info;
970 int len, error = 0;
971
972 memset(&info, 0, sizeof(info));
973 IFNET_FOREACH(ifp) {
974 if (w->w_arg && w->w_arg != ifp->if_index)
975 continue;
976 if (IFADDR_EMPTY(ifp))
977 continue;
978 ifpaddr = ifp->if_dl->ifa_addr;
979 switch (type) {
980 case NET_RT_IFLIST:
981 error =
982 rt_msg2(RTM_IFINFO, &info, NULL, w, &len);
983 break;
984 #ifdef COMPAT_14
985 case NET_RT_OIFLIST:
986 error =
987 rt_msg2(RTM_OIFINFO, &info, NULL, w, &len);
988 break;
989 #endif
990 default:
991 panic("sysctl_iflist(1)");
992 }
993 if (error)
994 return error;
995 ifpaddr = NULL;
996 if (w->w_where && w->w_tmem && w->w_needed <= 0) {
997 switch (type) {
998 case NET_RT_IFLIST: {
999 struct if_msghdr *ifm;
1000
1001 ifm = (struct if_msghdr *)w->w_tmem;
1002 ifm->ifm_index = ifp->if_index;
1003 ifm->ifm_flags = ifp->if_flags;
1004 ifm->ifm_data = ifp->if_data;
1005 ifm->ifm_addrs = info.rti_addrs;
1006 error = copyout(ifm, w->w_where, len);
1007 if (error)
1008 return error;
1009 w->w_where = (char *)w->w_where + len;
1010 break;
1011 }
1012
1013 #ifdef COMPAT_14
1014 case NET_RT_OIFLIST: {
1015 struct if_msghdr14 *ifm;
1016
1017 ifm = (struct if_msghdr14 *)w->w_tmem;
1018 ifm->ifm_index = ifp->if_index;
1019 ifm->ifm_flags = ifp->if_flags;
1020 ifm->ifm_data.ifi_type = ifp->if_data.ifi_type;
1021 ifm->ifm_data.ifi_addrlen =
1022 ifp->if_data.ifi_addrlen;
1023 ifm->ifm_data.ifi_hdrlen =
1024 ifp->if_data.ifi_hdrlen;
1025 ifm->ifm_data.ifi_mtu = ifp->if_data.ifi_mtu;
1026 ifm->ifm_data.ifi_metric =
1027 ifp->if_data.ifi_metric;
1028 ifm->ifm_data.ifi_baudrate =
1029 ifp->if_data.ifi_baudrate;
1030 ifm->ifm_data.ifi_ipackets =
1031 ifp->if_data.ifi_ipackets;
1032 ifm->ifm_data.ifi_ierrors =
1033 ifp->if_data.ifi_ierrors;
1034 ifm->ifm_data.ifi_opackets =
1035 ifp->if_data.ifi_opackets;
1036 ifm->ifm_data.ifi_oerrors =
1037 ifp->if_data.ifi_oerrors;
1038 ifm->ifm_data.ifi_collisions =
1039 ifp->if_data.ifi_collisions;
1040 ifm->ifm_data.ifi_ibytes =
1041 ifp->if_data.ifi_ibytes;
1042 ifm->ifm_data.ifi_obytes =
1043 ifp->if_data.ifi_obytes;
1044 ifm->ifm_data.ifi_imcasts =
1045 ifp->if_data.ifi_imcasts;
1046 ifm->ifm_data.ifi_omcasts =
1047 ifp->if_data.ifi_omcasts;
1048 ifm->ifm_data.ifi_iqdrops =
1049 ifp->if_data.ifi_iqdrops;
1050 ifm->ifm_data.ifi_noproto =
1051 ifp->if_data.ifi_noproto;
1052 ifm->ifm_data.ifi_lastchange =
1053 ifp->if_data.ifi_lastchange;
1054 ifm->ifm_addrs = info.rti_addrs;
1055 error = copyout(ifm, w->w_where, len);
1056 if (error)
1057 return error;
1058 w->w_where = (char *)w->w_where + len;
1059 break;
1060 }
1061 #endif
1062 default:
1063 panic("sysctl_iflist(2)");
1064 }
1065 }
1066 IFADDR_FOREACH(ifa, ifp) {
1067 if (af && af != ifa->ifa_addr->sa_family)
1068 continue;
1069 ifaaddr = ifa->ifa_addr;
1070 netmask = ifa->ifa_netmask;
1071 brdaddr = ifa->ifa_dstaddr;
1072 if ((error = rt_msg2(RTM_NEWADDR, &info, 0, w, &len)))
1073 return error;
1074 if (w->w_where && w->w_tmem && w->w_needed <= 0) {
1075 struct ifa_msghdr *ifam;
1076
1077 ifam = (struct ifa_msghdr *)w->w_tmem;
1078 ifam->ifam_index = ifa->ifa_ifp->if_index;
1079 ifam->ifam_flags = ifa->ifa_flags;
1080 ifam->ifam_metric = ifa->ifa_metric;
1081 ifam->ifam_addrs = info.rti_addrs;
1082 error = copyout(w->w_tmem, w->w_where, len);
1083 if (error)
1084 return error;
1085 w->w_where = (char *)w->w_where + len;
1086 }
1087 }
1088 ifaaddr = netmask = brdaddr = NULL;
1089 }
1090 return 0;
1091 }
1092
1093 static int
1094 sysctl_rtable(SYSCTLFN_ARGS)
1095 {
1096 void *where = oldp;
1097 size_t *given = oldlenp;
1098 const void *new = newp;
1099 int i, s, error = EINVAL;
1100 u_char af;
1101 struct walkarg w;
1102
1103 if (namelen == 1 && name[0] == CTL_QUERY)
1104 return sysctl_query(SYSCTLFN_CALL(rnode));
1105
1106 if (new)
1107 return EPERM;
1108 if (namelen != 3)
1109 return EINVAL;
1110 af = name[0];
1111 w.w_tmemneeded = 0;
1112 w.w_tmemsize = 0;
1113 w.w_tmem = NULL;
1114 again:
1115 /* we may return here if a later [re]alloc of the t_mem buffer fails */
1116 if (w.w_tmemneeded) {
1117 w.w_tmem = (void *) malloc(w.w_tmemneeded, M_RTABLE, M_WAITOK);
1118 w.w_tmemsize = w.w_tmemneeded;
1119 w.w_tmemneeded = 0;
1120 }
1121 w.w_op = name[1];
1122 w.w_arg = name[2];
1123 w.w_given = *given;
1124 w.w_needed = 0 - w.w_given;
1125 w.w_where = where;
1126
1127 s = splsoftnet();
1128 switch (w.w_op) {
1129
1130 case NET_RT_DUMP:
1131 case NET_RT_FLAGS:
1132 for (i = 1; i <= AF_MAX; i++)
1133 if ((af == 0 || af == i) &&
1134 (error = rt_walktree(i, sysctl_dumpentry, &w)))
1135 break;
1136 break;
1137
1138 #ifdef COMPAT_14
1139 case NET_RT_OIFLIST:
1140 error = sysctl_iflist(af, &w, w.w_op);
1141 break;
1142 #endif
1143
1144 case NET_RT_IFLIST:
1145 error = sysctl_iflist(af, &w, w.w_op);
1146 }
1147 splx(s);
1148
1149 /* check to see if we couldn't allocate memory with NOWAIT */
1150 if (error == ENOBUFS && w.w_tmem == 0 && w.w_tmemneeded)
1151 goto again;
1152
1153 if (w.w_tmem)
1154 free(w.w_tmem, M_RTABLE);
1155 w.w_needed += w.w_given;
1156 if (where) {
1157 *given = (char *)w.w_where - (char *)where;
1158 if (*given < w.w_needed)
1159 return ENOMEM;
1160 } else {
1161 *given = (11 * w.w_needed) / 10;
1162 }
1163 return error;
1164 }
1165
/*
 * Definitions of protocols supported in the ROUTE domain.
 *
 * There is a single raw-socket protocol: output goes through
 * route_output() and user requests through route_usrreq(); input,
 * control input and initialisation use the generic raw_* routines.
 */

const struct protosw routesw[] = {
	{
		.pr_type = SOCK_RAW,
		.pr_domain = &routedomain,
		.pr_flags = PR_ATOMIC|PR_ADDR,
		.pr_input = raw_input,
		.pr_output = route_output,
		.pr_ctlinput = raw_ctlinput,
		.pr_usrreq = route_usrreq,
		.pr_init = raw_init,
	},
};

/* The PF_ROUTE domain itself; its protocol table is routesw above. */
struct domain routedomain = {
	.dom_family = PF_ROUTE,
	.dom_name = "route",
	.dom_init = route_init,
	.dom_protosw = routesw,
	/* One past the last entry of routesw. */
	.dom_protoswNPROTOSW = &routesw[__arraycount(routesw)],
};
1190
/*
 * Create the sysctl nodes for the routing domain: the top-level "net"
 * node, "route" (CTL_NET, PF_ROUTE), "rtable" beneath it handled by
 * sysctl_rtable(), and a "stats" structure node exporting rtstat.
 */
SYSCTL_SETUP(sysctl_net_route_setup, "sysctl net.route subtree setup")
{
	const struct sysctlnode *rnode = NULL;

	sysctl_createv(clog, 0, NULL, NULL,
		       CTLFLAG_PERMANENT,
		       CTLTYPE_NODE, "net", NULL,
		       NULL, 0, NULL, 0,
		       CTL_NET, CTL_EOL);

	/* rnode receives the "route" node so "stats" can hang off it. */
	sysctl_createv(clog, 0, NULL, &rnode,
		       CTLFLAG_PERMANENT,
		       CTLTYPE_NODE, "route",
		       SYSCTL_DESCR("PF_ROUTE information"),
		       NULL, 0, NULL, 0,
		       CTL_NET, PF_ROUTE, CTL_EOL);
	sysctl_createv(clog, 0, NULL, NULL,
		       CTLFLAG_PERMANENT,
		       CTLTYPE_NODE, "rtable",
		       SYSCTL_DESCR("Routing table information"),
		       sysctl_rtable, 0, NULL, 0,
		       CTL_NET, PF_ROUTE, 0 /* any protocol */, CTL_EOL);
	/* Created relative to rnode, with a dynamically assigned number. */
	sysctl_createv(clog, 0, &rnode, NULL,
		       CTLFLAG_PERMANENT,
		       CTLTYPE_STRUCT, "stats",
		       SYSCTL_DESCR("Routing statistics"),
		       NULL, 0, &rtstat, sizeof(rtstat),
		       CTL_CREATE, CTL_EOL);
}
1220