ip_mroute.c revision 1.63 1 /* $NetBSD: ip_mroute.c,v 1.63 2002/11/02 07:28:13 perry Exp $ */
2
3 /*
4 * Copyright (c) 1989 Stephen Deering
5 * Copyright (c) 1992, 1993
6 * The Regents of the University of California. All rights reserved.
7 *
8 * This code is derived from software contributed to Berkeley by
9 * Stephen Deering of Stanford University.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 * 3. All advertising materials mentioning features or use of this software
20 * must display the following acknowledgement:
21 * This product includes software developed by the University of
22 * California, Berkeley and its contributors.
23 * 4. Neither the name of the University nor the names of its contributors
24 * may be used to endorse or promote products derived from this software
25 * without specific prior written permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
28 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
31 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
32 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
33 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
34 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
35 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
36 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
37 * SUCH DAMAGE.
38 *
39 * @(#)ip_mroute.c 8.2 (Berkeley) 11/15/93
40 */
41
42 /*
43 * IP multicast forwarding procedures
44 *
45 * Written by David Waitzman, BBN Labs, August 1988.
46 * Modified by Steve Deering, Stanford, February 1989.
47 * Modified by Mark J. Steiglitz, Stanford, May, 1991
48 * Modified by Van Jacobson, LBL, January 1993
49 * Modified by Ajit Thyagarajan, PARC, August 1993
50 * Modified by Bill Fenner, PARC, April 1994
51 * Modified by Charles M. Hannum, NetBSD, May 1995.
52 *
53 * MROUTING Revision: 1.2
54 */
55
56 #include <sys/cdefs.h>
57 __KERNEL_RCSID(0, "$NetBSD: ip_mroute.c,v 1.63 2002/11/02 07:28:13 perry Exp $");
58
59 #include "opt_ipsec.h"
60
61 #include <sys/param.h>
62 #include <sys/systm.h>
63 #include <sys/callout.h>
64 #include <sys/mbuf.h>
65 #include <sys/socket.h>
66 #include <sys/socketvar.h>
67 #include <sys/protosw.h>
68 #include <sys/errno.h>
69 #include <sys/time.h>
70 #include <sys/kernel.h>
71 #include <sys/ioctl.h>
72 #include <sys/syslog.h>
73 #include <net/if.h>
74 #include <net/route.h>
75 #include <net/raw_cb.h>
76 #include <netinet/in.h>
77 #include <netinet/in_var.h>
78 #include <netinet/in_systm.h>
79 #include <netinet/ip.h>
80 #include <netinet/ip_var.h>
81 #include <netinet/in_pcb.h>
82 #include <netinet/udp.h>
83 #include <netinet/igmp.h>
84 #include <netinet/igmp_var.h>
85 #include <netinet/ip_mroute.h>
86 #include <netinet/ip_encap.h>
87
88 #include <machine/stdarg.h>
89
#define IP_MULTICASTOPTS	0

/*
 * M_PULLUP(m, len): when (m) is non-null and either carries the M_EXT
 * flag or holds fewer than (len) bytes, replace (m) with the result of
 * m_pullup((m), (len)).  Callers in this file test (m) against 0
 * afterwards.
 */
#define M_PULLUP(m, len)						\
do {									\
	if ((m) && ((m)->m_flags & M_EXT || (m)->m_len < (len)))	\
		(m) = m_pullup((m), (len));				\
} while (/*CONSTCOND*/ 0)
96
97 /*
98 * Globals. All but ip_mrouter and ip_mrtproto could be static,
99 * except for netstat or debugging purposes.
100 */
101 struct socket *ip_mrouter = 0;
102 int ip_mrtproto = IGMP_DVMRP; /* for netstat only */
103
104 #define NO_RTE_FOUND 0x1
105 #define RTE_FOUND 0x2
106
107 #define MFCHASH(a, g) \
108 ((((a).s_addr >> 20) ^ ((a).s_addr >> 10) ^ (a).s_addr ^ \
109 ((g).s_addr >> 20) ^ ((g).s_addr >> 10) ^ (g).s_addr) & mfchash)
110 LIST_HEAD(mfchashhdr, mfc) *mfchashtbl;
111 u_long mfchash;
112
113 u_char nexpire[MFCTBLSIZ];
114 struct vif viftable[MAXVIFS];
115 struct mrtstat mrtstat;
116 u_int mrtdebug = 0; /* debug level */
117 #define DEBUG_MFC 0x02
118 #define DEBUG_FORWARD 0x04
119 #define DEBUG_EXPIRE 0x08
120 #define DEBUG_XMIT 0x10
121 u_int tbfdebug = 0; /* tbf debug level */
122 #ifdef RSVP_ISI
123 u_int rsvpdebug = 0; /* rsvp debug level */
124 extern struct socket *ip_rsvpd;
125 extern int rsvp_on;
126 #endif /* RSVP_ISI */
127
128 /* vif attachment using sys/netinet/ip_encap.c */
129 extern struct domain inetdomain;
130 static void vif_input __P((struct mbuf *, ...));
131 static int vif_encapcheck __P((const struct mbuf *, int, int, void *));
132 static struct protosw vif_protosw =
133 { SOCK_RAW, &inetdomain, IPPROTO_IPV4, PR_ATOMIC|PR_ADDR,
134 vif_input, rip_output, 0, rip_ctloutput,
135 rip_usrreq,
136 0, 0, 0, 0,
137 };
138
139 #define EXPIRE_TIMEOUT (hz / 4) /* 4x / second */
140 #define UPCALL_EXPIRE 6 /* number of timeouts */
141
142 /*
143 * Define the token bucket filter structures
144 */
145
146 #define TBF_REPROCESS (hz / 100) /* 100x / second */
147
148 static int get_sg_cnt __P((struct sioc_sg_req *));
149 static int get_vif_cnt __P((struct sioc_vif_req *));
150 static int ip_mrouter_init __P((struct socket *, struct mbuf *));
151 static int get_version __P((struct mbuf *));
152 static int set_assert __P((struct mbuf *));
153 static int get_assert __P((struct mbuf *));
154 static int add_vif __P((struct mbuf *));
155 static int del_vif __P((struct mbuf *));
156 static void update_mfc __P((struct mfcctl *, struct mfc *));
157 static void expire_mfc __P((struct mfc *));
158 static int add_mfc __P((struct mbuf *));
159 #ifdef UPCALL_TIMING
160 static void collate __P((struct timeval *));
161 #endif
162 static int del_mfc __P((struct mbuf *));
163 static int socket_send __P((struct socket *, struct mbuf *,
164 struct sockaddr_in *));
165 static void expire_upcalls __P((void *));
166 #ifdef RSVP_ISI
167 static int ip_mdq __P((struct mbuf *, struct ifnet *, struct mfc *, vifi_t));
168 #else
169 static int ip_mdq __P((struct mbuf *, struct ifnet *, struct mfc *));
170 #endif
171 static void phyint_send __P((struct ip *, struct vif *, struct mbuf *));
172 static void encap_send __P((struct ip *, struct vif *, struct mbuf *));
173 static void tbf_control __P((struct vif *, struct mbuf *, struct ip *,
174 u_int32_t));
175 static void tbf_queue __P((struct vif *, struct mbuf *));
176 static void tbf_process_q __P((struct vif *));
177 static void tbf_reprocess_q __P((void *));
178 static int tbf_dq_sel __P((struct vif *, struct ip *));
179 static void tbf_send_packet __P((struct vif *, struct mbuf *));
180 static void tbf_update_tokens __P((struct vif *));
181 static int priority __P((struct vif *, struct ip *));
182
183 /*
184 * 'Interfaces' associated with decapsulator (so we can tell
185 * packets that went through it from ones that get reflected
186 * by a broken gateway). These interfaces are never linked into
187 * the system ifnet list & no routes point to them. I.e., packets
188 * can't be sent this way. They only exist as a placeholder for
189 * multicast source verification.
190 */
191 #if 0
192 struct ifnet multicast_decap_if[MAXVIFS];
193 #endif
194
195 #define ENCAP_TTL 64
196 #define ENCAP_PROTO IPPROTO_IPIP /* 4 */
197
198 /* prototype IP hdr for encapsulated packets */
199 struct ip multicast_encap_iphdr = {
200 #if BYTE_ORDER == LITTLE_ENDIAN
201 sizeof(struct ip) >> 2, IPVERSION,
202 #else
203 IPVERSION, sizeof(struct ip) >> 2,
204 #endif
205 0, /* tos */
206 sizeof(struct ip), /* total length */
207 0, /* id */
208 0, /* frag offset */
209 ENCAP_TTL, ENCAP_PROTO,
210 0, /* checksum */
211 };
212
213 /*
214 * Private variables.
215 */
216 static vifi_t numvifs = 0;
217
218 static struct callout expire_upcalls_ch;
219
220 /*
221 * one-back cache used by vif_encapcheck to locate a tunnel's vif
222 * given a datagram's src ip address.
223 */
224 static struct in_addr last_encap_src;
225 static struct vif *last_encap_vif;
226
227 /*
228 * whether or not special PIM assert processing is enabled.
229 */
230 static int pim_assert;
231 /*
232 * Rate limit for assert notification messages, in usec
233 */
234 #define ASSERT_MSG_TIME 3000000
235
/*
 * Find a route for a given origin IP address and Multicast group address
 * Type of service parameter to be added in the future!!!
 *
 * MFCFIND(o, g, rt) walks the hash chain selected by MFCHASH(o, g) and
 * stores in (rt) the first entry whose origin and group match and whose
 * stall queue is empty (mfc_stall == 0), or 0 when there is none.
 * mrtstat.mrts_mfc_lookups is bumped on every use and
 * mrtstat.mrts_mfc_misses when nothing is found.
 *
 * The body is wrapped in do { } while (0) so that "MFCFIND(...);" is a
 * single statement and can be used safely in an unbraced if/else.
 */
#define MFCFIND(o, g, rt)						\
do {									\
	struct mfc *_rt;						\
									\
	(rt) = 0;							\
	++mrtstat.mrts_mfc_lookups;					\
	LIST_FOREACH(_rt, &mfchashtbl[MFCHASH(o, g)], mfc_hash) {	\
		if (in_hosteq(_rt->mfc_origin, (o)) &&			\
		    in_hosteq(_rt->mfc_mcastgrp, (g)) &&		\
		    _rt->mfc_stall == 0) {				\
			(rt) = _rt;					\
			break;	/* leaves LIST_FOREACH only */		\
		}							\
	}								\
	if ((rt) == 0)							\
		++mrtstat.mrts_mfc_misses;				\
} while (/*CONSTCOND*/ 0)
256
/*
 * Macros to compute elapsed time efficiently
 * Borrowed from Van Jacobson's scheduling code
 *
 * TV_DELTA(a, b, delta) stores in (delta) the difference a - b of two
 * struct timeval values, expressed in microseconds.  Second differences
 * of 0, 1 and 2 are handled by additions only; anything else falls back
 * to a multiplication.
 *
 * The body is wrapped in do { } while (0) so that "TV_DELTA(...);" is a
 * single statement, and (delta) is parenthesised at every use.
 */
#define TV_DELTA(a, b, delta)						\
do {									\
	int xxs;							\
									\
	(delta) = (a).tv_usec - (b).tv_usec;				\
	xxs = (a).tv_sec - (b).tv_sec;					\
	switch (xxs) {							\
	case 2:								\
		(delta) += 1000000;					\
		/* FALLTHROUGH */					\
	case 1:								\
		(delta) += 1000000;					\
		/* FALLTHROUGH */					\
	case 0:								\
		break;							\
	default:							\
		(delta) += (1000000 * xxs);				\
		break;							\
	}								\
} while (/*CONSTCOND*/ 0)
279
#ifdef UPCALL_TIMING
/* Histogram of upcall delays filled in by collate(); the last slot catches overflow. */
u_int32_t upcall_data[51];
#endif /* UPCALL_TIMING */
283
284 /*
285 * Handle MRT setsockopt commands to modify the multicast routing tables.
286 */
287 int
288 ip_mrouter_set(so, optname, m)
289 struct socket *so;
290 int optname;
291 struct mbuf **m;
292 {
293 int error;
294
295 if (optname != MRT_INIT && so != ip_mrouter)
296 error = ENOPROTOOPT;
297 else
298 switch (optname) {
299 case MRT_INIT:
300 error = ip_mrouter_init(so, *m);
301 break;
302 case MRT_DONE:
303 error = ip_mrouter_done();
304 break;
305 case MRT_ADD_VIF:
306 error = add_vif(*m);
307 break;
308 case MRT_DEL_VIF:
309 error = del_vif(*m);
310 break;
311 case MRT_ADD_MFC:
312 error = add_mfc(*m);
313 break;
314 case MRT_DEL_MFC:
315 error = del_mfc(*m);
316 break;
317 case MRT_ASSERT:
318 error = set_assert(*m);
319 break;
320 default:
321 error = ENOPROTOOPT;
322 break;
323 }
324
325 if (*m)
326 m_free(*m);
327 return (error);
328 }
329
330 /*
331 * Handle MRT getsockopt commands
332 */
333 int
334 ip_mrouter_get(so, optname, m)
335 struct socket *so;
336 int optname;
337 struct mbuf **m;
338 {
339 int error;
340
341 if (so != ip_mrouter)
342 error = ENOPROTOOPT;
343 else {
344 *m = m_get(M_WAIT, MT_SOOPTS);
345
346 switch (optname) {
347 case MRT_VERSION:
348 error = get_version(*m);
349 break;
350 case MRT_ASSERT:
351 error = get_assert(*m);
352 break;
353 default:
354 error = ENOPROTOOPT;
355 break;
356 }
357
358 if (error)
359 m_free(*m);
360 }
361
362 return (error);
363 }
364
365 /*
366 * Handle ioctl commands to obtain information from the cache
367 */
368 int
369 mrt_ioctl(so, cmd, data)
370 struct socket *so;
371 u_long cmd;
372 caddr_t data;
373 {
374 int error;
375
376 if (so != ip_mrouter)
377 error = EINVAL;
378 else
379 switch (cmd) {
380 case SIOCGETVIFCNT:
381 error = get_vif_cnt((struct sioc_vif_req *)data);
382 break;
383 case SIOCGETSGCNT:
384 error = get_sg_cnt((struct sioc_sg_req *)data);
385 break;
386 default:
387 error = EINVAL;
388 break;
389 }
390
391 return (error);
392 }
393
394 /*
395 * returns the packet, byte, rpf-failure count for the source group provided
396 */
397 static int
398 get_sg_cnt(req)
399 struct sioc_sg_req *req;
400 {
401 struct mfc *rt;
402 int s;
403
404 s = splsoftnet();
405 MFCFIND(req->src, req->grp, rt);
406 splx(s);
407 if (rt != 0) {
408 req->pktcnt = rt->mfc_pkt_cnt;
409 req->bytecnt = rt->mfc_byte_cnt;
410 req->wrong_if = rt->mfc_wrong_if;
411 } else
412 req->pktcnt = req->bytecnt = req->wrong_if = 0xffffffff;
413
414 return (0);
415 }
416
417 /*
418 * returns the input and output packet and byte counts on the vif provided
419 */
420 static int
421 get_vif_cnt(req)
422 struct sioc_vif_req *req;
423 {
424 vifi_t vifi = req->vifi;
425
426 if (vifi >= numvifs)
427 return (EINVAL);
428
429 req->icount = viftable[vifi].v_pkt_in;
430 req->ocount = viftable[vifi].v_pkt_out;
431 req->ibytes = viftable[vifi].v_bytes_in;
432 req->obytes = viftable[vifi].v_bytes_out;
433
434 return (0);
435 }
436
437 /*
438 * Enable multicast routing
439 */
440 static int
441 ip_mrouter_init(so, m)
442 struct socket *so;
443 struct mbuf *m;
444 {
445 int *v;
446
447 if (mrtdebug)
448 log(LOG_DEBUG,
449 "ip_mrouter_init: so_type = %d, pr_protocol = %d\n",
450 so->so_type, so->so_proto->pr_protocol);
451
452 if (so->so_type != SOCK_RAW ||
453 so->so_proto->pr_protocol != IPPROTO_IGMP)
454 return (EOPNOTSUPP);
455
456 if (m == 0 || m->m_len < sizeof(int))
457 return (EINVAL);
458
459 v = mtod(m, int *);
460 if (*v != 1)
461 return (EINVAL);
462
463 if (ip_mrouter != 0)
464 return (EADDRINUSE);
465
466 ip_mrouter = so;
467
468 mfchashtbl =
469 hashinit(MFCTBLSIZ, HASH_LIST, M_MRTABLE, M_WAITOK, &mfchash);
470 bzero((caddr_t)nexpire, sizeof(nexpire));
471
472 pim_assert = 0;
473
474 callout_init(&expire_upcalls_ch);
475 callout_reset(&expire_upcalls_ch, EXPIRE_TIMEOUT,
476 expire_upcalls, NULL);
477
478 if (mrtdebug)
479 log(LOG_DEBUG, "ip_mrouter_init\n");
480
481 return (0);
482 }
483
484 /*
485 * Disable multicast routing
486 */
487 int
488 ip_mrouter_done()
489 {
490 vifi_t vifi;
491 struct vif *vifp;
492 int i;
493 int s;
494
495 s = splsoftnet();
496
497 /* Clear out all the vifs currently in use. */
498 for (vifi = 0; vifi < numvifs; vifi++) {
499 vifp = &viftable[vifi];
500 if (!in_nullhost(vifp->v_lcl_addr))
501 reset_vif(vifp);
502 }
503
504 numvifs = 0;
505 pim_assert = 0;
506
507 callout_stop(&expire_upcalls_ch);
508
509 /*
510 * Free all multicast forwarding cache entries.
511 */
512 for (i = 0; i < MFCTBLSIZ; i++) {
513 struct mfc *rt, *nrt;
514
515 for (rt = LIST_FIRST(&mfchashtbl[i]); rt; rt = nrt) {
516 nrt = LIST_NEXT(rt, mfc_hash);
517
518 expire_mfc(rt);
519 }
520 }
521
522 free(mfchashtbl, M_MRTABLE);
523 mfchashtbl = 0;
524
525 /* Reset de-encapsulation cache. */
526
527 ip_mrouter = 0;
528
529 splx(s);
530
531 if (mrtdebug)
532 log(LOG_DEBUG, "ip_mrouter_done\n");
533
534 return (0);
535 }
536
537 static int
538 get_version(m)
539 struct mbuf *m;
540 {
541 int *v = mtod(m, int *);
542
543 *v = 0x0305; /* XXX !!!! */
544 m->m_len = sizeof(int);
545 return (0);
546 }
547
548 /*
549 * Set PIM assert processing global
550 */
551 static int
552 set_assert(m)
553 struct mbuf *m;
554 {
555 int *i;
556
557 if (m == 0 || m->m_len < sizeof(int))
558 return (EINVAL);
559
560 i = mtod(m, int *);
561 pim_assert = !!*i;
562 return (0);
563 }
564
565 /*
566 * Get PIM assert processing global
567 */
568 static int
569 get_assert(m)
570 struct mbuf *m;
571 {
572 int *i = mtod(m, int *);
573
574 *i = pim_assert;
575 m->m_len = sizeof(int);
576 return (0);
577 }
578
579 static struct sockaddr_in sin = { sizeof(sin), AF_INET };
580
581 /*
582 * Add a vif to the vif table
583 */
584 static int
585 add_vif(m)
586 struct mbuf *m;
587 {
588 struct vifctl *vifcp;
589 struct vif *vifp;
590 struct ifaddr *ifa;
591 struct ifnet *ifp;
592 struct ifreq ifr;
593 int error, s;
594
595 if (m == 0 || m->m_len < sizeof(struct vifctl))
596 return (EINVAL);
597
598 vifcp = mtod(m, struct vifctl *);
599 if (vifcp->vifc_vifi >= MAXVIFS)
600 return (EINVAL);
601
602 vifp = &viftable[vifcp->vifc_vifi];
603 if (!in_nullhost(vifp->v_lcl_addr))
604 return (EADDRINUSE);
605
606 /* Find the interface with an address in AF_INET family. */
607 sin.sin_addr = vifcp->vifc_lcl_addr;
608 ifa = ifa_ifwithaddr(sintosa(&sin));
609 if (ifa == 0)
610 return (EADDRNOTAVAIL);
611
612 if (vifcp->vifc_flags & VIFF_TUNNEL) {
613 if (vifcp->vifc_flags & VIFF_SRCRT) {
614 log(LOG_ERR, "Source routed tunnels not supported\n");
615 return (EOPNOTSUPP);
616 }
617
618 /* attach this vif to decapsulator dispatch table */
619 vifp->v_encap_cookie = encap_attach_func(AF_INET, IPPROTO_IPV4,
620 vif_encapcheck, &vif_protosw, vifp);
621 if (!vifp->v_encap_cookie)
622 return (EINVAL);
623
624 /* Create a fake encapsulation interface. */
625 ifp = (struct ifnet *)malloc(sizeof(*ifp), M_MRTABLE, M_WAITOK);
626 bzero(ifp, sizeof(*ifp));
627 sprintf(ifp->if_xname, "mdecap%d", vifcp->vifc_vifi);
628
629 /* Prepare cached route entry. */
630 bzero(&vifp->v_route, sizeof(vifp->v_route));
631 } else {
632 /* Use the physical interface associated with the address. */
633 ifp = ifa->ifa_ifp;
634
635 /* Make sure the interface supports multicast. */
636 if ((ifp->if_flags & IFF_MULTICAST) == 0)
637 return (EOPNOTSUPP);
638
639 /* Enable promiscuous reception of all IP multicasts. */
640 satosin(&ifr.ifr_addr)->sin_len = sizeof(struct sockaddr_in);
641 satosin(&ifr.ifr_addr)->sin_family = AF_INET;
642 satosin(&ifr.ifr_addr)->sin_addr = zeroin_addr;
643 error = (*ifp->if_ioctl)(ifp, SIOCADDMULTI, (caddr_t)&ifr);
644 if (error)
645 return (error);
646 }
647
648 s = splsoftnet();
649
650 /* Define parameters for the tbf structure. */
651 vifp->tbf_q = 0;
652 vifp->tbf_t = &vifp->tbf_q;
653 microtime(&vifp->tbf_last_pkt_t);
654 vifp->tbf_n_tok = 0;
655 vifp->tbf_q_len = 0;
656 vifp->tbf_max_q_len = MAXQSIZE;
657
658 vifp->v_flags = vifcp->vifc_flags;
659 vifp->v_threshold = vifcp->vifc_threshold;
660 /* scaling up here allows division by 1024 in critical code */
661 vifp->v_rate_limit = vifcp->vifc_rate_limit * 1024 / 1000;
662 vifp->v_lcl_addr = vifcp->vifc_lcl_addr;
663 vifp->v_rmt_addr = vifcp->vifc_rmt_addr;
664 vifp->v_ifp = ifp;
665 /* Initialize per vif pkt counters. */
666 vifp->v_pkt_in = 0;
667 vifp->v_pkt_out = 0;
668 vifp->v_bytes_in = 0;
669 vifp->v_bytes_out = 0;
670
671 callout_init(&vifp->v_repq_ch);
672
673 #ifdef RSVP_ISI
674 vifp->v_rsvp_on = 0;
675 vifp->v_rsvpd = 0;
676 #endif /* RSVP_ISI */
677
678 splx(s);
679
680 /* Adjust numvifs up if the vifi is higher than numvifs. */
681 if (numvifs <= vifcp->vifc_vifi)
682 numvifs = vifcp->vifc_vifi + 1;
683
684 if (mrtdebug)
685 log(LOG_DEBUG, "add_vif #%d, lcladdr %x, %s %x, thresh %x, rate %d\n",
686 vifcp->vifc_vifi,
687 ntohl(vifcp->vifc_lcl_addr.s_addr),
688 (vifcp->vifc_flags & VIFF_TUNNEL) ? "rmtaddr" : "mask",
689 ntohl(vifcp->vifc_rmt_addr.s_addr),
690 vifcp->vifc_threshold,
691 vifcp->vifc_rate_limit);
692
693 return (0);
694 }
695
696 void
697 reset_vif(vifp)
698 struct vif *vifp;
699 {
700 struct mbuf *m, *n;
701 struct ifnet *ifp;
702 struct ifreq ifr;
703
704 callout_stop(&vifp->v_repq_ch);
705
706 /* detach this vif from decapsulator dispatch table */
707 encap_detach(vifp->v_encap_cookie);
708 vifp->v_encap_cookie = NULL;
709
710 for (m = vifp->tbf_q; m != 0; m = n) {
711 n = m->m_nextpkt;
712 m_freem(m);
713 }
714
715 if (vifp->v_flags & VIFF_TUNNEL) {
716 free(vifp->v_ifp, M_MRTABLE);
717 if (vifp == last_encap_vif) {
718 last_encap_vif = 0;
719 last_encap_src = zeroin_addr;
720 }
721 } else {
722 satosin(&ifr.ifr_addr)->sin_len = sizeof(struct sockaddr_in);
723 satosin(&ifr.ifr_addr)->sin_family = AF_INET;
724 satosin(&ifr.ifr_addr)->sin_addr = zeroin_addr;
725 ifp = vifp->v_ifp;
726 (*ifp->if_ioctl)(ifp, SIOCDELMULTI, (caddr_t)&ifr);
727 }
728 bzero((caddr_t)vifp, sizeof(*vifp));
729 }
730
731 /*
732 * Delete a vif from the vif table
733 */
734 static int
735 del_vif(m)
736 struct mbuf *m;
737 {
738 vifi_t *vifip;
739 struct vif *vifp;
740 vifi_t vifi;
741 int s;
742
743 if (m == 0 || m->m_len < sizeof(vifi_t))
744 return (EINVAL);
745
746 vifip = mtod(m, vifi_t *);
747 if (*vifip >= numvifs)
748 return (EINVAL);
749
750 vifp = &viftable[*vifip];
751 if (in_nullhost(vifp->v_lcl_addr))
752 return (EADDRNOTAVAIL);
753
754 s = splsoftnet();
755
756 reset_vif(vifp);
757
758 /* Adjust numvifs down */
759 for (vifi = numvifs; vifi > 0; vifi--)
760 if (!in_nullhost(viftable[vifi-1].v_lcl_addr))
761 break;
762 numvifs = vifi;
763
764 splx(s);
765
766 if (mrtdebug)
767 log(LOG_DEBUG, "del_vif %d, numvifs %d\n", *vifip, numvifs);
768
769 return (0);
770 }
771
772 static void
773 update_mfc(mfccp, rt)
774 struct mfcctl *mfccp;
775 struct mfc *rt;
776 {
777 vifi_t vifi;
778
779 rt->mfc_parent = mfccp->mfcc_parent;
780 for (vifi = 0; vifi < numvifs; vifi++)
781 rt->mfc_ttls[vifi] = mfccp->mfcc_ttls[vifi];
782 rt->mfc_expire = 0;
783 rt->mfc_stall = 0;
784 }
785
786 static void
787 expire_mfc(rt)
788 struct mfc *rt;
789 {
790 struct rtdetq *rte, *nrte;
791
792 for (rte = rt->mfc_stall; rte != 0; rte = nrte) {
793 nrte = rte->next;
794 m_freem(rte->m);
795 free(rte, M_MRTABLE);
796 }
797
798 LIST_REMOVE(rt, mfc_hash);
799 free(rt, M_MRTABLE);
800 }
801
802 /*
803 * Add an mfc entry
804 */
805 static int
806 add_mfc(m)
807 struct mbuf *m;
808 {
809 struct mfcctl *mfccp;
810 struct mfc *rt;
811 u_int32_t hash = 0;
812 struct rtdetq *rte, *nrte;
813 u_short nstl;
814 int s;
815
816 if (m == 0 || m->m_len < sizeof(struct mfcctl))
817 return (EINVAL);
818
819 mfccp = mtod(m, struct mfcctl *);
820
821 s = splsoftnet();
822 MFCFIND(mfccp->mfcc_origin, mfccp->mfcc_mcastgrp, rt);
823
824 /* If an entry already exists, just update the fields */
825 if (rt) {
826 if (mrtdebug & DEBUG_MFC)
827 log(LOG_DEBUG,"add_mfc update o %x g %x p %x\n",
828 ntohl(mfccp->mfcc_origin.s_addr),
829 ntohl(mfccp->mfcc_mcastgrp.s_addr),
830 mfccp->mfcc_parent);
831
832 if (rt->mfc_expire)
833 nexpire[hash]--;
834
835 update_mfc(mfccp, rt);
836
837 splx(s);
838 return (0);
839 }
840
841 /*
842 * Find the entry for which the upcall was made and update
843 */
844 nstl = 0;
845 hash = MFCHASH(mfccp->mfcc_origin, mfccp->mfcc_mcastgrp);
846 LIST_FOREACH(rt, &mfchashtbl[hash], mfc_hash) {
847 if (in_hosteq(rt->mfc_origin, mfccp->mfcc_origin) &&
848 in_hosteq(rt->mfc_mcastgrp, mfccp->mfcc_mcastgrp) &&
849 rt->mfc_stall != 0) {
850 if (nstl++)
851 log(LOG_ERR, "add_mfc %s o %x g %x p %x dbx %p\n",
852 "multiple kernel entries",
853 ntohl(mfccp->mfcc_origin.s_addr),
854 ntohl(mfccp->mfcc_mcastgrp.s_addr),
855 mfccp->mfcc_parent, rt->mfc_stall);
856
857 if (mrtdebug & DEBUG_MFC)
858 log(LOG_DEBUG,"add_mfc o %x g %x p %x dbg %p\n",
859 ntohl(mfccp->mfcc_origin.s_addr),
860 ntohl(mfccp->mfcc_mcastgrp.s_addr),
861 mfccp->mfcc_parent, rt->mfc_stall);
862
863 if (rt->mfc_expire)
864 nexpire[hash]--;
865
866 rte = rt->mfc_stall;
867 update_mfc(mfccp, rt);
868
869 /* free packets Qed at the end of this entry */
870 for (; rte != 0; rte = nrte) {
871 nrte = rte->next;
872 #ifdef RSVP_ISI
873 ip_mdq(rte->m, rte->ifp, rt, -1);
874 #else
875 ip_mdq(rte->m, rte->ifp, rt);
876 #endif /* RSVP_ISI */
877 m_freem(rte->m);
878 #ifdef UPCALL_TIMING
879 collate(&rte->t);
880 #endif /* UPCALL_TIMING */
881 free(rte, M_MRTABLE);
882 }
883 }
884 }
885
886 if (nstl == 0) {
887 /*
888 * No mfc; make a new one
889 */
890 if (mrtdebug & DEBUG_MFC)
891 log(LOG_DEBUG,"add_mfc no upcall o %x g %x p %x\n",
892 ntohl(mfccp->mfcc_origin.s_addr),
893 ntohl(mfccp->mfcc_mcastgrp.s_addr),
894 mfccp->mfcc_parent);
895
896 rt = (struct mfc *)malloc(sizeof(*rt), M_MRTABLE, M_NOWAIT);
897 if (rt == 0) {
898 splx(s);
899 return (ENOBUFS);
900 }
901
902 rt->mfc_origin = mfccp->mfcc_origin;
903 rt->mfc_mcastgrp = mfccp->mfcc_mcastgrp;
904 /* initialize pkt counters per src-grp */
905 rt->mfc_pkt_cnt = 0;
906 rt->mfc_byte_cnt = 0;
907 rt->mfc_wrong_if = 0;
908 timerclear(&rt->mfc_last_assert);
909 update_mfc(mfccp, rt);
910
911 /* insert new entry at head of hash chain */
912 LIST_INSERT_HEAD(&mfchashtbl[hash], rt, mfc_hash);
913 }
914
915 splx(s);
916 return (0);
917 }
918
#ifdef UPCALL_TIMING
/*
 * collect delay statistics on the upcalls
 *
 * When *t lies before the current time, the elapsed microseconds are
 * shifted right by 10 bits, clamped to 50, and the matching slot of
 * upcall_data[] is incremented.  Nothing is recorded otherwise.
 */
static void
collate(t)
	struct timeval *t;
{
	struct timeval now;
	u_int32_t elapsed;
	u_int32_t slot;

	microtime(&now);

	if (!timercmp(t, &now, <))
		return;

	TV_DELTA(now, *t, elapsed);

	slot = elapsed >> 10;
	if (slot > 50)
		slot = 50;

	++upcall_data[slot];
}
#endif /* UPCALL_TIMING */
943
944 /*
945 * Delete an mfc entry
946 */
947 static int
948 del_mfc(m)
949 struct mbuf *m;
950 {
951 struct mfcctl *mfccp;
952 struct mfc *rt;
953 int s;
954
955 if (m == 0 || m->m_len < sizeof(struct mfcctl))
956 return (EINVAL);
957
958 mfccp = mtod(m, struct mfcctl *);
959
960 if (mrtdebug & DEBUG_MFC)
961 log(LOG_DEBUG, "del_mfc origin %x mcastgrp %x\n",
962 ntohl(mfccp->mfcc_origin.s_addr),
963 ntohl(mfccp->mfcc_mcastgrp.s_addr));
964
965 s = splsoftnet();
966
967 MFCFIND(mfccp->mfcc_origin, mfccp->mfcc_mcastgrp, rt);
968 if (rt == 0) {
969 splx(s);
970 return (EADDRNOTAVAIL);
971 }
972
973 LIST_REMOVE(rt, mfc_hash);
974 free(rt, M_MRTABLE);
975
976 splx(s);
977 return (0);
978 }
979
980 static int
981 socket_send(s, mm, src)
982 struct socket *s;
983 struct mbuf *mm;
984 struct sockaddr_in *src;
985 {
986 if (s) {
987 if (sbappendaddr(&s->so_rcv, sintosa(src), mm, (struct mbuf *)0) != 0) {
988 sorwakeup(s);
989 return (0);
990 }
991 }
992 m_freem(mm);
993 return (-1);
994 }
995
996 /*
997 * IP multicast forwarding function. This function assumes that the packet
998 * pointed to by "ip" has arrived on (or is about to be sent to) the interface
999 * pointed to by "ifp", and the packet is to be relayed to other networks
1000 * that have members of the packet's destination IP multicast group.
1001 *
1002 * The packet is returned unscathed to the caller, unless it is
1003 * erroneous, in which case a non-zero return value tells the caller to
1004 * discard it.
1005 */
1006
1007 #define IP_HDR_LEN 20 /* # bytes of fixed IP header (excluding options) */
1008 #define TUNNEL_LEN 12 /* # bytes of IP option for tunnel encapsulation */
1009
1010 int
1011 #ifdef RSVP_ISI
1012 ip_mforward(m, ifp, imo)
1013 #else
1014 ip_mforward(m, ifp)
1015 #endif /* RSVP_ISI */
1016 struct mbuf *m;
1017 struct ifnet *ifp;
1018 #ifdef RSVP_ISI
1019 struct ip_moptions *imo;
1020 #endif /* RSVP_ISI */
1021 {
1022 struct ip *ip = mtod(m, struct ip *);
1023 struct mfc *rt;
1024 u_char *ipoptions;
1025 static int srctun = 0;
1026 struct mbuf *mm;
1027 int s;
1028 #ifdef RSVP_ISI
1029 struct vif *vifp;
1030 vifi_t vifi;
1031 #endif /* RSVP_ISI */
1032
1033 /*
1034 * Clear any in-bound checksum flags for this packet.
1035 */
1036 m->m_pkthdr.csum_flags = 0;
1037
1038 if (mrtdebug & DEBUG_FORWARD)
1039 log(LOG_DEBUG, "ip_mforward: src %x, dst %x, ifp %p\n",
1040 ntohl(ip->ip_src.s_addr), ntohl(ip->ip_dst.s_addr), ifp);
1041
1042 if (ip->ip_hl < (IP_HDR_LEN + TUNNEL_LEN) >> 2 ||
1043 (ipoptions = (u_char *)(ip + 1))[1] != IPOPT_LSRR) {
1044 /*
1045 * Packet arrived via a physical interface or
1046 * an encapuslated tunnel.
1047 */
1048 } else {
1049 /*
1050 * Packet arrived through a source-route tunnel.
1051 * Source-route tunnels are no longer supported.
1052 */
1053 if ((srctun++ % 1000) == 0)
1054 log(LOG_ERR, "ip_mforward: received source-routed packet from %x\n",
1055 ntohl(ip->ip_src.s_addr));
1056
1057 return (1);
1058 }
1059
1060 #ifdef RSVP_ISI
1061 if (imo && ((vifi = imo->imo_multicast_vif) < numvifs)) {
1062 if (ip->ip_ttl < 255)
1063 ip->ip_ttl++; /* compensate for -1 in *_send routines */
1064 if (rsvpdebug && ip->ip_p == IPPROTO_RSVP) {
1065 vifp = viftable + vifi;
1066 printf("Sending IPPROTO_RSVP from %x to %x on vif %d (%s%s)\n",
1067 ntohl(ip->ip_src), ntohl(ip->ip_dst), vifi,
1068 (vifp->v_flags & VIFF_TUNNEL) ? "tunnel on " : "",
1069 vifp->v_ifp->if_xname);
1070 }
1071 return (ip_mdq(m, ifp, (struct mfc *)0, vifi));
1072 }
1073 if (rsvpdebug && ip->ip_p == IPPROTO_RSVP) {
1074 printf("Warning: IPPROTO_RSVP from %x to %x without vif option\n",
1075 ntohl(ip->ip_src), ntohl(ip->ip_dst));
1076 }
1077 #endif /* RSVP_ISI */
1078
1079 /*
1080 * Don't forward a packet with time-to-live of zero or one,
1081 * or a packet destined to a local-only group.
1082 */
1083 if (ip->ip_ttl <= 1 ||
1084 IN_LOCAL_GROUP(ip->ip_dst.s_addr))
1085 return (0);
1086
1087 /*
1088 * Determine forwarding vifs from the forwarding cache table
1089 */
1090 s = splsoftnet();
1091 MFCFIND(ip->ip_src, ip->ip_dst, rt);
1092
1093 /* Entry exists, so forward if necessary */
1094 if (rt != 0) {
1095 splx(s);
1096 #ifdef RSVP_ISI
1097 return (ip_mdq(m, ifp, rt, -1));
1098 #else
1099 return (ip_mdq(m, ifp, rt));
1100 #endif /* RSVP_ISI */
1101 } else {
1102 /*
1103 * If we don't have a route for packet's origin,
1104 * Make a copy of the packet &
1105 * send message to routing daemon
1106 */
1107
1108 struct mbuf *mb0;
1109 struct rtdetq *rte;
1110 u_int32_t hash;
1111 int hlen = ip->ip_hl << 2;
1112 #ifdef UPCALL_TIMING
1113 struct timeval tp;
1114
1115 microtime(&tp);
1116 #endif /* UPCALL_TIMING */
1117
1118 mrtstat.mrts_no_route++;
1119 if (mrtdebug & (DEBUG_FORWARD | DEBUG_MFC))
1120 log(LOG_DEBUG, "ip_mforward: no rte s %x g %x\n",
1121 ntohl(ip->ip_src.s_addr),
1122 ntohl(ip->ip_dst.s_addr));
1123
1124 /*
1125 * Allocate mbufs early so that we don't do extra work if we are
1126 * just going to fail anyway. Make sure to pullup the header so
1127 * that other people can't step on it.
1128 */
1129 rte = (struct rtdetq *)malloc(sizeof(*rte), M_MRTABLE, M_NOWAIT);
1130 if (rte == 0) {
1131 splx(s);
1132 return (ENOBUFS);
1133 }
1134 mb0 = m_copy(m, 0, M_COPYALL);
1135 M_PULLUP(mb0, hlen);
1136 if (mb0 == 0) {
1137 free(rte, M_MRTABLE);
1138 splx(s);
1139 return (ENOBUFS);
1140 }
1141
1142 /* is there an upcall waiting for this packet? */
1143 hash = MFCHASH(ip->ip_src, ip->ip_dst);
1144 LIST_FOREACH(rt, &mfchashtbl[hash], mfc_hash) {
1145 if (in_hosteq(ip->ip_src, rt->mfc_origin) &&
1146 in_hosteq(ip->ip_dst, rt->mfc_mcastgrp) &&
1147 rt->mfc_stall != 0)
1148 break;
1149 }
1150
1151 if (rt == 0) {
1152 int i;
1153 struct igmpmsg *im;
1154
1155 /* no upcall, so make a new entry */
1156 rt = (struct mfc *)malloc(sizeof(*rt), M_MRTABLE, M_NOWAIT);
1157 if (rt == 0) {
1158 free(rte, M_MRTABLE);
1159 m_freem(mb0);
1160 splx(s);
1161 return (ENOBUFS);
1162 }
1163 /* Make a copy of the header to send to the user level process */
1164 mm = m_copy(m, 0, hlen);
1165 M_PULLUP(mm, hlen);
1166 if (mm == 0) {
1167 free(rte, M_MRTABLE);
1168 m_freem(mb0);
1169 free(rt, M_MRTABLE);
1170 splx(s);
1171 return (ENOBUFS);
1172 }
1173
1174 /*
1175 * Send message to routing daemon to install
1176 * a route into the kernel table
1177 */
1178 sin.sin_addr = ip->ip_src;
1179
1180 im = mtod(mm, struct igmpmsg *);
1181 im->im_msgtype = IGMPMSG_NOCACHE;
1182 im->im_mbz = 0;
1183
1184 mrtstat.mrts_upcalls++;
1185
1186 if (socket_send(ip_mrouter, mm, &sin) < 0) {
1187 log(LOG_WARNING, "ip_mforward: ip_mrouter socket queue full\n");
1188 ++mrtstat.mrts_upq_sockfull;
1189 free(rte, M_MRTABLE);
1190 m_freem(mb0);
1191 free(rt, M_MRTABLE);
1192 splx(s);
1193 return (ENOBUFS);
1194 }
1195
1196 /* insert new entry at head of hash chain */
1197 rt->mfc_origin = ip->ip_src;
1198 rt->mfc_mcastgrp = ip->ip_dst;
1199 rt->mfc_pkt_cnt = 0;
1200 rt->mfc_byte_cnt = 0;
1201 rt->mfc_wrong_if = 0;
1202 rt->mfc_expire = UPCALL_EXPIRE;
1203 nexpire[hash]++;
1204 for (i = 0; i < numvifs; i++)
1205 rt->mfc_ttls[i] = 0;
1206 rt->mfc_parent = -1;
1207
1208 /* link into table */
1209 LIST_INSERT_HEAD(&mfchashtbl[hash], rt, mfc_hash);
1210 /* Add this entry to the end of the queue */
1211 rt->mfc_stall = rte;
1212 } else {
1213 /* determine if q has overflowed */
1214 struct rtdetq **p;
1215 int npkts = 0;
1216
1217 for (p = &rt->mfc_stall; *p != 0; p = &(*p)->next)
1218 if (++npkts > MAX_UPQ) {
1219 mrtstat.mrts_upq_ovflw++;
1220 free(rte, M_MRTABLE);
1221 m_freem(mb0);
1222 splx(s);
1223 return (0);
1224 }
1225
1226 /* Add this entry to the end of the queue */
1227 *p = rte;
1228 }
1229
1230 rte->next = 0;
1231 rte->m = mb0;
1232 rte->ifp = ifp;
1233 #ifdef UPCALL_TIMING
1234 rte->t = tp;
1235 #endif /* UPCALL_TIMING */
1236
1237
1238 splx(s);
1239
1240 return (0);
1241 }
1242 }
1243
1244
1245 /*ARGSUSED*/
1246 static void
1247 expire_upcalls(v)
1248 void *v;
1249 {
1250 int i;
1251 int s;
1252
1253 s = splsoftnet();
1254
1255 for (i = 0; i < MFCTBLSIZ; i++) {
1256 struct mfc *rt, *nrt;
1257
1258 if (nexpire[i] == 0)
1259 continue;
1260
1261 for (rt = LIST_FIRST(&mfchashtbl[i]); rt; rt = nrt) {
1262 nrt = LIST_NEXT(rt, mfc_hash);
1263
1264 if (rt->mfc_expire == 0 ||
1265 --rt->mfc_expire > 0)
1266 continue;
1267 nexpire[i]--;
1268
1269 ++mrtstat.mrts_cache_cleanups;
1270 if (mrtdebug & DEBUG_EXPIRE)
1271 log(LOG_DEBUG,
1272 "expire_upcalls: expiring (%x %x)\n",
1273 ntohl(rt->mfc_origin.s_addr),
1274 ntohl(rt->mfc_mcastgrp.s_addr));
1275
1276 expire_mfc(rt);
1277 }
1278 }
1279
1280 splx(s);
1281 callout_reset(&expire_upcalls_ch, EXPIRE_TIMEOUT,
1282 expire_upcalls, NULL);
1283 }
1284
1285 /*
1286 * Packet forwarding routine once entry in the cache is made
1287 */
1288 static int
1289 #ifdef RSVP_ISI
1290 ip_mdq(m, ifp, rt, xmt_vif)
1291 #else
1292 ip_mdq(m, ifp, rt)
1293 #endif /* RSVP_ISI */
1294 struct mbuf *m;
1295 struct ifnet *ifp;
1296 struct mfc *rt;
1297 #ifdef RSVP_ISI
1298 vifi_t xmt_vif;
1299 #endif /* RSVP_ISI */
1300 {
1301 struct ip *ip = mtod(m, struct ip *);
1302 vifi_t vifi;
1303 struct vif *vifp;
1304 int plen = ntohs(ip->ip_len);
1305
1306 /*
1307 * Macro to send packet on vif. Since RSVP packets don't get counted on
1308 * input, they shouldn't get counted on output, so statistics keeping is
1309 * separate.
1310 */
1311 #define MC_SEND(ip,vifp,m) { \
1312 if ((vifp)->v_flags & VIFF_TUNNEL) \
1313 encap_send((ip), (vifp), (m)); \
1314 else \
1315 phyint_send((ip), (vifp), (m)); \
1316 }
1317
1318 #ifdef RSVP_ISI
1319 /*
1320 * If xmt_vif is not -1, send on only the requested vif.
1321 *
1322 * (since vifi_t is u_short, -1 becomes MAXUSHORT, which > numvifs.
1323 */
1324 if (xmt_vif < numvifs) {
1325 MC_SEND(ip, viftable + xmt_vif, m);
1326 return (1);
1327 }
1328 #endif /* RSVP_ISI */
1329
1330 /*
1331 * Don't forward if it didn't arrive from the parent vif for its origin.
1332 */
1333 vifi = rt->mfc_parent;
1334 if ((vifi >= numvifs) || (viftable[vifi].v_ifp != ifp)) {
1335 /* came in the wrong interface */
1336 if (mrtdebug & DEBUG_FORWARD)
1337 log(LOG_DEBUG, "wrong if: ifp %p vifi %d vififp %p\n",
1338 ifp, vifi, viftable[vifi].v_ifp);
1339 ++mrtstat.mrts_wrong_if;
1340 ++rt->mfc_wrong_if;
1341 /*
1342 * If we are doing PIM assert processing, and we are forwarding
1343 * packets on this interface, and it is a broadcast medium
1344 * interface (and not a tunnel), send a message to the routing daemon.
1345 */
1346 if (pim_assert && rt->mfc_ttls[vifi] &&
1347 (ifp->if_flags & IFF_BROADCAST) &&
1348 !(viftable[vifi].v_flags & VIFF_TUNNEL)) {
1349 struct mbuf *mm;
1350 struct igmpmsg *im;
1351 int hlen = ip->ip_hl << 2;
1352 struct timeval now;
1353 u_int32_t delta;
1354
1355 microtime(&now);
1356
1357 TV_DELTA(rt->mfc_last_assert, now, delta);
1358
1359 if (delta > ASSERT_MSG_TIME) {
1360 mm = m_copy(m, 0, hlen);
1361 M_PULLUP(mm, hlen);
1362 if (mm == 0) {
1363 return (ENOBUFS);
1364 }
1365
1366 rt->mfc_last_assert = now;
1367
1368 im = mtod(mm, struct igmpmsg *);
1369 im->im_msgtype = IGMPMSG_WRONGVIF;
1370 im->im_mbz = 0;
1371 im->im_vif = vifi;
1372
1373 sin.sin_addr = im->im_src;
1374
1375 socket_send(ip_mrouter, mm, &sin);
1376 }
1377 }
1378 return (0);
1379 }
1380
1381 /* If I sourced this packet, it counts as output, else it was input. */
1382 if (in_hosteq(ip->ip_src, viftable[vifi].v_lcl_addr)) {
1383 viftable[vifi].v_pkt_out++;
1384 viftable[vifi].v_bytes_out += plen;
1385 } else {
1386 viftable[vifi].v_pkt_in++;
1387 viftable[vifi].v_bytes_in += plen;
1388 }
1389 rt->mfc_pkt_cnt++;
1390 rt->mfc_byte_cnt += plen;
1391
1392 /*
1393 * For each vif, decide if a copy of the packet should be forwarded.
1394 * Forward if:
1395 * - the ttl exceeds the vif's threshold
1396 * - there are group members downstream on interface
1397 */
1398 for (vifp = viftable, vifi = 0; vifi < numvifs; vifp++, vifi++)
1399 if ((rt->mfc_ttls[vifi] > 0) &&
1400 (ip->ip_ttl > rt->mfc_ttls[vifi])) {
1401 vifp->v_pkt_out++;
1402 vifp->v_bytes_out += plen;
1403 MC_SEND(ip, vifp, m);
1404 }
1405
1406 return (0);
1407 }
1408
1409 #ifdef RSVP_ISI
1410 /*
1411 * check if a vif number is legal/ok. This is used by ip_output, to export
1412 * numvifs there,
1413 */
1414 int
1415 legal_vif_num(vif)
1416 int vif;
1417 {
1418 if (vif >= 0 && vif < numvifs)
1419 return (1);
1420 else
1421 return (0);
1422 }
1423 #endif /* RSVP_ISI */
1424
1425 static void
1426 phyint_send(ip, vifp, m)
1427 struct ip *ip;
1428 struct vif *vifp;
1429 struct mbuf *m;
1430 {
1431 struct mbuf *mb_copy;
1432 int hlen = ip->ip_hl << 2;
1433
1434 /*
1435 * Make a new reference to the packet; make sure that
1436 * the IP header is actually copied, not just referenced,
1437 * so that ip_output() only scribbles on the copy.
1438 */
1439 mb_copy = m_copy(m, 0, M_COPYALL);
1440 M_PULLUP(mb_copy, hlen);
1441 if (mb_copy == 0)
1442 return;
1443
1444 if (vifp->v_rate_limit <= 0)
1445 tbf_send_packet(vifp, mb_copy);
1446 else
1447 tbf_control(vifp, mb_copy, mtod(mb_copy, struct ip *),
1448 ntohs(ip->ip_len));
1449 }
1450
1451 static void
1452 encap_send(ip, vifp, m)
1453 struct ip *ip;
1454 struct vif *vifp;
1455 struct mbuf *m;
1456 {
1457 struct mbuf *mb_copy;
1458 struct ip *ip_copy;
1459 int i, len = ntohs(ip->ip_len) + sizeof(multicast_encap_iphdr);
1460
1461 /*
1462 * copy the old packet & pullup it's IP header into the
1463 * new mbuf so we can modify it. Try to fill the new
1464 * mbuf since if we don't the ethernet driver will.
1465 */
1466 MGETHDR(mb_copy, M_DONTWAIT, MT_DATA);
1467 if (mb_copy == 0)
1468 return;
1469 mb_copy->m_data += max_linkhdr;
1470 mb_copy->m_pkthdr.len = len;
1471 mb_copy->m_len = sizeof(multicast_encap_iphdr);
1472
1473 if ((mb_copy->m_next = m_copy(m, 0, M_COPYALL)) == 0) {
1474 m_freem(mb_copy);
1475 return;
1476 }
1477 i = MHLEN - max_linkhdr;
1478 if (i > len)
1479 i = len;
1480 mb_copy = m_pullup(mb_copy, i);
1481 if (mb_copy == 0)
1482 return;
1483
1484 /*
1485 * fill in the encapsulating IP header.
1486 */
1487 ip_copy = mtod(mb_copy, struct ip *);
1488 *ip_copy = multicast_encap_iphdr;
1489 ip_copy->ip_id = htons(ip_id++);
1490 ip_copy->ip_len = htons(len);
1491 ip_copy->ip_src = vifp->v_lcl_addr;
1492 ip_copy->ip_dst = vifp->v_rmt_addr;
1493
1494 /*
1495 * turn the encapsulated IP header back into a valid one.
1496 */
1497 ip = (struct ip *)((caddr_t)ip_copy + sizeof(multicast_encap_iphdr));
1498 --ip->ip_ttl;
1499 ip->ip_sum = 0;
1500 mb_copy->m_data += sizeof(multicast_encap_iphdr);
1501 ip->ip_sum = in_cksum(mb_copy, ip->ip_hl << 2);
1502 mb_copy->m_data -= sizeof(multicast_encap_iphdr);
1503
1504 if (vifp->v_rate_limit <= 0)
1505 tbf_send_packet(vifp, mb_copy);
1506 else
1507 tbf_control(vifp, mb_copy, ip, ntohs(ip_copy->ip_len));
1508 }
1509
1510 /*
1511 * De-encapsulate a packet and feed it back through ip input.
1512 */
1513 static void
1514 #if __STDC__
1515 vif_input(struct mbuf *m, ...)
1516 #else
1517 vif_input(m, va_alist)
1518 struct mbuf *m;
1519 va_dcl
1520 #endif
1521 {
1522 int off, proto;
1523 va_list ap;
1524 struct ip *ip;
1525 struct vif *vifp;
1526 int s;
1527 struct ifqueue *ifq;
1528
1529 va_start(ap, m);
1530 off = va_arg(ap, int);
1531 proto = va_arg(ap, int);
1532 va_end(ap);
1533
1534 vifp = (struct vif *)encap_getarg(m);
1535 if (!vifp || proto != AF_INET) {
1536 m_freem(m);
1537 mrtstat.mrts_bad_tunnel++;
1538 return;
1539 }
1540
1541 ip = mtod(m, struct ip *);
1542
1543 m_adj(m, off);
1544 m->m_pkthdr.rcvif = vifp->v_ifp;
1545 ifq = &ipintrq;
1546 s = splnet();
1547 if (IF_QFULL(ifq)) {
1548 IF_DROP(ifq);
1549 m_freem(m);
1550 } else {
1551 IF_ENQUEUE(ifq, m);
1552 /*
1553 * normally we would need a "schednetisr(NETISR_IP)"
1554 * here but we were called by ip_input and it is going
1555 * to loop back & try to dequeue the packet we just
1556 * queued as soon as we return so we avoid the
1557 * unnecessary software interrrupt.
1558 */
1559 }
1560 splx(s);
1561 }
1562
1563 /*
1564 * Check if the packet should be grabbed by us.
1565 */
1566 static int
1567 vif_encapcheck(m, off, proto, arg)
1568 const struct mbuf *m;
1569 int off;
1570 int proto;
1571 void *arg;
1572 {
1573 struct vif *vifp;
1574 struct ip ip;
1575
1576 #ifdef DIAGNOSTIC
1577 if (!arg || proto != IPPROTO_IPV4)
1578 panic("unexpected arg in vif_encapcheck");
1579 #endif
1580
1581 /*
1582 * do not grab the packet if it's not to a multicast destination or if
1583 * we don't have an encapsulating tunnel with the source.
1584 * Note: This code assumes that the remote site IP address
1585 * uniquely identifies the tunnel (i.e., that this site has
1586 * at most one tunnel with the remote site).
1587 */
1588
1589 /* LINTED const cast */
1590 m_copydata((struct mbuf *)m, off, sizeof(ip), (caddr_t)&ip);
1591 if (!IN_MULTICAST(ip.ip_dst.s_addr))
1592 return 0;
1593
1594 /* LINTED const cast */
1595 m_copydata((struct mbuf *)m, 0, sizeof(ip), (caddr_t)&ip);
1596 if (!in_hosteq(ip.ip_src, last_encap_src)) {
1597 vifp = (struct vif *)arg;
1598 if (vifp->v_flags & VIFF_TUNNEL &&
1599 in_hosteq(vifp->v_rmt_addr, ip.ip_src))
1600 ;
1601 else
1602 return 0;
1603 last_encap_vif = vifp;
1604 last_encap_src = ip.ip_src;
1605 } else
1606 vifp = last_encap_vif;
1607
1608 /* 32bit match, since we have checked ip_src only */
1609 return 32;
1610 }
1611
1612 /*
1613 * Token bucket filter module
1614 */
1615 static void
1616 tbf_control(vifp, m, ip, len)
1617 struct vif *vifp;
1618 struct mbuf *m;
1619 struct ip *ip;
1620 u_int32_t len;
1621 {
1622
1623 if (len > MAX_BKT_SIZE) {
1624 /* drop if packet is too large */
1625 mrtstat.mrts_pkt2large++;
1626 m_freem(m);
1627 return;
1628 }
1629
1630 tbf_update_tokens(vifp);
1631
1632 /*
1633 * If there are enough tokens, and the queue is empty, send this packet
1634 * out immediately. Otherwise, try to insert it on this vif's queue.
1635 */
1636 if (vifp->tbf_q_len == 0) {
1637 if (len <= vifp->tbf_n_tok) {
1638 vifp->tbf_n_tok -= len;
1639 tbf_send_packet(vifp, m);
1640 } else {
1641 /* queue packet and timeout till later */
1642 tbf_queue(vifp, m);
1643 callout_reset(&vifp->v_repq_ch, TBF_REPROCESS,
1644 tbf_reprocess_q, vifp);
1645 }
1646 } else {
1647 if (vifp->tbf_q_len >= vifp->tbf_max_q_len &&
1648 !tbf_dq_sel(vifp, ip)) {
1649 /* queue length too much, and couldn't make room */
1650 mrtstat.mrts_q_overflow++;
1651 m_freem(m);
1652 } else {
1653 /* queue length low enough, or made room */
1654 tbf_queue(vifp, m);
1655 tbf_process_q(vifp);
1656 }
1657 }
1658 }
1659
1660 /*
1661 * adds a packet to the queue at the interface
1662 */
1663 static void
1664 tbf_queue(vifp, m)
1665 struct vif *vifp;
1666 struct mbuf *m;
1667 {
1668 int s = splsoftnet();
1669
1670 /* insert at tail */
1671 *vifp->tbf_t = m;
1672 vifp->tbf_t = &m->m_nextpkt;
1673 vifp->tbf_q_len++;
1674
1675 splx(s);
1676 }
1677
1678
1679 /*
1680 * processes the queue at the interface
1681 */
1682 static void
1683 tbf_process_q(vifp)
1684 struct vif *vifp;
1685 {
1686 struct mbuf *m;
1687 int len;
1688 int s = splsoftnet();
1689
1690 /*
1691 * Loop through the queue at the interface and send as many packets
1692 * as possible.
1693 */
1694 for (m = vifp->tbf_q;
1695 m != 0;
1696 m = vifp->tbf_q) {
1697 len = ntohs(mtod(m, struct ip *)->ip_len);
1698
1699 /* determine if the packet can be sent */
1700 if (len <= vifp->tbf_n_tok) {
1701 /* if so,
1702 * reduce no of tokens, dequeue the packet,
1703 * send the packet.
1704 */
1705 if ((vifp->tbf_q = m->m_nextpkt) == 0)
1706 vifp->tbf_t = &vifp->tbf_q;
1707 --vifp->tbf_q_len;
1708
1709 m->m_nextpkt = 0;
1710 vifp->tbf_n_tok -= len;
1711 tbf_send_packet(vifp, m);
1712 } else
1713 break;
1714 }
1715 splx(s);
1716 }
1717
1718 static void
1719 tbf_reprocess_q(arg)
1720 void *arg;
1721 {
1722 struct vif *vifp = arg;
1723
1724 if (ip_mrouter == 0)
1725 return;
1726
1727 tbf_update_tokens(vifp);
1728 tbf_process_q(vifp);
1729
1730 if (vifp->tbf_q_len != 0)
1731 callout_reset(&vifp->v_repq_ch, TBF_REPROCESS,
1732 tbf_reprocess_q, vifp);
1733 }
1734
1735 /* function that will selectively discard a member of the queue
1736 * based on the precedence value and the priority
1737 */
1738 static int
1739 tbf_dq_sel(vifp, ip)
1740 struct vif *vifp;
1741 struct ip *ip;
1742 {
1743 u_int p;
1744 struct mbuf **mp, *m;
1745 int s = splsoftnet();
1746
1747 p = priority(vifp, ip);
1748
1749 for (mp = &vifp->tbf_q, m = *mp;
1750 m != 0;
1751 mp = &m->m_nextpkt, m = *mp) {
1752 if (p > priority(vifp, mtod(m, struct ip *))) {
1753 if ((*mp = m->m_nextpkt) == 0)
1754 vifp->tbf_t = mp;
1755 --vifp->tbf_q_len;
1756
1757 m_freem(m);
1758 mrtstat.mrts_drop_sel++;
1759 splx(s);
1760 return (1);
1761 }
1762 }
1763 splx(s);
1764 return (0);
1765 }
1766
1767 static void
1768 tbf_send_packet(vifp, m)
1769 struct vif *vifp;
1770 struct mbuf *m;
1771 {
1772 int error;
1773 int s = splsoftnet();
1774
1775 if (vifp->v_flags & VIFF_TUNNEL) {
1776 /* If tunnel options */
1777 #ifdef IPSEC
1778 /* Don't lookup socket in forwading case */
1779 (void)ipsec_setsocket(m, NULL);
1780 #endif
1781 ip_output(m, (struct mbuf *)0, &vifp->v_route,
1782 IP_FORWARDING, (struct ip_moptions *)0);
1783 } else {
1784 /* if physical interface option, extract the options and then send */
1785 struct ip_moptions imo;
1786
1787 imo.imo_multicast_ifp = vifp->v_ifp;
1788 imo.imo_multicast_ttl = mtod(m, struct ip *)->ip_ttl - 1;
1789 imo.imo_multicast_loop = 1;
1790 #ifdef RSVP_ISI
1791 imo.imo_multicast_vif = -1;
1792 #endif
1793
1794 #ifdef IPSEC
1795 /* Don't lookup socket in forwading case */
1796 (void)ipsec_setsocket(m, NULL);
1797 #endif
1798 error = ip_output(m, (struct mbuf *)0, (struct route *)0,
1799 IP_FORWARDING|IP_MULTICASTOPTS, &imo);
1800
1801 if (mrtdebug & DEBUG_XMIT)
1802 log(LOG_DEBUG, "phyint_send on vif %ld err %d\n",
1803 (long)(vifp-viftable), error);
1804 }
1805 splx(s);
1806 }
1807
1808 /* determine the current time and then
1809 * the elapsed time (between the last time and time now)
1810 * in milliseconds & update the no. of tokens in the bucket
1811 */
1812 static void
1813 tbf_update_tokens(vifp)
1814 struct vif *vifp;
1815 {
1816 struct timeval tp;
1817 u_int32_t tm;
1818 int s = splsoftnet();
1819
1820 microtime(&tp);
1821
1822 TV_DELTA(tp, vifp->tbf_last_pkt_t, tm);
1823
1824 /*
1825 * This formula is actually
1826 * "time in seconds" * "bytes/second".
1827 *
1828 * (tm / 1000000) * (v_rate_limit * 1000 * (1000/1024) / 8)
1829 *
1830 * The (1000/1024) was introduced in add_vif to optimize
1831 * this divide into a shift.
1832 */
1833 vifp->tbf_n_tok += tm * vifp->v_rate_limit / 8192;
1834 vifp->tbf_last_pkt_t = tp;
1835
1836 if (vifp->tbf_n_tok > MAX_BKT_SIZE)
1837 vifp->tbf_n_tok = MAX_BKT_SIZE;
1838
1839 splx(s);
1840 }
1841
1842 static int
1843 priority(vifp, ip)
1844 struct vif *vifp;
1845 struct ip *ip;
1846 {
1847 int prio;
1848
1849 /* temporary hack; may add general packet classifier some day */
1850
1851 /*
1852 * The UDP port space is divided up into four priority ranges:
1853 * [0, 16384) : unclassified - lowest priority
1854 * [16384, 32768) : audio - highest priority
1855 * [32768, 49152) : whiteboard - medium priority
1856 * [49152, 65536) : video - low priority
1857 */
1858 if (ip->ip_p == IPPROTO_UDP) {
1859 struct udphdr *udp = (struct udphdr *)(((char *)ip) + (ip->ip_hl << 2));
1860
1861 switch (ntohs(udp->uh_dport) & 0xc000) {
1862 case 0x4000:
1863 prio = 70;
1864 break;
1865 case 0x8000:
1866 prio = 60;
1867 break;
1868 case 0xc000:
1869 prio = 55;
1870 break;
1871 default:
1872 prio = 50;
1873 break;
1874 }
1875
1876 if (tbfdebug > 1)
1877 log(LOG_DEBUG, "port %x prio %d\n", ntohs(udp->uh_dport), prio);
1878 } else
1879 prio = 50;
1880
1881
1882 return (prio);
1883 }
1884
1885 /*
1886 * End of token bucket filter modifications
1887 */
1888
1889 #ifdef RSVP_ISI
1890
1891 int
1892 ip_rsvp_vif_init(so, m)
1893 struct socket *so;
1894 struct mbuf *m;
1895 {
1896 int i;
1897 int s;
1898
1899 if (rsvpdebug)
1900 printf("ip_rsvp_vif_init: so_type = %d, pr_protocol = %d\n",
1901 so->so_type, so->so_proto->pr_protocol);
1902
1903 if (so->so_type != SOCK_RAW || so->so_proto->pr_protocol != IPPROTO_RSVP)
1904 return (EOPNOTSUPP);
1905
1906 /* Check mbuf. */
1907 if (m == 0 || m->m_len != sizeof(int)) {
1908 return (EINVAL);
1909 }
1910 i = *(mtod(m, int *));
1911
1912 if (rsvpdebug)
1913 printf("ip_rsvp_vif_init: vif = %d rsvp_on = %d\n",i,rsvp_on);
1914
1915 s = splsoftnet();
1916
1917 /* Check vif. */
1918 if (!legal_vif_num(i)) {
1919 splx(s);
1920 return (EADDRNOTAVAIL);
1921 }
1922
1923 /* Check if socket is available. */
1924 if (viftable[i].v_rsvpd != 0) {
1925 splx(s);
1926 return (EADDRINUSE);
1927 }
1928
1929 viftable[i].v_rsvpd = so;
1930 /* This may seem silly, but we need to be sure we don't over-increment
1931 * the RSVP counter, in case something slips up.
1932 */
1933 if (!viftable[i].v_rsvp_on) {
1934 viftable[i].v_rsvp_on = 1;
1935 rsvp_on++;
1936 }
1937
1938 splx(s);
1939 return (0);
1940 }
1941
1942 int
1943 ip_rsvp_vif_done(so, m)
1944 struct socket *so;
1945 struct mbuf *m;
1946 {
1947 int i;
1948 int s;
1949
1950 if (rsvpdebug)
1951 printf("ip_rsvp_vif_done: so_type = %d, pr_protocol = %d\n",
1952 so->so_type, so->so_proto->pr_protocol);
1953
1954 if (so->so_type != SOCK_RAW || so->so_proto->pr_protocol != IPPROTO_RSVP)
1955 return (EOPNOTSUPP);
1956
1957 /* Check mbuf. */
1958 if (m == 0 || m->m_len != sizeof(int)) {
1959 return (EINVAL);
1960 }
1961 i = *(mtod(m, int *));
1962
1963 s = splsoftnet();
1964
1965 /* Check vif. */
1966 if (!legal_vif_num(i)) {
1967 splx(s);
1968 return (EADDRNOTAVAIL);
1969 }
1970
1971 if (rsvpdebug)
1972 printf("ip_rsvp_vif_done: v_rsvpd = %x so = %x\n",
1973 viftable[i].v_rsvpd, so);
1974
1975 viftable[i].v_rsvpd = 0;
1976 /* This may seem silly, but we need to be sure we don't over-decrement
1977 * the RSVP counter, in case something slips up.
1978 */
1979 if (viftable[i].v_rsvp_on) {
1980 viftable[i].v_rsvp_on = 0;
1981 rsvp_on--;
1982 }
1983
1984 splx(s);
1985 return (0);
1986 }
1987
1988 void
1989 ip_rsvp_force_done(so)
1990 struct socket *so;
1991 {
1992 int vifi;
1993 int s;
1994
1995 /* Don't bother if it is not the right type of socket. */
1996 if (so->so_type != SOCK_RAW || so->so_proto->pr_protocol != IPPROTO_RSVP)
1997 return;
1998
1999 s = splsoftnet();
2000
2001 /* The socket may be attached to more than one vif...this
2002 * is perfectly legal.
2003 */
2004 for (vifi = 0; vifi < numvifs; vifi++) {
2005 if (viftable[vifi].v_rsvpd == so) {
2006 viftable[vifi].v_rsvpd = 0;
2007 /* This may seem silly, but we need to be sure we don't
2008 * over-decrement the RSVP counter, in case something slips up.
2009 */
2010 if (viftable[vifi].v_rsvp_on) {
2011 viftable[vifi].v_rsvp_on = 0;
2012 rsvp_on--;
2013 }
2014 }
2015 }
2016
2017 splx(s);
2018 return;
2019 }
2020
2021 void
2022 rsvp_input(m, ifp)
2023 struct mbuf *m;
2024 struct ifnet *ifp;
2025 {
2026 int vifi;
2027 struct ip *ip = mtod(m, struct ip *);
2028 static struct sockaddr_in rsvp_src = { sizeof(sin), AF_INET };
2029 int s;
2030
2031 if (rsvpdebug)
2032 printf("rsvp_input: rsvp_on %d\n",rsvp_on);
2033
2034 /* Can still get packets with rsvp_on = 0 if there is a local member
2035 * of the group to which the RSVP packet is addressed. But in this
2036 * case we want to throw the packet away.
2037 */
2038 if (!rsvp_on) {
2039 m_freem(m);
2040 return;
2041 }
2042
2043 /* If the old-style non-vif-associated socket is set, then use
2044 * it and ignore the new ones.
2045 */
2046 if (ip_rsvpd != 0) {
2047 if (rsvpdebug)
2048 printf("rsvp_input: Sending packet up old-style socket\n");
2049 rip_input(m); /*XXX*/
2050 return;
2051 }
2052
2053 s = splsoftnet();
2054
2055 if (rsvpdebug)
2056 printf("rsvp_input: check vifs\n");
2057
2058 /* Find which vif the packet arrived on. */
2059 for (vifi = 0; vifi < numvifs; vifi++) {
2060 if (viftable[vifi].v_ifp == ifp)
2061 break;
2062 }
2063
2064 if (vifi == numvifs) {
2065 /* Can't find vif packet arrived on. Drop packet. */
2066 if (rsvpdebug)
2067 printf("rsvp_input: Can't find vif for packet...dropping it.\n");
2068 m_freem(m);
2069 splx(s);
2070 return;
2071 }
2072
2073 if (rsvpdebug)
2074 printf("rsvp_input: check socket\n");
2075
2076 if (viftable[vifi].v_rsvpd == 0) {
2077 /* drop packet, since there is no specific socket for this
2078 * interface */
2079 if (rsvpdebug)
2080 printf("rsvp_input: No socket defined for vif %d\n",vifi);
2081 m_freem(m);
2082 splx(s);
2083 return;
2084 }
2085
2086 rsvp_src.sin_addr = ip->ip_src;
2087
2088 if (rsvpdebug && m)
2089 printf("rsvp_input: m->m_len = %d, sbspace() = %d\n",
2090 m->m_len,sbspace(&viftable[vifi].v_rsvpd->so_rcv));
2091
2092 if (socket_send(viftable[vifi].v_rsvpd, m, &rsvp_src) < 0)
2093 if (rsvpdebug)
2094 printf("rsvp_input: Failed to append to socket\n");
2095 else
2096 if (rsvpdebug)
2097 printf("rsvp_input: send packet up\n");
2098
2099 splx(s);
2100 }
2101 #endif /* RSVP_ISI */
2102