ip_mroute.c revision 1.74 1 /* $NetBSD: ip_mroute.c,v 1.74 2003/06/26 21:49:17 itojun Exp $ */
2
3 /*
4 * Copyright (c) 1989 Stephen Deering
5 * Copyright (c) 1992, 1993
6 * The Regents of the University of California. All rights reserved.
7 *
8 * This code is derived from software contributed to Berkeley by
9 * Stephen Deering of Stanford University.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 * 3. All advertising materials mentioning features or use of this software
20 * must display the following acknowledgement:
21 * This product includes software developed by the University of
22 * California, Berkeley and its contributors.
23 * 4. Neither the name of the University nor the names of its contributors
24 * may be used to endorse or promote products derived from this software
25 * without specific prior written permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
28 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
31 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
32 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
33 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
34 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
35 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
36 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
37 * SUCH DAMAGE.
38 *
39 * @(#)ip_mroute.c 8.2 (Berkeley) 11/15/93
40 */
41
42 /*
43 * IP multicast forwarding procedures
44 *
45 * Written by David Waitzman, BBN Labs, August 1988.
46 * Modified by Steve Deering, Stanford, February 1989.
47 * Modified by Mark J. Steiglitz, Stanford, May, 1991
48 * Modified by Van Jacobson, LBL, January 1993
49 * Modified by Ajit Thyagarajan, PARC, August 1993
50 * Modified by Bill Fenner, PARC, April 1994
51 * Modified by Charles M. Hannum, NetBSD, May 1995.
52 *
53 * MROUTING Revision: 1.2
54 */
55
56 #include <sys/cdefs.h>
57 __KERNEL_RCSID(0, "$NetBSD: ip_mroute.c,v 1.74 2003/06/26 21:49:17 itojun Exp $");
58
59 #include "opt_ipsec.h"
60
61 #include <sys/param.h>
62 #include <sys/systm.h>
63 #include <sys/callout.h>
64 #include <sys/mbuf.h>
65 #include <sys/socket.h>
66 #include <sys/socketvar.h>
67 #include <sys/protosw.h>
68 #include <sys/errno.h>
69 #include <sys/time.h>
70 #include <sys/kernel.h>
71 #include <sys/ioctl.h>
72 #include <sys/syslog.h>
73 #include <net/if.h>
74 #include <net/route.h>
75 #include <net/raw_cb.h>
76 #include <netinet/in.h>
77 #include <netinet/in_var.h>
78 #include <netinet/in_systm.h>
79 #include <netinet/ip.h>
80 #include <netinet/ip_var.h>
81 #include <netinet/in_pcb.h>
82 #include <netinet/udp.h>
83 #include <netinet/igmp.h>
84 #include <netinet/igmp_var.h>
85 #include <netinet/ip_mroute.h>
86 #include <netinet/ip_encap.h>
87
88 #ifdef IPSEC
89 #include <netinet6/ipsec.h>
90 #include <netkey/key.h>
91 #endif
92
93 #include <machine/stdarg.h>
94
#define	IP_MULTICASTOPTS	0

/*
 * If `m' is non-null and its first mbuf either has M_EXT set or holds
 * fewer than `len' bytes, replace `m' with the result of
 * m_pullup(m, len).  `m' must be an lvalue; it may come back null.
 */
#define	M_PULLUP(m, len)						\
do {									\
	if ((m) != 0 &&							\
	    (((m)->m_flags & M_EXT) != 0 || (m)->m_len < (len)))	\
		(m) = m_pullup((m), (len));				\
} while (/*CONSTCOND*/ 0)
101
/*
 * Globals.  All but ip_mrouter and ip_mrtproto could be static,
 * except for netstat or debugging purposes.
 */
/*
 * Socket that enabled multicast routing via MRT_INIT; set in
 * ip_mrouter_init() and cleared again in ip_mrouter_done().
 */
struct socket *ip_mrouter = 0;
int ip_mrtproto = IGMP_DVMRP;	/* for netstat only */

#define NO_RTE_FOUND	0x1
#define RTE_FOUND	0x2

/*
 * Hash an (origin, group) pair of struct in_addr into an index into
 * mfchashtbl: shifted copies of both addresses are XORed together and
 * the result is masked with mfchash.
 */
#define MFCHASH(a, g) \
	((((a).s_addr >> 20) ^ ((a).s_addr >> 10) ^ (a).s_addr ^ \
	((g).s_addr >> 20) ^ ((g).s_addr >> 10) ^ (g).s_addr) & mfchash)
/* Forwarding cache hash table; allocated by hashinit() in ip_mrouter_init(). */
LIST_HEAD(mfchashhdr, mfc) *mfchashtbl;
/* Mask used by MFCHASH(); filled in by the same hashinit() call. */
u_long mfchash;

/*
 * Per-bucket count of cache entries created with a nonzero mfc_expire
 * (upcall entries made in ip_mforward()); expire_upcalls() skips buckets
 * whose count is zero.
 */
u_char nexpire[MFCTBLSIZ];
struct vif viftable[MAXVIFS];	/* virtual interfaces, indexed by vifi_t */
struct mrtstat mrtstat;		/* multicast routing statistics counters */
u_int mrtdebug = 0;		/* debug level */
/* Bits tested in mrtdebug to select which debug messages are logged. */
#define DEBUG_MFC	0x02
#define DEBUG_FORWARD	0x04
#define DEBUG_EXPIRE	0x08
#define DEBUG_XMIT	0x10
u_int tbfdebug = 0;		/* tbf debug level */
#ifdef RSVP_ISI
u_int rsvpdebug = 0;		/* rsvp debug level */
extern struct socket *ip_rsvpd;
extern int rsvp_on;
#endif /* RSVP_ISI */
132
/* vif attachment using sys/netinet/ip_encap.c */
extern struct domain inetdomain;
static void vif_input __P((struct mbuf *, ...));
static int vif_encapcheck __P((const struct mbuf *, int, int, void *));
/*
 * Protocol switch entry that add_vif() passes to encap_attach_func(),
 * together with vif_encapcheck, when a tunnel vif is created.
 */
static struct protosw vif_protosw =
{ SOCK_RAW,	&inetdomain,	IPPROTO_IPV4,	PR_ATOMIC|PR_ADDR,
  vif_input,	rip_output,	0,		rip_ctloutput,
  rip_usrreq,
  0,		0,		0,		0,
};

/* Interval, in ticks, at which expire_upcalls() re-arms itself. */
#define EXPIRE_TIMEOUT	(hz / 4)	/* 4x / second */
/* Initial mfc_expire value given to a new upcall entry in ip_mforward(). */
#define UPCALL_EXPIRE	6		/* number of timeouts */

/*
 * Define the token bucket filter structures
 */

#define TBF_REPROCESS	(hz / 100)	/* 100x / second */
152
/*
 * Forward declarations of the file-local functions.  Note that the
 * ip_mdq() prototype takes an extra vifi_t argument when RSVP_ISI is
 * defined.
 */
static int get_sg_cnt __P((struct sioc_sg_req *));
static int get_vif_cnt __P((struct sioc_vif_req *));
static int ip_mrouter_init __P((struct socket *, struct mbuf *));
static int get_version __P((struct mbuf *));
static int set_assert __P((struct mbuf *));
static int get_assert __P((struct mbuf *));
static int add_vif __P((struct mbuf *));
static int del_vif __P((struct mbuf *));
static void update_mfc __P((struct mfcctl *, struct mfc *));
static void expire_mfc __P((struct mfc *));
static int add_mfc __P((struct mbuf *));
#ifdef UPCALL_TIMING
static void collate __P((struct timeval *));
#endif
static int del_mfc __P((struct mbuf *));
static int socket_send __P((struct socket *, struct mbuf *,
			    struct sockaddr_in *));
static void expire_upcalls __P((void *));
#ifdef RSVP_ISI
static int ip_mdq __P((struct mbuf *, struct ifnet *, struct mfc *, vifi_t));
#else
static int ip_mdq __P((struct mbuf *, struct ifnet *, struct mfc *));
#endif
static void phyint_send __P((struct ip *, struct vif *, struct mbuf *));
static void encap_send __P((struct ip *, struct vif *, struct mbuf *));
static void tbf_control __P((struct vif *, struct mbuf *, struct ip *,
			     u_int32_t));
static void tbf_queue __P((struct vif *, struct mbuf *));
static void tbf_process_q __P((struct vif *));
static void tbf_reprocess_q __P((void *));
static int tbf_dq_sel __P((struct vif *, struct ip *));
static void tbf_send_packet __P((struct vif *, struct mbuf *));
static void tbf_update_tokens __P((struct vif *));
static int priority __P((struct vif *, struct ip *));
187
/*
 * 'Interfaces' associated with decapsulator (so we can tell
 * packets that went through it from ones that get reflected
 * by a broken gateway).  These interfaces are never linked into
 * the system ifnet list & no routes point to them.  I.e., packets
 * can't be sent this way.  They only exist as a placeholder for
 * multicast source verification.
 */
#if 0
/* Disabled: add_vif() allocates one fake ifnet per tunnel vif instead. */
struct ifnet multicast_decap_if[MAXVIFS];
#endif

#define ENCAP_TTL	64
#define ENCAP_PROTO	IPPROTO_IPIP	/* 4 */

/*
 * prototype IP hdr for encapsulated packets
 *
 * The initializer is positional; the first two values (header length in
 * 32-bit words and IP version) swap places depending on BYTE_ORDER,
 * presumably to follow the bit-field order in struct ip.  Members after
 * the checksum are not listed and are therefore zero.
 */
struct ip multicast_encap_iphdr = {
#if BYTE_ORDER == LITTLE_ENDIAN
	sizeof(struct ip) >> 2, IPVERSION,
#else
	IPVERSION, sizeof(struct ip) >> 2,
#endif
	0,				/* tos */
	sizeof(struct ip),		/* total length */
	0,				/* id */
	0,				/* frag offset */
	ENCAP_TTL, ENCAP_PROTO,		/* ttl, protocol */
	0,				/* checksum */
};
217
/*
 * Private variables.
 */
/*
 * One more than the highest vif index in use: raised by add_vif(),
 * trimmed by del_vif(), and zeroed by ip_mrouter_done().
 */
static vifi_t numvifs = 0;

/* Callout that periodically runs expire_upcalls(). */
static struct callout expire_upcalls_ch;

/*
 * one-back cache used by vif_encapcheck to locate a tunnel's vif
 * given a datagram's src ip address.
 */
static struct in_addr last_encap_src;
static struct vif *last_encap_vif;

/*
 * whether or not special PIM assert processing is enabled.
 * Written by set_assert(), read back by get_assert().
 */
static int pim_assert;
/*
 * Rate limit for assert notification messages, in usec
 */
#define ASSERT_MSG_TIME		3000000
240
/*
 * Look up the forwarding cache entry for origin `o' and multicast group
 * `g'.  Only entries without stalled packets (mfc_stall == 0) match.
 * `rt' receives the matching entry, or 0 when there is none.  The lookup
 * and miss counters in mrtstat are updated along the way.
 * Type of service parameter to be added in the future!!!
 */

#define MFCFIND(o, g, rt) do {						\
	struct mfc *_ent;						\
	++mrtstat.mrts_mfc_lookups;					\
	for (_ent = LIST_FIRST(&mfchashtbl[MFCHASH(o, g)]);		\
	    _ent != 0;							\
	    _ent = LIST_NEXT(_ent, mfc_hash)) {				\
		if (_ent->mfc_stall != 0)				\
			continue;					\
		if (in_hosteq(_ent->mfc_origin, (o)) &&			\
		    in_hosteq(_ent->mfc_mcastgrp, (g)))			\
			break;						\
	}								\
	(rt) = _ent;							\
	if (_ent == 0)							\
		++mrtstat.mrts_mfc_misses;				\
} while (/*CONSTCOND*/ 0)
261
/*
 * Store into `delta' the elapsed time from timeval `b' to timeval `a',
 * in microseconds.  Differences of 0, 1 and 2 whole seconds are handled
 * without a multiplication; anything else takes the general path.
 * Borrowed from Van Jacobson's scheduling code
 */
#define TV_DELTA(a, b, delta) do {					\
	int tvd_secs = (a).tv_sec - (b).tv_sec;				\
									\
	delta = (a).tv_usec - (b).tv_usec;				\
	if (tvd_secs == 1) {						\
		delta += 1000000;					\
	} else if (tvd_secs == 2) {					\
		delta += 1000000;					\
		delta += 1000000;					\
	} else if (tvd_secs != 0) {					\
		delta += (1000000 * tvd_secs);				\
	}								\
} while (/*CONSTCOND*/ 0)
284
#ifdef UPCALL_TIMING
/*
 * Histogram of upcall delays filled in by collate(): the bucket index is
 * the delay in microseconds shifted right by 10, clamped to 50.
 */
u_int32_t upcall_data[51];
#endif /* UPCALL_TIMING */
288
289 /*
290 * Handle MRT setsockopt commands to modify the multicast routing tables.
291 */
292 int
293 ip_mrouter_set(so, optname, m)
294 struct socket *so;
295 int optname;
296 struct mbuf **m;
297 {
298 int error;
299
300 if (optname != MRT_INIT && so != ip_mrouter)
301 error = ENOPROTOOPT;
302 else
303 switch (optname) {
304 case MRT_INIT:
305 error = ip_mrouter_init(so, *m);
306 break;
307 case MRT_DONE:
308 error = ip_mrouter_done();
309 break;
310 case MRT_ADD_VIF:
311 error = add_vif(*m);
312 break;
313 case MRT_DEL_VIF:
314 error = del_vif(*m);
315 break;
316 case MRT_ADD_MFC:
317 error = add_mfc(*m);
318 break;
319 case MRT_DEL_MFC:
320 error = del_mfc(*m);
321 break;
322 case MRT_ASSERT:
323 error = set_assert(*m);
324 break;
325 default:
326 error = ENOPROTOOPT;
327 break;
328 }
329
330 if (*m)
331 m_free(*m);
332 return (error);
333 }
334
335 /*
336 * Handle MRT getsockopt commands
337 */
338 int
339 ip_mrouter_get(so, optname, m)
340 struct socket *so;
341 int optname;
342 struct mbuf **m;
343 {
344 int error;
345
346 if (so != ip_mrouter)
347 error = ENOPROTOOPT;
348 else {
349 *m = m_get(M_WAIT, MT_SOOPTS);
350 MCLAIM(*m, so->so_mowner);
351
352 switch (optname) {
353 case MRT_VERSION:
354 error = get_version(*m);
355 break;
356 case MRT_ASSERT:
357 error = get_assert(*m);
358 break;
359 default:
360 error = ENOPROTOOPT;
361 break;
362 }
363
364 if (error)
365 m_free(*m);
366 }
367
368 return (error);
369 }
370
371 /*
372 * Handle ioctl commands to obtain information from the cache
373 */
374 int
375 mrt_ioctl(so, cmd, data)
376 struct socket *so;
377 u_long cmd;
378 caddr_t data;
379 {
380 int error;
381
382 if (so != ip_mrouter)
383 error = EINVAL;
384 else
385 switch (cmd) {
386 case SIOCGETVIFCNT:
387 error = get_vif_cnt((struct sioc_vif_req *)data);
388 break;
389 case SIOCGETSGCNT:
390 error = get_sg_cnt((struct sioc_sg_req *)data);
391 break;
392 default:
393 error = EINVAL;
394 break;
395 }
396
397 return (error);
398 }
399
400 /*
401 * returns the packet, byte, rpf-failure count for the source group provided
402 */
403 static int
404 get_sg_cnt(req)
405 struct sioc_sg_req *req;
406 {
407 struct mfc *rt;
408 int s;
409
410 s = splsoftnet();
411 MFCFIND(req->src, req->grp, rt);
412 splx(s);
413 if (rt != 0) {
414 req->pktcnt = rt->mfc_pkt_cnt;
415 req->bytecnt = rt->mfc_byte_cnt;
416 req->wrong_if = rt->mfc_wrong_if;
417 } else
418 req->pktcnt = req->bytecnt = req->wrong_if = 0xffffffff;
419
420 return (0);
421 }
422
423 /*
424 * returns the input and output packet and byte counts on the vif provided
425 */
426 static int
427 get_vif_cnt(req)
428 struct sioc_vif_req *req;
429 {
430 vifi_t vifi = req->vifi;
431
432 if (vifi >= numvifs)
433 return (EINVAL);
434
435 req->icount = viftable[vifi].v_pkt_in;
436 req->ocount = viftable[vifi].v_pkt_out;
437 req->ibytes = viftable[vifi].v_bytes_in;
438 req->obytes = viftable[vifi].v_bytes_out;
439
440 return (0);
441 }
442
443 /*
444 * Enable multicast routing
445 */
446 static int
447 ip_mrouter_init(so, m)
448 struct socket *so;
449 struct mbuf *m;
450 {
451 int *v;
452
453 if (mrtdebug)
454 log(LOG_DEBUG,
455 "ip_mrouter_init: so_type = %d, pr_protocol = %d\n",
456 so->so_type, so->so_proto->pr_protocol);
457
458 if (so->so_type != SOCK_RAW ||
459 so->so_proto->pr_protocol != IPPROTO_IGMP)
460 return (EOPNOTSUPP);
461
462 if (m == 0 || m->m_len < sizeof(int))
463 return (EINVAL);
464
465 v = mtod(m, int *);
466 if (*v != 1)
467 return (EINVAL);
468
469 if (ip_mrouter != 0)
470 return (EADDRINUSE);
471
472 ip_mrouter = so;
473
474 mfchashtbl =
475 hashinit(MFCTBLSIZ, HASH_LIST, M_MRTABLE, M_WAITOK, &mfchash);
476 bzero((caddr_t)nexpire, sizeof(nexpire));
477
478 pim_assert = 0;
479
480 callout_init(&expire_upcalls_ch);
481 callout_reset(&expire_upcalls_ch, EXPIRE_TIMEOUT,
482 expire_upcalls, NULL);
483
484 if (mrtdebug)
485 log(LOG_DEBUG, "ip_mrouter_init\n");
486
487 return (0);
488 }
489
490 /*
491 * Disable multicast routing
492 */
493 int
494 ip_mrouter_done()
495 {
496 vifi_t vifi;
497 struct vif *vifp;
498 int i;
499 int s;
500
501 s = splsoftnet();
502
503 /* Clear out all the vifs currently in use. */
504 for (vifi = 0; vifi < numvifs; vifi++) {
505 vifp = &viftable[vifi];
506 if (!in_nullhost(vifp->v_lcl_addr))
507 reset_vif(vifp);
508 }
509
510 numvifs = 0;
511 pim_assert = 0;
512
513 callout_stop(&expire_upcalls_ch);
514
515 /*
516 * Free all multicast forwarding cache entries.
517 */
518 for (i = 0; i < MFCTBLSIZ; i++) {
519 struct mfc *rt, *nrt;
520
521 for (rt = LIST_FIRST(&mfchashtbl[i]); rt; rt = nrt) {
522 nrt = LIST_NEXT(rt, mfc_hash);
523
524 expire_mfc(rt);
525 }
526 }
527
528 free(mfchashtbl, M_MRTABLE);
529 mfchashtbl = 0;
530
531 /* Reset de-encapsulation cache. */
532
533 ip_mrouter = 0;
534
535 splx(s);
536
537 if (mrtdebug)
538 log(LOG_DEBUG, "ip_mrouter_done\n");
539
540 return (0);
541 }
542
543 void
544 ip_mrouter_detach(ifp)
545 struct ifnet *ifp;
546 {
547 int vifi, i;
548 struct vif *vifp;
549 struct mfc *rt, *nrt;
550 struct rtdetq *rte, *nrte, **prte;
551
552 /* XXX not sure about sideeffect to userland routing daemon */
553 for (vifi = 0; vifi < numvifs; vifi++) {
554 vifp = &viftable[vifi];
555 if (vifp->v_ifp == ifp)
556 reset_vif(vifp);
557 }
558 for (i = 0; i < MFCTBLSIZ; i++) {
559 for (rt = LIST_FIRST(&mfchashtbl[i]); rt; rt = nrt) {
560 nrt = LIST_NEXT(rt, mfc_hash);
561
562 prte = &rt->mfc_stall;
563 for (rte = *prte; rte; rte = nrte) {
564 nrte = rte->next;
565 if (rte->ifp == ifp) {
566 m_freem(rte->m);
567 free(rte, M_MRTABLE);
568 *prte = nrte;
569 } else
570 prte = &rte->next;
571 }
572
573 if (rt->mfc_expire == 0)
574 continue;
575 nexpire[i]--;
576 ++mrtstat.mrts_cache_cleanups;
577 expire_mfc(rt);
578 }
579 }
580 }
581
582 static int
583 get_version(m)
584 struct mbuf *m;
585 {
586 int *v = mtod(m, int *);
587
588 *v = 0x0305; /* XXX !!!! */
589 m->m_len = sizeof(int);
590 return (0);
591 }
592
593 /*
594 * Set PIM assert processing global
595 */
596 static int
597 set_assert(m)
598 struct mbuf *m;
599 {
600 int *i;
601
602 if (m == 0 || m->m_len < sizeof(int))
603 return (EINVAL);
604
605 i = mtod(m, int *);
606 pim_assert = !!*i;
607 return (0);
608 }
609
610 /*
611 * Get PIM assert processing global
612 */
613 static int
614 get_assert(m)
615 struct mbuf *m;
616 {
617 int *i = mtod(m, int *);
618
619 *i = pim_assert;
620 m->m_len = sizeof(int);
621 return (0);
622 }
623
/*
 * Scratch AF_INET socket address shared by add_vif() (interface lookup)
 * and ip_mforward() (source address of an upcall); callers overwrite
 * sin_addr before each use.
 */
static struct sockaddr_in sin = { sizeof(sin), AF_INET };
625
626 /*
627 * Add a vif to the vif table
628 */
629 static int
630 add_vif(m)
631 struct mbuf *m;
632 {
633 struct vifctl *vifcp;
634 struct vif *vifp;
635 struct ifaddr *ifa;
636 struct ifnet *ifp;
637 struct ifreq ifr;
638 int error, s;
639
640 if (m == 0 || m->m_len < sizeof(struct vifctl))
641 return (EINVAL);
642
643 vifcp = mtod(m, struct vifctl *);
644 if (vifcp->vifc_vifi >= MAXVIFS)
645 return (EINVAL);
646
647 vifp = &viftable[vifcp->vifc_vifi];
648 if (!in_nullhost(vifp->v_lcl_addr))
649 return (EADDRINUSE);
650
651 /* Find the interface with an address in AF_INET family. */
652 sin.sin_addr = vifcp->vifc_lcl_addr;
653 ifa = ifa_ifwithaddr(sintosa(&sin));
654 if (ifa == 0)
655 return (EADDRNOTAVAIL);
656
657 if (vifcp->vifc_flags & VIFF_TUNNEL) {
658 if (vifcp->vifc_flags & VIFF_SRCRT) {
659 log(LOG_ERR, "Source routed tunnels not supported\n");
660 return (EOPNOTSUPP);
661 }
662
663 /* attach this vif to decapsulator dispatch table */
664 vifp->v_encap_cookie = encap_attach_func(AF_INET, IPPROTO_IPV4,
665 vif_encapcheck, &vif_protosw, vifp);
666 if (!vifp->v_encap_cookie)
667 return (EINVAL);
668
669 /* Create a fake encapsulation interface. */
670 ifp = (struct ifnet *)malloc(sizeof(*ifp), M_MRTABLE, M_WAITOK);
671 bzero(ifp, sizeof(*ifp));
672 sprintf(ifp->if_xname, "mdecap%d", vifcp->vifc_vifi);
673
674 /* Prepare cached route entry. */
675 bzero(&vifp->v_route, sizeof(vifp->v_route));
676 } else {
677 /* Use the physical interface associated with the address. */
678 ifp = ifa->ifa_ifp;
679
680 /* Make sure the interface supports multicast. */
681 if ((ifp->if_flags & IFF_MULTICAST) == 0)
682 return (EOPNOTSUPP);
683
684 /* Enable promiscuous reception of all IP multicasts. */
685 satosin(&ifr.ifr_addr)->sin_len = sizeof(struct sockaddr_in);
686 satosin(&ifr.ifr_addr)->sin_family = AF_INET;
687 satosin(&ifr.ifr_addr)->sin_addr = zeroin_addr;
688 error = (*ifp->if_ioctl)(ifp, SIOCADDMULTI, (caddr_t)&ifr);
689 if (error)
690 return (error);
691 }
692
693 s = splsoftnet();
694
695 /* Define parameters for the tbf structure. */
696 vifp->tbf_q = 0;
697 vifp->tbf_t = &vifp->tbf_q;
698 microtime(&vifp->tbf_last_pkt_t);
699 vifp->tbf_n_tok = 0;
700 vifp->tbf_q_len = 0;
701 vifp->tbf_max_q_len = MAXQSIZE;
702
703 vifp->v_flags = vifcp->vifc_flags;
704 vifp->v_threshold = vifcp->vifc_threshold;
705 /* scaling up here allows division by 1024 in critical code */
706 vifp->v_rate_limit = vifcp->vifc_rate_limit * 1024 / 1000;
707 vifp->v_lcl_addr = vifcp->vifc_lcl_addr;
708 vifp->v_rmt_addr = vifcp->vifc_rmt_addr;
709 vifp->v_ifp = ifp;
710 /* Initialize per vif pkt counters. */
711 vifp->v_pkt_in = 0;
712 vifp->v_pkt_out = 0;
713 vifp->v_bytes_in = 0;
714 vifp->v_bytes_out = 0;
715
716 callout_init(&vifp->v_repq_ch);
717
718 #ifdef RSVP_ISI
719 vifp->v_rsvp_on = 0;
720 vifp->v_rsvpd = 0;
721 #endif /* RSVP_ISI */
722
723 splx(s);
724
725 /* Adjust numvifs up if the vifi is higher than numvifs. */
726 if (numvifs <= vifcp->vifc_vifi)
727 numvifs = vifcp->vifc_vifi + 1;
728
729 if (mrtdebug)
730 log(LOG_DEBUG, "add_vif #%d, lcladdr %x, %s %x, thresh %x, rate %d\n",
731 vifcp->vifc_vifi,
732 ntohl(vifcp->vifc_lcl_addr.s_addr),
733 (vifcp->vifc_flags & VIFF_TUNNEL) ? "rmtaddr" : "mask",
734 ntohl(vifcp->vifc_rmt_addr.s_addr),
735 vifcp->vifc_threshold,
736 vifcp->vifc_rate_limit);
737
738 return (0);
739 }
740
741 void
742 reset_vif(vifp)
743 struct vif *vifp;
744 {
745 struct mbuf *m, *n;
746 struct ifnet *ifp;
747 struct ifreq ifr;
748
749 callout_stop(&vifp->v_repq_ch);
750
751 /* detach this vif from decapsulator dispatch table */
752 encap_detach(vifp->v_encap_cookie);
753 vifp->v_encap_cookie = NULL;
754
755 for (m = vifp->tbf_q; m != 0; m = n) {
756 n = m->m_nextpkt;
757 m_freem(m);
758 }
759
760 if (vifp->v_flags & VIFF_TUNNEL) {
761 free(vifp->v_ifp, M_MRTABLE);
762 if (vifp == last_encap_vif) {
763 last_encap_vif = 0;
764 last_encap_src = zeroin_addr;
765 }
766 } else {
767 satosin(&ifr.ifr_addr)->sin_len = sizeof(struct sockaddr_in);
768 satosin(&ifr.ifr_addr)->sin_family = AF_INET;
769 satosin(&ifr.ifr_addr)->sin_addr = zeroin_addr;
770 ifp = vifp->v_ifp;
771 (*ifp->if_ioctl)(ifp, SIOCDELMULTI, (caddr_t)&ifr);
772 }
773 bzero((caddr_t)vifp, sizeof(*vifp));
774 }
775
776 /*
777 * Delete a vif from the vif table
778 */
779 static int
780 del_vif(m)
781 struct mbuf *m;
782 {
783 vifi_t *vifip;
784 struct vif *vifp;
785 vifi_t vifi;
786 int s;
787
788 if (m == 0 || m->m_len < sizeof(vifi_t))
789 return (EINVAL);
790
791 vifip = mtod(m, vifi_t *);
792 if (*vifip >= numvifs)
793 return (EINVAL);
794
795 vifp = &viftable[*vifip];
796 if (in_nullhost(vifp->v_lcl_addr))
797 return (EADDRNOTAVAIL);
798
799 s = splsoftnet();
800
801 reset_vif(vifp);
802
803 /* Adjust numvifs down */
804 for (vifi = numvifs; vifi > 0; vifi--)
805 if (!in_nullhost(viftable[vifi-1].v_lcl_addr))
806 break;
807 numvifs = vifi;
808
809 splx(s);
810
811 if (mrtdebug)
812 log(LOG_DEBUG, "del_vif %d, numvifs %d\n", *vifip, numvifs);
813
814 return (0);
815 }
816
817 static void
818 update_mfc(mfccp, rt)
819 struct mfcctl *mfccp;
820 struct mfc *rt;
821 {
822 vifi_t vifi;
823
824 rt->mfc_parent = mfccp->mfcc_parent;
825 for (vifi = 0; vifi < numvifs; vifi++)
826 rt->mfc_ttls[vifi] = mfccp->mfcc_ttls[vifi];
827 rt->mfc_expire = 0;
828 rt->mfc_stall = 0;
829 }
830
831 static void
832 expire_mfc(rt)
833 struct mfc *rt;
834 {
835 struct rtdetq *rte, *nrte;
836
837 for (rte = rt->mfc_stall; rte != 0; rte = nrte) {
838 nrte = rte->next;
839 m_freem(rte->m);
840 free(rte, M_MRTABLE);
841 }
842
843 LIST_REMOVE(rt, mfc_hash);
844 free(rt, M_MRTABLE);
845 }
846
847 /*
848 * Add an mfc entry
849 */
850 static int
851 add_mfc(m)
852 struct mbuf *m;
853 {
854 struct mfcctl *mfccp;
855 struct mfc *rt;
856 u_int32_t hash = 0;
857 struct rtdetq *rte, *nrte;
858 u_short nstl;
859 int s;
860
861 if (m == 0 || m->m_len < sizeof(struct mfcctl))
862 return (EINVAL);
863
864 mfccp = mtod(m, struct mfcctl *);
865
866 s = splsoftnet();
867 MFCFIND(mfccp->mfcc_origin, mfccp->mfcc_mcastgrp, rt);
868
869 /* If an entry already exists, just update the fields */
870 if (rt) {
871 if (mrtdebug & DEBUG_MFC)
872 log(LOG_DEBUG, "add_mfc update o %x g %x p %x\n",
873 ntohl(mfccp->mfcc_origin.s_addr),
874 ntohl(mfccp->mfcc_mcastgrp.s_addr),
875 mfccp->mfcc_parent);
876
877 if (rt->mfc_expire)
878 nexpire[hash]--;
879
880 update_mfc(mfccp, rt);
881
882 splx(s);
883 return (0);
884 }
885
886 /*
887 * Find the entry for which the upcall was made and update
888 */
889 nstl = 0;
890 hash = MFCHASH(mfccp->mfcc_origin, mfccp->mfcc_mcastgrp);
891 LIST_FOREACH(rt, &mfchashtbl[hash], mfc_hash) {
892 if (in_hosteq(rt->mfc_origin, mfccp->mfcc_origin) &&
893 in_hosteq(rt->mfc_mcastgrp, mfccp->mfcc_mcastgrp) &&
894 rt->mfc_stall != 0) {
895 if (nstl++)
896 log(LOG_ERR, "add_mfc %s o %x g %x p %x dbx %p\n",
897 "multiple kernel entries",
898 ntohl(mfccp->mfcc_origin.s_addr),
899 ntohl(mfccp->mfcc_mcastgrp.s_addr),
900 mfccp->mfcc_parent, rt->mfc_stall);
901
902 if (mrtdebug & DEBUG_MFC)
903 log(LOG_DEBUG, "add_mfc o %x g %x p %x dbg %p\n",
904 ntohl(mfccp->mfcc_origin.s_addr),
905 ntohl(mfccp->mfcc_mcastgrp.s_addr),
906 mfccp->mfcc_parent, rt->mfc_stall);
907
908 if (rt->mfc_expire)
909 nexpire[hash]--;
910
911 rte = rt->mfc_stall;
912 update_mfc(mfccp, rt);
913
914 /* free packets Qed at the end of this entry */
915 for (; rte != 0; rte = nrte) {
916 nrte = rte->next;
917 #ifdef RSVP_ISI
918 ip_mdq(rte->m, rte->ifp, rt, -1);
919 #else
920 ip_mdq(rte->m, rte->ifp, rt);
921 #endif /* RSVP_ISI */
922 m_freem(rte->m);
923 #ifdef UPCALL_TIMING
924 collate(&rte->t);
925 #endif /* UPCALL_TIMING */
926 free(rte, M_MRTABLE);
927 }
928 }
929 }
930
931 if (nstl == 0) {
932 /*
933 * No mfc; make a new one
934 */
935 if (mrtdebug & DEBUG_MFC)
936 log(LOG_DEBUG, "add_mfc no upcall o %x g %x p %x\n",
937 ntohl(mfccp->mfcc_origin.s_addr),
938 ntohl(mfccp->mfcc_mcastgrp.s_addr),
939 mfccp->mfcc_parent);
940
941 rt = (struct mfc *)malloc(sizeof(*rt), M_MRTABLE, M_NOWAIT);
942 if (rt == 0) {
943 splx(s);
944 return (ENOBUFS);
945 }
946
947 rt->mfc_origin = mfccp->mfcc_origin;
948 rt->mfc_mcastgrp = mfccp->mfcc_mcastgrp;
949 /* initialize pkt counters per src-grp */
950 rt->mfc_pkt_cnt = 0;
951 rt->mfc_byte_cnt = 0;
952 rt->mfc_wrong_if = 0;
953 timerclear(&rt->mfc_last_assert);
954 update_mfc(mfccp, rt);
955
956 /* insert new entry at head of hash chain */
957 LIST_INSERT_HEAD(&mfchashtbl[hash], rt, mfc_hash);
958 }
959
960 splx(s);
961 return (0);
962 }
963
#ifdef UPCALL_TIMING
/*
 * Record one upcall delay in the upcall_data histogram.  `t' is the time
 * the packet was stalled; if it lies before the current time, the
 * elapsed microseconds are shifted right by 10, clamped to 50, and the
 * matching bucket is incremented.  Otherwise nothing is recorded.
 */
static void
collate(t)
	struct timeval *t;
{
	struct timeval now;
	u_int32_t elapsed;
	u_int32_t bucket;

	microtime(&now);

	if (!timercmp(t, &now, <))
		return;

	TV_DELTA(now, *t, elapsed);

	bucket = elapsed >> 10;
	if (bucket > 50)
		bucket = 50;

	++upcall_data[bucket];
}
#endif /* UPCALL_TIMING */
988
989 /*
990 * Delete an mfc entry
991 */
992 static int
993 del_mfc(m)
994 struct mbuf *m;
995 {
996 struct mfcctl *mfccp;
997 struct mfc *rt;
998 int s;
999
1000 if (m == 0 || m->m_len < sizeof(struct mfcctl))
1001 return (EINVAL);
1002
1003 mfccp = mtod(m, struct mfcctl *);
1004
1005 if (mrtdebug & DEBUG_MFC)
1006 log(LOG_DEBUG, "del_mfc origin %x mcastgrp %x\n",
1007 ntohl(mfccp->mfcc_origin.s_addr),
1008 ntohl(mfccp->mfcc_mcastgrp.s_addr));
1009
1010 s = splsoftnet();
1011
1012 MFCFIND(mfccp->mfcc_origin, mfccp->mfcc_mcastgrp, rt);
1013 if (rt == 0) {
1014 splx(s);
1015 return (EADDRNOTAVAIL);
1016 }
1017
1018 LIST_REMOVE(rt, mfc_hash);
1019 free(rt, M_MRTABLE);
1020
1021 splx(s);
1022 return (0);
1023 }
1024
1025 static int
1026 socket_send(s, mm, src)
1027 struct socket *s;
1028 struct mbuf *mm;
1029 struct sockaddr_in *src;
1030 {
1031 if (s) {
1032 if (sbappendaddr(&s->so_rcv, sintosa(src), mm,
1033 (struct mbuf *)0) != 0) {
1034 sorwakeup(s);
1035 return (0);
1036 }
1037 }
1038 m_freem(mm);
1039 return (-1);
1040 }
1041
/*
 * IP multicast forwarding function. This function assumes that the packet
 * pointed to by "ip" has arrived on (or is about to be sent to) the interface
 * pointed to by "ifp", and the packet is to be relayed to other networks
 * that have members of the packet's destination IP multicast group.
 *
 * The packet is returned unscathed to the caller, unless it is
 * erroneous, in which case a non-zero return value tells the caller to
 * discard it.
 *
 * Return values visible in this function:
 *	1	  the packet carries the old source-route tunnel option
 *	0	  the packet is not forwarded (ttl <= 1 or local-only
 *		  group), was queued pending an upcall, or was dropped
 *		  because the upcall queue for its entry is full
 *	ENOBUFS	  an allocation or copy failed, or the routing daemon's
 *		  socket could not take the upcall message
 *	otherwise the result of ip_mdq() for a cached route
 */

#define IP_HDR_LEN  20	/* # bytes of fixed IP header (excluding options) */
#define TUNNEL_LEN  12	/* # bytes of IP option for tunnel encapsulation */

int
#ifdef RSVP_ISI
ip_mforward(m, ifp, imo)
#else
ip_mforward(m, ifp)
#endif /* RSVP_ISI */
	struct mbuf *m;
	struct ifnet *ifp;
#ifdef RSVP_ISI
	struct ip_moptions *imo;
#endif /* RSVP_ISI */
{
	struct ip *ip = mtod(m, struct ip *);
	struct mfc *rt;
	/* Counts source-routed packets so only every 1000th is logged. */
	static int srctun = 0;
	struct mbuf *mm;
	int s;
#ifdef RSVP_ISI
	struct vif *vifp;
	vifi_t vifi;
#endif /* RSVP_ISI */

	/*
	 * Clear any in-bound checksum flags for this packet.
	 */
	m->m_pkthdr.csum_flags = 0;

	if (mrtdebug & DEBUG_FORWARD)
		log(LOG_DEBUG, "ip_mforward: src %x, dst %x, ifp %p\n",
		    ntohl(ip->ip_src.s_addr), ntohl(ip->ip_dst.s_addr), ifp);

	/*
	 * A header too short to hold the tunnel option, or whose first
	 * option is not LSRR, is a normal packet.
	 */
	if (ip->ip_hl < (IP_HDR_LEN + TUNNEL_LEN) >> 2 ||
	    ((u_char *)(ip + 1))[1] != IPOPT_LSRR) {
		/*
		 * Packet arrived via a physical interface or
		 * an encapsulated tunnel.
		 */
	} else {
		/*
		 * Packet arrived through a source-route tunnel.
		 * Source-route tunnels are no longer supported.
		 */
		if ((srctun++ % 1000) == 0)
			log(LOG_ERR,
			    "ip_mforward: received source-routed packet from %x\n",
			    ntohl(ip->ip_src.s_addr));

		return (1);
	}

#ifdef RSVP_ISI
	/*
	 * The caller named an explicit vif in its multicast options:
	 * send on that vif directly, bypassing the cache lookup.
	 */
	if (imo && ((vifi = imo->imo_multicast_vif) < numvifs)) {
		if (ip->ip_ttl < 255)
			ip->ip_ttl++;	/* compensate for -1 in *_send routines */
		if (rsvpdebug && ip->ip_p == IPPROTO_RSVP) {
			vifp = viftable + vifi;
			printf("Sending IPPROTO_RSVP from %x to %x on vif %d (%s%s)\n",
			    ntohl(ip->ip_src), ntohl(ip->ip_dst), vifi,
			    (vifp->v_flags & VIFF_TUNNEL) ? "tunnel on " : "",
			    vifp->v_ifp->if_xname);
		}
		return (ip_mdq(m, ifp, (struct mfc *)0, vifi));
	}
	if (rsvpdebug && ip->ip_p == IPPROTO_RSVP) {
		printf("Warning: IPPROTO_RSVP from %x to %x without vif option\n",
		    ntohl(ip->ip_src), ntohl(ip->ip_dst));
	}
#endif /* RSVP_ISI */

	/*
	 * Don't forward a packet with time-to-live of zero or one,
	 * or a packet destined to a local-only group.
	 */
	if (ip->ip_ttl <= 1 || IN_LOCAL_GROUP(ip->ip_dst.s_addr))
		return (0);

	/*
	 * Determine forwarding vifs from the forwarding cache table
	 */
	s = splsoftnet();
	MFCFIND(ip->ip_src, ip->ip_dst, rt);

	/* Entry exists, so forward if necessary */
	if (rt != 0) {
		splx(s);
#ifdef RSVP_ISI
		return (ip_mdq(m, ifp, rt, -1));
#else
		return (ip_mdq(m, ifp, rt));
#endif /* RSVP_ISI */
	} else {
		/*
		 * If we don't have a route for packet's origin,
		 * Make a copy of the packet &
		 * send message to routing daemon
		 */

		struct mbuf *mb0;
		struct rtdetq *rte;
		u_int32_t hash;
		int hlen = ip->ip_hl << 2;
#ifdef UPCALL_TIMING
		struct timeval tp;

		microtime(&tp);
#endif /* UPCALL_TIMING */

		mrtstat.mrts_no_route++;
		if (mrtdebug & (DEBUG_FORWARD | DEBUG_MFC))
			log(LOG_DEBUG, "ip_mforward: no rte s %x g %x\n",
			    ntohl(ip->ip_src.s_addr),
			    ntohl(ip->ip_dst.s_addr));

		/*
		 * Allocate mbufs early so that we don't do extra work if we are
		 * just going to fail anyway.  Make sure to pullup the header so
		 * that other people can't step on it.
		 */
		rte = (struct rtdetq *)malloc(sizeof(*rte), M_MRTABLE,
		    M_NOWAIT);
		if (rte == 0) {
			splx(s);
			return (ENOBUFS);
		}
		/* mb0 is the full copy that will wait on the stall queue. */
		mb0 = m_copy(m, 0, M_COPYALL);
		M_PULLUP(mb0, hlen);
		if (mb0 == 0) {
			free(rte, M_MRTABLE);
			splx(s);
			return (ENOBUFS);
		}

		/* is there an upcall waiting for this packet? */
		hash = MFCHASH(ip->ip_src, ip->ip_dst);
		LIST_FOREACH(rt, &mfchashtbl[hash], mfc_hash) {
			if (in_hosteq(ip->ip_src, rt->mfc_origin) &&
			    in_hosteq(ip->ip_dst, rt->mfc_mcastgrp) &&
			    rt->mfc_stall != 0)
				break;
		}

		/* rt is 0 here when the loop ran off the end of the chain. */
		if (rt == 0) {
			int i;
			struct igmpmsg *im;

			/* no upcall, so make a new entry */
			rt = (struct mfc *)malloc(sizeof(*rt), M_MRTABLE,
			    M_NOWAIT);
			if (rt == 0) {
				free(rte, M_MRTABLE);
				m_freem(mb0);
				splx(s);
				return (ENOBUFS);
			}
			/*
			 * Make a copy of the header to send to the user level
			 * process
			 */
			mm = m_copy(m, 0, hlen);
			M_PULLUP(mm, hlen);
			if (mm == 0) {
				free(rte, M_MRTABLE);
				m_freem(mb0);
				free(rt, M_MRTABLE);
				splx(s);
				return (ENOBUFS);
			}

			/*
			 * Send message to routing daemon to install
			 * a route into the kernel table
			 */
			sin.sin_addr = ip->ip_src;

			/* The copied header is reused as the upcall message. */
			im = mtod(mm, struct igmpmsg *);
			im->im_msgtype = IGMPMSG_NOCACHE;
			im->im_mbz = 0;

			mrtstat.mrts_upcalls++;

			/* socket_send() frees mm itself when it fails. */
			if (socket_send(ip_mrouter, mm, &sin) < 0) {
				log(LOG_WARNING,
				    "ip_mforward: ip_mrouter socket queue full\n");
				++mrtstat.mrts_upq_sockfull;
				free(rte, M_MRTABLE);
				m_freem(mb0);
				free(rt, M_MRTABLE);
				splx(s);
				return (ENOBUFS);
			}

			/* insert new entry at head of hash chain */
			rt->mfc_origin = ip->ip_src;
			rt->mfc_mcastgrp = ip->ip_dst;
			rt->mfc_pkt_cnt = 0;
			rt->mfc_byte_cnt = 0;
			rt->mfc_wrong_if = 0;
			/* Nonzero mfc_expire marks the entry as pending. */
			rt->mfc_expire = UPCALL_EXPIRE;
			nexpire[hash]++;
			for (i = 0; i < numvifs; i++)
				rt->mfc_ttls[i] = 0;
			rt->mfc_parent = -1;

			/* link into table */
			LIST_INSERT_HEAD(&mfchashtbl[hash], rt, mfc_hash);
			/* Add this entry to the end of the queue */
			rt->mfc_stall = rte;
		} else {
			/* determine if q has overflowed */
			struct rtdetq **p;
			int npkts = 0;

			/* Walk to the tail; give up once MAX_UPQ is exceeded. */
			for (p = &rt->mfc_stall; *p != 0; p = &(*p)->next)
				if (++npkts > MAX_UPQ) {
					mrtstat.mrts_upq_ovflw++;
					free(rte, M_MRTABLE);
					m_freem(mb0);
					splx(s);
					return (0);
				}

			/* Add this entry to the end of the queue */
			*p = rte;
		}

		/* Fill in the queue record now that it is linked. */
		rte->next = 0;
		rte->m = mb0;
		rte->ifp = ifp;
#ifdef UPCALL_TIMING
		rte->t = tp;
#endif /* UPCALL_TIMING */

		splx(s);

		return (0);
	}
}
1293
1294
1295 /*ARGSUSED*/
1296 static void
1297 expire_upcalls(v)
1298 void *v;
1299 {
1300 int i;
1301 int s;
1302
1303 s = splsoftnet();
1304
1305 for (i = 0; i < MFCTBLSIZ; i++) {
1306 struct mfc *rt, *nrt;
1307
1308 if (nexpire[i] == 0)
1309 continue;
1310
1311 for (rt = LIST_FIRST(&mfchashtbl[i]); rt; rt = nrt) {
1312 nrt = LIST_NEXT(rt, mfc_hash);
1313
1314 if (rt->mfc_expire == 0 || --rt->mfc_expire > 0)
1315 continue;
1316 nexpire[i]--;
1317
1318 ++mrtstat.mrts_cache_cleanups;
1319 if (mrtdebug & DEBUG_EXPIRE)
1320 log(LOG_DEBUG,
1321 "expire_upcalls: expiring (%x %x)\n",
1322 ntohl(rt->mfc_origin.s_addr),
1323 ntohl(rt->mfc_mcastgrp.s_addr));
1324
1325 expire_mfc(rt);
1326 }
1327 }
1328
1329 splx(s);
1330 callout_reset(&expire_upcalls_ch, EXPIRE_TIMEOUT,
1331 expire_upcalls, NULL);
1332 }
1333
1334 /*
1335 * Packet forwarding routine once entry in the cache is made
1336 */
1337 static int
1338 #ifdef RSVP_ISI
1339 ip_mdq(m, ifp, rt, xmt_vif)
1340 #else
1341 ip_mdq(m, ifp, rt)
1342 #endif /* RSVP_ISI */
1343 struct mbuf *m;
1344 struct ifnet *ifp;
1345 struct mfc *rt;
1346 #ifdef RSVP_ISI
1347 vifi_t xmt_vif;
1348 #endif /* RSVP_ISI */
1349 {
1350 struct ip *ip = mtod(m, struct ip *);
1351 vifi_t vifi;
1352 struct vif *vifp;
1353 int plen = ntohs(ip->ip_len) - (ip->ip_hl << 2);
1354
1355 /*
1356 * Macro to send packet on vif. Since RSVP packets don't get counted on
1357 * input, they shouldn't get counted on output, so statistics keeping is
1358 * separate.
1359 */
1360 #define MC_SEND(ip, vifp, m) do { \
1361 if ((vifp)->v_flags & VIFF_TUNNEL) \
1362 encap_send((ip), (vifp), (m)); \
1363 else \
1364 phyint_send((ip), (vifp), (m)); \
1365 } while (/*CONSTCOND*/ 0)
1366
1367 #ifdef RSVP_ISI
1368 /*
1369 * If xmt_vif is not -1, send on only the requested vif.
1370 *
1371 * (since vifi_t is u_short, -1 becomes MAXUSHORT, which > numvifs.
1372 */
1373 if (xmt_vif < numvifs) {
1374 MC_SEND(ip, viftable + xmt_vif, m);
1375 return (1);
1376 }
1377 #endif /* RSVP_ISI */
1378
1379 /*
1380 * Don't forward if it didn't arrive from the parent vif for its origin.
1381 */
1382 vifi = rt->mfc_parent;
1383 if ((vifi >= numvifs) || (viftable[vifi].v_ifp != ifp)) {
1384 /* came in the wrong interface */
1385 if (mrtdebug & DEBUG_FORWARD)
1386 log(LOG_DEBUG, "wrong if: ifp %p vifi %d vififp %p\n",
1387 ifp, vifi,
1388 vifi >= numvifs ? 0 : viftable[vifi].v_ifp);
1389 ++mrtstat.mrts_wrong_if;
1390 ++rt->mfc_wrong_if;
1391 /*
1392 * If we are doing PIM assert processing, and we are forwarding
1393 * packets on this interface, and it is a broadcast medium
1394 * interface (and not a tunnel), send a message to the routing
1395 * daemon.
1396 */
1397 if (pim_assert && rt->mfc_ttls[vifi] &&
1398 (ifp->if_flags & IFF_BROADCAST) &&
1399 !(viftable[vifi].v_flags & VIFF_TUNNEL)) {
1400 struct mbuf *mm;
1401 struct igmpmsg *im;
1402 int hlen = ip->ip_hl << 2;
1403 struct timeval now;
1404 u_int32_t delta;
1405
1406 microtime(&now);
1407
1408 TV_DELTA(rt->mfc_last_assert, now, delta);
1409
1410 if (delta > ASSERT_MSG_TIME) {
1411 mm = m_copy(m, 0, hlen);
1412 M_PULLUP(mm, hlen);
1413 if (mm == 0) {
1414 return (ENOBUFS);
1415 }
1416
1417 rt->mfc_last_assert = now;
1418
1419 im = mtod(mm, struct igmpmsg *);
1420 im->im_msgtype = IGMPMSG_WRONGVIF;
1421 im->im_mbz = 0;
1422 im->im_vif = vifi;
1423
1424 sin.sin_addr = im->im_src;
1425
1426 socket_send(ip_mrouter, mm, &sin);
1427 }
1428 }
1429 return (0);
1430 }
1431
1432 /* If I sourced this packet, it counts as output, else it was input. */
1433 if (in_hosteq(ip->ip_src, viftable[vifi].v_lcl_addr)) {
1434 viftable[vifi].v_pkt_out++;
1435 viftable[vifi].v_bytes_out += plen;
1436 } else {
1437 viftable[vifi].v_pkt_in++;
1438 viftable[vifi].v_bytes_in += plen;
1439 }
1440 rt->mfc_pkt_cnt++;
1441 rt->mfc_byte_cnt += plen;
1442
1443 /*
1444 * For each vif, decide if a copy of the packet should be forwarded.
1445 * Forward if:
1446 * - the ttl exceeds the vif's threshold
1447 * - there are group members downstream on interface
1448 */
1449 for (vifp = viftable, vifi = 0; vifi < numvifs; vifp++, vifi++)
1450 if ((rt->mfc_ttls[vifi] > 0) &&
1451 (ip->ip_ttl > rt->mfc_ttls[vifi])) {
1452 vifp->v_pkt_out++;
1453 vifp->v_bytes_out += plen;
1454 MC_SEND(ip, vifp, m);
1455 }
1456
1457 return (0);
1458 }
1459
1460 #ifdef RSVP_ISI
1461 /*
1462 * check if a vif number is legal/ok. This is used by ip_output, to export
1463 * numvifs there,
1464 */
1465 int
1466 legal_vif_num(vif)
1467 int vif;
1468 {
1469 if (vif >= 0 && vif < numvifs)
1470 return (1);
1471 else
1472 return (0);
1473 }
1474 #endif /* RSVP_ISI */
1475
1476 static void
1477 phyint_send(ip, vifp, m)
1478 struct ip *ip;
1479 struct vif *vifp;
1480 struct mbuf *m;
1481 {
1482 struct mbuf *mb_copy;
1483 int hlen = ip->ip_hl << 2;
1484
1485 /*
1486 * Make a new reference to the packet; make sure that
1487 * the IP header is actually copied, not just referenced,
1488 * so that ip_output() only scribbles on the copy.
1489 */
1490 mb_copy = m_copy(m, 0, M_COPYALL);
1491 M_PULLUP(mb_copy, hlen);
1492 if (mb_copy == 0)
1493 return;
1494
1495 if (vifp->v_rate_limit <= 0)
1496 tbf_send_packet(vifp, mb_copy);
1497 else
1498 tbf_control(vifp, mb_copy, mtod(mb_copy, struct ip *),
1499 ntohs(ip->ip_len));
1500 }
1501
1502 static void
1503 encap_send(ip, vifp, m)
1504 struct ip *ip;
1505 struct vif *vifp;
1506 struct mbuf *m;
1507 {
1508 struct mbuf *mb_copy;
1509 struct ip *ip_copy;
1510 int i, len = ntohs(ip->ip_len) + sizeof(multicast_encap_iphdr);
1511
1512 /*
1513 * copy the old packet & pullup it's IP header into the
1514 * new mbuf so we can modify it. Try to fill the new
1515 * mbuf since if we don't the ethernet driver will.
1516 */
1517 MGETHDR(mb_copy, M_DONTWAIT, MT_DATA);
1518 if (mb_copy == 0)
1519 return;
1520 mb_copy->m_data += max_linkhdr;
1521 mb_copy->m_pkthdr.len = len;
1522 mb_copy->m_len = sizeof(multicast_encap_iphdr);
1523
1524 if ((mb_copy->m_next = m_copy(m, 0, M_COPYALL)) == 0) {
1525 m_freem(mb_copy);
1526 return;
1527 }
1528 i = MHLEN - max_linkhdr;
1529 if (i > len)
1530 i = len;
1531 mb_copy = m_pullup(mb_copy, i);
1532 if (mb_copy == 0)
1533 return;
1534
1535 /*
1536 * fill in the encapsulating IP header.
1537 */
1538 ip_copy = mtod(mb_copy, struct ip *);
1539 *ip_copy = multicast_encap_iphdr;
1540 ip_copy->ip_id = htons(ip_id++);
1541 ip_copy->ip_len = htons(len);
1542 ip_copy->ip_src = vifp->v_lcl_addr;
1543 ip_copy->ip_dst = vifp->v_rmt_addr;
1544
1545 /*
1546 * turn the encapsulated IP header back into a valid one.
1547 */
1548 ip = (struct ip *)((caddr_t)ip_copy + sizeof(multicast_encap_iphdr));
1549 --ip->ip_ttl;
1550 ip->ip_sum = 0;
1551 mb_copy->m_data += sizeof(multicast_encap_iphdr);
1552 ip->ip_sum = in_cksum(mb_copy, ip->ip_hl << 2);
1553 mb_copy->m_data -= sizeof(multicast_encap_iphdr);
1554
1555 if (vifp->v_rate_limit <= 0)
1556 tbf_send_packet(vifp, mb_copy);
1557 else
1558 tbf_control(vifp, mb_copy, ip, ntohs(ip_copy->ip_len));
1559 }
1560
1561 /*
1562 * De-encapsulate a packet and feed it back through ip input.
1563 */
1564 static void
1565 #if __STDC__
1566 vif_input(struct mbuf *m, ...)
1567 #else
1568 vif_input(m, va_alist)
1569 struct mbuf *m;
1570 va_dcl
1571 #endif
1572 {
1573 int off, proto;
1574 va_list ap;
1575 struct vif *vifp;
1576 int s;
1577 struct ifqueue *ifq;
1578
1579 va_start(ap, m);
1580 off = va_arg(ap, int);
1581 proto = va_arg(ap, int);
1582 va_end(ap);
1583
1584 vifp = (struct vif *)encap_getarg(m);
1585 if (!vifp || proto != AF_INET) {
1586 m_freem(m);
1587 mrtstat.mrts_bad_tunnel++;
1588 return;
1589 }
1590
1591 m_adj(m, off);
1592 m->m_pkthdr.rcvif = vifp->v_ifp;
1593 ifq = &ipintrq;
1594 s = splnet();
1595 if (IF_QFULL(ifq)) {
1596 IF_DROP(ifq);
1597 m_freem(m);
1598 } else {
1599 IF_ENQUEUE(ifq, m);
1600 /*
1601 * normally we would need a "schednetisr(NETISR_IP)"
1602 * here but we were called by ip_input and it is going
1603 * to loop back & try to dequeue the packet we just
1604 * queued as soon as we return so we avoid the
1605 * unnecessary software interrrupt.
1606 */
1607 }
1608 splx(s);
1609 }
1610
1611 /*
1612 * Check if the packet should be grabbed by us.
1613 */
1614 static int
1615 vif_encapcheck(m, off, proto, arg)
1616 const struct mbuf *m;
1617 int off;
1618 int proto;
1619 void *arg;
1620 {
1621 struct vif *vifp;
1622 struct ip ip;
1623
1624 #ifdef DIAGNOSTIC
1625 if (!arg || proto != IPPROTO_IPV4)
1626 panic("unexpected arg in vif_encapcheck");
1627 #endif
1628
1629 /*
1630 * do not grab the packet if it's not to a multicast destination or if
1631 * we don't have an encapsulating tunnel with the source.
1632 * Note: This code assumes that the remote site IP address
1633 * uniquely identifies the tunnel (i.e., that this site has
1634 * at most one tunnel with the remote site).
1635 */
1636
1637 /* LINTED const cast */
1638 m_copydata((struct mbuf *)m, off, sizeof(ip), (caddr_t)&ip);
1639 if (!IN_MULTICAST(ip.ip_dst.s_addr))
1640 return 0;
1641
1642 /* LINTED const cast */
1643 m_copydata((struct mbuf *)m, 0, sizeof(ip), (caddr_t)&ip);
1644 if (!in_hosteq(ip.ip_src, last_encap_src)) {
1645 vifp = (struct vif *)arg;
1646 if (vifp->v_flags & VIFF_TUNNEL &&
1647 in_hosteq(vifp->v_rmt_addr, ip.ip_src))
1648 ;
1649 else
1650 return 0;
1651 last_encap_vif = vifp;
1652 last_encap_src = ip.ip_src;
1653 } else
1654 vifp = last_encap_vif;
1655
1656 /* 32bit match, since we have checked ip_src only */
1657 return 32;
1658 }
1659
1660 /*
1661 * Token bucket filter module
1662 */
1663 static void
1664 tbf_control(vifp, m, ip, len)
1665 struct vif *vifp;
1666 struct mbuf *m;
1667 struct ip *ip;
1668 u_int32_t len;
1669 {
1670
1671 if (len > MAX_BKT_SIZE) {
1672 /* drop if packet is too large */
1673 mrtstat.mrts_pkt2large++;
1674 m_freem(m);
1675 return;
1676 }
1677
1678 tbf_update_tokens(vifp);
1679
1680 /*
1681 * If there are enough tokens, and the queue is empty, send this packet
1682 * out immediately. Otherwise, try to insert it on this vif's queue.
1683 */
1684 if (vifp->tbf_q_len == 0) {
1685 if (len <= vifp->tbf_n_tok) {
1686 vifp->tbf_n_tok -= len;
1687 tbf_send_packet(vifp, m);
1688 } else {
1689 /* queue packet and timeout till later */
1690 tbf_queue(vifp, m);
1691 callout_reset(&vifp->v_repq_ch, TBF_REPROCESS,
1692 tbf_reprocess_q, vifp);
1693 }
1694 } else {
1695 if (vifp->tbf_q_len >= vifp->tbf_max_q_len &&
1696 !tbf_dq_sel(vifp, ip)) {
1697 /* queue length too much, and couldn't make room */
1698 mrtstat.mrts_q_overflow++;
1699 m_freem(m);
1700 } else {
1701 /* queue length low enough, or made room */
1702 tbf_queue(vifp, m);
1703 tbf_process_q(vifp);
1704 }
1705 }
1706 }
1707
1708 /*
1709 * adds a packet to the queue at the interface
1710 */
1711 static void
1712 tbf_queue(vifp, m)
1713 struct vif *vifp;
1714 struct mbuf *m;
1715 {
1716 int s = splsoftnet();
1717
1718 /* insert at tail */
1719 *vifp->tbf_t = m;
1720 vifp->tbf_t = &m->m_nextpkt;
1721 vifp->tbf_q_len++;
1722
1723 splx(s);
1724 }
1725
1726
1727 /*
1728 * processes the queue at the interface
1729 */
1730 static void
1731 tbf_process_q(vifp)
1732 struct vif *vifp;
1733 {
1734 struct mbuf *m;
1735 int len;
1736 int s = splsoftnet();
1737
1738 /*
1739 * Loop through the queue at the interface and send as many packets
1740 * as possible.
1741 */
1742 for (m = vifp->tbf_q; m != 0; m = vifp->tbf_q) {
1743 len = ntohs(mtod(m, struct ip *)->ip_len);
1744
1745 /* determine if the packet can be sent */
1746 if (len <= vifp->tbf_n_tok) {
1747 /* if so,
1748 * reduce no of tokens, dequeue the packet,
1749 * send the packet.
1750 */
1751 if ((vifp->tbf_q = m->m_nextpkt) == 0)
1752 vifp->tbf_t = &vifp->tbf_q;
1753 --vifp->tbf_q_len;
1754
1755 m->m_nextpkt = 0;
1756 vifp->tbf_n_tok -= len;
1757 tbf_send_packet(vifp, m);
1758 } else
1759 break;
1760 }
1761 splx(s);
1762 }
1763
1764 static void
1765 tbf_reprocess_q(arg)
1766 void *arg;
1767 {
1768 struct vif *vifp = arg;
1769
1770 if (ip_mrouter == 0)
1771 return;
1772
1773 tbf_update_tokens(vifp);
1774 tbf_process_q(vifp);
1775
1776 if (vifp->tbf_q_len != 0)
1777 callout_reset(&vifp->v_repq_ch, TBF_REPROCESS,
1778 tbf_reprocess_q, vifp);
1779 }
1780
1781 /* function that will selectively discard a member of the queue
1782 * based on the precedence value and the priority
1783 */
1784 static int
1785 tbf_dq_sel(vifp, ip)
1786 struct vif *vifp;
1787 struct ip *ip;
1788 {
1789 u_int p;
1790 struct mbuf **mp, *m;
1791 int s = splsoftnet();
1792
1793 p = priority(vifp, ip);
1794
1795 for (mp = &vifp->tbf_q, m = *mp;
1796 m != 0;
1797 mp = &m->m_nextpkt, m = *mp) {
1798 if (p > priority(vifp, mtod(m, struct ip *))) {
1799 if ((*mp = m->m_nextpkt) == 0)
1800 vifp->tbf_t = mp;
1801 --vifp->tbf_q_len;
1802
1803 m_freem(m);
1804 mrtstat.mrts_drop_sel++;
1805 splx(s);
1806 return (1);
1807 }
1808 }
1809 splx(s);
1810 return (0);
1811 }
1812
1813 static void
1814 tbf_send_packet(vifp, m)
1815 struct vif *vifp;
1816 struct mbuf *m;
1817 {
1818 int error;
1819 int s = splsoftnet();
1820
1821 if (vifp->v_flags & VIFF_TUNNEL) {
1822 /* If tunnel options */
1823 #ifdef IPSEC
1824 /* Don't lookup socket in forwading case */
1825 (void)ipsec_setsocket(m, NULL);
1826 #endif
1827 ip_output(m, (struct mbuf *)0, &vifp->v_route,
1828 IP_FORWARDING, (struct ip_moptions *)0);
1829 } else {
1830 /* if physical interface option, extract the options and then send */
1831 struct ip_moptions imo;
1832
1833 imo.imo_multicast_ifp = vifp->v_ifp;
1834 imo.imo_multicast_ttl = mtod(m, struct ip *)->ip_ttl - 1;
1835 imo.imo_multicast_loop = 1;
1836 #ifdef RSVP_ISI
1837 imo.imo_multicast_vif = -1;
1838 #endif
1839
1840 #ifdef IPSEC
1841 /* Don't lookup socket in forwading case */
1842 (void)ipsec_setsocket(m, NULL);
1843 #endif
1844 error = ip_output(m, (struct mbuf *)0, (struct route *)0,
1845 IP_FORWARDING|IP_MULTICASTOPTS, &imo);
1846
1847 if (mrtdebug & DEBUG_XMIT)
1848 log(LOG_DEBUG, "phyint_send on vif %ld err %d\n",
1849 (long)(vifp - viftable), error);
1850 }
1851 splx(s);
1852 }
1853
1854 /* determine the current time and then
1855 * the elapsed time (between the last time and time now)
1856 * in milliseconds & update the no. of tokens in the bucket
1857 */
1858 static void
1859 tbf_update_tokens(vifp)
1860 struct vif *vifp;
1861 {
1862 struct timeval tp;
1863 u_int32_t tm;
1864 int s = splsoftnet();
1865
1866 microtime(&tp);
1867
1868 TV_DELTA(tp, vifp->tbf_last_pkt_t, tm);
1869
1870 /*
1871 * This formula is actually
1872 * "time in seconds" * "bytes/second".
1873 *
1874 * (tm / 1000000) * (v_rate_limit * 1000 * (1000/1024) / 8)
1875 *
1876 * The (1000/1024) was introduced in add_vif to optimize
1877 * this divide into a shift.
1878 */
1879 vifp->tbf_n_tok += tm * vifp->v_rate_limit / 8192;
1880 vifp->tbf_last_pkt_t = tp;
1881
1882 if (vifp->tbf_n_tok > MAX_BKT_SIZE)
1883 vifp->tbf_n_tok = MAX_BKT_SIZE;
1884
1885 splx(s);
1886 }
1887
1888 static int
1889 priority(vifp, ip)
1890 struct vif *vifp;
1891 struct ip *ip;
1892 {
1893 int prio;
1894
1895 /* temporary hack; may add general packet classifier some day */
1896
1897 /*
1898 * The UDP port space is divided up into four priority ranges:
1899 * [0, 16384) : unclassified - lowest priority
1900 * [16384, 32768) : audio - highest priority
1901 * [32768, 49152) : whiteboard - medium priority
1902 * [49152, 65536) : video - low priority
1903 */
1904 if (ip->ip_p == IPPROTO_UDP) {
1905 struct udphdr *udp = (struct udphdr *)(((char *)ip) + (ip->ip_hl << 2));
1906
1907 switch (ntohs(udp->uh_dport) & 0xc000) {
1908 case 0x4000:
1909 prio = 70;
1910 break;
1911 case 0x8000:
1912 prio = 60;
1913 break;
1914 case 0xc000:
1915 prio = 55;
1916 break;
1917 default:
1918 prio = 50;
1919 break;
1920 }
1921
1922 if (tbfdebug > 1)
1923 log(LOG_DEBUG, "port %x prio %d\n",
1924 ntohs(udp->uh_dport), prio);
1925 } else
1926 prio = 50;
1927
1928 return (prio);
1929 }
1930
1931 /*
1932 * End of token bucket filter modifications
1933 */
1934 #ifdef RSVP_ISI
1935 int
1936 ip_rsvp_vif_init(so, m)
1937 struct socket *so;
1938 struct mbuf *m;
1939 {
1940 int i;
1941 int s;
1942
1943 if (rsvpdebug)
1944 printf("ip_rsvp_vif_init: so_type = %d, pr_protocol = %d\n",
1945 so->so_type, so->so_proto->pr_protocol);
1946
1947 if (so->so_type != SOCK_RAW ||
1948 so->so_proto->pr_protocol != IPPROTO_RSVP)
1949 return (EOPNOTSUPP);
1950
1951 /* Check mbuf. */
1952 if (m == 0 || m->m_len != sizeof(int)) {
1953 return (EINVAL);
1954 }
1955 i = *(mtod(m, int *));
1956
1957 if (rsvpdebug)
1958 printf("ip_rsvp_vif_init: vif = %d rsvp_on = %d\n", i, rsvp_on);
1959
1960 s = splsoftnet();
1961
1962 /* Check vif. */
1963 if (!legal_vif_num(i)) {
1964 splx(s);
1965 return (EADDRNOTAVAIL);
1966 }
1967
1968 /* Check if socket is available. */
1969 if (viftable[i].v_rsvpd != 0) {
1970 splx(s);
1971 return (EADDRINUSE);
1972 }
1973
1974 viftable[i].v_rsvpd = so;
1975 /*
1976 * This may seem silly, but we need to be sure we don't over-increment
1977 * the RSVP counter, in case something slips up.
1978 */
1979 if (!viftable[i].v_rsvp_on) {
1980 viftable[i].v_rsvp_on = 1;
1981 rsvp_on++;
1982 }
1983
1984 splx(s);
1985 return (0);
1986 }
1987
1988 int
1989 ip_rsvp_vif_done(so, m)
1990 struct socket *so;
1991 struct mbuf *m;
1992 {
1993 int i;
1994 int s;
1995
1996 if (rsvpdebug)
1997 printf("ip_rsvp_vif_done: so_type = %d, pr_protocol = %d\n",
1998 so->so_type, so->so_proto->pr_protocol);
1999
2000 if (so->so_type != SOCK_RAW ||
2001 so->so_proto->pr_protocol != IPPROTO_RSVP)
2002 return (EOPNOTSUPP);
2003
2004 /* Check mbuf. */
2005 if (m == 0 || m->m_len != sizeof(int)) {
2006 return (EINVAL);
2007 }
2008 i = *(mtod(m, int *));
2009
2010 s = splsoftnet();
2011
2012 /* Check vif. */
2013 if (!legal_vif_num(i)) {
2014 splx(s);
2015 return (EADDRNOTAVAIL);
2016 }
2017
2018 if (rsvpdebug)
2019 printf("ip_rsvp_vif_done: v_rsvpd = %x so = %x\n",
2020 viftable[i].v_rsvpd, so);
2021
2022 viftable[i].v_rsvpd = 0;
2023 /*
2024 * This may seem silly, but we need to be sure we don't over-decrement
2025 * the RSVP counter, in case something slips up.
2026 */
2027 if (viftable[i].v_rsvp_on) {
2028 viftable[i].v_rsvp_on = 0;
2029 rsvp_on--;
2030 }
2031
2032 splx(s);
2033 return (0);
2034 }
2035
2036 void
2037 ip_rsvp_force_done(so)
2038 struct socket *so;
2039 {
2040 int vifi;
2041 int s;
2042
2043 /* Don't bother if it is not the right type of socket. */
2044 if (so->so_type != SOCK_RAW ||
2045 so->so_proto->pr_protocol != IPPROTO_RSVP)
2046 return;
2047
2048 s = splsoftnet();
2049
2050 /*
2051 * The socket may be attached to more than one vif...this
2052 * is perfectly legal.
2053 */
2054 for (vifi = 0; vifi < numvifs; vifi++) {
2055 if (viftable[vifi].v_rsvpd == so) {
2056 viftable[vifi].v_rsvpd = 0;
2057 /*
2058 * This may seem silly, but we need to be sure we don't
2059 * over-decrement the RSVP counter, in case something
2060 * slips up.
2061 */
2062 if (viftable[vifi].v_rsvp_on) {
2063 viftable[vifi].v_rsvp_on = 0;
2064 rsvp_on--;
2065 }
2066 }
2067 }
2068
2069 splx(s);
2070 return;
2071 }
2072
2073 void
2074 rsvp_input(m, ifp)
2075 struct mbuf *m;
2076 struct ifnet *ifp;
2077 {
2078 int vifi;
2079 struct ip *ip = mtod(m, struct ip *);
2080 static struct sockaddr_in rsvp_src = { sizeof(sin), AF_INET };
2081 int s;
2082
2083 if (rsvpdebug)
2084 printf("rsvp_input: rsvp_on %d\n", rsvp_on);
2085
2086 /*
2087 * Can still get packets with rsvp_on = 0 if there is a local member
2088 * of the group to which the RSVP packet is addressed. But in this
2089 * case we want to throw the packet away.
2090 */
2091 if (!rsvp_on) {
2092 m_freem(m);
2093 return;
2094 }
2095
2096 /*
2097 * If the old-style non-vif-associated socket is set, then use
2098 * it and ignore the new ones.
2099 */
2100 if (ip_rsvpd != 0) {
2101 if (rsvpdebug)
2102 printf("rsvp_input: "
2103 "Sending packet up old-style socket\n");
2104 rip_input(m); /*XXX*/
2105 return;
2106 }
2107
2108 s = splsoftnet();
2109
2110 if (rsvpdebug)
2111 printf("rsvp_input: check vifs\n");
2112
2113 /* Find which vif the packet arrived on. */
2114 for (vifi = 0; vifi < numvifs; vifi++) {
2115 if (viftable[vifi].v_ifp == ifp)
2116 break;
2117 }
2118
2119 if (vifi == numvifs) {
2120 /* Can't find vif packet arrived on. Drop packet. */
2121 if (rsvpdebug)
2122 printf("rsvp_input: "
2123 "Can't find vif for packet...dropping it.\n");
2124 m_freem(m);
2125 splx(s);
2126 return;
2127 }
2128
2129 if (rsvpdebug)
2130 printf("rsvp_input: check socket\n");
2131
2132 if (viftable[vifi].v_rsvpd == 0) {
2133 /*
2134 * drop packet, since there is no specific socket for this
2135 * interface
2136 */
2137 if (rsvpdebug)
2138 printf("rsvp_input: No socket defined for vif %d\n",
2139 vifi);
2140 m_freem(m);
2141 splx(s);
2142 return;
2143 }
2144
2145 rsvp_src.sin_addr = ip->ip_src;
2146
2147 if (rsvpdebug && m)
2148 printf("rsvp_input: m->m_len = %d, sbspace() = %d\n",
2149 m->m_len, sbspace(&viftable[vifi].v_rsvpd->so_rcv));
2150
2151 if (socket_send(viftable[vifi].v_rsvpd, m, &rsvp_src) < 0)
2152 if (rsvpdebug)
2153 printf("rsvp_input: Failed to append to socket\n");
2154 else
2155 if (rsvpdebug)
2156 printf("rsvp_input: send packet up\n");
2157
2158 splx(s);
2159 }
2160 #endif /* RSVP_ISI */
2161