ip_mroute.c revision 1.64 1 /* $NetBSD: ip_mroute.c,v 1.64 2002/11/05 02:07:25 fair Exp $ */
2
3 /*
4 * Copyright (c) 1989 Stephen Deering
5 * Copyright (c) 1992, 1993
6 * The Regents of the University of California. All rights reserved.
7 *
8 * This code is derived from software contributed to Berkeley by
9 * Stephen Deering of Stanford University.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 * 3. All advertising materials mentioning features or use of this software
20 * must display the following acknowledgement:
21 * This product includes software developed by the University of
22 * California, Berkeley and its contributors.
23 * 4. Neither the name of the University nor the names of its contributors
24 * may be used to endorse or promote products derived from this software
25 * without specific prior written permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
28 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
31 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
32 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
33 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
34 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
35 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
36 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
37 * SUCH DAMAGE.
38 *
39 * @(#)ip_mroute.c 8.2 (Berkeley) 11/15/93
40 */
41
42 /*
43 * IP multicast forwarding procedures
44 *
45 * Written by David Waitzman, BBN Labs, August 1988.
46 * Modified by Steve Deering, Stanford, February 1989.
47 * Modified by Mark J. Steiglitz, Stanford, May, 1991
48 * Modified by Van Jacobson, LBL, January 1993
49 * Modified by Ajit Thyagarajan, PARC, August 1993
50 * Modified by Bill Fenner, PARC, April 1994
51 * Modified by Charles M. Hannum, NetBSD, May 1995.
52 *
53 * MROUTING Revision: 1.2
54 */
55
56 #include <sys/cdefs.h>
57 __KERNEL_RCSID(0, "$NetBSD: ip_mroute.c,v 1.64 2002/11/05 02:07:25 fair Exp $");
58
59 #include "opt_ipsec.h"
60
61 #include <sys/param.h>
62 #include <sys/systm.h>
63 #include <sys/callout.h>
64 #include <sys/mbuf.h>
65 #include <sys/socket.h>
66 #include <sys/socketvar.h>
67 #include <sys/protosw.h>
68 #include <sys/errno.h>
69 #include <sys/time.h>
70 #include <sys/kernel.h>
71 #include <sys/ioctl.h>
72 #include <sys/syslog.h>
73 #include <net/if.h>
74 #include <net/route.h>
75 #include <net/raw_cb.h>
76 #include <netinet/in.h>
77 #include <netinet/in_var.h>
78 #include <netinet/in_systm.h>
79 #include <netinet/ip.h>
80 #include <netinet/ip_var.h>
81 #include <netinet/in_pcb.h>
82 #include <netinet/udp.h>
83 #include <netinet/igmp.h>
84 #include <netinet/igmp_var.h>
85 #include <netinet/ip_mroute.h>
86 #include <netinet/ip_encap.h>
87
88 #ifdef IPSEC
89 #include <netinet6/ipsec.h>
90 #include <netkey/key.h>
91 #endif
92
93 #include <machine/stdarg.h>
94
#define	IP_MULTICASTOPTS	0

/*
 * If `m' is non-null and its first mbuf either has M_EXT set or holds
 * fewer than `len' bytes, replace `m' with m_pullup((m), (len)).
 * Both arguments are evaluated more than once.
 */
#define	M_PULLUP(m, len)						\
do {									\
	if ((m) != NULL &&						\
	    ((((m)->m_flags & M_EXT) != 0) || (m)->m_len < (len)))	\
		(m) = m_pullup((m), (len));				\
} while (/*CONSTCOND*/ 0)
101
102 /*
103 * Globals. All but ip_mrouter and ip_mrtproto could be static,
104 * except for netstat or debugging purposes.
105 */
106 struct socket *ip_mrouter = 0;
107 int ip_mrtproto = IGMP_DVMRP; /* for netstat only */
108
109 #define NO_RTE_FOUND 0x1
110 #define RTE_FOUND 0x2
111
112 #define MFCHASH(a, g) \
113 ((((a).s_addr >> 20) ^ ((a).s_addr >> 10) ^ (a).s_addr ^ \
114 ((g).s_addr >> 20) ^ ((g).s_addr >> 10) ^ (g).s_addr) & mfchash)
115 LIST_HEAD(mfchashhdr, mfc) *mfchashtbl;
116 u_long mfchash;
117
118 u_char nexpire[MFCTBLSIZ];
119 struct vif viftable[MAXVIFS];
120 struct mrtstat mrtstat;
121 u_int mrtdebug = 0; /* debug level */
122 #define DEBUG_MFC 0x02
123 #define DEBUG_FORWARD 0x04
124 #define DEBUG_EXPIRE 0x08
125 #define DEBUG_XMIT 0x10
126 u_int tbfdebug = 0; /* tbf debug level */
127 #ifdef RSVP_ISI
128 u_int rsvpdebug = 0; /* rsvp debug level */
129 extern struct socket *ip_rsvpd;
130 extern int rsvp_on;
131 #endif /* RSVP_ISI */
132
133 /* vif attachment using sys/netinet/ip_encap.c */
134 extern struct domain inetdomain;
135 static void vif_input __P((struct mbuf *, ...));
136 static int vif_encapcheck __P((const struct mbuf *, int, int, void *));
137 static struct protosw vif_protosw =
138 { SOCK_RAW, &inetdomain, IPPROTO_IPV4, PR_ATOMIC|PR_ADDR,
139 vif_input, rip_output, 0, rip_ctloutput,
140 rip_usrreq,
141 0, 0, 0, 0,
142 };
143
/* Interval, in ticks, between runs of expire_upcalls(). */
#define	EXPIRE_TIMEOUT	(hz / 4)	/* 4x / second */
/* Initial mfc_expire value given to an entry created for an upcall. */
#define	UPCALL_EXPIRE	6		/* number of timeouts */

/*
 * Token bucket filter definitions
 */

#define	TBF_REPROCESS	(hz / 100)	/* 100x / second */
152
153 static int get_sg_cnt __P((struct sioc_sg_req *));
154 static int get_vif_cnt __P((struct sioc_vif_req *));
155 static int ip_mrouter_init __P((struct socket *, struct mbuf *));
156 static int get_version __P((struct mbuf *));
157 static int set_assert __P((struct mbuf *));
158 static int get_assert __P((struct mbuf *));
159 static int add_vif __P((struct mbuf *));
160 static int del_vif __P((struct mbuf *));
161 static void update_mfc __P((struct mfcctl *, struct mfc *));
162 static void expire_mfc __P((struct mfc *));
163 static int add_mfc __P((struct mbuf *));
164 #ifdef UPCALL_TIMING
165 static void collate __P((struct timeval *));
166 #endif
167 static int del_mfc __P((struct mbuf *));
168 static int socket_send __P((struct socket *, struct mbuf *,
169 struct sockaddr_in *));
170 static void expire_upcalls __P((void *));
171 #ifdef RSVP_ISI
172 static int ip_mdq __P((struct mbuf *, struct ifnet *, struct mfc *, vifi_t));
173 #else
174 static int ip_mdq __P((struct mbuf *, struct ifnet *, struct mfc *));
175 #endif
176 static void phyint_send __P((struct ip *, struct vif *, struct mbuf *));
177 static void encap_send __P((struct ip *, struct vif *, struct mbuf *));
178 static void tbf_control __P((struct vif *, struct mbuf *, struct ip *,
179 u_int32_t));
180 static void tbf_queue __P((struct vif *, struct mbuf *));
181 static void tbf_process_q __P((struct vif *));
182 static void tbf_reprocess_q __P((void *));
183 static int tbf_dq_sel __P((struct vif *, struct ip *));
184 static void tbf_send_packet __P((struct vif *, struct mbuf *));
185 static void tbf_update_tokens __P((struct vif *));
186 static int priority __P((struct vif *, struct ip *));
187
/*
 * 'Interfaces' associated with the decapsulator, so that packets which
 * went through it can be told apart from ones reflected by a broken
 * gateway.  These interfaces are never linked into the system ifnet
 * list and no routes point to them, i.e. packets can't be sent this
 * way.  They exist only as a placeholder for multicast source
 * verification.
 */
#if 0
struct ifnet multicast_decap_if[MAXVIFS];
#endif

#define	ENCAP_TTL	64
#define	ENCAP_PROTO	IPPROTO_IPIP	/* 4 */
202
203 /* prototype IP hdr for encapsulated packets */
204 struct ip multicast_encap_iphdr = {
205 #if BYTE_ORDER == LITTLE_ENDIAN
206 sizeof(struct ip) >> 2, IPVERSION,
207 #else
208 IPVERSION, sizeof(struct ip) >> 2,
209 #endif
210 0, /* tos */
211 sizeof(struct ip), /* total length */
212 0, /* id */
213 0, /* frag offset */
214 ENCAP_TTL, ENCAP_PROTO,
215 0, /* checksum */
216 };
217
218 /*
219 * Private variables.
220 */
221 static vifi_t numvifs = 0;
222
223 static struct callout expire_upcalls_ch;
224
225 /*
226 * one-back cache used by vif_encapcheck to locate a tunnel's vif
227 * given a datagram's src ip address.
228 */
229 static struct in_addr last_encap_src;
230 static struct vif *last_encap_vif;
231
232 /*
233 * whether or not special PIM assert processing is enabled.
234 */
235 static int pim_assert;
236 /*
237 * Rate limit for assert notification messages, in usec
238 */
239 #define ASSERT_MSG_TIME 3000000
240
/*
 * Find a route for a given origin IP address and multicast group address.
 * Type of service parameter to be added in the future!!!
 *
 * Sets `rt' to the first entry in the matching hash chain whose origin
 * and group equal `o' and `g' and which has no stalled packets
 * (mfc_stall == 0), or to 0 when there is none.  Lookup and miss
 * counters in mrtstat are updated.
 */

#define	MFCFIND(o, g, rt) {						\
	struct mfc *_ent;						\
									\
	(rt) = 0;							\
	++mrtstat.mrts_mfc_lookups;					\
	for (_ent = LIST_FIRST(&mfchashtbl[MFCHASH(o, g)]);		\
	    _ent != 0; _ent = LIST_NEXT(_ent, mfc_hash)) {		\
		if (!in_hosteq(_ent->mfc_origin, (o)) ||		\
		    !in_hosteq(_ent->mfc_mcastgrp, (g)) ||		\
		    _ent->mfc_stall != 0)				\
			continue;					\
		(rt) = _ent;						\
		break;							\
	}								\
	if ((rt) == 0)							\
		++mrtstat.mrts_mfc_misses;				\
}
261
/*
 * Macros to compute elapsed time efficiently
 * Borrowed from Van Jacobson's scheduling code
 *
 * TV_DELTA(a, b, delta) stores (a - b), in microseconds, into `delta'.
 * `a' and `b' are structures with tv_sec and tv_usec members.  A
 * difference of 0, 1 or 2 whole seconds is handled with additions only;
 * any other difference (including a negative one) uses a multiply.
 * The arguments are evaluated more than once.
 */
#define	TV_DELTA(a, b, delta) {						\
	int xxs;							\
									\
	(delta) = (a).tv_usec - (b).tv_usec;				\
	xxs = (a).tv_sec - (b).tv_sec;					\
	switch (xxs) {							\
	case 2:								\
		(delta) += 1000000;					\
		/* FALLTHROUGH */					\
	case 1:								\
		(delta) += 1000000;					\
		/* FALLTHROUGH */					\
	case 0:								\
		break;							\
	default:							\
		(delta) += (1000000 * xxs);				\
		break;							\
	}								\
}
284
#ifdef UPCALL_TIMING
/* Upcall delay histogram filled in by collate(); the last slot is a catch-all. */
u_int32_t	upcall_data[51];
#endif /* UPCALL_TIMING */
288
289 /*
290 * Handle MRT setsockopt commands to modify the multicast routing tables.
291 */
292 int
293 ip_mrouter_set(so, optname, m)
294 struct socket *so;
295 int optname;
296 struct mbuf **m;
297 {
298 int error;
299
300 if (optname != MRT_INIT && so != ip_mrouter)
301 error = ENOPROTOOPT;
302 else
303 switch (optname) {
304 case MRT_INIT:
305 error = ip_mrouter_init(so, *m);
306 break;
307 case MRT_DONE:
308 error = ip_mrouter_done();
309 break;
310 case MRT_ADD_VIF:
311 error = add_vif(*m);
312 break;
313 case MRT_DEL_VIF:
314 error = del_vif(*m);
315 break;
316 case MRT_ADD_MFC:
317 error = add_mfc(*m);
318 break;
319 case MRT_DEL_MFC:
320 error = del_mfc(*m);
321 break;
322 case MRT_ASSERT:
323 error = set_assert(*m);
324 break;
325 default:
326 error = ENOPROTOOPT;
327 break;
328 }
329
330 if (*m)
331 m_free(*m);
332 return (error);
333 }
334
335 /*
336 * Handle MRT getsockopt commands
337 */
338 int
339 ip_mrouter_get(so, optname, m)
340 struct socket *so;
341 int optname;
342 struct mbuf **m;
343 {
344 int error;
345
346 if (so != ip_mrouter)
347 error = ENOPROTOOPT;
348 else {
349 *m = m_get(M_WAIT, MT_SOOPTS);
350
351 switch (optname) {
352 case MRT_VERSION:
353 error = get_version(*m);
354 break;
355 case MRT_ASSERT:
356 error = get_assert(*m);
357 break;
358 default:
359 error = ENOPROTOOPT;
360 break;
361 }
362
363 if (error)
364 m_free(*m);
365 }
366
367 return (error);
368 }
369
370 /*
371 * Handle ioctl commands to obtain information from the cache
372 */
373 int
374 mrt_ioctl(so, cmd, data)
375 struct socket *so;
376 u_long cmd;
377 caddr_t data;
378 {
379 int error;
380
381 if (so != ip_mrouter)
382 error = EINVAL;
383 else
384 switch (cmd) {
385 case SIOCGETVIFCNT:
386 error = get_vif_cnt((struct sioc_vif_req *)data);
387 break;
388 case SIOCGETSGCNT:
389 error = get_sg_cnt((struct sioc_sg_req *)data);
390 break;
391 default:
392 error = EINVAL;
393 break;
394 }
395
396 return (error);
397 }
398
399 /*
400 * returns the packet, byte, rpf-failure count for the source group provided
401 */
402 static int
403 get_sg_cnt(req)
404 struct sioc_sg_req *req;
405 {
406 struct mfc *rt;
407 int s;
408
409 s = splsoftnet();
410 MFCFIND(req->src, req->grp, rt);
411 splx(s);
412 if (rt != 0) {
413 req->pktcnt = rt->mfc_pkt_cnt;
414 req->bytecnt = rt->mfc_byte_cnt;
415 req->wrong_if = rt->mfc_wrong_if;
416 } else
417 req->pktcnt = req->bytecnt = req->wrong_if = 0xffffffff;
418
419 return (0);
420 }
421
422 /*
423 * returns the input and output packet and byte counts on the vif provided
424 */
425 static int
426 get_vif_cnt(req)
427 struct sioc_vif_req *req;
428 {
429 vifi_t vifi = req->vifi;
430
431 if (vifi >= numvifs)
432 return (EINVAL);
433
434 req->icount = viftable[vifi].v_pkt_in;
435 req->ocount = viftable[vifi].v_pkt_out;
436 req->ibytes = viftable[vifi].v_bytes_in;
437 req->obytes = viftable[vifi].v_bytes_out;
438
439 return (0);
440 }
441
442 /*
443 * Enable multicast routing
444 */
445 static int
446 ip_mrouter_init(so, m)
447 struct socket *so;
448 struct mbuf *m;
449 {
450 int *v;
451
452 if (mrtdebug)
453 log(LOG_DEBUG,
454 "ip_mrouter_init: so_type = %d, pr_protocol = %d\n",
455 so->so_type, so->so_proto->pr_protocol);
456
457 if (so->so_type != SOCK_RAW ||
458 so->so_proto->pr_protocol != IPPROTO_IGMP)
459 return (EOPNOTSUPP);
460
461 if (m == 0 || m->m_len < sizeof(int))
462 return (EINVAL);
463
464 v = mtod(m, int *);
465 if (*v != 1)
466 return (EINVAL);
467
468 if (ip_mrouter != 0)
469 return (EADDRINUSE);
470
471 ip_mrouter = so;
472
473 mfchashtbl =
474 hashinit(MFCTBLSIZ, HASH_LIST, M_MRTABLE, M_WAITOK, &mfchash);
475 bzero((caddr_t)nexpire, sizeof(nexpire));
476
477 pim_assert = 0;
478
479 callout_init(&expire_upcalls_ch);
480 callout_reset(&expire_upcalls_ch, EXPIRE_TIMEOUT,
481 expire_upcalls, NULL);
482
483 if (mrtdebug)
484 log(LOG_DEBUG, "ip_mrouter_init\n");
485
486 return (0);
487 }
488
489 /*
490 * Disable multicast routing
491 */
492 int
493 ip_mrouter_done()
494 {
495 vifi_t vifi;
496 struct vif *vifp;
497 int i;
498 int s;
499
500 s = splsoftnet();
501
502 /* Clear out all the vifs currently in use. */
503 for (vifi = 0; vifi < numvifs; vifi++) {
504 vifp = &viftable[vifi];
505 if (!in_nullhost(vifp->v_lcl_addr))
506 reset_vif(vifp);
507 }
508
509 numvifs = 0;
510 pim_assert = 0;
511
512 callout_stop(&expire_upcalls_ch);
513
514 /*
515 * Free all multicast forwarding cache entries.
516 */
517 for (i = 0; i < MFCTBLSIZ; i++) {
518 struct mfc *rt, *nrt;
519
520 for (rt = LIST_FIRST(&mfchashtbl[i]); rt; rt = nrt) {
521 nrt = LIST_NEXT(rt, mfc_hash);
522
523 expire_mfc(rt);
524 }
525 }
526
527 free(mfchashtbl, M_MRTABLE);
528 mfchashtbl = 0;
529
530 /* Reset de-encapsulation cache. */
531
532 ip_mrouter = 0;
533
534 splx(s);
535
536 if (mrtdebug)
537 log(LOG_DEBUG, "ip_mrouter_done\n");
538
539 return (0);
540 }
541
542 static int
543 get_version(m)
544 struct mbuf *m;
545 {
546 int *v = mtod(m, int *);
547
548 *v = 0x0305; /* XXX !!!! */
549 m->m_len = sizeof(int);
550 return (0);
551 }
552
553 /*
554 * Set PIM assert processing global
555 */
556 static int
557 set_assert(m)
558 struct mbuf *m;
559 {
560 int *i;
561
562 if (m == 0 || m->m_len < sizeof(int))
563 return (EINVAL);
564
565 i = mtod(m, int *);
566 pim_assert = !!*i;
567 return (0);
568 }
569
570 /*
571 * Get PIM assert processing global
572 */
573 static int
574 get_assert(m)
575 struct mbuf *m;
576 {
577 int *i = mtod(m, int *);
578
579 *i = pim_assert;
580 m->m_len = sizeof(int);
581 return (0);
582 }
583
584 static struct sockaddr_in sin = { sizeof(sin), AF_INET };
585
586 /*
587 * Add a vif to the vif table
588 */
589 static int
590 add_vif(m)
591 struct mbuf *m;
592 {
593 struct vifctl *vifcp;
594 struct vif *vifp;
595 struct ifaddr *ifa;
596 struct ifnet *ifp;
597 struct ifreq ifr;
598 int error, s;
599
600 if (m == 0 || m->m_len < sizeof(struct vifctl))
601 return (EINVAL);
602
603 vifcp = mtod(m, struct vifctl *);
604 if (vifcp->vifc_vifi >= MAXVIFS)
605 return (EINVAL);
606
607 vifp = &viftable[vifcp->vifc_vifi];
608 if (!in_nullhost(vifp->v_lcl_addr))
609 return (EADDRINUSE);
610
611 /* Find the interface with an address in AF_INET family. */
612 sin.sin_addr = vifcp->vifc_lcl_addr;
613 ifa = ifa_ifwithaddr(sintosa(&sin));
614 if (ifa == 0)
615 return (EADDRNOTAVAIL);
616
617 if (vifcp->vifc_flags & VIFF_TUNNEL) {
618 if (vifcp->vifc_flags & VIFF_SRCRT) {
619 log(LOG_ERR, "Source routed tunnels not supported\n");
620 return (EOPNOTSUPP);
621 }
622
623 /* attach this vif to decapsulator dispatch table */
624 vifp->v_encap_cookie = encap_attach_func(AF_INET, IPPROTO_IPV4,
625 vif_encapcheck, &vif_protosw, vifp);
626 if (!vifp->v_encap_cookie)
627 return (EINVAL);
628
629 /* Create a fake encapsulation interface. */
630 ifp = (struct ifnet *)malloc(sizeof(*ifp), M_MRTABLE, M_WAITOK);
631 bzero(ifp, sizeof(*ifp));
632 sprintf(ifp->if_xname, "mdecap%d", vifcp->vifc_vifi);
633
634 /* Prepare cached route entry. */
635 bzero(&vifp->v_route, sizeof(vifp->v_route));
636 } else {
637 /* Use the physical interface associated with the address. */
638 ifp = ifa->ifa_ifp;
639
640 /* Make sure the interface supports multicast. */
641 if ((ifp->if_flags & IFF_MULTICAST) == 0)
642 return (EOPNOTSUPP);
643
644 /* Enable promiscuous reception of all IP multicasts. */
645 satosin(&ifr.ifr_addr)->sin_len = sizeof(struct sockaddr_in);
646 satosin(&ifr.ifr_addr)->sin_family = AF_INET;
647 satosin(&ifr.ifr_addr)->sin_addr = zeroin_addr;
648 error = (*ifp->if_ioctl)(ifp, SIOCADDMULTI, (caddr_t)&ifr);
649 if (error)
650 return (error);
651 }
652
653 s = splsoftnet();
654
655 /* Define parameters for the tbf structure. */
656 vifp->tbf_q = 0;
657 vifp->tbf_t = &vifp->tbf_q;
658 microtime(&vifp->tbf_last_pkt_t);
659 vifp->tbf_n_tok = 0;
660 vifp->tbf_q_len = 0;
661 vifp->tbf_max_q_len = MAXQSIZE;
662
663 vifp->v_flags = vifcp->vifc_flags;
664 vifp->v_threshold = vifcp->vifc_threshold;
665 /* scaling up here allows division by 1024 in critical code */
666 vifp->v_rate_limit = vifcp->vifc_rate_limit * 1024 / 1000;
667 vifp->v_lcl_addr = vifcp->vifc_lcl_addr;
668 vifp->v_rmt_addr = vifcp->vifc_rmt_addr;
669 vifp->v_ifp = ifp;
670 /* Initialize per vif pkt counters. */
671 vifp->v_pkt_in = 0;
672 vifp->v_pkt_out = 0;
673 vifp->v_bytes_in = 0;
674 vifp->v_bytes_out = 0;
675
676 callout_init(&vifp->v_repq_ch);
677
678 #ifdef RSVP_ISI
679 vifp->v_rsvp_on = 0;
680 vifp->v_rsvpd = 0;
681 #endif /* RSVP_ISI */
682
683 splx(s);
684
685 /* Adjust numvifs up if the vifi is higher than numvifs. */
686 if (numvifs <= vifcp->vifc_vifi)
687 numvifs = vifcp->vifc_vifi + 1;
688
689 if (mrtdebug)
690 log(LOG_DEBUG, "add_vif #%d, lcladdr %x, %s %x, thresh %x, rate %d\n",
691 vifcp->vifc_vifi,
692 ntohl(vifcp->vifc_lcl_addr.s_addr),
693 (vifcp->vifc_flags & VIFF_TUNNEL) ? "rmtaddr" : "mask",
694 ntohl(vifcp->vifc_rmt_addr.s_addr),
695 vifcp->vifc_threshold,
696 vifcp->vifc_rate_limit);
697
698 return (0);
699 }
700
701 void
702 reset_vif(vifp)
703 struct vif *vifp;
704 {
705 struct mbuf *m, *n;
706 struct ifnet *ifp;
707 struct ifreq ifr;
708
709 callout_stop(&vifp->v_repq_ch);
710
711 /* detach this vif from decapsulator dispatch table */
712 encap_detach(vifp->v_encap_cookie);
713 vifp->v_encap_cookie = NULL;
714
715 for (m = vifp->tbf_q; m != 0; m = n) {
716 n = m->m_nextpkt;
717 m_freem(m);
718 }
719
720 if (vifp->v_flags & VIFF_TUNNEL) {
721 free(vifp->v_ifp, M_MRTABLE);
722 if (vifp == last_encap_vif) {
723 last_encap_vif = 0;
724 last_encap_src = zeroin_addr;
725 }
726 } else {
727 satosin(&ifr.ifr_addr)->sin_len = sizeof(struct sockaddr_in);
728 satosin(&ifr.ifr_addr)->sin_family = AF_INET;
729 satosin(&ifr.ifr_addr)->sin_addr = zeroin_addr;
730 ifp = vifp->v_ifp;
731 (*ifp->if_ioctl)(ifp, SIOCDELMULTI, (caddr_t)&ifr);
732 }
733 bzero((caddr_t)vifp, sizeof(*vifp));
734 }
735
736 /*
737 * Delete a vif from the vif table
738 */
739 static int
740 del_vif(m)
741 struct mbuf *m;
742 {
743 vifi_t *vifip;
744 struct vif *vifp;
745 vifi_t vifi;
746 int s;
747
748 if (m == 0 || m->m_len < sizeof(vifi_t))
749 return (EINVAL);
750
751 vifip = mtod(m, vifi_t *);
752 if (*vifip >= numvifs)
753 return (EINVAL);
754
755 vifp = &viftable[*vifip];
756 if (in_nullhost(vifp->v_lcl_addr))
757 return (EADDRNOTAVAIL);
758
759 s = splsoftnet();
760
761 reset_vif(vifp);
762
763 /* Adjust numvifs down */
764 for (vifi = numvifs; vifi > 0; vifi--)
765 if (!in_nullhost(viftable[vifi-1].v_lcl_addr))
766 break;
767 numvifs = vifi;
768
769 splx(s);
770
771 if (mrtdebug)
772 log(LOG_DEBUG, "del_vif %d, numvifs %d\n", *vifip, numvifs);
773
774 return (0);
775 }
776
777 static void
778 update_mfc(mfccp, rt)
779 struct mfcctl *mfccp;
780 struct mfc *rt;
781 {
782 vifi_t vifi;
783
784 rt->mfc_parent = mfccp->mfcc_parent;
785 for (vifi = 0; vifi < numvifs; vifi++)
786 rt->mfc_ttls[vifi] = mfccp->mfcc_ttls[vifi];
787 rt->mfc_expire = 0;
788 rt->mfc_stall = 0;
789 }
790
791 static void
792 expire_mfc(rt)
793 struct mfc *rt;
794 {
795 struct rtdetq *rte, *nrte;
796
797 for (rte = rt->mfc_stall; rte != 0; rte = nrte) {
798 nrte = rte->next;
799 m_freem(rte->m);
800 free(rte, M_MRTABLE);
801 }
802
803 LIST_REMOVE(rt, mfc_hash);
804 free(rt, M_MRTABLE);
805 }
806
807 /*
808 * Add an mfc entry
809 */
810 static int
811 add_mfc(m)
812 struct mbuf *m;
813 {
814 struct mfcctl *mfccp;
815 struct mfc *rt;
816 u_int32_t hash = 0;
817 struct rtdetq *rte, *nrte;
818 u_short nstl;
819 int s;
820
821 if (m == 0 || m->m_len < sizeof(struct mfcctl))
822 return (EINVAL);
823
824 mfccp = mtod(m, struct mfcctl *);
825
826 s = splsoftnet();
827 MFCFIND(mfccp->mfcc_origin, mfccp->mfcc_mcastgrp, rt);
828
829 /* If an entry already exists, just update the fields */
830 if (rt) {
831 if (mrtdebug & DEBUG_MFC)
832 log(LOG_DEBUG,"add_mfc update o %x g %x p %x\n",
833 ntohl(mfccp->mfcc_origin.s_addr),
834 ntohl(mfccp->mfcc_mcastgrp.s_addr),
835 mfccp->mfcc_parent);
836
837 if (rt->mfc_expire)
838 nexpire[hash]--;
839
840 update_mfc(mfccp, rt);
841
842 splx(s);
843 return (0);
844 }
845
846 /*
847 * Find the entry for which the upcall was made and update
848 */
849 nstl = 0;
850 hash = MFCHASH(mfccp->mfcc_origin, mfccp->mfcc_mcastgrp);
851 LIST_FOREACH(rt, &mfchashtbl[hash], mfc_hash) {
852 if (in_hosteq(rt->mfc_origin, mfccp->mfcc_origin) &&
853 in_hosteq(rt->mfc_mcastgrp, mfccp->mfcc_mcastgrp) &&
854 rt->mfc_stall != 0) {
855 if (nstl++)
856 log(LOG_ERR, "add_mfc %s o %x g %x p %x dbx %p\n",
857 "multiple kernel entries",
858 ntohl(mfccp->mfcc_origin.s_addr),
859 ntohl(mfccp->mfcc_mcastgrp.s_addr),
860 mfccp->mfcc_parent, rt->mfc_stall);
861
862 if (mrtdebug & DEBUG_MFC)
863 log(LOG_DEBUG,"add_mfc o %x g %x p %x dbg %p\n",
864 ntohl(mfccp->mfcc_origin.s_addr),
865 ntohl(mfccp->mfcc_mcastgrp.s_addr),
866 mfccp->mfcc_parent, rt->mfc_stall);
867
868 if (rt->mfc_expire)
869 nexpire[hash]--;
870
871 rte = rt->mfc_stall;
872 update_mfc(mfccp, rt);
873
874 /* free packets Qed at the end of this entry */
875 for (; rte != 0; rte = nrte) {
876 nrte = rte->next;
877 #ifdef RSVP_ISI
878 ip_mdq(rte->m, rte->ifp, rt, -1);
879 #else
880 ip_mdq(rte->m, rte->ifp, rt);
881 #endif /* RSVP_ISI */
882 m_freem(rte->m);
883 #ifdef UPCALL_TIMING
884 collate(&rte->t);
885 #endif /* UPCALL_TIMING */
886 free(rte, M_MRTABLE);
887 }
888 }
889 }
890
891 if (nstl == 0) {
892 /*
893 * No mfc; make a new one
894 */
895 if (mrtdebug & DEBUG_MFC)
896 log(LOG_DEBUG,"add_mfc no upcall o %x g %x p %x\n",
897 ntohl(mfccp->mfcc_origin.s_addr),
898 ntohl(mfccp->mfcc_mcastgrp.s_addr),
899 mfccp->mfcc_parent);
900
901 rt = (struct mfc *)malloc(sizeof(*rt), M_MRTABLE, M_NOWAIT);
902 if (rt == 0) {
903 splx(s);
904 return (ENOBUFS);
905 }
906
907 rt->mfc_origin = mfccp->mfcc_origin;
908 rt->mfc_mcastgrp = mfccp->mfcc_mcastgrp;
909 /* initialize pkt counters per src-grp */
910 rt->mfc_pkt_cnt = 0;
911 rt->mfc_byte_cnt = 0;
912 rt->mfc_wrong_if = 0;
913 timerclear(&rt->mfc_last_assert);
914 update_mfc(mfccp, rt);
915
916 /* insert new entry at head of hash chain */
917 LIST_INSERT_HEAD(&mfchashtbl[hash], rt, mfc_hash);
918 }
919
920 splx(s);
921 return (0);
922 }
923
#ifdef UPCALL_TIMING
/*
 * collect delay statistics on the upcalls
 *
 * When `t' lies before the current time, the elapsed microseconds are
 * shifted right by 10 bits and the matching upcall_data[] slot is
 * incremented; values above 50 are counted in slot 50.
 */
static void collate(t)
	struct timeval *t;
{
	struct timeval now;
	u_int32_t elapsed;
	u_int32_t slot;

	microtime(&now);

	if (!timercmp(t, &now, <))
		return;

	TV_DELTA(now, *t, elapsed);

	slot = elapsed >> 10;
	if (slot > 50)
		slot = 50;

	++upcall_data[slot];
}
#endif /* UPCALL_TIMING */
948
949 /*
950 * Delete an mfc entry
951 */
952 static int
953 del_mfc(m)
954 struct mbuf *m;
955 {
956 struct mfcctl *mfccp;
957 struct mfc *rt;
958 int s;
959
960 if (m == 0 || m->m_len < sizeof(struct mfcctl))
961 return (EINVAL);
962
963 mfccp = mtod(m, struct mfcctl *);
964
965 if (mrtdebug & DEBUG_MFC)
966 log(LOG_DEBUG, "del_mfc origin %x mcastgrp %x\n",
967 ntohl(mfccp->mfcc_origin.s_addr),
968 ntohl(mfccp->mfcc_mcastgrp.s_addr));
969
970 s = splsoftnet();
971
972 MFCFIND(mfccp->mfcc_origin, mfccp->mfcc_mcastgrp, rt);
973 if (rt == 0) {
974 splx(s);
975 return (EADDRNOTAVAIL);
976 }
977
978 LIST_REMOVE(rt, mfc_hash);
979 free(rt, M_MRTABLE);
980
981 splx(s);
982 return (0);
983 }
984
985 static int
986 socket_send(s, mm, src)
987 struct socket *s;
988 struct mbuf *mm;
989 struct sockaddr_in *src;
990 {
991 if (s) {
992 if (sbappendaddr(&s->so_rcv, sintosa(src), mm, (struct mbuf *)0) != 0) {
993 sorwakeup(s);
994 return (0);
995 }
996 }
997 m_freem(mm);
998 return (-1);
999 }
1000
1001 /*
1002 * IP multicast forwarding function. This function assumes that the packet
1003 * pointed to by "ip" has arrived on (or is about to be sent to) the interface
1004 * pointed to by "ifp", and the packet is to be relayed to other networks
1005 * that have members of the packet's destination IP multicast group.
1006 *
1007 * The packet is returned unscathed to the caller, unless it is
1008 * erroneous, in which case a non-zero return value tells the caller to
1009 * discard it.
1010 */
1011
1012 #define IP_HDR_LEN 20 /* # bytes of fixed IP header (excluding options) */
1013 #define TUNNEL_LEN 12 /* # bytes of IP option for tunnel encapsulation */
1014
1015 int
1016 #ifdef RSVP_ISI
1017 ip_mforward(m, ifp, imo)
1018 #else
1019 ip_mforward(m, ifp)
1020 #endif /* RSVP_ISI */
1021 struct mbuf *m;
1022 struct ifnet *ifp;
1023 #ifdef RSVP_ISI
1024 struct ip_moptions *imo;
1025 #endif /* RSVP_ISI */
1026 {
1027 struct ip *ip = mtod(m, struct ip *);
1028 struct mfc *rt;
1029 u_char *ipoptions;
1030 static int srctun = 0;
1031 struct mbuf *mm;
1032 int s;
1033 #ifdef RSVP_ISI
1034 struct vif *vifp;
1035 vifi_t vifi;
1036 #endif /* RSVP_ISI */
1037
1038 /*
1039 * Clear any in-bound checksum flags for this packet.
1040 */
1041 m->m_pkthdr.csum_flags = 0;
1042
1043 if (mrtdebug & DEBUG_FORWARD)
1044 log(LOG_DEBUG, "ip_mforward: src %x, dst %x, ifp %p\n",
1045 ntohl(ip->ip_src.s_addr), ntohl(ip->ip_dst.s_addr), ifp);
1046
1047 if (ip->ip_hl < (IP_HDR_LEN + TUNNEL_LEN) >> 2 ||
1048 (ipoptions = (u_char *)(ip + 1))[1] != IPOPT_LSRR) {
1049 /*
1050 * Packet arrived via a physical interface or
1051 * an encapuslated tunnel.
1052 */
1053 } else {
1054 /*
1055 * Packet arrived through a source-route tunnel.
1056 * Source-route tunnels are no longer supported.
1057 */
1058 if ((srctun++ % 1000) == 0)
1059 log(LOG_ERR, "ip_mforward: received source-routed packet from %x\n",
1060 ntohl(ip->ip_src.s_addr));
1061
1062 return (1);
1063 }
1064
1065 #ifdef RSVP_ISI
1066 if (imo && ((vifi = imo->imo_multicast_vif) < numvifs)) {
1067 if (ip->ip_ttl < 255)
1068 ip->ip_ttl++; /* compensate for -1 in *_send routines */
1069 if (rsvpdebug && ip->ip_p == IPPROTO_RSVP) {
1070 vifp = viftable + vifi;
1071 printf("Sending IPPROTO_RSVP from %x to %x on vif %d (%s%s)\n",
1072 ntohl(ip->ip_src), ntohl(ip->ip_dst), vifi,
1073 (vifp->v_flags & VIFF_TUNNEL) ? "tunnel on " : "",
1074 vifp->v_ifp->if_xname);
1075 }
1076 return (ip_mdq(m, ifp, (struct mfc *)0, vifi));
1077 }
1078 if (rsvpdebug && ip->ip_p == IPPROTO_RSVP) {
1079 printf("Warning: IPPROTO_RSVP from %x to %x without vif option\n",
1080 ntohl(ip->ip_src), ntohl(ip->ip_dst));
1081 }
1082 #endif /* RSVP_ISI */
1083
1084 /*
1085 * Don't forward a packet with time-to-live of zero or one,
1086 * or a packet destined to a local-only group.
1087 */
1088 if (ip->ip_ttl <= 1 ||
1089 IN_LOCAL_GROUP(ip->ip_dst.s_addr))
1090 return (0);
1091
1092 /*
1093 * Determine forwarding vifs from the forwarding cache table
1094 */
1095 s = splsoftnet();
1096 MFCFIND(ip->ip_src, ip->ip_dst, rt);
1097
1098 /* Entry exists, so forward if necessary */
1099 if (rt != 0) {
1100 splx(s);
1101 #ifdef RSVP_ISI
1102 return (ip_mdq(m, ifp, rt, -1));
1103 #else
1104 return (ip_mdq(m, ifp, rt));
1105 #endif /* RSVP_ISI */
1106 } else {
1107 /*
1108 * If we don't have a route for packet's origin,
1109 * Make a copy of the packet &
1110 * send message to routing daemon
1111 */
1112
1113 struct mbuf *mb0;
1114 struct rtdetq *rte;
1115 u_int32_t hash;
1116 int hlen = ip->ip_hl << 2;
1117 #ifdef UPCALL_TIMING
1118 struct timeval tp;
1119
1120 microtime(&tp);
1121 #endif /* UPCALL_TIMING */
1122
1123 mrtstat.mrts_no_route++;
1124 if (mrtdebug & (DEBUG_FORWARD | DEBUG_MFC))
1125 log(LOG_DEBUG, "ip_mforward: no rte s %x g %x\n",
1126 ntohl(ip->ip_src.s_addr),
1127 ntohl(ip->ip_dst.s_addr));
1128
1129 /*
1130 * Allocate mbufs early so that we don't do extra work if we are
1131 * just going to fail anyway. Make sure to pullup the header so
1132 * that other people can't step on it.
1133 */
1134 rte = (struct rtdetq *)malloc(sizeof(*rte), M_MRTABLE, M_NOWAIT);
1135 if (rte == 0) {
1136 splx(s);
1137 return (ENOBUFS);
1138 }
1139 mb0 = m_copy(m, 0, M_COPYALL);
1140 M_PULLUP(mb0, hlen);
1141 if (mb0 == 0) {
1142 free(rte, M_MRTABLE);
1143 splx(s);
1144 return (ENOBUFS);
1145 }
1146
1147 /* is there an upcall waiting for this packet? */
1148 hash = MFCHASH(ip->ip_src, ip->ip_dst);
1149 LIST_FOREACH(rt, &mfchashtbl[hash], mfc_hash) {
1150 if (in_hosteq(ip->ip_src, rt->mfc_origin) &&
1151 in_hosteq(ip->ip_dst, rt->mfc_mcastgrp) &&
1152 rt->mfc_stall != 0)
1153 break;
1154 }
1155
1156 if (rt == 0) {
1157 int i;
1158 struct igmpmsg *im;
1159
1160 /* no upcall, so make a new entry */
1161 rt = (struct mfc *)malloc(sizeof(*rt), M_MRTABLE, M_NOWAIT);
1162 if (rt == 0) {
1163 free(rte, M_MRTABLE);
1164 m_freem(mb0);
1165 splx(s);
1166 return (ENOBUFS);
1167 }
1168 /* Make a copy of the header to send to the user level process */
1169 mm = m_copy(m, 0, hlen);
1170 M_PULLUP(mm, hlen);
1171 if (mm == 0) {
1172 free(rte, M_MRTABLE);
1173 m_freem(mb0);
1174 free(rt, M_MRTABLE);
1175 splx(s);
1176 return (ENOBUFS);
1177 }
1178
1179 /*
1180 * Send message to routing daemon to install
1181 * a route into the kernel table
1182 */
1183 sin.sin_addr = ip->ip_src;
1184
1185 im = mtod(mm, struct igmpmsg *);
1186 im->im_msgtype = IGMPMSG_NOCACHE;
1187 im->im_mbz = 0;
1188
1189 mrtstat.mrts_upcalls++;
1190
1191 if (socket_send(ip_mrouter, mm, &sin) < 0) {
1192 log(LOG_WARNING, "ip_mforward: ip_mrouter socket queue full\n");
1193 ++mrtstat.mrts_upq_sockfull;
1194 free(rte, M_MRTABLE);
1195 m_freem(mb0);
1196 free(rt, M_MRTABLE);
1197 splx(s);
1198 return (ENOBUFS);
1199 }
1200
1201 /* insert new entry at head of hash chain */
1202 rt->mfc_origin = ip->ip_src;
1203 rt->mfc_mcastgrp = ip->ip_dst;
1204 rt->mfc_pkt_cnt = 0;
1205 rt->mfc_byte_cnt = 0;
1206 rt->mfc_wrong_if = 0;
1207 rt->mfc_expire = UPCALL_EXPIRE;
1208 nexpire[hash]++;
1209 for (i = 0; i < numvifs; i++)
1210 rt->mfc_ttls[i] = 0;
1211 rt->mfc_parent = -1;
1212
1213 /* link into table */
1214 LIST_INSERT_HEAD(&mfchashtbl[hash], rt, mfc_hash);
1215 /* Add this entry to the end of the queue */
1216 rt->mfc_stall = rte;
1217 } else {
1218 /* determine if q has overflowed */
1219 struct rtdetq **p;
1220 int npkts = 0;
1221
1222 for (p = &rt->mfc_stall; *p != 0; p = &(*p)->next)
1223 if (++npkts > MAX_UPQ) {
1224 mrtstat.mrts_upq_ovflw++;
1225 free(rte, M_MRTABLE);
1226 m_freem(mb0);
1227 splx(s);
1228 return (0);
1229 }
1230
1231 /* Add this entry to the end of the queue */
1232 *p = rte;
1233 }
1234
1235 rte->next = 0;
1236 rte->m = mb0;
1237 rte->ifp = ifp;
1238 #ifdef UPCALL_TIMING
1239 rte->t = tp;
1240 #endif /* UPCALL_TIMING */
1241
1242
1243 splx(s);
1244
1245 return (0);
1246 }
1247 }
1248
1249
1250 /*ARGSUSED*/
1251 static void
1252 expire_upcalls(v)
1253 void *v;
1254 {
1255 int i;
1256 int s;
1257
1258 s = splsoftnet();
1259
1260 for (i = 0; i < MFCTBLSIZ; i++) {
1261 struct mfc *rt, *nrt;
1262
1263 if (nexpire[i] == 0)
1264 continue;
1265
1266 for (rt = LIST_FIRST(&mfchashtbl[i]); rt; rt = nrt) {
1267 nrt = LIST_NEXT(rt, mfc_hash);
1268
1269 if (rt->mfc_expire == 0 ||
1270 --rt->mfc_expire > 0)
1271 continue;
1272 nexpire[i]--;
1273
1274 ++mrtstat.mrts_cache_cleanups;
1275 if (mrtdebug & DEBUG_EXPIRE)
1276 log(LOG_DEBUG,
1277 "expire_upcalls: expiring (%x %x)\n",
1278 ntohl(rt->mfc_origin.s_addr),
1279 ntohl(rt->mfc_mcastgrp.s_addr));
1280
1281 expire_mfc(rt);
1282 }
1283 }
1284
1285 splx(s);
1286 callout_reset(&expire_upcalls_ch, EXPIRE_TIMEOUT,
1287 expire_upcalls, NULL);
1288 }
1289
1290 /*
1291 * Packet forwarding routine once entry in the cache is made
1292 */
1293 static int
1294 #ifdef RSVP_ISI
1295 ip_mdq(m, ifp, rt, xmt_vif)
1296 #else
1297 ip_mdq(m, ifp, rt)
1298 #endif /* RSVP_ISI */
1299 struct mbuf *m;
1300 struct ifnet *ifp;
1301 struct mfc *rt;
1302 #ifdef RSVP_ISI
1303 vifi_t xmt_vif;
1304 #endif /* RSVP_ISI */
1305 {
1306 struct ip *ip = mtod(m, struct ip *);
1307 vifi_t vifi;
1308 struct vif *vifp;
1309 int plen = ntohs(ip->ip_len);
1310
1311 /*
1312 * Macro to send packet on vif. Since RSVP packets don't get counted on
1313 * input, they shouldn't get counted on output, so statistics keeping is
1314 * separate.
1315 */
1316 #define MC_SEND(ip,vifp,m) { \
1317 if ((vifp)->v_flags & VIFF_TUNNEL) \
1318 encap_send((ip), (vifp), (m)); \
1319 else \
1320 phyint_send((ip), (vifp), (m)); \
1321 }
1322
1323 #ifdef RSVP_ISI
1324 /*
1325 * If xmt_vif is not -1, send on only the requested vif.
1326 *
1327 * (since vifi_t is u_short, -1 becomes MAXUSHORT, which > numvifs.
1328 */
1329 if (xmt_vif < numvifs) {
1330 MC_SEND(ip, viftable + xmt_vif, m);
1331 return (1);
1332 }
1333 #endif /* RSVP_ISI */
1334
1335 /*
1336 * Don't forward if it didn't arrive from the parent vif for its origin.
1337 */
1338 vifi = rt->mfc_parent;
1339 if ((vifi >= numvifs) || (viftable[vifi].v_ifp != ifp)) {
1340 /* came in the wrong interface */
1341 if (mrtdebug & DEBUG_FORWARD)
1342 log(LOG_DEBUG, "wrong if: ifp %p vifi %d vififp %p\n",
1343 ifp, vifi, viftable[vifi].v_ifp);
1344 ++mrtstat.mrts_wrong_if;
1345 ++rt->mfc_wrong_if;
1346 /*
1347 * If we are doing PIM assert processing, and we are forwarding
1348 * packets on this interface, and it is a broadcast medium
1349 * interface (and not a tunnel), send a message to the routing daemon.
1350 */
1351 if (pim_assert && rt->mfc_ttls[vifi] &&
1352 (ifp->if_flags & IFF_BROADCAST) &&
1353 !(viftable[vifi].v_flags & VIFF_TUNNEL)) {
1354 struct mbuf *mm;
1355 struct igmpmsg *im;
1356 int hlen = ip->ip_hl << 2;
1357 struct timeval now;
1358 u_int32_t delta;
1359
1360 microtime(&now);
1361
1362 TV_DELTA(rt->mfc_last_assert, now, delta);
1363
1364 if (delta > ASSERT_MSG_TIME) {
1365 mm = m_copy(m, 0, hlen);
1366 M_PULLUP(mm, hlen);
1367 if (mm == 0) {
1368 return (ENOBUFS);
1369 }
1370
1371 rt->mfc_last_assert = now;
1372
1373 im = mtod(mm, struct igmpmsg *);
1374 im->im_msgtype = IGMPMSG_WRONGVIF;
1375 im->im_mbz = 0;
1376 im->im_vif = vifi;
1377
1378 sin.sin_addr = im->im_src;
1379
1380 socket_send(ip_mrouter, mm, &sin);
1381 }
1382 }
1383 return (0);
1384 }
1385
1386 /* If I sourced this packet, it counts as output, else it was input. */
1387 if (in_hosteq(ip->ip_src, viftable[vifi].v_lcl_addr)) {
1388 viftable[vifi].v_pkt_out++;
1389 viftable[vifi].v_bytes_out += plen;
1390 } else {
1391 viftable[vifi].v_pkt_in++;
1392 viftable[vifi].v_bytes_in += plen;
1393 }
1394 rt->mfc_pkt_cnt++;
1395 rt->mfc_byte_cnt += plen;
1396
1397 /*
1398 * For each vif, decide if a copy of the packet should be forwarded.
1399 * Forward if:
1400 * - the ttl exceeds the vif's threshold
1401 * - there are group members downstream on interface
1402 */
1403 for (vifp = viftable, vifi = 0; vifi < numvifs; vifp++, vifi++)
1404 if ((rt->mfc_ttls[vifi] > 0) &&
1405 (ip->ip_ttl > rt->mfc_ttls[vifi])) {
1406 vifp->v_pkt_out++;
1407 vifp->v_bytes_out += plen;
1408 MC_SEND(ip, vifp, m);
1409 }
1410
1411 return (0);
1412 }
1413
1414 #ifdef RSVP_ISI
1415 /*
1416 * check if a vif number is legal/ok. This is used by ip_output, to export
1417 * numvifs there,
1418 */
1419 int
1420 legal_vif_num(vif)
1421 int vif;
1422 {
1423 if (vif >= 0 && vif < numvifs)
1424 return (1);
1425 else
1426 return (0);
1427 }
1428 #endif /* RSVP_ISI */
1429
1430 static void
1431 phyint_send(ip, vifp, m)
1432 struct ip *ip;
1433 struct vif *vifp;
1434 struct mbuf *m;
1435 {
1436 struct mbuf *mb_copy;
1437 int hlen = ip->ip_hl << 2;
1438
1439 /*
1440 * Make a new reference to the packet; make sure that
1441 * the IP header is actually copied, not just referenced,
1442 * so that ip_output() only scribbles on the copy.
1443 */
1444 mb_copy = m_copy(m, 0, M_COPYALL);
1445 M_PULLUP(mb_copy, hlen);
1446 if (mb_copy == 0)
1447 return;
1448
1449 if (vifp->v_rate_limit <= 0)
1450 tbf_send_packet(vifp, mb_copy);
1451 else
1452 tbf_control(vifp, mb_copy, mtod(mb_copy, struct ip *),
1453 ntohs(ip->ip_len));
1454 }
1455
1456 static void
1457 encap_send(ip, vifp, m)
1458 struct ip *ip;
1459 struct vif *vifp;
1460 struct mbuf *m;
1461 {
1462 struct mbuf *mb_copy;
1463 struct ip *ip_copy;
1464 int i, len = ntohs(ip->ip_len) + sizeof(multicast_encap_iphdr);
1465
1466 /*
1467 * copy the old packet & pullup it's IP header into the
1468 * new mbuf so we can modify it. Try to fill the new
1469 * mbuf since if we don't the ethernet driver will.
1470 */
1471 MGETHDR(mb_copy, M_DONTWAIT, MT_DATA);
1472 if (mb_copy == 0)
1473 return;
1474 mb_copy->m_data += max_linkhdr;
1475 mb_copy->m_pkthdr.len = len;
1476 mb_copy->m_len = sizeof(multicast_encap_iphdr);
1477
1478 if ((mb_copy->m_next = m_copy(m, 0, M_COPYALL)) == 0) {
1479 m_freem(mb_copy);
1480 return;
1481 }
1482 i = MHLEN - max_linkhdr;
1483 if (i > len)
1484 i = len;
1485 mb_copy = m_pullup(mb_copy, i);
1486 if (mb_copy == 0)
1487 return;
1488
1489 /*
1490 * fill in the encapsulating IP header.
1491 */
1492 ip_copy = mtod(mb_copy, struct ip *);
1493 *ip_copy = multicast_encap_iphdr;
1494 ip_copy->ip_id = htons(ip_id++);
1495 ip_copy->ip_len = htons(len);
1496 ip_copy->ip_src = vifp->v_lcl_addr;
1497 ip_copy->ip_dst = vifp->v_rmt_addr;
1498
1499 /*
1500 * turn the encapsulated IP header back into a valid one.
1501 */
1502 ip = (struct ip *)((caddr_t)ip_copy + sizeof(multicast_encap_iphdr));
1503 --ip->ip_ttl;
1504 ip->ip_sum = 0;
1505 mb_copy->m_data += sizeof(multicast_encap_iphdr);
1506 ip->ip_sum = in_cksum(mb_copy, ip->ip_hl << 2);
1507 mb_copy->m_data -= sizeof(multicast_encap_iphdr);
1508
1509 if (vifp->v_rate_limit <= 0)
1510 tbf_send_packet(vifp, mb_copy);
1511 else
1512 tbf_control(vifp, mb_copy, ip, ntohs(ip_copy->ip_len));
1513 }
1514
1515 /*
1516 * De-encapsulate a packet and feed it back through ip input.
1517 */
1518 static void
1519 #if __STDC__
1520 vif_input(struct mbuf *m, ...)
1521 #else
1522 vif_input(m, va_alist)
1523 struct mbuf *m;
1524 va_dcl
1525 #endif
1526 {
1527 int off, proto;
1528 va_list ap;
1529 struct ip *ip;
1530 struct vif *vifp;
1531 int s;
1532 struct ifqueue *ifq;
1533
1534 va_start(ap, m);
1535 off = va_arg(ap, int);
1536 proto = va_arg(ap, int);
1537 va_end(ap);
1538
1539 vifp = (struct vif *)encap_getarg(m);
1540 if (!vifp || proto != AF_INET) {
1541 m_freem(m);
1542 mrtstat.mrts_bad_tunnel++;
1543 return;
1544 }
1545
1546 ip = mtod(m, struct ip *);
1547
1548 m_adj(m, off);
1549 m->m_pkthdr.rcvif = vifp->v_ifp;
1550 ifq = &ipintrq;
1551 s = splnet();
1552 if (IF_QFULL(ifq)) {
1553 IF_DROP(ifq);
1554 m_freem(m);
1555 } else {
1556 IF_ENQUEUE(ifq, m);
1557 /*
1558 * normally we would need a "schednetisr(NETISR_IP)"
1559 * here but we were called by ip_input and it is going
1560 * to loop back & try to dequeue the packet we just
1561 * queued as soon as we return so we avoid the
1562 * unnecessary software interrrupt.
1563 */
1564 }
1565 splx(s);
1566 }
1567
1568 /*
1569 * Check if the packet should be grabbed by us.
1570 */
1571 static int
1572 vif_encapcheck(m, off, proto, arg)
1573 const struct mbuf *m;
1574 int off;
1575 int proto;
1576 void *arg;
1577 {
1578 struct vif *vifp;
1579 struct ip ip;
1580
1581 #ifdef DIAGNOSTIC
1582 if (!arg || proto != IPPROTO_IPV4)
1583 panic("unexpected arg in vif_encapcheck");
1584 #endif
1585
1586 /*
1587 * do not grab the packet if it's not to a multicast destination or if
1588 * we don't have an encapsulating tunnel with the source.
1589 * Note: This code assumes that the remote site IP address
1590 * uniquely identifies the tunnel (i.e., that this site has
1591 * at most one tunnel with the remote site).
1592 */
1593
1594 /* LINTED const cast */
1595 m_copydata((struct mbuf *)m, off, sizeof(ip), (caddr_t)&ip);
1596 if (!IN_MULTICAST(ip.ip_dst.s_addr))
1597 return 0;
1598
1599 /* LINTED const cast */
1600 m_copydata((struct mbuf *)m, 0, sizeof(ip), (caddr_t)&ip);
1601 if (!in_hosteq(ip.ip_src, last_encap_src)) {
1602 vifp = (struct vif *)arg;
1603 if (vifp->v_flags & VIFF_TUNNEL &&
1604 in_hosteq(vifp->v_rmt_addr, ip.ip_src))
1605 ;
1606 else
1607 return 0;
1608 last_encap_vif = vifp;
1609 last_encap_src = ip.ip_src;
1610 } else
1611 vifp = last_encap_vif;
1612
1613 /* 32bit match, since we have checked ip_src only */
1614 return 32;
1615 }
1616
1617 /*
1618 * Token bucket filter module
1619 */
1620 static void
1621 tbf_control(vifp, m, ip, len)
1622 struct vif *vifp;
1623 struct mbuf *m;
1624 struct ip *ip;
1625 u_int32_t len;
1626 {
1627
1628 if (len > MAX_BKT_SIZE) {
1629 /* drop if packet is too large */
1630 mrtstat.mrts_pkt2large++;
1631 m_freem(m);
1632 return;
1633 }
1634
1635 tbf_update_tokens(vifp);
1636
1637 /*
1638 * If there are enough tokens, and the queue is empty, send this packet
1639 * out immediately. Otherwise, try to insert it on this vif's queue.
1640 */
1641 if (vifp->tbf_q_len == 0) {
1642 if (len <= vifp->tbf_n_tok) {
1643 vifp->tbf_n_tok -= len;
1644 tbf_send_packet(vifp, m);
1645 } else {
1646 /* queue packet and timeout till later */
1647 tbf_queue(vifp, m);
1648 callout_reset(&vifp->v_repq_ch, TBF_REPROCESS,
1649 tbf_reprocess_q, vifp);
1650 }
1651 } else {
1652 if (vifp->tbf_q_len >= vifp->tbf_max_q_len &&
1653 !tbf_dq_sel(vifp, ip)) {
1654 /* queue length too much, and couldn't make room */
1655 mrtstat.mrts_q_overflow++;
1656 m_freem(m);
1657 } else {
1658 /* queue length low enough, or made room */
1659 tbf_queue(vifp, m);
1660 tbf_process_q(vifp);
1661 }
1662 }
1663 }
1664
1665 /*
1666 * adds a packet to the queue at the interface
1667 */
1668 static void
1669 tbf_queue(vifp, m)
1670 struct vif *vifp;
1671 struct mbuf *m;
1672 {
1673 int s = splsoftnet();
1674
1675 /* insert at tail */
1676 *vifp->tbf_t = m;
1677 vifp->tbf_t = &m->m_nextpkt;
1678 vifp->tbf_q_len++;
1679
1680 splx(s);
1681 }
1682
1683
1684 /*
1685 * processes the queue at the interface
1686 */
1687 static void
1688 tbf_process_q(vifp)
1689 struct vif *vifp;
1690 {
1691 struct mbuf *m;
1692 int len;
1693 int s = splsoftnet();
1694
1695 /*
1696 * Loop through the queue at the interface and send as many packets
1697 * as possible.
1698 */
1699 for (m = vifp->tbf_q;
1700 m != 0;
1701 m = vifp->tbf_q) {
1702 len = ntohs(mtod(m, struct ip *)->ip_len);
1703
1704 /* determine if the packet can be sent */
1705 if (len <= vifp->tbf_n_tok) {
1706 /* if so,
1707 * reduce no of tokens, dequeue the packet,
1708 * send the packet.
1709 */
1710 if ((vifp->tbf_q = m->m_nextpkt) == 0)
1711 vifp->tbf_t = &vifp->tbf_q;
1712 --vifp->tbf_q_len;
1713
1714 m->m_nextpkt = 0;
1715 vifp->tbf_n_tok -= len;
1716 tbf_send_packet(vifp, m);
1717 } else
1718 break;
1719 }
1720 splx(s);
1721 }
1722
1723 static void
1724 tbf_reprocess_q(arg)
1725 void *arg;
1726 {
1727 struct vif *vifp = arg;
1728
1729 if (ip_mrouter == 0)
1730 return;
1731
1732 tbf_update_tokens(vifp);
1733 tbf_process_q(vifp);
1734
1735 if (vifp->tbf_q_len != 0)
1736 callout_reset(&vifp->v_repq_ch, TBF_REPROCESS,
1737 tbf_reprocess_q, vifp);
1738 }
1739
1740 /* function that will selectively discard a member of the queue
1741 * based on the precedence value and the priority
1742 */
1743 static int
1744 tbf_dq_sel(vifp, ip)
1745 struct vif *vifp;
1746 struct ip *ip;
1747 {
1748 u_int p;
1749 struct mbuf **mp, *m;
1750 int s = splsoftnet();
1751
1752 p = priority(vifp, ip);
1753
1754 for (mp = &vifp->tbf_q, m = *mp;
1755 m != 0;
1756 mp = &m->m_nextpkt, m = *mp) {
1757 if (p > priority(vifp, mtod(m, struct ip *))) {
1758 if ((*mp = m->m_nextpkt) == 0)
1759 vifp->tbf_t = mp;
1760 --vifp->tbf_q_len;
1761
1762 m_freem(m);
1763 mrtstat.mrts_drop_sel++;
1764 splx(s);
1765 return (1);
1766 }
1767 }
1768 splx(s);
1769 return (0);
1770 }
1771
1772 static void
1773 tbf_send_packet(vifp, m)
1774 struct vif *vifp;
1775 struct mbuf *m;
1776 {
1777 int error;
1778 int s = splsoftnet();
1779
1780 if (vifp->v_flags & VIFF_TUNNEL) {
1781 /* If tunnel options */
1782 #ifdef IPSEC
1783 /* Don't lookup socket in forwading case */
1784 (void)ipsec_setsocket(m, NULL);
1785 #endif
1786 ip_output(m, (struct mbuf *)0, &vifp->v_route,
1787 IP_FORWARDING, (struct ip_moptions *)0);
1788 } else {
1789 /* if physical interface option, extract the options and then send */
1790 struct ip_moptions imo;
1791
1792 imo.imo_multicast_ifp = vifp->v_ifp;
1793 imo.imo_multicast_ttl = mtod(m, struct ip *)->ip_ttl - 1;
1794 imo.imo_multicast_loop = 1;
1795 #ifdef RSVP_ISI
1796 imo.imo_multicast_vif = -1;
1797 #endif
1798
1799 #ifdef IPSEC
1800 /* Don't lookup socket in forwading case */
1801 (void)ipsec_setsocket(m, NULL);
1802 #endif
1803 error = ip_output(m, (struct mbuf *)0, (struct route *)0,
1804 IP_FORWARDING|IP_MULTICASTOPTS, &imo);
1805
1806 if (mrtdebug & DEBUG_XMIT)
1807 log(LOG_DEBUG, "phyint_send on vif %ld err %d\n",
1808 (long)(vifp-viftable), error);
1809 }
1810 splx(s);
1811 }
1812
1813 /* determine the current time and then
1814 * the elapsed time (between the last time and time now)
1815 * in milliseconds & update the no. of tokens in the bucket
1816 */
1817 static void
1818 tbf_update_tokens(vifp)
1819 struct vif *vifp;
1820 {
1821 struct timeval tp;
1822 u_int32_t tm;
1823 int s = splsoftnet();
1824
1825 microtime(&tp);
1826
1827 TV_DELTA(tp, vifp->tbf_last_pkt_t, tm);
1828
1829 /*
1830 * This formula is actually
1831 * "time in seconds" * "bytes/second".
1832 *
1833 * (tm / 1000000) * (v_rate_limit * 1000 * (1000/1024) / 8)
1834 *
1835 * The (1000/1024) was introduced in add_vif to optimize
1836 * this divide into a shift.
1837 */
1838 vifp->tbf_n_tok += tm * vifp->v_rate_limit / 8192;
1839 vifp->tbf_last_pkt_t = tp;
1840
1841 if (vifp->tbf_n_tok > MAX_BKT_SIZE)
1842 vifp->tbf_n_tok = MAX_BKT_SIZE;
1843
1844 splx(s);
1845 }
1846
1847 static int
1848 priority(vifp, ip)
1849 struct vif *vifp;
1850 struct ip *ip;
1851 {
1852 int prio;
1853
1854 /* temporary hack; may add general packet classifier some day */
1855
1856 /*
1857 * The UDP port space is divided up into four priority ranges:
1858 * [0, 16384) : unclassified - lowest priority
1859 * [16384, 32768) : audio - highest priority
1860 * [32768, 49152) : whiteboard - medium priority
1861 * [49152, 65536) : video - low priority
1862 */
1863 if (ip->ip_p == IPPROTO_UDP) {
1864 struct udphdr *udp = (struct udphdr *)(((char *)ip) + (ip->ip_hl << 2));
1865
1866 switch (ntohs(udp->uh_dport) & 0xc000) {
1867 case 0x4000:
1868 prio = 70;
1869 break;
1870 case 0x8000:
1871 prio = 60;
1872 break;
1873 case 0xc000:
1874 prio = 55;
1875 break;
1876 default:
1877 prio = 50;
1878 break;
1879 }
1880
1881 if (tbfdebug > 1)
1882 log(LOG_DEBUG, "port %x prio %d\n", ntohs(udp->uh_dport), prio);
1883 } else
1884 prio = 50;
1885
1886
1887 return (prio);
1888 }
1889
1890 /*
1891 * End of token bucket filter modifications
1892 */
1893
1894 #ifdef RSVP_ISI
1895
1896 int
1897 ip_rsvp_vif_init(so, m)
1898 struct socket *so;
1899 struct mbuf *m;
1900 {
1901 int i;
1902 int s;
1903
1904 if (rsvpdebug)
1905 printf("ip_rsvp_vif_init: so_type = %d, pr_protocol = %d\n",
1906 so->so_type, so->so_proto->pr_protocol);
1907
1908 if (so->so_type != SOCK_RAW || so->so_proto->pr_protocol != IPPROTO_RSVP)
1909 return (EOPNOTSUPP);
1910
1911 /* Check mbuf. */
1912 if (m == 0 || m->m_len != sizeof(int)) {
1913 return (EINVAL);
1914 }
1915 i = *(mtod(m, int *));
1916
1917 if (rsvpdebug)
1918 printf("ip_rsvp_vif_init: vif = %d rsvp_on = %d\n",i,rsvp_on);
1919
1920 s = splsoftnet();
1921
1922 /* Check vif. */
1923 if (!legal_vif_num(i)) {
1924 splx(s);
1925 return (EADDRNOTAVAIL);
1926 }
1927
1928 /* Check if socket is available. */
1929 if (viftable[i].v_rsvpd != 0) {
1930 splx(s);
1931 return (EADDRINUSE);
1932 }
1933
1934 viftable[i].v_rsvpd = so;
1935 /* This may seem silly, but we need to be sure we don't over-increment
1936 * the RSVP counter, in case something slips up.
1937 */
1938 if (!viftable[i].v_rsvp_on) {
1939 viftable[i].v_rsvp_on = 1;
1940 rsvp_on++;
1941 }
1942
1943 splx(s);
1944 return (0);
1945 }
1946
1947 int
1948 ip_rsvp_vif_done(so, m)
1949 struct socket *so;
1950 struct mbuf *m;
1951 {
1952 int i;
1953 int s;
1954
1955 if (rsvpdebug)
1956 printf("ip_rsvp_vif_done: so_type = %d, pr_protocol = %d\n",
1957 so->so_type, so->so_proto->pr_protocol);
1958
1959 if (so->so_type != SOCK_RAW || so->so_proto->pr_protocol != IPPROTO_RSVP)
1960 return (EOPNOTSUPP);
1961
1962 /* Check mbuf. */
1963 if (m == 0 || m->m_len != sizeof(int)) {
1964 return (EINVAL);
1965 }
1966 i = *(mtod(m, int *));
1967
1968 s = splsoftnet();
1969
1970 /* Check vif. */
1971 if (!legal_vif_num(i)) {
1972 splx(s);
1973 return (EADDRNOTAVAIL);
1974 }
1975
1976 if (rsvpdebug)
1977 printf("ip_rsvp_vif_done: v_rsvpd = %x so = %x\n",
1978 viftable[i].v_rsvpd, so);
1979
1980 viftable[i].v_rsvpd = 0;
1981 /* This may seem silly, but we need to be sure we don't over-decrement
1982 * the RSVP counter, in case something slips up.
1983 */
1984 if (viftable[i].v_rsvp_on) {
1985 viftable[i].v_rsvp_on = 0;
1986 rsvp_on--;
1987 }
1988
1989 splx(s);
1990 return (0);
1991 }
1992
1993 void
1994 ip_rsvp_force_done(so)
1995 struct socket *so;
1996 {
1997 int vifi;
1998 int s;
1999
2000 /* Don't bother if it is not the right type of socket. */
2001 if (so->so_type != SOCK_RAW || so->so_proto->pr_protocol != IPPROTO_RSVP)
2002 return;
2003
2004 s = splsoftnet();
2005
2006 /* The socket may be attached to more than one vif...this
2007 * is perfectly legal.
2008 */
2009 for (vifi = 0; vifi < numvifs; vifi++) {
2010 if (viftable[vifi].v_rsvpd == so) {
2011 viftable[vifi].v_rsvpd = 0;
2012 /* This may seem silly, but we need to be sure we don't
2013 * over-decrement the RSVP counter, in case something slips up.
2014 */
2015 if (viftable[vifi].v_rsvp_on) {
2016 viftable[vifi].v_rsvp_on = 0;
2017 rsvp_on--;
2018 }
2019 }
2020 }
2021
2022 splx(s);
2023 return;
2024 }
2025
2026 void
2027 rsvp_input(m, ifp)
2028 struct mbuf *m;
2029 struct ifnet *ifp;
2030 {
2031 int vifi;
2032 struct ip *ip = mtod(m, struct ip *);
2033 static struct sockaddr_in rsvp_src = { sizeof(sin), AF_INET };
2034 int s;
2035
2036 if (rsvpdebug)
2037 printf("rsvp_input: rsvp_on %d\n",rsvp_on);
2038
2039 /* Can still get packets with rsvp_on = 0 if there is a local member
2040 * of the group to which the RSVP packet is addressed. But in this
2041 * case we want to throw the packet away.
2042 */
2043 if (!rsvp_on) {
2044 m_freem(m);
2045 return;
2046 }
2047
2048 /* If the old-style non-vif-associated socket is set, then use
2049 * it and ignore the new ones.
2050 */
2051 if (ip_rsvpd != 0) {
2052 if (rsvpdebug)
2053 printf("rsvp_input: Sending packet up old-style socket\n");
2054 rip_input(m); /*XXX*/
2055 return;
2056 }
2057
2058 s = splsoftnet();
2059
2060 if (rsvpdebug)
2061 printf("rsvp_input: check vifs\n");
2062
2063 /* Find which vif the packet arrived on. */
2064 for (vifi = 0; vifi < numvifs; vifi++) {
2065 if (viftable[vifi].v_ifp == ifp)
2066 break;
2067 }
2068
2069 if (vifi == numvifs) {
2070 /* Can't find vif packet arrived on. Drop packet. */
2071 if (rsvpdebug)
2072 printf("rsvp_input: Can't find vif for packet...dropping it.\n");
2073 m_freem(m);
2074 splx(s);
2075 return;
2076 }
2077
2078 if (rsvpdebug)
2079 printf("rsvp_input: check socket\n");
2080
2081 if (viftable[vifi].v_rsvpd == 0) {
2082 /* drop packet, since there is no specific socket for this
2083 * interface */
2084 if (rsvpdebug)
2085 printf("rsvp_input: No socket defined for vif %d\n",vifi);
2086 m_freem(m);
2087 splx(s);
2088 return;
2089 }
2090
2091 rsvp_src.sin_addr = ip->ip_src;
2092
2093 if (rsvpdebug && m)
2094 printf("rsvp_input: m->m_len = %d, sbspace() = %d\n",
2095 m->m_len,sbspace(&viftable[vifi].v_rsvpd->so_rcv));
2096
2097 if (socket_send(viftable[vifi].v_rsvpd, m, &rsvp_src) < 0)
2098 if (rsvpdebug)
2099 printf("rsvp_input: Failed to append to socket\n");
2100 else
2101 if (rsvpdebug)
2102 printf("rsvp_input: send packet up\n");
2103
2104 splx(s);
2105 }
2106 #endif /* RSVP_ISI */
2107