ip_mroute.c revision 1.60 1 /* $NetBSD: ip_mroute.c,v 1.60 2002/06/09 16:33:41 itojun Exp $ */
2
3 /*
4 * IP multicast forwarding procedures
5 *
6 * Written by David Waitzman, BBN Labs, August 1988.
7 * Modified by Steve Deering, Stanford, February 1989.
8 * Modified by Mark J. Steiglitz, Stanford, May, 1991
9 * Modified by Van Jacobson, LBL, January 1993
10 * Modified by Ajit Thyagarajan, PARC, August 1993
11 * Modified by Bill Fenner, PARC, April 1994
12 * Modified by Charles M. Hannum, NetBSD, May 1995.
13 *
14 * MROUTING Revision: 1.2
15 */
16
17 #include <sys/cdefs.h>
18 __KERNEL_RCSID(0, "$NetBSD: ip_mroute.c,v 1.60 2002/06/09 16:33:41 itojun Exp $");
19
20 #include "opt_ipsec.h"
21
22 #include <sys/param.h>
23 #include <sys/systm.h>
24 #include <sys/callout.h>
25 #include <sys/mbuf.h>
26 #include <sys/socket.h>
27 #include <sys/socketvar.h>
28 #include <sys/protosw.h>
29 #include <sys/errno.h>
30 #include <sys/time.h>
31 #include <sys/kernel.h>
32 #include <sys/ioctl.h>
33 #include <sys/syslog.h>
34 #include <net/if.h>
35 #include <net/route.h>
36 #include <net/raw_cb.h>
37 #include <netinet/in.h>
38 #include <netinet/in_var.h>
39 #include <netinet/in_systm.h>
40 #include <netinet/ip.h>
41 #include <netinet/ip_var.h>
42 #include <netinet/in_pcb.h>
43 #include <netinet/udp.h>
44 #include <netinet/igmp.h>
45 #include <netinet/igmp_var.h>
46 #include <netinet/ip_mroute.h>
47 #include <netinet/ip_encap.h>
48
49 #include <machine/stdarg.h>
50
#define IP_MULTICASTOPTS	0

/*
 * Replace mbuf `m' by m_pullup(m, len) when `m' is non-null and either has
 * M_EXT set in its flags or holds fewer than `len' bytes in its first mbuf.
 * `m' is assigned whatever m_pullup() returns, so callers must re-test it
 * against 0 afterwards.
 */
#define M_PULLUP(m, len)						\
do {									\
	if ((m) != 0 &&							\
	    ((((m)->m_flags & M_EXT) != 0) || ((m)->m_len < (len))))	\
		(m) = m_pullup((m), (len));				\
} while (0)
57
58 /*
59 * Globals. All but ip_mrouter and ip_mrtproto could be static,
60 * except for netstat or debugging purposes.
61 */
62 struct socket *ip_mrouter = 0;
63 int ip_mrtproto = IGMP_DVMRP; /* for netstat only */
64
65 #define NO_RTE_FOUND 0x1
66 #define RTE_FOUND 0x2
67
/*
 * Hash an (origin, group) pair of struct in_addr into an index of
 * mfchashtbl: each address is XOR-folded with itself shifted right by
 * 10 and 20 bits, the two folds are XORed and masked with mfchash.
 */
#define MFCHASH_FOLD(x)	(((x) >> 20) ^ ((x) >> 10) ^ (x))
#define MFCHASH(a, g)							\
	((MFCHASH_FOLD((a).s_addr) ^ MFCHASH_FOLD((g).s_addr)) & mfchash)
71 LIST_HEAD(mfchashhdr, mfc) *mfchashtbl;
72 u_long mfchash;
73
74 u_char nexpire[MFCTBLSIZ];
75 struct vif viftable[MAXVIFS];
76 struct mrtstat mrtstat;
77 u_int mrtdebug = 0; /* debug level */
78 #define DEBUG_MFC 0x02
79 #define DEBUG_FORWARD 0x04
80 #define DEBUG_EXPIRE 0x08
81 #define DEBUG_XMIT 0x10
82 u_int tbfdebug = 0; /* tbf debug level */
83 #ifdef RSVP_ISI
84 u_int rsvpdebug = 0; /* rsvp debug level */
85 extern struct socket *ip_rsvpd;
86 extern int rsvp_on;
87 #endif /* RSVP_ISI */
88
89 /* vif attachment using sys/netinet/ip_encap.c */
90 extern struct domain inetdomain;
91 static void vif_input __P((struct mbuf *, ...));
92 static int vif_encapcheck __P((const struct mbuf *, int, int, void *));
93 static struct protosw vif_protosw =
94 { SOCK_RAW, &inetdomain, IPPROTO_IPV4, PR_ATOMIC|PR_ADDR,
95 vif_input, rip_output, 0, rip_ctloutput,
96 rip_usrreq,
97 0, 0, 0, 0,
98 };
99
/* Delay handed to callout_reset() between runs of expire_upcalls(). */
#define EXPIRE_TIMEOUT	(hz / 4)	/* 4x / second */
/* Initial mfc_expire of a cache entry that is waiting for the daemon. */
#define UPCALL_EXPIRE	6		/* number of timeouts */

/*
 * Token bucket filter parameters.
 */

#define TBF_REPROCESS	(hz / 100)	/* 100x / second */
108
109 static int get_sg_cnt __P((struct sioc_sg_req *));
110 static int get_vif_cnt __P((struct sioc_vif_req *));
111 static int ip_mrouter_init __P((struct socket *, struct mbuf *));
112 static int get_version __P((struct mbuf *));
113 static int set_assert __P((struct mbuf *));
114 static int get_assert __P((struct mbuf *));
115 static int add_vif __P((struct mbuf *));
116 static int del_vif __P((struct mbuf *));
117 static void update_mfc __P((struct mfcctl *, struct mfc *));
118 static void expire_mfc __P((struct mfc *));
119 static int add_mfc __P((struct mbuf *));
120 #ifdef UPCALL_TIMING
121 static void collate __P((struct timeval *));
122 #endif
123 static int del_mfc __P((struct mbuf *));
124 static int socket_send __P((struct socket *, struct mbuf *,
125 struct sockaddr_in *));
126 static void expire_upcalls __P((void *));
127 #ifdef RSVP_ISI
128 static int ip_mdq __P((struct mbuf *, struct ifnet *, struct mfc *, vifi_t));
129 #else
130 static int ip_mdq __P((struct mbuf *, struct ifnet *, struct mfc *));
131 #endif
132 static void phyint_send __P((struct ip *, struct vif *, struct mbuf *));
133 static void encap_send __P((struct ip *, struct vif *, struct mbuf *));
134 static void tbf_control __P((struct vif *, struct mbuf *, struct ip *,
135 u_int32_t));
136 static void tbf_queue __P((struct vif *, struct mbuf *));
137 static void tbf_process_q __P((struct vif *));
138 static void tbf_reprocess_q __P((void *));
139 static int tbf_dq_sel __P((struct vif *, struct ip *));
140 static void tbf_send_packet __P((struct vif *, struct mbuf *));
141 static void tbf_update_tokens __P((struct vif *));
142 static int priority __P((struct vif *, struct ip *));
143
144 /*
145 * 'Interfaces' associated with decapsulator (so we can tell
146 * packets that went through it from ones that get reflected
147 * by a broken gateway). These interfaces are never linked into
148 * the system ifnet list & no routes point to them. I.e., packets
149 * can't be sent this way. They only exist as a placeholder for
150 * multicast source verification.
151 */
152 #if 0
153 struct ifnet multicast_decap_if[MAXVIFS];
154 #endif
155
156 #define ENCAP_TTL 64
157 #define ENCAP_PROTO IPPROTO_IPIP /* 4 */
158
159 /* prototype IP hdr for encapsulated packets */
160 struct ip multicast_encap_iphdr = {
161 #if BYTE_ORDER == LITTLE_ENDIAN
162 sizeof(struct ip) >> 2, IPVERSION,
163 #else
164 IPVERSION, sizeof(struct ip) >> 2,
165 #endif
166 0, /* tos */
167 sizeof(struct ip), /* total length */
168 0, /* id */
169 0, /* frag offset */
170 ENCAP_TTL, ENCAP_PROTO,
171 0, /* checksum */
172 };
173
174 /*
175 * Private variables.
176 */
177 static vifi_t numvifs = 0;
178
179 static struct callout expire_upcalls_ch;
180
181 /*
182 * one-back cache used by vif_encapcheck to locate a tunnel's vif
183 * given a datagram's src ip address.
184 */
185 static struct in_addr last_encap_src;
186 static struct vif *last_encap_vif;
187
188 /*
189 * whether or not special PIM assert processing is enabled.
190 */
191 static int pim_assert;
192 /*
193 * Rate limit for assert notification messages, in usec
194 */
195 #define ASSERT_MSG_TIME 3000000
196
/*
 * Look up the forwarding cache entry for origin address `o' and
 * multicast group `g' and store it in `rt' (0 when there is none).
 * Entries that still have packets queued on mfc_stall are skipped.
 * Every call bumps mrts_mfc_lookups; a failed one also bumps
 * mrts_mfc_misses.  `o' and `g' are evaluated more than once.
 * Type of service parameter to be added in the future!!!
 */

#define MFCFIND(o, g, rt) {						\
	struct mfc *_mfc;						\
									\
	++mrtstat.mrts_mfc_lookups;					\
	(rt) = 0;							\
	for (_mfc = LIST_FIRST(&mfchashtbl[MFCHASH(o, g)]);		\
	    _mfc != 0;							\
	    _mfc = LIST_NEXT(_mfc, mfc_hash)) {				\
		if (_mfc->mfc_stall != 0 ||				\
		    !in_hosteq(_mfc->mfc_origin, (o)) ||		\
		    !in_hosteq(_mfc->mfc_mcastgrp, (g)))		\
			continue;					\
		(rt) = _mfc;						\
		break;							\
	}								\
	if ((rt) == 0)							\
		++mrtstat.mrts_mfc_misses;				\
}
217
/*
 * Store in `delta' the number of microseconds by which timeval `a' is
 * later than timeval `b'.  Differences of zero, one and two whole
 * seconds are handled without a multiplication.
 * Borrowed from Van Jacobson's scheduling code.
 */
#define TV_DELTA(a, b, delta) {						\
	int tvd_secs;							\
									\
	delta = (a).tv_usec - (b).tv_usec;				\
	tvd_secs = (a).tv_sec - (b).tv_sec;				\
	if (tvd_secs == 1)						\
		delta += 1000000;					\
	else if (tvd_secs == 2)						\
		delta += 2000000;					\
	else if (tvd_secs != 0)						\
		delta += (1000000 * tvd_secs);				\
}
240
#ifdef UPCALL_TIMING
/*
 * Histogram of upcall delays filled in by collate(): slot i counts
 * delays with (usec >> 10) == i, and the last slot collects the rest.
 */
u_int32_t upcall_data[51];
#endif /* UPCALL_TIMING */
244
245 /*
246 * Handle MRT setsockopt commands to modify the multicast routing tables.
247 */
248 int
249 ip_mrouter_set(so, optname, m)
250 struct socket *so;
251 int optname;
252 struct mbuf **m;
253 {
254 int error;
255
256 if (optname != MRT_INIT && so != ip_mrouter)
257 error = ENOPROTOOPT;
258 else
259 switch (optname) {
260 case MRT_INIT:
261 error = ip_mrouter_init(so, *m);
262 break;
263 case MRT_DONE:
264 error = ip_mrouter_done();
265 break;
266 case MRT_ADD_VIF:
267 error = add_vif(*m);
268 break;
269 case MRT_DEL_VIF:
270 error = del_vif(*m);
271 break;
272 case MRT_ADD_MFC:
273 error = add_mfc(*m);
274 break;
275 case MRT_DEL_MFC:
276 error = del_mfc(*m);
277 break;
278 case MRT_ASSERT:
279 error = set_assert(*m);
280 break;
281 default:
282 error = ENOPROTOOPT;
283 break;
284 }
285
286 if (*m)
287 m_free(*m);
288 return (error);
289 }
290
291 /*
292 * Handle MRT getsockopt commands
293 */
294 int
295 ip_mrouter_get(so, optname, m)
296 struct socket *so;
297 int optname;
298 struct mbuf **m;
299 {
300 int error;
301
302 if (so != ip_mrouter)
303 error = ENOPROTOOPT;
304 else {
305 *m = m_get(M_WAIT, MT_SOOPTS);
306
307 switch (optname) {
308 case MRT_VERSION:
309 error = get_version(*m);
310 break;
311 case MRT_ASSERT:
312 error = get_assert(*m);
313 break;
314 default:
315 error = ENOPROTOOPT;
316 break;
317 }
318
319 if (error)
320 m_free(*m);
321 }
322
323 return (error);
324 }
325
326 /*
327 * Handle ioctl commands to obtain information from the cache
328 */
329 int
330 mrt_ioctl(so, cmd, data)
331 struct socket *so;
332 u_long cmd;
333 caddr_t data;
334 {
335 int error;
336
337 if (so != ip_mrouter)
338 error = EINVAL;
339 else
340 switch (cmd) {
341 case SIOCGETVIFCNT:
342 error = get_vif_cnt((struct sioc_vif_req *)data);
343 break;
344 case SIOCGETSGCNT:
345 error = get_sg_cnt((struct sioc_sg_req *)data);
346 break;
347 default:
348 error = EINVAL;
349 break;
350 }
351
352 return (error);
353 }
354
355 /*
356 * returns the packet, byte, rpf-failure count for the source group provided
357 */
358 static int
359 get_sg_cnt(req)
360 struct sioc_sg_req *req;
361 {
362 struct mfc *rt;
363 int s;
364
365 s = splsoftnet();
366 MFCFIND(req->src, req->grp, rt);
367 splx(s);
368 if (rt != 0) {
369 req->pktcnt = rt->mfc_pkt_cnt;
370 req->bytecnt = rt->mfc_byte_cnt;
371 req->wrong_if = rt->mfc_wrong_if;
372 } else
373 req->pktcnt = req->bytecnt = req->wrong_if = 0xffffffff;
374
375 return (0);
376 }
377
378 /*
379 * returns the input and output packet and byte counts on the vif provided
380 */
381 static int
382 get_vif_cnt(req)
383 struct sioc_vif_req *req;
384 {
385 vifi_t vifi = req->vifi;
386
387 if (vifi >= numvifs)
388 return (EINVAL);
389
390 req->icount = viftable[vifi].v_pkt_in;
391 req->ocount = viftable[vifi].v_pkt_out;
392 req->ibytes = viftable[vifi].v_bytes_in;
393 req->obytes = viftable[vifi].v_bytes_out;
394
395 return (0);
396 }
397
398 /*
399 * Enable multicast routing
400 */
401 static int
402 ip_mrouter_init(so, m)
403 struct socket *so;
404 struct mbuf *m;
405 {
406 int *v;
407
408 if (mrtdebug)
409 log(LOG_DEBUG,
410 "ip_mrouter_init: so_type = %d, pr_protocol = %d\n",
411 so->so_type, so->so_proto->pr_protocol);
412
413 if (so->so_type != SOCK_RAW ||
414 so->so_proto->pr_protocol != IPPROTO_IGMP)
415 return (EOPNOTSUPP);
416
417 if (m == 0 || m->m_len < sizeof(int))
418 return (EINVAL);
419
420 v = mtod(m, int *);
421 if (*v != 1)
422 return (EINVAL);
423
424 if (ip_mrouter != 0)
425 return (EADDRINUSE);
426
427 ip_mrouter = so;
428
429 mfchashtbl =
430 hashinit(MFCTBLSIZ, HASH_LIST, M_MRTABLE, M_WAITOK, &mfchash);
431 bzero((caddr_t)nexpire, sizeof(nexpire));
432
433 pim_assert = 0;
434
435 callout_init(&expire_upcalls_ch);
436 callout_reset(&expire_upcalls_ch, EXPIRE_TIMEOUT,
437 expire_upcalls, NULL);
438
439 if (mrtdebug)
440 log(LOG_DEBUG, "ip_mrouter_init\n");
441
442 return (0);
443 }
444
445 /*
446 * Disable multicast routing
447 */
448 int
449 ip_mrouter_done()
450 {
451 vifi_t vifi;
452 struct vif *vifp;
453 int i;
454 int s;
455
456 s = splsoftnet();
457
458 /* Clear out all the vifs currently in use. */
459 for (vifi = 0; vifi < numvifs; vifi++) {
460 vifp = &viftable[vifi];
461 if (!in_nullhost(vifp->v_lcl_addr))
462 reset_vif(vifp);
463 }
464
465 numvifs = 0;
466 pim_assert = 0;
467
468 callout_stop(&expire_upcalls_ch);
469
470 /*
471 * Free all multicast forwarding cache entries.
472 */
473 for (i = 0; i < MFCTBLSIZ; i++) {
474 struct mfc *rt, *nrt;
475
476 for (rt = LIST_FIRST(&mfchashtbl[i]); rt; rt = nrt) {
477 nrt = LIST_NEXT(rt, mfc_hash);
478
479 expire_mfc(rt);
480 }
481 }
482
483 free(mfchashtbl, M_MRTABLE);
484 mfchashtbl = 0;
485
486 /* Reset de-encapsulation cache. */
487
488 ip_mrouter = 0;
489
490 splx(s);
491
492 if (mrtdebug)
493 log(LOG_DEBUG, "ip_mrouter_done\n");
494
495 return (0);
496 }
497
498 static int
499 get_version(m)
500 struct mbuf *m;
501 {
502 int *v = mtod(m, int *);
503
504 *v = 0x0305; /* XXX !!!! */
505 m->m_len = sizeof(int);
506 return (0);
507 }
508
509 /*
510 * Set PIM assert processing global
511 */
512 static int
513 set_assert(m)
514 struct mbuf *m;
515 {
516 int *i;
517
518 if (m == 0 || m->m_len < sizeof(int))
519 return (EINVAL);
520
521 i = mtod(m, int *);
522 pim_assert = !!*i;
523 return (0);
524 }
525
526 /*
527 * Get PIM assert processing global
528 */
529 static int
530 get_assert(m)
531 struct mbuf *m;
532 {
533 int *i = mtod(m, int *);
534
535 *i = pim_assert;
536 m->m_len = sizeof(int);
537 return (0);
538 }
539
540 static struct sockaddr_in sin = { sizeof(sin), AF_INET };
541
542 /*
543 * Add a vif to the vif table
544 */
545 static int
546 add_vif(m)
547 struct mbuf *m;
548 {
549 struct vifctl *vifcp;
550 struct vif *vifp;
551 struct ifaddr *ifa;
552 struct ifnet *ifp;
553 struct ifreq ifr;
554 int error, s;
555
556 if (m == 0 || m->m_len < sizeof(struct vifctl))
557 return (EINVAL);
558
559 vifcp = mtod(m, struct vifctl *);
560 if (vifcp->vifc_vifi >= MAXVIFS)
561 return (EINVAL);
562
563 vifp = &viftable[vifcp->vifc_vifi];
564 if (!in_nullhost(vifp->v_lcl_addr))
565 return (EADDRINUSE);
566
567 /* Find the interface with an address in AF_INET family. */
568 sin.sin_addr = vifcp->vifc_lcl_addr;
569 ifa = ifa_ifwithaddr(sintosa(&sin));
570 if (ifa == 0)
571 return (EADDRNOTAVAIL);
572
573 if (vifcp->vifc_flags & VIFF_TUNNEL) {
574 if (vifcp->vifc_flags & VIFF_SRCRT) {
575 log(LOG_ERR, "Source routed tunnels not supported\n");
576 return (EOPNOTSUPP);
577 }
578
579 /* attach this vif to decapsulator dispatch table */
580 vifp->v_encap_cookie = encap_attach_func(AF_INET, IPPROTO_IPV4,
581 vif_encapcheck, &vif_protosw, vifp);
582 if (!vifp->v_encap_cookie)
583 return (EINVAL);
584
585 /* Create a fake encapsulation interface. */
586 ifp = (struct ifnet *)malloc(sizeof(*ifp), M_MRTABLE, M_WAITOK);
587 bzero(ifp, sizeof(*ifp));
588 sprintf(ifp->if_xname, "mdecap%d", vifcp->vifc_vifi);
589
590 /* Prepare cached route entry. */
591 bzero(&vifp->v_route, sizeof(vifp->v_route));
592 } else {
593 /* Use the physical interface associated with the address. */
594 ifp = ifa->ifa_ifp;
595
596 /* Make sure the interface supports multicast. */
597 if ((ifp->if_flags & IFF_MULTICAST) == 0)
598 return (EOPNOTSUPP);
599
600 /* Enable promiscuous reception of all IP multicasts. */
601 satosin(&ifr.ifr_addr)->sin_len = sizeof(struct sockaddr_in);
602 satosin(&ifr.ifr_addr)->sin_family = AF_INET;
603 satosin(&ifr.ifr_addr)->sin_addr = zeroin_addr;
604 error = (*ifp->if_ioctl)(ifp, SIOCADDMULTI, (caddr_t)&ifr);
605 if (error)
606 return (error);
607 }
608
609 s = splsoftnet();
610
611 /* Define parameters for the tbf structure. */
612 vifp->tbf_q = 0;
613 vifp->tbf_t = &vifp->tbf_q;
614 microtime(&vifp->tbf_last_pkt_t);
615 vifp->tbf_n_tok = 0;
616 vifp->tbf_q_len = 0;
617 vifp->tbf_max_q_len = MAXQSIZE;
618
619 vifp->v_flags = vifcp->vifc_flags;
620 vifp->v_threshold = vifcp->vifc_threshold;
621 /* scaling up here allows division by 1024 in critical code */
622 vifp->v_rate_limit = vifcp->vifc_rate_limit * 1024 / 1000;
623 vifp->v_lcl_addr = vifcp->vifc_lcl_addr;
624 vifp->v_rmt_addr = vifcp->vifc_rmt_addr;
625 vifp->v_ifp = ifp;
626 /* Initialize per vif pkt counters. */
627 vifp->v_pkt_in = 0;
628 vifp->v_pkt_out = 0;
629 vifp->v_bytes_in = 0;
630 vifp->v_bytes_out = 0;
631
632 callout_init(&vifp->v_repq_ch);
633
634 #ifdef RSVP_ISI
635 vifp->v_rsvp_on = 0;
636 vifp->v_rsvpd = 0;
637 #endif /* RSVP_ISI */
638
639 splx(s);
640
641 /* Adjust numvifs up if the vifi is higher than numvifs. */
642 if (numvifs <= vifcp->vifc_vifi)
643 numvifs = vifcp->vifc_vifi + 1;
644
645 if (mrtdebug)
646 log(LOG_DEBUG, "add_vif #%d, lcladdr %x, %s %x, thresh %x, rate %d\n",
647 vifcp->vifc_vifi,
648 ntohl(vifcp->vifc_lcl_addr.s_addr),
649 (vifcp->vifc_flags & VIFF_TUNNEL) ? "rmtaddr" : "mask",
650 ntohl(vifcp->vifc_rmt_addr.s_addr),
651 vifcp->vifc_threshold,
652 vifcp->vifc_rate_limit);
653
654 return (0);
655 }
656
657 void
658 reset_vif(vifp)
659 struct vif *vifp;
660 {
661 struct mbuf *m, *n;
662 struct ifnet *ifp;
663 struct ifreq ifr;
664
665 callout_stop(&vifp->v_repq_ch);
666
667 /* detach this vif from decapsulator dispatch table */
668 encap_detach(vifp->v_encap_cookie);
669 vifp->v_encap_cookie = NULL;
670
671 for (m = vifp->tbf_q; m != 0; m = n) {
672 n = m->m_nextpkt;
673 m_freem(m);
674 }
675
676 if (vifp->v_flags & VIFF_TUNNEL) {
677 free(vifp->v_ifp, M_MRTABLE);
678 if (vifp == last_encap_vif) {
679 last_encap_vif = 0;
680 last_encap_src = zeroin_addr;
681 }
682 } else {
683 satosin(&ifr.ifr_addr)->sin_len = sizeof(struct sockaddr_in);
684 satosin(&ifr.ifr_addr)->sin_family = AF_INET;
685 satosin(&ifr.ifr_addr)->sin_addr = zeroin_addr;
686 ifp = vifp->v_ifp;
687 (*ifp->if_ioctl)(ifp, SIOCDELMULTI, (caddr_t)&ifr);
688 }
689 bzero((caddr_t)vifp, sizeof(*vifp));
690 }
691
692 /*
693 * Delete a vif from the vif table
694 */
695 static int
696 del_vif(m)
697 struct mbuf *m;
698 {
699 vifi_t *vifip;
700 struct vif *vifp;
701 vifi_t vifi;
702 int s;
703
704 if (m == 0 || m->m_len < sizeof(vifi_t))
705 return (EINVAL);
706
707 vifip = mtod(m, vifi_t *);
708 if (*vifip >= numvifs)
709 return (EINVAL);
710
711 vifp = &viftable[*vifip];
712 if (in_nullhost(vifp->v_lcl_addr))
713 return (EADDRNOTAVAIL);
714
715 s = splsoftnet();
716
717 reset_vif(vifp);
718
719 /* Adjust numvifs down */
720 for (vifi = numvifs; vifi > 0; vifi--)
721 if (!in_nullhost(viftable[vifi-1].v_lcl_addr))
722 break;
723 numvifs = vifi;
724
725 splx(s);
726
727 if (mrtdebug)
728 log(LOG_DEBUG, "del_vif %d, numvifs %d\n", *vifip, numvifs);
729
730 return (0);
731 }
732
733 static void
734 update_mfc(mfccp, rt)
735 struct mfcctl *mfccp;
736 struct mfc *rt;
737 {
738 vifi_t vifi;
739
740 rt->mfc_parent = mfccp->mfcc_parent;
741 for (vifi = 0; vifi < numvifs; vifi++)
742 rt->mfc_ttls[vifi] = mfccp->mfcc_ttls[vifi];
743 rt->mfc_expire = 0;
744 rt->mfc_stall = 0;
745 }
746
747 static void
748 expire_mfc(rt)
749 struct mfc *rt;
750 {
751 struct rtdetq *rte, *nrte;
752
753 for (rte = rt->mfc_stall; rte != 0; rte = nrte) {
754 nrte = rte->next;
755 m_freem(rte->m);
756 free(rte, M_MRTABLE);
757 }
758
759 LIST_REMOVE(rt, mfc_hash);
760 free(rt, M_MRTABLE);
761 }
762
763 /*
764 * Add an mfc entry
765 */
766 static int
767 add_mfc(m)
768 struct mbuf *m;
769 {
770 struct mfcctl *mfccp;
771 struct mfc *rt;
772 u_int32_t hash = 0;
773 struct rtdetq *rte, *nrte;
774 u_short nstl;
775 int s;
776
777 if (m == 0 || m->m_len < sizeof(struct mfcctl))
778 return (EINVAL);
779
780 mfccp = mtod(m, struct mfcctl *);
781
782 s = splsoftnet();
783 MFCFIND(mfccp->mfcc_origin, mfccp->mfcc_mcastgrp, rt);
784
785 /* If an entry already exists, just update the fields */
786 if (rt) {
787 if (mrtdebug & DEBUG_MFC)
788 log(LOG_DEBUG,"add_mfc update o %x g %x p %x\n",
789 ntohl(mfccp->mfcc_origin.s_addr),
790 ntohl(mfccp->mfcc_mcastgrp.s_addr),
791 mfccp->mfcc_parent);
792
793 if (rt->mfc_expire)
794 nexpire[hash]--;
795
796 update_mfc(mfccp, rt);
797
798 splx(s);
799 return (0);
800 }
801
802 /*
803 * Find the entry for which the upcall was made and update
804 */
805 nstl = 0;
806 hash = MFCHASH(mfccp->mfcc_origin, mfccp->mfcc_mcastgrp);
807 LIST_FOREACH(rt, &mfchashtbl[hash], mfc_hash) {
808 if (in_hosteq(rt->mfc_origin, mfccp->mfcc_origin) &&
809 in_hosteq(rt->mfc_mcastgrp, mfccp->mfcc_mcastgrp) &&
810 rt->mfc_stall != 0) {
811 if (nstl++)
812 log(LOG_ERR, "add_mfc %s o %x g %x p %x dbx %p\n",
813 "multiple kernel entries",
814 ntohl(mfccp->mfcc_origin.s_addr),
815 ntohl(mfccp->mfcc_mcastgrp.s_addr),
816 mfccp->mfcc_parent, rt->mfc_stall);
817
818 if (mrtdebug & DEBUG_MFC)
819 log(LOG_DEBUG,"add_mfc o %x g %x p %x dbg %p\n",
820 ntohl(mfccp->mfcc_origin.s_addr),
821 ntohl(mfccp->mfcc_mcastgrp.s_addr),
822 mfccp->mfcc_parent, rt->mfc_stall);
823
824 if (rt->mfc_expire)
825 nexpire[hash]--;
826
827 rte = rt->mfc_stall;
828 update_mfc(mfccp, rt);
829
830 /* free packets Qed at the end of this entry */
831 for (; rte != 0; rte = nrte) {
832 nrte = rte->next;
833 #ifdef RSVP_ISI
834 ip_mdq(rte->m, rte->ifp, rt, -1);
835 #else
836 ip_mdq(rte->m, rte->ifp, rt);
837 #endif /* RSVP_ISI */
838 m_freem(rte->m);
839 #ifdef UPCALL_TIMING
840 collate(&rte->t);
841 #endif /* UPCALL_TIMING */
842 free(rte, M_MRTABLE);
843 }
844 }
845 }
846
847 if (nstl == 0) {
848 /*
849 * No mfc; make a new one
850 */
851 if (mrtdebug & DEBUG_MFC)
852 log(LOG_DEBUG,"add_mfc no upcall o %x g %x p %x\n",
853 ntohl(mfccp->mfcc_origin.s_addr),
854 ntohl(mfccp->mfcc_mcastgrp.s_addr),
855 mfccp->mfcc_parent);
856
857 rt = (struct mfc *)malloc(sizeof(*rt), M_MRTABLE, M_NOWAIT);
858 if (rt == 0) {
859 splx(s);
860 return (ENOBUFS);
861 }
862
863 rt->mfc_origin = mfccp->mfcc_origin;
864 rt->mfc_mcastgrp = mfccp->mfcc_mcastgrp;
865 /* initialize pkt counters per src-grp */
866 rt->mfc_pkt_cnt = 0;
867 rt->mfc_byte_cnt = 0;
868 rt->mfc_wrong_if = 0;
869 timerclear(&rt->mfc_last_assert);
870 update_mfc(mfccp, rt);
871
872 /* insert new entry at head of hash chain */
873 LIST_INSERT_HEAD(&mfchashtbl[hash], rt, mfc_hash);
874 }
875
876 splx(s);
877 return (0);
878 }
879
#ifdef UPCALL_TIMING
/*
 * Collect delay statistics on the upcalls: when `t' lies in the past,
 * count the elapsed time in the upcall_data histogram slot
 * (microseconds >> 10), with everything above slot 50 lumped into
 * slot 50.
 */
static void
collate(t)
	struct timeval *t;
{
	struct timeval now;
	u_int32_t usecs;
	u_int32_t slot;

	microtime(&now);

	if (!timercmp(t, &now, <))
		return;

	TV_DELTA(now, *t, usecs);

	slot = usecs >> 10;
	if (slot > 50)
		slot = 50;

	++upcall_data[slot];
}
#endif /* UPCALL_TIMING */
904
905 /*
906 * Delete an mfc entry
907 */
908 static int
909 del_mfc(m)
910 struct mbuf *m;
911 {
912 struct mfcctl *mfccp;
913 struct mfc *rt;
914 int s;
915
916 if (m == 0 || m->m_len < sizeof(struct mfcctl))
917 return (EINVAL);
918
919 mfccp = mtod(m, struct mfcctl *);
920
921 if (mrtdebug & DEBUG_MFC)
922 log(LOG_DEBUG, "del_mfc origin %x mcastgrp %x\n",
923 ntohl(mfccp->mfcc_origin.s_addr),
924 ntohl(mfccp->mfcc_mcastgrp.s_addr));
925
926 s = splsoftnet();
927
928 MFCFIND(mfccp->mfcc_origin, mfccp->mfcc_mcastgrp, rt);
929 if (rt == 0) {
930 splx(s);
931 return (EADDRNOTAVAIL);
932 }
933
934 LIST_REMOVE(rt, mfc_hash);
935 free(rt, M_MRTABLE);
936
937 splx(s);
938 return (0);
939 }
940
941 static int
942 socket_send(s, mm, src)
943 struct socket *s;
944 struct mbuf *mm;
945 struct sockaddr_in *src;
946 {
947 if (s) {
948 if (sbappendaddr(&s->so_rcv, sintosa(src), mm, (struct mbuf *)0) != 0) {
949 sorwakeup(s);
950 return (0);
951 }
952 }
953 m_freem(mm);
954 return (-1);
955 }
956
957 /*
958 * IP multicast forwarding function. This function assumes that the packet
959 * pointed to by "ip" has arrived on (or is about to be sent to) the interface
960 * pointed to by "ifp", and the packet is to be relayed to other networks
961 * that have members of the packet's destination IP multicast group.
962 *
963 * The packet is returned unscathed to the caller, unless it is
964 * erroneous, in which case a non-zero return value tells the caller to
965 * discard it.
966 */
967
968 #define IP_HDR_LEN 20 /* # bytes of fixed IP header (excluding options) */
969 #define TUNNEL_LEN 12 /* # bytes of IP option for tunnel encapsulation */
970
971 int
972 #ifdef RSVP_ISI
973 ip_mforward(m, ifp, imo)
974 #else
975 ip_mforward(m, ifp)
976 #endif /* RSVP_ISI */
977 struct mbuf *m;
978 struct ifnet *ifp;
979 #ifdef RSVP_ISI
980 struct ip_moptions *imo;
981 #endif /* RSVP_ISI */
982 {
983 struct ip *ip = mtod(m, struct ip *);
984 struct mfc *rt;
985 u_char *ipoptions;
986 static int srctun = 0;
987 struct mbuf *mm;
988 int s;
989 #ifdef RSVP_ISI
990 struct vif *vifp;
991 vifi_t vifi;
992 #endif /* RSVP_ISI */
993
994 /*
995 * Clear any in-bound checksum flags for this packet.
996 */
997 m->m_pkthdr.csum_flags = 0;
998
999 if (mrtdebug & DEBUG_FORWARD)
1000 log(LOG_DEBUG, "ip_mforward: src %x, dst %x, ifp %p\n",
1001 ntohl(ip->ip_src.s_addr), ntohl(ip->ip_dst.s_addr), ifp);
1002
1003 if (ip->ip_hl < (IP_HDR_LEN + TUNNEL_LEN) >> 2 ||
1004 (ipoptions = (u_char *)(ip + 1))[1] != IPOPT_LSRR) {
1005 /*
1006 * Packet arrived via a physical interface or
1007 * an encapuslated tunnel.
1008 */
1009 } else {
1010 /*
1011 * Packet arrived through a source-route tunnel.
1012 * Source-route tunnels are no longer supported.
1013 */
1014 if ((srctun++ % 1000) == 0)
1015 log(LOG_ERR, "ip_mforward: received source-routed packet from %x\n",
1016 ntohl(ip->ip_src.s_addr));
1017
1018 return (1);
1019 }
1020
1021 #ifdef RSVP_ISI
1022 if (imo && ((vifi = imo->imo_multicast_vif) < numvifs)) {
1023 if (ip->ip_ttl < 255)
1024 ip->ip_ttl++; /* compensate for -1 in *_send routines */
1025 if (rsvpdebug && ip->ip_p == IPPROTO_RSVP) {
1026 vifp = viftable + vifi;
1027 printf("Sending IPPROTO_RSVP from %x to %x on vif %d (%s%s)\n",
1028 ntohl(ip->ip_src), ntohl(ip->ip_dst), vifi,
1029 (vifp->v_flags & VIFF_TUNNEL) ? "tunnel on " : "",
1030 vifp->v_ifp->if_xname);
1031 }
1032 return (ip_mdq(m, ifp, (struct mfc *)0, vifi));
1033 }
1034 if (rsvpdebug && ip->ip_p == IPPROTO_RSVP) {
1035 printf("Warning: IPPROTO_RSVP from %x to %x without vif option\n",
1036 ntohl(ip->ip_src), ntohl(ip->ip_dst));
1037 }
1038 #endif /* RSVP_ISI */
1039
1040 /*
1041 * Don't forward a packet with time-to-live of zero or one,
1042 * or a packet destined to a local-only group.
1043 */
1044 if (ip->ip_ttl <= 1 ||
1045 IN_LOCAL_GROUP(ip->ip_dst.s_addr))
1046 return (0);
1047
1048 /*
1049 * Determine forwarding vifs from the forwarding cache table
1050 */
1051 s = splsoftnet();
1052 MFCFIND(ip->ip_src, ip->ip_dst, rt);
1053
1054 /* Entry exists, so forward if necessary */
1055 if (rt != 0) {
1056 splx(s);
1057 #ifdef RSVP_ISI
1058 return (ip_mdq(m, ifp, rt, -1));
1059 #else
1060 return (ip_mdq(m, ifp, rt));
1061 #endif /* RSVP_ISI */
1062 } else {
1063 /*
1064 * If we don't have a route for packet's origin,
1065 * Make a copy of the packet &
1066 * send message to routing daemon
1067 */
1068
1069 struct mbuf *mb0;
1070 struct rtdetq *rte;
1071 u_int32_t hash;
1072 int hlen = ip->ip_hl << 2;
1073 #ifdef UPCALL_TIMING
1074 struct timeval tp;
1075
1076 microtime(&tp);
1077 #endif /* UPCALL_TIMING */
1078
1079 mrtstat.mrts_no_route++;
1080 if (mrtdebug & (DEBUG_FORWARD | DEBUG_MFC))
1081 log(LOG_DEBUG, "ip_mforward: no rte s %x g %x\n",
1082 ntohl(ip->ip_src.s_addr),
1083 ntohl(ip->ip_dst.s_addr));
1084
1085 /*
1086 * Allocate mbufs early so that we don't do extra work if we are
1087 * just going to fail anyway. Make sure to pullup the header so
1088 * that other people can't step on it.
1089 */
1090 rte = (struct rtdetq *)malloc(sizeof(*rte), M_MRTABLE, M_NOWAIT);
1091 if (rte == 0) {
1092 splx(s);
1093 return (ENOBUFS);
1094 }
1095 mb0 = m_copy(m, 0, M_COPYALL);
1096 M_PULLUP(mb0, hlen);
1097 if (mb0 == 0) {
1098 free(rte, M_MRTABLE);
1099 splx(s);
1100 return (ENOBUFS);
1101 }
1102
1103 /* is there an upcall waiting for this packet? */
1104 hash = MFCHASH(ip->ip_src, ip->ip_dst);
1105 LIST_FOREACH(rt, &mfchashtbl[hash], mfc_hash) {
1106 if (in_hosteq(ip->ip_src, rt->mfc_origin) &&
1107 in_hosteq(ip->ip_dst, rt->mfc_mcastgrp) &&
1108 rt->mfc_stall != 0)
1109 break;
1110 }
1111
1112 if (rt == 0) {
1113 int i;
1114 struct igmpmsg *im;
1115
1116 /* no upcall, so make a new entry */
1117 rt = (struct mfc *)malloc(sizeof(*rt), M_MRTABLE, M_NOWAIT);
1118 if (rt == 0) {
1119 free(rte, M_MRTABLE);
1120 m_freem(mb0);
1121 splx(s);
1122 return (ENOBUFS);
1123 }
1124 /* Make a copy of the header to send to the user level process */
1125 mm = m_copy(m, 0, hlen);
1126 M_PULLUP(mm, hlen);
1127 if (mm == 0) {
1128 free(rte, M_MRTABLE);
1129 m_freem(mb0);
1130 free(rt, M_MRTABLE);
1131 splx(s);
1132 return (ENOBUFS);
1133 }
1134
1135 /*
1136 * Send message to routing daemon to install
1137 * a route into the kernel table
1138 */
1139 sin.sin_addr = ip->ip_src;
1140
1141 im = mtod(mm, struct igmpmsg *);
1142 im->im_msgtype = IGMPMSG_NOCACHE;
1143 im->im_mbz = 0;
1144
1145 mrtstat.mrts_upcalls++;
1146
1147 if (socket_send(ip_mrouter, mm, &sin) < 0) {
1148 log(LOG_WARNING, "ip_mforward: ip_mrouter socket queue full\n");
1149 ++mrtstat.mrts_upq_sockfull;
1150 free(rte, M_MRTABLE);
1151 m_freem(mb0);
1152 free(rt, M_MRTABLE);
1153 splx(s);
1154 return (ENOBUFS);
1155 }
1156
1157 /* insert new entry at head of hash chain */
1158 rt->mfc_origin = ip->ip_src;
1159 rt->mfc_mcastgrp = ip->ip_dst;
1160 rt->mfc_pkt_cnt = 0;
1161 rt->mfc_byte_cnt = 0;
1162 rt->mfc_wrong_if = 0;
1163 rt->mfc_expire = UPCALL_EXPIRE;
1164 nexpire[hash]++;
1165 for (i = 0; i < numvifs; i++)
1166 rt->mfc_ttls[i] = 0;
1167 rt->mfc_parent = -1;
1168
1169 /* link into table */
1170 LIST_INSERT_HEAD(&mfchashtbl[hash], rt, mfc_hash);
1171 /* Add this entry to the end of the queue */
1172 rt->mfc_stall = rte;
1173 } else {
1174 /* determine if q has overflowed */
1175 struct rtdetq **p;
1176 int npkts = 0;
1177
1178 for (p = &rt->mfc_stall; *p != 0; p = &(*p)->next)
1179 if (++npkts > MAX_UPQ) {
1180 mrtstat.mrts_upq_ovflw++;
1181 free(rte, M_MRTABLE);
1182 m_freem(mb0);
1183 splx(s);
1184 return (0);
1185 }
1186
1187 /* Add this entry to the end of the queue */
1188 *p = rte;
1189 }
1190
1191 rte->next = 0;
1192 rte->m = mb0;
1193 rte->ifp = ifp;
1194 #ifdef UPCALL_TIMING
1195 rte->t = tp;
1196 #endif /* UPCALL_TIMING */
1197
1198
1199 splx(s);
1200
1201 return (0);
1202 }
1203 }
1204
1205
1206 /*ARGSUSED*/
1207 static void
1208 expire_upcalls(v)
1209 void *v;
1210 {
1211 int i;
1212 int s;
1213
1214 s = splsoftnet();
1215
1216 for (i = 0; i < MFCTBLSIZ; i++) {
1217 struct mfc *rt, *nrt;
1218
1219 if (nexpire[i] == 0)
1220 continue;
1221
1222 for (rt = LIST_FIRST(&mfchashtbl[i]); rt; rt = nrt) {
1223 nrt = LIST_NEXT(rt, mfc_hash);
1224
1225 if (rt->mfc_expire == 0 ||
1226 --rt->mfc_expire > 0)
1227 continue;
1228 nexpire[i]--;
1229
1230 ++mrtstat.mrts_cache_cleanups;
1231 if (mrtdebug & DEBUG_EXPIRE)
1232 log(LOG_DEBUG,
1233 "expire_upcalls: expiring (%x %x)\n",
1234 ntohl(rt->mfc_origin.s_addr),
1235 ntohl(rt->mfc_mcastgrp.s_addr));
1236
1237 expire_mfc(rt);
1238 }
1239 }
1240
1241 splx(s);
1242 callout_reset(&expire_upcalls_ch, EXPIRE_TIMEOUT,
1243 expire_upcalls, NULL);
1244 }
1245
1246 /*
1247 * Packet forwarding routine once entry in the cache is made
1248 */
1249 static int
1250 #ifdef RSVP_ISI
1251 ip_mdq(m, ifp, rt, xmt_vif)
1252 #else
1253 ip_mdq(m, ifp, rt)
1254 #endif /* RSVP_ISI */
1255 struct mbuf *m;
1256 struct ifnet *ifp;
1257 struct mfc *rt;
1258 #ifdef RSVP_ISI
1259 vifi_t xmt_vif;
1260 #endif /* RSVP_ISI */
1261 {
1262 struct ip *ip = mtod(m, struct ip *);
1263 vifi_t vifi;
1264 struct vif *vifp;
1265 int plen = ntohs(ip->ip_len);
1266
1267 /*
1268 * Macro to send packet on vif. Since RSVP packets don't get counted on
1269 * input, they shouldn't get counted on output, so statistics keeping is
1270 * separate.
1271 */
1272 #define MC_SEND(ip,vifp,m) { \
1273 if ((vifp)->v_flags & VIFF_TUNNEL) \
1274 encap_send((ip), (vifp), (m)); \
1275 else \
1276 phyint_send((ip), (vifp), (m)); \
1277 }
1278
1279 #ifdef RSVP_ISI
1280 /*
1281 * If xmt_vif is not -1, send on only the requested vif.
1282 *
1283 * (since vifi_t is u_short, -1 becomes MAXUSHORT, which > numvifs.
1284 */
1285 if (xmt_vif < numvifs) {
1286 MC_SEND(ip, viftable + xmt_vif, m);
1287 return (1);
1288 }
1289 #endif /* RSVP_ISI */
1290
1291 /*
1292 * Don't forward if it didn't arrive from the parent vif for its origin.
1293 */
1294 vifi = rt->mfc_parent;
1295 if ((vifi >= numvifs) || (viftable[vifi].v_ifp != ifp)) {
1296 /* came in the wrong interface */
1297 if (mrtdebug & DEBUG_FORWARD)
1298 log(LOG_DEBUG, "wrong if: ifp %p vifi %d vififp %p\n",
1299 ifp, vifi, viftable[vifi].v_ifp);
1300 ++mrtstat.mrts_wrong_if;
1301 ++rt->mfc_wrong_if;
1302 /*
1303 * If we are doing PIM assert processing, and we are forwarding
1304 * packets on this interface, and it is a broadcast medium
1305 * interface (and not a tunnel), send a message to the routing daemon.
1306 */
1307 if (pim_assert && rt->mfc_ttls[vifi] &&
1308 (ifp->if_flags & IFF_BROADCAST) &&
1309 !(viftable[vifi].v_flags & VIFF_TUNNEL)) {
1310 struct mbuf *mm;
1311 struct igmpmsg *im;
1312 int hlen = ip->ip_hl << 2;
1313 struct timeval now;
1314 u_int32_t delta;
1315
1316 microtime(&now);
1317
1318 TV_DELTA(rt->mfc_last_assert, now, delta);
1319
1320 if (delta > ASSERT_MSG_TIME) {
1321 mm = m_copy(m, 0, hlen);
1322 M_PULLUP(mm, hlen);
1323 if (mm == 0) {
1324 return (ENOBUFS);
1325 }
1326
1327 rt->mfc_last_assert = now;
1328
1329 im = mtod(mm, struct igmpmsg *);
1330 im->im_msgtype = IGMPMSG_WRONGVIF;
1331 im->im_mbz = 0;
1332 im->im_vif = vifi;
1333
1334 sin.sin_addr = im->im_src;
1335
1336 socket_send(ip_mrouter, mm, &sin);
1337 }
1338 }
1339 return (0);
1340 }
1341
1342 /* If I sourced this packet, it counts as output, else it was input. */
1343 if (in_hosteq(ip->ip_src, viftable[vifi].v_lcl_addr)) {
1344 viftable[vifi].v_pkt_out++;
1345 viftable[vifi].v_bytes_out += plen;
1346 } else {
1347 viftable[vifi].v_pkt_in++;
1348 viftable[vifi].v_bytes_in += plen;
1349 }
1350 rt->mfc_pkt_cnt++;
1351 rt->mfc_byte_cnt += plen;
1352
1353 /*
1354 * For each vif, decide if a copy of the packet should be forwarded.
1355 * Forward if:
1356 * - the ttl exceeds the vif's threshold
1357 * - there are group members downstream on interface
1358 */
1359 for (vifp = viftable, vifi = 0; vifi < numvifs; vifp++, vifi++)
1360 if ((rt->mfc_ttls[vifi] > 0) &&
1361 (ip->ip_ttl > rt->mfc_ttls[vifi])) {
1362 vifp->v_pkt_out++;
1363 vifp->v_bytes_out += plen;
1364 MC_SEND(ip, vifp, m);
1365 }
1366
1367 return (0);
1368 }
1369
1370 #ifdef RSVP_ISI
1371 /*
1372 * check if a vif number is legal/ok. This is used by ip_output, to export
1373 * numvifs there,
1374 */
1375 int
1376 legal_vif_num(vif)
1377 int vif;
1378 {
1379 if (vif >= 0 && vif < numvifs)
1380 return (1);
1381 else
1382 return (0);
1383 }
1384 #endif /* RSVP_ISI */
1385
1386 static void
1387 phyint_send(ip, vifp, m)
1388 struct ip *ip;
1389 struct vif *vifp;
1390 struct mbuf *m;
1391 {
1392 struct mbuf *mb_copy;
1393 int hlen = ip->ip_hl << 2;
1394
1395 /*
1396 * Make a new reference to the packet; make sure that
1397 * the IP header is actually copied, not just referenced,
1398 * so that ip_output() only scribbles on the copy.
1399 */
1400 mb_copy = m_copy(m, 0, M_COPYALL);
1401 M_PULLUP(mb_copy, hlen);
1402 if (mb_copy == 0)
1403 return;
1404
1405 if (vifp->v_rate_limit <= 0)
1406 tbf_send_packet(vifp, mb_copy);
1407 else
1408 tbf_control(vifp, mb_copy, mtod(mb_copy, struct ip *), ip->ip_len);
1409 }
1410
1411 static void
1412 encap_send(ip, vifp, m)
1413 struct ip *ip;
1414 struct vif *vifp;
1415 struct mbuf *m;
1416 {
1417 struct mbuf *mb_copy;
1418 struct ip *ip_copy;
1419 int i, len = ip->ip_len + sizeof(multicast_encap_iphdr);
1420
1421 /*
1422 * copy the old packet & pullup it's IP header into the
1423 * new mbuf so we can modify it. Try to fill the new
1424 * mbuf since if we don't the ethernet driver will.
1425 */
1426 MGETHDR(mb_copy, M_DONTWAIT, MT_DATA);
1427 if (mb_copy == 0)
1428 return;
1429 mb_copy->m_data += max_linkhdr;
1430 mb_copy->m_pkthdr.len = len;
1431 mb_copy->m_len = sizeof(multicast_encap_iphdr);
1432
1433 if ((mb_copy->m_next = m_copy(m, 0, M_COPYALL)) == 0) {
1434 m_freem(mb_copy);
1435 return;
1436 }
1437 i = MHLEN - max_linkhdr;
1438 if (i > len)
1439 i = len;
1440 mb_copy = m_pullup(mb_copy, i);
1441 if (mb_copy == 0)
1442 return;
1443
1444 /*
1445 * fill in the encapsulating IP header.
1446 */
1447 ip_copy = mtod(mb_copy, struct ip *);
1448 *ip_copy = multicast_encap_iphdr;
1449 ip_copy->ip_id = htons(ip_id++);
1450 ip_copy->ip_len = len;
1451 ip_copy->ip_src = vifp->v_lcl_addr;
1452 ip_copy->ip_dst = vifp->v_rmt_addr;
1453
1454 /*
1455 * turn the encapsulated IP header back into a valid one.
1456 */
1457 ip = (struct ip *)((caddr_t)ip_copy + sizeof(multicast_encap_iphdr));
1458 --ip->ip_ttl;
1459 HTONS(ip->ip_len);
1460 HTONS(ip->ip_off);
1461 ip->ip_sum = 0;
1462 mb_copy->m_data += sizeof(multicast_encap_iphdr);
1463 ip->ip_sum = in_cksum(mb_copy, ip->ip_hl << 2);
1464 mb_copy->m_data -= sizeof(multicast_encap_iphdr);
1465
1466 if (vifp->v_rate_limit <= 0)
1467 tbf_send_packet(vifp, mb_copy);
1468 else
1469 tbf_control(vifp, mb_copy, ip, ip_copy->ip_len);
1470 }
1471
1472 /*
1473 * De-encapsulate a packet and feed it back through ip input.
1474 */
1475 static void
1476 #if __STDC__
1477 vif_input(struct mbuf *m, ...)
1478 #else
1479 vif_input(m, va_alist)
1480 struct mbuf *m;
1481 va_dcl
1482 #endif
1483 {
1484 int off, proto;
1485 va_list ap;
1486 struct ip *ip;
1487 struct vif *vifp;
1488 int s;
1489 struct ifqueue *ifq;
1490
1491 va_start(ap, m);
1492 off = va_arg(ap, int);
1493 proto = va_arg(ap, int);
1494 va_end(ap);
1495
1496 vifp = (struct vif *)encap_getarg(m);
1497 if (!vifp || proto != AF_INET) {
1498 m_freem(m);
1499 mrtstat.mrts_bad_tunnel++;
1500 return;
1501 }
1502
1503 ip = mtod(m, struct ip *);
1504
1505 m_adj(m, off);
1506 m->m_pkthdr.rcvif = vifp->v_ifp;
1507 ifq = &ipintrq;
1508 s = splnet();
1509 if (IF_QFULL(ifq)) {
1510 IF_DROP(ifq);
1511 m_freem(m);
1512 } else {
1513 IF_ENQUEUE(ifq, m);
1514 /*
1515 * normally we would need a "schednetisr(NETISR_IP)"
1516 * here but we were called by ip_input and it is going
1517 * to loop back & try to dequeue the packet we just
1518 * queued as soon as we return so we avoid the
1519 * unnecessary software interrrupt.
1520 */
1521 }
1522 splx(s);
1523 }
1524
1525 /*
1526 * Check if the packet should be grabbed by us.
1527 */
1528 static int
1529 vif_encapcheck(m, off, proto, arg)
1530 const struct mbuf *m;
1531 int off;
1532 int proto;
1533 void *arg;
1534 {
1535 struct vif *vifp;
1536 struct ip ip;
1537
1538 #ifdef DIAGNOSTIC
1539 if (!arg || proto != IPPROTO_IPV4)
1540 panic("unexpected arg in vif_encapcheck");
1541 #endif
1542
1543 /*
1544 * do not grab the packet if it's not to a multicast destination or if
1545 * we don't have an encapsulating tunnel with the source.
1546 * Note: This code assumes that the remote site IP address
1547 * uniquely identifies the tunnel (i.e., that this site has
1548 * at most one tunnel with the remote site).
1549 */
1550
1551 /* LINTED const cast */
1552 m_copydata((struct mbuf *)m, off, sizeof(ip), (caddr_t)&ip);
1553 if (!IN_MULTICAST(ip.ip_dst.s_addr))
1554 return 0;
1555
1556 /* LINTED const cast */
1557 m_copydata((struct mbuf *)m, 0, sizeof(ip), (caddr_t)&ip);
1558 if (!in_hosteq(ip.ip_src, last_encap_src)) {
1559 vifp = (struct vif *)arg;
1560 if (vifp->v_flags & VIFF_TUNNEL &&
1561 in_hosteq(vifp->v_rmt_addr, ip.ip_src))
1562 ;
1563 else
1564 return 0;
1565 last_encap_vif = vifp;
1566 last_encap_src = ip.ip_src;
1567 } else
1568 vifp = last_encap_vif;
1569
1570 /* 32bit match, since we have checked ip_src only */
1571 return 32;
1572 }
1573
1574 /*
1575 * Token bucket filter module
1576 */
1577 static void
1578 tbf_control(vifp, m, ip, len)
1579 struct vif *vifp;
1580 struct mbuf *m;
1581 struct ip *ip;
1582 u_int32_t len;
1583 {
1584
1585 if (len > MAX_BKT_SIZE) {
1586 /* drop if packet is too large */
1587 mrtstat.mrts_pkt2large++;
1588 m_freem(m);
1589 return;
1590 }
1591
1592 tbf_update_tokens(vifp);
1593
1594 /*
1595 * If there are enough tokens, and the queue is empty, send this packet
1596 * out immediately. Otherwise, try to insert it on this vif's queue.
1597 */
1598 if (vifp->tbf_q_len == 0) {
1599 if (len <= vifp->tbf_n_tok) {
1600 vifp->tbf_n_tok -= len;
1601 tbf_send_packet(vifp, m);
1602 } else {
1603 /* queue packet and timeout till later */
1604 tbf_queue(vifp, m);
1605 callout_reset(&vifp->v_repq_ch, TBF_REPROCESS,
1606 tbf_reprocess_q, vifp);
1607 }
1608 } else {
1609 if (vifp->tbf_q_len >= vifp->tbf_max_q_len &&
1610 !tbf_dq_sel(vifp, ip)) {
1611 /* queue length too much, and couldn't make room */
1612 mrtstat.mrts_q_overflow++;
1613 m_freem(m);
1614 } else {
1615 /* queue length low enough, or made room */
1616 tbf_queue(vifp, m);
1617 tbf_process_q(vifp);
1618 }
1619 }
1620 }
1621
1622 /*
1623 * adds a packet to the queue at the interface
1624 */
1625 static void
1626 tbf_queue(vifp, m)
1627 struct vif *vifp;
1628 struct mbuf *m;
1629 {
1630 int s = splsoftnet();
1631
1632 /* insert at tail */
1633 *vifp->tbf_t = m;
1634 vifp->tbf_t = &m->m_nextpkt;
1635 vifp->tbf_q_len++;
1636
1637 splx(s);
1638 }
1639
1640
1641 /*
1642 * processes the queue at the interface
1643 */
1644 static void
1645 tbf_process_q(vifp)
1646 struct vif *vifp;
1647 {
1648 struct mbuf *m;
1649 int len;
1650 int s = splsoftnet();
1651
1652 /*
1653 * Loop through the queue at the interface and send as many packets
1654 * as possible.
1655 */
1656 for (m = vifp->tbf_q;
1657 m != 0;
1658 m = vifp->tbf_q) {
1659 len = mtod(m, struct ip *)->ip_len;
1660
1661 /* determine if the packet can be sent */
1662 if (len <= vifp->tbf_n_tok) {
1663 /* if so,
1664 * reduce no of tokens, dequeue the packet,
1665 * send the packet.
1666 */
1667 if ((vifp->tbf_q = m->m_nextpkt) == 0)
1668 vifp->tbf_t = &vifp->tbf_q;
1669 --vifp->tbf_q_len;
1670
1671 m->m_nextpkt = 0;
1672 vifp->tbf_n_tok -= len;
1673 tbf_send_packet(vifp, m);
1674 } else
1675 break;
1676 }
1677 splx(s);
1678 }
1679
1680 static void
1681 tbf_reprocess_q(arg)
1682 void *arg;
1683 {
1684 struct vif *vifp = arg;
1685
1686 if (ip_mrouter == 0)
1687 return;
1688
1689 tbf_update_tokens(vifp);
1690 tbf_process_q(vifp);
1691
1692 if (vifp->tbf_q_len != 0)
1693 callout_reset(&vifp->v_repq_ch, TBF_REPROCESS,
1694 tbf_reprocess_q, vifp);
1695 }
1696
1697 /* function that will selectively discard a member of the queue
1698 * based on the precedence value and the priority
1699 */
1700 static int
1701 tbf_dq_sel(vifp, ip)
1702 struct vif *vifp;
1703 struct ip *ip;
1704 {
1705 u_int p;
1706 struct mbuf **mp, *m;
1707 int s = splsoftnet();
1708
1709 p = priority(vifp, ip);
1710
1711 for (mp = &vifp->tbf_q, m = *mp;
1712 m != 0;
1713 mp = &m->m_nextpkt, m = *mp) {
1714 if (p > priority(vifp, mtod(m, struct ip *))) {
1715 if ((*mp = m->m_nextpkt) == 0)
1716 vifp->tbf_t = mp;
1717 --vifp->tbf_q_len;
1718
1719 m_freem(m);
1720 mrtstat.mrts_drop_sel++;
1721 splx(s);
1722 return (1);
1723 }
1724 }
1725 splx(s);
1726 return (0);
1727 }
1728
1729 static void
1730 tbf_send_packet(vifp, m)
1731 struct vif *vifp;
1732 struct mbuf *m;
1733 {
1734 int error;
1735 int s = splsoftnet();
1736
1737 if (vifp->v_flags & VIFF_TUNNEL) {
1738 /* If tunnel options */
1739 #ifdef IPSEC
1740 /* Don't lookup socket in forwading case */
1741 (void)ipsec_setsocket(m, NULL);
1742 #endif
1743 ip_output(m, (struct mbuf *)0, &vifp->v_route,
1744 IP_FORWARDING, (struct ip_moptions *)0);
1745 } else {
1746 /* if physical interface option, extract the options and then send */
1747 struct ip_moptions imo;
1748
1749 imo.imo_multicast_ifp = vifp->v_ifp;
1750 imo.imo_multicast_ttl = mtod(m, struct ip *)->ip_ttl - 1;
1751 imo.imo_multicast_loop = 1;
1752 #ifdef RSVP_ISI
1753 imo.imo_multicast_vif = -1;
1754 #endif
1755
1756 #ifdef IPSEC
1757 /* Don't lookup socket in forwading case */
1758 (void)ipsec_setsocket(m, NULL);
1759 #endif
1760 error = ip_output(m, (struct mbuf *)0, (struct route *)0,
1761 IP_FORWARDING|IP_MULTICASTOPTS, &imo);
1762
1763 if (mrtdebug & DEBUG_XMIT)
1764 log(LOG_DEBUG, "phyint_send on vif %ld err %d\n",
1765 (long)(vifp-viftable), error);
1766 }
1767 splx(s);
1768 }
1769
1770 /* determine the current time and then
1771 * the elapsed time (between the last time and time now)
1772 * in milliseconds & update the no. of tokens in the bucket
1773 */
1774 static void
1775 tbf_update_tokens(vifp)
1776 struct vif *vifp;
1777 {
1778 struct timeval tp;
1779 u_int32_t tm;
1780 int s = splsoftnet();
1781
1782 microtime(&tp);
1783
1784 TV_DELTA(tp, vifp->tbf_last_pkt_t, tm);
1785
1786 /*
1787 * This formula is actually
1788 * "time in seconds" * "bytes/second".
1789 *
1790 * (tm / 1000000) * (v_rate_limit * 1000 * (1000/1024) / 8)
1791 *
1792 * The (1000/1024) was introduced in add_vif to optimize
1793 * this divide into a shift.
1794 */
1795 vifp->tbf_n_tok += tm * vifp->v_rate_limit / 8192;
1796 vifp->tbf_last_pkt_t = tp;
1797
1798 if (vifp->tbf_n_tok > MAX_BKT_SIZE)
1799 vifp->tbf_n_tok = MAX_BKT_SIZE;
1800
1801 splx(s);
1802 }
1803
1804 static int
1805 priority(vifp, ip)
1806 struct vif *vifp;
1807 struct ip *ip;
1808 {
1809 int prio;
1810
1811 /* temporary hack; may add general packet classifier some day */
1812
1813 /*
1814 * The UDP port space is divided up into four priority ranges:
1815 * [0, 16384) : unclassified - lowest priority
1816 * [16384, 32768) : audio - highest priority
1817 * [32768, 49152) : whiteboard - medium priority
1818 * [49152, 65536) : video - low priority
1819 */
1820 if (ip->ip_p == IPPROTO_UDP) {
1821 struct udphdr *udp = (struct udphdr *)(((char *)ip) + (ip->ip_hl << 2));
1822
1823 switch (ntohs(udp->uh_dport) & 0xc000) {
1824 case 0x4000:
1825 prio = 70;
1826 break;
1827 case 0x8000:
1828 prio = 60;
1829 break;
1830 case 0xc000:
1831 prio = 55;
1832 break;
1833 default:
1834 prio = 50;
1835 break;
1836 }
1837
1838 if (tbfdebug > 1)
1839 log(LOG_DEBUG, "port %x prio %d\n", ntohs(udp->uh_dport), prio);
1840 } else
1841 prio = 50;
1842
1843
1844 return (prio);
1845 }
1846
1847 /*
1848 * End of token bucket filter modifications
1849 */
1850
1851 #ifdef RSVP_ISI
1852
1853 int
1854 ip_rsvp_vif_init(so, m)
1855 struct socket *so;
1856 struct mbuf *m;
1857 {
1858 int i;
1859 int s;
1860
1861 if (rsvpdebug)
1862 printf("ip_rsvp_vif_init: so_type = %d, pr_protocol = %d\n",
1863 so->so_type, so->so_proto->pr_protocol);
1864
1865 if (so->so_type != SOCK_RAW || so->so_proto->pr_protocol != IPPROTO_RSVP)
1866 return (EOPNOTSUPP);
1867
1868 /* Check mbuf. */
1869 if (m == 0 || m->m_len != sizeof(int)) {
1870 return (EINVAL);
1871 }
1872 i = *(mtod(m, int *));
1873
1874 if (rsvpdebug)
1875 printf("ip_rsvp_vif_init: vif = %d rsvp_on = %d\n",i,rsvp_on);
1876
1877 s = splsoftnet();
1878
1879 /* Check vif. */
1880 if (!legal_vif_num(i)) {
1881 splx(s);
1882 return (EADDRNOTAVAIL);
1883 }
1884
1885 /* Check if socket is available. */
1886 if (viftable[i].v_rsvpd != 0) {
1887 splx(s);
1888 return (EADDRINUSE);
1889 }
1890
1891 viftable[i].v_rsvpd = so;
1892 /* This may seem silly, but we need to be sure we don't over-increment
1893 * the RSVP counter, in case something slips up.
1894 */
1895 if (!viftable[i].v_rsvp_on) {
1896 viftable[i].v_rsvp_on = 1;
1897 rsvp_on++;
1898 }
1899
1900 splx(s);
1901 return (0);
1902 }
1903
1904 int
1905 ip_rsvp_vif_done(so, m)
1906 struct socket *so;
1907 struct mbuf *m;
1908 {
1909 int i;
1910 int s;
1911
1912 if (rsvpdebug)
1913 printf("ip_rsvp_vif_done: so_type = %d, pr_protocol = %d\n",
1914 so->so_type, so->so_proto->pr_protocol);
1915
1916 if (so->so_type != SOCK_RAW || so->so_proto->pr_protocol != IPPROTO_RSVP)
1917 return (EOPNOTSUPP);
1918
1919 /* Check mbuf. */
1920 if (m == 0 || m->m_len != sizeof(int)) {
1921 return (EINVAL);
1922 }
1923 i = *(mtod(m, int *));
1924
1925 s = splsoftnet();
1926
1927 /* Check vif. */
1928 if (!legal_vif_num(i)) {
1929 splx(s);
1930 return (EADDRNOTAVAIL);
1931 }
1932
1933 if (rsvpdebug)
1934 printf("ip_rsvp_vif_done: v_rsvpd = %x so = %x\n",
1935 viftable[i].v_rsvpd, so);
1936
1937 viftable[i].v_rsvpd = 0;
1938 /* This may seem silly, but we need to be sure we don't over-decrement
1939 * the RSVP counter, in case something slips up.
1940 */
1941 if (viftable[i].v_rsvp_on) {
1942 viftable[i].v_rsvp_on = 0;
1943 rsvp_on--;
1944 }
1945
1946 splx(s);
1947 return (0);
1948 }
1949
1950 void
1951 ip_rsvp_force_done(so)
1952 struct socket *so;
1953 {
1954 int vifi;
1955 int s;
1956
1957 /* Don't bother if it is not the right type of socket. */
1958 if (so->so_type != SOCK_RAW || so->so_proto->pr_protocol != IPPROTO_RSVP)
1959 return;
1960
1961 s = splsoftnet();
1962
1963 /* The socket may be attached to more than one vif...this
1964 * is perfectly legal.
1965 */
1966 for (vifi = 0; vifi < numvifs; vifi++) {
1967 if (viftable[vifi].v_rsvpd == so) {
1968 viftable[vifi].v_rsvpd = 0;
1969 /* This may seem silly, but we need to be sure we don't
1970 * over-decrement the RSVP counter, in case something slips up.
1971 */
1972 if (viftable[vifi].v_rsvp_on) {
1973 viftable[vifi].v_rsvp_on = 0;
1974 rsvp_on--;
1975 }
1976 }
1977 }
1978
1979 splx(s);
1980 return;
1981 }
1982
1983 void
1984 rsvp_input(m, ifp)
1985 struct mbuf *m;
1986 struct ifnet *ifp;
1987 {
1988 int vifi;
1989 struct ip *ip = mtod(m, struct ip *);
1990 static struct sockaddr_in rsvp_src = { sizeof(sin), AF_INET };
1991 int s;
1992
1993 if (rsvpdebug)
1994 printf("rsvp_input: rsvp_on %d\n",rsvp_on);
1995
1996 /* Can still get packets with rsvp_on = 0 if there is a local member
1997 * of the group to which the RSVP packet is addressed. But in this
1998 * case we want to throw the packet away.
1999 */
2000 if (!rsvp_on) {
2001 m_freem(m);
2002 return;
2003 }
2004
2005 /* If the old-style non-vif-associated socket is set, then use
2006 * it and ignore the new ones.
2007 */
2008 if (ip_rsvpd != 0) {
2009 if (rsvpdebug)
2010 printf("rsvp_input: Sending packet up old-style socket\n");
2011 rip_input(m); /*XXX*/
2012 return;
2013 }
2014
2015 s = splsoftnet();
2016
2017 if (rsvpdebug)
2018 printf("rsvp_input: check vifs\n");
2019
2020 /* Find which vif the packet arrived on. */
2021 for (vifi = 0; vifi < numvifs; vifi++) {
2022 if (viftable[vifi].v_ifp == ifp)
2023 break;
2024 }
2025
2026 if (vifi == numvifs) {
2027 /* Can't find vif packet arrived on. Drop packet. */
2028 if (rsvpdebug)
2029 printf("rsvp_input: Can't find vif for packet...dropping it.\n");
2030 m_freem(m);
2031 splx(s);
2032 return;
2033 }
2034
2035 if (rsvpdebug)
2036 printf("rsvp_input: check socket\n");
2037
2038 if (viftable[vifi].v_rsvpd == 0) {
2039 /* drop packet, since there is no specific socket for this
2040 * interface */
2041 if (rsvpdebug)
2042 printf("rsvp_input: No socket defined for vif %d\n",vifi);
2043 m_freem(m);
2044 splx(s);
2045 return;
2046 }
2047
2048 rsvp_src.sin_addr = ip->ip_src;
2049
2050 if (rsvpdebug && m)
2051 printf("rsvp_input: m->m_len = %d, sbspace() = %d\n",
2052 m->m_len,sbspace(&viftable[vifi].v_rsvpd->so_rcv));
2053
2054 if (socket_send(viftable[vifi].v_rsvpd, m, &rsvp_src) < 0)
2055 if (rsvpdebug)
2056 printf("rsvp_input: Failed to append to socket\n");
2057 else
2058 if (rsvpdebug)
2059 printf("rsvp_input: send packet up\n");
2060
2061 splx(s);
2062 }
2063 #endif /* RSVP_ISI */
2064