ip_mroute.c revision 1.27 1 /* $NetBSD: ip_mroute.c,v 1.27 1996/05/07 02:40:50 thorpej Exp $ */
2
3 /*
4 * IP multicast forwarding procedures
5 *
6 * Written by David Waitzman, BBN Labs, August 1988.
7 * Modified by Steve Deering, Stanford, February 1989.
8 * Modified by Mark J. Steiglitz, Stanford, May, 1991
9 * Modified by Van Jacobson, LBL, January 1993
10 * Modified by Ajit Thyagarajan, PARC, August 1993
11 * Modified by Bill Fenner, PARC, April 1994
12 * Modified by Charles M. Hannum, NetBSD, May 1995.
13 *
14 * MROUTING Revision: 1.2
15 */
16
17 #include <sys/param.h>
18 #include <sys/systm.h>
19 #include <sys/mbuf.h>
20 #include <sys/socket.h>
21 #include <sys/socketvar.h>
22 #include <sys/protosw.h>
23 #include <sys/errno.h>
24 #include <sys/time.h>
25 #include <sys/kernel.h>
26 #include <sys/ioctl.h>
27 #include <sys/syslog.h>
28 #include <net/if.h>
29 #include <net/route.h>
30 #include <net/raw_cb.h>
31 #include <netinet/in.h>
32 #include <netinet/in_var.h>
33 #include <netinet/in_systm.h>
34 #include <netinet/ip.h>
35 #include <netinet/ip_var.h>
36 #include <netinet/in_pcb.h>
37 #include <netinet/udp.h>
38 #include <netinet/igmp.h>
39 #include <netinet/igmp_var.h>
40 #include <netinet/ip_mroute.h>
41
42 #include <machine/stdarg.h>
43
44 #define IP_MULTICASTOPTS 0
/*
 * Make the first (len) bytes of the non-NULL chain (m) contiguous by calling
 * m_pullup() whenever the leading mbuf has M_EXT set in m_flags or holds
 * fewer than (len) bytes.  (m) is overwritten with the m_pullup() result.
 */
#define M_PULLUP(m, len)						\
do {									\
	if ((m) != NULL &&						\
	    (((m)->m_flags & M_EXT) || (m)->m_len < (len)))		\
		(m) = m_pullup((m), (len));				\
} while (0)
50
51 /*
52 * Globals. All but ip_mrouter and ip_mrtproto could be static,
53 * except for netstat or debugging purposes.
54 */
55 struct socket *ip_mrouter = NULL;
56 int ip_mrtproto = IGMP_DVMRP; /* for netstat only */
57
58 #define NO_RTE_FOUND 0x1
59 #define RTE_FOUND 0x2
60
/*
 * Hash an (origin, group) address pair into a bucket index: both values are
 * folded with their own 10- and 20-bit right shifts, XORed together and
 * masked with mfchash.
 */
#define MFCHASH(a, g)							\
	(((a) ^ ((a) >> 10) ^ ((a) >> 20) ^				\
	  (g) ^ ((g) >> 10) ^ ((g) >> 20)) & mfchash)
64 LIST_HEAD(mfchashhdr, mfc) *mfchashtbl;
65 u_long mfchash;
66
67 u_char nexpire[MFCTBLSIZ];
68 struct vif viftable[MAXVIFS];
69 struct mrtstat mrtstat;
70 u_int mrtdebug = 0; /* debug level */
71 #define DEBUG_MFC 0x02
72 #define DEBUG_FORWARD 0x04
73 #define DEBUG_EXPIRE 0x08
74 #define DEBUG_XMIT 0x10
75 u_int tbfdebug = 0; /* tbf debug level */
76 #ifdef RSVP_ISI
77 u_int rsvpdebug = 0; /* rsvp debug level */
78 extern struct socket *ip_rsvpd;
79 extern int rsvp_on;
80 #endif /* RSVP_ISI */
81
82 #define EXPIRE_TIMEOUT (hz / 4) /* 4x / second */
83 #define UPCALL_EXPIRE 6 /* number of timeouts */
84
85 /*
86 * Define the token bucket filter structures
87 * qtable -> each interface has an associated queue of pkts
88 */
89
90 struct pkt_queue qtable[MAXVIFS][MAXQSIZE];
91
92 static int get_sg_cnt __P((struct sioc_sg_req *));
93 static int get_vif_cnt __P((struct sioc_vif_req *));
94 static int ip_mrouter_init __P((struct socket *, struct mbuf *));
95 static int get_version __P((struct mbuf *));
96 static int set_assert __P((struct mbuf *));
97 static int get_assert __P((struct mbuf *));
98 static int add_vif __P((struct mbuf *));
99 static int del_vif __P((struct mbuf *));
100 static void update_mfc __P((struct mfcctl *, struct mfc *));
101 static void expire_mfc __P((struct mfc *));
102 static int add_mfc __P((struct mbuf *));
103 #ifdef UPCALL_TIMING
104 static void collate __P((struct timeval *));
105 #endif
106 static int del_mfc __P((struct mbuf *));
107 static int socket_send __P((struct socket *, struct mbuf *,
108 struct sockaddr_in *));
109 static void expire_upcalls __P((void *));
110 #ifdef RSVP_ISI
111 static int ip_mdq __P((struct mbuf *, struct ifnet *, struct mfc *, vifi_t));
112 #else
113 static int ip_mdq __P((struct mbuf *, struct ifnet *, struct mfc *));
114 #endif
115 static void phyint_send __P((struct ip *, struct vif *, struct mbuf *));
116 static void encap_send __P((struct ip *, struct vif *, struct mbuf *));
117 static void tbf_control __P((struct vif *, struct mbuf *, struct ip *,
118 u_int32_t));
119 static void tbf_queue __P((struct vif *, struct mbuf *, struct ip *));
120 static void tbf_process_q __P((struct vif *));
121 static void tbf_dequeue __P((struct vif *, int));
122 static void tbf_reprocess_q __P((void *));
123 static int tbf_dq_sel __P((struct vif *, struct ip *));
124 static void tbf_send_packet __P((struct vif *, struct mbuf *));
125 static void tbf_update_tokens __P((struct vif *));
126 static int priority __P((struct vif *, struct ip *));
127
128 /*
129 * 'Interfaces' associated with decapsulator (so we can tell
130 * packets that went through it from ones that get reflected
131 * by a broken gateway). These interfaces are never linked into
132 * the system ifnet list & no routes point to them. I.e., packets
133 * can't be sent this way. They only exist as a placeholder for
134 * multicast source verification.
135 */
136 #if 0
137 struct ifnet multicast_decap_if[MAXVIFS];
138 #endif
139
140 #define ENCAP_TTL 64
141 #define ENCAP_PROTO IPPROTO_IPIP /* 4 */
142
143 /* prototype IP hdr for encapsulated packets */
144 struct ip multicast_encap_iphdr = {
145 #if BYTE_ORDER == LITTLE_ENDIAN
146 sizeof(struct ip) >> 2, IPVERSION,
147 #else
148 IPVERSION, sizeof(struct ip) >> 2,
149 #endif
150 0, /* tos */
151 sizeof(struct ip), /* total length */
152 0, /* id */
153 0, /* frag offset */
154 ENCAP_TTL, ENCAP_PROTO,
155 0, /* checksum */
156 };
157
158 /*
159 * Private variables.
160 */
161 static vifi_t numvifs = 0;
162 static int have_encap_tunnel = 0;
163
164 /*
165 * one-back cache used by ipip_input to locate a tunnel's vif
166 * given a datagram's src ip address.
167 */
168 static u_int32_t last_encap_src;
169 static struct vif *last_encap_vif;
170
171 /*
172 * whether or not special PIM assert processing is enabled.
173 */
174 static int pim_assert;
175 /*
176 * Rate limit for assert notification messages, in usec
177 */
178 #define ASSERT_MSG_TIME 3000000
179
/*
 * Look up the forwarding cache entry for origin address (o) and multicast
 * group (g).  Only entries without a pending stall queue (mfc_stall == NULL)
 * qualify.  (rt) receives the match or NULL; the lookup and miss counters
 * in mrtstat are updated along the way.
 * Type of service parameter to be added in the future!!!
 */

#define MFCFIND(o, g, rt) {						\
	register struct mfc *_ent;					\
									\
	(rt) = NULL;							\
	++mrtstat.mrts_mfc_lookups;					\
	_ent = mfchashtbl[MFCHASH(o, g)].lh_first;			\
	while (_ent != NULL) {						\
		if (_ent->mfc_origin.s_addr == (o) &&			\
		    _ent->mfc_mcastgrp.s_addr == (g) &&			\
		    _ent->mfc_stall == NULL) {				\
			(rt) = _ent;					\
			break;						\
		}							\
		_ent = _ent->mfc_hash.le_next;				\
	}								\
	if ((rt) == NULL)						\
		++mrtstat.mrts_mfc_misses;				\
}
201
/*
 * Elapsed-time helper, borrowed from Van Jacobson's scheduling code.
 * Stores (a) - (b) into delta, expressed in microseconds.  Differences of
 * one or two whole seconds are handled with plain additions; any other
 * non-zero second count falls back to a multiplication.
 */
#define TV_DELTA(a, b, delta) {						\
	register int tvd_secs;						\
									\
	delta = (a).tv_usec - (b).tv_usec;				\
	tvd_secs = (a).tv_sec - (b).tv_sec;				\
	if (tvd_secs == 1) {						\
		delta += 1000000;					\
	} else if (tvd_secs == 2) {					\
		delta += 1000000;					\
		delta += 1000000;					\
	} else if (tvd_secs != 0) {					\
		delta += (1000000 * tvd_secs);				\
	}								\
}
224
225 #ifdef UPCALL_TIMING
226 u_int32_t upcall_data[51];
227 #endif /* UPCALL_TIMING */
228
229 /*
230 * Handle MRT setsockopt commands to modify the multicast routing tables.
231 */
232 int
233 ip_mrouter_set(cmd, so, m)
234 int cmd;
235 struct socket *so;
236 struct mbuf **m;
237 {
238 int error;
239
240 if (cmd != MRT_INIT && so != ip_mrouter)
241 error = EACCES;
242 else
243 switch (cmd) {
244 case MRT_INIT:
245 error = ip_mrouter_init(so, *m);
246 break;
247 case MRT_DONE:
248 error = ip_mrouter_done();
249 break;
250 case MRT_ADD_VIF:
251 error = add_vif(*m);
252 break;
253 case MRT_DEL_VIF:
254 error = del_vif(*m);
255 break;
256 case MRT_ADD_MFC:
257 error = add_mfc(*m);
258 break;
259 case MRT_DEL_MFC:
260 error = del_mfc(*m);
261 break;
262 case MRT_ASSERT:
263 error = set_assert(*m);
264 break;
265 default:
266 error = EOPNOTSUPP;
267 break;
268 }
269
270 if (*m)
271 m_free(*m);
272 return (error);
273 }
274
275 /*
276 * Handle MRT getsockopt commands
277 */
278 int
279 ip_mrouter_get(cmd, so, m)
280 int cmd;
281 struct socket *so;
282 struct mbuf **m;
283 {
284 struct mbuf *mb;
285 int error;
286
287 if (so != ip_mrouter)
288 error = EACCES;
289 else {
290 *m = mb = m_get(M_WAIT, MT_SOOPTS);
291
292 switch (cmd) {
293 case MRT_VERSION:
294 error = get_version(mb);
295 break;
296 case MRT_ASSERT:
297 error = get_assert(mb);
298 break;
299 default:
300 error = EOPNOTSUPP;
301 break;
302 }
303
304 if (error)
305 m_free(mb);
306 }
307
308 return (error);
309 }
310
311 /*
312 * Handle ioctl commands to obtain information from the cache
313 */
314 int
315 mrt_ioctl(cmd, data)
316 u_long cmd;
317 caddr_t data;
318 {
319 int error;
320
321 switch (cmd) {
322 case SIOCGETVIFCNT:
323 error = get_vif_cnt((struct sioc_vif_req *)data);
324 break;
325 case SIOCGETSGCNT:
326 error = get_sg_cnt((struct sioc_sg_req *)data);
327 break;
328 default:
329 error = EINVAL;
330 break;
331 }
332
333 return (error);
334 }
335
336 /*
337 * returns the packet, byte, rpf-failure count for the source group provided
338 */
339 static int
340 get_sg_cnt(req)
341 register struct sioc_sg_req *req;
342 {
343 register struct mfc *rt;
344 int s;
345
346 s = splsoftnet();
347 MFCFIND(req->src.s_addr, req->grp.s_addr, rt);
348 splx(s);
349 if (rt != NULL) {
350 req->pktcnt = rt->mfc_pkt_cnt;
351 req->bytecnt = rt->mfc_byte_cnt;
352 req->wrong_if = rt->mfc_wrong_if;
353 } else
354 req->pktcnt = req->bytecnt = req->wrong_if = 0xffffffff;
355
356 return (0);
357 }
358
359 /*
360 * returns the input and output packet and byte counts on the vif provided
361 */
362 static int
363 get_vif_cnt(req)
364 register struct sioc_vif_req *req;
365 {
366 register vifi_t vifi = req->vifi;
367
368 if (vifi >= numvifs)
369 return (EINVAL);
370
371 req->icount = viftable[vifi].v_pkt_in;
372 req->ocount = viftable[vifi].v_pkt_out;
373 req->ibytes = viftable[vifi].v_bytes_in;
374 req->obytes = viftable[vifi].v_bytes_out;
375
376 return (0);
377 }
378
379 /*
380 * Enable multicast routing
381 */
382 static int
383 ip_mrouter_init(so, m)
384 struct socket *so;
385 struct mbuf *m;
386 {
387 int *v;
388
389 if (mrtdebug)
390 log(LOG_DEBUG,
391 "ip_mrouter_init: so_type = %d, pr_protocol = %d",
392 so->so_type, so->so_proto->pr_protocol);
393
394 if (so->so_type != SOCK_RAW ||
395 so->so_proto->pr_protocol != IPPROTO_IGMP)
396 return (EOPNOTSUPP);
397
398 if (m == 0 || m->m_len < sizeof(int))
399 return (EINVAL);
400
401 v = mtod(m, int *);
402 if (*v != 1)
403 return (EINVAL);
404
405 if (ip_mrouter != NULL)
406 return (EADDRINUSE);
407
408 ip_mrouter = so;
409
410 mfchashtbl = hashinit(MFCTBLSIZ, M_MRTABLE, &mfchash);
411 bzero((caddr_t)nexpire, sizeof(nexpire));
412
413 pim_assert = 0;
414
415 timeout(expire_upcalls, (caddr_t)0, EXPIRE_TIMEOUT);
416
417 if (mrtdebug)
418 log(LOG_DEBUG, "ip_mrouter_init");
419
420 return (0);
421 }
422
423 /*
424 * Disable multicast routing
425 */
426 int
427 ip_mrouter_done()
428 {
429 vifi_t vifi;
430 register struct vif *vifp;
431 int i;
432 int s;
433
434 s = splsoftnet();
435
436 /* Clear out all the vifs currently in use. */
437 for (vifi = 0; vifi < numvifs; vifi++) {
438 vifp = &viftable[vifi];
439 if (vifp->v_lcl_addr.s_addr != 0)
440 reset_vif(vifp);
441 }
442
443 bzero((caddr_t)qtable, sizeof(qtable));
444 numvifs = 0;
445 pim_assert = 0;
446
447 untimeout(expire_upcalls, (caddr_t)NULL);
448
449 /*
450 * Free all multicast forwarding cache entries.
451 */
452 for (i = 0; i < MFCTBLSIZ; i++) {
453 register struct mfc *rt, *nrt;
454
455 for (rt = mfchashtbl[i].lh_first; rt; rt = nrt) {
456 nrt = rt->mfc_hash.le_next;
457
458 expire_mfc(rt);
459 }
460 }
461 free(mfchashtbl, M_MRTABLE);
462
463 /* Reset de-encapsulation cache. */
464 have_encap_tunnel = 0;
465
466 ip_mrouter = NULL;
467
468 splx(s);
469
470 if (mrtdebug)
471 log(LOG_DEBUG, "ip_mrouter_done");
472
473 return (0);
474 }
475
476 static int
477 get_version(m)
478 struct mbuf *m;
479 {
480 int *v = mtod(m, int *);
481
482 *v = 0x0305; /* XXX !!!! */
483 m->m_len = sizeof(int);
484 return (0);
485 }
486
487 /*
488 * Set PIM assert processing global
489 */
490 static int
491 set_assert(m)
492 struct mbuf *m;
493 {
494 int *i;
495
496 if (m == 0 || m->m_len < sizeof(int))
497 return (EINVAL);
498
499 i = mtod(m, int *);
500 pim_assert = !!*i;
501 return (0);
502 }
503
504 /*
505 * Get PIM assert processing global
506 */
507 static int
508 get_assert(m)
509 struct mbuf *m;
510 {
511 int *i = mtod(m, int *);
512
513 *i = pim_assert;
514 m->m_len = sizeof(int);
515 return (0);
516 }
517
518 static struct sockaddr_in sin = { sizeof(sin), AF_INET };
519
520 /*
521 * Add a vif to the vif table
522 */
523 static int
524 add_vif(m)
525 struct mbuf *m;
526 {
527 register struct vifctl *vifcp;
528 register struct vif *vifp;
529 struct ifaddr *ifa;
530 struct ifnet *ifp;
531 struct ifreq ifr;
532 int error, s;
533
534 if (m == 0 || m->m_len < sizeof(struct vifctl))
535 return (EINVAL);
536
537 vifcp = mtod(m, struct vifctl *);
538 if (vifcp->vifc_vifi >= MAXVIFS)
539 return (EINVAL);
540
541 vifp = &viftable[vifcp->vifc_vifi];
542 if (vifp->v_lcl_addr.s_addr != 0)
543 return (EADDRINUSE);
544
545 /* Find the interface with an address in AF_INET family. */
546 sin.sin_addr = vifcp->vifc_lcl_addr;
547 ifa = ifa_ifwithaddr(sintosa(&sin));
548 if (ifa == 0)
549 return (EADDRNOTAVAIL);
550
551 if (vifcp->vifc_flags & VIFF_TUNNEL) {
552 if (vifcp->vifc_flags & VIFF_SRCRT) {
553 log(LOG_ERR, "Source routed tunnels not supported.");
554 return (EOPNOTSUPP);
555 }
556
557 /* Create a fake encapsulation interface. */
558 ifp = (struct ifnet *)malloc(sizeof(*ifp), M_MRTABLE, M_WAITOK);
559 bzero(ifp, sizeof(*ifp));
560 sprintf(ifp->if_xname, "mdecap%d", vifcp->vifc_vifi);
561
562 /* Prepare cached route entry. */
563 bzero(&vifp->v_route, sizeof(vifp->v_route));
564
565 /* Tell ipip_input() to start looking at encapsulated packets. */
566 have_encap_tunnel = 1;
567 } else {
568 /* Use the physical interface associated with the address. */
569 ifp = ifa->ifa_ifp;
570
571 /* Make sure the interface supports multicast. */
572 if ((ifp->if_flags & IFF_MULTICAST) == 0)
573 return (EOPNOTSUPP);
574
575 /* Enable promiscuous reception of all IP multicasts. */
576 satosin(&ifr.ifr_addr)->sin_len = sizeof(struct sockaddr_in);
577 satosin(&ifr.ifr_addr)->sin_family = AF_INET;
578 satosin(&ifr.ifr_addr)->sin_addr.s_addr = INADDR_ANY;
579 error = (*ifp->if_ioctl)(ifp, SIOCADDMULTI, (caddr_t)&ifr);
580 if (error)
581 return (error);
582 }
583
584 s = splsoftnet();
585 /* Define parameters for the tbf structure. */
586 vifp->v_tbf.q_len = 0;
587 vifp->v_tbf.n_tok = 0;
588 vifp->v_tbf.last_pkt_t = 0;
589
590 vifp->v_flags = vifcp->vifc_flags;
591 vifp->v_threshold = vifcp->vifc_threshold;
592 vifp->v_lcl_addr = vifcp->vifc_lcl_addr;
593 vifp->v_rmt_addr = vifcp->vifc_rmt_addr;
594 vifp->v_ifp = ifp;
595 vifp->v_rate_limit = vifcp->vifc_rate_limit;
596 #ifdef RSVP_ISI
597 vifp->v_rsvp_on = 0;
598 vifp->v_rsvpd = NULL;
599 #endif /* RSVP_ISI */
600 /* Initialize per vif pkt counters. */
601 vifp->v_pkt_in = 0;
602 vifp->v_pkt_out = 0;
603 vifp->v_bytes_in = 0;
604 vifp->v_bytes_out = 0;
605 splx(s);
606
607 /* Adjust numvifs up if the vifi is higher than numvifs. */
608 if (numvifs <= vifcp->vifc_vifi)
609 numvifs = vifcp->vifc_vifi + 1;
610
611 if (mrtdebug)
612 log(LOG_DEBUG, "add_vif #%d, lcladdr %x, %s %x, thresh %x, rate %d",
613 vifcp->vifc_vifi,
614 ntohl(vifcp->vifc_lcl_addr.s_addr),
615 (vifcp->vifc_flags & VIFF_TUNNEL) ? "rmtaddr" : "mask",
616 ntohl(vifcp->vifc_rmt_addr.s_addr),
617 vifcp->vifc_threshold,
618 vifcp->vifc_rate_limit);
619
620 return (0);
621 }
622
623 void
624 reset_vif(vifp)
625 register struct vif *vifp;
626 {
627 struct ifnet *ifp;
628 struct ifreq ifr;
629
630 if (vifp->v_flags & VIFF_TUNNEL) {
631 free(vifp->v_ifp, M_MRTABLE);
632 if (vifp == last_encap_vif) {
633 last_encap_vif = 0;
634 last_encap_src = 0;
635 }
636 } else {
637 satosin(&ifr.ifr_addr)->sin_len = sizeof(struct sockaddr_in);
638 satosin(&ifr.ifr_addr)->sin_family = AF_INET;
639 satosin(&ifr.ifr_addr)->sin_addr.s_addr = INADDR_ANY;
640 ifp = vifp->v_ifp;
641 (*ifp->if_ioctl)(ifp, SIOCDELMULTI, (caddr_t)&ifr);
642 }
643 bzero((caddr_t)vifp, sizeof(*vifp));
644 }
645
646 /*
647 * Delete a vif from the vif table
648 */
649 static int
650 del_vif(m)
651 struct mbuf *m;
652 {
653 vifi_t *vifip;
654 register struct vif *vifp;
655 register vifi_t vifi;
656 int s;
657
658 if (m == 0 || m->m_len < sizeof(vifi_t))
659 return (EINVAL);
660
661 vifip = mtod(m, vifi_t *);
662 if (*vifip >= numvifs)
663 return (EINVAL);
664
665 vifp = &viftable[*vifip];
666 if (vifp->v_lcl_addr.s_addr == 0)
667 return (EADDRNOTAVAIL);
668
669 s = splsoftnet();
670
671 reset_vif(vifp);
672
673 bzero((caddr_t)qtable[*vifip], sizeof(qtable[*vifip]));
674
675 /* Adjust numvifs down */
676 for (vifi = numvifs; vifi > 0; vifi--)
677 if (viftable[vifi-1].v_lcl_addr.s_addr != 0)
678 break;
679 numvifs = vifi;
680
681 splx(s);
682
683 if (mrtdebug)
684 log(LOG_DEBUG, "del_vif %d, numvifs %d", *vifip, numvifs);
685
686 return (0);
687 }
688
689 static void
690 update_mfc(mfccp, rt)
691 struct mfcctl *mfccp;
692 struct mfc *rt;
693 {
694 vifi_t vifi;
695
696 rt->mfc_parent = mfccp->mfcc_parent;
697 for (vifi = 0; vifi < numvifs; vifi++)
698 rt->mfc_ttls[vifi] = mfccp->mfcc_ttls[vifi];
699 rt->mfc_expire = 0;
700 rt->mfc_stall = 0;
701 }
702
703 static void
704 expire_mfc(rt)
705 struct mfc *rt;
706 {
707 struct rtdetq *rte, *nrte;
708
709 for (rte = rt->mfc_stall; rte != NULL; rte = nrte) {
710 nrte = rte->next;
711 m_freem(rte->m);
712 free(rte, M_MRTABLE);
713 }
714
715 LIST_REMOVE(rt, mfc_hash);
716 free(rt, M_MRTABLE);
717 }
718
719 /*
720 * Add an mfc entry
721 */
722 static int
723 add_mfc(m)
724 struct mbuf *m;
725 {
726 struct mfcctl *mfccp;
727 struct mfc *rt;
728 u_int32_t hash = 0;
729 struct rtdetq *rte, *nrte;
730 register u_short nstl;
731 int s;
732
733 if (m == 0 || m->m_len < sizeof(struct mfcctl))
734 return (EINVAL);
735
736 mfccp = mtod(m, struct mfcctl *);
737
738 s = splsoftnet();
739 MFCFIND(mfccp->mfcc_origin.s_addr, mfccp->mfcc_mcastgrp.s_addr, rt);
740
741 /* If an entry already exists, just update the fields */
742 if (rt) {
743 if (mrtdebug & DEBUG_MFC)
744 log(LOG_DEBUG,"add_mfc update o %x g %x p %x",
745 ntohl(mfccp->mfcc_origin.s_addr),
746 ntohl(mfccp->mfcc_mcastgrp.s_addr),
747 mfccp->mfcc_parent);
748
749 if (rt->mfc_expire)
750 nexpire[hash]--;
751
752 update_mfc(mfccp, rt);
753
754 splx(s);
755 return (0);
756 }
757
758 /*
759 * Find the entry for which the upcall was made and update
760 */
761 nstl = 0;
762 hash = MFCHASH(mfccp->mfcc_origin.s_addr, mfccp->mfcc_mcastgrp.s_addr);
763 for (rt = mfchashtbl[hash].lh_first; rt; rt = rt->mfc_hash.le_next) {
764 if (rt->mfc_origin.s_addr == mfccp->mfcc_origin.s_addr &&
765 rt->mfc_mcastgrp.s_addr == mfccp->mfcc_mcastgrp.s_addr &&
766 rt->mfc_stall != NULL) {
767 if (nstl++)
768 log(LOG_ERR, "add_mfc %s o %x g %x p %x dbx %p",
769 "multiple kernel entries",
770 ntohl(mfccp->mfcc_origin.s_addr),
771 ntohl(mfccp->mfcc_mcastgrp.s_addr),
772 mfccp->mfcc_parent, rt->mfc_stall);
773
774 if (mrtdebug & DEBUG_MFC)
775 log(LOG_DEBUG,"add_mfc o %x g %x p %x dbg %p",
776 ntohl(mfccp->mfcc_origin.s_addr),
777 ntohl(mfccp->mfcc_mcastgrp.s_addr),
778 mfccp->mfcc_parent, rt->mfc_stall);
779
780 if (rt->mfc_expire)
781 nexpire[hash]--;
782
783 /* free packets Qed at the end of this entry */
784 for (rte = rt->mfc_stall; rte != NULL; rte = nrte) {
785 nrte = rte->next;
786 #ifdef RSVP_ISI
787 ip_mdq(rte->m, rte->ifp, rt, -1);
788 #else
789 ip_mdq(rte->m, rte->ifp, rt);
790 #endif /* RSVP_ISI */
791 m_freem(rte->m);
792 #ifdef UPCALL_TIMING
793 collate(&rte->t);
794 #endif /* UPCALL_TIMING */
795 free(rte, M_MRTABLE);
796 }
797
798 update_mfc(mfccp, rt);
799 }
800 }
801
802 if (nstl == 0) {
803 /*
804 * No mfc; make a new one
805 */
806 if (mrtdebug & DEBUG_MFC)
807 log(LOG_DEBUG,"add_mfc no upcall o %x g %x p %x",
808 ntohl(mfccp->mfcc_origin.s_addr),
809 ntohl(mfccp->mfcc_mcastgrp.s_addr),
810 mfccp->mfcc_parent);
811
812 rt = (struct mfc *)malloc(sizeof(*rt), M_MRTABLE, M_NOWAIT);
813 if (rt == NULL) {
814 splx(s);
815 return (ENOBUFS);
816 }
817
818 rt->mfc_origin = mfccp->mfcc_origin;
819 rt->mfc_mcastgrp = mfccp->mfcc_mcastgrp;
820 /* initialize pkt counters per src-grp */
821 rt->mfc_pkt_cnt = 0;
822 rt->mfc_byte_cnt = 0;
823 rt->mfc_wrong_if = 0;
824 timerclear(&rt->mfc_last_assert);
825 update_mfc(mfccp, rt);
826
827 /* insert new entry at head of hash chain */
828 LIST_INSERT_HEAD(&mfchashtbl[hash], rt, mfc_hash);
829 }
830
831 splx(s);
832 return (0);
833 }
834
#ifdef UPCALL_TIMING
/*
 * Collect delay statistics on the upcalls.
 *
 * t is the time recorded when the packet was queued.  If it lies before
 * the current time, the elapsed microseconds are shifted right by 10 and
 * the matching slot of upcall_data[] is incremented; everything above 50
 * is accumulated in the last slot.
 *
 * The current-time variable must not be declared "register": its address
 * is passed to microtime().
 */
static void collate(t)
	register struct timeval *t;
{
	register u_int32_t d;
	struct timeval tp;
	register u_int32_t delta;

	microtime(&tp);

	if (timercmp(t, &tp, <)) {
		TV_DELTA(tp, *t, delta);

		d = delta >> 10;
		if (d > 50)
			d = 50;

		++upcall_data[d];
	}
}
#endif /* UPCALL_TIMING */
859
860 /*
861 * Delete an mfc entry
862 */
863 static int
864 del_mfc(m)
865 struct mbuf *m;
866 {
867 struct mfcctl *mfccp;
868 struct mfc *rt;
869 int s;
870
871 if (m == 0 || m->m_len < sizeof(struct mfcctl))
872 return (EINVAL);
873
874 mfccp = mtod(m, struct mfcctl *);
875
876 if (mrtdebug & DEBUG_MFC)
877 log(LOG_DEBUG, "del_mfc origin %x mcastgrp %x",
878 ntohl(mfccp->mfcc_origin.s_addr), ntohl(mfccp->mfcc_mcastgrp.s_addr));
879
880 s = splsoftnet();
881
882 MFCFIND(mfccp->mfcc_origin.s_addr, mfccp->mfcc_mcastgrp.s_addr, rt);
883 if (rt == NULL) {
884 splx(s);
885 return (EADDRNOTAVAIL);
886 }
887
888 LIST_REMOVE(rt, mfc_hash);
889 free(rt, M_MRTABLE);
890
891 splx(s);
892 return (0);
893 }
894
895 static int
896 socket_send(s, mm, src)
897 struct socket *s;
898 struct mbuf *mm;
899 struct sockaddr_in *src;
900 {
901 if (s) {
902 if (sbappendaddr(&s->so_rcv, sintosa(src), mm, (struct mbuf *)0) != 0) {
903 sorwakeup(s);
904 return (0);
905 }
906 }
907 m_freem(mm);
908 return (-1);
909 }
910
911 /*
912 * IP multicast forwarding function. This function assumes that the packet
913 * pointed to by "ip" has arrived on (or is about to be sent to) the interface
914 * pointed to by "ifp", and the packet is to be relayed to other networks
915 * that have members of the packet's destination IP multicast group.
916 *
917 * The packet is returned unscathed to the caller, unless it is
918 * erroneous, in which case a non-zero return value tells the caller to
919 * discard it.
920 */
921
922 #define IP_HDR_LEN 20 /* # bytes of fixed IP header (excluding options) */
923 #define TUNNEL_LEN 12 /* # bytes of IP option for tunnel encapsulation */
924
925 int
926 #ifdef RSVP_ISI
927 ip_mforward(m, ifp, imo)
928 #else
929 ip_mforward(m, ifp)
930 #endif /* RSVP_ISI */
931 struct mbuf *m;
932 struct ifnet *ifp;
933 #ifdef RSVP_ISI
934 struct ip_moptions *imo;
935 #endif /* RSVP_ISI */
936 {
937 register struct ip *ip = mtod(m, struct ip *);
938 register struct mfc *rt;
939 register u_char *ipoptions;
940 static int srctun = 0;
941 register struct mbuf *mm;
942 int s;
943 #ifdef RSVP_ISI
944 register struct vif *vifp;
945 vifi_t vifi;
946 #endif /* RSVP_ISI */
947
948 if (mrtdebug & DEBUG_FORWARD)
949 log(LOG_DEBUG, "ip_mforward: src %x, dst %x, ifp %p",
950 ntohl(ip->ip_src.s_addr), ntohl(ip->ip_dst.s_addr), ifp);
951
952 if (ip->ip_hl < (IP_HDR_LEN + TUNNEL_LEN) >> 2 ||
953 (ipoptions = (u_char *)(ip + 1))[1] != IPOPT_LSRR) {
954 /*
955 * Packet arrived via a physical interface or
956 * an encapuslated tunnel.
957 */
958 } else {
959 /*
960 * Packet arrived through a source-route tunnel.
961 * Source-route tunnels are no longer supported.
962 */
963 if ((srctun++ % 1000) == 0)
964 log(LOG_ERR, "ip_mforward: received source-routed packet from %x",
965 ntohl(ip->ip_src.s_addr));
966
967 return (1);
968 }
969
970 #ifdef RSVP_ISI
971 if (imo && ((vifi = imo->imo_multicast_vif) < numvifs)) {
972 if (ip->ip_ttl < 255)
973 ip->ip_ttl++; /* compensate for -1 in *_send routines */
974 if (rsvpdebug && ip->ip_p == IPPROTO_RSVP) {
975 vifp = viftable + vifi;
976 printf("Sending IPPROTO_RSVP from %x to %x on vif %d (%s%s)\n",
977 ntohl(ip->ip_src), ntohl(ip->ip_dst), vifi,
978 (vifp->v_flags & VIFF_TUNNEL) ? "tunnel on " : "",
979 vifp->v_ifp->if_xname);
980 }
981 return (ip_mdq(m, ifp, rt, vifi));
982 }
983 if (rsvpdebug && ip->ip_p == IPPROTO_RSVP) {
984 printf("Warning: IPPROTO_RSVP from %x to %x without vif option\n",
985 ntohl(ip->ip_src), ntohl(ip->ip_dst));
986 }
987 #endif /* RSVP_ISI */
988
989 /*
990 * Don't forward a packet with time-to-live of zero or one,
991 * or a packet destined to a local-only group.
992 */
993 if (ip->ip_ttl <= 1 ||
994 IN_LOCAL_GROUP(ip->ip_dst.s_addr))
995 return (0);
996
997 /*
998 * Determine forwarding vifs from the forwarding cache table
999 */
1000 s = splsoftnet();
1001 MFCFIND(ip->ip_src.s_addr, ip->ip_dst.s_addr, rt);
1002
1003 /* Entry exists, so forward if necessary */
1004 if (rt != NULL) {
1005 splx(s);
1006 #ifdef RSVP_ISI
1007 return (ip_mdq(m, ifp, rt, -1));
1008 #else
1009 return (ip_mdq(m, ifp, rt));
1010 #endif /* RSVP_ISI */
1011 } else {
1012 /*
1013 * If we don't have a route for packet's origin,
1014 * Make a copy of the packet &
1015 * send message to routing daemon
1016 */
1017
1018 register struct mbuf *mb0;
1019 register struct rtdetq *rte;
1020 register u_int32_t hash;
1021 #ifdef UPCALL_TIMING
1022 struct timeval tp;
1023
1024 microtime(&tp);
1025 #endif /* UPCALL_TIMING */
1026
1027 mrtstat.mrts_no_route++;
1028 if (mrtdebug & (DEBUG_FORWARD | DEBUG_MFC))
1029 log(LOG_DEBUG, "ip_mforward: no rte s %x g %x",
1030 ntohl(ip->ip_src.s_addr),
1031 ntohl(ip->ip_dst.s_addr));
1032
1033 /*
1034 * Allocate mbufs early so that we don't do extra work if we are
1035 * just going to fail anyway.
1036 */
1037 rte = (struct rtdetq *)malloc(sizeof(*rte), M_MRTABLE, M_NOWAIT);
1038 if (rte == NULL) {
1039 splx(s);
1040 return (ENOBUFS);
1041 }
1042 mb0 = m_copy(m, 0, M_COPYALL);
1043 if (mb0 == NULL) {
1044 free(rte, M_MRTABLE);
1045 splx(s);
1046 return (ENOBUFS);
1047 }
1048
1049 /* is there an upcall waiting for this packet? */
1050 hash = MFCHASH(ip->ip_src.s_addr, ip->ip_dst.s_addr);
1051 for (rt = mfchashtbl[hash].lh_first; rt; rt = rt->mfc_hash.le_next) {
1052 if (ip->ip_src.s_addr == rt->mfc_origin.s_addr &&
1053 ip->ip_dst.s_addr == rt->mfc_mcastgrp.s_addr &&
1054 rt->mfc_stall != NULL)
1055 break;
1056 }
1057
1058 if (rt == NULL) {
1059 int hlen = ip->ip_hl << 2;
1060 int i;
1061 struct igmpmsg *im;
1062
1063 /* no upcall, so make a new entry */
1064 rt = (struct mfc *)malloc(sizeof(*rt), M_MRTABLE, M_NOWAIT);
1065 if (rt == NULL) {
1066 free(rte, M_MRTABLE);
1067 m_free(mb0);
1068 splx(s);
1069 return (ENOBUFS);
1070 }
1071 /* Make a copy of the header to send to the user level process */
1072 mm = m_copy(m, 0, hlen);
1073 M_PULLUP(mm, hlen);
1074 if (mm == NULL) {
1075 free(rte, M_MRTABLE);
1076 m_free(mb0);
1077 free(rt, M_MRTABLE);
1078 splx(s);
1079 return (ENOBUFS);
1080 }
1081
1082 /*
1083 * Send message to routing daemon to install
1084 * a route into the kernel table
1085 */
1086 sin.sin_addr = ip->ip_src;
1087
1088 im = mtod(mm, struct igmpmsg *);
1089 im->im_msgtype = IGMPMSG_NOCACHE;
1090 im->im_mbz = 0;
1091
1092 mrtstat.mrts_upcalls++;
1093
1094 if (socket_send(ip_mrouter, mm, &sin) < 0) {
1095 log(LOG_WARNING, "ip_mforward: ip_mrouter socket queue full");
1096 ++mrtstat.mrts_upq_sockfull;
1097 free(rte, M_MRTABLE);
1098 m_free(mb0);
1099 free(rt, M_MRTABLE);
1100 splx(s);
1101 return (ENOBUFS);
1102 }
1103
1104 /* insert new entry at head of hash chain */
1105 rt->mfc_origin = ip->ip_src;
1106 rt->mfc_mcastgrp = ip->ip_dst;
1107 rt->mfc_pkt_cnt = 0;
1108 rt->mfc_byte_cnt = 0;
1109 rt->mfc_wrong_if = 0;
1110 rt->mfc_expire = UPCALL_EXPIRE;
1111 nexpire[hash]++;
1112 for (i = 0; i < numvifs; i++)
1113 rt->mfc_ttls[i] = 0;
1114 rt->mfc_parent = -1;
1115
1116 /* link into table */
1117 LIST_INSERT_HEAD(&mfchashtbl[hash], rt, mfc_hash);
1118 /* Add this entry to the end of the queue */
1119 rt->mfc_stall = rte;
1120 } else {
1121 /* determine if q has overflowed */
1122 struct rtdetq **p;
1123 register int npkts = 0;
1124
1125 for (p = &rt->mfc_stall; *p != NULL; p = &(*p)->next)
1126 if (++npkts > MAX_UPQ) {
1127 mrtstat.mrts_upq_ovflw++;
1128 free(rte, M_MRTABLE);
1129 m_free(mb0);
1130 splx(s);
1131 return (0);
1132 }
1133
1134 /* Add this entry to the end of the queue */
1135 *p = rte;
1136 }
1137
1138 rte->next = NULL;
1139 rte->m = mb0;
1140 rte->ifp = ifp;
1141 #ifdef UPCALL_TIMING
1142 rte->t = tp;
1143 #endif /* UPCALL_TIMING */
1144
1145
1146 splx(s);
1147
1148 return (0);
1149 }
1150 }
1151
1152
1153 /*ARGSUSED*/
1154 static void
1155 expire_upcalls(v)
1156 void *v;
1157 {
1158 int i;
1159 int s;
1160
1161 s = splsoftnet();
1162
1163 for (i = 0; i < MFCTBLSIZ; i++) {
1164 register struct mfc *rt, *nrt;
1165
1166 if (nexpire[i] == 0)
1167 continue;
1168
1169 for (rt = mfchashtbl[i].lh_first; rt; rt = nrt) {
1170 nrt = rt->mfc_hash.le_next;
1171
1172 if (rt->mfc_expire == 0 ||
1173 --rt->mfc_expire > 0)
1174 continue;
1175 nexpire[i]--;
1176
1177 ++mrtstat.mrts_cache_cleanups;
1178 if (mrtdebug & DEBUG_EXPIRE)
1179 log(LOG_DEBUG,
1180 "expire_upcalls: expiring (%x %x)",
1181 ntohl(rt->mfc_origin.s_addr),
1182 ntohl(rt->mfc_mcastgrp.s_addr));
1183
1184 expire_mfc(rt);
1185 }
1186 }
1187
1188 splx(s);
1189 timeout(expire_upcalls, (caddr_t)0, EXPIRE_TIMEOUT);
1190 }
1191
1192 /*
1193 * Packet forwarding routine once entry in the cache is made
1194 */
1195 static int
1196 #ifdef RSVP_ISI
1197 ip_mdq(m, ifp, rt, xmt_vif)
1198 #else
1199 ip_mdq(m, ifp, rt)
1200 #endif /* RSVP_ISI */
1201 register struct mbuf *m;
1202 register struct ifnet *ifp;
1203 register struct mfc *rt;
1204 #ifdef RSVP_ISI
1205 register vifi_t xmt_vif;
1206 #endif /* RSVP_ISI */
1207 {
1208 register struct ip *ip = mtod(m, struct ip *);
1209 register vifi_t vifi;
1210 register struct vif *vifp;
1211 register int plen = ntohs(ip->ip_len);
1212
1213 /*
1214 * Macro to send packet on vif. Since RSVP packets don't get counted on
1215 * input, they shouldn't get counted on output, so statistics keeping is
1216 * seperate.
1217 */
1218 #define MC_SEND(ip,vifp,m) { \
1219 if ((vifp)->v_flags & VIFF_TUNNEL) \
1220 encap_send((ip), (vifp), (m)); \
1221 else \
1222 phyint_send((ip), (vifp), (m)); \
1223 }
1224
1225 #ifdef RSVP_ISI
1226 /*
1227 * If xmt_vif is not -1, send on only the requested vif.
1228 *
1229 * (since vifi_t is u_short, -1 becomes MAXUSHORT, which > numvifs.
1230 */
1231 if (xmt_vif < numvifs) {
1232 MC_SEND(ip, viftable + xmt_vif, m);
1233 return (1);
1234 }
1235 #endif /* RSVP_ISI */
1236
1237 /*
1238 * Don't forward if it didn't arrive from the parent vif for its origin.
1239 */
1240 vifi = rt->mfc_parent;
1241 if ((vifi >= numvifs) || (viftable[vifi].v_ifp != ifp)) {
1242 /* came in the wrong interface */
1243 if (mrtdebug & DEBUG_FORWARD)
1244 log(LOG_DEBUG, "wrong if: ifp %p vifi %d vififp %p",
1245 ifp, vifi, viftable[vifi].v_ifp);
1246 ++mrtstat.mrts_wrong_if;
1247 ++rt->mfc_wrong_if;
1248 /*
1249 * If we are doing PIM assert processing, and we are forwarding
1250 * packets on this interface, and it is a broadcast medium
1251 * interface (and not a tunnel), send a message to the routing daemon.
1252 */
1253 if (pim_assert && rt->mfc_ttls[vifi] &&
1254 (ifp->if_flags & IFF_BROADCAST) &&
1255 !(viftable[vifi].v_flags & VIFF_TUNNEL)) {
1256 struct mbuf *mm;
1257 struct igmpmsg *im;
1258 int hlen = ip->ip_hl << 2;
1259 struct timeval now;
1260 register u_int32_t delta;
1261
1262 microtime(&now);
1263
1264 TV_DELTA(rt->mfc_last_assert, now, delta);
1265
1266 if (delta > ASSERT_MSG_TIME) {
1267 mm = m_copy(m, 0, hlen);
1268 M_PULLUP(mm, hlen);
1269 if (mm == NULL) {
1270 return (ENOBUFS);
1271 }
1272
1273 rt->mfc_last_assert = now;
1274
1275 im = mtod(mm, struct igmpmsg *);
1276 im->im_msgtype = IGMPMSG_WRONGVIF;
1277 im->im_mbz = 0;
1278 im->im_vif = vifi;
1279
1280 sin.sin_addr = im->im_src;
1281
1282 socket_send(ip_mrouter, m, &sin);
1283 }
1284 }
1285 return (0);
1286 }
1287
1288 /* If I sourced this packet, it counts as output, else it was input. */
1289 if (ip->ip_src.s_addr == viftable[vifi].v_lcl_addr.s_addr) {
1290 viftable[vifi].v_pkt_out++;
1291 viftable[vifi].v_bytes_out += plen;
1292 } else {
1293 viftable[vifi].v_pkt_in++;
1294 viftable[vifi].v_bytes_in += plen;
1295 }
1296 rt->mfc_pkt_cnt++;
1297 rt->mfc_byte_cnt += plen;
1298
1299 /*
1300 * For each vif, decide if a copy of the packet should be forwarded.
1301 * Forward if:
1302 * - the ttl exceeds the vif's threshold
1303 * - there are group members downstream on interface
1304 */
1305 for (vifp = viftable, vifi = 0; vifi < numvifs; vifp++, vifi++)
1306 if ((rt->mfc_ttls[vifi] > 0) &&
1307 (ip->ip_ttl > rt->mfc_ttls[vifi])) {
1308 vifp->v_pkt_out++;
1309 vifp->v_bytes_out += plen;
1310 MC_SEND(ip, vifp, m);
1311 }
1312
1313 return (0);
1314 }
1315
1316 #ifdef RSVP_ISI
1317 /*
1318 * check if a vif number is legal/ok. This is used by ip_output, to export
1319 * numvifs there,
1320 */
1321 int
1322 legal_vif_num(vif)
1323 int vif;
1324 {
1325 if (vif >= 0 && vif < numvifs)
1326 return (1);
1327 else
1328 return (0);
1329 }
1330 #endif /* RSVP_ISI */
1331
1332 static void
1333 phyint_send(ip, vifp, m)
1334 struct ip *ip;
1335 struct vif *vifp;
1336 struct mbuf *m;
1337 {
1338 register struct mbuf *mb_copy;
1339 register int hlen = ip->ip_hl << 2;
1340
1341 /*
1342 * Make a new reference to the packet; make sure that
1343 * the IP header is actually copied, not just referenced,
1344 * so that ip_output() only scribbles on the copy.
1345 */
1346 mb_copy = m_copy(m, 0, M_COPYALL);
1347 M_PULLUP(mb_copy, hlen);
1348 if (mb_copy == NULL)
1349 return;
1350
1351 if (vifp->v_rate_limit <= 0)
1352 tbf_send_packet(vifp, mb_copy);
1353 else
1354 tbf_control(vifp, mb_copy, mtod(mb_copy, struct ip *), ip->ip_len);
1355 }
1356
1357 static void
1358 encap_send(ip, vifp, m)
1359 register struct ip *ip;
1360 register struct vif *vifp;
1361 register struct mbuf *m;
1362 {
1363 register struct mbuf *mb_copy;
1364 register struct ip *ip_copy;
1365 register int i, len = ip->ip_len + sizeof(multicast_encap_iphdr);
1366
1367 /*
1368 * copy the old packet & pullup it's IP header into the
1369 * new mbuf so we can modify it. Try to fill the new
1370 * mbuf since if we don't the ethernet driver will.
1371 */
1372 MGETHDR(mb_copy, M_DONTWAIT, MT_DATA);
1373 if (mb_copy == NULL)
1374 return;
1375 mb_copy->m_data += max_linkhdr;
1376 mb_copy->m_pkthdr.len = len;
1377 mb_copy->m_len = sizeof(multicast_encap_iphdr);
1378
1379 if ((mb_copy->m_next = m_copy(m, 0, M_COPYALL)) == NULL) {
1380 m_freem(mb_copy);
1381 return;
1382 }
1383 i = MHLEN - max_linkhdr;
1384 if (i > len)
1385 i = len;
1386 mb_copy = m_pullup(mb_copy, i);
1387 if (mb_copy == NULL)
1388 return;
1389
1390 /*
1391 * fill in the encapsulating IP header.
1392 */
1393 ip_copy = mtod(mb_copy, struct ip *);
1394 *ip_copy = multicast_encap_iphdr;
1395 ip_copy->ip_id = htons(ip_id++);
1396 ip_copy->ip_len = len;
1397 ip_copy->ip_src = vifp->v_lcl_addr;
1398 ip_copy->ip_dst = vifp->v_rmt_addr;
1399
1400 /*
1401 * turn the encapsulated IP header back into a valid one.
1402 */
1403 ip = (struct ip *)((caddr_t)ip_copy + sizeof(multicast_encap_iphdr));
1404 --ip->ip_ttl;
1405 HTONS(ip->ip_len);
1406 HTONS(ip->ip_off);
1407 ip->ip_sum = 0;
1408 #if defined(LBL) && !defined(ultrix) && !defined(i386)
1409 ip->ip_sum = ~oc_cksum((caddr_t)ip, ip->ip_hl << 2, 0);
1410 #else
1411 mb_copy->m_data += sizeof(multicast_encap_iphdr);
1412 ip->ip_sum = in_cksum(mb_copy, ip->ip_hl << 2);
1413 mb_copy->m_data -= sizeof(multicast_encap_iphdr);
1414 #endif
1415
1416 if (vifp->v_rate_limit <= 0)
1417 tbf_send_packet(vifp, mb_copy);
1418 else
1419 tbf_control(vifp, mb_copy, ip, ip_copy->ip_len);
1420 }
1421
1422 /*
1423 * De-encapsulate a packet and feed it back through ip input (this
1424 * routine is called whenever IP gets a packet with proto type
1425 * ENCAP_PROTO and a local destination address).
1426 */
1427 void
1428 #if __STDC__
1429 ipip_input(struct mbuf *m, ...)
1430 #else
1431 ipip_input(m, va_alist)
1432 struct mbuf *m;
1433 va_dcl
1434 #endif
1435 {
1436 register int hlen;
1437 register struct ip *ip = mtod(m, struct ip *);
1438 register int s;
1439 register struct ifqueue *ifq;
1440 register struct vif *vifp;
1441 va_list ap;
1442
1443 va_start(ap, m);
1444 hlen = va_arg(ap, int);
1445 va_end(ap);
1446
1447 if (!have_encap_tunnel) {
1448 rip_input(m);
1449 return;
1450 }
1451
1452 /*
1453 * dump the packet if it's not to a multicast destination or if
1454 * we don't have an encapsulating tunnel with the source.
1455 * Note: This code assumes that the remote site IP address
1456 * uniquely identifies the tunnel (i.e., that this site has
1457 * at most one tunnel with the remote site).
1458 */
1459 if (!IN_MULTICAST(((struct ip *)((char *)ip + hlen))->ip_dst.s_addr)) {
1460 ++mrtstat.mrts_bad_tunnel;
1461 m_freem(m);
1462 return;
1463 }
1464
1465 if (ip->ip_src.s_addr != last_encap_src) {
1466 register struct vif *vife;
1467
1468 vifp = viftable;
1469 vife = vifp + numvifs;
1470 for (; vifp < vife; vifp++)
1471 if (vifp->v_flags & VIFF_TUNNEL &&
1472 vifp->v_rmt_addr.s_addr == ip->ip_src.s_addr)
1473 break;
1474 if (vifp == vife) {
1475 mrtstat.mrts_cant_tunnel++; /*XXX*/
1476 m_freem(m);
1477 if (mrtdebug)
1478 log(LOG_DEBUG, "ip_mforward: no tunnel with %x",
1479 ntohl(ip->ip_src.s_addr));
1480 return;
1481 }
1482 last_encap_vif = vifp;
1483 last_encap_src = ip->ip_src.s_addr;
1484 } else
1485 vifp = last_encap_vif;
1486
1487 m->m_data += hlen;
1488 m->m_len -= hlen;
1489 m->m_pkthdr.len -= hlen;
1490 m->m_pkthdr.rcvif = vifp->v_ifp;
1491 ifq = &ipintrq;
1492 s = splimp();
1493 if (IF_QFULL(ifq)) {
1494 IF_DROP(ifq);
1495 m_freem(m);
1496 } else {
1497 IF_ENQUEUE(ifq, m);
1498 /*
1499 * normally we would need a "schednetisr(NETISR_IP)"
1500 * here but we were called by ip_input and it is going
1501 * to loop back & try to dequeue the packet we just
1502 * queued as soon as we return so we avoid the
1503 * unnecessary software interrrupt.
1504 */
1505 }
1506 splx(s);
1507 }
1508
1509 /*
1510 * Token bucket filter module
1511 */
1512 static void
1513 tbf_control(vifp, m, ip, p_len)
1514 register struct vif *vifp;
1515 register struct mbuf *m;
1516 register struct ip *ip;
1517 register u_int32_t p_len;
1518 {
1519
1520 tbf_update_tokens(vifp);
1521
1522 /*
1523 * If there are enough tokens, and the queue is empty, send this packet
1524 * out immediately. Otherwise, try to insert it on this vif's queue.
1525 */
1526 if (vifp->v_tbf.q_len == 0) {
1527 if (p_len <= vifp->v_tbf.n_tok) {
1528 vifp->v_tbf.n_tok -= p_len;
1529 tbf_send_packet(vifp, m);
1530 } else if (p_len > MAX_BKT_SIZE) {
1531 /* drop if packet is too large */
1532 mrtstat.mrts_pkt2large++;
1533 m_freem(m);
1534 } else {
1535 /* queue packet and timeout till later */
1536 tbf_queue(vifp, m, ip);
1537 timeout(tbf_reprocess_q, vifp, 1);
1538 }
1539 } else {
1540 if (vifp->v_tbf.q_len >= MAXQSIZE &&
1541 !tbf_dq_sel(vifp, ip)) {
1542 /* queue length too much, and couldn't make room */
1543 mrtstat.mrts_q_overflow++;
1544 m_freem(m);
1545 } else {
1546 /* queue length low enough, or made room */
1547 tbf_queue(vifp, m, ip);
1548 tbf_process_q(vifp);
1549 }
1550 }
1551 }
1552
1553 /*
1554 * adds a packet to the queue at the interface
1555 */
1556 static void
1557 tbf_queue(vifp, m, ip)
1558 register struct vif *vifp;
1559 register struct mbuf *m;
1560 register struct ip *ip;
1561 {
1562 register u_int32_t ql;
1563 register int index = (vifp - viftable);
1564 register int s = splsoftnet();
1565
1566 ql = vifp->v_tbf.q_len;
1567
1568 qtable[index][ql].pkt_m = m;
1569 qtable[index][ql].pkt_len = (mtod(m, struct ip *))->ip_len;
1570 qtable[index][ql].pkt_ip = ip;
1571
1572 vifp->v_tbf.q_len++;
1573 splx(s);
1574 }
1575
1576
1577 /*
1578 * processes the queue at the interface
1579 */
1580 static void
1581 tbf_process_q(vifp)
1582 register struct vif *vifp;
1583 {
1584 register struct pkt_queue pkt_1;
1585 register int index = (vifp - viftable);
1586 register int s = splsoftnet();
1587
1588 /* loop through the queue at the interface and send as many packets
1589 * as possible
1590 */
1591 while (vifp->v_tbf.q_len > 0) {
1592 /* locate the first packet */
1593 pkt_1 = qtable[index][0];
1594
1595 /* determine if the packet can be sent */
1596 if (pkt_1.pkt_len <= vifp->v_tbf.n_tok) {
1597 /* if so,
1598 * reduce no of tokens, dequeue the queue,
1599 * send the packet.
1600 */
1601 vifp->v_tbf.n_tok -= pkt_1.pkt_len;
1602
1603 tbf_dequeue(vifp, 0);
1604 tbf_send_packet(vifp, pkt_1.pkt_m);
1605 } else
1606 break;
1607 }
1608 splx(s);
1609 }
1610
1611 /*
1612 * removes the jth packet from the queue at the interface
1613 */
1614 static void
1615 tbf_dequeue(vifp, j)
1616 register struct vif *vifp;
1617 register int j;
1618 {
1619 register u_int32_t index = vifp - viftable;
1620 register int i;
1621
1622 for (i=j+1; i <= vifp->v_tbf.q_len - 1; i++) {
1623 qtable[index][i-1] = qtable[index][i];
1624 }
1625 qtable[index][i-1].pkt_m = NULL;
1626 qtable[index][i-1].pkt_len = NULL;
1627 qtable[index][i-1].pkt_ip = NULL;
1628
1629 vifp->v_tbf.q_len--;
1630
1631 if (tbfdebug > 1)
1632 log(LOG_DEBUG, "tbf_dequeue: vif# %d qlen %d",vifp-viftable, i-1);
1633 }
1634
1635 static void
1636 tbf_reprocess_q(arg)
1637 void *arg;
1638 {
1639 register struct vif *vifp = arg;
1640
1641 if (ip_mrouter == NULL)
1642 return;
1643
1644 tbf_update_tokens(vifp);
1645 tbf_process_q(vifp);
1646
1647 if (vifp->v_tbf.q_len)
1648 timeout(tbf_reprocess_q, vifp, 1);
1649 }
1650
1651 /* function that will selectively discard a member of the queue
1652 * based on the precedence value and the priority obtained through
1653 * a lookup table - not yet implemented accurately!
1654 */
1655 static int
1656 tbf_dq_sel(vifp, ip)
1657 register struct vif *vifp;
1658 register struct ip *ip;
1659 {
1660 register int i;
1661 register int s = splsoftnet();
1662 register u_int p;
1663
1664 p = priority(vifp, ip);
1665
1666 for(i=vifp->v_tbf.q_len-1;i >= 0;i--) {
1667 if (p > priority(vifp, qtable[vifp-viftable][i].pkt_ip)) {
1668 m_freem(qtable[vifp-viftable][i].pkt_m);
1669 tbf_dequeue(vifp, i);
1670 splx(s);
1671 mrtstat.mrts_drop_sel++;
1672 return (1);
1673 }
1674 }
1675 splx(s);
1676 return (0);
1677 }
1678
1679 static void
1680 tbf_send_packet(vifp,m)
1681 register struct vif *vifp;
1682 register struct mbuf *m;
1683 {
1684 int error;
1685 int s = splsoftnet();
1686
1687 if (vifp->v_flags & VIFF_TUNNEL) {
1688 /* If tunnel options */
1689 ip_output(m, (struct mbuf *)0, &vifp->v_route,
1690 IP_FORWARDING, NULL);
1691 } else {
1692 /* if physical interface option, extract the options and then send */
1693 struct ip *ip = mtod(m, struct ip *);
1694 struct ip_moptions imo;
1695 imo.imo_multicast_ifp = vifp->v_ifp;
1696 imo.imo_multicast_ttl = ip->ip_ttl - 1;
1697 imo.imo_multicast_loop = 1;
1698 #ifdef RSVP_ISI
1699 imo.imo_multicast_vif = -1;
1700 #endif
1701
1702 error = ip_output(m, (struct mbuf *)0, (struct route *)0,
1703 IP_FORWARDING|IP_MULTICASTOPTS, &imo);
1704 if (mrtdebug & DEBUG_XMIT)
1705 log(LOG_DEBUG, "phyint_send on vif %d err %d", vifp-viftable, error);
1706 }
1707 splx(s);
1708 }
1709
1710 /* determine the current time and then
1711 * the elapsed time (between the last time and time now)
1712 * in milliseconds & update the no. of tokens in the bucket
1713 */
1714 static void
1715 tbf_update_tokens(vifp)
1716 register struct vif *vifp;
1717 {
1718 struct timeval tp;
1719 register u_int32_t t;
1720 register u_int32_t elapsed;
1721 register int s = splsoftnet();
1722
1723 microtime(&tp);
1724
1725 t = tp.tv_sec*1000 + tp.tv_usec/1000;
1726
1727 elapsed = (t - vifp->v_tbf.last_pkt_t) * vifp->v_rate_limit /8;
1728 vifp->v_tbf.n_tok += elapsed;
1729 vifp->v_tbf.last_pkt_t = t;
1730
1731 if (vifp->v_tbf.n_tok > MAX_BKT_SIZE)
1732 vifp->v_tbf.n_tok = MAX_BKT_SIZE;
1733
1734 splx(s);
1735 }
1736
1737 static int
1738 priority(vifp, ip)
1739 register struct vif *vifp;
1740 register struct ip *ip;
1741 {
1742 register int prio;
1743
1744 /* temporary hack; may add general packet classifier some day */
1745
1746 /*
1747 * The UDP port space is divided up into four priority ranges:
1748 * [0, 16384) : unclassified - lowest priority
1749 * [16384, 32768) : audio - highest priority
1750 * [32768, 49152) : whiteboard - medium priority
1751 * [49152, 65536) : video - low priority
1752 */
1753 if (ip->ip_p == IPPROTO_UDP) {
1754 struct udphdr *udp = (struct udphdr *)(((char *)ip) + (ip->ip_hl << 2));
1755
1756 switch (ntohs(udp->uh_dport) & 0xc000) {
1757 case 0x4000:
1758 prio = 70;
1759 break;
1760 case 0x8000:
1761 prio = 60;
1762 break;
1763 case 0xc000:
1764 prio = 55;
1765 break;
1766 default:
1767 prio = 50;
1768 break;
1769 }
1770
1771 if (tbfdebug > 1) log(LOG_DEBUG, "port %x prio %d", ntohs(udp->uh_dport), prio);
1772 } else
1773 prio = 50;
1774
1775
1776 return (prio);
1777 }
1778
1779 /*
1780 * End of token bucket filter modifications
1781 */
1782
1783 #ifdef RSVP_ISI
1784
1785 int
1786 ip_rsvp_vif_init(so, m)
1787 struct socket *so;
1788 struct mbuf *m;
1789 {
1790 int i;
1791 register int s;
1792
1793 if (rsvpdebug)
1794 printf("ip_rsvp_vif_init: so_type = %d, pr_protocol = %d\n",
1795 so->so_type, so->so_proto->pr_protocol);
1796
1797 if (so->so_type != SOCK_RAW || so->so_proto->pr_protocol != IPPROTO_RSVP)
1798 return (EOPNOTSUPP);
1799
1800 /* Check mbuf. */
1801 if (m == NULL || m->m_len != sizeof(int)) {
1802 return (EINVAL);
1803 }
1804 i = *(mtod(m, int *));
1805
1806 if (rsvpdebug)
1807 printf("ip_rsvp_vif_init: vif = %d rsvp_on = %d\n",i,rsvp_on);
1808
1809 s = splsoftnet();
1810
1811 /* Check vif. */
1812 if (!legal_vif_num(i)) {
1813 splx(s);
1814 return (EADDRNOTAVAIL);
1815 }
1816
1817 /* Check if socket is available. */
1818 if (viftable[i].v_rsvpd != NULL) {
1819 splx(s);
1820 return (EADDRINUSE);
1821 }
1822
1823 viftable[i].v_rsvpd = so;
1824 /* This may seem silly, but we need to be sure we don't over-increment
1825 * the RSVP counter, in case something slips up.
1826 */
1827 if (!viftable[i].v_rsvp_on) {
1828 viftable[i].v_rsvp_on = 1;
1829 rsvp_on++;
1830 }
1831
1832 splx(s);
1833 return (0);
1834 }
1835
1836 int
1837 ip_rsvp_vif_done(so, m)
1838 struct socket *so;
1839 struct mbuf *m;
1840 {
1841 int i;
1842 register int s;
1843
1844 if (rsvpdebug)
1845 printf("ip_rsvp_vif_done: so_type = %d, pr_protocol = %d\n",
1846 so->so_type, so->so_proto->pr_protocol);
1847
1848 if (so->so_type != SOCK_RAW || so->so_proto->pr_protocol != IPPROTO_RSVP)
1849 return (EOPNOTSUPP);
1850
1851 /* Check mbuf. */
1852 if (m == NULL || m->m_len != sizeof(int)) {
1853 return (EINVAL);
1854 }
1855 i = *(mtod(m, int *));
1856
1857 s = splsoftnet();
1858
1859 /* Check vif. */
1860 if (!legal_vif_num(i)) {
1861 splx(s);
1862 return (EADDRNOTAVAIL);
1863 }
1864
1865 if (rsvpdebug)
1866 printf("ip_rsvp_vif_done: v_rsvpd = %x so = %x\n",
1867 viftable[i].v_rsvpd, so);
1868
1869 viftable[i].v_rsvpd = NULL;
1870 /* This may seem silly, but we need to be sure we don't over-decrement
1871 * the RSVP counter, in case something slips up.
1872 */
1873 if (viftable[i].v_rsvp_on) {
1874 viftable[i].v_rsvp_on = 0;
1875 rsvp_on--;
1876 }
1877
1878 splx(s);
1879 return (0);
1880 }
1881
1882 void
1883 ip_rsvp_force_done(so)
1884 struct socket *so;
1885 {
1886 int vifi;
1887 register int s;
1888
1889 /* Don't bother if it is not the right type of socket. */
1890 if (so->so_type != SOCK_RAW || so->so_proto->pr_protocol != IPPROTO_RSVP)
1891 return;
1892
1893 s = splsoftnet();
1894
1895 /* The socket may be attached to more than one vif...this
1896 * is perfectly legal.
1897 */
1898 for (vifi = 0; vifi < numvifs; vifi++) {
1899 if (viftable[vifi].v_rsvpd == so) {
1900 viftable[vifi].v_rsvpd = NULL;
1901 /* This may seem silly, but we need to be sure we don't
1902 * over-decrement the RSVP counter, in case something slips up.
1903 */
1904 if (viftable[vifi].v_rsvp_on) {
1905 viftable[vifi].v_rsvp_on = 0;
1906 rsvp_on--;
1907 }
1908 }
1909 }
1910
1911 splx(s);
1912 return;
1913 }
1914
1915 void
1916 rsvp_input(m, ifp)
1917 struct mbuf *m;
1918 struct ifnet *ifp;
1919 {
1920 int vifi;
1921 register struct ip *ip = mtod(m, struct ip *);
1922 static struct sockaddr_in rsvp_src = { sizeof(sin), AF_INET };
1923 register int s;
1924
1925 if (rsvpdebug)
1926 printf("rsvp_input: rsvp_on %d\n",rsvp_on);
1927
1928 /* Can still get packets with rsvp_on = 0 if there is a local member
1929 * of the group to which the RSVP packet is addressed. But in this
1930 * case we want to throw the packet away.
1931 */
1932 if (!rsvp_on) {
1933 m_freem(m);
1934 return;
1935 }
1936
1937 /* If the old-style non-vif-associated socket is set, then use
1938 * it and ignore the new ones.
1939 */
1940 if (ip_rsvpd != NULL) {
1941 if (rsvpdebug)
1942 printf("rsvp_input: Sending packet up old-style socket\n");
1943 rip_input(m);
1944 return;
1945 }
1946
1947 s = splsoftnet();
1948
1949 if (rsvpdebug)
1950 printf("rsvp_input: check vifs\n");
1951
1952 /* Find which vif the packet arrived on. */
1953 for (vifi = 0; vifi < numvifs; vifi++) {
1954 if (viftable[vifi].v_ifp == ifp)
1955 break;
1956 }
1957
1958 if (vifi == numvifs) {
1959 /* Can't find vif packet arrived on. Drop packet. */
1960 if (rsvpdebug)
1961 printf("rsvp_input: Can't find vif for packet...dropping it.\n");
1962 m_freem(m);
1963 splx(s);
1964 return;
1965 }
1966
1967 if (rsvpdebug)
1968 printf("rsvp_input: check socket\n");
1969
1970 if (viftable[vifi].v_rsvpd == NULL) {
1971 /* drop packet, since there is no specific socket for this
1972 * interface */
1973 if (rsvpdebug)
1974 printf("rsvp_input: No socket defined for vif %d\n",vifi);
1975 m_freem(m);
1976 splx(s);
1977 return;
1978 }
1979
1980 rsvp_src.sin_addr = ip->ip_src;
1981
1982 if (rsvpdebug && m)
1983 printf("rsvp_input: m->m_len = %d, sbspace() = %d\n",
1984 m->m_len,sbspace(&viftable[vifi].v_rsvpd->so_rcv));
1985
1986 if (socket_send(viftable[vifi].v_rsvpd, m, &rsvp_src) < 0)
1987 if (rsvpdebug)
1988 printf("rsvp_input: Failed to append to socket\n");
1989 else
1990 if (rsvpdebug)
1991 printf("rsvp_input: send packet up\n");
1992
1993 splx(s);
1994 }
1995 #endif /* RSVP_ISI */
1996