ip_output.c revision 1.91 1 /* $NetBSD: ip_output.c,v 1.91 2002/01/08 10:05:13 itojun Exp $ */
2
3 /*
4 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of the project nor the names of its contributors
16 * may be used to endorse or promote products derived from this software
17 * without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 */
31
32 /*-
33 * Copyright (c) 1998 The NetBSD Foundation, Inc.
34 * All rights reserved.
35 *
36 * This code is derived from software contributed to The NetBSD Foundation
37 * by Public Access Networks Corporation ("Panix"). It was developed under
38 * contract to Panix by Eric Haszlakiewicz and Thor Lancelot Simon.
39 *
40 * Redistribution and use in source and binary forms, with or without
41 * modification, are permitted provided that the following conditions
42 * are met:
43 * 1. Redistributions of source code must retain the above copyright
44 * notice, this list of conditions and the following disclaimer.
45 * 2. Redistributions in binary form must reproduce the above copyright
46 * notice, this list of conditions and the following disclaimer in the
47 * documentation and/or other materials provided with the distribution.
48 * 3. All advertising materials mentioning features or use of this software
49 * must display the following acknowledgement:
50 * This product includes software developed by the NetBSD
51 * Foundation, Inc. and its contributors.
52 * 4. Neither the name of The NetBSD Foundation nor the names of its
53 * contributors may be used to endorse or promote products derived
54 * from this software without specific prior written permission.
55 *
56 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
57 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
58 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
59 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
60 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
61 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
62 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
63 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
64 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
65 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
66 * POSSIBILITY OF SUCH DAMAGE.
67 */
68
69 /*
70 * Copyright (c) 1982, 1986, 1988, 1990, 1993
71 * The Regents of the University of California. All rights reserved.
72 *
73 * Redistribution and use in source and binary forms, with or without
74 * modification, are permitted provided that the following conditions
75 * are met:
76 * 1. Redistributions of source code must retain the above copyright
77 * notice, this list of conditions and the following disclaimer.
78 * 2. Redistributions in binary form must reproduce the above copyright
79 * notice, this list of conditions and the following disclaimer in the
80 * documentation and/or other materials provided with the distribution.
81 * 3. All advertising materials mentioning features or use of this software
82 * must display the following acknowledgement:
83 * This product includes software developed by the University of
84 * California, Berkeley and its contributors.
85 * 4. Neither the name of the University nor the names of its contributors
86 * may be used to endorse or promote products derived from this software
87 * without specific prior written permission.
88 *
89 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
90 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
91 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
92 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
93 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
94 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
95 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
96 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
97 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
98 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
99 * SUCH DAMAGE.
100 *
101 * @(#)ip_output.c 8.3 (Berkeley) 1/21/94
102 */
103
104 #include <sys/cdefs.h>
105 __KERNEL_RCSID(0, "$NetBSD: ip_output.c,v 1.91 2002/01/08 10:05:13 itojun Exp $");
106
107 #include "opt_pfil_hooks.h"
108 #include "opt_ipsec.h"
109 #include "opt_mrouting.h"
110
111 #include <sys/param.h>
112 #include <sys/malloc.h>
113 #include <sys/mbuf.h>
114 #include <sys/errno.h>
115 #include <sys/protosw.h>
116 #include <sys/socket.h>
117 #include <sys/socketvar.h>
118 #include <sys/systm.h>
119 #include <sys/proc.h>
120
121 #include <net/if.h>
122 #include <net/route.h>
123 #include <net/pfil.h>
124
125 #include <netinet/in.h>
126 #include <netinet/in_systm.h>
127 #include <netinet/ip.h>
128 #include <netinet/in_pcb.h>
129 #include <netinet/in_var.h>
130 #include <netinet/ip_var.h>
131
132 #ifdef MROUTING
133 #include <netinet/ip_mroute.h>
134 #endif
135
136 #include <machine/stdarg.h>
137
138 #ifdef IPSEC
139 #include <netinet6/ipsec.h>
140 #include <netkey/key.h>
141 #include <netkey/key_debug.h>
142 #endif /*IPSEC*/
143
144 static struct mbuf *ip_insertoptions __P((struct mbuf *, struct mbuf *, int *));
145 static struct ifnet *ip_multicast_if __P((struct in_addr *, int *));
146 static void ip_mloopback
147 __P((struct ifnet *, struct mbuf *, struct sockaddr_in *));
148
149 #ifdef PFIL_HOOKS
150 extern struct pfil_head inet_pfil_hook; /* XXX */
151 #endif
152
153 /*
154 * IP output. The packet in mbuf chain m contains a skeletal IP
155 * header (with len, off, ttl, proto, tos, src, dst).
156 * The mbuf chain containing the packet will be freed.
157 * The mbuf opt, if present, will not be freed.
158 */
159 int
160 #if __STDC__
161 ip_output(struct mbuf *m0, ...)
162 #else
163 ip_output(m0, va_alist)
164 struct mbuf *m0;
165 va_dcl
166 #endif
167 {
168 struct ip *ip, *mhip;
169 struct ifnet *ifp;
170 struct mbuf *m = m0;
171 int hlen = sizeof (struct ip);
172 int len, off, error = 0;
173 struct route iproute;
174 struct sockaddr_in *dst;
175 struct in_ifaddr *ia;
176 struct mbuf *opt;
177 struct route *ro;
178 int flags, sw_csum;
179 int *mtu_p;
180 int mtu;
181 struct ip_moptions *imo;
182 va_list ap;
183 #ifdef IPSEC
184 struct socket *so;
185 struct secpolicy *sp = NULL;
186 #endif /*IPSEC*/
187 u_int16_t ip_len;
188
189 va_start(ap, m0);
190 opt = va_arg(ap, struct mbuf *);
191 ro = va_arg(ap, struct route *);
192 flags = va_arg(ap, int);
193 imo = va_arg(ap, struct ip_moptions *);
194 if (flags & IP_RETURNMTU)
195 mtu_p = va_arg(ap, int *);
196 else
197 mtu_p = NULL;
198 va_end(ap);
199
200 #ifdef IPSEC
201 so = ipsec_getsocket(m);
202 (void)ipsec_setsocket(m, NULL);
203 #endif /*IPSEC*/
204
205 #ifdef DIAGNOSTIC
206 if ((m->m_flags & M_PKTHDR) == 0)
207 panic("ip_output no HDR");
208 #endif
209 if (opt) {
210 m = ip_insertoptions(m, opt, &len);
211 hlen = len;
212 }
213 ip = mtod(m, struct ip *);
214 /*
215 * Fill in IP header.
216 */
217 if ((flags & (IP_FORWARDING|IP_RAWOUTPUT)) == 0) {
218 ip->ip_v = IPVERSION;
219 ip->ip_off = 0;
220 ip->ip_id = htons(ip_id++);
221 ip->ip_hl = hlen >> 2;
222 ipstat.ips_localout++;
223 } else {
224 hlen = ip->ip_hl << 2;
225 }
226 /*
227 * Route packet.
228 */
229 if (ro == 0) {
230 ro = &iproute;
231 bzero((caddr_t)ro, sizeof (*ro));
232 }
233 dst = satosin(&ro->ro_dst);
234 /*
235 * If there is a cached route,
236 * check that it is to the same destination
237 * and is still up. If not, free it and try again.
238 */
239 if (ro->ro_rt && ((ro->ro_rt->rt_flags & RTF_UP) == 0 ||
240 !in_hosteq(dst->sin_addr, ip->ip_dst))) {
241 RTFREE(ro->ro_rt);
242 ro->ro_rt = (struct rtentry *)0;
243 }
244 if (ro->ro_rt == 0) {
245 dst->sin_family = AF_INET;
246 dst->sin_len = sizeof(*dst);
247 dst->sin_addr = ip->ip_dst;
248 }
249 /*
250 * If routing to interface only,
251 * short circuit routing lookup.
252 */
253 if (flags & IP_ROUTETOIF) {
254 if ((ia = ifatoia(ifa_ifwithladdr(sintosa(dst)))) == 0) {
255 ipstat.ips_noroute++;
256 error = ENETUNREACH;
257 goto bad;
258 }
259 ifp = ia->ia_ifp;
260 mtu = ifp->if_mtu;
261 ip->ip_ttl = 1;
262 } else {
263 if (ro->ro_rt == 0)
264 rtalloc(ro);
265 if (ro->ro_rt == 0) {
266 ipstat.ips_noroute++;
267 error = EHOSTUNREACH;
268 goto bad;
269 }
270 ia = ifatoia(ro->ro_rt->rt_ifa);
271 ifp = ro->ro_rt->rt_ifp;
272 if ((mtu = ro->ro_rt->rt_rmx.rmx_mtu) == 0)
273 mtu = ifp->if_mtu;
274 ro->ro_rt->rt_use++;
275 if (ro->ro_rt->rt_flags & RTF_GATEWAY)
276 dst = satosin(ro->ro_rt->rt_gateway);
277 }
278 if (IN_MULTICAST(ip->ip_dst.s_addr) ||
279 (ip->ip_dst.s_addr == INADDR_BROADCAST)) {
280 struct in_multi *inm;
281
282 m->m_flags |= (ip->ip_dst.s_addr == INADDR_BROADCAST) ?
283 M_BCAST : M_MCAST;
284 /*
285 * IP destination address is multicast. Make sure "dst"
286 * still points to the address in "ro". (It may have been
287 * changed to point to a gateway address, above.)
288 */
289 dst = satosin(&ro->ro_dst);
290 /*
291 * See if the caller provided any multicast options
292 */
293 if (imo != NULL) {
294 ip->ip_ttl = imo->imo_multicast_ttl;
295 if (imo->imo_multicast_ifp != NULL) {
296 ifp = imo->imo_multicast_ifp;
297 mtu = ifp->if_mtu;
298 }
299 } else
300 ip->ip_ttl = IP_DEFAULT_MULTICAST_TTL;
301 /*
302 * Confirm that the outgoing interface supports multicast.
303 */
304 if (((m->m_flags & M_MCAST) &&
305 (ifp->if_flags & IFF_MULTICAST) == 0) ||
306 ((m->m_flags & M_BCAST) &&
307 (ifp->if_flags & IFF_BROADCAST) == 0)) {
308 ipstat.ips_noroute++;
309 error = ENETUNREACH;
310 goto bad;
311 }
312 /*
313 * If source address not specified yet, use an address
314 * of outgoing interface.
315 */
316 if (in_nullhost(ip->ip_src)) {
317 struct in_ifaddr *ia;
318
319 IFP_TO_IA(ifp, ia);
320 if (!ia) {
321 error = EADDRNOTAVAIL;
322 goto bad;
323 }
324 ip->ip_src = ia->ia_addr.sin_addr;
325 }
326
327 IN_LOOKUP_MULTI(ip->ip_dst, ifp, inm);
328 if (inm != NULL &&
329 (imo == NULL || imo->imo_multicast_loop)) {
330 /*
331 * If we belong to the destination multicast group
332 * on the outgoing interface, and the caller did not
333 * forbid loopback, loop back a copy.
334 */
335 ip_mloopback(ifp, m, dst);
336 }
337 #ifdef MROUTING
338 else {
339 /*
340 * If we are acting as a multicast router, perform
341 * multicast forwarding as if the packet had just
342 * arrived on the interface to which we are about
343 * to send. The multicast forwarding function
344 * recursively calls this function, using the
345 * IP_FORWARDING flag to prevent infinite recursion.
346 *
347 * Multicasts that are looped back by ip_mloopback(),
348 * above, will be forwarded by the ip_input() routine,
349 * if necessary.
350 */
351 extern struct socket *ip_mrouter;
352
353 if (ip_mrouter && (flags & IP_FORWARDING) == 0) {
354 if (ip_mforward(m, ifp) != 0) {
355 m_freem(m);
356 goto done;
357 }
358 }
359 }
360 #endif
361 /*
362 * Multicasts with a time-to-live of zero may be looped-
363 * back, above, but must not be transmitted on a network.
364 * Also, multicasts addressed to the loopback interface
365 * are not sent -- the above call to ip_mloopback() will
366 * loop back a copy if this host actually belongs to the
367 * destination group on the loopback interface.
368 */
369 if (ip->ip_ttl == 0 || (ifp->if_flags & IFF_LOOPBACK) != 0) {
370 m_freem(m);
371 goto done;
372 }
373
374 goto sendit;
375 }
376 #ifndef notdef
377 /*
378 * If source address not specified yet, use address
379 * of outgoing interface.
380 */
381 if (in_nullhost(ip->ip_src))
382 ip->ip_src = ia->ia_addr.sin_addr;
383 #endif
384
385 /*
386 * packets with Class-D address as source are not valid per
387 * RFC 1112
388 */
389 if (IN_MULTICAST(ip->ip_src.s_addr)) {
390 ipstat.ips_odropped++;
391 error = EADDRNOTAVAIL;
392 goto bad;
393 }
394
395 /*
396 * Look for broadcast address and
397 * and verify user is allowed to send
398 * such a packet.
399 */
400 if (in_broadcast(dst->sin_addr, ifp)) {
401 if ((ifp->if_flags & IFF_BROADCAST) == 0) {
402 error = EADDRNOTAVAIL;
403 goto bad;
404 }
405 if ((flags & IP_ALLOWBROADCAST) == 0) {
406 error = EACCES;
407 goto bad;
408 }
409 /* don't allow broadcast messages to be fragmented */
410 if ((u_int16_t)ip->ip_len > ifp->if_mtu) {
411 error = EMSGSIZE;
412 goto bad;
413 }
414 m->m_flags |= M_BCAST;
415 } else
416 m->m_flags &= ~M_BCAST;
417
418 sendit:
419 /*
420 * If we're doing Path MTU Discovery, we need to set DF unless
421 * the route's MTU is locked.
422 */
423 if ((flags & IP_MTUDISC) != 0 && ro->ro_rt != NULL &&
424 (ro->ro_rt->rt_rmx.rmx_locks & RTV_MTU) == 0)
425 ip->ip_off |= IP_DF;
426
427 /*
428 * Remember the current ip_len and ip_off, and swap them into
429 * network order.
430 */
431 ip_len = ip->ip_len;
432
433 HTONS(ip->ip_len);
434 HTONS(ip->ip_off);
435
436 #ifdef IPSEC
437 /* get SP for this packet */
438 if (so == NULL)
439 sp = ipsec4_getpolicybyaddr(m, IPSEC_DIR_OUTBOUND, flags, &error);
440 else
441 sp = ipsec4_getpolicybysock(m, IPSEC_DIR_OUTBOUND, so, &error);
442
443 if (sp == NULL) {
444 ipsecstat.out_inval++;
445 goto bad;
446 }
447
448 error = 0;
449
450 /* check policy */
451 switch (sp->policy) {
452 case IPSEC_POLICY_DISCARD:
453 /*
454 * This packet is just discarded.
455 */
456 ipsecstat.out_polvio++;
457 goto bad;
458
459 case IPSEC_POLICY_BYPASS:
460 case IPSEC_POLICY_NONE:
461 /* no need to do IPsec. */
462 goto skip_ipsec;
463
464 case IPSEC_POLICY_IPSEC:
465 if (sp->req == NULL) {
466 /* XXX should be panic ? */
467 printf("ip_output: No IPsec request specified.\n");
468 error = EINVAL;
469 goto bad;
470 }
471 break;
472
473 case IPSEC_POLICY_ENTRUST:
474 default:
475 printf("ip_output: Invalid policy found. %d\n", sp->policy);
476 }
477
478 /*
479 * ipsec4_output() expects ip_len and ip_off in network
480 * order. They have been set to network order above.
481 */
482
483 {
484 struct ipsec_output_state state;
485 bzero(&state, sizeof(state));
486 state.m = m;
487 if (flags & IP_ROUTETOIF) {
488 state.ro = &iproute;
489 bzero(&iproute, sizeof(iproute));
490 } else
491 state.ro = ro;
492 state.dst = (struct sockaddr *)dst;
493
494 /*
495 * We can't defer the checksum of payload data if
496 * we're about to encrypt/authenticate it.
497 *
498 * XXX When we support crypto offloading functions of
499 * XXX network interfaces, we need to reconsider this,
500 * XXX since it's likely that they'll support checksumming,
501 * XXX as well.
502 */
503 if (m->m_pkthdr.csum_flags & (M_CSUM_TCPv4|M_CSUM_UDPv4)) {
504 in_delayed_cksum(m);
505 m->m_pkthdr.csum_flags &= ~(M_CSUM_TCPv4|M_CSUM_UDPv4);
506 }
507
508 error = ipsec4_output(&state, sp, flags);
509
510 m = state.m;
511 if (flags & IP_ROUTETOIF) {
512 /*
513 * if we have tunnel mode SA, we may need to ignore
514 * IP_ROUTETOIF.
515 */
516 if (state.ro != &iproute || state.ro->ro_rt != NULL) {
517 flags &= ~IP_ROUTETOIF;
518 ro = state.ro;
519 }
520 } else
521 ro = state.ro;
522 dst = (struct sockaddr_in *)state.dst;
523 if (error) {
524 /* mbuf is already reclaimed in ipsec4_output. */
525 m0 = NULL;
526 switch (error) {
527 case EHOSTUNREACH:
528 case ENETUNREACH:
529 case EMSGSIZE:
530 case ENOBUFS:
531 case ENOMEM:
532 break;
533 default:
534 printf("ip4_output (ipsec): error code %d\n", error);
535 /*fall through*/
536 case ENOENT:
537 /* don't show these error codes to the user */
538 error = 0;
539 break;
540 }
541 goto bad;
542 }
543
544 /* be sure to update variables that are affected by ipsec4_output() */
545 ip = mtod(m, struct ip *);
546 #ifdef _IP_VHL
547 hlen = IP_VHL_HL(ip->ip_vhl) << 2;
548 #else
549 hlen = ip->ip_hl << 2;
550 #endif
551 ip_len = ntohs(ip->ip_len);
552
553 if (ro->ro_rt == NULL) {
554 if ((flags & IP_ROUTETOIF) == 0) {
555 printf("ip_output: "
556 "can't update route after IPsec processing\n");
557 error = EHOSTUNREACH; /*XXX*/
558 goto bad;
559 }
560 } else {
561 /* nobody uses ia beyond here */
562 if (state.encap)
563 ifp = ro->ro_rt->rt_ifp;
564 }
565 }
566
567 skip_ipsec:
568 #endif /*IPSEC*/
569
570 #ifdef PFIL_HOOKS
571 /*
572 * Run through list of hooks for output packets.
573 */
574 if ((error = pfil_run_hooks(&inet_pfil_hook, &m, ifp,
575 PFIL_OUT)) != 0)
576 goto done;
577 if (m == NULL)
578 goto done;
579
580 ip = mtod(m, struct ip *);
581 #endif /* PFIL_HOOKS */
582
583 /*
584 * If small enough for mtu of path, can just send directly.
585 */
586 if (ip_len <= mtu) {
587 #if IFA_STATS
588 /*
589 * search for the source address structure to
590 * maintain output statistics.
591 */
592 INADDR_TO_IA(ip->ip_src, ia);
593 if (ia)
594 ia->ia_ifa.ifa_data.ifad_outbytes += ip_len;
595 #endif
596 /*
597 * Always initialize the sum to 0! Some HW assisted
598 * checksumming requires this.
599 */
600 ip->ip_sum = 0;
601 m->m_pkthdr.csum_flags |= M_CSUM_IPv4;
602
603 sw_csum = m->m_pkthdr.csum_flags & ~ifp->if_csum_flags_tx;
604
605 /*
606 * Perform any checksums that the hardware can't do
607 * for us.
608 *
609 * XXX Does any hardware require the {th,uh}_sum
610 * XXX fields to be 0?
611 */
612 if (sw_csum & M_CSUM_IPv4)
613 ip->ip_sum = in_cksum(m, hlen);
614 if (sw_csum & (M_CSUM_TCPv4|M_CSUM_UDPv4)) {
615 in_delayed_cksum(m);
616 sw_csum &= ~(M_CSUM_TCPv4|M_CSUM_UDPv4);
617 }
618 m->m_pkthdr.csum_flags &= ifp->if_csum_flags_tx;
619
620 #ifdef IPSEC
621 /* clean ipsec history once it goes out of the node */
622 ipsec_delaux(m);
623 #endif
624 error = (*ifp->if_output)(ifp, m, sintosa(dst), ro->ro_rt);
625 goto done;
626 }
627
628 /*
629 * We can't use HW checksumming if we're about to
630 * to fragment the packet.
631 *
632 * XXX Some hardware can do this.
633 */
634 if (m->m_pkthdr.csum_flags & (M_CSUM_TCPv4|M_CSUM_UDPv4)) {
635 in_delayed_cksum(m);
636 m->m_pkthdr.csum_flags &= ~(M_CSUM_TCPv4|M_CSUM_UDPv4);
637 }
638
639 /*
640 * Too large for interface; fragment if possible.
641 * Must be able to put at least 8 bytes per fragment.
642 *
643 * Note we swap ip_len and ip_off into host order to make
644 * the logic below a little simpler.
645 */
646
647 NTOHS(ip->ip_len);
648 NTOHS(ip->ip_off);
649
650 if (ip->ip_off & IP_DF) {
651 if (flags & IP_RETURNMTU)
652 *mtu_p = mtu;
653 error = EMSGSIZE;
654 ipstat.ips_cantfrag++;
655 goto bad;
656 }
657 len = (mtu - hlen) &~ 7;
658 if (len < 8) {
659 error = EMSGSIZE;
660 goto bad;
661 }
662
663 {
664 int mhlen, firstlen = len;
665 struct mbuf **mnext = &m->m_nextpkt;
666 int fragments = 0;
667 int s;
668
669 /*
670 * Loop through length of segment after first fragment,
671 * make new header and copy data of each part and link onto chain.
672 */
673 m0 = m;
674 mhlen = sizeof (struct ip);
675 for (off = hlen + len; off < (u_int16_t)ip->ip_len; off += len) {
676 MGETHDR(m, M_DONTWAIT, MT_HEADER);
677 if (m == 0) {
678 error = ENOBUFS;
679 ipstat.ips_odropped++;
680 goto sendorfree;
681 }
682 *mnext = m;
683 mnext = &m->m_nextpkt;
684 m->m_data += max_linkhdr;
685 mhip = mtod(m, struct ip *);
686 *mhip = *ip;
687 /* we must inherit MCAST and BCAST flags */
688 m->m_flags |= m0->m_flags & (M_MCAST|M_BCAST);
689 if (hlen > sizeof (struct ip)) {
690 mhlen = ip_optcopy(ip, mhip) + sizeof (struct ip);
691 mhip->ip_hl = mhlen >> 2;
692 }
693 m->m_len = mhlen;
694 mhip->ip_off = ((off - hlen) >> 3) + (ip->ip_off & ~IP_MF);
695 if (ip->ip_off & IP_MF)
696 mhip->ip_off |= IP_MF;
697 if (off + len >= (u_int16_t)ip->ip_len)
698 len = (u_int16_t)ip->ip_len - off;
699 else
700 mhip->ip_off |= IP_MF;
701 mhip->ip_len = htons((u_int16_t)(len + mhlen));
702 m->m_next = m_copy(m0, off, len);
703 if (m->m_next == 0) {
704 error = ENOBUFS; /* ??? */
705 ipstat.ips_odropped++;
706 goto sendorfree;
707 }
708 m->m_pkthdr.len = mhlen + len;
709 m->m_pkthdr.rcvif = (struct ifnet *)0;
710 HTONS(mhip->ip_off);
711 mhip->ip_sum = 0;
712 mhip->ip_sum = in_cksum(m, mhlen);
713 ipstat.ips_ofragments++;
714 fragments++;
715 }
716 /*
717 * Update first fragment by trimming what's been copied out
718 * and updating header, then send each fragment (in order).
719 */
720 m = m0;
721 m_adj(m, hlen + firstlen - (u_int16_t)ip->ip_len);
722 m->m_pkthdr.len = hlen + firstlen;
723 ip->ip_len = htons((u_int16_t)m->m_pkthdr.len);
724 ip->ip_off |= IP_MF;
725 HTONS(ip->ip_off);
726 ip->ip_sum = 0;
727 ip->ip_sum = in_cksum(m, hlen);
728 sendorfree:
729 /*
730 * If there is no room for all the fragments, don't queue
731 * any of them.
732 */
733 s = splnet();
734 if (ifp->if_snd.ifq_maxlen - ifp->if_snd.ifq_len < fragments)
735 error = ENOBUFS;
736 splx(s);
737 for (m = m0; m; m = m0) {
738 m0 = m->m_nextpkt;
739 m->m_nextpkt = 0;
740 if (error == 0) {
741 #if IFA_STATS
742 /*
743 * search for the source address structure to
744 * maintain output statistics.
745 */
746 INADDR_TO_IA(ip->ip_src, ia);
747 if (ia) {
748 ia->ia_ifa.ifa_data.ifad_outbytes +=
749 ntohs(ip->ip_len);
750 }
751 #endif
752 #ifdef IPSEC
753 /* clean ipsec history once it goes out of the node */
754 ipsec_delaux(m);
755 #endif
756 error = (*ifp->if_output)(ifp, m, sintosa(dst),
757 ro->ro_rt);
758 } else
759 m_freem(m);
760 }
761
762 if (error == 0)
763 ipstat.ips_fragmented++;
764 }
765 done:
766 if (ro == &iproute && (flags & IP_ROUTETOIF) == 0 && ro->ro_rt) {
767 RTFREE(ro->ro_rt);
768 ro->ro_rt = 0;
769 }
770
771 #ifdef IPSEC
772 if (sp != NULL) {
773 KEYDEBUG(KEYDEBUG_IPSEC_STAMP,
774 printf("DP ip_output call free SP:%p\n", sp));
775 key_freesp(sp);
776 }
777 #endif /* IPSEC */
778
779 return (error);
780 bad:
781 m_freem(m);
782 goto done;
783 }
784
785 /*
786 * Process a delayed payload checksum calculation.
787 */
788 void
789 in_delayed_cksum(struct mbuf *m)
790 {
791 struct ip *ip;
792 u_int16_t csum, offset;
793
794 ip = mtod(m, struct ip *);
795 offset = ip->ip_hl << 2;
796 csum = in4_cksum(m, 0, offset, ntohs(ip->ip_len) - offset);
797 if (csum == 0 && (m->m_pkthdr.csum_flags & M_CSUM_UDPv4) != 0)
798 csum = 0xffff;
799
800 offset += m->m_pkthdr.csum_data; /* checksum offset */
801
802 if ((offset + sizeof(u_int16_t)) > m->m_len) {
803 /* This happen when ip options were inserted
804 printf("in_delayed_cksum: pullup len %d off %d proto %d\n",
805 m->m_len, offset, ip->ip_p);
806 */
807 m_copyback(m, offset, sizeof(csum), (caddr_t) &csum);
808 } else
809 *(u_int16_t *)(mtod(m, caddr_t) + offset) = csum;
810 }
811
812 /*
813 * Determine the maximum length of the options to be inserted;
814 * we would far rather allocate too much space rather than too little.
815 */
816
817 u_int
818 ip_optlen(inp)
819 struct inpcb *inp;
820 {
821 struct mbuf *m = inp->inp_options;
822
823 if (m && m->m_len > offsetof(struct ipoption, ipopt_dst))
824 return(m->m_len - offsetof(struct ipoption, ipopt_dst));
825 else
826 return 0;
827 }
828
829
830 /*
831 * Insert IP options into preformed packet.
832 * Adjust IP destination as required for IP source routing,
833 * as indicated by a non-zero in_addr at the start of the options.
834 */
835 static struct mbuf *
836 ip_insertoptions(m, opt, phlen)
837 struct mbuf *m;
838 struct mbuf *opt;
839 int *phlen;
840 {
841 struct ipoption *p = mtod(opt, struct ipoption *);
842 struct mbuf *n;
843 struct ip *ip = mtod(m, struct ip *);
844 unsigned optlen;
845
846 optlen = opt->m_len - sizeof(p->ipopt_dst);
847 if (optlen + (u_int16_t)ip->ip_len > IP_MAXPACKET)
848 return (m); /* XXX should fail */
849 if (!in_nullhost(p->ipopt_dst))
850 ip->ip_dst = p->ipopt_dst;
851 if (m->m_flags & M_EXT || m->m_data - optlen < m->m_pktdat) {
852 MGETHDR(n, M_DONTWAIT, MT_HEADER);
853 if (n == 0)
854 return (m);
855 M_COPY_PKTHDR(n, m);
856 m->m_flags &= ~M_PKTHDR;
857 m->m_len -= sizeof(struct ip);
858 m->m_data += sizeof(struct ip);
859 n->m_next = m;
860 m = n;
861 m->m_len = optlen + sizeof(struct ip);
862 m->m_data += max_linkhdr;
863 bcopy((caddr_t)ip, mtod(m, caddr_t), sizeof(struct ip));
864 } else {
865 m->m_data -= optlen;
866 m->m_len += optlen;
867 memmove(mtod(m, caddr_t), ip, sizeof(struct ip));
868 }
869 m->m_pkthdr.len += optlen;
870 ip = mtod(m, struct ip *);
871 bcopy((caddr_t)p->ipopt_list, (caddr_t)(ip + 1), (unsigned)optlen);
872 *phlen = sizeof(struct ip) + optlen;
873 ip->ip_len += optlen;
874 return (m);
875 }
876
877 /*
878 * Copy options from ip to jp,
879 * omitting those not copied during fragmentation.
880 */
881 int
882 ip_optcopy(ip, jp)
883 struct ip *ip, *jp;
884 {
885 u_char *cp, *dp;
886 int opt, optlen, cnt;
887
888 cp = (u_char *)(ip + 1);
889 dp = (u_char *)(jp + 1);
890 cnt = (ip->ip_hl << 2) - sizeof (struct ip);
891 for (; cnt > 0; cnt -= optlen, cp += optlen) {
892 opt = cp[0];
893 if (opt == IPOPT_EOL)
894 break;
895 if (opt == IPOPT_NOP) {
896 /* Preserve for IP mcast tunnel's LSRR alignment. */
897 *dp++ = IPOPT_NOP;
898 optlen = 1;
899 continue;
900 }
901 #ifdef DIAGNOSTIC
902 if (cnt < IPOPT_OLEN + sizeof(*cp))
903 panic("malformed IPv4 option passed to ip_optcopy");
904 #endif
905 optlen = cp[IPOPT_OLEN];
906 #ifdef DIAGNOSTIC
907 if (optlen < IPOPT_OLEN + sizeof(*cp) || optlen > cnt)
908 panic("malformed IPv4 option passed to ip_optcopy");
909 #endif
910 /* bogus lengths should have been caught by ip_dooptions */
911 if (optlen > cnt)
912 optlen = cnt;
913 if (IPOPT_COPIED(opt)) {
914 bcopy((caddr_t)cp, (caddr_t)dp, (unsigned)optlen);
915 dp += optlen;
916 }
917 }
918 for (optlen = dp - (u_char *)(jp+1); optlen & 0x3; optlen++)
919 *dp++ = IPOPT_EOL;
920 return (optlen);
921 }
922
923 /*
924 * IP socket option processing.
925 */
926 int
927 ip_ctloutput(op, so, level, optname, mp)
928 int op;
929 struct socket *so;
930 int level, optname;
931 struct mbuf **mp;
932 {
933 struct inpcb *inp = sotoinpcb(so);
934 struct mbuf *m = *mp;
935 int optval = 0;
936 int error = 0;
937 #ifdef IPSEC
938 #ifdef __NetBSD__
939 struct proc *p = curproc; /*XXX*/
940 #endif
941 #endif
942
943 if (level != IPPROTO_IP) {
944 error = EINVAL;
945 if (op == PRCO_SETOPT && *mp)
946 (void) m_free(*mp);
947 } else switch (op) {
948
949 case PRCO_SETOPT:
950 switch (optname) {
951 case IP_OPTIONS:
952 #ifdef notyet
953 case IP_RETOPTS:
954 return (ip_pcbopts(optname, &inp->inp_options, m));
955 #else
956 return (ip_pcbopts(&inp->inp_options, m));
957 #endif
958
959 case IP_TOS:
960 case IP_TTL:
961 case IP_RECVOPTS:
962 case IP_RECVRETOPTS:
963 case IP_RECVDSTADDR:
964 case IP_RECVIF:
965 if (m == NULL || m->m_len != sizeof(int))
966 error = EINVAL;
967 else {
968 optval = *mtod(m, int *);
969 switch (optname) {
970
971 case IP_TOS:
972 inp->inp_ip.ip_tos = optval;
973 break;
974
975 case IP_TTL:
976 inp->inp_ip.ip_ttl = optval;
977 break;
978 #define OPTSET(bit) \
979 if (optval) \
980 inp->inp_flags |= bit; \
981 else \
982 inp->inp_flags &= ~bit;
983
984 case IP_RECVOPTS:
985 OPTSET(INP_RECVOPTS);
986 break;
987
988 case IP_RECVRETOPTS:
989 OPTSET(INP_RECVRETOPTS);
990 break;
991
992 case IP_RECVDSTADDR:
993 OPTSET(INP_RECVDSTADDR);
994 break;
995
996 case IP_RECVIF:
997 OPTSET(INP_RECVIF);
998 break;
999 }
1000 }
1001 break;
1002 #undef OPTSET
1003
1004 case IP_MULTICAST_IF:
1005 case IP_MULTICAST_TTL:
1006 case IP_MULTICAST_LOOP:
1007 case IP_ADD_MEMBERSHIP:
1008 case IP_DROP_MEMBERSHIP:
1009 error = ip_setmoptions(optname, &inp->inp_moptions, m);
1010 break;
1011
1012 case IP_PORTRANGE:
1013 if (m == 0 || m->m_len != sizeof(int))
1014 error = EINVAL;
1015 else {
1016 optval = *mtod(m, int *);
1017
1018 switch (optval) {
1019
1020 case IP_PORTRANGE_DEFAULT:
1021 case IP_PORTRANGE_HIGH:
1022 inp->inp_flags &= ~(INP_LOWPORT);
1023 break;
1024
1025 case IP_PORTRANGE_LOW:
1026 inp->inp_flags |= INP_LOWPORT;
1027 break;
1028
1029 default:
1030 error = EINVAL;
1031 break;
1032 }
1033 }
1034 break;
1035
1036 #ifdef IPSEC
1037 case IP_IPSEC_POLICY:
1038 {
1039 caddr_t req = NULL;
1040 size_t len = 0;
1041 int priv = 0;
1042
1043 #ifdef __NetBSD__
1044 if (p == 0 || suser(p->p_ucred, &p->p_acflag))
1045 priv = 0;
1046 else
1047 priv = 1;
1048 #else
1049 priv = (in6p->in6p_socket->so_state & SS_PRIV);
1050 #endif
1051 if (m) {
1052 req = mtod(m, caddr_t);
1053 len = m->m_len;
1054 }
1055 error = ipsec4_set_policy(inp, optname, req, len, priv);
1056 break;
1057 }
1058 #endif /*IPSEC*/
1059
1060 default:
1061 error = ENOPROTOOPT;
1062 break;
1063 }
1064 if (m)
1065 (void)m_free(m);
1066 break;
1067
1068 case PRCO_GETOPT:
1069 switch (optname) {
1070 case IP_OPTIONS:
1071 case IP_RETOPTS:
1072 *mp = m = m_get(M_WAIT, MT_SOOPTS);
1073 if (inp->inp_options) {
1074 m->m_len = inp->inp_options->m_len;
1075 bcopy(mtod(inp->inp_options, caddr_t),
1076 mtod(m, caddr_t), (unsigned)m->m_len);
1077 } else
1078 m->m_len = 0;
1079 break;
1080
1081 case IP_TOS:
1082 case IP_TTL:
1083 case IP_RECVOPTS:
1084 case IP_RECVRETOPTS:
1085 case IP_RECVDSTADDR:
1086 case IP_RECVIF:
1087 case IP_ERRORMTU:
1088 *mp = m = m_get(M_WAIT, MT_SOOPTS);
1089 m->m_len = sizeof(int);
1090 switch (optname) {
1091
1092 case IP_TOS:
1093 optval = inp->inp_ip.ip_tos;
1094 break;
1095
1096 case IP_TTL:
1097 optval = inp->inp_ip.ip_ttl;
1098 break;
1099
1100 case IP_ERRORMTU:
1101 optval = inp->inp_errormtu;
1102 break;
1103
1104 #define OPTBIT(bit) (inp->inp_flags & bit ? 1 : 0)
1105
1106 case IP_RECVOPTS:
1107 optval = OPTBIT(INP_RECVOPTS);
1108 break;
1109
1110 case IP_RECVRETOPTS:
1111 optval = OPTBIT(INP_RECVRETOPTS);
1112 break;
1113
1114 case IP_RECVDSTADDR:
1115 optval = OPTBIT(INP_RECVDSTADDR);
1116 break;
1117
1118 case IP_RECVIF:
1119 optval = OPTBIT(INP_RECVIF);
1120 break;
1121 }
1122 *mtod(m, int *) = optval;
1123 break;
1124
1125 #ifdef IPSEC
1126 case IP_IPSEC_POLICY:
1127 {
1128 caddr_t req = NULL;
1129 size_t len = 0;
1130
1131 if (m) {
1132 req = mtod(m, caddr_t);
1133 len = m->m_len;
1134 }
1135 error = ipsec4_get_policy(inp, req, len, mp);
1136 break;
1137 }
1138 #endif /*IPSEC*/
1139
1140 case IP_MULTICAST_IF:
1141 case IP_MULTICAST_TTL:
1142 case IP_MULTICAST_LOOP:
1143 case IP_ADD_MEMBERSHIP:
1144 case IP_DROP_MEMBERSHIP:
1145 error = ip_getmoptions(optname, inp->inp_moptions, mp);
1146 break;
1147
1148 case IP_PORTRANGE:
1149 *mp = m = m_get(M_WAIT, MT_SOOPTS);
1150 m->m_len = sizeof(int);
1151
1152 if (inp->inp_flags & INP_LOWPORT)
1153 optval = IP_PORTRANGE_LOW;
1154 else
1155 optval = IP_PORTRANGE_DEFAULT;
1156
1157 *mtod(m, int *) = optval;
1158 break;
1159
1160 default:
1161 error = ENOPROTOOPT;
1162 break;
1163 }
1164 break;
1165 }
1166 return (error);
1167 }
1168
1169 /*
1170 * Set up IP options in pcb for insertion in output packets.
1171 * Store in mbuf with pointer in pcbopt, adding pseudo-option
1172 * with destination address if source routed.
1173 */
1174 int
1175 #ifdef notyet
1176 ip_pcbopts(optname, pcbopt, m)
1177 int optname;
1178 #else
1179 ip_pcbopts(pcbopt, m)
1180 #endif
1181 struct mbuf **pcbopt;
1182 struct mbuf *m;
1183 {
1184 int cnt, optlen;
1185 u_char *cp;
1186 u_char opt;
1187
1188 /* turn off any old options */
1189 if (*pcbopt)
1190 (void)m_free(*pcbopt);
1191 *pcbopt = 0;
1192 if (m == (struct mbuf *)0 || m->m_len == 0) {
1193 /*
1194 * Only turning off any previous options.
1195 */
1196 if (m)
1197 (void)m_free(m);
1198 return (0);
1199 }
1200
1201 #ifndef __vax__
1202 if (m->m_len % sizeof(int32_t))
1203 goto bad;
1204 #endif
1205 /*
1206 * IP first-hop destination address will be stored before
1207 * actual options; move other options back
1208 * and clear it when none present.
1209 */
1210 if (m->m_data + m->m_len + sizeof(struct in_addr) >= &m->m_dat[MLEN])
1211 goto bad;
1212 cnt = m->m_len;
1213 m->m_len += sizeof(struct in_addr);
1214 cp = mtod(m, u_char *) + sizeof(struct in_addr);
1215 memmove(cp, mtod(m, caddr_t), (unsigned)cnt);
1216 bzero(mtod(m, caddr_t), sizeof(struct in_addr));
1217
1218 for (; cnt > 0; cnt -= optlen, cp += optlen) {
1219 opt = cp[IPOPT_OPTVAL];
1220 if (opt == IPOPT_EOL)
1221 break;
1222 if (opt == IPOPT_NOP)
1223 optlen = 1;
1224 else {
1225 if (cnt < IPOPT_OLEN + sizeof(*cp))
1226 goto bad;
1227 optlen = cp[IPOPT_OLEN];
1228 if (optlen < IPOPT_OLEN + sizeof(*cp) || optlen > cnt)
1229 goto bad;
1230 }
1231 switch (opt) {
1232
1233 default:
1234 break;
1235
1236 case IPOPT_LSRR:
1237 case IPOPT_SSRR:
1238 /*
1239 * user process specifies route as:
1240 * ->A->B->C->D
1241 * D must be our final destination (but we can't
1242 * check that since we may not have connected yet).
1243 * A is first hop destination, which doesn't appear in
1244 * actual IP option, but is stored before the options.
1245 */
1246 if (optlen < IPOPT_MINOFF - 1 + sizeof(struct in_addr))
1247 goto bad;
1248 m->m_len -= sizeof(struct in_addr);
1249 cnt -= sizeof(struct in_addr);
1250 optlen -= sizeof(struct in_addr);
1251 cp[IPOPT_OLEN] = optlen;
1252 /*
1253 * Move first hop before start of options.
1254 */
1255 bcopy((caddr_t)&cp[IPOPT_OFFSET+1], mtod(m, caddr_t),
1256 sizeof(struct in_addr));
1257 /*
1258 * Then copy rest of options back
1259 * to close up the deleted entry.
1260 */
1261 memmove(&cp[IPOPT_OFFSET+1],
1262 (caddr_t)(&cp[IPOPT_OFFSET+1] + sizeof(struct in_addr)),
1263 (unsigned)cnt + sizeof(struct in_addr));
1264 break;
1265 }
1266 }
1267 if (m->m_len > MAX_IPOPTLEN + sizeof(struct in_addr))
1268 goto bad;
1269 *pcbopt = m;
1270 return (0);
1271
1272 bad:
1273 (void)m_free(m);
1274 return (EINVAL);
1275 }
1276
1277 /*
1278 * following RFC1724 section 3.3, 0.0.0.0/8 is interpreted as interface index.
1279 */
1280 static struct ifnet *
1281 ip_multicast_if(a, ifindexp)
1282 struct in_addr *a;
1283 int *ifindexp;
1284 {
1285 int ifindex;
1286 struct ifnet *ifp;
1287
1288 if (ifindexp)
1289 *ifindexp = 0;
1290 if (ntohl(a->s_addr) >> 24 == 0) {
1291 ifindex = ntohl(a->s_addr) & 0xffffff;
1292 if (ifindex < 0 || if_index < ifindex)
1293 return NULL;
1294 ifp = ifindex2ifnet[ifindex];
1295 if (ifindexp)
1296 *ifindexp = ifindex;
1297 } else {
1298 INADDR_TO_IFP(*a, ifp);
1299 }
1300 return ifp;
1301 }
1302
1303 /*
1304 * Set the IP multicast options in response to user setsockopt().
1305 */
1306 int
1307 ip_setmoptions(optname, imop, m)
1308 int optname;
1309 struct ip_moptions **imop;
1310 struct mbuf *m;
1311 {
1312 int error = 0;
1313 u_char loop;
1314 int i;
1315 struct in_addr addr;
1316 struct ip_mreq *mreq;
1317 struct ifnet *ifp;
1318 struct ip_moptions *imo = *imop;
1319 struct route ro;
1320 struct sockaddr_in *dst;
1321 int ifindex;
1322
1323 if (imo == NULL) {
1324 /*
1325 * No multicast option buffer attached to the pcb;
1326 * allocate one and initialize to default values.
1327 */
1328 imo = (struct ip_moptions *)malloc(sizeof(*imo), M_IPMOPTS,
1329 M_WAITOK);
1330
1331 if (imo == NULL)
1332 return (ENOBUFS);
1333 *imop = imo;
1334 imo->imo_multicast_ifp = NULL;
1335 imo->imo_multicast_addr.s_addr = INADDR_ANY;
1336 imo->imo_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
1337 imo->imo_multicast_loop = IP_DEFAULT_MULTICAST_LOOP;
1338 imo->imo_num_memberships = 0;
1339 }
1340
1341 switch (optname) {
1342
1343 case IP_MULTICAST_IF:
1344 /*
1345 * Select the interface for outgoing multicast packets.
1346 */
1347 if (m == NULL || m->m_len != sizeof(struct in_addr)) {
1348 error = EINVAL;
1349 break;
1350 }
1351 addr = *(mtod(m, struct in_addr *));
1352 /*
1353 * INADDR_ANY is used to remove a previous selection.
1354 * When no interface is selected, a default one is
1355 * chosen every time a multicast packet is sent.
1356 */
1357 if (in_nullhost(addr)) {
1358 imo->imo_multicast_ifp = NULL;
1359 break;
1360 }
1361 /*
1362 * The selected interface is identified by its local
1363 * IP address. Find the interface and confirm that
1364 * it supports multicasting.
1365 */
1366 ifp = ip_multicast_if(&addr, &ifindex);
1367 if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
1368 error = EADDRNOTAVAIL;
1369 break;
1370 }
1371 imo->imo_multicast_ifp = ifp;
1372 if (ifindex)
1373 imo->imo_multicast_addr = addr;
1374 else
1375 imo->imo_multicast_addr.s_addr = INADDR_ANY;
1376 break;
1377
1378 case IP_MULTICAST_TTL:
1379 /*
1380 * Set the IP time-to-live for outgoing multicast packets.
1381 */
1382 if (m == NULL || m->m_len != 1) {
1383 error = EINVAL;
1384 break;
1385 }
1386 imo->imo_multicast_ttl = *(mtod(m, u_char *));
1387 break;
1388
1389 case IP_MULTICAST_LOOP:
1390 /*
1391 * Set the loopback flag for outgoing multicast packets.
1392 * Must be zero or one.
1393 */
1394 if (m == NULL || m->m_len != 1 ||
1395 (loop = *(mtod(m, u_char *))) > 1) {
1396 error = EINVAL;
1397 break;
1398 }
1399 imo->imo_multicast_loop = loop;
1400 break;
1401
1402 case IP_ADD_MEMBERSHIP:
1403 /*
1404 * Add a multicast group membership.
1405 * Group must be a valid IP multicast address.
1406 */
1407 if (m == NULL || m->m_len != sizeof(struct ip_mreq)) {
1408 error = EINVAL;
1409 break;
1410 }
1411 mreq = mtod(m, struct ip_mreq *);
1412 if (!IN_MULTICAST(mreq->imr_multiaddr.s_addr)) {
1413 error = EINVAL;
1414 break;
1415 }
1416 /*
1417 * If no interface address was provided, use the interface of
1418 * the route to the given multicast address.
1419 */
1420 if (in_nullhost(mreq->imr_interface)) {
1421 bzero((caddr_t)&ro, sizeof(ro));
1422 ro.ro_rt = NULL;
1423 dst = satosin(&ro.ro_dst);
1424 dst->sin_len = sizeof(*dst);
1425 dst->sin_family = AF_INET;
1426 dst->sin_addr = mreq->imr_multiaddr;
1427 rtalloc(&ro);
1428 if (ro.ro_rt == NULL) {
1429 error = EADDRNOTAVAIL;
1430 break;
1431 }
1432 ifp = ro.ro_rt->rt_ifp;
1433 rtfree(ro.ro_rt);
1434 } else {
1435 ifp = ip_multicast_if(&mreq->imr_interface, NULL);
1436 }
1437 /*
1438 * See if we found an interface, and confirm that it
1439 * supports multicast.
1440 */
1441 if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
1442 error = EADDRNOTAVAIL;
1443 break;
1444 }
1445 /*
1446 * See if the membership already exists or if all the
1447 * membership slots are full.
1448 */
1449 for (i = 0; i < imo->imo_num_memberships; ++i) {
1450 if (imo->imo_membership[i]->inm_ifp == ifp &&
1451 in_hosteq(imo->imo_membership[i]->inm_addr,
1452 mreq->imr_multiaddr))
1453 break;
1454 }
1455 if (i < imo->imo_num_memberships) {
1456 error = EADDRINUSE;
1457 break;
1458 }
1459 if (i == IP_MAX_MEMBERSHIPS) {
1460 error = ETOOMANYREFS;
1461 break;
1462 }
1463 /*
1464 * Everything looks good; add a new record to the multicast
1465 * address list for the given interface.
1466 */
1467 if ((imo->imo_membership[i] =
1468 in_addmulti(&mreq->imr_multiaddr, ifp)) == NULL) {
1469 error = ENOBUFS;
1470 break;
1471 }
1472 ++imo->imo_num_memberships;
1473 break;
1474
1475 case IP_DROP_MEMBERSHIP:
1476 /*
1477 * Drop a multicast group membership.
1478 * Group must be a valid IP multicast address.
1479 */
1480 if (m == NULL || m->m_len != sizeof(struct ip_mreq)) {
1481 error = EINVAL;
1482 break;
1483 }
1484 mreq = mtod(m, struct ip_mreq *);
1485 if (!IN_MULTICAST(mreq->imr_multiaddr.s_addr)) {
1486 error = EINVAL;
1487 break;
1488 }
1489 /*
1490 * If an interface address was specified, get a pointer
1491 * to its ifnet structure.
1492 */
1493 if (in_nullhost(mreq->imr_interface))
1494 ifp = NULL;
1495 else {
1496 ifp = ip_multicast_if(&mreq->imr_interface, NULL);
1497 if (ifp == NULL) {
1498 error = EADDRNOTAVAIL;
1499 break;
1500 }
1501 }
1502 /*
1503 * Find the membership in the membership array.
1504 */
1505 for (i = 0; i < imo->imo_num_memberships; ++i) {
1506 if ((ifp == NULL ||
1507 imo->imo_membership[i]->inm_ifp == ifp) &&
1508 in_hosteq(imo->imo_membership[i]->inm_addr,
1509 mreq->imr_multiaddr))
1510 break;
1511 }
1512 if (i == imo->imo_num_memberships) {
1513 error = EADDRNOTAVAIL;
1514 break;
1515 }
1516 /*
1517 * Give up the multicast address record to which the
1518 * membership points.
1519 */
1520 in_delmulti(imo->imo_membership[i]);
1521 /*
1522 * Remove the gap in the membership array.
1523 */
1524 for (++i; i < imo->imo_num_memberships; ++i)
1525 imo->imo_membership[i-1] = imo->imo_membership[i];
1526 --imo->imo_num_memberships;
1527 break;
1528
1529 default:
1530 error = EOPNOTSUPP;
1531 break;
1532 }
1533
1534 /*
1535 * If all options have default values, no need to keep the mbuf.
1536 */
1537 if (imo->imo_multicast_ifp == NULL &&
1538 imo->imo_multicast_ttl == IP_DEFAULT_MULTICAST_TTL &&
1539 imo->imo_multicast_loop == IP_DEFAULT_MULTICAST_LOOP &&
1540 imo->imo_num_memberships == 0) {
1541 free(*imop, M_IPMOPTS);
1542 *imop = NULL;
1543 }
1544
1545 return (error);
1546 }
1547
1548 /*
1549 * Return the IP multicast options in response to user getsockopt().
1550 */
1551 int
1552 ip_getmoptions(optname, imo, mp)
1553 int optname;
1554 struct ip_moptions *imo;
1555 struct mbuf **mp;
1556 {
1557 u_char *ttl;
1558 u_char *loop;
1559 struct in_addr *addr;
1560 struct in_ifaddr *ia;
1561
1562 *mp = m_get(M_WAIT, MT_SOOPTS);
1563
1564 switch (optname) {
1565
1566 case IP_MULTICAST_IF:
1567 addr = mtod(*mp, struct in_addr *);
1568 (*mp)->m_len = sizeof(struct in_addr);
1569 if (imo == NULL || imo->imo_multicast_ifp == NULL)
1570 *addr = zeroin_addr;
1571 else if (imo->imo_multicast_addr.s_addr) {
1572 /* return the value user has set */
1573 *addr = imo->imo_multicast_addr;
1574 } else {
1575 IFP_TO_IA(imo->imo_multicast_ifp, ia);
1576 *addr = ia ? ia->ia_addr.sin_addr : zeroin_addr;
1577 }
1578 return (0);
1579
1580 case IP_MULTICAST_TTL:
1581 ttl = mtod(*mp, u_char *);
1582 (*mp)->m_len = 1;
1583 *ttl = imo ? imo->imo_multicast_ttl
1584 : IP_DEFAULT_MULTICAST_TTL;
1585 return (0);
1586
1587 case IP_MULTICAST_LOOP:
1588 loop = mtod(*mp, u_char *);
1589 (*mp)->m_len = 1;
1590 *loop = imo ? imo->imo_multicast_loop
1591 : IP_DEFAULT_MULTICAST_LOOP;
1592 return (0);
1593
1594 default:
1595 return (EOPNOTSUPP);
1596 }
1597 }
1598
1599 /*
1600 * Discard the IP multicast options.
1601 */
1602 void
1603 ip_freemoptions(imo)
1604 struct ip_moptions *imo;
1605 {
1606 int i;
1607
1608 if (imo != NULL) {
1609 for (i = 0; i < imo->imo_num_memberships; ++i)
1610 in_delmulti(imo->imo_membership[i]);
1611 free(imo, M_IPMOPTS);
1612 }
1613 }
1614
1615 /*
1616 * Routine called from ip_output() to loop back a copy of an IP multicast
1617 * packet to the input queue of a specified interface. Note that this
1618 * calls the output routine of the loopback "driver", but with an interface
1619 * pointer that might NOT be &loif -- easier than replicating that code here.
1620 */
1621 static void
1622 ip_mloopback(ifp, m, dst)
1623 struct ifnet *ifp;
1624 struct mbuf *m;
1625 struct sockaddr_in *dst;
1626 {
1627 struct ip *ip;
1628 struct mbuf *copym;
1629
1630 copym = m_copy(m, 0, M_COPYALL);
1631 if (copym != NULL
1632 && (copym->m_flags & M_EXT || copym->m_len < sizeof(struct ip)))
1633 copym = m_pullup(copym, sizeof(struct ip));
1634 if (copym != NULL) {
1635 /*
1636 * We don't bother to fragment if the IP length is greater
1637 * than the interface's MTU. Can this possibly matter?
1638 */
1639 ip = mtod(copym, struct ip *);
1640 HTONS(ip->ip_len);
1641 HTONS(ip->ip_off);
1642 ip->ip_sum = 0;
1643 ip->ip_sum = in_cksum(copym, ip->ip_hl << 2);
1644 (void) looutput(ifp, copym, sintosa(dst), NULL);
1645 }
1646 }
1647