ip_output.c revision 1.90 1 /* $NetBSD: ip_output.c,v 1.90 2001/11/21 06:28:08 itojun Exp $ */
2
3 /*
4 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of the project nor the names of its contributors
16 * may be used to endorse or promote products derived from this software
17 * without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 */
31
32 /*-
33 * Copyright (c) 1998 The NetBSD Foundation, Inc.
34 * All rights reserved.
35 *
36 * This code is derived from software contributed to The NetBSD Foundation
37 * by Public Access Networks Corporation ("Panix"). It was developed under
38 * contract to Panix by Eric Haszlakiewicz and Thor Lancelot Simon.
39 *
40 * Redistribution and use in source and binary forms, with or without
41 * modification, are permitted provided that the following conditions
42 * are met:
43 * 1. Redistributions of source code must retain the above copyright
44 * notice, this list of conditions and the following disclaimer.
45 * 2. Redistributions in binary form must reproduce the above copyright
46 * notice, this list of conditions and the following disclaimer in the
47 * documentation and/or other materials provided with the distribution.
48 * 3. All advertising materials mentioning features or use of this software
49 * must display the following acknowledgement:
50 * This product includes software developed by the NetBSD
51 * Foundation, Inc. and its contributors.
52 * 4. Neither the name of The NetBSD Foundation nor the names of its
53 * contributors may be used to endorse or promote products derived
54 * from this software without specific prior written permission.
55 *
56 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
57 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
58 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
59 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
60 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
61 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
62 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
63 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
64 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
65 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
66 * POSSIBILITY OF SUCH DAMAGE.
67 */
68
69 /*
70 * Copyright (c) 1982, 1986, 1988, 1990, 1993
71 * The Regents of the University of California. All rights reserved.
72 *
73 * Redistribution and use in source and binary forms, with or without
74 * modification, are permitted provided that the following conditions
75 * are met:
76 * 1. Redistributions of source code must retain the above copyright
77 * notice, this list of conditions and the following disclaimer.
78 * 2. Redistributions in binary form must reproduce the above copyright
79 * notice, this list of conditions and the following disclaimer in the
80 * documentation and/or other materials provided with the distribution.
81 * 3. All advertising materials mentioning features or use of this software
82 * must display the following acknowledgement:
83 * This product includes software developed by the University of
84 * California, Berkeley and its contributors.
85 * 4. Neither the name of the University nor the names of its contributors
86 * may be used to endorse or promote products derived from this software
87 * without specific prior written permission.
88 *
89 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
90 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
91 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
92 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
93 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
94 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
95 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
96 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
97 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
98 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
99 * SUCH DAMAGE.
100 *
101 * @(#)ip_output.c 8.3 (Berkeley) 1/21/94
102 */
103
104 #include <sys/cdefs.h>
105 __KERNEL_RCSID(0, "$NetBSD: ip_output.c,v 1.90 2001/11/21 06:28:08 itojun Exp $");
106
107 #include "opt_pfil_hooks.h"
108 #include "opt_ipsec.h"
109 #include "opt_mrouting.h"
110
111 #include <sys/param.h>
112 #include <sys/malloc.h>
113 #include <sys/mbuf.h>
114 #include <sys/errno.h>
115 #include <sys/protosw.h>
116 #include <sys/socket.h>
117 #include <sys/socketvar.h>
118 #include <sys/systm.h>
119 #include <sys/proc.h>
120
121 #include <net/if.h>
122 #include <net/route.h>
123 #include <net/pfil.h>
124
125 #include <netinet/in.h>
126 #include <netinet/in_systm.h>
127 #include <netinet/ip.h>
128 #include <netinet/in_pcb.h>
129 #include <netinet/in_var.h>
130 #include <netinet/ip_var.h>
131
132 #ifdef MROUTING
133 #include <netinet/ip_mroute.h>
134 #endif
135
136 #include <machine/stdarg.h>
137
138 #ifdef IPSEC
139 #include <netinet6/ipsec.h>
140 #include <netkey/key.h>
141 #include <netkey/key_debug.h>
142 #endif /*IPSEC*/
143
144 static struct mbuf *ip_insertoptions __P((struct mbuf *, struct mbuf *, int *));
145 static struct ifnet *ip_multicast_if __P((struct in_addr *, int *));
146 static void ip_mloopback
147 __P((struct ifnet *, struct mbuf *, struct sockaddr_in *));
148
149 #ifdef PFIL_HOOKS
150 extern struct pfil_head inet_pfil_hook; /* XXX */
151 #endif
152
153 /*
154 * IP output. The packet in mbuf chain m contains a skeletal IP
155 * header (with len, off, ttl, proto, tos, src, dst).
156 * The mbuf chain containing the packet will be freed.
157 * The mbuf opt, if present, will not be freed.
158 */
159 int
160 #if __STDC__
161 ip_output(struct mbuf *m0, ...)
162 #else
163 ip_output(m0, va_alist)
164 struct mbuf *m0;
165 va_dcl
166 #endif
167 {
168 struct ip *ip, *mhip;
169 struct ifnet *ifp;
170 struct mbuf *m = m0;
171 int hlen = sizeof (struct ip);
172 int len, off, error = 0;
173 struct route iproute;
174 struct sockaddr_in *dst;
175 struct in_ifaddr *ia;
176 struct mbuf *opt;
177 struct route *ro;
178 int flags, sw_csum;
179 int *mtu_p;
180 int mtu;
181 struct ip_moptions *imo;
182 va_list ap;
183 #ifdef IPSEC
184 struct socket *so;
185 struct secpolicy *sp = NULL;
186 #endif /*IPSEC*/
187 u_int16_t ip_len;
188
189 va_start(ap, m0);
190 opt = va_arg(ap, struct mbuf *);
191 ro = va_arg(ap, struct route *);
192 flags = va_arg(ap, int);
193 imo = va_arg(ap, struct ip_moptions *);
194 if (flags & IP_RETURNMTU)
195 mtu_p = va_arg(ap, int *);
196 else
197 mtu_p = NULL;
198 va_end(ap);
199
200 #ifdef IPSEC
201 so = ipsec_getsocket(m);
202 (void)ipsec_setsocket(m, NULL);
203 #endif /*IPSEC*/
204
205 #ifdef DIAGNOSTIC
206 if ((m->m_flags & M_PKTHDR) == 0)
207 panic("ip_output no HDR");
208 #endif
209 if (opt) {
210 m = ip_insertoptions(m, opt, &len);
211 hlen = len;
212 }
213 ip = mtod(m, struct ip *);
214 /*
215 * Fill in IP header.
216 */
217 if ((flags & (IP_FORWARDING|IP_RAWOUTPUT)) == 0) {
218 ip->ip_v = IPVERSION;
219 ip->ip_off = 0;
220 ip->ip_id = htons(ip_id++);
221 ip->ip_hl = hlen >> 2;
222 ipstat.ips_localout++;
223 } else {
224 hlen = ip->ip_hl << 2;
225 }
226 /*
227 * Route packet.
228 */
229 if (ro == 0) {
230 ro = &iproute;
231 bzero((caddr_t)ro, sizeof (*ro));
232 }
233 dst = satosin(&ro->ro_dst);
234 /*
235 * If there is a cached route,
236 * check that it is to the same destination
237 * and is still up. If not, free it and try again.
238 */
239 if (ro->ro_rt && ((ro->ro_rt->rt_flags & RTF_UP) == 0 ||
240 !in_hosteq(dst->sin_addr, ip->ip_dst))) {
241 RTFREE(ro->ro_rt);
242 ro->ro_rt = (struct rtentry *)0;
243 }
244 if (ro->ro_rt == 0) {
245 dst->sin_family = AF_INET;
246 dst->sin_len = sizeof(*dst);
247 dst->sin_addr = ip->ip_dst;
248 }
249 /*
250 * If routing to interface only,
251 * short circuit routing lookup.
252 */
253 if (flags & IP_ROUTETOIF) {
254 if ((ia = ifatoia(ifa_ifwithladdr(sintosa(dst)))) == 0) {
255 ipstat.ips_noroute++;
256 error = ENETUNREACH;
257 goto bad;
258 }
259 ifp = ia->ia_ifp;
260 mtu = ifp->if_mtu;
261 ip->ip_ttl = 1;
262 } else {
263 if (ro->ro_rt == 0)
264 rtalloc(ro);
265 if (ro->ro_rt == 0) {
266 ipstat.ips_noroute++;
267 error = EHOSTUNREACH;
268 goto bad;
269 }
270 ia = ifatoia(ro->ro_rt->rt_ifa);
271 ifp = ro->ro_rt->rt_ifp;
272 if ((mtu = ro->ro_rt->rt_rmx.rmx_mtu) == 0)
273 mtu = ifp->if_mtu;
274 ro->ro_rt->rt_use++;
275 if (ro->ro_rt->rt_flags & RTF_GATEWAY)
276 dst = satosin(ro->ro_rt->rt_gateway);
277 }
278 if (IN_MULTICAST(ip->ip_dst.s_addr) ||
279 (ip->ip_dst.s_addr == INADDR_BROADCAST)) {
280 struct in_multi *inm;
281
282 m->m_flags |= (ip->ip_dst.s_addr == INADDR_BROADCAST) ?
283 M_BCAST : M_MCAST;
284 /*
285 * IP destination address is multicast. Make sure "dst"
286 * still points to the address in "ro". (It may have been
287 * changed to point to a gateway address, above.)
288 */
289 dst = satosin(&ro->ro_dst);
290 /*
291 * See if the caller provided any multicast options
292 */
293 if (imo != NULL) {
294 ip->ip_ttl = imo->imo_multicast_ttl;
295 if (imo->imo_multicast_ifp != NULL) {
296 ifp = imo->imo_multicast_ifp;
297 mtu = ifp->if_mtu;
298 }
299 } else
300 ip->ip_ttl = IP_DEFAULT_MULTICAST_TTL;
301 /*
302 * Confirm that the outgoing interface supports multicast.
303 */
304 if (((m->m_flags & M_MCAST) &&
305 (ifp->if_flags & IFF_MULTICAST) == 0) ||
306 ((m->m_flags & M_BCAST) &&
307 (ifp->if_flags & IFF_BROADCAST) == 0)) {
308 ipstat.ips_noroute++;
309 error = ENETUNREACH;
310 goto bad;
311 }
312 /*
313 * If source address not specified yet, use an address
314 * of outgoing interface.
315 */
316 if (in_nullhost(ip->ip_src)) {
317 struct in_ifaddr *ia;
318
319 IFP_TO_IA(ifp, ia);
320 ip->ip_src = ia->ia_addr.sin_addr;
321 }
322
323 IN_LOOKUP_MULTI(ip->ip_dst, ifp, inm);
324 if (inm != NULL &&
325 (imo == NULL || imo->imo_multicast_loop)) {
326 /*
327 * If we belong to the destination multicast group
328 * on the outgoing interface, and the caller did not
329 * forbid loopback, loop back a copy.
330 */
331 ip_mloopback(ifp, m, dst);
332 }
333 #ifdef MROUTING
334 else {
335 /*
336 * If we are acting as a multicast router, perform
337 * multicast forwarding as if the packet had just
338 * arrived on the interface to which we are about
339 * to send. The multicast forwarding function
340 * recursively calls this function, using the
341 * IP_FORWARDING flag to prevent infinite recursion.
342 *
343 * Multicasts that are looped back by ip_mloopback(),
344 * above, will be forwarded by the ip_input() routine,
345 * if necessary.
346 */
347 extern struct socket *ip_mrouter;
348
349 if (ip_mrouter && (flags & IP_FORWARDING) == 0) {
350 if (ip_mforward(m, ifp) != 0) {
351 m_freem(m);
352 goto done;
353 }
354 }
355 }
356 #endif
357 /*
358 * Multicasts with a time-to-live of zero may be looped-
359 * back, above, but must not be transmitted on a network.
360 * Also, multicasts addressed to the loopback interface
361 * are not sent -- the above call to ip_mloopback() will
362 * loop back a copy if this host actually belongs to the
363 * destination group on the loopback interface.
364 */
365 if (ip->ip_ttl == 0 || (ifp->if_flags & IFF_LOOPBACK) != 0) {
366 m_freem(m);
367 goto done;
368 }
369
370 goto sendit;
371 }
372 #ifndef notdef
373 /*
374 * If source address not specified yet, use address
375 * of outgoing interface.
376 */
377 if (in_nullhost(ip->ip_src))
378 ip->ip_src = ia->ia_addr.sin_addr;
379 #endif
380
381 /*
382 * packets with Class-D address as source are not valid per
383 * RFC 1112
384 */
385 if (IN_MULTICAST(ip->ip_src.s_addr)) {
386 ipstat.ips_odropped++;
387 error = EADDRNOTAVAIL;
388 goto bad;
389 }
390
391 /*
392 * Look for broadcast address and
393 * and verify user is allowed to send
394 * such a packet.
395 */
396 if (in_broadcast(dst->sin_addr, ifp)) {
397 if ((ifp->if_flags & IFF_BROADCAST) == 0) {
398 error = EADDRNOTAVAIL;
399 goto bad;
400 }
401 if ((flags & IP_ALLOWBROADCAST) == 0) {
402 error = EACCES;
403 goto bad;
404 }
405 /* don't allow broadcast messages to be fragmented */
406 if ((u_int16_t)ip->ip_len > ifp->if_mtu) {
407 error = EMSGSIZE;
408 goto bad;
409 }
410 m->m_flags |= M_BCAST;
411 } else
412 m->m_flags &= ~M_BCAST;
413
414 sendit:
415 /*
416 * If we're doing Path MTU Discovery, we need to set DF unless
417 * the route's MTU is locked.
418 */
419 if ((flags & IP_MTUDISC) != 0 && ro->ro_rt != NULL &&
420 (ro->ro_rt->rt_rmx.rmx_locks & RTV_MTU) == 0)
421 ip->ip_off |= IP_DF;
422
423 /*
424 * Remember the current ip_len and ip_off, and swap them into
425 * network order.
426 */
427 ip_len = ip->ip_len;
428
429 HTONS(ip->ip_len);
430 HTONS(ip->ip_off);
431
432 #ifdef IPSEC
433 /* get SP for this packet */
434 if (so == NULL)
435 sp = ipsec4_getpolicybyaddr(m, IPSEC_DIR_OUTBOUND, flags, &error);
436 else
437 sp = ipsec4_getpolicybysock(m, IPSEC_DIR_OUTBOUND, so, &error);
438
439 if (sp == NULL) {
440 ipsecstat.out_inval++;
441 goto bad;
442 }
443
444 error = 0;
445
446 /* check policy */
447 switch (sp->policy) {
448 case IPSEC_POLICY_DISCARD:
449 /*
450 * This packet is just discarded.
451 */
452 ipsecstat.out_polvio++;
453 goto bad;
454
455 case IPSEC_POLICY_BYPASS:
456 case IPSEC_POLICY_NONE:
457 /* no need to do IPsec. */
458 goto skip_ipsec;
459
460 case IPSEC_POLICY_IPSEC:
461 if (sp->req == NULL) {
462 /* XXX should be panic ? */
463 printf("ip_output: No IPsec request specified.\n");
464 error = EINVAL;
465 goto bad;
466 }
467 break;
468
469 case IPSEC_POLICY_ENTRUST:
470 default:
471 printf("ip_output: Invalid policy found. %d\n", sp->policy);
472 }
473
474 /*
475 * ipsec4_output() expects ip_len and ip_off in network
476 * order. They have been set to network order above.
477 */
478
479 {
480 struct ipsec_output_state state;
481 bzero(&state, sizeof(state));
482 state.m = m;
483 if (flags & IP_ROUTETOIF) {
484 state.ro = &iproute;
485 bzero(&iproute, sizeof(iproute));
486 } else
487 state.ro = ro;
488 state.dst = (struct sockaddr *)dst;
489
490 /*
491 * We can't defer the checksum of payload data if
492 * we're about to encrypt/authenticate it.
493 *
494 * XXX When we support crypto offloading functions of
495 * XXX network interfaces, we need to reconsider this,
496 * XXX since it's likely that they'll support checksumming,
497 * XXX as well.
498 */
499 if (m->m_pkthdr.csum_flags & (M_CSUM_TCPv4|M_CSUM_UDPv4)) {
500 in_delayed_cksum(m);
501 m->m_pkthdr.csum_flags &= ~(M_CSUM_TCPv4|M_CSUM_UDPv4);
502 }
503
504 error = ipsec4_output(&state, sp, flags);
505
506 m = state.m;
507 if (flags & IP_ROUTETOIF) {
508 /*
509 * if we have tunnel mode SA, we may need to ignore
510 * IP_ROUTETOIF.
511 */
512 if (state.ro != &iproute || state.ro->ro_rt != NULL) {
513 flags &= ~IP_ROUTETOIF;
514 ro = state.ro;
515 }
516 } else
517 ro = state.ro;
518 dst = (struct sockaddr_in *)state.dst;
519 if (error) {
520 /* mbuf is already reclaimed in ipsec4_output. */
521 m0 = NULL;
522 switch (error) {
523 case EHOSTUNREACH:
524 case ENETUNREACH:
525 case EMSGSIZE:
526 case ENOBUFS:
527 case ENOMEM:
528 break;
529 default:
530 printf("ip4_output (ipsec): error code %d\n", error);
531 /*fall through*/
532 case ENOENT:
533 /* don't show these error codes to the user */
534 error = 0;
535 break;
536 }
537 goto bad;
538 }
539
540 /* be sure to update variables that are affected by ipsec4_output() */
541 ip = mtod(m, struct ip *);
542 #ifdef _IP_VHL
543 hlen = IP_VHL_HL(ip->ip_vhl) << 2;
544 #else
545 hlen = ip->ip_hl << 2;
546 #endif
547 ip_len = ntohs(ip->ip_len);
548
549 if (ro->ro_rt == NULL) {
550 if ((flags & IP_ROUTETOIF) == 0) {
551 printf("ip_output: "
552 "can't update route after IPsec processing\n");
553 error = EHOSTUNREACH; /*XXX*/
554 goto bad;
555 }
556 } else {
557 /* nobody uses ia beyond here */
558 if (state.encap)
559 ifp = ro->ro_rt->rt_ifp;
560 }
561 }
562
563 skip_ipsec:
564 #endif /*IPSEC*/
565
566 #ifdef PFIL_HOOKS
567 /*
568 * Run through list of hooks for output packets.
569 */
570 if ((error = pfil_run_hooks(&inet_pfil_hook, &m, ifp,
571 PFIL_OUT)) != 0)
572 goto done;
573 if (m == NULL)
574 goto done;
575
576 ip = mtod(m, struct ip *);
577 #endif /* PFIL_HOOKS */
578
579 /*
580 * If small enough for mtu of path, can just send directly.
581 */
582 if (ip_len <= mtu) {
583 #if IFA_STATS
584 /*
585 * search for the source address structure to
586 * maintain output statistics.
587 */
588 INADDR_TO_IA(ip->ip_src, ia);
589 if (ia)
590 ia->ia_ifa.ifa_data.ifad_outbytes += ip_len;
591 #endif
592 /*
593 * Always initialize the sum to 0! Some HW assisted
594 * checksumming requires this.
595 */
596 ip->ip_sum = 0;
597 m->m_pkthdr.csum_flags |= M_CSUM_IPv4;
598
599 sw_csum = m->m_pkthdr.csum_flags & ~ifp->if_csum_flags_tx;
600
601 /*
602 * Perform any checksums that the hardware can't do
603 * for us.
604 *
605 * XXX Does any hardware require the {th,uh}_sum
606 * XXX fields to be 0?
607 */
608 if (sw_csum & M_CSUM_IPv4)
609 ip->ip_sum = in_cksum(m, hlen);
610 if (sw_csum & (M_CSUM_TCPv4|M_CSUM_UDPv4)) {
611 in_delayed_cksum(m);
612 sw_csum &= ~(M_CSUM_TCPv4|M_CSUM_UDPv4);
613 }
614 m->m_pkthdr.csum_flags &= ifp->if_csum_flags_tx;
615
616 #ifdef IPSEC
617 /* clean ipsec history once it goes out of the node */
618 ipsec_delaux(m);
619 #endif
620 error = (*ifp->if_output)(ifp, m, sintosa(dst), ro->ro_rt);
621 goto done;
622 }
623
624 /*
625 * We can't use HW checksumming if we're about to
626 * to fragment the packet.
627 *
628 * XXX Some hardware can do this.
629 */
630 if (m->m_pkthdr.csum_flags & (M_CSUM_TCPv4|M_CSUM_UDPv4)) {
631 in_delayed_cksum(m);
632 m->m_pkthdr.csum_flags &= ~(M_CSUM_TCPv4|M_CSUM_UDPv4);
633 }
634
635 /*
636 * Too large for interface; fragment if possible.
637 * Must be able to put at least 8 bytes per fragment.
638 *
639 * Note we swap ip_len and ip_off into host order to make
640 * the logic below a little simpler.
641 */
642
643 NTOHS(ip->ip_len);
644 NTOHS(ip->ip_off);
645
646 if (ip->ip_off & IP_DF) {
647 if (flags & IP_RETURNMTU)
648 *mtu_p = mtu;
649 error = EMSGSIZE;
650 ipstat.ips_cantfrag++;
651 goto bad;
652 }
653 len = (mtu - hlen) &~ 7;
654 if (len < 8) {
655 error = EMSGSIZE;
656 goto bad;
657 }
658
659 {
660 int mhlen, firstlen = len;
661 struct mbuf **mnext = &m->m_nextpkt;
662 int fragments = 0;
663 int s;
664
665 /*
666 * Loop through length of segment after first fragment,
667 * make new header and copy data of each part and link onto chain.
668 */
669 m0 = m;
670 mhlen = sizeof (struct ip);
671 for (off = hlen + len; off < (u_int16_t)ip->ip_len; off += len) {
672 MGETHDR(m, M_DONTWAIT, MT_HEADER);
673 if (m == 0) {
674 error = ENOBUFS;
675 ipstat.ips_odropped++;
676 goto sendorfree;
677 }
678 *mnext = m;
679 mnext = &m->m_nextpkt;
680 m->m_data += max_linkhdr;
681 mhip = mtod(m, struct ip *);
682 *mhip = *ip;
683 /* we must inherit MCAST and BCAST flags */
684 m->m_flags |= m0->m_flags & (M_MCAST|M_BCAST);
685 if (hlen > sizeof (struct ip)) {
686 mhlen = ip_optcopy(ip, mhip) + sizeof (struct ip);
687 mhip->ip_hl = mhlen >> 2;
688 }
689 m->m_len = mhlen;
690 mhip->ip_off = ((off - hlen) >> 3) + (ip->ip_off & ~IP_MF);
691 if (ip->ip_off & IP_MF)
692 mhip->ip_off |= IP_MF;
693 if (off + len >= (u_int16_t)ip->ip_len)
694 len = (u_int16_t)ip->ip_len - off;
695 else
696 mhip->ip_off |= IP_MF;
697 mhip->ip_len = htons((u_int16_t)(len + mhlen));
698 m->m_next = m_copy(m0, off, len);
699 if (m->m_next == 0) {
700 error = ENOBUFS; /* ??? */
701 ipstat.ips_odropped++;
702 goto sendorfree;
703 }
704 m->m_pkthdr.len = mhlen + len;
705 m->m_pkthdr.rcvif = (struct ifnet *)0;
706 HTONS(mhip->ip_off);
707 mhip->ip_sum = 0;
708 mhip->ip_sum = in_cksum(m, mhlen);
709 ipstat.ips_ofragments++;
710 fragments++;
711 }
712 /*
713 * Update first fragment by trimming what's been copied out
714 * and updating header, then send each fragment (in order).
715 */
716 m = m0;
717 m_adj(m, hlen + firstlen - (u_int16_t)ip->ip_len);
718 m->m_pkthdr.len = hlen + firstlen;
719 ip->ip_len = htons((u_int16_t)m->m_pkthdr.len);
720 ip->ip_off |= IP_MF;
721 HTONS(ip->ip_off);
722 ip->ip_sum = 0;
723 ip->ip_sum = in_cksum(m, hlen);
724 sendorfree:
725 /*
726 * If there is no room for all the fragments, don't queue
727 * any of them.
728 */
729 s = splnet();
730 if (ifp->if_snd.ifq_maxlen - ifp->if_snd.ifq_len < fragments)
731 error = ENOBUFS;
732 splx(s);
733 for (m = m0; m; m = m0) {
734 m0 = m->m_nextpkt;
735 m->m_nextpkt = 0;
736 if (error == 0) {
737 #if IFA_STATS
738 /*
739 * search for the source address structure to
740 * maintain output statistics.
741 */
742 INADDR_TO_IA(ip->ip_src, ia);
743 if (ia) {
744 ia->ia_ifa.ifa_data.ifad_outbytes +=
745 ntohs(ip->ip_len);
746 }
747 #endif
748 #ifdef IPSEC
749 /* clean ipsec history once it goes out of the node */
750 ipsec_delaux(m);
751 #endif
752 error = (*ifp->if_output)(ifp, m, sintosa(dst),
753 ro->ro_rt);
754 } else
755 m_freem(m);
756 }
757
758 if (error == 0)
759 ipstat.ips_fragmented++;
760 }
761 done:
762 if (ro == &iproute && (flags & IP_ROUTETOIF) == 0 && ro->ro_rt) {
763 RTFREE(ro->ro_rt);
764 ro->ro_rt = 0;
765 }
766
767 #ifdef IPSEC
768 if (sp != NULL) {
769 KEYDEBUG(KEYDEBUG_IPSEC_STAMP,
770 printf("DP ip_output call free SP:%p\n", sp));
771 key_freesp(sp);
772 }
773 #endif /* IPSEC */
774
775 return (error);
776 bad:
777 m_freem(m);
778 goto done;
779 }
780
781 /*
782 * Process a delayed payload checksum calculation.
783 */
784 void
785 in_delayed_cksum(struct mbuf *m)
786 {
787 struct ip *ip;
788 u_int16_t csum, offset;
789
790 ip = mtod(m, struct ip *);
791 offset = ip->ip_hl << 2;
792 csum = in4_cksum(m, 0, offset, ntohs(ip->ip_len) - offset);
793 if (csum == 0 && (m->m_pkthdr.csum_flags & M_CSUM_UDPv4) != 0)
794 csum = 0xffff;
795
796 offset += m->m_pkthdr.csum_data; /* checksum offset */
797
798 if ((offset + sizeof(u_int16_t)) > m->m_len) {
799 /* This happen when ip options were inserted
800 printf("in_delayed_cksum: pullup len %d off %d proto %d\n",
801 m->m_len, offset, ip->ip_p);
802 */
803 m_copyback(m, offset, sizeof(csum), (caddr_t) &csum);
804 } else
805 *(u_int16_t *)(mtod(m, caddr_t) + offset) = csum;
806 }
807
808 /*
809 * Determine the maximum length of the options to be inserted;
810 * we would far rather allocate too much space rather than too little.
811 */
812
813 u_int
814 ip_optlen(inp)
815 struct inpcb *inp;
816 {
817 struct mbuf *m = inp->inp_options;
818
819 if (m && m->m_len > offsetof(struct ipoption, ipopt_dst))
820 return(m->m_len - offsetof(struct ipoption, ipopt_dst));
821 else
822 return 0;
823 }
824
825
826 /*
827 * Insert IP options into preformed packet.
828 * Adjust IP destination as required for IP source routing,
829 * as indicated by a non-zero in_addr at the start of the options.
830 */
831 static struct mbuf *
832 ip_insertoptions(m, opt, phlen)
833 struct mbuf *m;
834 struct mbuf *opt;
835 int *phlen;
836 {
837 struct ipoption *p = mtod(opt, struct ipoption *);
838 struct mbuf *n;
839 struct ip *ip = mtod(m, struct ip *);
840 unsigned optlen;
841
842 optlen = opt->m_len - sizeof(p->ipopt_dst);
843 if (optlen + (u_int16_t)ip->ip_len > IP_MAXPACKET)
844 return (m); /* XXX should fail */
845 if (!in_nullhost(p->ipopt_dst))
846 ip->ip_dst = p->ipopt_dst;
847 if (m->m_flags & M_EXT || m->m_data - optlen < m->m_pktdat) {
848 MGETHDR(n, M_DONTWAIT, MT_HEADER);
849 if (n == 0)
850 return (m);
851 M_COPY_PKTHDR(n, m);
852 m->m_flags &= ~M_PKTHDR;
853 m->m_len -= sizeof(struct ip);
854 m->m_data += sizeof(struct ip);
855 n->m_next = m;
856 m = n;
857 m->m_len = optlen + sizeof(struct ip);
858 m->m_data += max_linkhdr;
859 bcopy((caddr_t)ip, mtod(m, caddr_t), sizeof(struct ip));
860 } else {
861 m->m_data -= optlen;
862 m->m_len += optlen;
863 memmove(mtod(m, caddr_t), ip, sizeof(struct ip));
864 }
865 m->m_pkthdr.len += optlen;
866 ip = mtod(m, struct ip *);
867 bcopy((caddr_t)p->ipopt_list, (caddr_t)(ip + 1), (unsigned)optlen);
868 *phlen = sizeof(struct ip) + optlen;
869 ip->ip_len += optlen;
870 return (m);
871 }
872
873 /*
874 * Copy options from ip to jp,
875 * omitting those not copied during fragmentation.
876 */
877 int
878 ip_optcopy(ip, jp)
879 struct ip *ip, *jp;
880 {
881 u_char *cp, *dp;
882 int opt, optlen, cnt;
883
884 cp = (u_char *)(ip + 1);
885 dp = (u_char *)(jp + 1);
886 cnt = (ip->ip_hl << 2) - sizeof (struct ip);
887 for (; cnt > 0; cnt -= optlen, cp += optlen) {
888 opt = cp[0];
889 if (opt == IPOPT_EOL)
890 break;
891 if (opt == IPOPT_NOP) {
892 /* Preserve for IP mcast tunnel's LSRR alignment. */
893 *dp++ = IPOPT_NOP;
894 optlen = 1;
895 continue;
896 }
897 #ifdef DIAGNOSTIC
898 if (cnt < IPOPT_OLEN + sizeof(*cp))
899 panic("malformed IPv4 option passed to ip_optcopy");
900 #endif
901 optlen = cp[IPOPT_OLEN];
902 #ifdef DIAGNOSTIC
903 if (optlen < IPOPT_OLEN + sizeof(*cp) || optlen > cnt)
904 panic("malformed IPv4 option passed to ip_optcopy");
905 #endif
906 /* bogus lengths should have been caught by ip_dooptions */
907 if (optlen > cnt)
908 optlen = cnt;
909 if (IPOPT_COPIED(opt)) {
910 bcopy((caddr_t)cp, (caddr_t)dp, (unsigned)optlen);
911 dp += optlen;
912 }
913 }
914 for (optlen = dp - (u_char *)(jp+1); optlen & 0x3; optlen++)
915 *dp++ = IPOPT_EOL;
916 return (optlen);
917 }
918
919 /*
920 * IP socket option processing.
921 */
922 int
923 ip_ctloutput(op, so, level, optname, mp)
924 int op;
925 struct socket *so;
926 int level, optname;
927 struct mbuf **mp;
928 {
929 struct inpcb *inp = sotoinpcb(so);
930 struct mbuf *m = *mp;
931 int optval = 0;
932 int error = 0;
933 #ifdef IPSEC
934 #ifdef __NetBSD__
935 struct proc *p = curproc; /*XXX*/
936 #endif
937 #endif
938
939 if (level != IPPROTO_IP) {
940 error = EINVAL;
941 if (op == PRCO_SETOPT && *mp)
942 (void) m_free(*mp);
943 } else switch (op) {
944
945 case PRCO_SETOPT:
946 switch (optname) {
947 case IP_OPTIONS:
948 #ifdef notyet
949 case IP_RETOPTS:
950 return (ip_pcbopts(optname, &inp->inp_options, m));
951 #else
952 return (ip_pcbopts(&inp->inp_options, m));
953 #endif
954
955 case IP_TOS:
956 case IP_TTL:
957 case IP_RECVOPTS:
958 case IP_RECVRETOPTS:
959 case IP_RECVDSTADDR:
960 case IP_RECVIF:
961 if (m == NULL || m->m_len != sizeof(int))
962 error = EINVAL;
963 else {
964 optval = *mtod(m, int *);
965 switch (optname) {
966
967 case IP_TOS:
968 inp->inp_ip.ip_tos = optval;
969 break;
970
971 case IP_TTL:
972 inp->inp_ip.ip_ttl = optval;
973 break;
974 #define OPTSET(bit) \
975 if (optval) \
976 inp->inp_flags |= bit; \
977 else \
978 inp->inp_flags &= ~bit;
979
980 case IP_RECVOPTS:
981 OPTSET(INP_RECVOPTS);
982 break;
983
984 case IP_RECVRETOPTS:
985 OPTSET(INP_RECVRETOPTS);
986 break;
987
988 case IP_RECVDSTADDR:
989 OPTSET(INP_RECVDSTADDR);
990 break;
991
992 case IP_RECVIF:
993 OPTSET(INP_RECVIF);
994 break;
995 }
996 }
997 break;
998 #undef OPTSET
999
1000 case IP_MULTICAST_IF:
1001 case IP_MULTICAST_TTL:
1002 case IP_MULTICAST_LOOP:
1003 case IP_ADD_MEMBERSHIP:
1004 case IP_DROP_MEMBERSHIP:
1005 error = ip_setmoptions(optname, &inp->inp_moptions, m);
1006 break;
1007
1008 case IP_PORTRANGE:
1009 if (m == 0 || m->m_len != sizeof(int))
1010 error = EINVAL;
1011 else {
1012 optval = *mtod(m, int *);
1013
1014 switch (optval) {
1015
1016 case IP_PORTRANGE_DEFAULT:
1017 case IP_PORTRANGE_HIGH:
1018 inp->inp_flags &= ~(INP_LOWPORT);
1019 break;
1020
1021 case IP_PORTRANGE_LOW:
1022 inp->inp_flags |= INP_LOWPORT;
1023 break;
1024
1025 default:
1026 error = EINVAL;
1027 break;
1028 }
1029 }
1030 break;
1031
1032 #ifdef IPSEC
1033 case IP_IPSEC_POLICY:
1034 {
1035 caddr_t req = NULL;
1036 size_t len = 0;
1037 int priv = 0;
1038
1039 #ifdef __NetBSD__
1040 if (p == 0 || suser(p->p_ucred, &p->p_acflag))
1041 priv = 0;
1042 else
1043 priv = 1;
1044 #else
1045 priv = (in6p->in6p_socket->so_state & SS_PRIV);
1046 #endif
1047 if (m) {
1048 req = mtod(m, caddr_t);
1049 len = m->m_len;
1050 }
1051 error = ipsec4_set_policy(inp, optname, req, len, priv);
1052 break;
1053 }
1054 #endif /*IPSEC*/
1055
1056 default:
1057 error = ENOPROTOOPT;
1058 break;
1059 }
1060 if (m)
1061 (void)m_free(m);
1062 break;
1063
1064 case PRCO_GETOPT:
1065 switch (optname) {
1066 case IP_OPTIONS:
1067 case IP_RETOPTS:
1068 *mp = m = m_get(M_WAIT, MT_SOOPTS);
1069 if (inp->inp_options) {
1070 m->m_len = inp->inp_options->m_len;
1071 bcopy(mtod(inp->inp_options, caddr_t),
1072 mtod(m, caddr_t), (unsigned)m->m_len);
1073 } else
1074 m->m_len = 0;
1075 break;
1076
1077 case IP_TOS:
1078 case IP_TTL:
1079 case IP_RECVOPTS:
1080 case IP_RECVRETOPTS:
1081 case IP_RECVDSTADDR:
1082 case IP_RECVIF:
1083 case IP_ERRORMTU:
1084 *mp = m = m_get(M_WAIT, MT_SOOPTS);
1085 m->m_len = sizeof(int);
1086 switch (optname) {
1087
1088 case IP_TOS:
1089 optval = inp->inp_ip.ip_tos;
1090 break;
1091
1092 case IP_TTL:
1093 optval = inp->inp_ip.ip_ttl;
1094 break;
1095
1096 case IP_ERRORMTU:
1097 optval = inp->inp_errormtu;
1098 break;
1099
1100 #define OPTBIT(bit) (inp->inp_flags & bit ? 1 : 0)
1101
1102 case IP_RECVOPTS:
1103 optval = OPTBIT(INP_RECVOPTS);
1104 break;
1105
1106 case IP_RECVRETOPTS:
1107 optval = OPTBIT(INP_RECVRETOPTS);
1108 break;
1109
1110 case IP_RECVDSTADDR:
1111 optval = OPTBIT(INP_RECVDSTADDR);
1112 break;
1113
1114 case IP_RECVIF:
1115 optval = OPTBIT(INP_RECVIF);
1116 break;
1117 }
1118 *mtod(m, int *) = optval;
1119 break;
1120
1121 #ifdef IPSEC
1122 case IP_IPSEC_POLICY:
1123 {
1124 caddr_t req = NULL;
1125 size_t len = 0;
1126
1127 if (m) {
1128 req = mtod(m, caddr_t);
1129 len = m->m_len;
1130 }
1131 error = ipsec4_get_policy(inp, req, len, mp);
1132 break;
1133 }
1134 #endif /*IPSEC*/
1135
1136 case IP_MULTICAST_IF:
1137 case IP_MULTICAST_TTL:
1138 case IP_MULTICAST_LOOP:
1139 case IP_ADD_MEMBERSHIP:
1140 case IP_DROP_MEMBERSHIP:
1141 error = ip_getmoptions(optname, inp->inp_moptions, mp);
1142 break;
1143
1144 case IP_PORTRANGE:
1145 *mp = m = m_get(M_WAIT, MT_SOOPTS);
1146 m->m_len = sizeof(int);
1147
1148 if (inp->inp_flags & INP_LOWPORT)
1149 optval = IP_PORTRANGE_LOW;
1150 else
1151 optval = IP_PORTRANGE_DEFAULT;
1152
1153 *mtod(m, int *) = optval;
1154 break;
1155
1156 default:
1157 error = ENOPROTOOPT;
1158 break;
1159 }
1160 break;
1161 }
1162 return (error);
1163 }
1164
1165 /*
1166 * Set up IP options in pcb for insertion in output packets.
1167 * Store in mbuf with pointer in pcbopt, adding pseudo-option
1168 * with destination address if source routed.
1169 */
1170 int
1171 #ifdef notyet
1172 ip_pcbopts(optname, pcbopt, m)
1173 int optname;
1174 #else
1175 ip_pcbopts(pcbopt, m)
1176 #endif
1177 struct mbuf **pcbopt;
1178 struct mbuf *m;
1179 {
1180 int cnt, optlen;
1181 u_char *cp;
1182 u_char opt;
1183
1184 /* turn off any old options */
1185 if (*pcbopt)
1186 (void)m_free(*pcbopt);
1187 *pcbopt = 0;
1188 if (m == (struct mbuf *)0 || m->m_len == 0) {
1189 /*
1190 * Only turning off any previous options.
1191 */
1192 if (m)
1193 (void)m_free(m);
1194 return (0);
1195 }
1196
1197 #ifndef __vax__
1198 if (m->m_len % sizeof(int32_t))
1199 goto bad;
1200 #endif
1201 /*
1202 * IP first-hop destination address will be stored before
1203 * actual options; move other options back
1204 * and clear it when none present.
1205 */
1206 if (m->m_data + m->m_len + sizeof(struct in_addr) >= &m->m_dat[MLEN])
1207 goto bad;
1208 cnt = m->m_len;
1209 m->m_len += sizeof(struct in_addr);
1210 cp = mtod(m, u_char *) + sizeof(struct in_addr);
1211 memmove(cp, mtod(m, caddr_t), (unsigned)cnt);
1212 bzero(mtod(m, caddr_t), sizeof(struct in_addr));
1213
1214 for (; cnt > 0; cnt -= optlen, cp += optlen) {
1215 opt = cp[IPOPT_OPTVAL];
1216 if (opt == IPOPT_EOL)
1217 break;
1218 if (opt == IPOPT_NOP)
1219 optlen = 1;
1220 else {
1221 if (cnt < IPOPT_OLEN + sizeof(*cp))
1222 goto bad;
1223 optlen = cp[IPOPT_OLEN];
1224 if (optlen < IPOPT_OLEN + sizeof(*cp) || optlen > cnt)
1225 goto bad;
1226 }
1227 switch (opt) {
1228
1229 default:
1230 break;
1231
1232 case IPOPT_LSRR:
1233 case IPOPT_SSRR:
1234 /*
1235 * user process specifies route as:
1236 * ->A->B->C->D
1237 * D must be our final destination (but we can't
1238 * check that since we may not have connected yet).
1239 * A is first hop destination, which doesn't appear in
1240 * actual IP option, but is stored before the options.
1241 */
1242 if (optlen < IPOPT_MINOFF - 1 + sizeof(struct in_addr))
1243 goto bad;
1244 m->m_len -= sizeof(struct in_addr);
1245 cnt -= sizeof(struct in_addr);
1246 optlen -= sizeof(struct in_addr);
1247 cp[IPOPT_OLEN] = optlen;
1248 /*
1249 * Move first hop before start of options.
1250 */
1251 bcopy((caddr_t)&cp[IPOPT_OFFSET+1], mtod(m, caddr_t),
1252 sizeof(struct in_addr));
1253 /*
1254 * Then copy rest of options back
1255 * to close up the deleted entry.
1256 */
1257 memmove(&cp[IPOPT_OFFSET+1],
1258 (caddr_t)(&cp[IPOPT_OFFSET+1] + sizeof(struct in_addr)),
1259 (unsigned)cnt + sizeof(struct in_addr));
1260 break;
1261 }
1262 }
1263 if (m->m_len > MAX_IPOPTLEN + sizeof(struct in_addr))
1264 goto bad;
1265 *pcbopt = m;
1266 return (0);
1267
1268 bad:
1269 (void)m_free(m);
1270 return (EINVAL);
1271 }
1272
1273 /*
1274 * following RFC1724 section 3.3, 0.0.0.0/8 is interpreted as interface index.
1275 */
1276 static struct ifnet *
1277 ip_multicast_if(a, ifindexp)
1278 struct in_addr *a;
1279 int *ifindexp;
1280 {
1281 int ifindex;
1282 struct ifnet *ifp;
1283
1284 if (ifindexp)
1285 *ifindexp = 0;
1286 if (ntohl(a->s_addr) >> 24 == 0) {
1287 ifindex = ntohl(a->s_addr) & 0xffffff;
1288 if (ifindex < 0 || if_index < ifindex)
1289 return NULL;
1290 ifp = ifindex2ifnet[ifindex];
1291 if (ifindexp)
1292 *ifindexp = ifindex;
1293 } else {
1294 INADDR_TO_IFP(*a, ifp);
1295 }
1296 return ifp;
1297 }
1298
1299 /*
1300 * Set the IP multicast options in response to user setsockopt().
1301 */
1302 int
1303 ip_setmoptions(optname, imop, m)
1304 int optname;
1305 struct ip_moptions **imop;
1306 struct mbuf *m;
1307 {
1308 int error = 0;
1309 u_char loop;
1310 int i;
1311 struct in_addr addr;
1312 struct ip_mreq *mreq;
1313 struct ifnet *ifp;
1314 struct ip_moptions *imo = *imop;
1315 struct route ro;
1316 struct sockaddr_in *dst;
1317 int ifindex;
1318
1319 if (imo == NULL) {
1320 /*
1321 * No multicast option buffer attached to the pcb;
1322 * allocate one and initialize to default values.
1323 */
1324 imo = (struct ip_moptions *)malloc(sizeof(*imo), M_IPMOPTS,
1325 M_WAITOK);
1326
1327 if (imo == NULL)
1328 return (ENOBUFS);
1329 *imop = imo;
1330 imo->imo_multicast_ifp = NULL;
1331 imo->imo_multicast_addr.s_addr = INADDR_ANY;
1332 imo->imo_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
1333 imo->imo_multicast_loop = IP_DEFAULT_MULTICAST_LOOP;
1334 imo->imo_num_memberships = 0;
1335 }
1336
1337 switch (optname) {
1338
1339 case IP_MULTICAST_IF:
1340 /*
1341 * Select the interface for outgoing multicast packets.
1342 */
1343 if (m == NULL || m->m_len != sizeof(struct in_addr)) {
1344 error = EINVAL;
1345 break;
1346 }
1347 addr = *(mtod(m, struct in_addr *));
1348 /*
1349 * INADDR_ANY is used to remove a previous selection.
1350 * When no interface is selected, a default one is
1351 * chosen every time a multicast packet is sent.
1352 */
1353 if (in_nullhost(addr)) {
1354 imo->imo_multicast_ifp = NULL;
1355 break;
1356 }
1357 /*
1358 * The selected interface is identified by its local
1359 * IP address. Find the interface and confirm that
1360 * it supports multicasting.
1361 */
1362 ifp = ip_multicast_if(&addr, &ifindex);
1363 if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
1364 error = EADDRNOTAVAIL;
1365 break;
1366 }
1367 imo->imo_multicast_ifp = ifp;
1368 if (ifindex)
1369 imo->imo_multicast_addr = addr;
1370 else
1371 imo->imo_multicast_addr.s_addr = INADDR_ANY;
1372 break;
1373
1374 case IP_MULTICAST_TTL:
1375 /*
1376 * Set the IP time-to-live for outgoing multicast packets.
1377 */
1378 if (m == NULL || m->m_len != 1) {
1379 error = EINVAL;
1380 break;
1381 }
1382 imo->imo_multicast_ttl = *(mtod(m, u_char *));
1383 break;
1384
1385 case IP_MULTICAST_LOOP:
1386 /*
1387 * Set the loopback flag for outgoing multicast packets.
1388 * Must be zero or one.
1389 */
1390 if (m == NULL || m->m_len != 1 ||
1391 (loop = *(mtod(m, u_char *))) > 1) {
1392 error = EINVAL;
1393 break;
1394 }
1395 imo->imo_multicast_loop = loop;
1396 break;
1397
1398 case IP_ADD_MEMBERSHIP:
1399 /*
1400 * Add a multicast group membership.
1401 * Group must be a valid IP multicast address.
1402 */
1403 if (m == NULL || m->m_len != sizeof(struct ip_mreq)) {
1404 error = EINVAL;
1405 break;
1406 }
1407 mreq = mtod(m, struct ip_mreq *);
1408 if (!IN_MULTICAST(mreq->imr_multiaddr.s_addr)) {
1409 error = EINVAL;
1410 break;
1411 }
1412 /*
1413 * If no interface address was provided, use the interface of
1414 * the route to the given multicast address.
1415 */
1416 if (in_nullhost(mreq->imr_interface)) {
1417 bzero((caddr_t)&ro, sizeof(ro));
1418 ro.ro_rt = NULL;
1419 dst = satosin(&ro.ro_dst);
1420 dst->sin_len = sizeof(*dst);
1421 dst->sin_family = AF_INET;
1422 dst->sin_addr = mreq->imr_multiaddr;
1423 rtalloc(&ro);
1424 if (ro.ro_rt == NULL) {
1425 error = EADDRNOTAVAIL;
1426 break;
1427 }
1428 ifp = ro.ro_rt->rt_ifp;
1429 rtfree(ro.ro_rt);
1430 } else {
1431 ifp = ip_multicast_if(&mreq->imr_interface, NULL);
1432 }
1433 /*
1434 * See if we found an interface, and confirm that it
1435 * supports multicast.
1436 */
1437 if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
1438 error = EADDRNOTAVAIL;
1439 break;
1440 }
1441 /*
1442 * See if the membership already exists or if all the
1443 * membership slots are full.
1444 */
1445 for (i = 0; i < imo->imo_num_memberships; ++i) {
1446 if (imo->imo_membership[i]->inm_ifp == ifp &&
1447 in_hosteq(imo->imo_membership[i]->inm_addr,
1448 mreq->imr_multiaddr))
1449 break;
1450 }
1451 if (i < imo->imo_num_memberships) {
1452 error = EADDRINUSE;
1453 break;
1454 }
1455 if (i == IP_MAX_MEMBERSHIPS) {
1456 error = ETOOMANYREFS;
1457 break;
1458 }
1459 /*
1460 * Everything looks good; add a new record to the multicast
1461 * address list for the given interface.
1462 */
1463 if ((imo->imo_membership[i] =
1464 in_addmulti(&mreq->imr_multiaddr, ifp)) == NULL) {
1465 error = ENOBUFS;
1466 break;
1467 }
1468 ++imo->imo_num_memberships;
1469 break;
1470
1471 case IP_DROP_MEMBERSHIP:
1472 /*
1473 * Drop a multicast group membership.
1474 * Group must be a valid IP multicast address.
1475 */
1476 if (m == NULL || m->m_len != sizeof(struct ip_mreq)) {
1477 error = EINVAL;
1478 break;
1479 }
1480 mreq = mtod(m, struct ip_mreq *);
1481 if (!IN_MULTICAST(mreq->imr_multiaddr.s_addr)) {
1482 error = EINVAL;
1483 break;
1484 }
1485 /*
1486 * If an interface address was specified, get a pointer
1487 * to its ifnet structure.
1488 */
1489 if (in_nullhost(mreq->imr_interface))
1490 ifp = NULL;
1491 else {
1492 ifp = ip_multicast_if(&mreq->imr_interface, NULL);
1493 if (ifp == NULL) {
1494 error = EADDRNOTAVAIL;
1495 break;
1496 }
1497 }
1498 /*
1499 * Find the membership in the membership array.
1500 */
1501 for (i = 0; i < imo->imo_num_memberships; ++i) {
1502 if ((ifp == NULL ||
1503 imo->imo_membership[i]->inm_ifp == ifp) &&
1504 in_hosteq(imo->imo_membership[i]->inm_addr,
1505 mreq->imr_multiaddr))
1506 break;
1507 }
1508 if (i == imo->imo_num_memberships) {
1509 error = EADDRNOTAVAIL;
1510 break;
1511 }
1512 /*
1513 * Give up the multicast address record to which the
1514 * membership points.
1515 */
1516 in_delmulti(imo->imo_membership[i]);
1517 /*
1518 * Remove the gap in the membership array.
1519 */
1520 for (++i; i < imo->imo_num_memberships; ++i)
1521 imo->imo_membership[i-1] = imo->imo_membership[i];
1522 --imo->imo_num_memberships;
1523 break;
1524
1525 default:
1526 error = EOPNOTSUPP;
1527 break;
1528 }
1529
1530 /*
1531 * If all options have default values, no need to keep the mbuf.
1532 */
1533 if (imo->imo_multicast_ifp == NULL &&
1534 imo->imo_multicast_ttl == IP_DEFAULT_MULTICAST_TTL &&
1535 imo->imo_multicast_loop == IP_DEFAULT_MULTICAST_LOOP &&
1536 imo->imo_num_memberships == 0) {
1537 free(*imop, M_IPMOPTS);
1538 *imop = NULL;
1539 }
1540
1541 return (error);
1542 }
1543
1544 /*
1545 * Return the IP multicast options in response to user getsockopt().
1546 */
1547 int
1548 ip_getmoptions(optname, imo, mp)
1549 int optname;
1550 struct ip_moptions *imo;
1551 struct mbuf **mp;
1552 {
1553 u_char *ttl;
1554 u_char *loop;
1555 struct in_addr *addr;
1556 struct in_ifaddr *ia;
1557
1558 *mp = m_get(M_WAIT, MT_SOOPTS);
1559
1560 switch (optname) {
1561
1562 case IP_MULTICAST_IF:
1563 addr = mtod(*mp, struct in_addr *);
1564 (*mp)->m_len = sizeof(struct in_addr);
1565 if (imo == NULL || imo->imo_multicast_ifp == NULL)
1566 *addr = zeroin_addr;
1567 else if (imo->imo_multicast_addr.s_addr) {
1568 /* return the value user has set */
1569 *addr = imo->imo_multicast_addr;
1570 } else {
1571 IFP_TO_IA(imo->imo_multicast_ifp, ia);
1572 *addr = ia ? ia->ia_addr.sin_addr : zeroin_addr;
1573 }
1574 return (0);
1575
1576 case IP_MULTICAST_TTL:
1577 ttl = mtod(*mp, u_char *);
1578 (*mp)->m_len = 1;
1579 *ttl = imo ? imo->imo_multicast_ttl
1580 : IP_DEFAULT_MULTICAST_TTL;
1581 return (0);
1582
1583 case IP_MULTICAST_LOOP:
1584 loop = mtod(*mp, u_char *);
1585 (*mp)->m_len = 1;
1586 *loop = imo ? imo->imo_multicast_loop
1587 : IP_DEFAULT_MULTICAST_LOOP;
1588 return (0);
1589
1590 default:
1591 return (EOPNOTSUPP);
1592 }
1593 }
1594
1595 /*
1596 * Discard the IP multicast options.
1597 */
1598 void
1599 ip_freemoptions(imo)
1600 struct ip_moptions *imo;
1601 {
1602 int i;
1603
1604 if (imo != NULL) {
1605 for (i = 0; i < imo->imo_num_memberships; ++i)
1606 in_delmulti(imo->imo_membership[i]);
1607 free(imo, M_IPMOPTS);
1608 }
1609 }
1610
1611 /*
1612 * Routine called from ip_output() to loop back a copy of an IP multicast
1613 * packet to the input queue of a specified interface. Note that this
1614 * calls the output routine of the loopback "driver", but with an interface
1615 * pointer that might NOT be &loif -- easier than replicating that code here.
1616 */
1617 static void
1618 ip_mloopback(ifp, m, dst)
1619 struct ifnet *ifp;
1620 struct mbuf *m;
1621 struct sockaddr_in *dst;
1622 {
1623 struct ip *ip;
1624 struct mbuf *copym;
1625
1626 copym = m_copy(m, 0, M_COPYALL);
1627 if (copym != NULL
1628 && (copym->m_flags & M_EXT || copym->m_len < sizeof(struct ip)))
1629 copym = m_pullup(copym, sizeof(struct ip));
1630 if (copym != NULL) {
1631 /*
1632 * We don't bother to fragment if the IP length is greater
1633 * than the interface's MTU. Can this possibly matter?
1634 */
1635 ip = mtod(copym, struct ip *);
1636 HTONS(ip->ip_len);
1637 HTONS(ip->ip_off);
1638 ip->ip_sum = 0;
1639 ip->ip_sum = in_cksum(copym, ip->ip_hl << 2);
1640 (void) looutput(ifp, copym, sintosa(dst), NULL);
1641 }
1642 }
1643