ip_output.c revision 1.95.8.2 1 /* $NetBSD: ip_output.c,v 1.95.8.2 2002/07/15 10:36:59 gehenna Exp $ */
2
3 /*
4 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of the project nor the names of its contributors
16 * may be used to endorse or promote products derived from this software
17 * without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 */
31
32 /*-
33 * Copyright (c) 1998 The NetBSD Foundation, Inc.
34 * All rights reserved.
35 *
36 * This code is derived from software contributed to The NetBSD Foundation
37 * by Public Access Networks Corporation ("Panix"). It was developed under
38 * contract to Panix by Eric Haszlakiewicz and Thor Lancelot Simon.
39 *
40 * Redistribution and use in source and binary forms, with or without
41 * modification, are permitted provided that the following conditions
42 * are met:
43 * 1. Redistributions of source code must retain the above copyright
44 * notice, this list of conditions and the following disclaimer.
45 * 2. Redistributions in binary form must reproduce the above copyright
46 * notice, this list of conditions and the following disclaimer in the
47 * documentation and/or other materials provided with the distribution.
48 * 3. All advertising materials mentioning features or use of this software
49 * must display the following acknowledgement:
50 * This product includes software developed by the NetBSD
51 * Foundation, Inc. and its contributors.
52 * 4. Neither the name of The NetBSD Foundation nor the names of its
53 * contributors may be used to endorse or promote products derived
54 * from this software without specific prior written permission.
55 *
56 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
57 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
58 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
59 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
60 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
61 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
62 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
63 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
64 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
65 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
66 * POSSIBILITY OF SUCH DAMAGE.
67 */
68
69 /*
70 * Copyright (c) 1982, 1986, 1988, 1990, 1993
71 * The Regents of the University of California. All rights reserved.
72 *
73 * Redistribution and use in source and binary forms, with or without
74 * modification, are permitted provided that the following conditions
75 * are met:
76 * 1. Redistributions of source code must retain the above copyright
77 * notice, this list of conditions and the following disclaimer.
78 * 2. Redistributions in binary form must reproduce the above copyright
79 * notice, this list of conditions and the following disclaimer in the
80 * documentation and/or other materials provided with the distribution.
81 * 3. All advertising materials mentioning features or use of this software
82 * must display the following acknowledgement:
83 * This product includes software developed by the University of
84 * California, Berkeley and its contributors.
85 * 4. Neither the name of the University nor the names of its contributors
86 * may be used to endorse or promote products derived from this software
87 * without specific prior written permission.
88 *
89 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
90 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
91 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
92 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
93 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
94 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
95 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
96 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
97 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
98 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
99 * SUCH DAMAGE.
100 *
101 * @(#)ip_output.c 8.3 (Berkeley) 1/21/94
102 */
103
104 #include <sys/cdefs.h>
105 __KERNEL_RCSID(0, "$NetBSD: ip_output.c,v 1.95.8.2 2002/07/15 10:36:59 gehenna Exp $");
106
107 #include "opt_pfil_hooks.h"
108 #include "opt_ipsec.h"
109 #include "opt_mrouting.h"
110
111 #include <sys/param.h>
112 #include <sys/malloc.h>
113 #include <sys/mbuf.h>
114 #include <sys/errno.h>
115 #include <sys/protosw.h>
116 #include <sys/socket.h>
117 #include <sys/socketvar.h>
118 #include <sys/systm.h>
119 #include <sys/proc.h>
120
121 #include <net/if.h>
122 #include <net/route.h>
123 #include <net/pfil.h>
124
125 #include <netinet/in.h>
126 #include <netinet/in_systm.h>
127 #include <netinet/ip.h>
128 #include <netinet/in_pcb.h>
129 #include <netinet/in_var.h>
130 #include <netinet/ip_var.h>
131
132 #ifdef MROUTING
133 #include <netinet/ip_mroute.h>
134 #endif
135
136 #include <machine/stdarg.h>
137
138 #ifdef IPSEC
139 #include <netinet6/ipsec.h>
140 #include <netkey/key.h>
141 #include <netkey/key_debug.h>
142 #endif /*IPSEC*/
143
144 static struct mbuf *ip_insertoptions __P((struct mbuf *, struct mbuf *, int *));
145 static struct ifnet *ip_multicast_if __P((struct in_addr *, int *));
146 static void ip_mloopback
147 __P((struct ifnet *, struct mbuf *, struct sockaddr_in *));
148
149 #ifdef PFIL_HOOKS
150 extern struct pfil_head inet_pfil_hook; /* XXX */
151 #endif
152
153 /*
154 * IP output. The packet in mbuf chain m contains a skeletal IP
155 * header (with len, off, ttl, proto, tos, src, dst).
156 * The mbuf chain containing the packet will be freed.
157 * The mbuf opt, if present, will not be freed.
158 */
159 int
160 #if __STDC__
161 ip_output(struct mbuf *m0, ...)
162 #else
163 ip_output(m0, va_alist)
164 struct mbuf *m0;
165 va_dcl
166 #endif
167 {
168 struct ip *ip, *mhip;
169 struct ifnet *ifp;
170 struct mbuf *m = m0;
171 int hlen = sizeof (struct ip);
172 int len, off, error = 0;
173 struct route iproute;
174 struct sockaddr_in *dst;
175 struct in_ifaddr *ia;
176 struct mbuf *opt;
177 struct route *ro;
178 int flags, sw_csum;
179 int *mtu_p;
180 u_long mtu;
181 struct ip_moptions *imo;
182 va_list ap;
183 #ifdef IPSEC
184 struct socket *so;
185 struct secpolicy *sp = NULL;
186 #endif /*IPSEC*/
187 u_int16_t ip_len;
188
189 va_start(ap, m0);
190 opt = va_arg(ap, struct mbuf *);
191 ro = va_arg(ap, struct route *);
192 flags = va_arg(ap, int);
193 imo = va_arg(ap, struct ip_moptions *);
194 if (flags & IP_RETURNMTU)
195 mtu_p = va_arg(ap, int *);
196 else
197 mtu_p = NULL;
198 va_end(ap);
199
200 #ifdef IPSEC
201 so = ipsec_getsocket(m);
202 (void)ipsec_setsocket(m, NULL);
203 #endif /*IPSEC*/
204
205 #ifdef DIAGNOSTIC
206 if ((m->m_flags & M_PKTHDR) == 0)
207 panic("ip_output no HDR");
208 #endif
209 if (opt) {
210 m = ip_insertoptions(m, opt, &len);
211 hlen = len;
212 }
213 ip = mtod(m, struct ip *);
214 /*
215 * Fill in IP header.
216 */
217 if ((flags & (IP_FORWARDING|IP_RAWOUTPUT)) == 0) {
218 ip->ip_v = IPVERSION;
219 ip->ip_off = 0;
220 ip->ip_id = htons(ip_id++);
221 ip->ip_hl = hlen >> 2;
222 ipstat.ips_localout++;
223 } else {
224 hlen = ip->ip_hl << 2;
225 }
226 /*
227 * Route packet.
228 */
229 if (ro == 0) {
230 ro = &iproute;
231 bzero((caddr_t)ro, sizeof (*ro));
232 }
233 dst = satosin(&ro->ro_dst);
234 /*
235 * If there is a cached route,
236 * check that it is to the same destination
237 * and is still up. If not, free it and try again.
238 * The address family should also be checked in case of sharing the
239 * cache with IPv6.
240 */
241 if (ro->ro_rt && ((ro->ro_rt->rt_flags & RTF_UP) == 0 ||
242 dst->sin_family != AF_INET ||
243 !in_hosteq(dst->sin_addr, ip->ip_dst))) {
244 RTFREE(ro->ro_rt);
245 ro->ro_rt = (struct rtentry *)0;
246 }
247 if (ro->ro_rt == 0) {
248 bzero(dst, sizeof(*dst));
249 dst->sin_family = AF_INET;
250 dst->sin_len = sizeof(*dst);
251 dst->sin_addr = ip->ip_dst;
252 }
253 /*
254 * If routing to interface only,
255 * short circuit routing lookup.
256 */
257 if (flags & IP_ROUTETOIF) {
258 if ((ia = ifatoia(ifa_ifwithladdr(sintosa(dst)))) == 0) {
259 ipstat.ips_noroute++;
260 error = ENETUNREACH;
261 goto bad;
262 }
263 ifp = ia->ia_ifp;
264 mtu = ifp->if_mtu;
265 ip->ip_ttl = 1;
266 } else if ((IN_MULTICAST(ip->ip_dst.s_addr) ||
267 ip->ip_dst.s_addr == INADDR_BROADCAST) &&
268 imo != NULL && imo->imo_multicast_ifp != NULL) {
269 ifp = imo->imo_multicast_ifp;
270 mtu = ifp->if_mtu;
271 IFP_TO_IA(ifp, ia);
272 } else {
273 if (ro->ro_rt == 0)
274 rtalloc(ro);
275 if (ro->ro_rt == 0) {
276 ipstat.ips_noroute++;
277 error = EHOSTUNREACH;
278 goto bad;
279 }
280 ia = ifatoia(ro->ro_rt->rt_ifa);
281 ifp = ro->ro_rt->rt_ifp;
282 if ((mtu = ro->ro_rt->rt_rmx.rmx_mtu) == 0)
283 mtu = ifp->if_mtu;
284 ro->ro_rt->rt_use++;
285 if (ro->ro_rt->rt_flags & RTF_GATEWAY)
286 dst = satosin(ro->ro_rt->rt_gateway);
287 }
288 if (IN_MULTICAST(ip->ip_dst.s_addr) ||
289 (ip->ip_dst.s_addr == INADDR_BROADCAST)) {
290 struct in_multi *inm;
291
292 m->m_flags |= (ip->ip_dst.s_addr == INADDR_BROADCAST) ?
293 M_BCAST : M_MCAST;
294 /*
295 * IP destination address is multicast. Make sure "dst"
296 * still points to the address in "ro". (It may have been
297 * changed to point to a gateway address, above.)
298 */
299 dst = satosin(&ro->ro_dst);
300 /*
301 * See if the caller provided any multicast options
302 */
303 if (imo != NULL)
304 ip->ip_ttl = imo->imo_multicast_ttl;
305 else
306 ip->ip_ttl = IP_DEFAULT_MULTICAST_TTL;
307
308 /*
309 * if we don't know the outgoing ifp yet, we can't generate
310 * output
311 */
312 if (!ifp) {
313 ipstat.ips_noroute++;
314 error = ENETUNREACH;
315 goto bad;
316 }
317
318 /*
319 * If the packet is multicast or broadcast, confirm that
320 * the outgoing interface can transmit it.
321 */
322 if (((m->m_flags & M_MCAST) &&
323 (ifp->if_flags & IFF_MULTICAST) == 0) ||
324 ((m->m_flags & M_BCAST) &&
325 (ifp->if_flags & (IFF_BROADCAST|IFF_POINTOPOINT)) == 0)) {
326 ipstat.ips_noroute++;
327 error = ENETUNREACH;
328 goto bad;
329 }
330 /*
331 * If source address not specified yet, use an address
332 * of outgoing interface.
333 */
334 if (in_nullhost(ip->ip_src)) {
335 struct in_ifaddr *ia;
336
337 IFP_TO_IA(ifp, ia);
338 if (!ia) {
339 error = EADDRNOTAVAIL;
340 goto bad;
341 }
342 ip->ip_src = ia->ia_addr.sin_addr;
343 }
344
345 IN_LOOKUP_MULTI(ip->ip_dst, ifp, inm);
346 if (inm != NULL &&
347 (imo == NULL || imo->imo_multicast_loop)) {
348 /*
349 * If we belong to the destination multicast group
350 * on the outgoing interface, and the caller did not
351 * forbid loopback, loop back a copy.
352 */
353 ip_mloopback(ifp, m, dst);
354 }
355 #ifdef MROUTING
356 else {
357 /*
358 * If we are acting as a multicast router, perform
359 * multicast forwarding as if the packet had just
360 * arrived on the interface to which we are about
361 * to send. The multicast forwarding function
362 * recursively calls this function, using the
363 * IP_FORWARDING flag to prevent infinite recursion.
364 *
365 * Multicasts that are looped back by ip_mloopback(),
366 * above, will be forwarded by the ip_input() routine,
367 * if necessary.
368 */
369 extern struct socket *ip_mrouter;
370
371 if (ip_mrouter && (flags & IP_FORWARDING) == 0) {
372 if (ip_mforward(m, ifp) != 0) {
373 m_freem(m);
374 goto done;
375 }
376 }
377 }
378 #endif
379 /*
380 * Multicasts with a time-to-live of zero may be looped-
381 * back, above, but must not be transmitted on a network.
382 * Also, multicasts addressed to the loopback interface
383 * are not sent -- the above call to ip_mloopback() will
384 * loop back a copy if this host actually belongs to the
385 * destination group on the loopback interface.
386 */
387 if (ip->ip_ttl == 0 || (ifp->if_flags & IFF_LOOPBACK) != 0) {
388 m_freem(m);
389 goto done;
390 }
391
392 goto sendit;
393 }
394 #ifndef notdef
395 /*
396 * If source address not specified yet, use address
397 * of outgoing interface.
398 */
399 if (in_nullhost(ip->ip_src))
400 ip->ip_src = ia->ia_addr.sin_addr;
401 #endif
402
403 /*
404 * packets with Class-D address as source are not valid per
405 * RFC 1112
406 */
407 if (IN_MULTICAST(ip->ip_src.s_addr)) {
408 ipstat.ips_odropped++;
409 error = EADDRNOTAVAIL;
410 goto bad;
411 }
412
413 /*
414 * Look for broadcast address and
415 * and verify user is allowed to send
416 * such a packet.
417 */
418 if (in_broadcast(dst->sin_addr, ifp)) {
419 if ((ifp->if_flags & IFF_BROADCAST) == 0) {
420 error = EADDRNOTAVAIL;
421 goto bad;
422 }
423 if ((flags & IP_ALLOWBROADCAST) == 0) {
424 error = EACCES;
425 goto bad;
426 }
427 /* don't allow broadcast messages to be fragmented */
428 if ((u_int16_t)ip->ip_len > ifp->if_mtu) {
429 error = EMSGSIZE;
430 goto bad;
431 }
432 m->m_flags |= M_BCAST;
433 } else
434 m->m_flags &= ~M_BCAST;
435
436 sendit:
437 /*
438 * If we're doing Path MTU Discovery, we need to set DF unless
439 * the route's MTU is locked.
440 */
441 if ((flags & IP_MTUDISC) != 0 && ro->ro_rt != NULL &&
442 (ro->ro_rt->rt_rmx.rmx_locks & RTV_MTU) == 0)
443 ip->ip_off |= IP_DF;
444
445 /*
446 * Remember the current ip_len and ip_off, and swap them into
447 * network order.
448 */
449 ip_len = ip->ip_len;
450
451 HTONS(ip->ip_len);
452 HTONS(ip->ip_off);
453
454 #ifdef IPSEC
455 /* get SP for this packet */
456 if (so == NULL)
457 sp = ipsec4_getpolicybyaddr(m, IPSEC_DIR_OUTBOUND, flags, &error);
458 else
459 sp = ipsec4_getpolicybysock(m, IPSEC_DIR_OUTBOUND, so, &error);
460
461 if (sp == NULL) {
462 ipsecstat.out_inval++;
463 goto bad;
464 }
465
466 error = 0;
467
468 /* check policy */
469 switch (sp->policy) {
470 case IPSEC_POLICY_DISCARD:
471 /*
472 * This packet is just discarded.
473 */
474 ipsecstat.out_polvio++;
475 goto bad;
476
477 case IPSEC_POLICY_BYPASS:
478 case IPSEC_POLICY_NONE:
479 /* no need to do IPsec. */
480 goto skip_ipsec;
481
482 case IPSEC_POLICY_IPSEC:
483 if (sp->req == NULL) {
484 /* XXX should be panic ? */
485 printf("ip_output: No IPsec request specified.\n");
486 error = EINVAL;
487 goto bad;
488 }
489 break;
490
491 case IPSEC_POLICY_ENTRUST:
492 default:
493 printf("ip_output: Invalid policy found. %d\n", sp->policy);
494 }
495
496 /*
497 * ipsec4_output() expects ip_len and ip_off in network
498 * order. They have been set to network order above.
499 */
500
501 {
502 struct ipsec_output_state state;
503 bzero(&state, sizeof(state));
504 state.m = m;
505 if (flags & IP_ROUTETOIF) {
506 state.ro = &iproute;
507 bzero(&iproute, sizeof(iproute));
508 } else
509 state.ro = ro;
510 state.dst = (struct sockaddr *)dst;
511
512 /*
513 * We can't defer the checksum of payload data if
514 * we're about to encrypt/authenticate it.
515 *
516 * XXX When we support crypto offloading functions of
517 * XXX network interfaces, we need to reconsider this,
518 * XXX since it's likely that they'll support checksumming,
519 * XXX as well.
520 */
521 if (m->m_pkthdr.csum_flags & (M_CSUM_TCPv4|M_CSUM_UDPv4)) {
522 in_delayed_cksum(m);
523 m->m_pkthdr.csum_flags &= ~(M_CSUM_TCPv4|M_CSUM_UDPv4);
524 }
525
526 error = ipsec4_output(&state, sp, flags);
527
528 m = state.m;
529 if (flags & IP_ROUTETOIF) {
530 /*
531 * if we have tunnel mode SA, we may need to ignore
532 * IP_ROUTETOIF.
533 */
534 if (state.ro != &iproute || state.ro->ro_rt != NULL) {
535 flags &= ~IP_ROUTETOIF;
536 ro = state.ro;
537 }
538 } else
539 ro = state.ro;
540 dst = (struct sockaddr_in *)state.dst;
541 if (error) {
542 /* mbuf is already reclaimed in ipsec4_output. */
543 m0 = NULL;
544 switch (error) {
545 case EHOSTUNREACH:
546 case ENETUNREACH:
547 case EMSGSIZE:
548 case ENOBUFS:
549 case ENOMEM:
550 break;
551 default:
552 printf("ip4_output (ipsec): error code %d\n", error);
553 /*fall through*/
554 case ENOENT:
555 /* don't show these error codes to the user */
556 error = 0;
557 break;
558 }
559 goto bad;
560 }
561
562 /* be sure to update variables that are affected by ipsec4_output() */
563 ip = mtod(m, struct ip *);
564 #ifdef _IP_VHL
565 hlen = IP_VHL_HL(ip->ip_vhl) << 2;
566 #else
567 hlen = ip->ip_hl << 2;
568 #endif
569 ip_len = ntohs(ip->ip_len);
570
571 if (ro->ro_rt == NULL) {
572 if ((flags & IP_ROUTETOIF) == 0) {
573 printf("ip_output: "
574 "can't update route after IPsec processing\n");
575 error = EHOSTUNREACH; /*XXX*/
576 goto bad;
577 }
578 } else {
579 /* nobody uses ia beyond here */
580 if (state.encap)
581 ifp = ro->ro_rt->rt_ifp;
582 }
583 }
584
585 skip_ipsec:
586 #endif /*IPSEC*/
587
588 #ifdef PFIL_HOOKS
589 /*
590 * Run through list of hooks for output packets.
591 */
592 if ((error = pfil_run_hooks(&inet_pfil_hook, &m, ifp,
593 PFIL_OUT)) != 0)
594 goto done;
595 if (m == NULL)
596 goto done;
597
598 ip = mtod(m, struct ip *);
599 #endif /* PFIL_HOOKS */
600
601 /*
602 * If small enough for mtu of path, can just send directly.
603 */
604 if (ip_len <= mtu) {
605 #if IFA_STATS
606 /*
607 * search for the source address structure to
608 * maintain output statistics.
609 */
610 INADDR_TO_IA(ip->ip_src, ia);
611 if (ia)
612 ia->ia_ifa.ifa_data.ifad_outbytes += ip_len;
613 #endif
614 /*
615 * Always initialize the sum to 0! Some HW assisted
616 * checksumming requires this.
617 */
618 ip->ip_sum = 0;
619 m->m_pkthdr.csum_flags |= M_CSUM_IPv4;
620
621 sw_csum = m->m_pkthdr.csum_flags & ~ifp->if_csum_flags_tx;
622
623 /*
624 * Perform any checksums that the hardware can't do
625 * for us.
626 *
627 * XXX Does any hardware require the {th,uh}_sum
628 * XXX fields to be 0?
629 */
630 if (sw_csum & M_CSUM_IPv4)
631 ip->ip_sum = in_cksum(m, hlen);
632 if (sw_csum & (M_CSUM_TCPv4|M_CSUM_UDPv4)) {
633 in_delayed_cksum(m);
634 sw_csum &= ~(M_CSUM_TCPv4|M_CSUM_UDPv4);
635 }
636 m->m_pkthdr.csum_flags &= ifp->if_csum_flags_tx;
637
638 #ifdef IPSEC
639 /* clean ipsec history once it goes out of the node */
640 ipsec_delaux(m);
641 #endif
642 error = (*ifp->if_output)(ifp, m, sintosa(dst), ro->ro_rt);
643 goto done;
644 }
645
646 /*
647 * We can't use HW checksumming if we're about to
648 * to fragment the packet.
649 *
650 * XXX Some hardware can do this.
651 */
652 if (m->m_pkthdr.csum_flags & (M_CSUM_TCPv4|M_CSUM_UDPv4)) {
653 in_delayed_cksum(m);
654 m->m_pkthdr.csum_flags &= ~(M_CSUM_TCPv4|M_CSUM_UDPv4);
655 }
656
657 /*
658 * Too large for interface; fragment if possible.
659 * Must be able to put at least 8 bytes per fragment.
660 *
661 * Note we swap ip_len and ip_off into host order to make
662 * the logic below a little simpler.
663 */
664
665 NTOHS(ip->ip_len);
666 NTOHS(ip->ip_off);
667
668 if (ip->ip_off & IP_DF) {
669 if (flags & IP_RETURNMTU)
670 *mtu_p = mtu;
671 error = EMSGSIZE;
672 ipstat.ips_cantfrag++;
673 goto bad;
674 }
675 len = (mtu - hlen) &~ 7;
676 if (len < 8) {
677 error = EMSGSIZE;
678 goto bad;
679 }
680
681 {
682 int mhlen, firstlen = len;
683 struct mbuf **mnext = &m->m_nextpkt;
684 int fragments = 0;
685 int s;
686
687 /*
688 * Loop through length of segment after first fragment,
689 * make new header and copy data of each part and link onto chain.
690 */
691 m0 = m;
692 mhlen = sizeof (struct ip);
693 for (off = hlen + len; off < (u_int16_t)ip->ip_len; off += len) {
694 MGETHDR(m, M_DONTWAIT, MT_HEADER);
695 if (m == 0) {
696 error = ENOBUFS;
697 ipstat.ips_odropped++;
698 goto sendorfree;
699 }
700 *mnext = m;
701 mnext = &m->m_nextpkt;
702 m->m_data += max_linkhdr;
703 mhip = mtod(m, struct ip *);
704 *mhip = *ip;
705 /* we must inherit MCAST and BCAST flags */
706 m->m_flags |= m0->m_flags & (M_MCAST|M_BCAST);
707 if (hlen > sizeof (struct ip)) {
708 mhlen = ip_optcopy(ip, mhip) + sizeof (struct ip);
709 mhip->ip_hl = mhlen >> 2;
710 }
711 m->m_len = mhlen;
712 mhip->ip_off = ((off - hlen) >> 3) + (ip->ip_off & ~IP_MF);
713 if (ip->ip_off & IP_MF)
714 mhip->ip_off |= IP_MF;
715 if (off + len >= (u_int16_t)ip->ip_len)
716 len = (u_int16_t)ip->ip_len - off;
717 else
718 mhip->ip_off |= IP_MF;
719 mhip->ip_len = htons((u_int16_t)(len + mhlen));
720 m->m_next = m_copy(m0, off, len);
721 if (m->m_next == 0) {
722 error = ENOBUFS; /* ??? */
723 ipstat.ips_odropped++;
724 goto sendorfree;
725 }
726 m->m_pkthdr.len = mhlen + len;
727 m->m_pkthdr.rcvif = (struct ifnet *)0;
728 HTONS(mhip->ip_off);
729 mhip->ip_sum = 0;
730 mhip->ip_sum = in_cksum(m, mhlen);
731 ipstat.ips_ofragments++;
732 fragments++;
733 }
734 /*
735 * Update first fragment by trimming what's been copied out
736 * and updating header, then send each fragment (in order).
737 */
738 m = m0;
739 m_adj(m, hlen + firstlen - (u_int16_t)ip->ip_len);
740 m->m_pkthdr.len = hlen + firstlen;
741 ip->ip_len = htons((u_int16_t)m->m_pkthdr.len);
742 ip->ip_off |= IP_MF;
743 HTONS(ip->ip_off);
744 ip->ip_sum = 0;
745 ip->ip_sum = in_cksum(m, hlen);
746 sendorfree:
747 /*
748 * If there is no room for all the fragments, don't queue
749 * any of them.
750 */
751 s = splnet();
752 if (ifp->if_snd.ifq_maxlen - ifp->if_snd.ifq_len < fragments)
753 error = ENOBUFS;
754 splx(s);
755 for (m = m0; m; m = m0) {
756 m0 = m->m_nextpkt;
757 m->m_nextpkt = 0;
758 if (error == 0) {
759 #if IFA_STATS
760 /*
761 * search for the source address structure to
762 * maintain output statistics.
763 */
764 INADDR_TO_IA(ip->ip_src, ia);
765 if (ia) {
766 ia->ia_ifa.ifa_data.ifad_outbytes +=
767 ntohs(ip->ip_len);
768 }
769 #endif
770 #ifdef IPSEC
771 /* clean ipsec history once it goes out of the node */
772 ipsec_delaux(m);
773 #endif
774 error = (*ifp->if_output)(ifp, m, sintosa(dst),
775 ro->ro_rt);
776 } else
777 m_freem(m);
778 }
779
780 if (error == 0)
781 ipstat.ips_fragmented++;
782 }
783 done:
784 if (ro == &iproute && (flags & IP_ROUTETOIF) == 0 && ro->ro_rt) {
785 RTFREE(ro->ro_rt);
786 ro->ro_rt = 0;
787 }
788
789 #ifdef IPSEC
790 if (sp != NULL) {
791 KEYDEBUG(KEYDEBUG_IPSEC_STAMP,
792 printf("DP ip_output call free SP:%p\n", sp));
793 key_freesp(sp);
794 }
795 #endif /* IPSEC */
796
797 return (error);
798 bad:
799 m_freem(m);
800 goto done;
801 }
802
803 /*
804 * Process a delayed payload checksum calculation.
805 */
806 void
807 in_delayed_cksum(struct mbuf *m)
808 {
809 struct ip *ip;
810 u_int16_t csum, offset;
811
812 ip = mtod(m, struct ip *);
813 offset = ip->ip_hl << 2;
814 csum = in4_cksum(m, 0, offset, ntohs(ip->ip_len) - offset);
815 if (csum == 0 && (m->m_pkthdr.csum_flags & M_CSUM_UDPv4) != 0)
816 csum = 0xffff;
817
818 offset += m->m_pkthdr.csum_data; /* checksum offset */
819
820 if ((offset + sizeof(u_int16_t)) > m->m_len) {
821 /* This happen when ip options were inserted
822 printf("in_delayed_cksum: pullup len %d off %d proto %d\n",
823 m->m_len, offset, ip->ip_p);
824 */
825 m_copyback(m, offset, sizeof(csum), (caddr_t) &csum);
826 } else
827 *(u_int16_t *)(mtod(m, caddr_t) + offset) = csum;
828 }
829
830 /*
831 * Determine the maximum length of the options to be inserted;
832 * we would far rather allocate too much space rather than too little.
833 */
834
835 u_int
836 ip_optlen(inp)
837 struct inpcb *inp;
838 {
839 struct mbuf *m = inp->inp_options;
840
841 if (m && m->m_len > offsetof(struct ipoption, ipopt_dst))
842 return(m->m_len - offsetof(struct ipoption, ipopt_dst));
843 else
844 return 0;
845 }
846
847
848 /*
849 * Insert IP options into preformed packet.
850 * Adjust IP destination as required for IP source routing,
851 * as indicated by a non-zero in_addr at the start of the options.
852 */
853 static struct mbuf *
854 ip_insertoptions(m, opt, phlen)
855 struct mbuf *m;
856 struct mbuf *opt;
857 int *phlen;
858 {
859 struct ipoption *p = mtod(opt, struct ipoption *);
860 struct mbuf *n;
861 struct ip *ip = mtod(m, struct ip *);
862 unsigned optlen;
863
864 optlen = opt->m_len - sizeof(p->ipopt_dst);
865 if (optlen + (u_int16_t)ip->ip_len > IP_MAXPACKET)
866 return (m); /* XXX should fail */
867 if (!in_nullhost(p->ipopt_dst))
868 ip->ip_dst = p->ipopt_dst;
869 if (m->m_flags & M_EXT || m->m_data - optlen < m->m_pktdat) {
870 MGETHDR(n, M_DONTWAIT, MT_HEADER);
871 if (n == 0)
872 return (m);
873 M_COPY_PKTHDR(n, m);
874 m->m_flags &= ~M_PKTHDR;
875 m->m_len -= sizeof(struct ip);
876 m->m_data += sizeof(struct ip);
877 n->m_next = m;
878 m = n;
879 m->m_len = optlen + sizeof(struct ip);
880 m->m_data += max_linkhdr;
881 bcopy((caddr_t)ip, mtod(m, caddr_t), sizeof(struct ip));
882 } else {
883 m->m_data -= optlen;
884 m->m_len += optlen;
885 memmove(mtod(m, caddr_t), ip, sizeof(struct ip));
886 }
887 m->m_pkthdr.len += optlen;
888 ip = mtod(m, struct ip *);
889 bcopy((caddr_t)p->ipopt_list, (caddr_t)(ip + 1), (unsigned)optlen);
890 *phlen = sizeof(struct ip) + optlen;
891 ip->ip_len += optlen;
892 return (m);
893 }
894
895 /*
896 * Copy options from ip to jp,
897 * omitting those not copied during fragmentation.
898 */
899 int
900 ip_optcopy(ip, jp)
901 struct ip *ip, *jp;
902 {
903 u_char *cp, *dp;
904 int opt, optlen, cnt;
905
906 cp = (u_char *)(ip + 1);
907 dp = (u_char *)(jp + 1);
908 cnt = (ip->ip_hl << 2) - sizeof (struct ip);
909 for (; cnt > 0; cnt -= optlen, cp += optlen) {
910 opt = cp[0];
911 if (opt == IPOPT_EOL)
912 break;
913 if (opt == IPOPT_NOP) {
914 /* Preserve for IP mcast tunnel's LSRR alignment. */
915 *dp++ = IPOPT_NOP;
916 optlen = 1;
917 continue;
918 }
919 #ifdef DIAGNOSTIC
920 if (cnt < IPOPT_OLEN + sizeof(*cp))
921 panic("malformed IPv4 option passed to ip_optcopy");
922 #endif
923 optlen = cp[IPOPT_OLEN];
924 #ifdef DIAGNOSTIC
925 if (optlen < IPOPT_OLEN + sizeof(*cp) || optlen > cnt)
926 panic("malformed IPv4 option passed to ip_optcopy");
927 #endif
928 /* bogus lengths should have been caught by ip_dooptions */
929 if (optlen > cnt)
930 optlen = cnt;
931 if (IPOPT_COPIED(opt)) {
932 bcopy((caddr_t)cp, (caddr_t)dp, (unsigned)optlen);
933 dp += optlen;
934 }
935 }
936 for (optlen = dp - (u_char *)(jp+1); optlen & 0x3; optlen++)
937 *dp++ = IPOPT_EOL;
938 return (optlen);
939 }
940
941 /*
942 * IP socket option processing.
943 */
944 int
945 ip_ctloutput(op, so, level, optname, mp)
946 int op;
947 struct socket *so;
948 int level, optname;
949 struct mbuf **mp;
950 {
951 struct inpcb *inp = sotoinpcb(so);
952 struct mbuf *m = *mp;
953 int optval = 0;
954 int error = 0;
955 #ifdef IPSEC
956 #ifdef __NetBSD__
957 struct proc *p = curproc; /*XXX*/
958 #endif
959 #endif
960
961 if (level != IPPROTO_IP) {
962 error = EINVAL;
963 if (op == PRCO_SETOPT && *mp)
964 (void) m_free(*mp);
965 } else switch (op) {
966
967 case PRCO_SETOPT:
968 switch (optname) {
969 case IP_OPTIONS:
970 #ifdef notyet
971 case IP_RETOPTS:
972 return (ip_pcbopts(optname, &inp->inp_options, m));
973 #else
974 return (ip_pcbopts(&inp->inp_options, m));
975 #endif
976
977 case IP_TOS:
978 case IP_TTL:
979 case IP_RECVOPTS:
980 case IP_RECVRETOPTS:
981 case IP_RECVDSTADDR:
982 case IP_RECVIF:
983 if (m == NULL || m->m_len != sizeof(int))
984 error = EINVAL;
985 else {
986 optval = *mtod(m, int *);
987 switch (optname) {
988
989 case IP_TOS:
990 inp->inp_ip.ip_tos = optval;
991 break;
992
993 case IP_TTL:
994 inp->inp_ip.ip_ttl = optval;
995 break;
996 #define OPTSET(bit) \
997 if (optval) \
998 inp->inp_flags |= bit; \
999 else \
1000 inp->inp_flags &= ~bit;
1001
1002 case IP_RECVOPTS:
1003 OPTSET(INP_RECVOPTS);
1004 break;
1005
1006 case IP_RECVRETOPTS:
1007 OPTSET(INP_RECVRETOPTS);
1008 break;
1009
1010 case IP_RECVDSTADDR:
1011 OPTSET(INP_RECVDSTADDR);
1012 break;
1013
1014 case IP_RECVIF:
1015 OPTSET(INP_RECVIF);
1016 break;
1017 }
1018 }
1019 break;
1020 #undef OPTSET
1021
1022 case IP_MULTICAST_IF:
1023 case IP_MULTICAST_TTL:
1024 case IP_MULTICAST_LOOP:
1025 case IP_ADD_MEMBERSHIP:
1026 case IP_DROP_MEMBERSHIP:
1027 error = ip_setmoptions(optname, &inp->inp_moptions, m);
1028 break;
1029
1030 case IP_PORTRANGE:
1031 if (m == 0 || m->m_len != sizeof(int))
1032 error = EINVAL;
1033 else {
1034 optval = *mtod(m, int *);
1035
1036 switch (optval) {
1037
1038 case IP_PORTRANGE_DEFAULT:
1039 case IP_PORTRANGE_HIGH:
1040 inp->inp_flags &= ~(INP_LOWPORT);
1041 break;
1042
1043 case IP_PORTRANGE_LOW:
1044 inp->inp_flags |= INP_LOWPORT;
1045 break;
1046
1047 default:
1048 error = EINVAL;
1049 break;
1050 }
1051 }
1052 break;
1053
1054 #ifdef IPSEC
1055 case IP_IPSEC_POLICY:
1056 {
1057 caddr_t req = NULL;
1058 size_t len = 0;
1059 int priv = 0;
1060
1061 #ifdef __NetBSD__
1062 if (p == 0 || suser(p->p_ucred, &p->p_acflag))
1063 priv = 0;
1064 else
1065 priv = 1;
1066 #else
1067 priv = (in6p->in6p_socket->so_state & SS_PRIV);
1068 #endif
1069 if (m) {
1070 req = mtod(m, caddr_t);
1071 len = m->m_len;
1072 }
1073 error = ipsec4_set_policy(inp, optname, req, len, priv);
1074 break;
1075 }
1076 #endif /*IPSEC*/
1077
1078 default:
1079 error = ENOPROTOOPT;
1080 break;
1081 }
1082 if (m)
1083 (void)m_free(m);
1084 break;
1085
1086 case PRCO_GETOPT:
1087 switch (optname) {
1088 case IP_OPTIONS:
1089 case IP_RETOPTS:
1090 *mp = m = m_get(M_WAIT, MT_SOOPTS);
1091 if (inp->inp_options) {
1092 m->m_len = inp->inp_options->m_len;
1093 bcopy(mtod(inp->inp_options, caddr_t),
1094 mtod(m, caddr_t), (unsigned)m->m_len);
1095 } else
1096 m->m_len = 0;
1097 break;
1098
1099 case IP_TOS:
1100 case IP_TTL:
1101 case IP_RECVOPTS:
1102 case IP_RECVRETOPTS:
1103 case IP_RECVDSTADDR:
1104 case IP_RECVIF:
1105 case IP_ERRORMTU:
1106 *mp = m = m_get(M_WAIT, MT_SOOPTS);
1107 m->m_len = sizeof(int);
1108 switch (optname) {
1109
1110 case IP_TOS:
1111 optval = inp->inp_ip.ip_tos;
1112 break;
1113
1114 case IP_TTL:
1115 optval = inp->inp_ip.ip_ttl;
1116 break;
1117
1118 case IP_ERRORMTU:
1119 optval = inp->inp_errormtu;
1120 break;
1121
1122 #define OPTBIT(bit) (inp->inp_flags & bit ? 1 : 0)
1123
1124 case IP_RECVOPTS:
1125 optval = OPTBIT(INP_RECVOPTS);
1126 break;
1127
1128 case IP_RECVRETOPTS:
1129 optval = OPTBIT(INP_RECVRETOPTS);
1130 break;
1131
1132 case IP_RECVDSTADDR:
1133 optval = OPTBIT(INP_RECVDSTADDR);
1134 break;
1135
1136 case IP_RECVIF:
1137 optval = OPTBIT(INP_RECVIF);
1138 break;
1139 }
1140 *mtod(m, int *) = optval;
1141 break;
1142
1143 #ifdef IPSEC
1144 case IP_IPSEC_POLICY:
1145 {
1146 caddr_t req = NULL;
1147 size_t len = 0;
1148
1149 if (m) {
1150 req = mtod(m, caddr_t);
1151 len = m->m_len;
1152 }
1153 error = ipsec4_get_policy(inp, req, len, mp);
1154 break;
1155 }
1156 #endif /*IPSEC*/
1157
1158 case IP_MULTICAST_IF:
1159 case IP_MULTICAST_TTL:
1160 case IP_MULTICAST_LOOP:
1161 case IP_ADD_MEMBERSHIP:
1162 case IP_DROP_MEMBERSHIP:
1163 error = ip_getmoptions(optname, inp->inp_moptions, mp);
1164 break;
1165
1166 case IP_PORTRANGE:
1167 *mp = m = m_get(M_WAIT, MT_SOOPTS);
1168 m->m_len = sizeof(int);
1169
1170 if (inp->inp_flags & INP_LOWPORT)
1171 optval = IP_PORTRANGE_LOW;
1172 else
1173 optval = IP_PORTRANGE_DEFAULT;
1174
1175 *mtod(m, int *) = optval;
1176 break;
1177
1178 default:
1179 error = ENOPROTOOPT;
1180 break;
1181 }
1182 break;
1183 }
1184 return (error);
1185 }
1186
1187 /*
1188 * Set up IP options in pcb for insertion in output packets.
1189 * Store in mbuf with pointer in pcbopt, adding pseudo-option
1190 * with destination address if source routed.
1191 */
1192 int
1193 #ifdef notyet
1194 ip_pcbopts(optname, pcbopt, m)
1195 int optname;
1196 #else
1197 ip_pcbopts(pcbopt, m)
1198 #endif
1199 struct mbuf **pcbopt;
1200 struct mbuf *m;
1201 {
1202 int cnt, optlen;
1203 u_char *cp;
1204 u_char opt;
1205
1206 /* turn off any old options */
1207 if (*pcbopt)
1208 (void)m_free(*pcbopt);
1209 *pcbopt = 0;
1210 if (m == (struct mbuf *)0 || m->m_len == 0) {
1211 /*
1212 * Only turning off any previous options.
1213 */
1214 if (m)
1215 (void)m_free(m);
1216 return (0);
1217 }
1218
1219 #ifndef __vax__
1220 if (m->m_len % sizeof(int32_t))
1221 goto bad;
1222 #endif
1223 /*
1224 * IP first-hop destination address will be stored before
1225 * actual options; move other options back
1226 * and clear it when none present.
1227 */
1228 if (m->m_data + m->m_len + sizeof(struct in_addr) >= &m->m_dat[MLEN])
1229 goto bad;
1230 cnt = m->m_len;
1231 m->m_len += sizeof(struct in_addr);
1232 cp = mtod(m, u_char *) + sizeof(struct in_addr);
1233 memmove(cp, mtod(m, caddr_t), (unsigned)cnt);
1234 bzero(mtod(m, caddr_t), sizeof(struct in_addr));
1235
1236 for (; cnt > 0; cnt -= optlen, cp += optlen) {
1237 opt = cp[IPOPT_OPTVAL];
1238 if (opt == IPOPT_EOL)
1239 break;
1240 if (opt == IPOPT_NOP)
1241 optlen = 1;
1242 else {
1243 if (cnt < IPOPT_OLEN + sizeof(*cp))
1244 goto bad;
1245 optlen = cp[IPOPT_OLEN];
1246 if (optlen < IPOPT_OLEN + sizeof(*cp) || optlen > cnt)
1247 goto bad;
1248 }
1249 switch (opt) {
1250
1251 default:
1252 break;
1253
1254 case IPOPT_LSRR:
1255 case IPOPT_SSRR:
1256 /*
1257 * user process specifies route as:
1258 * ->A->B->C->D
1259 * D must be our final destination (but we can't
1260 * check that since we may not have connected yet).
1261 * A is first hop destination, which doesn't appear in
1262 * actual IP option, but is stored before the options.
1263 */
1264 if (optlen < IPOPT_MINOFF - 1 + sizeof(struct in_addr))
1265 goto bad;
1266 m->m_len -= sizeof(struct in_addr);
1267 cnt -= sizeof(struct in_addr);
1268 optlen -= sizeof(struct in_addr);
1269 cp[IPOPT_OLEN] = optlen;
1270 /*
1271 * Move first hop before start of options.
1272 */
1273 bcopy((caddr_t)&cp[IPOPT_OFFSET+1], mtod(m, caddr_t),
1274 sizeof(struct in_addr));
1275 /*
1276 * Then copy rest of options back
1277 * to close up the deleted entry.
1278 */
1279 memmove(&cp[IPOPT_OFFSET+1],
1280 (caddr_t)(&cp[IPOPT_OFFSET+1] + sizeof(struct in_addr)),
1281 (unsigned)cnt + sizeof(struct in_addr));
1282 break;
1283 }
1284 }
1285 if (m->m_len > MAX_IPOPTLEN + sizeof(struct in_addr))
1286 goto bad;
1287 *pcbopt = m;
1288 return (0);
1289
1290 bad:
1291 (void)m_free(m);
1292 return (EINVAL);
1293 }
1294
1295 /*
1296 * following RFC1724 section 3.3, 0.0.0.0/8 is interpreted as interface index.
1297 */
1298 static struct ifnet *
1299 ip_multicast_if(a, ifindexp)
1300 struct in_addr *a;
1301 int *ifindexp;
1302 {
1303 int ifindex;
1304 struct ifnet *ifp;
1305
1306 if (ifindexp)
1307 *ifindexp = 0;
1308 if (ntohl(a->s_addr) >> 24 == 0) {
1309 ifindex = ntohl(a->s_addr) & 0xffffff;
1310 if (ifindex < 0 || if_index < ifindex)
1311 return NULL;
1312 ifp = ifindex2ifnet[ifindex];
1313 if (ifindexp)
1314 *ifindexp = ifindex;
1315 } else {
1316 INADDR_TO_IFP(*a, ifp);
1317 }
1318 return ifp;
1319 }
1320
1321 /*
1322 * Set the IP multicast options in response to user setsockopt().
1323 */
1324 int
1325 ip_setmoptions(optname, imop, m)
1326 int optname;
1327 struct ip_moptions **imop;
1328 struct mbuf *m;
1329 {
1330 int error = 0;
1331 u_char loop;
1332 int i;
1333 struct in_addr addr;
1334 struct ip_mreq *mreq;
1335 struct ifnet *ifp;
1336 struct ip_moptions *imo = *imop;
1337 struct route ro;
1338 struct sockaddr_in *dst;
1339 int ifindex;
1340
1341 if (imo == NULL) {
1342 /*
1343 * No multicast option buffer attached to the pcb;
1344 * allocate one and initialize to default values.
1345 */
1346 imo = (struct ip_moptions *)malloc(sizeof(*imo), M_IPMOPTS,
1347 M_WAITOK);
1348
1349 if (imo == NULL)
1350 return (ENOBUFS);
1351 *imop = imo;
1352 imo->imo_multicast_ifp = NULL;
1353 imo->imo_multicast_addr.s_addr = INADDR_ANY;
1354 imo->imo_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
1355 imo->imo_multicast_loop = IP_DEFAULT_MULTICAST_LOOP;
1356 imo->imo_num_memberships = 0;
1357 }
1358
1359 switch (optname) {
1360
1361 case IP_MULTICAST_IF:
1362 /*
1363 * Select the interface for outgoing multicast packets.
1364 */
1365 if (m == NULL || m->m_len != sizeof(struct in_addr)) {
1366 error = EINVAL;
1367 break;
1368 }
1369 addr = *(mtod(m, struct in_addr *));
1370 /*
1371 * INADDR_ANY is used to remove a previous selection.
1372 * When no interface is selected, a default one is
1373 * chosen every time a multicast packet is sent.
1374 */
1375 if (in_nullhost(addr)) {
1376 imo->imo_multicast_ifp = NULL;
1377 break;
1378 }
1379 /*
1380 * The selected interface is identified by its local
1381 * IP address. Find the interface and confirm that
1382 * it supports multicasting.
1383 */
1384 ifp = ip_multicast_if(&addr, &ifindex);
1385 if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
1386 error = EADDRNOTAVAIL;
1387 break;
1388 }
1389 imo->imo_multicast_ifp = ifp;
1390 if (ifindex)
1391 imo->imo_multicast_addr = addr;
1392 else
1393 imo->imo_multicast_addr.s_addr = INADDR_ANY;
1394 break;
1395
1396 case IP_MULTICAST_TTL:
1397 /*
1398 * Set the IP time-to-live for outgoing multicast packets.
1399 */
1400 if (m == NULL || m->m_len != 1) {
1401 error = EINVAL;
1402 break;
1403 }
1404 imo->imo_multicast_ttl = *(mtod(m, u_char *));
1405 break;
1406
1407 case IP_MULTICAST_LOOP:
1408 /*
1409 * Set the loopback flag for outgoing multicast packets.
1410 * Must be zero or one.
1411 */
1412 if (m == NULL || m->m_len != 1 ||
1413 (loop = *(mtod(m, u_char *))) > 1) {
1414 error = EINVAL;
1415 break;
1416 }
1417 imo->imo_multicast_loop = loop;
1418 break;
1419
1420 case IP_ADD_MEMBERSHIP:
1421 /*
1422 * Add a multicast group membership.
1423 * Group must be a valid IP multicast address.
1424 */
1425 if (m == NULL || m->m_len != sizeof(struct ip_mreq)) {
1426 error = EINVAL;
1427 break;
1428 }
1429 mreq = mtod(m, struct ip_mreq *);
1430 if (!IN_MULTICAST(mreq->imr_multiaddr.s_addr)) {
1431 error = EINVAL;
1432 break;
1433 }
1434 /*
1435 * If no interface address was provided, use the interface of
1436 * the route to the given multicast address.
1437 */
1438 if (in_nullhost(mreq->imr_interface)) {
1439 bzero((caddr_t)&ro, sizeof(ro));
1440 ro.ro_rt = NULL;
1441 dst = satosin(&ro.ro_dst);
1442 dst->sin_len = sizeof(*dst);
1443 dst->sin_family = AF_INET;
1444 dst->sin_addr = mreq->imr_multiaddr;
1445 rtalloc(&ro);
1446 if (ro.ro_rt == NULL) {
1447 error = EADDRNOTAVAIL;
1448 break;
1449 }
1450 ifp = ro.ro_rt->rt_ifp;
1451 rtfree(ro.ro_rt);
1452 } else {
1453 ifp = ip_multicast_if(&mreq->imr_interface, NULL);
1454 }
1455 /*
1456 * See if we found an interface, and confirm that it
1457 * supports multicast.
1458 */
1459 if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
1460 error = EADDRNOTAVAIL;
1461 break;
1462 }
1463 /*
1464 * See if the membership already exists or if all the
1465 * membership slots are full.
1466 */
1467 for (i = 0; i < imo->imo_num_memberships; ++i) {
1468 if (imo->imo_membership[i]->inm_ifp == ifp &&
1469 in_hosteq(imo->imo_membership[i]->inm_addr,
1470 mreq->imr_multiaddr))
1471 break;
1472 }
1473 if (i < imo->imo_num_memberships) {
1474 error = EADDRINUSE;
1475 break;
1476 }
1477 if (i == IP_MAX_MEMBERSHIPS) {
1478 error = ETOOMANYREFS;
1479 break;
1480 }
1481 /*
1482 * Everything looks good; add a new record to the multicast
1483 * address list for the given interface.
1484 */
1485 if ((imo->imo_membership[i] =
1486 in_addmulti(&mreq->imr_multiaddr, ifp)) == NULL) {
1487 error = ENOBUFS;
1488 break;
1489 }
1490 ++imo->imo_num_memberships;
1491 break;
1492
1493 case IP_DROP_MEMBERSHIP:
1494 /*
1495 * Drop a multicast group membership.
1496 * Group must be a valid IP multicast address.
1497 */
1498 if (m == NULL || m->m_len != sizeof(struct ip_mreq)) {
1499 error = EINVAL;
1500 break;
1501 }
1502 mreq = mtod(m, struct ip_mreq *);
1503 if (!IN_MULTICAST(mreq->imr_multiaddr.s_addr)) {
1504 error = EINVAL;
1505 break;
1506 }
1507 /*
1508 * If an interface address was specified, get a pointer
1509 * to its ifnet structure.
1510 */
1511 if (in_nullhost(mreq->imr_interface))
1512 ifp = NULL;
1513 else {
1514 ifp = ip_multicast_if(&mreq->imr_interface, NULL);
1515 if (ifp == NULL) {
1516 error = EADDRNOTAVAIL;
1517 break;
1518 }
1519 }
1520 /*
1521 * Find the membership in the membership array.
1522 */
1523 for (i = 0; i < imo->imo_num_memberships; ++i) {
1524 if ((ifp == NULL ||
1525 imo->imo_membership[i]->inm_ifp == ifp) &&
1526 in_hosteq(imo->imo_membership[i]->inm_addr,
1527 mreq->imr_multiaddr))
1528 break;
1529 }
1530 if (i == imo->imo_num_memberships) {
1531 error = EADDRNOTAVAIL;
1532 break;
1533 }
1534 /*
1535 * Give up the multicast address record to which the
1536 * membership points.
1537 */
1538 in_delmulti(imo->imo_membership[i]);
1539 /*
1540 * Remove the gap in the membership array.
1541 */
1542 for (++i; i < imo->imo_num_memberships; ++i)
1543 imo->imo_membership[i-1] = imo->imo_membership[i];
1544 --imo->imo_num_memberships;
1545 break;
1546
1547 default:
1548 error = EOPNOTSUPP;
1549 break;
1550 }
1551
1552 /*
1553 * If all options have default values, no need to keep the mbuf.
1554 */
1555 if (imo->imo_multicast_ifp == NULL &&
1556 imo->imo_multicast_ttl == IP_DEFAULT_MULTICAST_TTL &&
1557 imo->imo_multicast_loop == IP_DEFAULT_MULTICAST_LOOP &&
1558 imo->imo_num_memberships == 0) {
1559 free(*imop, M_IPMOPTS);
1560 *imop = NULL;
1561 }
1562
1563 return (error);
1564 }
1565
1566 /*
1567 * Return the IP multicast options in response to user getsockopt().
1568 */
1569 int
1570 ip_getmoptions(optname, imo, mp)
1571 int optname;
1572 struct ip_moptions *imo;
1573 struct mbuf **mp;
1574 {
1575 u_char *ttl;
1576 u_char *loop;
1577 struct in_addr *addr;
1578 struct in_ifaddr *ia;
1579
1580 *mp = m_get(M_WAIT, MT_SOOPTS);
1581
1582 switch (optname) {
1583
1584 case IP_MULTICAST_IF:
1585 addr = mtod(*mp, struct in_addr *);
1586 (*mp)->m_len = sizeof(struct in_addr);
1587 if (imo == NULL || imo->imo_multicast_ifp == NULL)
1588 *addr = zeroin_addr;
1589 else if (imo->imo_multicast_addr.s_addr) {
1590 /* return the value user has set */
1591 *addr = imo->imo_multicast_addr;
1592 } else {
1593 IFP_TO_IA(imo->imo_multicast_ifp, ia);
1594 *addr = ia ? ia->ia_addr.sin_addr : zeroin_addr;
1595 }
1596 return (0);
1597
1598 case IP_MULTICAST_TTL:
1599 ttl = mtod(*mp, u_char *);
1600 (*mp)->m_len = 1;
1601 *ttl = imo ? imo->imo_multicast_ttl
1602 : IP_DEFAULT_MULTICAST_TTL;
1603 return (0);
1604
1605 case IP_MULTICAST_LOOP:
1606 loop = mtod(*mp, u_char *);
1607 (*mp)->m_len = 1;
1608 *loop = imo ? imo->imo_multicast_loop
1609 : IP_DEFAULT_MULTICAST_LOOP;
1610 return (0);
1611
1612 default:
1613 return (EOPNOTSUPP);
1614 }
1615 }
1616
1617 /*
1618 * Discard the IP multicast options.
1619 */
1620 void
1621 ip_freemoptions(imo)
1622 struct ip_moptions *imo;
1623 {
1624 int i;
1625
1626 if (imo != NULL) {
1627 for (i = 0; i < imo->imo_num_memberships; ++i)
1628 in_delmulti(imo->imo_membership[i]);
1629 free(imo, M_IPMOPTS);
1630 }
1631 }
1632
1633 /*
1634 * Routine called from ip_output() to loop back a copy of an IP multicast
1635 * packet to the input queue of a specified interface. Note that this
1636 * calls the output routine of the loopback "driver", but with an interface
1637 * pointer that might NOT be &loif -- easier than replicating that code here.
1638 */
1639 static void
1640 ip_mloopback(ifp, m, dst)
1641 struct ifnet *ifp;
1642 struct mbuf *m;
1643 struct sockaddr_in *dst;
1644 {
1645 struct ip *ip;
1646 struct mbuf *copym;
1647
1648 copym = m_copy(m, 0, M_COPYALL);
1649 if (copym != NULL
1650 && (copym->m_flags & M_EXT || copym->m_len < sizeof(struct ip)))
1651 copym = m_pullup(copym, sizeof(struct ip));
1652 if (copym != NULL) {
1653 /*
1654 * We don't bother to fragment if the IP length is greater
1655 * than the interface's MTU. Can this possibly matter?
1656 */
1657 ip = mtod(copym, struct ip *);
1658 HTONS(ip->ip_len);
1659 HTONS(ip->ip_off);
1660
1661 if (copym->m_pkthdr.csum_flags & (M_CSUM_TCPv4|M_CSUM_UDPv4)) {
1662 in_delayed_cksum(copym);
1663 copym->m_pkthdr.csum_flags &=
1664 ~(M_CSUM_TCPv4|M_CSUM_UDPv4);
1665 }
1666
1667 ip->ip_sum = 0;
1668 ip->ip_sum = in_cksum(copym, ip->ip_hl << 2);
1669 (void) looutput(ifp, copym, sintosa(dst), NULL);
1670 }
1671 }
1672