ip_output.c revision 1.140 1 /* $NetBSD: ip_output.c,v 1.140 2005/02/03 23:13:20 perry Exp $ */
2
3 /*
4 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of the project nor the names of its contributors
16 * may be used to endorse or promote products derived from this software
17 * without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 */
31
32 /*-
33 * Copyright (c) 1998 The NetBSD Foundation, Inc.
34 * All rights reserved.
35 *
36 * This code is derived from software contributed to The NetBSD Foundation
37 * by Public Access Networks Corporation ("Panix"). It was developed under
38 * contract to Panix by Eric Haszlakiewicz and Thor Lancelot Simon.
39 *
40 * Redistribution and use in source and binary forms, with or without
41 * modification, are permitted provided that the following conditions
42 * are met:
43 * 1. Redistributions of source code must retain the above copyright
44 * notice, this list of conditions and the following disclaimer.
45 * 2. Redistributions in binary form must reproduce the above copyright
46 * notice, this list of conditions and the following disclaimer in the
47 * documentation and/or other materials provided with the distribution.
48 * 3. All advertising materials mentioning features or use of this software
49 * must display the following acknowledgement:
50 * This product includes software developed by the NetBSD
51 * Foundation, Inc. and its contributors.
52 * 4. Neither the name of The NetBSD Foundation nor the names of its
53 * contributors may be used to endorse or promote products derived
54 * from this software without specific prior written permission.
55 *
56 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
57 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
58 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
59 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
60 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
61 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
62 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
63 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
64 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
65 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
66 * POSSIBILITY OF SUCH DAMAGE.
67 */
68
69 /*
70 * Copyright (c) 1982, 1986, 1988, 1990, 1993
71 * The Regents of the University of California. All rights reserved.
72 *
73 * Redistribution and use in source and binary forms, with or without
74 * modification, are permitted provided that the following conditions
75 * are met:
76 * 1. Redistributions of source code must retain the above copyright
77 * notice, this list of conditions and the following disclaimer.
78 * 2. Redistributions in binary form must reproduce the above copyright
79 * notice, this list of conditions and the following disclaimer in the
80 * documentation and/or other materials provided with the distribution.
81 * 3. Neither the name of the University nor the names of its contributors
82 * may be used to endorse or promote products derived from this software
83 * without specific prior written permission.
84 *
85 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
86 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
87 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
88 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
89 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
90 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
91 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
92 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
93 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
94 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
95 * SUCH DAMAGE.
96 *
97 * @(#)ip_output.c 8.3 (Berkeley) 1/21/94
98 */
99
100 #include <sys/cdefs.h>
101 __KERNEL_RCSID(0, "$NetBSD: ip_output.c,v 1.140 2005/02/03 23:13:20 perry Exp $");
102
103 #include "opt_pfil_hooks.h"
104 #include "opt_inet.h"
105 #include "opt_ipsec.h"
106 #include "opt_mrouting.h"
107
108 #include <sys/param.h>
109 #include <sys/malloc.h>
110 #include <sys/mbuf.h>
111 #include <sys/errno.h>
112 #include <sys/protosw.h>
113 #include <sys/socket.h>
114 #include <sys/socketvar.h>
115 #ifdef FAST_IPSEC
116 #include <sys/domain.h>
117 #endif
118 #include <sys/systm.h>
119 #include <sys/proc.h>
120
121 #include <net/if.h>
122 #include <net/route.h>
123 #include <net/pfil.h>
124
125 #include <netinet/in.h>
126 #include <netinet/in_systm.h>
127 #include <netinet/ip.h>
128 #include <netinet/in_pcb.h>
129 #include <netinet/in_var.h>
130 #include <netinet/ip_var.h>
131
132 #ifdef MROUTING
133 #include <netinet/ip_mroute.h>
134 #endif
135
136 #include <machine/stdarg.h>
137
138 #ifdef IPSEC
139 #include <netinet6/ipsec.h>
140 #include <netkey/key.h>
141 #include <netkey/key_debug.h>
142 #endif /*IPSEC*/
143
144 #ifdef FAST_IPSEC
145 #include <netipsec/ipsec.h>
146 #include <netipsec/key.h>
147 #include <netipsec/xform.h>
148 #endif /* FAST_IPSEC*/
149
/* Forward declarations for helper routines that are private to this file. */
static struct mbuf *ip_insertoptions(struct mbuf *, struct mbuf *, int *);
static struct ifnet *ip_multicast_if(struct in_addr *, int *);
static void ip_mloopback(struct ifnet *, struct mbuf *, struct sockaddr_in *);

#ifdef PFIL_HOOKS
/*
 * Packet filter hook head, defined outside this file; ip_output() runs
 * it with PFIL_OUT on every packet just before transmission.
 */
extern struct pfil_head inet_pfil_hook;			/* XXX */
#endif
157
158 /*
159 * IP output. The packet in mbuf chain m contains a skeletal IP
160 * header (with len, off, ttl, proto, tos, src, dst).
161 * The mbuf chain containing the packet will be freed.
162 * The mbuf opt, if present, will not be freed.
163 */
164 int
165 ip_output(struct mbuf *m0, ...)
166 {
167 struct ip *ip;
168 struct ifnet *ifp;
169 struct mbuf *m = m0;
170 int hlen = sizeof (struct ip);
171 int len, error = 0;
172 struct route iproute;
173 struct sockaddr_in *dst;
174 struct in_ifaddr *ia;
175 struct mbuf *opt;
176 struct route *ro;
177 int flags, sw_csum;
178 int *mtu_p;
179 u_long mtu;
180 struct ip_moptions *imo;
181 struct socket *so;
182 va_list ap;
183 #ifdef IPSEC
184 struct secpolicy *sp = NULL;
185 #endif /*IPSEC*/
186 #ifdef FAST_IPSEC
187 struct inpcb *inp;
188 struct m_tag *mtag;
189 struct secpolicy *sp = NULL;
190 struct tdb_ident *tdbi;
191 int s;
192 #endif
193 u_int16_t ip_len;
194
195 len = 0;
196 va_start(ap, m0);
197 opt = va_arg(ap, struct mbuf *);
198 ro = va_arg(ap, struct route *);
199 flags = va_arg(ap, int);
200 imo = va_arg(ap, struct ip_moptions *);
201 so = va_arg(ap, struct socket *);
202 if (flags & IP_RETURNMTU)
203 mtu_p = va_arg(ap, int *);
204 else
205 mtu_p = NULL;
206 va_end(ap);
207
208 MCLAIM(m, &ip_tx_mowner);
209 #ifdef FAST_IPSEC
210 if (so != NULL && so->so_proto->pr_domain->dom_family == AF_INET)
211 inp = (struct inpcb *)so->so_pcb;
212 else
213 inp = NULL;
214 #endif /* FAST_IPSEC */
215
216 #ifdef DIAGNOSTIC
217 if ((m->m_flags & M_PKTHDR) == 0)
218 panic("ip_output no HDR");
219 #endif
220 if (opt) {
221 m = ip_insertoptions(m, opt, &len);
222 if (len >= sizeof(struct ip))
223 hlen = len;
224 }
225 ip = mtod(m, struct ip *);
226 /*
227 * Fill in IP header.
228 */
229 if ((flags & (IP_FORWARDING|IP_RAWOUTPUT)) == 0) {
230 ip->ip_v = IPVERSION;
231 ip->ip_off = htons(0);
232 ip->ip_id = ip_newid();
233 ip->ip_hl = hlen >> 2;
234 ipstat.ips_localout++;
235 } else {
236 hlen = ip->ip_hl << 2;
237 }
238 /*
239 * Route packet.
240 */
241 if (ro == 0) {
242 ro = &iproute;
243 bzero((caddr_t)ro, sizeof (*ro));
244 }
245 dst = satosin(&ro->ro_dst);
246 /*
247 * If there is a cached route,
248 * check that it is to the same destination
249 * and is still up. If not, free it and try again.
250 * The address family should also be checked in case of sharing the
251 * cache with IPv6.
252 */
253 if (ro->ro_rt && ((ro->ro_rt->rt_flags & RTF_UP) == 0 ||
254 dst->sin_family != AF_INET ||
255 !in_hosteq(dst->sin_addr, ip->ip_dst))) {
256 RTFREE(ro->ro_rt);
257 ro->ro_rt = (struct rtentry *)0;
258 }
259 if (ro->ro_rt == 0) {
260 bzero(dst, sizeof(*dst));
261 dst->sin_family = AF_INET;
262 dst->sin_len = sizeof(*dst);
263 dst->sin_addr = ip->ip_dst;
264 }
265 /*
266 * If routing to interface only,
267 * short circuit routing lookup.
268 */
269 if (flags & IP_ROUTETOIF) {
270 if ((ia = ifatoia(ifa_ifwithladdr(sintosa(dst)))) == 0) {
271 ipstat.ips_noroute++;
272 error = ENETUNREACH;
273 goto bad;
274 }
275 ifp = ia->ia_ifp;
276 mtu = ifp->if_mtu;
277 ip->ip_ttl = 1;
278 } else if ((IN_MULTICAST(ip->ip_dst.s_addr) ||
279 ip->ip_dst.s_addr == INADDR_BROADCAST) &&
280 imo != NULL && imo->imo_multicast_ifp != NULL) {
281 ifp = imo->imo_multicast_ifp;
282 mtu = ifp->if_mtu;
283 IFP_TO_IA(ifp, ia);
284 } else {
285 if (ro->ro_rt == 0)
286 rtalloc(ro);
287 if (ro->ro_rt == 0) {
288 ipstat.ips_noroute++;
289 error = EHOSTUNREACH;
290 goto bad;
291 }
292 ia = ifatoia(ro->ro_rt->rt_ifa);
293 ifp = ro->ro_rt->rt_ifp;
294 if ((mtu = ro->ro_rt->rt_rmx.rmx_mtu) == 0)
295 mtu = ifp->if_mtu;
296 ro->ro_rt->rt_use++;
297 if (ro->ro_rt->rt_flags & RTF_GATEWAY)
298 dst = satosin(ro->ro_rt->rt_gateway);
299 }
300 if (IN_MULTICAST(ip->ip_dst.s_addr) ||
301 (ip->ip_dst.s_addr == INADDR_BROADCAST)) {
302 struct in_multi *inm;
303
304 m->m_flags |= (ip->ip_dst.s_addr == INADDR_BROADCAST) ?
305 M_BCAST : M_MCAST;
306 /*
307 * IP destination address is multicast. Make sure "dst"
308 * still points to the address in "ro". (It may have been
309 * changed to point to a gateway address, above.)
310 */
311 dst = satosin(&ro->ro_dst);
312 /*
313 * See if the caller provided any multicast options
314 */
315 if (imo != NULL)
316 ip->ip_ttl = imo->imo_multicast_ttl;
317 else
318 ip->ip_ttl = IP_DEFAULT_MULTICAST_TTL;
319
320 /*
321 * if we don't know the outgoing ifp yet, we can't generate
322 * output
323 */
324 if (!ifp) {
325 ipstat.ips_noroute++;
326 error = ENETUNREACH;
327 goto bad;
328 }
329
330 /*
331 * If the packet is multicast or broadcast, confirm that
332 * the outgoing interface can transmit it.
333 */
334 if (((m->m_flags & M_MCAST) &&
335 (ifp->if_flags & IFF_MULTICAST) == 0) ||
336 ((m->m_flags & M_BCAST) &&
337 (ifp->if_flags & (IFF_BROADCAST|IFF_POINTOPOINT)) == 0)) {
338 ipstat.ips_noroute++;
339 error = ENETUNREACH;
340 goto bad;
341 }
342 /*
343 * If source address not specified yet, use an address
344 * of outgoing interface.
345 */
346 if (in_nullhost(ip->ip_src)) {
347 struct in_ifaddr *ia;
348
349 IFP_TO_IA(ifp, ia);
350 if (!ia) {
351 error = EADDRNOTAVAIL;
352 goto bad;
353 }
354 ip->ip_src = ia->ia_addr.sin_addr;
355 }
356
357 IN_LOOKUP_MULTI(ip->ip_dst, ifp, inm);
358 if (inm != NULL &&
359 (imo == NULL || imo->imo_multicast_loop)) {
360 /*
361 * If we belong to the destination multicast group
362 * on the outgoing interface, and the caller did not
363 * forbid loopback, loop back a copy.
364 */
365 ip_mloopback(ifp, m, dst);
366 }
367 #ifdef MROUTING
368 else {
369 /*
370 * If we are acting as a multicast router, perform
371 * multicast forwarding as if the packet had just
372 * arrived on the interface to which we are about
373 * to send. The multicast forwarding function
374 * recursively calls this function, using the
375 * IP_FORWARDING flag to prevent infinite recursion.
376 *
377 * Multicasts that are looped back by ip_mloopback(),
378 * above, will be forwarded by the ip_input() routine,
379 * if necessary.
380 */
381 extern struct socket *ip_mrouter;
382
383 if (ip_mrouter && (flags & IP_FORWARDING) == 0) {
384 if (ip_mforward(m, ifp) != 0) {
385 m_freem(m);
386 goto done;
387 }
388 }
389 }
390 #endif
391 /*
392 * Multicasts with a time-to-live of zero may be looped-
393 * back, above, but must not be transmitted on a network.
394 * Also, multicasts addressed to the loopback interface
395 * are not sent -- the above call to ip_mloopback() will
396 * loop back a copy if this host actually belongs to the
397 * destination group on the loopback interface.
398 */
399 if (ip->ip_ttl == 0 || (ifp->if_flags & IFF_LOOPBACK) != 0) {
400 m_freem(m);
401 goto done;
402 }
403
404 goto sendit;
405 }
406 #ifndef notdef
407 /*
408 * If source address not specified yet, use address
409 * of outgoing interface.
410 */
411 if (in_nullhost(ip->ip_src))
412 ip->ip_src = ia->ia_addr.sin_addr;
413 #endif
414
415 /*
416 * packets with Class-D address as source are not valid per
417 * RFC 1112
418 */
419 if (IN_MULTICAST(ip->ip_src.s_addr)) {
420 ipstat.ips_odropped++;
421 error = EADDRNOTAVAIL;
422 goto bad;
423 }
424
425 /*
426 * Look for broadcast address and
427 * and verify user is allowed to send
428 * such a packet.
429 */
430 if (in_broadcast(dst->sin_addr, ifp)) {
431 if ((ifp->if_flags & IFF_BROADCAST) == 0) {
432 error = EADDRNOTAVAIL;
433 goto bad;
434 }
435 if ((flags & IP_ALLOWBROADCAST) == 0) {
436 error = EACCES;
437 goto bad;
438 }
439 /* don't allow broadcast messages to be fragmented */
440 if (ntohs(ip->ip_len) > ifp->if_mtu) {
441 error = EMSGSIZE;
442 goto bad;
443 }
444 m->m_flags |= M_BCAST;
445 } else
446 m->m_flags &= ~M_BCAST;
447
448 sendit:
449 /*
450 * If we're doing Path MTU Discovery, we need to set DF unless
451 * the route's MTU is locked.
452 */
453 if ((flags & IP_MTUDISC) != 0 && ro->ro_rt != NULL &&
454 (ro->ro_rt->rt_rmx.rmx_locks & RTV_MTU) == 0)
455 ip->ip_off |= htons(IP_DF);
456
457 /* Remember the current ip_len */
458 ip_len = ntohs(ip->ip_len);
459
460 #ifdef IPSEC
461 /* get SP for this packet */
462 if (so == NULL)
463 sp = ipsec4_getpolicybyaddr(m, IPSEC_DIR_OUTBOUND,
464 flags, &error);
465 else {
466 if (IPSEC_PCB_SKIP_IPSEC(sotoinpcb_hdr(so)->inph_sp,
467 IPSEC_DIR_OUTBOUND))
468 goto skip_ipsec;
469 sp = ipsec4_getpolicybysock(m, IPSEC_DIR_OUTBOUND, so, &error);
470 }
471
472 if (sp == NULL) {
473 ipsecstat.out_inval++;
474 goto bad;
475 }
476
477 error = 0;
478
479 /* check policy */
480 switch (sp->policy) {
481 case IPSEC_POLICY_DISCARD:
482 /*
483 * This packet is just discarded.
484 */
485 ipsecstat.out_polvio++;
486 goto bad;
487
488 case IPSEC_POLICY_BYPASS:
489 case IPSEC_POLICY_NONE:
490 /* no need to do IPsec. */
491 goto skip_ipsec;
492
493 case IPSEC_POLICY_IPSEC:
494 if (sp->req == NULL) {
495 /* XXX should be panic ? */
496 printf("ip_output: No IPsec request specified.\n");
497 error = EINVAL;
498 goto bad;
499 }
500 break;
501
502 case IPSEC_POLICY_ENTRUST:
503 default:
504 printf("ip_output: Invalid policy found. %d\n", sp->policy);
505 }
506
507 /*
508 * ipsec4_output() expects ip_len and ip_off in network
509 * order. They have been set to network order above.
510 */
511
512 {
513 struct ipsec_output_state state;
514 bzero(&state, sizeof(state));
515 state.m = m;
516 if (flags & IP_ROUTETOIF) {
517 state.ro = &iproute;
518 bzero(&iproute, sizeof(iproute));
519 } else
520 state.ro = ro;
521 state.dst = (struct sockaddr *)dst;
522
523 /*
524 * We can't defer the checksum of payload data if
525 * we're about to encrypt/authenticate it.
526 *
527 * XXX When we support crypto offloading functions of
528 * XXX network interfaces, we need to reconsider this,
529 * XXX since it's likely that they'll support checksumming,
530 * XXX as well.
531 */
532 if (m->m_pkthdr.csum_flags & (M_CSUM_TCPv4|M_CSUM_UDPv4)) {
533 in_delayed_cksum(m);
534 m->m_pkthdr.csum_flags &= ~(M_CSUM_TCPv4|M_CSUM_UDPv4);
535 }
536
537 error = ipsec4_output(&state, sp, flags);
538
539 m = state.m;
540 if (flags & IP_ROUTETOIF) {
541 /*
542 * if we have tunnel mode SA, we may need to ignore
543 * IP_ROUTETOIF.
544 */
545 if (state.ro != &iproute || state.ro->ro_rt != NULL) {
546 flags &= ~IP_ROUTETOIF;
547 ro = state.ro;
548 }
549 } else
550 ro = state.ro;
551 dst = (struct sockaddr_in *)state.dst;
552 if (error) {
553 /* mbuf is already reclaimed in ipsec4_output. */
554 m0 = NULL;
555 switch (error) {
556 case EHOSTUNREACH:
557 case ENETUNREACH:
558 case EMSGSIZE:
559 case ENOBUFS:
560 case ENOMEM:
561 break;
562 default:
563 printf("ip4_output (ipsec): error code %d\n", error);
564 /*fall through*/
565 case ENOENT:
566 /* don't show these error codes to the user */
567 error = 0;
568 break;
569 }
570 goto bad;
571 }
572
573 /* be sure to update variables that are affected by ipsec4_output() */
574 ip = mtod(m, struct ip *);
575 hlen = ip->ip_hl << 2;
576 ip_len = ntohs(ip->ip_len);
577
578 if (ro->ro_rt == NULL) {
579 if ((flags & IP_ROUTETOIF) == 0) {
580 printf("ip_output: "
581 "can't update route after IPsec processing\n");
582 error = EHOSTUNREACH; /*XXX*/
583 goto bad;
584 }
585 } else {
586 /* nobody uses ia beyond here */
587 if (state.encap) {
588 ifp = ro->ro_rt->rt_ifp;
589 if ((mtu = ro->ro_rt->rt_rmx.rmx_mtu) == 0)
590 mtu = ifp->if_mtu;
591 }
592 }
593 }
594 skip_ipsec:
595 #endif /*IPSEC*/
596 #ifdef FAST_IPSEC
597 /*
598 * Check the security policy (SP) for the packet and, if
599 * required, do IPsec-related processing. There are two
600 * cases here; the first time a packet is sent through
601 * it will be untagged and handled by ipsec4_checkpolicy.
602 * If the packet is resubmitted to ip_output (e.g. after
603 * AH, ESP, etc. processing), there will be a tag to bypass
604 * the lookup and related policy checking.
605 */
606 mtag = m_tag_find(m, PACKET_TAG_IPSEC_PENDING_TDB, NULL);
607 s = splsoftnet();
608 if (mtag != NULL) {
609 tdbi = (struct tdb_ident *)(mtag + 1);
610 sp = ipsec_getpolicy(tdbi, IPSEC_DIR_OUTBOUND);
611 if (sp == NULL)
612 error = -EINVAL; /* force silent drop */
613 m_tag_delete(m, mtag);
614 } else {
615 if (inp != NULL &&
616 IPSEC_PCB_SKIP_IPSEC(inp->inp_sp, IPSEC_DIR_OUTBOUND))
617 goto spd_done;
618 sp = ipsec4_checkpolicy(m, IPSEC_DIR_OUTBOUND, flags,
619 &error, inp);
620 }
621 /*
622 * There are four return cases:
623 * sp != NULL apply IPsec policy
624 * sp == NULL, error == 0 no IPsec handling needed
625 * sp == NULL, error == -EINVAL discard packet w/o error
626 * sp == NULL, error != 0 discard packet, report error
627 */
628 if (sp != NULL) {
629 /* Loop detection, check if ipsec processing already done */
630 IPSEC_ASSERT(sp->req != NULL, ("ip_output: no ipsec request"));
631 for (mtag = m_tag_first(m); mtag != NULL;
632 mtag = m_tag_next(m, mtag)) {
633 #ifdef MTAG_ABI_COMPAT
634 if (mtag->m_tag_cookie != MTAG_ABI_COMPAT)
635 continue;
636 #endif
637 if (mtag->m_tag_id != PACKET_TAG_IPSEC_OUT_DONE &&
638 mtag->m_tag_id != PACKET_TAG_IPSEC_OUT_CRYPTO_NEEDED)
639 continue;
640 /*
641 * Check if policy has an SA associated with it.
642 * This can happen when an SP has yet to acquire
643 * an SA; e.g. on first reference. If it occurs,
644 * then we let ipsec4_process_packet do its thing.
645 */
646 if (sp->req->sav == NULL)
647 break;
648 tdbi = (struct tdb_ident *)(mtag + 1);
649 if (tdbi->spi == sp->req->sav->spi &&
650 tdbi->proto == sp->req->sav->sah->saidx.proto &&
651 bcmp(&tdbi->dst, &sp->req->sav->sah->saidx.dst,
652 sizeof (union sockaddr_union)) == 0) {
653 /*
654 * No IPsec processing is needed, free
655 * reference to SP.
656 *
657 * NB: null pointer to avoid free at
658 * done: below.
659 */
660 KEY_FREESP(&sp), sp = NULL;
661 splx(s);
662 goto spd_done;
663 }
664 }
665
666 /*
667 * Do delayed checksums now because we send before
668 * this is done in the normal processing path.
669 */
670 if (m->m_pkthdr.csum_flags & (M_CSUM_TCPv4|M_CSUM_UDPv4)) {
671 in_delayed_cksum(m);
672 m->m_pkthdr.csum_flags &= ~(M_CSUM_TCPv4|M_CSUM_UDPv4);
673 }
674
675 #ifdef __FreeBSD__
676 ip->ip_len = htons(ip->ip_len);
677 ip->ip_off = htons(ip->ip_off);
678 #endif
679
680 /* NB: callee frees mbuf */
681 error = ipsec4_process_packet(m, sp->req, flags, 0);
682 /*
683 * Preserve KAME behaviour: ENOENT can be returned
684 * when an SA acquire is in progress. Don't propagate
685 * this to user-level; it confuses applications.
686 *
687 * XXX this will go away when the SADB is redone.
688 */
689 if (error == ENOENT)
690 error = 0;
691 splx(s);
692 goto done;
693 } else {
694 splx(s);
695
696 if (error != 0) {
697 /*
698 * Hack: -EINVAL is used to signal that a packet
699 * should be silently discarded. This is typically
700 * because we asked key management for an SA and
701 * it was delayed (e.g. kicked up to IKE).
702 */
703 if (error == -EINVAL)
704 error = 0;
705 goto bad;
706 } else {
707 /* No IPsec processing for this packet. */
708 }
709 #ifdef notyet
710 /*
711 * If deferred crypto processing is needed, check that
712 * the interface supports it.
713 */
714 mtag = m_tag_find(m, PACKET_TAG_IPSEC_OUT_CRYPTO_NEEDED, NULL);
715 if (mtag != NULL && (ifp->if_capenable & IFCAP_IPSEC) == 0) {
716 /* notify IPsec to do its own crypto */
717 ipsp_skipcrypto_unmark((struct tdb_ident *)(mtag + 1));
718 error = EHOSTUNREACH;
719 goto bad;
720 }
721 #endif
722 }
723 spd_done:
724 #endif /* FAST_IPSEC */
725
726 #ifdef PFIL_HOOKS
727 /*
728 * Run through list of hooks for output packets.
729 */
730 if ((error = pfil_run_hooks(&inet_pfil_hook, &m, ifp, PFIL_OUT)) != 0)
731 goto done;
732 if (m == NULL)
733 goto done;
734
735 ip = mtod(m, struct ip *);
736 hlen = ip->ip_hl << 2;
737 #endif /* PFIL_HOOKS */
738
739 #if IFA_STATS
740 /*
741 * search for the source address structure to
742 * maintain output statistics.
743 */
744 INADDR_TO_IA(ip->ip_src, ia);
745 #endif
746
747 /* Maybe skip checksums on loopback interfaces. */
748 if (__predict_true(!(ifp->if_flags & IFF_LOOPBACK) ||
749 ip_do_loopback_cksum))
750 m->m_pkthdr.csum_flags |= M_CSUM_IPv4;
751 sw_csum = m->m_pkthdr.csum_flags & ~ifp->if_csum_flags_tx;
752 /*
753 * If small enough for mtu of path, can just send directly.
754 */
755 if (ip_len <= mtu) {
756 #if IFA_STATS
757 if (ia)
758 ia->ia_ifa.ifa_data.ifad_outbytes += ip_len;
759 #endif
760 /*
761 * Always initialize the sum to 0! Some HW assisted
762 * checksumming requires this.
763 */
764 ip->ip_sum = 0;
765
766 /*
767 * Perform any checksums that the hardware can't do
768 * for us.
769 *
770 * XXX Does any hardware require the {th,uh}_sum
771 * XXX fields to be 0?
772 */
773 if (sw_csum & M_CSUM_IPv4) {
774 ip->ip_sum = in_cksum(m, hlen);
775 m->m_pkthdr.csum_flags &= ~M_CSUM_IPv4;
776 }
777 if (sw_csum & (M_CSUM_TCPv4|M_CSUM_UDPv4)) {
778 in_delayed_cksum(m);
779 m->m_pkthdr.csum_flags &= ~(M_CSUM_TCPv4|M_CSUM_UDPv4);
780 }
781
782 #ifdef IPSEC
783 /* clean ipsec history once it goes out of the node */
784 ipsec_delaux(m);
785 #endif
786 error = (*ifp->if_output)(ifp, m, sintosa(dst), ro->ro_rt);
787 goto done;
788 }
789
790 /*
791 * We can't use HW checksumming if we're about to
792 * to fragment the packet.
793 *
794 * XXX Some hardware can do this.
795 */
796 if (m->m_pkthdr.csum_flags & (M_CSUM_TCPv4|M_CSUM_UDPv4)) {
797 in_delayed_cksum(m);
798 m->m_pkthdr.csum_flags &= ~(M_CSUM_TCPv4|M_CSUM_UDPv4);
799 }
800
801 /*
802 * Too large for interface; fragment if possible.
803 * Must be able to put at least 8 bytes per fragment.
804 */
805 if (ntohs(ip->ip_off) & IP_DF) {
806 if (flags & IP_RETURNMTU)
807 *mtu_p = mtu;
808 error = EMSGSIZE;
809 ipstat.ips_cantfrag++;
810 goto bad;
811 }
812
813 error = ip_fragment(m, ifp, mtu);
814 if (error) {
815 m = NULL;
816 goto bad;
817 }
818
819 for (; m; m = m0) {
820 m0 = m->m_nextpkt;
821 m->m_nextpkt = 0;
822 if (error == 0) {
823 #if IFA_STATS
824 if (ia)
825 ia->ia_ifa.ifa_data.ifad_outbytes +=
826 ntohs(ip->ip_len);
827 #endif
828 #ifdef IPSEC
829 /* clean ipsec history once it goes out of the node */
830 ipsec_delaux(m);
831 #endif
832 KASSERT((m->m_pkthdr.csum_flags &
833 (M_CSUM_UDPv4 | M_CSUM_TCPv4)) == 0);
834 error = (*ifp->if_output)(ifp, m, sintosa(dst),
835 ro->ro_rt);
836 } else
837 m_freem(m);
838 }
839
840 if (error == 0)
841 ipstat.ips_fragmented++;
842 done:
843 if (ro == &iproute && (flags & IP_ROUTETOIF) == 0 && ro->ro_rt) {
844 RTFREE(ro->ro_rt);
845 ro->ro_rt = 0;
846 }
847
848 #ifdef IPSEC
849 if (sp != NULL) {
850 KEYDEBUG(KEYDEBUG_IPSEC_STAMP,
851 printf("DP ip_output call free SP:%p\n", sp));
852 key_freesp(sp);
853 }
854 #endif /* IPSEC */
855 #ifdef FAST_IPSEC
856 if (sp != NULL)
857 KEY_FREESP(&sp);
858 #endif /* FAST_IPSEC */
859
860 return (error);
861 bad:
862 m_freem(m);
863 goto done;
864 }
865
866 int
867 ip_fragment(struct mbuf *m, struct ifnet *ifp, u_long mtu)
868 {
869 struct ip *ip, *mhip;
870 struct mbuf *m0;
871 int len, hlen, off;
872 int mhlen, firstlen;
873 struct mbuf **mnext;
874 int sw_csum = m->m_pkthdr.csum_flags;
875 int fragments = 0;
876 int s;
877 int error = 0;
878
879 ip = mtod(m, struct ip *);
880 hlen = ip->ip_hl << 2;
881 if (ifp != NULL)
882 sw_csum &= ~ifp->if_csum_flags_tx;
883
884 len = (mtu - hlen) &~ 7;
885 if (len < 8) {
886 m_freem(m);
887 return (EMSGSIZE);
888 }
889
890 firstlen = len;
891 mnext = &m->m_nextpkt;
892
893 /*
894 * Loop through length of segment after first fragment,
895 * make new header and copy data of each part and link onto chain.
896 */
897 m0 = m;
898 mhlen = sizeof (struct ip);
899 for (off = hlen + len; off < ntohs(ip->ip_len); off += len) {
900 MGETHDR(m, M_DONTWAIT, MT_HEADER);
901 if (m == 0) {
902 error = ENOBUFS;
903 ipstat.ips_odropped++;
904 goto sendorfree;
905 }
906 MCLAIM(m, m0->m_owner);
907 *mnext = m;
908 mnext = &m->m_nextpkt;
909 m->m_data += max_linkhdr;
910 mhip = mtod(m, struct ip *);
911 *mhip = *ip;
912 /* we must inherit MCAST and BCAST flags */
913 m->m_flags |= m0->m_flags & (M_MCAST|M_BCAST);
914 if (hlen > sizeof (struct ip)) {
915 mhlen = ip_optcopy(ip, mhip) + sizeof (struct ip);
916 mhip->ip_hl = mhlen >> 2;
917 }
918 m->m_len = mhlen;
919 mhip->ip_off = ((off - hlen) >> 3) +
920 (ntohs(ip->ip_off) & ~IP_MF);
921 if (ip->ip_off & htons(IP_MF))
922 mhip->ip_off |= IP_MF;
923 if (off + len >= ntohs(ip->ip_len))
924 len = ntohs(ip->ip_len) - off;
925 else
926 mhip->ip_off |= IP_MF;
927 HTONS(mhip->ip_off);
928 mhip->ip_len = htons((u_int16_t)(len + mhlen));
929 m->m_next = m_copy(m0, off, len);
930 if (m->m_next == 0) {
931 error = ENOBUFS; /* ??? */
932 ipstat.ips_odropped++;
933 goto sendorfree;
934 }
935 m->m_pkthdr.len = mhlen + len;
936 m->m_pkthdr.rcvif = (struct ifnet *)0;
937 mhip->ip_sum = 0;
938 if (sw_csum & M_CSUM_IPv4) {
939 mhip->ip_sum = in_cksum(m, mhlen);
940 KASSERT((m->m_pkthdr.csum_flags & M_CSUM_IPv4) == 0);
941 } else {
942 m->m_pkthdr.csum_flags |= M_CSUM_IPv4;
943 }
944 ipstat.ips_ofragments++;
945 fragments++;
946 }
947 /*
948 * Update first fragment by trimming what's been copied out
949 * and updating header, then send each fragment (in order).
950 */
951 m = m0;
952 m_adj(m, hlen + firstlen - ntohs(ip->ip_len));
953 m->m_pkthdr.len = hlen + firstlen;
954 ip->ip_len = htons((u_int16_t)m->m_pkthdr.len);
955 ip->ip_off |= htons(IP_MF);
956 ip->ip_sum = 0;
957 if (sw_csum & M_CSUM_IPv4) {
958 ip->ip_sum = in_cksum(m, hlen);
959 m->m_pkthdr.csum_flags &= ~M_CSUM_IPv4;
960 } else {
961 KASSERT(m->m_pkthdr.csum_flags & M_CSUM_IPv4);
962 }
963 sendorfree:
964 /*
965 * If there is no room for all the fragments, don't queue
966 * any of them.
967 */
968 if (ifp != NULL) {
969 s = splnet();
970 if (ifp->if_snd.ifq_maxlen - ifp->if_snd.ifq_len < fragments &&
971 error == 0) {
972 error = ENOBUFS;
973 ipstat.ips_odropped++;
974 IFQ_INC_DROPS(&ifp->if_snd);
975 }
976 splx(s);
977 }
978 if (error) {
979 for (m = m0; m; m = m0) {
980 m0 = m->m_nextpkt;
981 m->m_nextpkt = NULL;
982 m_freem(m);
983 }
984 }
985 return (error);
986 }
987
988 /*
989 * Process a delayed payload checksum calculation.
990 */
991 void
992 in_delayed_cksum(struct mbuf *m)
993 {
994 struct ip *ip;
995 u_int16_t csum, offset;
996
997 ip = mtod(m, struct ip *);
998 offset = ip->ip_hl << 2;
999 csum = in4_cksum(m, 0, offset, ntohs(ip->ip_len) - offset);
1000 if (csum == 0 && (m->m_pkthdr.csum_flags & M_CSUM_UDPv4) != 0)
1001 csum = 0xffff;
1002
1003 offset += m->m_pkthdr.csum_data; /* checksum offset */
1004
1005 if ((offset + sizeof(u_int16_t)) > m->m_len) {
1006 /* This happen when ip options were inserted
1007 printf("in_delayed_cksum: pullup len %d off %d proto %d\n",
1008 m->m_len, offset, ip->ip_p);
1009 */
1010 m_copyback(m, offset, sizeof(csum), (caddr_t) &csum);
1011 } else
1012 *(u_int16_t *)(mtod(m, caddr_t) + offset) = csum;
1013 }
1014
1015 /*
1016 * Determine the maximum length of the options to be inserted;
1017 * we would far rather allocate too much space rather than too little.
1018 */
1019
1020 u_int
1021 ip_optlen(struct inpcb *inp)
1022 {
1023 struct mbuf *m = inp->inp_options;
1024
1025 if (m && m->m_len > offsetof(struct ipoption, ipopt_dst))
1026 return (m->m_len - offsetof(struct ipoption, ipopt_dst));
1027 else
1028 return 0;
1029 }
1030
1031
1032 /*
1033 * Insert IP options into preformed packet.
1034 * Adjust IP destination as required for IP source routing,
1035 * as indicated by a non-zero in_addr at the start of the options.
1036 */
1037 static struct mbuf *
1038 ip_insertoptions(struct mbuf *m, struct mbuf *opt, int *phlen)
1039 {
1040 struct ipoption *p = mtod(opt, struct ipoption *);
1041 struct mbuf *n;
1042 struct ip *ip = mtod(m, struct ip *);
1043 unsigned optlen;
1044
1045 optlen = opt->m_len - sizeof(p->ipopt_dst);
1046 if (optlen + ntohs(ip->ip_len) > IP_MAXPACKET)
1047 return (m); /* XXX should fail */
1048 if (!in_nullhost(p->ipopt_dst))
1049 ip->ip_dst = p->ipopt_dst;
1050 if (M_READONLY(m) || M_LEADINGSPACE(m) < optlen) {
1051 MGETHDR(n, M_DONTWAIT, MT_HEADER);
1052 if (n == 0)
1053 return (m);
1054 MCLAIM(n, m->m_owner);
1055 M_COPY_PKTHDR(n, m);
1056 m_tag_delete_chain(m, NULL);
1057 m->m_flags &= ~M_PKTHDR;
1058 m->m_len -= sizeof(struct ip);
1059 m->m_data += sizeof(struct ip);
1060 n->m_next = m;
1061 m = n;
1062 m->m_len = optlen + sizeof(struct ip);
1063 m->m_data += max_linkhdr;
1064 bcopy((caddr_t)ip, mtod(m, caddr_t), sizeof(struct ip));
1065 } else {
1066 m->m_data -= optlen;
1067 m->m_len += optlen;
1068 memmove(mtod(m, caddr_t), ip, sizeof(struct ip));
1069 }
1070 m->m_pkthdr.len += optlen;
1071 ip = mtod(m, struct ip *);
1072 bcopy((caddr_t)p->ipopt_list, (caddr_t)(ip + 1), (unsigned)optlen);
1073 *phlen = sizeof(struct ip) + optlen;
1074 ip->ip_len = htons(ntohs(ip->ip_len) + optlen);
1075 return (m);
1076 }
1077
1078 /*
1079 * Copy options from ip to jp,
1080 * omitting those not copied during fragmentation.
1081 */
1082 int
1083 ip_optcopy(struct ip *ip, struct ip *jp)
1084 {
1085 u_char *cp, *dp;
1086 int opt, optlen, cnt;
1087
1088 cp = (u_char *)(ip + 1);
1089 dp = (u_char *)(jp + 1);
1090 cnt = (ip->ip_hl << 2) - sizeof (struct ip);
1091 for (; cnt > 0; cnt -= optlen, cp += optlen) {
1092 opt = cp[0];
1093 if (opt == IPOPT_EOL)
1094 break;
1095 if (opt == IPOPT_NOP) {
1096 /* Preserve for IP mcast tunnel's LSRR alignment. */
1097 *dp++ = IPOPT_NOP;
1098 optlen = 1;
1099 continue;
1100 }
1101 #ifdef DIAGNOSTIC
1102 if (cnt < IPOPT_OLEN + sizeof(*cp))
1103 panic("malformed IPv4 option passed to ip_optcopy");
1104 #endif
1105 optlen = cp[IPOPT_OLEN];
1106 #ifdef DIAGNOSTIC
1107 if (optlen < IPOPT_OLEN + sizeof(*cp) || optlen > cnt)
1108 panic("malformed IPv4 option passed to ip_optcopy");
1109 #endif
1110 /* bogus lengths should have been caught by ip_dooptions */
1111 if (optlen > cnt)
1112 optlen = cnt;
1113 if (IPOPT_COPIED(opt)) {
1114 bcopy((caddr_t)cp, (caddr_t)dp, (unsigned)optlen);
1115 dp += optlen;
1116 }
1117 }
1118 for (optlen = dp - (u_char *)(jp+1); optlen & 0x3; optlen++)
1119 *dp++ = IPOPT_EOL;
1120 return (optlen);
1121 }
1122
1123 /*
1124 * IP socket option processing.
1125 */
1126 int
1127 ip_ctloutput(int op, struct socket *so, int level, int optname,
1128 struct mbuf **mp)
1129 {
1130 struct inpcb *inp = sotoinpcb(so);
1131 struct mbuf *m = *mp;
1132 int optval = 0;
1133 int error = 0;
1134 #if defined(IPSEC) || defined(FAST_IPSEC)
1135 struct proc *p = curproc; /*XXX*/
1136 #endif
1137
1138 if (level != IPPROTO_IP) {
1139 error = EINVAL;
1140 if (op == PRCO_SETOPT && *mp)
1141 (void) m_free(*mp);
1142 } else switch (op) {
1143
1144 case PRCO_SETOPT:
1145 switch (optname) {
1146 case IP_OPTIONS:
1147 #ifdef notyet
1148 case IP_RETOPTS:
1149 return (ip_pcbopts(optname, &inp->inp_options, m));
1150 #else
1151 return (ip_pcbopts(&inp->inp_options, m));
1152 #endif
1153
1154 case IP_TOS:
1155 case IP_TTL:
1156 case IP_RECVOPTS:
1157 case IP_RECVRETOPTS:
1158 case IP_RECVDSTADDR:
1159 case IP_RECVIF:
1160 if (m == NULL || m->m_len != sizeof(int))
1161 error = EINVAL;
1162 else {
1163 optval = *mtod(m, int *);
1164 switch (optname) {
1165
1166 case IP_TOS:
1167 inp->inp_ip.ip_tos = optval;
1168 break;
1169
1170 case IP_TTL:
1171 inp->inp_ip.ip_ttl = optval;
1172 break;
1173 #define OPTSET(bit) \
1174 if (optval) \
1175 inp->inp_flags |= bit; \
1176 else \
1177 inp->inp_flags &= ~bit;
1178
1179 case IP_RECVOPTS:
1180 OPTSET(INP_RECVOPTS);
1181 break;
1182
1183 case IP_RECVRETOPTS:
1184 OPTSET(INP_RECVRETOPTS);
1185 break;
1186
1187 case IP_RECVDSTADDR:
1188 OPTSET(INP_RECVDSTADDR);
1189 break;
1190
1191 case IP_RECVIF:
1192 OPTSET(INP_RECVIF);
1193 break;
1194 }
1195 }
1196 break;
1197 #undef OPTSET
1198
1199 case IP_MULTICAST_IF:
1200 case IP_MULTICAST_TTL:
1201 case IP_MULTICAST_LOOP:
1202 case IP_ADD_MEMBERSHIP:
1203 case IP_DROP_MEMBERSHIP:
1204 error = ip_setmoptions(optname, &inp->inp_moptions, m);
1205 break;
1206
1207 case IP_PORTRANGE:
1208 if (m == 0 || m->m_len != sizeof(int))
1209 error = EINVAL;
1210 else {
1211 optval = *mtod(m, int *);
1212
1213 switch (optval) {
1214
1215 case IP_PORTRANGE_DEFAULT:
1216 case IP_PORTRANGE_HIGH:
1217 inp->inp_flags &= ~(INP_LOWPORT);
1218 break;
1219
1220 case IP_PORTRANGE_LOW:
1221 inp->inp_flags |= INP_LOWPORT;
1222 break;
1223
1224 default:
1225 error = EINVAL;
1226 break;
1227 }
1228 }
1229 break;
1230
1231 #if defined(IPSEC) || defined(FAST_IPSEC)
1232 case IP_IPSEC_POLICY:
1233 {
1234 caddr_t req = NULL;
1235 size_t len = 0;
1236 int priv = 0;
1237
1238 #ifdef __NetBSD__
1239 if (p == 0 || suser(p->p_ucred, &p->p_acflag))
1240 priv = 0;
1241 else
1242 priv = 1;
1243 #else
1244 priv = (in6p->in6p_socket->so_state & SS_PRIV);
1245 #endif
1246 if (m) {
1247 req = mtod(m, caddr_t);
1248 len = m->m_len;
1249 }
1250 error = ipsec4_set_policy(inp, optname, req, len, priv);
1251 break;
1252 }
1253 #endif /*IPSEC*/
1254
1255 default:
1256 error = ENOPROTOOPT;
1257 break;
1258 }
1259 if (m)
1260 (void)m_free(m);
1261 break;
1262
1263 case PRCO_GETOPT:
1264 switch (optname) {
1265 case IP_OPTIONS:
1266 case IP_RETOPTS:
1267 *mp = m = m_get(M_WAIT, MT_SOOPTS);
1268 MCLAIM(m, so->so_mowner);
1269 if (inp->inp_options) {
1270 m->m_len = inp->inp_options->m_len;
1271 bcopy(mtod(inp->inp_options, caddr_t),
1272 mtod(m, caddr_t), (unsigned)m->m_len);
1273 } else
1274 m->m_len = 0;
1275 break;
1276
1277 case IP_TOS:
1278 case IP_TTL:
1279 case IP_RECVOPTS:
1280 case IP_RECVRETOPTS:
1281 case IP_RECVDSTADDR:
1282 case IP_RECVIF:
1283 case IP_ERRORMTU:
1284 *mp = m = m_get(M_WAIT, MT_SOOPTS);
1285 MCLAIM(m, so->so_mowner);
1286 m->m_len = sizeof(int);
1287 switch (optname) {
1288
1289 case IP_TOS:
1290 optval = inp->inp_ip.ip_tos;
1291 break;
1292
1293 case IP_TTL:
1294 optval = inp->inp_ip.ip_ttl;
1295 break;
1296
1297 case IP_ERRORMTU:
1298 optval = inp->inp_errormtu;
1299 break;
1300
1301 #define OPTBIT(bit) (inp->inp_flags & bit ? 1 : 0)
1302
1303 case IP_RECVOPTS:
1304 optval = OPTBIT(INP_RECVOPTS);
1305 break;
1306
1307 case IP_RECVRETOPTS:
1308 optval = OPTBIT(INP_RECVRETOPTS);
1309 break;
1310
1311 case IP_RECVDSTADDR:
1312 optval = OPTBIT(INP_RECVDSTADDR);
1313 break;
1314
1315 case IP_RECVIF:
1316 optval = OPTBIT(INP_RECVIF);
1317 break;
1318 }
1319 *mtod(m, int *) = optval;
1320 break;
1321
1322 #if 0 /* defined(IPSEC) || defined(FAST_IPSEC) */
1323 /* XXX: code broken */
1324 case IP_IPSEC_POLICY:
1325 {
1326 caddr_t req = NULL;
1327 size_t len = 0;
1328
1329 if (m) {
1330 req = mtod(m, caddr_t);
1331 len = m->m_len;
1332 }
1333 error = ipsec4_get_policy(inp, req, len, mp);
1334 break;
1335 }
1336 #endif /*IPSEC*/
1337
1338 case IP_MULTICAST_IF:
1339 case IP_MULTICAST_TTL:
1340 case IP_MULTICAST_LOOP:
1341 case IP_ADD_MEMBERSHIP:
1342 case IP_DROP_MEMBERSHIP:
1343 error = ip_getmoptions(optname, inp->inp_moptions, mp);
1344 if (*mp)
1345 MCLAIM(*mp, so->so_mowner);
1346 break;
1347
1348 case IP_PORTRANGE:
1349 *mp = m = m_get(M_WAIT, MT_SOOPTS);
1350 MCLAIM(m, so->so_mowner);
1351 m->m_len = sizeof(int);
1352
1353 if (inp->inp_flags & INP_LOWPORT)
1354 optval = IP_PORTRANGE_LOW;
1355 else
1356 optval = IP_PORTRANGE_DEFAULT;
1357
1358 *mtod(m, int *) = optval;
1359 break;
1360
1361 default:
1362 error = ENOPROTOOPT;
1363 break;
1364 }
1365 break;
1366 }
1367 return (error);
1368 }
1369
1370 /*
1371 * Set up IP options in pcb for insertion in output packets.
1372 * Store in mbuf with pointer in pcbopt, adding pseudo-option
1373 * with destination address if source routed.
1374 */
1375 int
1376 #ifdef notyet
1377 ip_pcbopts(int optname, struct mbuf **pcbopt, struct mbuf *m)
1378 #else
1379 ip_pcbopts(struct mbuf **pcbopt, struct mbuf *m)
1380 #endif
1381 {
1382 int cnt, optlen;
1383 u_char *cp;
1384 u_char opt;
1385
1386 /* turn off any old options */
1387 if (*pcbopt)
1388 (void)m_free(*pcbopt);
1389 *pcbopt = 0;
1390 if (m == (struct mbuf *)0 || m->m_len == 0) {
1391 /*
1392 * Only turning off any previous options.
1393 */
1394 if (m)
1395 (void)m_free(m);
1396 return (0);
1397 }
1398
1399 #ifndef __vax__
1400 if (m->m_len % sizeof(int32_t))
1401 goto bad;
1402 #endif
1403 /*
1404 * IP first-hop destination address will be stored before
1405 * actual options; move other options back
1406 * and clear it when none present.
1407 */
1408 if (m->m_data + m->m_len + sizeof(struct in_addr) >= &m->m_dat[MLEN])
1409 goto bad;
1410 cnt = m->m_len;
1411 m->m_len += sizeof(struct in_addr);
1412 cp = mtod(m, u_char *) + sizeof(struct in_addr);
1413 memmove(cp, mtod(m, caddr_t), (unsigned)cnt);
1414 bzero(mtod(m, caddr_t), sizeof(struct in_addr));
1415
1416 for (; cnt > 0; cnt -= optlen, cp += optlen) {
1417 opt = cp[IPOPT_OPTVAL];
1418 if (opt == IPOPT_EOL)
1419 break;
1420 if (opt == IPOPT_NOP)
1421 optlen = 1;
1422 else {
1423 if (cnt < IPOPT_OLEN + sizeof(*cp))
1424 goto bad;
1425 optlen = cp[IPOPT_OLEN];
1426 if (optlen < IPOPT_OLEN + sizeof(*cp) || optlen > cnt)
1427 goto bad;
1428 }
1429 switch (opt) {
1430
1431 default:
1432 break;
1433
1434 case IPOPT_LSRR:
1435 case IPOPT_SSRR:
1436 /*
1437 * user process specifies route as:
1438 * ->A->B->C->D
1439 * D must be our final destination (but we can't
1440 * check that since we may not have connected yet).
1441 * A is first hop destination, which doesn't appear in
1442 * actual IP option, but is stored before the options.
1443 */
1444 if (optlen < IPOPT_MINOFF - 1 + sizeof(struct in_addr))
1445 goto bad;
1446 m->m_len -= sizeof(struct in_addr);
1447 cnt -= sizeof(struct in_addr);
1448 optlen -= sizeof(struct in_addr);
1449 cp[IPOPT_OLEN] = optlen;
1450 /*
1451 * Move first hop before start of options.
1452 */
1453 bcopy((caddr_t)&cp[IPOPT_OFFSET+1], mtod(m, caddr_t),
1454 sizeof(struct in_addr));
1455 /*
1456 * Then copy rest of options back
1457 * to close up the deleted entry.
1458 */
1459 (void)memmove(&cp[IPOPT_OFFSET+1],
1460 &cp[IPOPT_OFFSET+1] + sizeof(struct in_addr),
1461 (unsigned)cnt - (IPOPT_MINOFF - 1));
1462 break;
1463 }
1464 }
1465 if (m->m_len > MAX_IPOPTLEN + sizeof(struct in_addr))
1466 goto bad;
1467 *pcbopt = m;
1468 return (0);
1469
1470 bad:
1471 (void)m_free(m);
1472 return (EINVAL);
1473 }
1474
1475 /*
1476 * following RFC1724 section 3.3, 0.0.0.0/8 is interpreted as interface index.
1477 */
1478 static struct ifnet *
1479 ip_multicast_if(struct in_addr *a, int *ifindexp)
1480 {
1481 int ifindex;
1482 struct ifnet *ifp = NULL;
1483 struct in_ifaddr *ia;
1484
1485 if (ifindexp)
1486 *ifindexp = 0;
1487 if (ntohl(a->s_addr) >> 24 == 0) {
1488 ifindex = ntohl(a->s_addr) & 0xffffff;
1489 if (ifindex < 0 || if_indexlim <= ifindex)
1490 return NULL;
1491 ifp = ifindex2ifnet[ifindex];
1492 if (!ifp)
1493 return NULL;
1494 if (ifindexp)
1495 *ifindexp = ifindex;
1496 } else {
1497 LIST_FOREACH(ia, &IN_IFADDR_HASH(a->s_addr), ia_hash) {
1498 if (in_hosteq(ia->ia_addr.sin_addr, *a) &&
1499 (ia->ia_ifp->if_flags & IFF_MULTICAST) != 0) {
1500 ifp = ia->ia_ifp;
1501 break;
1502 }
1503 }
1504 }
1505 return ifp;
1506 }
1507
1508 /*
1509 * Set the IP multicast options in response to user setsockopt().
1510 */
1511 int
1512 ip_setmoptions(int optname, struct ip_moptions **imop, struct mbuf *m)
1513 {
1514 int error = 0;
1515 u_char loop;
1516 int i;
1517 struct in_addr addr;
1518 struct ip_mreq *mreq;
1519 struct ifnet *ifp;
1520 struct ip_moptions *imo = *imop;
1521 struct route ro;
1522 struct sockaddr_in *dst;
1523 int ifindex;
1524
1525 if (imo == NULL) {
1526 /*
1527 * No multicast option buffer attached to the pcb;
1528 * allocate one and initialize to default values.
1529 */
1530 imo = (struct ip_moptions *)malloc(sizeof(*imo), M_IPMOPTS,
1531 M_WAITOK);
1532
1533 if (imo == NULL)
1534 return (ENOBUFS);
1535 *imop = imo;
1536 imo->imo_multicast_ifp = NULL;
1537 imo->imo_multicast_addr.s_addr = INADDR_ANY;
1538 imo->imo_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
1539 imo->imo_multicast_loop = IP_DEFAULT_MULTICAST_LOOP;
1540 imo->imo_num_memberships = 0;
1541 }
1542
1543 switch (optname) {
1544
1545 case IP_MULTICAST_IF:
1546 /*
1547 * Select the interface for outgoing multicast packets.
1548 */
1549 if (m == NULL || m->m_len != sizeof(struct in_addr)) {
1550 error = EINVAL;
1551 break;
1552 }
1553 addr = *(mtod(m, struct in_addr *));
1554 /*
1555 * INADDR_ANY is used to remove a previous selection.
1556 * When no interface is selected, a default one is
1557 * chosen every time a multicast packet is sent.
1558 */
1559 if (in_nullhost(addr)) {
1560 imo->imo_multicast_ifp = NULL;
1561 break;
1562 }
1563 /*
1564 * The selected interface is identified by its local
1565 * IP address. Find the interface and confirm that
1566 * it supports multicasting.
1567 */
1568 ifp = ip_multicast_if(&addr, &ifindex);
1569 if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
1570 error = EADDRNOTAVAIL;
1571 break;
1572 }
1573 imo->imo_multicast_ifp = ifp;
1574 if (ifindex)
1575 imo->imo_multicast_addr = addr;
1576 else
1577 imo->imo_multicast_addr.s_addr = INADDR_ANY;
1578 break;
1579
1580 case IP_MULTICAST_TTL:
1581 /*
1582 * Set the IP time-to-live for outgoing multicast packets.
1583 */
1584 if (m == NULL || m->m_len != 1) {
1585 error = EINVAL;
1586 break;
1587 }
1588 imo->imo_multicast_ttl = *(mtod(m, u_char *));
1589 break;
1590
1591 case IP_MULTICAST_LOOP:
1592 /*
1593 * Set the loopback flag for outgoing multicast packets.
1594 * Must be zero or one.
1595 */
1596 if (m == NULL || m->m_len != 1 ||
1597 (loop = *(mtod(m, u_char *))) > 1) {
1598 error = EINVAL;
1599 break;
1600 }
1601 imo->imo_multicast_loop = loop;
1602 break;
1603
1604 case IP_ADD_MEMBERSHIP:
1605 /*
1606 * Add a multicast group membership.
1607 * Group must be a valid IP multicast address.
1608 */
1609 if (m == NULL || m->m_len != sizeof(struct ip_mreq)) {
1610 error = EINVAL;
1611 break;
1612 }
1613 mreq = mtod(m, struct ip_mreq *);
1614 if (!IN_MULTICAST(mreq->imr_multiaddr.s_addr)) {
1615 error = EINVAL;
1616 break;
1617 }
1618 /*
1619 * If no interface address was provided, use the interface of
1620 * the route to the given multicast address.
1621 */
1622 if (in_nullhost(mreq->imr_interface)) {
1623 bzero((caddr_t)&ro, sizeof(ro));
1624 ro.ro_rt = NULL;
1625 dst = satosin(&ro.ro_dst);
1626 dst->sin_len = sizeof(*dst);
1627 dst->sin_family = AF_INET;
1628 dst->sin_addr = mreq->imr_multiaddr;
1629 rtalloc(&ro);
1630 if (ro.ro_rt == NULL) {
1631 error = EADDRNOTAVAIL;
1632 break;
1633 }
1634 ifp = ro.ro_rt->rt_ifp;
1635 rtfree(ro.ro_rt);
1636 } else {
1637 ifp = ip_multicast_if(&mreq->imr_interface, NULL);
1638 }
1639 /*
1640 * See if we found an interface, and confirm that it
1641 * supports multicast.
1642 */
1643 if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
1644 error = EADDRNOTAVAIL;
1645 break;
1646 }
1647 /*
1648 * See if the membership already exists or if all the
1649 * membership slots are full.
1650 */
1651 for (i = 0; i < imo->imo_num_memberships; ++i) {
1652 if (imo->imo_membership[i]->inm_ifp == ifp &&
1653 in_hosteq(imo->imo_membership[i]->inm_addr,
1654 mreq->imr_multiaddr))
1655 break;
1656 }
1657 if (i < imo->imo_num_memberships) {
1658 error = EADDRINUSE;
1659 break;
1660 }
1661 if (i == IP_MAX_MEMBERSHIPS) {
1662 error = ETOOMANYREFS;
1663 break;
1664 }
1665 /*
1666 * Everything looks good; add a new record to the multicast
1667 * address list for the given interface.
1668 */
1669 if ((imo->imo_membership[i] =
1670 in_addmulti(&mreq->imr_multiaddr, ifp)) == NULL) {
1671 error = ENOBUFS;
1672 break;
1673 }
1674 ++imo->imo_num_memberships;
1675 break;
1676
1677 case IP_DROP_MEMBERSHIP:
1678 /*
1679 * Drop a multicast group membership.
1680 * Group must be a valid IP multicast address.
1681 */
1682 if (m == NULL || m->m_len != sizeof(struct ip_mreq)) {
1683 error = EINVAL;
1684 break;
1685 }
1686 mreq = mtod(m, struct ip_mreq *);
1687 if (!IN_MULTICAST(mreq->imr_multiaddr.s_addr)) {
1688 error = EINVAL;
1689 break;
1690 }
1691 /*
1692 * If an interface address was specified, get a pointer
1693 * to its ifnet structure.
1694 */
1695 if (in_nullhost(mreq->imr_interface))
1696 ifp = NULL;
1697 else {
1698 ifp = ip_multicast_if(&mreq->imr_interface, NULL);
1699 if (ifp == NULL) {
1700 error = EADDRNOTAVAIL;
1701 break;
1702 }
1703 }
1704 /*
1705 * Find the membership in the membership array.
1706 */
1707 for (i = 0; i < imo->imo_num_memberships; ++i) {
1708 if ((ifp == NULL ||
1709 imo->imo_membership[i]->inm_ifp == ifp) &&
1710 in_hosteq(imo->imo_membership[i]->inm_addr,
1711 mreq->imr_multiaddr))
1712 break;
1713 }
1714 if (i == imo->imo_num_memberships) {
1715 error = EADDRNOTAVAIL;
1716 break;
1717 }
1718 /*
1719 * Give up the multicast address record to which the
1720 * membership points.
1721 */
1722 in_delmulti(imo->imo_membership[i]);
1723 /*
1724 * Remove the gap in the membership array.
1725 */
1726 for (++i; i < imo->imo_num_memberships; ++i)
1727 imo->imo_membership[i-1] = imo->imo_membership[i];
1728 --imo->imo_num_memberships;
1729 break;
1730
1731 default:
1732 error = EOPNOTSUPP;
1733 break;
1734 }
1735
1736 /*
1737 * If all options have default values, no need to keep the mbuf.
1738 */
1739 if (imo->imo_multicast_ifp == NULL &&
1740 imo->imo_multicast_ttl == IP_DEFAULT_MULTICAST_TTL &&
1741 imo->imo_multicast_loop == IP_DEFAULT_MULTICAST_LOOP &&
1742 imo->imo_num_memberships == 0) {
1743 free(*imop, M_IPMOPTS);
1744 *imop = NULL;
1745 }
1746
1747 return (error);
1748 }
1749
1750 /*
1751 * Return the IP multicast options in response to user getsockopt().
1752 */
1753 int
1754 ip_getmoptions(int optname, struct ip_moptions *imo, struct mbuf **mp)
1755 {
1756 u_char *ttl;
1757 u_char *loop;
1758 struct in_addr *addr;
1759 struct in_ifaddr *ia;
1760
1761 *mp = m_get(M_WAIT, MT_SOOPTS);
1762
1763 switch (optname) {
1764
1765 case IP_MULTICAST_IF:
1766 addr = mtod(*mp, struct in_addr *);
1767 (*mp)->m_len = sizeof(struct in_addr);
1768 if (imo == NULL || imo->imo_multicast_ifp == NULL)
1769 *addr = zeroin_addr;
1770 else if (imo->imo_multicast_addr.s_addr) {
1771 /* return the value user has set */
1772 *addr = imo->imo_multicast_addr;
1773 } else {
1774 IFP_TO_IA(imo->imo_multicast_ifp, ia);
1775 *addr = ia ? ia->ia_addr.sin_addr : zeroin_addr;
1776 }
1777 return (0);
1778
1779 case IP_MULTICAST_TTL:
1780 ttl = mtod(*mp, u_char *);
1781 (*mp)->m_len = 1;
1782 *ttl = imo ? imo->imo_multicast_ttl
1783 : IP_DEFAULT_MULTICAST_TTL;
1784 return (0);
1785
1786 case IP_MULTICAST_LOOP:
1787 loop = mtod(*mp, u_char *);
1788 (*mp)->m_len = 1;
1789 *loop = imo ? imo->imo_multicast_loop
1790 : IP_DEFAULT_MULTICAST_LOOP;
1791 return (0);
1792
1793 default:
1794 return (EOPNOTSUPP);
1795 }
1796 }
1797
1798 /*
1799 * Discard the IP multicast options.
1800 */
1801 void
1802 ip_freemoptions(struct ip_moptions *imo)
1803 {
1804 int i;
1805
1806 if (imo != NULL) {
1807 for (i = 0; i < imo->imo_num_memberships; ++i)
1808 in_delmulti(imo->imo_membership[i]);
1809 free(imo, M_IPMOPTS);
1810 }
1811 }
1812
1813 /*
1814 * Routine called from ip_output() to loop back a copy of an IP multicast
1815 * packet to the input queue of a specified interface. Note that this
1816 * calls the output routine of the loopback "driver", but with an interface
1817 * pointer that might NOT be lo0ifp -- easier than replicating that code here.
1818 */
1819 static void
1820 ip_mloopback(struct ifnet *ifp, struct mbuf *m, struct sockaddr_in *dst)
1821 {
1822 struct ip *ip;
1823 struct mbuf *copym;
1824
1825 copym = m_copy(m, 0, M_COPYALL);
1826 if (copym != NULL
1827 && (copym->m_flags & M_EXT || copym->m_len < sizeof(struct ip)))
1828 copym = m_pullup(copym, sizeof(struct ip));
1829 if (copym != NULL) {
1830 /*
1831 * We don't bother to fragment if the IP length is greater
1832 * than the interface's MTU. Can this possibly matter?
1833 */
1834 ip = mtod(copym, struct ip *);
1835
1836 if (copym->m_pkthdr.csum_flags & (M_CSUM_TCPv4|M_CSUM_UDPv4)) {
1837 in_delayed_cksum(copym);
1838 copym->m_pkthdr.csum_flags &=
1839 ~(M_CSUM_TCPv4|M_CSUM_UDPv4);
1840 }
1841
1842 ip->ip_sum = 0;
1843 ip->ip_sum = in_cksum(copym, ip->ip_hl << 2);
1844 (void) looutput(ifp, copym, sintosa(dst), NULL);
1845 }
1846 }
1847