ip_output.c revision 1.259.2.3 1 /* $NetBSD: ip_output.c,v 1.259.2.3 2017/01/07 08:56:51 pgoyette Exp $ */
2
3 /*
4 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of the project nor the names of its contributors
16 * may be used to endorse or promote products derived from this software
17 * without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 */
31
32 /*-
33 * Copyright (c) 1998 The NetBSD Foundation, Inc.
34 * All rights reserved.
35 *
36 * This code is derived from software contributed to The NetBSD Foundation
37 * by Public Access Networks Corporation ("Panix"). It was developed under
38 * contract to Panix by Eric Haszlakiewicz and Thor Lancelot Simon.
39 *
40 * Redistribution and use in source and binary forms, with or without
41 * modification, are permitted provided that the following conditions
42 * are met:
43 * 1. Redistributions of source code must retain the above copyright
44 * notice, this list of conditions and the following disclaimer.
45 * 2. Redistributions in binary form must reproduce the above copyright
46 * notice, this list of conditions and the following disclaimer in the
47 * documentation and/or other materials provided with the distribution.
48 *
49 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
50 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
51 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
52 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
53 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
54 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
55 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
56 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
57 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
58 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
59 * POSSIBILITY OF SUCH DAMAGE.
60 */
61
62 /*
63 * Copyright (c) 1982, 1986, 1988, 1990, 1993
64 * The Regents of the University of California. All rights reserved.
65 *
66 * Redistribution and use in source and binary forms, with or without
67 * modification, are permitted provided that the following conditions
68 * are met:
69 * 1. Redistributions of source code must retain the above copyright
70 * notice, this list of conditions and the following disclaimer.
71 * 2. Redistributions in binary form must reproduce the above copyright
72 * notice, this list of conditions and the following disclaimer in the
73 * documentation and/or other materials provided with the distribution.
74 * 3. Neither the name of the University nor the names of its contributors
75 * may be used to endorse or promote products derived from this software
76 * without specific prior written permission.
77 *
78 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
79 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
80 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
81 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
82 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
83 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
84 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
85 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
86 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
87 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
88 * SUCH DAMAGE.
89 *
90 * @(#)ip_output.c 8.3 (Berkeley) 1/21/94
91 */
92
93 #include <sys/cdefs.h>
94 __KERNEL_RCSID(0, "$NetBSD: ip_output.c,v 1.259.2.3 2017/01/07 08:56:51 pgoyette Exp $");
95
96 #ifdef _KERNEL_OPT
97 #include "opt_inet.h"
98 #include "opt_ipsec.h"
99 #include "opt_mrouting.h"
100 #include "opt_net_mpsafe.h"
101 #include "opt_mpls.h"
102 #endif
103
104 #include "arp.h"
105
106 #include <sys/param.h>
107 #include <sys/kmem.h>
108 #include <sys/mbuf.h>
109 #include <sys/protosw.h>
110 #include <sys/socket.h>
111 #include <sys/socketvar.h>
112 #include <sys/kauth.h>
113 #ifdef IPSEC
114 #include <sys/domain.h>
115 #endif
116 #include <sys/systm.h>
117 #include <sys/syslog.h>
118
119 #include <net/if.h>
120 #include <net/if_types.h>
121 #include <net/route.h>
122 #include <net/pfil.h>
123
124 #include <netinet/in.h>
125 #include <netinet/in_systm.h>
126 #include <netinet/ip.h>
127 #include <netinet/in_pcb.h>
128 #include <netinet/in_var.h>
129 #include <netinet/ip_var.h>
130 #include <netinet/ip_private.h>
131 #include <netinet/in_offload.h>
132 #include <netinet/portalgo.h>
133 #include <netinet/udp.h>
134
135 #ifdef INET6
136 #include <netinet6/ip6_var.h>
137 #endif
138
139 #ifdef MROUTING
140 #include <netinet/ip_mroute.h>
141 #endif
142
143 #ifdef IPSEC
144 #include <netipsec/ipsec.h>
145 #include <netipsec/key.h>
146 #endif
147
148 #ifdef MPLS
149 #include <netmpls/mpls.h>
150 #include <netmpls/mpls_var.h>
151 #endif
152
153 static int ip_pcbopts(struct inpcb *, const struct sockopt *);
154 static struct mbuf *ip_insertoptions(struct mbuf *, struct mbuf *, int *);
155 static struct ifnet *ip_multicast_if(struct in_addr *, int *);
156 static void ip_mloopback(struct ifnet *, struct mbuf *,
157 const struct sockaddr_in *);
158 static int ip_ifaddrvalid(const struct in_ifaddr *);
159
160 extern pfil_head_t *inet_pfil_hook; /* XXX */
161
162 int ip_do_loopback_cksum = 0;
163
/*
 * Mark an outgoing packet so that the link layer treats it as MPLS.
 *
 * A PACKET_TAG_MPLS mbuf tag is prepended to m when the route carries an
 * AF_MPLS tag with a label other than implicit-null, the packet is neither
 * multicast nor broadcast, and the interface type is IFT_ETHER.  In every
 * other case, and in kernels built without MPLS, nothing is done.
 *
 * Returns 0 on success (including "nothing to do") and ENOMEM when the
 * tag cannot be allocated.  The mbuf is never freed here.
 */
static int
ip_mark_mpls(struct ifnet * const ifp, struct mbuf * const m,
    const struct rtentry *rt)
{
#ifdef MPLS
	union mpls_shim shim;
	struct m_tag *tag;

	/* Only routes that carry an AF_MPLS tag are of interest. */
	if (rt == NULL || rt_gettag(rt) == NULL)
		return 0;
	if (rt_gettag(rt)->sa_family != AF_MPLS)
		return 0;

	/* Multicast/broadcast packets and non-Ethernet interfaces are skipped. */
	if ((m->m_flags & (M_MCAST | M_BCAST)) != 0)
		return 0;
	if (ifp->if_type != IFT_ETHER)
		return 0;

	shim.s_addr = MPLS_GETSADDR(rt);
	if (shim.shim.label == MPLS_LABEL_IMPLNULL)
		return 0;

	/*
	 * XXX tentative solution to tell ether_output
	 * it's MPLS. Need some more efficient solution.
	 */
	tag = m_tag_get(PACKET_TAG_MPLS, sizeof(int) /* dummy */, M_NOWAIT);
	if (tag == NULL)
		return ENOMEM;
	m_tag_prepend(m, tag);
#endif
	return 0;
}
195
196 /*
197 * Send an IP packet to a host.
198 */
199 int
200 ip_if_output(struct ifnet * const ifp, struct mbuf * const m,
201 const struct sockaddr * const dst, const struct rtentry *rt)
202 {
203 int error = 0;
204
205 if (rt != NULL) {
206 error = rt_check_reject_route(rt, ifp);
207 if (error != 0) {
208 m_freem(m);
209 return error;
210 }
211 }
212
213 error = ip_mark_mpls(ifp, m, rt);
214 if (error != 0) {
215 m_freem(m);
216 return error;
217 }
218
219 error = if_output_lock(ifp, ifp, m, dst, rt);
220
221 return error;
222 }
223
224 /*
225 * IP output. The packet in mbuf chain m contains a skeletal IP
226 * header (with len, off, ttl, proto, tos, src, dst).
227 * The mbuf chain containing the packet will be freed.
228 * The mbuf opt, if present, will not be freed.
229 */
230 int
231 ip_output(struct mbuf *m0, struct mbuf *opt, struct route *ro, int flags,
232 struct ip_moptions *imo, struct socket *so)
233 {
234 struct rtentry *rt;
235 struct ip *ip;
236 struct ifnet *ifp, *mifp = NULL;
237 struct mbuf *m = m0;
238 int hlen = sizeof (struct ip);
239 int len, error = 0;
240 struct route iproute;
241 const struct sockaddr_in *dst;
242 struct in_ifaddr *ia = NULL;
243 int isbroadcast;
244 int sw_csum;
245 u_long mtu;
246 #ifdef IPSEC
247 struct secpolicy *sp = NULL;
248 #endif
249 bool natt_frag = false;
250 bool rtmtu_nolock;
251 union {
252 struct sockaddr dst;
253 struct sockaddr_in dst4;
254 } u;
255 struct sockaddr *rdst = &u.dst; /* real IP destination, as opposed
256 * to the nexthop
257 */
258 struct psref psref, psref_ia;
259 int bound;
260 bool bind_need_restore = false;
261
262 len = 0;
263
264 MCLAIM(m, &ip_tx_mowner);
265
266 KASSERT((m->m_flags & M_PKTHDR) != 0);
267 KASSERT((m->m_pkthdr.csum_flags & (M_CSUM_TCPv6|M_CSUM_UDPv6)) == 0);
268 KASSERT((m->m_pkthdr.csum_flags & (M_CSUM_TCPv4|M_CSUM_UDPv4)) !=
269 (M_CSUM_TCPv4|M_CSUM_UDPv4));
270
271 if (opt) {
272 m = ip_insertoptions(m, opt, &len);
273 if (len >= sizeof(struct ip))
274 hlen = len;
275 }
276 ip = mtod(m, struct ip *);
277
278 /*
279 * Fill in IP header.
280 */
281 if ((flags & (IP_FORWARDING|IP_RAWOUTPUT)) == 0) {
282 ip->ip_v = IPVERSION;
283 ip->ip_off = htons(0);
284 /* ip->ip_id filled in after we find out source ia */
285 ip->ip_hl = hlen >> 2;
286 IP_STATINC(IP_STAT_LOCALOUT);
287 } else {
288 hlen = ip->ip_hl << 2;
289 }
290
291 /*
292 * Route packet.
293 */
294 if (ro == NULL) {
295 memset(&iproute, 0, sizeof(iproute));
296 ro = &iproute;
297 }
298 sockaddr_in_init(&u.dst4, &ip->ip_dst, 0);
299 dst = satocsin(rtcache_getdst(ro));
300
301 /*
302 * If there is a cached route, check that it is to the same
303 * destination and is still up. If not, free it and try again.
304 * The address family should also be checked in case of sharing
305 * the cache with IPv6.
306 */
307 if (dst && (dst->sin_family != AF_INET ||
308 !in_hosteq(dst->sin_addr, ip->ip_dst)))
309 rtcache_free(ro);
310
311 if ((rt = rtcache_validate(ro)) == NULL &&
312 (rt = rtcache_update(ro, 1)) == NULL) {
313 dst = &u.dst4;
314 error = rtcache_setdst(ro, &u.dst);
315 if (error != 0)
316 goto bad;
317 }
318
319 bound = curlwp_bind();
320 bind_need_restore = true;
321 /*
322 * If routing to interface only, short circuit routing lookup.
323 */
324 if (flags & IP_ROUTETOIF) {
325 struct ifaddr *ifa;
326
327 ifa = ifa_ifwithladdr_psref(sintocsa(dst), &psref_ia);
328 if (ifa == NULL) {
329 IP_STATINC(IP_STAT_NOROUTE);
330 error = ENETUNREACH;
331 goto bad;
332 }
333 /* ia is already referenced by psref_ia */
334 ia = ifatoia(ifa);
335
336 ifp = ia->ia_ifp;
337 mtu = ifp->if_mtu;
338 ip->ip_ttl = 1;
339 isbroadcast = in_broadcast(dst->sin_addr, ifp);
340 } else if ((IN_MULTICAST(ip->ip_dst.s_addr) ||
341 ip->ip_dst.s_addr == INADDR_BROADCAST) &&
342 imo != NULL && imo->imo_multicast_if_index != 0) {
343 ifp = mifp = if_get_byindex(imo->imo_multicast_if_index, &psref);
344 if (ifp == NULL) {
345 IP_STATINC(IP_STAT_NOROUTE);
346 error = ENETUNREACH;
347 goto bad;
348 }
349 mtu = ifp->if_mtu;
350 ia = in_get_ia_from_ifp_psref(ifp, &psref_ia);
351 if (ia == NULL) {
352 error = EADDRNOTAVAIL;
353 goto bad;
354 }
355 isbroadcast = 0;
356 } else {
357 if (rt == NULL)
358 rt = rtcache_init(ro);
359 if (rt == NULL) {
360 IP_STATINC(IP_STAT_NOROUTE);
361 error = EHOSTUNREACH;
362 goto bad;
363 }
364 if (ifa_is_destroying(rt->rt_ifa)) {
365 rtcache_unref(rt, ro);
366 IP_STATINC(IP_STAT_NOROUTE);
367 error = EHOSTUNREACH;
368 goto bad;
369 }
370 ifa_acquire(rt->rt_ifa, &psref_ia);
371 ia = ifatoia(rt->rt_ifa);
372 ifp = rt->rt_ifp;
373 if ((mtu = rt->rt_rmx.rmx_mtu) == 0)
374 mtu = ifp->if_mtu;
375 rt->rt_use++;
376 if (rt->rt_flags & RTF_GATEWAY)
377 dst = satosin(rt->rt_gateway);
378 if (rt->rt_flags & RTF_HOST)
379 isbroadcast = rt->rt_flags & RTF_BROADCAST;
380 else
381 isbroadcast = in_broadcast(dst->sin_addr, ifp);
382 }
383 rtmtu_nolock = rt && (rt->rt_rmx.rmx_locks & RTV_MTU) == 0;
384
385 if (IN_MULTICAST(ip->ip_dst.s_addr) ||
386 (ip->ip_dst.s_addr == INADDR_BROADCAST)) {
387 bool inmgroup;
388
389 m->m_flags |= (ip->ip_dst.s_addr == INADDR_BROADCAST) ?
390 M_BCAST : M_MCAST;
391 /*
392 * See if the caller provided any multicast options
393 */
394 if (imo != NULL)
395 ip->ip_ttl = imo->imo_multicast_ttl;
396 else
397 ip->ip_ttl = IP_DEFAULT_MULTICAST_TTL;
398
399 /*
400 * if we don't know the outgoing ifp yet, we can't generate
401 * output
402 */
403 if (!ifp) {
404 IP_STATINC(IP_STAT_NOROUTE);
405 error = ENETUNREACH;
406 goto bad;
407 }
408
409 /*
410 * If the packet is multicast or broadcast, confirm that
411 * the outgoing interface can transmit it.
412 */
413 if (((m->m_flags & M_MCAST) &&
414 (ifp->if_flags & IFF_MULTICAST) == 0) ||
415 ((m->m_flags & M_BCAST) &&
416 (ifp->if_flags & (IFF_BROADCAST|IFF_POINTOPOINT)) == 0)) {
417 IP_STATINC(IP_STAT_NOROUTE);
418 error = ENETUNREACH;
419 goto bad;
420 }
421 /*
422 * If source address not specified yet, use an address
423 * of outgoing interface.
424 */
425 if (in_nullhost(ip->ip_src)) {
426 struct in_ifaddr *xia;
427 struct ifaddr *xifa;
428 struct psref _psref;
429
430 xia = in_get_ia_from_ifp_psref(ifp, &_psref);
431 if (!xia) {
432 error = EADDRNOTAVAIL;
433 goto bad;
434 }
435 xifa = &xia->ia_ifa;
436 if (xifa->ifa_getifa != NULL) {
437 ia4_release(xia, &_psref);
438 /* FIXME NOMPSAFE */
439 xia = ifatoia((*xifa->ifa_getifa)(xifa, rdst));
440 if (xia == NULL) {
441 error = EADDRNOTAVAIL;
442 goto bad;
443 }
444 ia4_acquire(xia, &_psref);
445 }
446 ip->ip_src = xia->ia_addr.sin_addr;
447 ia4_release(xia, &_psref);
448 }
449
450 inmgroup = in_multi_group(ip->ip_dst, ifp, flags);
451 if (inmgroup && (imo == NULL || imo->imo_multicast_loop)) {
452 /*
453 * If we belong to the destination multicast group
454 * on the outgoing interface, and the caller did not
455 * forbid loopback, loop back a copy.
456 */
457 ip_mloopback(ifp, m, &u.dst4);
458 }
459 #ifdef MROUTING
460 else {
461 /*
462 * If we are acting as a multicast router, perform
463 * multicast forwarding as if the packet had just
464 * arrived on the interface to which we are about
465 * to send. The multicast forwarding function
466 * recursively calls this function, using the
467 * IP_FORWARDING flag to prevent infinite recursion.
468 *
469 * Multicasts that are looped back by ip_mloopback(),
470 * above, will be forwarded by the ip_input() routine,
471 * if necessary.
472 */
473 extern struct socket *ip_mrouter;
474
475 if (ip_mrouter && (flags & IP_FORWARDING) == 0) {
476 if (ip_mforward(m, ifp) != 0) {
477 m_freem(m);
478 goto done;
479 }
480 }
481 }
482 #endif
483 /*
484 * Multicasts with a time-to-live of zero may be looped-
485 * back, above, but must not be transmitted on a network.
486 * Also, multicasts addressed to the loopback interface
487 * are not sent -- the above call to ip_mloopback() will
488 * loop back a copy if this host actually belongs to the
489 * destination group on the loopback interface.
490 */
491 if (ip->ip_ttl == 0 || (ifp->if_flags & IFF_LOOPBACK) != 0) {
492 m_freem(m);
493 goto done;
494 }
495 goto sendit;
496 }
497
498 /*
499 * If source address not specified yet, use address
500 * of outgoing interface.
501 */
502 if (in_nullhost(ip->ip_src)) {
503 struct ifaddr *xifa;
504
505 xifa = &ia->ia_ifa;
506 if (xifa->ifa_getifa != NULL) {
507 ia4_release(ia, &psref_ia);
508 /* FIXME NOMPSAFE */
509 ia = ifatoia((*xifa->ifa_getifa)(xifa, rdst));
510 if (ia == NULL) {
511 error = EADDRNOTAVAIL;
512 goto bad;
513 }
514 ia4_acquire(ia, &psref_ia);
515 }
516 ip->ip_src = ia->ia_addr.sin_addr;
517 }
518
519 /*
520 * packets with Class-D address as source are not valid per
521 * RFC 1112
522 */
523 if (IN_MULTICAST(ip->ip_src.s_addr)) {
524 IP_STATINC(IP_STAT_ODROPPED);
525 error = EADDRNOTAVAIL;
526 goto bad;
527 }
528
529 /*
530 * Look for broadcast address and and verify user is allowed to
531 * send such a packet.
532 */
533 if (isbroadcast) {
534 if ((ifp->if_flags & IFF_BROADCAST) == 0) {
535 error = EADDRNOTAVAIL;
536 goto bad;
537 }
538 if ((flags & IP_ALLOWBROADCAST) == 0) {
539 error = EACCES;
540 goto bad;
541 }
542 /* don't allow broadcast messages to be fragmented */
543 if (ntohs(ip->ip_len) > ifp->if_mtu) {
544 error = EMSGSIZE;
545 goto bad;
546 }
547 m->m_flags |= M_BCAST;
548 } else
549 m->m_flags &= ~M_BCAST;
550
551 sendit:
552 if ((flags & (IP_FORWARDING|IP_NOIPNEWID)) == 0) {
553 if (m->m_pkthdr.len < IP_MINFRAGSIZE) {
554 ip->ip_id = 0;
555 } else if ((m->m_pkthdr.csum_flags & M_CSUM_TSOv4) == 0) {
556 ip->ip_id = ip_newid(ia);
557 } else {
558
559 /*
560 * TSO capable interfaces (typically?) increment
561 * ip_id for each segment.
562 * "allocate" enough ids here to increase the chance
563 * for them to be unique.
564 *
565 * note that the following calculation is not
566 * needed to be precise. wasting some ip_id is fine.
567 */
568
569 unsigned int segsz = m->m_pkthdr.segsz;
570 unsigned int datasz = ntohs(ip->ip_len) - hlen;
571 unsigned int num = howmany(datasz, segsz);
572
573 ip->ip_id = ip_newid_range(ia, num);
574 }
575 }
576 if (ia != NULL) {
577 ia4_release(ia, &psref_ia);
578 ia = NULL;
579 }
580
581 /*
582 * If we're doing Path MTU Discovery, we need to set DF unless
583 * the route's MTU is locked.
584 */
585 if ((flags & IP_MTUDISC) != 0 && rtmtu_nolock) {
586 ip->ip_off |= htons(IP_DF);
587 }
588
589 #ifdef IPSEC
590 if (ipsec_used) {
591 bool ipsec_done = false;
592
593 /* Perform IPsec processing, if any. */
594 error = ipsec4_output(m, so, flags, &sp, &mtu, &natt_frag,
595 &ipsec_done);
596 if (error || ipsec_done)
597 goto done;
598 }
599 #endif
600
601 /*
602 * Run through list of hooks for output packets.
603 */
604 error = pfil_run_hooks(inet_pfil_hook, &m, ifp, PFIL_OUT);
605 if (error)
606 goto done;
607 if (m == NULL)
608 goto done;
609
610 ip = mtod(m, struct ip *);
611 hlen = ip->ip_hl << 2;
612
613 m->m_pkthdr.csum_data |= hlen << 16;
614
615 /*
616 * search for the source address structure to
617 * maintain output statistics.
618 */
619 KASSERT(ia == NULL);
620 ia = in_get_ia_psref(ip->ip_src, &psref_ia);
621
622 /* Ensure we only send from a valid address. */
623 if ((ia != NULL || (flags & IP_FORWARDING) == 0) &&
624 (error = ip_ifaddrvalid(ia)) != 0)
625 {
626 arplog(LOG_ERR,
627 "refusing to send from invalid address %s (pid %d)\n",
628 in_fmtaddr(ip->ip_src), curproc->p_pid);
629 IP_STATINC(IP_STAT_ODROPPED);
630 if (error == 1)
631 /*
632 * Address exists, but is tentative or detached.
633 * We can't send from it because it's invalid,
634 * so we drop the packet.
635 */
636 error = 0;
637 else
638 error = EADDRNOTAVAIL;
639 goto bad;
640 }
641
642 /* Maybe skip checksums on loopback interfaces. */
643 if (IN_NEED_CHECKSUM(ifp, M_CSUM_IPv4)) {
644 m->m_pkthdr.csum_flags |= M_CSUM_IPv4;
645 }
646 sw_csum = m->m_pkthdr.csum_flags & ~ifp->if_csum_flags_tx;
647 /*
648 * If small enough for mtu of path, or if using TCP segmentation
649 * offload, can just send directly.
650 */
651 if (ntohs(ip->ip_len) <= mtu ||
652 (m->m_pkthdr.csum_flags & M_CSUM_TSOv4) != 0) {
653 const struct sockaddr *sa;
654
655 #if IFA_STATS
656 if (ia)
657 ia->ia_ifa.ifa_data.ifad_outbytes += ntohs(ip->ip_len);
658 #endif
659 /*
660 * Always initialize the sum to 0! Some HW assisted
661 * checksumming requires this.
662 */
663 ip->ip_sum = 0;
664
665 if ((m->m_pkthdr.csum_flags & M_CSUM_TSOv4) == 0) {
666 /*
667 * Perform any checksums that the hardware can't do
668 * for us.
669 *
670 * XXX Does any hardware require the {th,uh}_sum
671 * XXX fields to be 0?
672 */
673 if (sw_csum & M_CSUM_IPv4) {
674 KASSERT(IN_NEED_CHECKSUM(ifp, M_CSUM_IPv4));
675 ip->ip_sum = in_cksum(m, hlen);
676 m->m_pkthdr.csum_flags &= ~M_CSUM_IPv4;
677 }
678 if (sw_csum & (M_CSUM_TCPv4|M_CSUM_UDPv4)) {
679 if (IN_NEED_CHECKSUM(ifp,
680 sw_csum & (M_CSUM_TCPv4|M_CSUM_UDPv4))) {
681 in_delayed_cksum(m);
682 }
683 m->m_pkthdr.csum_flags &=
684 ~(M_CSUM_TCPv4|M_CSUM_UDPv4);
685 }
686 }
687
688 sa = (m->m_flags & M_MCAST) ? sintocsa(rdst) : sintocsa(dst);
689 if (__predict_true(
690 (m->m_pkthdr.csum_flags & M_CSUM_TSOv4) == 0 ||
691 (ifp->if_capenable & IFCAP_TSOv4) != 0)) {
692 error = ip_if_output(ifp, m, sa, rt);
693 } else {
694 error = ip_tso_output(ifp, m, sa, rt);
695 }
696 goto done;
697 }
698
699 /*
700 * We can't use HW checksumming if we're about to
701 * to fragment the packet.
702 *
703 * XXX Some hardware can do this.
704 */
705 if (m->m_pkthdr.csum_flags & (M_CSUM_TCPv4|M_CSUM_UDPv4)) {
706 if (IN_NEED_CHECKSUM(ifp,
707 m->m_pkthdr.csum_flags & (M_CSUM_TCPv4|M_CSUM_UDPv4))) {
708 in_delayed_cksum(m);
709 }
710 m->m_pkthdr.csum_flags &= ~(M_CSUM_TCPv4|M_CSUM_UDPv4);
711 }
712
713 /*
714 * Too large for interface; fragment if possible.
715 * Must be able to put at least 8 bytes per fragment.
716 */
717 if (ntohs(ip->ip_off) & IP_DF) {
718 if (flags & IP_RETURNMTU) {
719 struct inpcb *inp;
720
721 KASSERT(so && solocked(so));
722 inp = sotoinpcb(so);
723 inp->inp_errormtu = mtu;
724 }
725 error = EMSGSIZE;
726 IP_STATINC(IP_STAT_CANTFRAG);
727 goto bad;
728 }
729
730 error = ip_fragment(m, ifp, mtu);
731 if (error) {
732 m = NULL;
733 goto bad;
734 }
735
736 for (; m; m = m0) {
737 m0 = m->m_nextpkt;
738 m->m_nextpkt = 0;
739 if (error) {
740 m_freem(m);
741 continue;
742 }
743 #if IFA_STATS
744 if (ia)
745 ia->ia_ifa.ifa_data.ifad_outbytes += ntohs(ip->ip_len);
746 #endif
747 /*
748 * If we get there, the packet has not been handled by
749 * IPsec whereas it should have. Now that it has been
750 * fragmented, re-inject it in ip_output so that IPsec
751 * processing can occur.
752 */
753 if (natt_frag) {
754 error = ip_output(m, opt, ro,
755 flags | IP_RAWOUTPUT | IP_NOIPNEWID,
756 imo, so);
757 } else {
758 KASSERT((m->m_pkthdr.csum_flags &
759 (M_CSUM_UDPv4 | M_CSUM_TCPv4)) == 0);
760 error = ip_if_output(ifp, m,
761 (m->m_flags & M_MCAST) ?
762 sintocsa(rdst) : sintocsa(dst), rt);
763 }
764 }
765 if (error == 0) {
766 IP_STATINC(IP_STAT_FRAGMENTED);
767 }
768 done:
769 ia4_release(ia, &psref_ia);
770 rtcache_unref(rt, ro);
771 if (ro == &iproute) {
772 rtcache_free(&iproute);
773 }
774 #ifdef IPSEC
775 if (sp) {
776 KEY_FREESP(&sp);
777 }
778 #endif
779 if (mifp != NULL) {
780 if_put(mifp, &psref);
781 }
782 if (bind_need_restore)
783 curlwp_bindx(bound);
784 return error;
785 bad:
786 m_freem(m);
787 goto done;
788 }
789
790 int
791 ip_fragment(struct mbuf *m, struct ifnet *ifp, u_long mtu)
792 {
793 struct ip *ip, *mhip;
794 struct mbuf *m0;
795 int len, hlen, off;
796 int mhlen, firstlen;
797 struct mbuf **mnext;
798 int sw_csum = m->m_pkthdr.csum_flags;
799 int fragments = 0;
800 int s;
801 int error = 0;
802
803 ip = mtod(m, struct ip *);
804 hlen = ip->ip_hl << 2;
805 if (ifp != NULL)
806 sw_csum &= ~ifp->if_csum_flags_tx;
807
808 len = (mtu - hlen) &~ 7;
809 if (len < 8) {
810 m_freem(m);
811 return (EMSGSIZE);
812 }
813
814 firstlen = len;
815 mnext = &m->m_nextpkt;
816
817 /*
818 * Loop through length of segment after first fragment,
819 * make new header and copy data of each part and link onto chain.
820 */
821 m0 = m;
822 mhlen = sizeof (struct ip);
823 for (off = hlen + len; off < ntohs(ip->ip_len); off += len) {
824 MGETHDR(m, M_DONTWAIT, MT_HEADER);
825 if (m == 0) {
826 error = ENOBUFS;
827 IP_STATINC(IP_STAT_ODROPPED);
828 goto sendorfree;
829 }
830 MCLAIM(m, m0->m_owner);
831 *mnext = m;
832 mnext = &m->m_nextpkt;
833 m->m_data += max_linkhdr;
834 mhip = mtod(m, struct ip *);
835 *mhip = *ip;
836 /* we must inherit MCAST and BCAST flags */
837 m->m_flags |= m0->m_flags & (M_MCAST|M_BCAST);
838 if (hlen > sizeof (struct ip)) {
839 mhlen = ip_optcopy(ip, mhip) + sizeof (struct ip);
840 mhip->ip_hl = mhlen >> 2;
841 }
842 m->m_len = mhlen;
843 mhip->ip_off = ((off - hlen) >> 3) +
844 (ntohs(ip->ip_off) & ~IP_MF);
845 if (ip->ip_off & htons(IP_MF))
846 mhip->ip_off |= IP_MF;
847 if (off + len >= ntohs(ip->ip_len))
848 len = ntohs(ip->ip_len) - off;
849 else
850 mhip->ip_off |= IP_MF;
851 HTONS(mhip->ip_off);
852 mhip->ip_len = htons((u_int16_t)(len + mhlen));
853 m->m_next = m_copym(m0, off, len, M_DONTWAIT);
854 if (m->m_next == 0) {
855 error = ENOBUFS; /* ??? */
856 IP_STATINC(IP_STAT_ODROPPED);
857 goto sendorfree;
858 }
859 m->m_pkthdr.len = mhlen + len;
860 m_reset_rcvif(m);
861 mhip->ip_sum = 0;
862 KASSERT((m->m_pkthdr.csum_flags & M_CSUM_IPv4) == 0);
863 if (sw_csum & M_CSUM_IPv4) {
864 mhip->ip_sum = in_cksum(m, mhlen);
865 } else {
866 /*
867 * checksum is hw-offloaded or not necessary.
868 */
869 m->m_pkthdr.csum_flags |=
870 m0->m_pkthdr.csum_flags & M_CSUM_IPv4;
871 m->m_pkthdr.csum_data |= mhlen << 16;
872 KASSERT(!(ifp != NULL &&
873 IN_NEED_CHECKSUM(ifp, M_CSUM_IPv4)) ||
874 (m->m_pkthdr.csum_flags & M_CSUM_IPv4) != 0);
875 }
876 IP_STATINC(IP_STAT_OFRAGMENTS);
877 fragments++;
878 }
879 /*
880 * Update first fragment by trimming what's been copied out
881 * and updating header, then send each fragment (in order).
882 */
883 m = m0;
884 m_adj(m, hlen + firstlen - ntohs(ip->ip_len));
885 m->m_pkthdr.len = hlen + firstlen;
886 ip->ip_len = htons((u_int16_t)m->m_pkthdr.len);
887 ip->ip_off |= htons(IP_MF);
888 ip->ip_sum = 0;
889 if (sw_csum & M_CSUM_IPv4) {
890 ip->ip_sum = in_cksum(m, hlen);
891 m->m_pkthdr.csum_flags &= ~M_CSUM_IPv4;
892 } else {
893 /*
894 * checksum is hw-offloaded or not necessary.
895 */
896 KASSERT(!(ifp != NULL && IN_NEED_CHECKSUM(ifp, M_CSUM_IPv4)) ||
897 (m->m_pkthdr.csum_flags & M_CSUM_IPv4) != 0);
898 KASSERT(M_CSUM_DATA_IPv4_IPHL(m->m_pkthdr.csum_data) >=
899 sizeof(struct ip));
900 }
901 sendorfree:
902 /*
903 * If there is no room for all the fragments, don't queue
904 * any of them.
905 */
906 if (ifp != NULL) {
907 s = splnet();
908 if (ifp->if_snd.ifq_maxlen - ifp->if_snd.ifq_len < fragments &&
909 error == 0) {
910 error = ENOBUFS;
911 IP_STATINC(IP_STAT_ODROPPED);
912 IFQ_INC_DROPS(&ifp->if_snd);
913 }
914 splx(s);
915 }
916 if (error) {
917 for (m = m0; m; m = m0) {
918 m0 = m->m_nextpkt;
919 m->m_nextpkt = NULL;
920 m_freem(m);
921 }
922 }
923 return (error);
924 }
925
926 /*
927 * Process a delayed payload checksum calculation.
928 */
929 void
930 in_delayed_cksum(struct mbuf *m)
931 {
932 struct ip *ip;
933 u_int16_t csum, offset;
934
935 ip = mtod(m, struct ip *);
936 offset = ip->ip_hl << 2;
937 csum = in4_cksum(m, 0, offset, ntohs(ip->ip_len) - offset);
938 if (csum == 0 && (m->m_pkthdr.csum_flags & M_CSUM_UDPv4) != 0)
939 csum = 0xffff;
940
941 offset += M_CSUM_DATA_IPv4_OFFSET(m->m_pkthdr.csum_data);
942
943 if ((offset + sizeof(u_int16_t)) > m->m_len) {
944 /* This happen when ip options were inserted
945 printf("in_delayed_cksum: pullup len %d off %d proto %d\n",
946 m->m_len, offset, ip->ip_p);
947 */
948 m_copyback(m, offset, sizeof(csum), (void *) &csum);
949 } else
950 *(u_int16_t *)(mtod(m, char *) + offset) = csum;
951 }
952
953 /*
954 * Determine the maximum length of the options to be inserted;
955 * we would far rather allocate too much space rather than too little.
956 */
957
958 u_int
959 ip_optlen(struct inpcb *inp)
960 {
961 struct mbuf *m = inp->inp_options;
962
963 if (m && m->m_len > offsetof(struct ipoption, ipopt_dst)) {
964 return (m->m_len - offsetof(struct ipoption, ipopt_dst));
965 }
966 return 0;
967 }
968
969 /*
970 * Insert IP options into preformed packet.
971 * Adjust IP destination as required for IP source routing,
972 * as indicated by a non-zero in_addr at the start of the options.
973 */
974 static struct mbuf *
975 ip_insertoptions(struct mbuf *m, struct mbuf *opt, int *phlen)
976 {
977 struct ipoption *p = mtod(opt, struct ipoption *);
978 struct mbuf *n;
979 struct ip *ip = mtod(m, struct ip *);
980 unsigned optlen;
981
982 optlen = opt->m_len - sizeof(p->ipopt_dst);
983 if (optlen + ntohs(ip->ip_len) > IP_MAXPACKET)
984 return (m); /* XXX should fail */
985 if (!in_nullhost(p->ipopt_dst))
986 ip->ip_dst = p->ipopt_dst;
987 if (M_READONLY(m) || M_LEADINGSPACE(m) < optlen) {
988 MGETHDR(n, M_DONTWAIT, MT_HEADER);
989 if (n == 0)
990 return (m);
991 MCLAIM(n, m->m_owner);
992 M_MOVE_PKTHDR(n, m);
993 m->m_len -= sizeof(struct ip);
994 m->m_data += sizeof(struct ip);
995 n->m_next = m;
996 m = n;
997 m->m_len = optlen + sizeof(struct ip);
998 m->m_data += max_linkhdr;
999 bcopy((void *)ip, mtod(m, void *), sizeof(struct ip));
1000 } else {
1001 m->m_data -= optlen;
1002 m->m_len += optlen;
1003 memmove(mtod(m, void *), ip, sizeof(struct ip));
1004 }
1005 m->m_pkthdr.len += optlen;
1006 ip = mtod(m, struct ip *);
1007 bcopy((void *)p->ipopt_list, (void *)(ip + 1), (unsigned)optlen);
1008 *phlen = sizeof(struct ip) + optlen;
1009 ip->ip_len = htons(ntohs(ip->ip_len) + optlen);
1010 return (m);
1011 }
1012
1013 /*
1014 * Copy options from ip to jp,
1015 * omitting those not copied during fragmentation.
1016 */
1017 int
1018 ip_optcopy(struct ip *ip, struct ip *jp)
1019 {
1020 u_char *cp, *dp;
1021 int opt, optlen, cnt;
1022
1023 cp = (u_char *)(ip + 1);
1024 dp = (u_char *)(jp + 1);
1025 cnt = (ip->ip_hl << 2) - sizeof (struct ip);
1026 for (; cnt > 0; cnt -= optlen, cp += optlen) {
1027 opt = cp[0];
1028 if (opt == IPOPT_EOL)
1029 break;
1030 if (opt == IPOPT_NOP) {
1031 /* Preserve for IP mcast tunnel's LSRR alignment. */
1032 *dp++ = IPOPT_NOP;
1033 optlen = 1;
1034 continue;
1035 }
1036
1037 KASSERT(cnt >= IPOPT_OLEN + sizeof(*cp));
1038 optlen = cp[IPOPT_OLEN];
1039 KASSERT(optlen >= IPOPT_OLEN + sizeof(*cp) && optlen < cnt);
1040
1041 /* Invalid lengths should have been caught by ip_dooptions. */
1042 if (optlen > cnt)
1043 optlen = cnt;
1044 if (IPOPT_COPIED(opt)) {
1045 bcopy((void *)cp, (void *)dp, (unsigned)optlen);
1046 dp += optlen;
1047 }
1048 }
1049 for (optlen = dp - (u_char *)(jp+1); optlen & 0x3; optlen++)
1050 *dp++ = IPOPT_EOL;
1051 return (optlen);
1052 }
1053
1054 /*
1055 * IP socket option processing.
1056 */
1057 int
1058 ip_ctloutput(int op, struct socket *so, struct sockopt *sopt)
1059 {
1060 struct inpcb *inp = sotoinpcb(so);
1061 struct ip *ip = &inp->inp_ip;
1062 int inpflags = inp->inp_flags;
1063 int optval = 0, error = 0;
1064
1065 if (sopt->sopt_level != IPPROTO_IP) {
1066 if (sopt->sopt_level == SOL_SOCKET && sopt->sopt_name == SO_NOHEADER)
1067 return 0;
1068 return ENOPROTOOPT;
1069 }
1070
1071 switch (op) {
1072 case PRCO_SETOPT:
1073 switch (sopt->sopt_name) {
1074 case IP_OPTIONS:
1075 #ifdef notyet
1076 case IP_RETOPTS:
1077 #endif
1078 error = ip_pcbopts(inp, sopt);
1079 break;
1080
1081 case IP_TOS:
1082 case IP_TTL:
1083 case IP_MINTTL:
1084 case IP_PKTINFO:
1085 case IP_RECVOPTS:
1086 case IP_RECVRETOPTS:
1087 case IP_RECVDSTADDR:
1088 case IP_RECVIF:
1089 case IP_RECVPKTINFO:
1090 case IP_RECVTTL:
1091 error = sockopt_getint(sopt, &optval);
1092 if (error)
1093 break;
1094
1095 switch (sopt->sopt_name) {
1096 case IP_TOS:
1097 ip->ip_tos = optval;
1098 break;
1099
1100 case IP_TTL:
1101 ip->ip_ttl = optval;
1102 break;
1103
1104 case IP_MINTTL:
1105 if (optval > 0 && optval <= MAXTTL)
1106 inp->inp_ip_minttl = optval;
1107 else
1108 error = EINVAL;
1109 break;
1110 #define OPTSET(bit) \
1111 if (optval) \
1112 inpflags |= bit; \
1113 else \
1114 inpflags &= ~bit;
1115
1116 case IP_PKTINFO:
1117 OPTSET(INP_PKTINFO);
1118 break;
1119
1120 case IP_RECVOPTS:
1121 OPTSET(INP_RECVOPTS);
1122 break;
1123
1124 case IP_RECVPKTINFO:
1125 OPTSET(INP_RECVPKTINFO);
1126 break;
1127
1128 case IP_RECVRETOPTS:
1129 OPTSET(INP_RECVRETOPTS);
1130 break;
1131
1132 case IP_RECVDSTADDR:
1133 OPTSET(INP_RECVDSTADDR);
1134 break;
1135
1136 case IP_RECVIF:
1137 OPTSET(INP_RECVIF);
1138 break;
1139
1140 case IP_RECVTTL:
1141 OPTSET(INP_RECVTTL);
1142 break;
1143 }
1144 break;
1145 #undef OPTSET
1146
1147 case IP_MULTICAST_IF:
1148 case IP_MULTICAST_TTL:
1149 case IP_MULTICAST_LOOP:
1150 case IP_ADD_MEMBERSHIP:
1151 case IP_DROP_MEMBERSHIP:
1152 error = ip_setmoptions(&inp->inp_moptions, sopt);
1153 break;
1154
1155 case IP_PORTRANGE:
1156 error = sockopt_getint(sopt, &optval);
1157 if (error)
1158 break;
1159
1160 switch (optval) {
1161 case IP_PORTRANGE_DEFAULT:
1162 case IP_PORTRANGE_HIGH:
1163 inpflags &= ~(INP_LOWPORT);
1164 break;
1165
1166 case IP_PORTRANGE_LOW:
1167 inpflags |= INP_LOWPORT;
1168 break;
1169
1170 default:
1171 error = EINVAL;
1172 break;
1173 }
1174 break;
1175
1176 case IP_PORTALGO:
1177 error = sockopt_getint(sopt, &optval);
1178 if (error)
1179 break;
1180
1181 error = portalgo_algo_index_select(
1182 (struct inpcb_hdr *)inp, optval);
1183 break;
1184
1185 #if defined(IPSEC)
1186 case IP_IPSEC_POLICY:
1187 if (ipsec_enabled) {
1188 error = ipsec4_set_policy(inp, sopt->sopt_name,
1189 sopt->sopt_data, sopt->sopt_size,
1190 curlwp->l_cred);
1191 break;
1192 }
1193 /*FALLTHROUGH*/
1194 #endif /* IPSEC */
1195
1196 default:
1197 error = ENOPROTOOPT;
1198 break;
1199 }
1200 break;
1201
1202 case PRCO_GETOPT:
1203 switch (sopt->sopt_name) {
1204 case IP_OPTIONS:
1205 case IP_RETOPTS: {
1206 struct mbuf *mopts = inp->inp_options;
1207
1208 if (mopts) {
1209 struct mbuf *m;
1210
1211 m = m_copym(mopts, 0, M_COPYALL, M_DONTWAIT);
1212 if (m == NULL) {
1213 error = ENOBUFS;
1214 break;
1215 }
1216 error = sockopt_setmbuf(sopt, m);
1217 }
1218 break;
1219 }
1220 case IP_PKTINFO:
1221 case IP_TOS:
1222 case IP_TTL:
1223 case IP_MINTTL:
1224 case IP_RECVOPTS:
1225 case IP_RECVRETOPTS:
1226 case IP_RECVDSTADDR:
1227 case IP_RECVIF:
1228 case IP_RECVPKTINFO:
1229 case IP_RECVTTL:
1230 case IP_ERRORMTU:
1231 switch (sopt->sopt_name) {
1232 case IP_TOS:
1233 optval = ip->ip_tos;
1234 break;
1235
1236 case IP_TTL:
1237 optval = ip->ip_ttl;
1238 break;
1239
1240 case IP_MINTTL:
1241 optval = inp->inp_ip_minttl;
1242 break;
1243
1244 case IP_ERRORMTU:
1245 optval = inp->inp_errormtu;
1246 break;
1247
1248 #define OPTBIT(bit) (inpflags & bit ? 1 : 0)
1249
1250 case IP_PKTINFO:
1251 optval = OPTBIT(INP_PKTINFO);
1252 break;
1253
1254 case IP_RECVOPTS:
1255 optval = OPTBIT(INP_RECVOPTS);
1256 break;
1257
1258 case IP_RECVPKTINFO:
1259 optval = OPTBIT(INP_RECVPKTINFO);
1260 break;
1261
1262 case IP_RECVRETOPTS:
1263 optval = OPTBIT(INP_RECVRETOPTS);
1264 break;
1265
1266 case IP_RECVDSTADDR:
1267 optval = OPTBIT(INP_RECVDSTADDR);
1268 break;
1269
1270 case IP_RECVIF:
1271 optval = OPTBIT(INP_RECVIF);
1272 break;
1273
1274 case IP_RECVTTL:
1275 optval = OPTBIT(INP_RECVTTL);
1276 break;
1277 }
1278 error = sockopt_setint(sopt, optval);
1279 break;
1280
1281 #if 0 /* defined(IPSEC) */
1282 case IP_IPSEC_POLICY:
1283 {
1284 struct mbuf *m = NULL;
1285
1286 /* XXX this will return EINVAL as sopt is empty */
1287 error = ipsec4_get_policy(inp, sopt->sopt_data,
1288 sopt->sopt_size, &m);
1289 if (error == 0)
1290 error = sockopt_setmbuf(sopt, m);
1291 break;
1292 }
1293 #endif /*IPSEC*/
1294
1295 case IP_MULTICAST_IF:
1296 case IP_MULTICAST_TTL:
1297 case IP_MULTICAST_LOOP:
1298 case IP_ADD_MEMBERSHIP:
1299 case IP_DROP_MEMBERSHIP:
1300 error = ip_getmoptions(inp->inp_moptions, sopt);
1301 break;
1302
1303 case IP_PORTRANGE:
1304 if (inpflags & INP_LOWPORT)
1305 optval = IP_PORTRANGE_LOW;
1306 else
1307 optval = IP_PORTRANGE_DEFAULT;
1308 error = sockopt_setint(sopt, optval);
1309 break;
1310
1311 case IP_PORTALGO:
1312 optval = inp->inp_portalgo;
1313 error = sockopt_setint(sopt, optval);
1314 break;
1315
1316 default:
1317 error = ENOPROTOOPT;
1318 break;
1319 }
1320 break;
1321 }
1322
1323 if (!error) {
1324 inp->inp_flags = inpflags;
1325 }
1326 return error;
1327 }
1328
1329 /*
1330 * Set up IP options in pcb for insertion in output packets.
1331 * Store in mbuf with pointer in pcbopt, adding pseudo-option
1332 * with destination address if source routed.
1333 */
1334 static int
1335 ip_pcbopts(struct inpcb *inp, const struct sockopt *sopt)
1336 {
1337 struct mbuf *m;
1338 const u_char *cp;
1339 u_char *dp;
1340 int cnt;
1341
1342 /* Turn off any old options. */
1343 if (inp->inp_options) {
1344 m_free(inp->inp_options);
1345 }
1346 inp->inp_options = NULL;
1347 if ((cnt = sopt->sopt_size) == 0) {
1348 /* Only turning off any previous options. */
1349 return 0;
1350 }
1351 cp = sopt->sopt_data;
1352
1353 #ifndef __vax__
1354 if (cnt % sizeof(int32_t))
1355 return (EINVAL);
1356 #endif
1357
1358 m = m_get(M_DONTWAIT, MT_SOOPTS);
1359 if (m == NULL)
1360 return (ENOBUFS);
1361
1362 dp = mtod(m, u_char *);
1363 memset(dp, 0, sizeof(struct in_addr));
1364 dp += sizeof(struct in_addr);
1365 m->m_len = sizeof(struct in_addr);
1366
1367 /*
1368 * IP option list according to RFC791. Each option is of the form
1369 *
1370 * [optval] [olen] [(olen - 2) data bytes]
1371 *
1372 * We validate the list and copy options to an mbuf for prepending
1373 * to data packets. The IP first-hop destination address will be
1374 * stored before actual options and is zero if unset.
1375 */
1376 while (cnt > 0) {
1377 uint8_t optval, olen, offset;
1378
1379 optval = cp[IPOPT_OPTVAL];
1380
1381 if (optval == IPOPT_EOL || optval == IPOPT_NOP) {
1382 olen = 1;
1383 } else {
1384 if (cnt < IPOPT_OLEN + 1)
1385 goto bad;
1386
1387 olen = cp[IPOPT_OLEN];
1388 if (olen < IPOPT_OLEN + 1 || olen > cnt)
1389 goto bad;
1390 }
1391
1392 if (optval == IPOPT_LSRR || optval == IPOPT_SSRR) {
1393 /*
1394 * user process specifies route as:
1395 * ->A->B->C->D
1396 * D must be our final destination (but we can't
1397 * check that since we may not have connected yet).
1398 * A is first hop destination, which doesn't appear in
1399 * actual IP option, but is stored before the options.
1400 */
1401 if (olen < IPOPT_OFFSET + 1 + sizeof(struct in_addr))
1402 goto bad;
1403
1404 offset = cp[IPOPT_OFFSET];
1405 memcpy(mtod(m, u_char *), cp + IPOPT_OFFSET + 1,
1406 sizeof(struct in_addr));
1407
1408 cp += sizeof(struct in_addr);
1409 cnt -= sizeof(struct in_addr);
1410 olen -= sizeof(struct in_addr);
1411
1412 if (m->m_len + olen > MAX_IPOPTLEN + sizeof(struct in_addr))
1413 goto bad;
1414
1415 memcpy(dp, cp, olen);
1416 dp[IPOPT_OPTVAL] = optval;
1417 dp[IPOPT_OLEN] = olen;
1418 dp[IPOPT_OFFSET] = offset;
1419 break;
1420 } else {
1421 if (m->m_len + olen > MAX_IPOPTLEN + sizeof(struct in_addr))
1422 goto bad;
1423
1424 memcpy(dp, cp, olen);
1425 break;
1426 }
1427
1428 dp += olen;
1429 m->m_len += olen;
1430
1431 if (optval == IPOPT_EOL)
1432 break;
1433
1434 cp += olen;
1435 cnt -= olen;
1436 }
1437
1438 inp->inp_options = m;
1439 return 0;
1440 bad:
1441 (void)m_free(m);
1442 return EINVAL;
1443 }
1444
1445 /*
1446 * following RFC1724 section 3.3, 0.0.0.0/8 is interpreted as interface index.
1447 */
1448 static struct ifnet *
1449 ip_multicast_if(struct in_addr *a, int *ifindexp)
1450 {
1451 int ifindex;
1452 struct ifnet *ifp = NULL;
1453 struct in_ifaddr *ia;
1454
1455 if (ifindexp)
1456 *ifindexp = 0;
1457 if (ntohl(a->s_addr) >> 24 == 0) {
1458 ifindex = ntohl(a->s_addr) & 0xffffff;
1459 ifp = if_byindex(ifindex);
1460 if (!ifp)
1461 return NULL;
1462 if (ifindexp)
1463 *ifindexp = ifindex;
1464 } else {
1465 LIST_FOREACH(ia, &IN_IFADDR_HASH(a->s_addr), ia_hash) {
1466 if (in_hosteq(ia->ia_addr.sin_addr, *a) &&
1467 (ia->ia_ifp->if_flags & IFF_MULTICAST) != 0) {
1468 ifp = ia->ia_ifp;
1469 break;
1470 }
1471 }
1472 }
1473 return ifp;
1474 }
1475
1476 static int
1477 ip_getoptval(const struct sockopt *sopt, u_int8_t *val, u_int maxval)
1478 {
1479 u_int tval;
1480 u_char cval;
1481 int error;
1482
1483 if (sopt == NULL)
1484 return EINVAL;
1485
1486 switch (sopt->sopt_size) {
1487 case sizeof(u_char):
1488 error = sockopt_get(sopt, &cval, sizeof(u_char));
1489 tval = cval;
1490 break;
1491
1492 case sizeof(u_int):
1493 error = sockopt_get(sopt, &tval, sizeof(u_int));
1494 break;
1495
1496 default:
1497 error = EINVAL;
1498 }
1499
1500 if (error)
1501 return error;
1502
1503 if (tval > maxval)
1504 return EINVAL;
1505
1506 *val = tval;
1507 return 0;
1508 }
1509
1510 static int
1511 ip_get_membership(const struct sockopt *sopt, struct ifnet **ifp,
1512 struct in_addr *ia, bool add)
1513 {
1514 int error;
1515 struct ip_mreq mreq;
1516
1517 error = sockopt_get(sopt, &mreq, sizeof(mreq));
1518 if (error)
1519 return error;
1520
1521 if (!IN_MULTICAST(mreq.imr_multiaddr.s_addr))
1522 return EINVAL;
1523
1524 memcpy(ia, &mreq.imr_multiaddr, sizeof(*ia));
1525
1526 if (in_nullhost(mreq.imr_interface)) {
1527 union {
1528 struct sockaddr dst;
1529 struct sockaddr_in dst4;
1530 } u;
1531 struct route ro;
1532
1533 if (!add) {
1534 *ifp = NULL;
1535 return 0;
1536 }
1537 /*
1538 * If no interface address was provided, use the interface of
1539 * the route to the given multicast address.
1540 */
1541 struct rtentry *rt;
1542 memset(&ro, 0, sizeof(ro));
1543
1544 sockaddr_in_init(&u.dst4, ia, 0);
1545 error = rtcache_setdst(&ro, &u.dst);
1546 if (error != 0)
1547 return error;
1548 *ifp = (rt = rtcache_init(&ro)) != NULL ? rt->rt_ifp : NULL;
1549 rtcache_unref(rt, &ro);
1550 rtcache_free(&ro);
1551 } else {
1552 *ifp = ip_multicast_if(&mreq.imr_interface, NULL);
1553 if (!add && *ifp == NULL)
1554 return EADDRNOTAVAIL;
1555 }
1556 return 0;
1557 }
1558
1559 /*
1560 * Add a multicast group membership.
1561 * Group must be a valid IP multicast address.
1562 */
1563 static int
1564 ip_add_membership(struct ip_moptions *imo, const struct sockopt *sopt)
1565 {
1566 struct ifnet *ifp = NULL; // XXX: gcc [ppc]
1567 struct in_addr ia;
1568 int i, error;
1569
1570 if (sopt->sopt_size == sizeof(struct ip_mreq))
1571 error = ip_get_membership(sopt, &ifp, &ia, true);
1572 else
1573 #ifdef INET6
1574 error = ip6_get_membership(sopt, &ifp, &ia, sizeof(ia));
1575 #else
1576 return EINVAL;
1577 #endif
1578
1579 if (error)
1580 return error;
1581
1582 /*
1583 * See if we found an interface, and confirm that it
1584 * supports multicast.
1585 */
1586 if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0)
1587 return EADDRNOTAVAIL;
1588
1589 /*
1590 * See if the membership already exists or if all the
1591 * membership slots are full.
1592 */
1593 for (i = 0; i < imo->imo_num_memberships; ++i) {
1594 if (imo->imo_membership[i]->inm_ifp == ifp &&
1595 in_hosteq(imo->imo_membership[i]->inm_addr, ia))
1596 break;
1597 }
1598 if (i < imo->imo_num_memberships)
1599 return EADDRINUSE;
1600
1601 if (i == IP_MAX_MEMBERSHIPS)
1602 return ETOOMANYREFS;
1603
1604 /*
1605 * Everything looks good; add a new record to the multicast
1606 * address list for the given interface.
1607 */
1608 if ((imo->imo_membership[i] = in_addmulti(&ia, ifp)) == NULL)
1609 return ENOBUFS;
1610
1611 ++imo->imo_num_memberships;
1612 return 0;
1613 }
1614
1615 /*
1616 * Drop a multicast group membership.
1617 * Group must be a valid IP multicast address.
1618 */
1619 static int
1620 ip_drop_membership(struct ip_moptions *imo, const struct sockopt *sopt)
1621 {
1622 struct in_addr ia = { .s_addr = 0 }; // XXX: gcc [ppc]
1623 struct ifnet *ifp = NULL; // XXX: gcc [ppc]
1624 int i, error;
1625
1626 if (sopt->sopt_size == sizeof(struct ip_mreq))
1627 error = ip_get_membership(sopt, &ifp, &ia, false);
1628 else
1629 #ifdef INET6
1630 error = ip6_get_membership(sopt, &ifp, &ia, sizeof(ia));
1631 #else
1632 return EINVAL;
1633 #endif
1634
1635 if (error)
1636 return error;
1637
1638 /*
1639 * Find the membership in the membership array.
1640 */
1641 for (i = 0; i < imo->imo_num_memberships; ++i) {
1642 if ((ifp == NULL ||
1643 imo->imo_membership[i]->inm_ifp == ifp) &&
1644 in_hosteq(imo->imo_membership[i]->inm_addr, ia))
1645 break;
1646 }
1647 if (i == imo->imo_num_memberships)
1648 return EADDRNOTAVAIL;
1649
1650 /*
1651 * Give up the multicast address record to which the
1652 * membership points.
1653 */
1654 in_delmulti(imo->imo_membership[i]);
1655
1656 /*
1657 * Remove the gap in the membership array.
1658 */
1659 for (++i; i < imo->imo_num_memberships; ++i)
1660 imo->imo_membership[i-1] = imo->imo_membership[i];
1661 --imo->imo_num_memberships;
1662 return 0;
1663 }
1664
1665 /*
1666 * Set the IP multicast options in response to user setsockopt().
1667 */
1668 int
1669 ip_setmoptions(struct ip_moptions **pimo, const struct sockopt *sopt)
1670 {
1671 struct ip_moptions *imo = *pimo;
1672 struct in_addr addr;
1673 struct ifnet *ifp;
1674 int ifindex, error = 0;
1675
1676 if (!imo) {
1677 /*
1678 * No multicast option buffer attached to the pcb;
1679 * allocate one and initialize to default values.
1680 */
1681 imo = kmem_intr_alloc(sizeof(*imo), KM_NOSLEEP);
1682 if (imo == NULL)
1683 return ENOBUFS;
1684
1685 imo->imo_multicast_if_index = 0;
1686 imo->imo_multicast_addr.s_addr = INADDR_ANY;
1687 imo->imo_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
1688 imo->imo_multicast_loop = IP_DEFAULT_MULTICAST_LOOP;
1689 imo->imo_num_memberships = 0;
1690 *pimo = imo;
1691 }
1692
1693 switch (sopt->sopt_name) {
1694 case IP_MULTICAST_IF:
1695 /*
1696 * Select the interface for outgoing multicast packets.
1697 */
1698 error = sockopt_get(sopt, &addr, sizeof(addr));
1699 if (error)
1700 break;
1701
1702 /*
1703 * INADDR_ANY is used to remove a previous selection.
1704 * When no interface is selected, a default one is
1705 * chosen every time a multicast packet is sent.
1706 */
1707 if (in_nullhost(addr)) {
1708 imo->imo_multicast_if_index = 0;
1709 break;
1710 }
1711 /*
1712 * The selected interface is identified by its local
1713 * IP address. Find the interface and confirm that
1714 * it supports multicasting.
1715 */
1716 ifp = ip_multicast_if(&addr, &ifindex);
1717 if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
1718 error = EADDRNOTAVAIL;
1719 break;
1720 }
1721 imo->imo_multicast_if_index = ifp->if_index;
1722 if (ifindex)
1723 imo->imo_multicast_addr = addr;
1724 else
1725 imo->imo_multicast_addr.s_addr = INADDR_ANY;
1726 break;
1727
1728 case IP_MULTICAST_TTL:
1729 /*
1730 * Set the IP time-to-live for outgoing multicast packets.
1731 */
1732 error = ip_getoptval(sopt, &imo->imo_multicast_ttl, MAXTTL);
1733 break;
1734
1735 case IP_MULTICAST_LOOP:
1736 /*
1737 * Set the loopback flag for outgoing multicast packets.
1738 * Must be zero or one.
1739 */
1740 error = ip_getoptval(sopt, &imo->imo_multicast_loop, 1);
1741 break;
1742
1743 case IP_ADD_MEMBERSHIP: /* IPV6_JOIN_GROUP */
1744 error = ip_add_membership(imo, sopt);
1745 break;
1746
1747 case IP_DROP_MEMBERSHIP: /* IPV6_LEAVE_GROUP */
1748 error = ip_drop_membership(imo, sopt);
1749 break;
1750
1751 default:
1752 error = EOPNOTSUPP;
1753 break;
1754 }
1755
1756 /*
1757 * If all options have default values, no need to keep the mbuf.
1758 */
1759 if (imo->imo_multicast_if_index == 0 &&
1760 imo->imo_multicast_ttl == IP_DEFAULT_MULTICAST_TTL &&
1761 imo->imo_multicast_loop == IP_DEFAULT_MULTICAST_LOOP &&
1762 imo->imo_num_memberships == 0) {
1763 kmem_free(imo, sizeof(*imo));
1764 *pimo = NULL;
1765 }
1766
1767 return error;
1768 }
1769
1770 /*
1771 * Return the IP multicast options in response to user getsockopt().
1772 */
1773 int
1774 ip_getmoptions(struct ip_moptions *imo, struct sockopt *sopt)
1775 {
1776 struct in_addr addr;
1777 uint8_t optval;
1778 int error = 0;
1779
1780 switch (sopt->sopt_name) {
1781 case IP_MULTICAST_IF:
1782 if (imo == NULL || imo->imo_multicast_if_index == 0)
1783 addr = zeroin_addr;
1784 else if (imo->imo_multicast_addr.s_addr) {
1785 /* return the value user has set */
1786 addr = imo->imo_multicast_addr;
1787 } else {
1788 struct ifnet *ifp;
1789 struct in_ifaddr *ia = NULL;
1790 int s = pserialize_read_enter();
1791
1792 ifp = if_byindex(imo->imo_multicast_if_index);
1793 if (ifp != NULL) {
1794 ia = in_get_ia_from_ifp(ifp);
1795 }
1796 addr = ia ? ia->ia_addr.sin_addr : zeroin_addr;
1797 pserialize_read_exit(s);
1798 }
1799 error = sockopt_set(sopt, &addr, sizeof(addr));
1800 break;
1801
1802 case IP_MULTICAST_TTL:
1803 optval = imo ? imo->imo_multicast_ttl
1804 : IP_DEFAULT_MULTICAST_TTL;
1805
1806 error = sockopt_set(sopt, &optval, sizeof(optval));
1807 break;
1808
1809 case IP_MULTICAST_LOOP:
1810 optval = imo ? imo->imo_multicast_loop
1811 : IP_DEFAULT_MULTICAST_LOOP;
1812
1813 error = sockopt_set(sopt, &optval, sizeof(optval));
1814 break;
1815
1816 default:
1817 error = EOPNOTSUPP;
1818 }
1819
1820 return error;
1821 }
1822
1823 /*
1824 * Discard the IP multicast options.
1825 */
1826 void
1827 ip_freemoptions(struct ip_moptions *imo)
1828 {
1829 int i;
1830
1831 if (imo != NULL) {
1832 for (i = 0; i < imo->imo_num_memberships; ++i)
1833 in_delmulti(imo->imo_membership[i]);
1834 kmem_free(imo, sizeof(*imo));
1835 }
1836 }
1837
1838 /*
1839 * Routine called from ip_output() to loop back a copy of an IP multicast
1840 * packet to the input queue of a specified interface. Note that this
1841 * calls the output routine of the loopback "driver", but with an interface
1842 * pointer that might NOT be lo0ifp -- easier than replicating that code here.
1843 */
1844 static void
1845 ip_mloopback(struct ifnet *ifp, struct mbuf *m, const struct sockaddr_in *dst)
1846 {
1847 struct ip *ip;
1848 struct mbuf *copym;
1849
1850 copym = m_copypacket(m, M_DONTWAIT);
1851 if (copym != NULL &&
1852 (copym->m_flags & M_EXT || copym->m_len < sizeof(struct ip)))
1853 copym = m_pullup(copym, sizeof(struct ip));
1854 if (copym == NULL)
1855 return;
1856 /*
1857 * We don't bother to fragment if the IP length is greater
1858 * than the interface's MTU. Can this possibly matter?
1859 */
1860 ip = mtod(copym, struct ip *);
1861
1862 if (copym->m_pkthdr.csum_flags & (M_CSUM_TCPv4|M_CSUM_UDPv4)) {
1863 in_delayed_cksum(copym);
1864 copym->m_pkthdr.csum_flags &=
1865 ~(M_CSUM_TCPv4|M_CSUM_UDPv4);
1866 }
1867
1868 ip->ip_sum = 0;
1869 ip->ip_sum = in_cksum(copym, ip->ip_hl << 2);
1870 #ifndef NET_MPSAFE
1871 KERNEL_LOCK(1, NULL);
1872 #endif
1873 (void)looutput(ifp, copym, sintocsa(dst), NULL);
1874 #ifndef NET_MPSAFE
1875 KERNEL_UNLOCK_ONE(NULL);
1876 #endif
1877 }
1878
1879 /*
1880 * Ensure sending address is valid.
1881 * Returns 0 on success, -1 if an error should be sent back or 1
1882 * if the packet could be dropped without error (protocol dependent).
1883 */
1884 static int
1885 ip_ifaddrvalid(const struct in_ifaddr *ia)
1886 {
1887
1888 if (ia == NULL)
1889 return -1;
1890
1891 if (ia->ia_addr.sin_addr.s_addr == INADDR_ANY)
1892 return 0;
1893
1894 if (ia->ia4_flags & IN_IFF_DUPLICATED)
1895 return -1;
1896 else if (ia->ia4_flags & (IN_IFF_TENTATIVE | IN_IFF_DETACHED))
1897 return 1;
1898
1899 return 0;
1900 }
1901