ip6_output.c revision 1.166 1 /* $NetBSD: ip6_output.c,v 1.166 2015/08/24 22:21:27 pooka Exp $ */
2 /* $KAME: ip6_output.c,v 1.172 2001/03/25 09:55:56 itojun Exp $ */
3
4 /*
5 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. Neither the name of the project nor the names of its contributors
17 * may be used to endorse or promote products derived from this software
18 * without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 */
32
33 /*
34 * Copyright (c) 1982, 1986, 1988, 1990, 1993
35 * The Regents of the University of California. All rights reserved.
36 *
37 * Redistribution and use in source and binary forms, with or without
38 * modification, are permitted provided that the following conditions
39 * are met:
40 * 1. Redistributions of source code must retain the above copyright
41 * notice, this list of conditions and the following disclaimer.
42 * 2. Redistributions in binary form must reproduce the above copyright
43 * notice, this list of conditions and the following disclaimer in the
44 * documentation and/or other materials provided with the distribution.
45 * 3. Neither the name of the University nor the names of its contributors
46 * may be used to endorse or promote products derived from this software
47 * without specific prior written permission.
48 *
49 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
50 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
53 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59 * SUCH DAMAGE.
60 *
61 * @(#)ip_output.c 8.3 (Berkeley) 1/21/94
62 */
63
64 #include <sys/cdefs.h>
65 __KERNEL_RCSID(0, "$NetBSD: ip6_output.c,v 1.166 2015/08/24 22:21:27 pooka Exp $");
66
67 #ifdef _KERNEL_OPT
68 #include "opt_inet.h"
69 #include "opt_inet6.h"
70 #include "opt_ipsec.h"
71 #endif
72
73 #include <sys/param.h>
74 #include <sys/malloc.h>
75 #include <sys/mbuf.h>
76 #include <sys/errno.h>
77 #include <sys/protosw.h>
78 #include <sys/socket.h>
79 #include <sys/socketvar.h>
80 #include <sys/systm.h>
81 #include <sys/proc.h>
82 #include <sys/kauth.h>
83
84 #include <net/if.h>
85 #include <net/route.h>
86 #include <net/pfil.h>
87
88 #include <netinet/in.h>
89 #include <netinet/in_var.h>
90 #include <netinet/ip6.h>
91 #include <netinet/ip_var.h>
92 #include <netinet/icmp6.h>
93 #include <netinet/in_offload.h>
94 #include <netinet/portalgo.h>
95 #include <netinet6/in6_offload.h>
96 #include <netinet6/ip6_var.h>
97 #include <netinet6/ip6_private.h>
98 #include <netinet6/in6_pcb.h>
99 #include <netinet6/nd6.h>
100 #include <netinet6/ip6protosw.h>
101 #include <netinet6/scope6_var.h>
102
103 #ifdef IPSEC
104 #include <netipsec/ipsec.h>
105 #include <netipsec/ipsec6.h>
106 #include <netipsec/key.h>
107 #include <netipsec/xform.h>
108 #endif
109
110
111 #include <net/net_osdep.h>
112
113 extern pfil_head_t *inet6_pfil_hook; /* XXX */
114
/*
 * Set of mbufs holding the pieces of an outgoing packet while
 * ip6_output() assembles it: the IPv6 header itself plus each optional
 * extension header that can be supplied through the packet options.
 * A NULL member means that header is not present.
 */
struct ip6_exthdrs {
	struct mbuf	*ip6e_ip6;	/* IPv6 header */
	struct mbuf	*ip6e_hbh;	/* hop-by-hop options header */
	struct mbuf	*ip6e_dest1;	/* destination options, 1st part */
	struct mbuf	*ip6e_rthdr;	/* routing header */
	struct mbuf	*ip6e_dest2;	/* destination options, 2nd part */
};
122
123 static int ip6_pcbopt(int, u_char *, int, struct ip6_pktopts **,
124 kauth_cred_t, int);
125 static int ip6_getpcbopt(struct ip6_pktopts *, int, struct sockopt *);
126 static int ip6_setpktopt(int, u_char *, int, struct ip6_pktopts *, kauth_cred_t,
127 int, int, int);
128 static int ip6_setmoptions(const struct sockopt *, struct in6pcb *);
129 static int ip6_getmoptions(struct sockopt *, struct in6pcb *);
130 static int ip6_copyexthdr(struct mbuf **, void *, int);
131 static int ip6_insertfraghdr(struct mbuf *, struct mbuf *, int,
132 struct ip6_frag **);
133 static int ip6_insert_jumboopt(struct ip6_exthdrs *, u_int32_t);
134 static int ip6_splithdr(struct mbuf *, struct ip6_exthdrs *);
135 static int ip6_getpmtu(struct route *, struct route *, struct ifnet *,
136 const struct in6_addr *, u_long *, int *);
137 static int copypktopts(struct ip6_pktopts *, struct ip6_pktopts *, int);
138
139 #ifdef RFC2292
140 static int ip6_pcbopts(struct ip6_pktopts **, struct socket *, struct sockopt *);
141 #endif
142
143 /*
144 * IP6 output. The packet in mbuf chain m contains a skeletal IP6
145 * header (with pri, len, nxt, hlim, src, dst).
146 * This function may modify ver and hlim only.
147 * The mbuf chain containing the packet will be freed.
148 * The mbuf opt, if present, will not be freed.
149 *
150 * type of "mtu": rt_rmx.rmx_mtu is u_long, ifnet.ifr_mtu is int, and
151 * nd_ifinfo.linkmtu is u_int32_t. so we use u_long to hold largest one,
152 * which is rt_rmx.rmx_mtu.
153 */
154 int
155 ip6_output(
156 struct mbuf *m0,
157 struct ip6_pktopts *opt,
158 struct route *ro,
159 int flags,
160 struct ip6_moptions *im6o,
161 struct socket *so,
162 struct ifnet **ifpp /* XXX: just for statistics */
163 )
164 {
165 struct ip6_hdr *ip6, *mhip6;
166 struct ifnet *ifp, *origifp;
167 struct mbuf *m = m0;
168 int hlen, tlen, len, off;
169 bool tso;
170 struct route ip6route;
171 struct rtentry *rt = NULL;
172 const struct sockaddr_in6 *dst;
173 struct sockaddr_in6 src_sa, dst_sa;
174 int error = 0;
175 struct in6_ifaddr *ia = NULL;
176 u_long mtu;
177 int alwaysfrag, dontfrag;
178 u_int32_t optlen = 0, plen = 0, unfragpartlen = 0;
179 struct ip6_exthdrs exthdrs;
180 struct in6_addr finaldst, src0, dst0;
181 u_int32_t zone;
182 struct route *ro_pmtu = NULL;
183 int hdrsplit = 0;
184 int needipsec = 0;
185 #ifdef IPSEC
186 struct secpolicy *sp = NULL;
187 #endif
188
189 memset(&ip6route, 0, sizeof(ip6route));
190
191 #ifdef DIAGNOSTIC
192 if ((m->m_flags & M_PKTHDR) == 0)
193 panic("ip6_output: no HDR");
194
195 if ((m->m_pkthdr.csum_flags &
196 (M_CSUM_TCPv4|M_CSUM_UDPv4|M_CSUM_TSOv4)) != 0) {
197 panic("ip6_output: IPv4 checksum offload flags: %d",
198 m->m_pkthdr.csum_flags);
199 }
200
201 if ((m->m_pkthdr.csum_flags & (M_CSUM_TCPv6|M_CSUM_UDPv6)) ==
202 (M_CSUM_TCPv6|M_CSUM_UDPv6)) {
203 panic("ip6_output: conflicting checksum offload flags: %d",
204 m->m_pkthdr.csum_flags);
205 }
206 #endif
207
208 M_CSUM_DATA_IPv6_HL_SET(m->m_pkthdr.csum_data, sizeof(struct ip6_hdr));
209
210 #define MAKE_EXTHDR(hp, mp) \
211 do { \
212 if (hp) { \
213 struct ip6_ext *eh = (struct ip6_ext *)(hp); \
214 error = ip6_copyexthdr((mp), (void *)(hp), \
215 ((eh)->ip6e_len + 1) << 3); \
216 if (error) \
217 goto freehdrs; \
218 } \
219 } while (/*CONSTCOND*/ 0)
220
221 memset(&exthdrs, 0, sizeof(exthdrs));
222 if (opt) {
223 /* Hop-by-Hop options header */
224 MAKE_EXTHDR(opt->ip6po_hbh, &exthdrs.ip6e_hbh);
225 /* Destination options header(1st part) */
226 MAKE_EXTHDR(opt->ip6po_dest1, &exthdrs.ip6e_dest1);
227 /* Routing header */
228 MAKE_EXTHDR(opt->ip6po_rthdr, &exthdrs.ip6e_rthdr);
229 /* Destination options header(2nd part) */
230 MAKE_EXTHDR(opt->ip6po_dest2, &exthdrs.ip6e_dest2);
231 }
232
233 /*
234 * Calculate the total length of the extension header chain.
235 * Keep the length of the unfragmentable part for fragmentation.
236 */
237 optlen = 0;
238 if (exthdrs.ip6e_hbh) optlen += exthdrs.ip6e_hbh->m_len;
239 if (exthdrs.ip6e_dest1) optlen += exthdrs.ip6e_dest1->m_len;
240 if (exthdrs.ip6e_rthdr) optlen += exthdrs.ip6e_rthdr->m_len;
241 unfragpartlen = optlen + sizeof(struct ip6_hdr);
242 /* NOTE: we don't add AH/ESP length here. do that later. */
243 if (exthdrs.ip6e_dest2) optlen += exthdrs.ip6e_dest2->m_len;
244
245 #ifdef IPSEC
246 if (ipsec_used) {
247 /* Check the security policy (SP) for the packet */
248
249 sp = ipsec6_check_policy(m, so, flags, &needipsec, &error);
250 if (error != 0) {
251 /*
252 * Hack: -EINVAL is used to signal that a packet
253 * should be silently discarded. This is typically
254 * because we asked key management for an SA and
255 * it was delayed (e.g. kicked up to IKE).
256 */
257 if (error == -EINVAL)
258 error = 0;
259 goto freehdrs;
260 }
261 }
262 #endif /* IPSEC */
263
264
265 if (needipsec &&
266 (m->m_pkthdr.csum_flags & (M_CSUM_UDPv6|M_CSUM_TCPv6)) != 0) {
267 in6_delayed_cksum(m);
268 m->m_pkthdr.csum_flags &= ~(M_CSUM_UDPv6|M_CSUM_TCPv6);
269 }
270
271
272 /*
273 * If we need IPsec, or there is at least one extension header,
274 * separate IP6 header from the payload.
275 */
276 if ((needipsec || optlen) && !hdrsplit) {
277 if ((error = ip6_splithdr(m, &exthdrs)) != 0) {
278 m = NULL;
279 goto freehdrs;
280 }
281 m = exthdrs.ip6e_ip6;
282 hdrsplit++;
283 }
284
285 /* adjust pointer */
286 ip6 = mtod(m, struct ip6_hdr *);
287
288 /* adjust mbuf packet header length */
289 m->m_pkthdr.len += optlen;
290 plen = m->m_pkthdr.len - sizeof(*ip6);
291
292 /* If this is a jumbo payload, insert a jumbo payload option. */
293 if (plen > IPV6_MAXPACKET) {
294 if (!hdrsplit) {
295 if ((error = ip6_splithdr(m, &exthdrs)) != 0) {
296 m = NULL;
297 goto freehdrs;
298 }
299 m = exthdrs.ip6e_ip6;
300 hdrsplit++;
301 }
302 /* adjust pointer */
303 ip6 = mtod(m, struct ip6_hdr *);
304 if ((error = ip6_insert_jumboopt(&exthdrs, plen)) != 0)
305 goto freehdrs;
306 optlen += 8; /* XXX JUMBOOPTLEN */
307 ip6->ip6_plen = 0;
308 } else
309 ip6->ip6_plen = htons(plen);
310
311 /*
312 * Concatenate headers and fill in next header fields.
313 * Here we have, on "m"
314 * IPv6 payload
315 * and we insert headers accordingly. Finally, we should be getting:
316 * IPv6 hbh dest1 rthdr ah* [esp* dest2 payload]
317 *
318 * during the header composing process, "m" points to IPv6 header.
319 * "mprev" points to an extension header prior to esp.
320 */
321 {
322 u_char *nexthdrp = &ip6->ip6_nxt;
323 struct mbuf *mprev = m;
324
325 /*
326 * we treat dest2 specially. this makes IPsec processing
327 * much easier. the goal here is to make mprev point the
328 * mbuf prior to dest2.
329 *
330 * result: IPv6 dest2 payload
331 * m and mprev will point to IPv6 header.
332 */
333 if (exthdrs.ip6e_dest2) {
334 if (!hdrsplit)
335 panic("assumption failed: hdr not split");
336 exthdrs.ip6e_dest2->m_next = m->m_next;
337 m->m_next = exthdrs.ip6e_dest2;
338 *mtod(exthdrs.ip6e_dest2, u_char *) = ip6->ip6_nxt;
339 ip6->ip6_nxt = IPPROTO_DSTOPTS;
340 }
341
342 #define MAKE_CHAIN(m, mp, p, i)\
343 do {\
344 if (m) {\
345 if (!hdrsplit) \
346 panic("assumption failed: hdr not split"); \
347 *mtod((m), u_char *) = *(p);\
348 *(p) = (i);\
349 p = mtod((m), u_char *);\
350 (m)->m_next = (mp)->m_next;\
351 (mp)->m_next = (m);\
352 (mp) = (m);\
353 }\
354 } while (/*CONSTCOND*/ 0)
355 /*
356 * result: IPv6 hbh dest1 rthdr dest2 payload
357 * m will point to IPv6 header. mprev will point to the
358 * extension header prior to dest2 (rthdr in the above case).
359 */
360 MAKE_CHAIN(exthdrs.ip6e_hbh, mprev, nexthdrp, IPPROTO_HOPOPTS);
361 MAKE_CHAIN(exthdrs.ip6e_dest1, mprev, nexthdrp,
362 IPPROTO_DSTOPTS);
363 MAKE_CHAIN(exthdrs.ip6e_rthdr, mprev, nexthdrp,
364 IPPROTO_ROUTING);
365
366 M_CSUM_DATA_IPv6_HL_SET(m->m_pkthdr.csum_data,
367 sizeof(struct ip6_hdr) + optlen);
368 }
369
370 /*
371 * If there is a routing header, replace destination address field
372 * with the first hop of the routing header.
373 */
374 if (exthdrs.ip6e_rthdr) {
375 struct ip6_rthdr *rh;
376 struct ip6_rthdr0 *rh0;
377 struct in6_addr *addr;
378 struct sockaddr_in6 sa;
379
380 rh = (struct ip6_rthdr *)(mtod(exthdrs.ip6e_rthdr,
381 struct ip6_rthdr *));
382 finaldst = ip6->ip6_dst;
383 switch (rh->ip6r_type) {
384 case IPV6_RTHDR_TYPE_0:
385 rh0 = (struct ip6_rthdr0 *)rh;
386 addr = (struct in6_addr *)(rh0 + 1);
387
388 /*
389 * construct a sockaddr_in6 form of
390 * the first hop.
391 *
392 * XXX: we may not have enough
393 * information about its scope zone;
394 * there is no standard API to pass
395 * the information from the
396 * application.
397 */
398 sockaddr_in6_init(&sa, addr, 0, 0, 0);
399 if ((error = sa6_embedscope(&sa,
400 ip6_use_defzone)) != 0) {
401 goto bad;
402 }
403 ip6->ip6_dst = sa.sin6_addr;
404 (void)memmove(&addr[0], &addr[1],
405 sizeof(struct in6_addr) *
406 (rh0->ip6r0_segleft - 1));
407 addr[rh0->ip6r0_segleft - 1] = finaldst;
408 /* XXX */
409 in6_clearscope(addr + rh0->ip6r0_segleft - 1);
410 break;
411 default: /* is it possible? */
412 error = EINVAL;
413 goto bad;
414 }
415 }
416
417 /* Source address validation */
418 if (IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_src) &&
419 (flags & IPV6_UNSPECSRC) == 0) {
420 error = EOPNOTSUPP;
421 IP6_STATINC(IP6_STAT_BADSCOPE);
422 goto bad;
423 }
424 if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_src)) {
425 error = EOPNOTSUPP;
426 IP6_STATINC(IP6_STAT_BADSCOPE);
427 goto bad;
428 }
429
430 IP6_STATINC(IP6_STAT_LOCALOUT);
431
432 /*
433 * Route packet.
434 */
435 /* initialize cached route */
436 if (ro == NULL) {
437 ro = &ip6route;
438 }
439 ro_pmtu = ro;
440 if (opt && opt->ip6po_rthdr)
441 ro = &opt->ip6po_route;
442
443 /*
444 * if specified, try to fill in the traffic class field.
445 * do not override if a non-zero value is already set.
446 * we check the diffserv field and the ecn field separately.
447 */
448 if (opt && opt->ip6po_tclass >= 0) {
449 int mask = 0;
450
451 if ((ip6->ip6_flow & htonl(0xfc << 20)) == 0)
452 mask |= 0xfc;
453 if ((ip6->ip6_flow & htonl(0x03 << 20)) == 0)
454 mask |= 0x03;
455 if (mask != 0)
456 ip6->ip6_flow |= htonl((opt->ip6po_tclass & mask) << 20);
457 }
458
459 /* fill in or override the hop limit field, if necessary. */
460 if (opt && opt->ip6po_hlim != -1)
461 ip6->ip6_hlim = opt->ip6po_hlim & 0xff;
462 else if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
463 if (im6o != NULL)
464 ip6->ip6_hlim = im6o->im6o_multicast_hlim;
465 else
466 ip6->ip6_hlim = ip6_defmcasthlim;
467 }
468
469 #ifdef IPSEC
470 if (needipsec) {
471 int s = splsoftnet();
472 error = ipsec6_process_packet(m, sp->req);
473
474 /*
475 * Preserve KAME behaviour: ENOENT can be returned
476 * when an SA acquire is in progress. Don't propagate
477 * this to user-level; it confuses applications.
478 * XXX this will go away when the SADB is redone.
479 */
480 if (error == ENOENT)
481 error = 0;
482 splx(s);
483 goto done;
484 }
485 #endif /* IPSEC */
486
487 /* adjust pointer */
488 ip6 = mtod(m, struct ip6_hdr *);
489
490 sockaddr_in6_init(&dst_sa, &ip6->ip6_dst, 0, 0, 0);
491 if ((error = in6_selectroute(&dst_sa, opt, im6o, ro,
492 &ifp, &rt, 0)) != 0) {
493 if (ifp != NULL)
494 in6_ifstat_inc(ifp, ifs6_out_discard);
495 goto bad;
496 }
497 if (rt == NULL) {
498 /*
499 * If in6_selectroute() does not return a route entry,
500 * dst may not have been updated.
501 */
502 error = rtcache_setdst(ro, sin6tosa(&dst_sa));
503 if (error) {
504 goto bad;
505 }
506 }
507
508 /*
509 * then rt (for unicast) and ifp must be non-NULL valid values.
510 */
511 if ((flags & IPV6_FORWARDING) == 0) {
512 /* XXX: the FORWARDING flag can be set for mrouting. */
513 in6_ifstat_inc(ifp, ifs6_out_request);
514 }
515 if (rt != NULL) {
516 ia = (struct in6_ifaddr *)(rt->rt_ifa);
517 rt->rt_use++;
518 }
519
520 /*
521 * The outgoing interface must be in the zone of source and
522 * destination addresses. We should use ia_ifp to support the
523 * case of sending packets to an address of our own.
524 */
525 if (ia != NULL && ia->ia_ifp)
526 origifp = ia->ia_ifp;
527 else
528 origifp = ifp;
529
530 src0 = ip6->ip6_src;
531 if (in6_setscope(&src0, origifp, &zone))
532 goto badscope;
533 sockaddr_in6_init(&src_sa, &ip6->ip6_src, 0, 0, 0);
534 if (sa6_recoverscope(&src_sa) || zone != src_sa.sin6_scope_id)
535 goto badscope;
536
537 dst0 = ip6->ip6_dst;
538 if (in6_setscope(&dst0, origifp, &zone))
539 goto badscope;
540 /* re-initialize to be sure */
541 sockaddr_in6_init(&dst_sa, &ip6->ip6_dst, 0, 0, 0);
542 if (sa6_recoverscope(&dst_sa) || zone != dst_sa.sin6_scope_id)
543 goto badscope;
544
545 /* scope check is done. */
546
547 if (rt == NULL || IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
548 dst = satocsin6(rtcache_getdst(ro));
549 KASSERT(dst != NULL);
550 } else if (opt && rtcache_validate(&opt->ip6po_nextroute) != NULL) {
551 /*
552 * The nexthop is explicitly specified by the
553 * application. We assume the next hop is an IPv6
554 * address.
555 */
556 dst = (struct sockaddr_in6 *)opt->ip6po_nexthop;
557 } else if ((rt->rt_flags & RTF_GATEWAY))
558 dst = (struct sockaddr_in6 *)rt->rt_gateway;
559 else
560 dst = satocsin6(rtcache_getdst(ro));
561
562 /*
563 * XXXXXX: original code follows:
564 */
565 if (!IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst))
566 m->m_flags &= ~(M_BCAST | M_MCAST); /* just in case */
567 else {
568 struct in6_multi *in6m;
569
570 m->m_flags = (m->m_flags & ~M_BCAST) | M_MCAST;
571
572 in6_ifstat_inc(ifp, ifs6_out_mcast);
573
574 /*
575 * Confirm that the outgoing interface supports multicast.
576 */
577 if (!(ifp->if_flags & IFF_MULTICAST)) {
578 IP6_STATINC(IP6_STAT_NOROUTE);
579 in6_ifstat_inc(ifp, ifs6_out_discard);
580 error = ENETUNREACH;
581 goto bad;
582 }
583
584 IN6_LOOKUP_MULTI(ip6->ip6_dst, ifp, in6m);
585 if (in6m != NULL &&
586 (im6o == NULL || im6o->im6o_multicast_loop)) {
587 /*
588 * If we belong to the destination multicast group
589 * on the outgoing interface, and the caller did not
590 * forbid loopback, loop back a copy.
591 */
592 KASSERT(dst != NULL);
593 ip6_mloopback(ifp, m, dst);
594 } else {
595 /*
596 * If we are acting as a multicast router, perform
597 * multicast forwarding as if the packet had just
598 * arrived on the interface to which we are about
599 * to send. The multicast forwarding function
600 * recursively calls this function, using the
601 * IPV6_FORWARDING flag to prevent infinite recursion.
602 *
603 * Multicasts that are looped back by ip6_mloopback(),
604 * above, will be forwarded by the ip6_input() routine,
605 * if necessary.
606 */
607 if (ip6_mrouter && (flags & IPV6_FORWARDING) == 0) {
608 if (ip6_mforward(ip6, ifp, m) != 0) {
609 m_freem(m);
610 goto done;
611 }
612 }
613 }
614 /*
615 * Multicasts with a hoplimit of zero may be looped back,
616 * above, but must not be transmitted on a network.
617 * Also, multicasts addressed to the loopback interface
618 * are not sent -- the above call to ip6_mloopback() will
619 * loop back a copy if this host actually belongs to the
620 * destination group on the loopback interface.
621 */
622 if (ip6->ip6_hlim == 0 || (ifp->if_flags & IFF_LOOPBACK) ||
623 IN6_IS_ADDR_MC_INTFACELOCAL(&ip6->ip6_dst)) {
624 m_freem(m);
625 goto done;
626 }
627 }
628
629 /*
630 * Fill the outgoing inteface to tell the upper layer
631 * to increment per-interface statistics.
632 */
633 if (ifpp)
634 *ifpp = ifp;
635
636 /* Determine path MTU. */
637 if ((error = ip6_getpmtu(ro_pmtu, ro, ifp, &finaldst, &mtu,
638 &alwaysfrag)) != 0)
639 goto bad;
640
641 /*
642 * The caller of this function may specify to use the minimum MTU
643 * in some cases.
644 * An advanced API option (IPV6_USE_MIN_MTU) can also override MTU
645 * setting. The logic is a bit complicated; by default, unicast
646 * packets will follow path MTU while multicast packets will be sent at
647 * the minimum MTU. If IP6PO_MINMTU_ALL is specified, all packets
648 * including unicast ones will be sent at the minimum MTU. Multicast
649 * packets will always be sent at the minimum MTU unless
650 * IP6PO_MINMTU_DISABLE is explicitly specified.
651 * See RFC 3542 for more details.
652 */
653 if (mtu > IPV6_MMTU) {
654 if ((flags & IPV6_MINMTU))
655 mtu = IPV6_MMTU;
656 else if (opt && opt->ip6po_minmtu == IP6PO_MINMTU_ALL)
657 mtu = IPV6_MMTU;
658 else if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) &&
659 (opt == NULL ||
660 opt->ip6po_minmtu != IP6PO_MINMTU_DISABLE)) {
661 mtu = IPV6_MMTU;
662 }
663 }
664
665 /*
666 * clear embedded scope identifiers if necessary.
667 * in6_clearscope will touch the addresses only when necessary.
668 */
669 in6_clearscope(&ip6->ip6_src);
670 in6_clearscope(&ip6->ip6_dst);
671
672 /*
673 * If the outgoing packet contains a hop-by-hop options header,
674 * it must be examined and processed even by the source node.
675 * (RFC 2460, section 4.)
676 */
677 if (ip6->ip6_nxt == IPV6_HOPOPTS) {
678 u_int32_t dummy1; /* XXX unused */
679 u_int32_t dummy2; /* XXX unused */
680 int hoff = sizeof(struct ip6_hdr);
681
682 if (ip6_hopopts_input(&dummy1, &dummy2, &m, &hoff)) {
683 /* m was already freed at this point */
684 error = EINVAL;/* better error? */
685 goto done;
686 }
687
688 ip6 = mtod(m, struct ip6_hdr *);
689 }
690
691 /*
692 * Run through list of hooks for output packets.
693 */
694 if ((error = pfil_run_hooks(inet6_pfil_hook, &m, ifp, PFIL_OUT)) != 0)
695 goto done;
696 if (m == NULL)
697 goto done;
698 ip6 = mtod(m, struct ip6_hdr *);
699
700 /*
701 * Send the packet to the outgoing interface.
702 * If necessary, do IPv6 fragmentation before sending.
703 *
704 * the logic here is rather complex:
705 * 1: normal case (dontfrag == 0, alwaysfrag == 0)
706 * 1-a: send as is if tlen <= path mtu
707 * 1-b: fragment if tlen > path mtu
708 *
709 * 2: if user asks us not to fragment (dontfrag == 1)
710 * 2-a: send as is if tlen <= interface mtu
711 * 2-b: error if tlen > interface mtu
712 *
713 * 3: if we always need to attach fragment header (alwaysfrag == 1)
714 * always fragment
715 *
716 * 4: if dontfrag == 1 && alwaysfrag == 1
717 * error, as we cannot handle this conflicting request
718 */
719 tlen = m->m_pkthdr.len;
720 tso = (m->m_pkthdr.csum_flags & M_CSUM_TSOv6) != 0;
721 if (opt && (opt->ip6po_flags & IP6PO_DONTFRAG))
722 dontfrag = 1;
723 else
724 dontfrag = 0;
725
726 if (dontfrag && alwaysfrag) { /* case 4 */
727 /* conflicting request - can't transmit */
728 error = EMSGSIZE;
729 goto bad;
730 }
731 if (dontfrag && (!tso && tlen > IN6_LINKMTU(ifp))) { /* case 2-b */
732 /*
733 * Even if the DONTFRAG option is specified, we cannot send the
734 * packet when the data length is larger than the MTU of the
735 * outgoing interface.
736 * Notify the error by sending IPV6_PATHMTU ancillary data as
737 * well as returning an error code (the latter is not described
738 * in the API spec.)
739 */
740 u_int32_t mtu32;
741 struct ip6ctlparam ip6cp;
742
743 mtu32 = (u_int32_t)mtu;
744 memset(&ip6cp, 0, sizeof(ip6cp));
745 ip6cp.ip6c_cmdarg = (void *)&mtu32;
746 pfctlinput2(PRC_MSGSIZE,
747 rtcache_getdst(ro_pmtu), &ip6cp);
748
749 error = EMSGSIZE;
750 goto bad;
751 }
752
753 /*
754 * transmit packet without fragmentation
755 */
756 if (dontfrag || (!alwaysfrag && (tlen <= mtu || tso))) {
757 /* case 1-a and 2-a */
758 struct in6_ifaddr *ia6;
759 int sw_csum;
760
761 ip6 = mtod(m, struct ip6_hdr *);
762 ia6 = in6_ifawithifp(ifp, &ip6->ip6_src);
763 if (ia6) {
764 /* Record statistics for this interface address. */
765 ia6->ia_ifa.ifa_data.ifad_outbytes += m->m_pkthdr.len;
766 }
767
768 sw_csum = m->m_pkthdr.csum_flags & ~ifp->if_csum_flags_tx;
769 if ((sw_csum & (M_CSUM_UDPv6|M_CSUM_TCPv6)) != 0) {
770 if (IN6_NEED_CHECKSUM(ifp,
771 sw_csum & (M_CSUM_UDPv6|M_CSUM_TCPv6))) {
772 in6_delayed_cksum(m);
773 }
774 m->m_pkthdr.csum_flags &= ~(M_CSUM_UDPv6|M_CSUM_TCPv6);
775 }
776
777 KASSERT(dst != NULL);
778 if (__predict_true(!tso ||
779 (ifp->if_capenable & IFCAP_TSOv6) != 0)) {
780 error = nd6_output(ifp, origifp, m, dst, rt);
781 } else {
782 error = ip6_tso_output(ifp, origifp, m, dst, rt);
783 }
784 goto done;
785 }
786
787 if (tso) {
788 error = EINVAL; /* XXX */
789 goto bad;
790 }
791
792 /*
793 * try to fragment the packet. case 1-b and 3
794 */
795 if (mtu < IPV6_MMTU) {
796 /* path MTU cannot be less than IPV6_MMTU */
797 error = EMSGSIZE;
798 in6_ifstat_inc(ifp, ifs6_out_fragfail);
799 goto bad;
800 } else if (ip6->ip6_plen == 0) {
801 /* jumbo payload cannot be fragmented */
802 error = EMSGSIZE;
803 in6_ifstat_inc(ifp, ifs6_out_fragfail);
804 goto bad;
805 } else {
806 struct mbuf **mnext, *m_frgpart;
807 struct ip6_frag *ip6f;
808 u_int32_t id = htonl(ip6_randomid());
809 u_char nextproto;
810 #if 0 /* see below */
811 struct ip6ctlparam ip6cp;
812 u_int32_t mtu32;
813 #endif
814
815 /*
816 * Too large for the destination or interface;
817 * fragment if possible.
818 * Must be able to put at least 8 bytes per fragment.
819 */
820 hlen = unfragpartlen;
821 if (mtu > IPV6_MAXPACKET)
822 mtu = IPV6_MAXPACKET;
823
824 #if 0
825 /*
826 * It is believed this code is a leftover from the
827 * development of the IPV6_RECVPATHMTU sockopt and
828 * associated work to implement RFC3542.
829 * It's not entirely clear what the intent of the API
830 * is at this point, so disable this code for now.
831 * The IPV6_RECVPATHMTU sockopt and/or IPV6_DONTFRAG
832 * will send notifications if the application requests.
833 */
834
835 /* Notify a proper path MTU to applications. */
836 mtu32 = (u_int32_t)mtu;
837 memset(&ip6cp, 0, sizeof(ip6cp));
838 ip6cp.ip6c_cmdarg = (void *)&mtu32;
839 pfctlinput2(PRC_MSGSIZE,
840 rtcache_getdst(ro_pmtu), &ip6cp);
841 #endif
842
843 len = (mtu - hlen - sizeof(struct ip6_frag)) & ~7;
844 if (len < 8) {
845 error = EMSGSIZE;
846 in6_ifstat_inc(ifp, ifs6_out_fragfail);
847 goto bad;
848 }
849
850 mnext = &m->m_nextpkt;
851
852 /*
853 * Change the next header field of the last header in the
854 * unfragmentable part.
855 */
856 if (exthdrs.ip6e_rthdr) {
857 nextproto = *mtod(exthdrs.ip6e_rthdr, u_char *);
858 *mtod(exthdrs.ip6e_rthdr, u_char *) = IPPROTO_FRAGMENT;
859 } else if (exthdrs.ip6e_dest1) {
860 nextproto = *mtod(exthdrs.ip6e_dest1, u_char *);
861 *mtod(exthdrs.ip6e_dest1, u_char *) = IPPROTO_FRAGMENT;
862 } else if (exthdrs.ip6e_hbh) {
863 nextproto = *mtod(exthdrs.ip6e_hbh, u_char *);
864 *mtod(exthdrs.ip6e_hbh, u_char *) = IPPROTO_FRAGMENT;
865 } else {
866 nextproto = ip6->ip6_nxt;
867 ip6->ip6_nxt = IPPROTO_FRAGMENT;
868 }
869
870 if ((m->m_pkthdr.csum_flags & (M_CSUM_UDPv6|M_CSUM_TCPv6))
871 != 0) {
872 if (IN6_NEED_CHECKSUM(ifp,
873 m->m_pkthdr.csum_flags &
874 (M_CSUM_UDPv6|M_CSUM_TCPv6))) {
875 in6_delayed_cksum(m);
876 }
877 m->m_pkthdr.csum_flags &= ~(M_CSUM_UDPv6|M_CSUM_TCPv6);
878 }
879
880 /*
881 * Loop through length of segment after first fragment,
882 * make new header and copy data of each part and link onto
883 * chain.
884 */
885 m0 = m;
886 for (off = hlen; off < tlen; off += len) {
887 struct mbuf *mlast;
888
889 MGETHDR(m, M_DONTWAIT, MT_HEADER);
890 if (!m) {
891 error = ENOBUFS;
892 IP6_STATINC(IP6_STAT_ODROPPED);
893 goto sendorfree;
894 }
895 m->m_pkthdr.rcvif = NULL;
896 m->m_flags = m0->m_flags & M_COPYFLAGS;
897 *mnext = m;
898 mnext = &m->m_nextpkt;
899 m->m_data += max_linkhdr;
900 mhip6 = mtod(m, struct ip6_hdr *);
901 *mhip6 = *ip6;
902 m->m_len = sizeof(*mhip6);
903 /*
904 * ip6f must be valid if error is 0. But how
905 * can a compiler be expected to infer this?
906 */
907 ip6f = NULL;
908 error = ip6_insertfraghdr(m0, m, hlen, &ip6f);
909 if (error) {
910 IP6_STATINC(IP6_STAT_ODROPPED);
911 goto sendorfree;
912 }
913 ip6f->ip6f_offlg = htons((u_int16_t)((off - hlen) & ~7));
914 if (off + len >= tlen)
915 len = tlen - off;
916 else
917 ip6f->ip6f_offlg |= IP6F_MORE_FRAG;
918 mhip6->ip6_plen = htons((u_int16_t)(len + hlen +
919 sizeof(*ip6f) - sizeof(struct ip6_hdr)));
920 if ((m_frgpart = m_copy(m0, off, len)) == 0) {
921 error = ENOBUFS;
922 IP6_STATINC(IP6_STAT_ODROPPED);
923 goto sendorfree;
924 }
925 for (mlast = m; mlast->m_next; mlast = mlast->m_next)
926 ;
927 mlast->m_next = m_frgpart;
928 m->m_pkthdr.len = len + hlen + sizeof(*ip6f);
929 m->m_pkthdr.rcvif = NULL;
930 ip6f->ip6f_reserved = 0;
931 ip6f->ip6f_ident = id;
932 ip6f->ip6f_nxt = nextproto;
933 IP6_STATINC(IP6_STAT_OFRAGMENTS);
934 in6_ifstat_inc(ifp, ifs6_out_fragcreat);
935 }
936
937 in6_ifstat_inc(ifp, ifs6_out_fragok);
938 }
939
940 /*
941 * Remove leading garbages.
942 */
943 sendorfree:
944 m = m0->m_nextpkt;
945 m0->m_nextpkt = 0;
946 m_freem(m0);
947 for (m0 = m; m; m = m0) {
948 m0 = m->m_nextpkt;
949 m->m_nextpkt = 0;
950 if (error == 0) {
951 struct in6_ifaddr *ia6;
952 ip6 = mtod(m, struct ip6_hdr *);
953 ia6 = in6_ifawithifp(ifp, &ip6->ip6_src);
954 if (ia6) {
955 /*
956 * Record statistics for this interface
957 * address.
958 */
959 ia6->ia_ifa.ifa_data.ifad_outbytes +=
960 m->m_pkthdr.len;
961 }
962 KASSERT(dst != NULL);
963 error = nd6_output(ifp, origifp, m, dst, rt);
964 } else
965 m_freem(m);
966 }
967
968 if (error == 0)
969 IP6_STATINC(IP6_STAT_FRAGMENTED);
970
971 done:
972 rtcache_free(&ip6route);
973
974 #ifdef IPSEC
975 if (sp != NULL)
976 KEY_FREESP(&sp);
977 #endif /* IPSEC */
978
979
980 return (error);
981
982 freehdrs:
983 m_freem(exthdrs.ip6e_hbh); /* m_freem will check if mbuf is 0 */
984 m_freem(exthdrs.ip6e_dest1);
985 m_freem(exthdrs.ip6e_rthdr);
986 m_freem(exthdrs.ip6e_dest2);
987 /* FALLTHROUGH */
988 bad:
989 m_freem(m);
990 goto done;
991 badscope:
992 IP6_STATINC(IP6_STAT_BADSCOPE);
993 in6_ifstat_inc(origifp, ifs6_out_discard);
994 if (error == 0)
995 error = EHOSTUNREACH; /* XXX */
996 goto bad;
997 }
998
999 static int
1000 ip6_copyexthdr(struct mbuf **mp, void *hdr, int hlen)
1001 {
1002 struct mbuf *m;
1003
1004 if (hlen > MCLBYTES)
1005 return (ENOBUFS); /* XXX */
1006
1007 MGET(m, M_DONTWAIT, MT_DATA);
1008 if (!m)
1009 return (ENOBUFS);
1010
1011 if (hlen > MLEN) {
1012 MCLGET(m, M_DONTWAIT);
1013 if ((m->m_flags & M_EXT) == 0) {
1014 m_free(m);
1015 return (ENOBUFS);
1016 }
1017 }
1018 m->m_len = hlen;
1019 if (hdr)
1020 bcopy(hdr, mtod(m, void *), hlen);
1021
1022 *mp = m;
1023 return (0);
1024 }
1025
1026 /*
1027 * Process a delayed payload checksum calculation.
1028 */
1029 void
1030 in6_delayed_cksum(struct mbuf *m)
1031 {
1032 uint16_t csum, offset;
1033
1034 KASSERT((m->m_pkthdr.csum_flags & (M_CSUM_UDPv6|M_CSUM_TCPv6)) != 0);
1035 KASSERT((~m->m_pkthdr.csum_flags & (M_CSUM_UDPv6|M_CSUM_TCPv6)) != 0);
1036 KASSERT((m->m_pkthdr.csum_flags
1037 & (M_CSUM_UDPv4|M_CSUM_TCPv4|M_CSUM_TSOv4)) == 0);
1038
1039 offset = M_CSUM_DATA_IPv6_HL(m->m_pkthdr.csum_data);
1040 csum = in6_cksum(m, 0, offset, m->m_pkthdr.len - offset);
1041 if (csum == 0 && (m->m_pkthdr.csum_flags & M_CSUM_UDPv6) != 0) {
1042 csum = 0xffff;
1043 }
1044
1045 offset += M_CSUM_DATA_IPv6_OFFSET(m->m_pkthdr.csum_data);
1046 if ((offset + sizeof(csum)) > m->m_len) {
1047 m_copyback(m, offset, sizeof(csum), &csum);
1048 } else {
1049 *(uint16_t *)(mtod(m, char *) + offset) = csum;
1050 }
1051 }
1052
1053 /*
1054 * Insert jumbo payload option.
1055 */
1056 static int
1057 ip6_insert_jumboopt(struct ip6_exthdrs *exthdrs, u_int32_t plen)
1058 {
1059 struct mbuf *mopt;
1060 u_int8_t *optbuf;
1061 u_int32_t v;
1062
1063 #define JUMBOOPTLEN 8 /* length of jumbo payload option and padding */
1064
1065 /*
1066 * If there is no hop-by-hop options header, allocate new one.
1067 * If there is one but it doesn't have enough space to store the
1068 * jumbo payload option, allocate a cluster to store the whole options.
1069 * Otherwise, use it to store the options.
1070 */
1071 if (exthdrs->ip6e_hbh == 0) {
1072 MGET(mopt, M_DONTWAIT, MT_DATA);
1073 if (mopt == 0)
1074 return (ENOBUFS);
1075 mopt->m_len = JUMBOOPTLEN;
1076 optbuf = mtod(mopt, u_int8_t *);
1077 optbuf[1] = 0; /* = ((JUMBOOPTLEN) >> 3) - 1 */
1078 exthdrs->ip6e_hbh = mopt;
1079 } else {
1080 struct ip6_hbh *hbh;
1081
1082 mopt = exthdrs->ip6e_hbh;
1083 if (M_TRAILINGSPACE(mopt) < JUMBOOPTLEN) {
1084 /*
1085 * XXX assumption:
1086 * - exthdrs->ip6e_hbh is not referenced from places
1087 * other than exthdrs.
1088 * - exthdrs->ip6e_hbh is not an mbuf chain.
1089 */
1090 int oldoptlen = mopt->m_len;
1091 struct mbuf *n;
1092
1093 /*
1094 * XXX: give up if the whole (new) hbh header does
1095 * not fit even in an mbuf cluster.
1096 */
1097 if (oldoptlen + JUMBOOPTLEN > MCLBYTES)
1098 return (ENOBUFS);
1099
1100 /*
1101 * As a consequence, we must always prepare a cluster
1102 * at this point.
1103 */
1104 MGET(n, M_DONTWAIT, MT_DATA);
1105 if (n) {
1106 MCLGET(n, M_DONTWAIT);
1107 if ((n->m_flags & M_EXT) == 0) {
1108 m_freem(n);
1109 n = NULL;
1110 }
1111 }
1112 if (!n)
1113 return (ENOBUFS);
1114 n->m_len = oldoptlen + JUMBOOPTLEN;
1115 bcopy(mtod(mopt, void *), mtod(n, void *),
1116 oldoptlen);
1117 optbuf = mtod(n, u_int8_t *) + oldoptlen;
1118 m_freem(mopt);
1119 mopt = exthdrs->ip6e_hbh = n;
1120 } else {
1121 optbuf = mtod(mopt, u_int8_t *) + mopt->m_len;
1122 mopt->m_len += JUMBOOPTLEN;
1123 }
1124 optbuf[0] = IP6OPT_PADN;
1125 optbuf[1] = 0;
1126
1127 /*
1128 * Adjust the header length according to the pad and
1129 * the jumbo payload option.
1130 */
1131 hbh = mtod(mopt, struct ip6_hbh *);
1132 hbh->ip6h_len += (JUMBOOPTLEN >> 3);
1133 }
1134
1135 /* fill in the option. */
1136 optbuf[2] = IP6OPT_JUMBO;
1137 optbuf[3] = 4;
1138 v = (u_int32_t)htonl(plen + JUMBOOPTLEN);
1139 bcopy(&v, &optbuf[4], sizeof(u_int32_t));
1140
1141 /* finally, adjust the packet header length */
1142 exthdrs->ip6e_ip6->m_pkthdr.len += JUMBOOPTLEN;
1143
1144 return (0);
1145 #undef JUMBOOPTLEN
1146 }
1147
1148 /*
1149 * Insert fragment header and copy unfragmentable header portions.
1150 *
1151 * *frghdrp will not be read, and it is guaranteed that either an
1152 * error is returned or that *frghdrp will point to space allocated
1153 * for the fragment header.
1154 */
1155 static int
1156 ip6_insertfraghdr(struct mbuf *m0, struct mbuf *m, int hlen,
1157 struct ip6_frag **frghdrp)
1158 {
1159 struct mbuf *n, *mlast;
1160
1161 if (hlen > sizeof(struct ip6_hdr)) {
1162 n = m_copym(m0, sizeof(struct ip6_hdr),
1163 hlen - sizeof(struct ip6_hdr), M_DONTWAIT);
1164 if (n == 0)
1165 return (ENOBUFS);
1166 m->m_next = n;
1167 } else
1168 n = m;
1169
1170 /* Search for the last mbuf of unfragmentable part. */
1171 for (mlast = n; mlast->m_next; mlast = mlast->m_next)
1172 ;
1173
1174 if ((mlast->m_flags & M_EXT) == 0 &&
1175 M_TRAILINGSPACE(mlast) >= sizeof(struct ip6_frag)) {
1176 /* use the trailing space of the last mbuf for the fragment hdr */
1177 *frghdrp = (struct ip6_frag *)(mtod(mlast, char *) +
1178 mlast->m_len);
1179 mlast->m_len += sizeof(struct ip6_frag);
1180 m->m_pkthdr.len += sizeof(struct ip6_frag);
1181 } else {
1182 /* allocate a new mbuf for the fragment header */
1183 struct mbuf *mfrg;
1184
1185 MGET(mfrg, M_DONTWAIT, MT_DATA);
1186 if (mfrg == 0)
1187 return (ENOBUFS);
1188 mfrg->m_len = sizeof(struct ip6_frag);
1189 *frghdrp = mtod(mfrg, struct ip6_frag *);
1190 mlast->m_next = mfrg;
1191 }
1192
1193 return (0);
1194 }
1195
1196 static int
1197 ip6_getpmtu(struct route *ro_pmtu, struct route *ro, struct ifnet *ifp,
1198 const struct in6_addr *dst, u_long *mtup, int *alwaysfragp)
1199 {
1200 struct rtentry *rt;
1201 u_int32_t mtu = 0;
1202 int alwaysfrag = 0;
1203 int error = 0;
1204
1205 if (ro_pmtu != ro) {
1206 union {
1207 struct sockaddr dst;
1208 struct sockaddr_in6 dst6;
1209 } u;
1210
1211 /* The first hop and the final destination may differ. */
1212 sockaddr_in6_init(&u.dst6, dst, 0, 0, 0);
1213 rt = rtcache_lookup(ro_pmtu, &u.dst);
1214 } else
1215 rt = rtcache_validate(ro_pmtu);
1216 if (rt != NULL) {
1217 u_int32_t ifmtu;
1218
1219 if (ifp == NULL)
1220 ifp = rt->rt_ifp;
1221 ifmtu = IN6_LINKMTU(ifp);
1222 mtu = rt->rt_rmx.rmx_mtu;
1223 if (mtu == 0)
1224 mtu = ifmtu;
1225 else if (mtu < IPV6_MMTU) {
1226 /*
1227 * RFC2460 section 5, last paragraph:
1228 * if we record ICMPv6 too big message with
1229 * mtu < IPV6_MMTU, transmit packets sized IPV6_MMTU
1230 * or smaller, with fragment header attached.
1231 * (fragment header is needed regardless from the
1232 * packet size, for translators to identify packets)
1233 */
1234 alwaysfrag = 1;
1235 mtu = IPV6_MMTU;
1236 } else if (mtu > ifmtu) {
1237 /*
1238 * The MTU on the route is larger than the MTU on
1239 * the interface! This shouldn't happen, unless the
1240 * MTU of the interface has been changed after the
1241 * interface was brought up. Change the MTU in the
1242 * route to match the interface MTU (as long as the
1243 * field isn't locked).
1244 */
1245 mtu = ifmtu;
1246 if (!(rt->rt_rmx.rmx_locks & RTV_MTU))
1247 rt->rt_rmx.rmx_mtu = mtu;
1248 }
1249 } else if (ifp) {
1250 mtu = IN6_LINKMTU(ifp);
1251 } else
1252 error = EHOSTUNREACH; /* XXX */
1253
1254 *mtup = mtu;
1255 if (alwaysfragp)
1256 *alwaysfragp = alwaysfrag;
1257 return (error);
1258 }
1259
1260 /*
1261 * IP6 socket option processing.
1262 */
1263 int
1264 ip6_ctloutput(int op, struct socket *so, struct sockopt *sopt)
1265 {
1266 int optdatalen, uproto;
1267 void *optdata;
1268 struct in6pcb *in6p = sotoin6pcb(so);
1269 struct ip_moptions **mopts;
1270 int error, optval;
1271 int level, optname;
1272
1273 KASSERT(sopt != NULL);
1274
1275 level = sopt->sopt_level;
1276 optname = sopt->sopt_name;
1277
1278 error = optval = 0;
1279 uproto = (int)so->so_proto->pr_protocol;
1280
1281 switch (level) {
1282 case IPPROTO_IP:
1283 switch (optname) {
1284 case IP_ADD_MEMBERSHIP:
1285 case IP_DROP_MEMBERSHIP:
1286 case IP_MULTICAST_IF:
1287 case IP_MULTICAST_LOOP:
1288 case IP_MULTICAST_TTL:
1289 mopts = &in6p->in6p_v4moptions;
1290 switch (op) {
1291 case PRCO_GETOPT:
1292 return ip_getmoptions(*mopts, sopt);
1293 case PRCO_SETOPT:
1294 return ip_setmoptions(mopts, sopt);
1295 default:
1296 return EINVAL;
1297 }
1298 default:
1299 return ENOPROTOOPT;
1300 }
1301 case IPPROTO_IPV6:
1302 break;
1303 default:
1304 return ENOPROTOOPT;
1305 }
1306 switch (op) {
1307 case PRCO_SETOPT:
1308 switch (optname) {
1309 #ifdef RFC2292
1310 case IPV6_2292PKTOPTIONS:
1311 error = ip6_pcbopts(&in6p->in6p_outputopts, so, sopt);
1312 break;
1313 #endif
1314
1315 /*
1316 * Use of some Hop-by-Hop options or some
1317 * Destination options, might require special
1318 * privilege. That is, normal applications
1319 * (without special privilege) might be forbidden
1320 * from setting certain options in outgoing packets,
1321 * and might never see certain options in received
1322 * packets. [RFC 2292 Section 6]
1323 * KAME specific note:
1324 * KAME prevents non-privileged users from sending or
1325 * receiving ANY hbh/dst options in order to avoid
1326 * overhead of parsing options in the kernel.
1327 */
1328 case IPV6_RECVHOPOPTS:
1329 case IPV6_RECVDSTOPTS:
1330 case IPV6_RECVRTHDRDSTOPTS:
1331 error = kauth_authorize_network(kauth_cred_get(),
1332 KAUTH_NETWORK_IPV6, KAUTH_REQ_NETWORK_IPV6_HOPBYHOP,
1333 NULL, NULL, NULL);
1334 if (error)
1335 break;
1336 /* FALLTHROUGH */
1337 case IPV6_UNICAST_HOPS:
1338 case IPV6_HOPLIMIT:
1339 case IPV6_FAITH:
1340
1341 case IPV6_RECVPKTINFO:
1342 case IPV6_RECVHOPLIMIT:
1343 case IPV6_RECVRTHDR:
1344 case IPV6_RECVPATHMTU:
1345 case IPV6_RECVTCLASS:
1346 case IPV6_V6ONLY:
1347 error = sockopt_getint(sopt, &optval);
1348 if (error)
1349 break;
1350 switch (optname) {
1351 case IPV6_UNICAST_HOPS:
1352 if (optval < -1 || optval >= 256)
1353 error = EINVAL;
1354 else {
1355 /* -1 = kernel default */
1356 in6p->in6p_hops = optval;
1357 }
1358 break;
1359 #define OPTSET(bit) \
1360 do { \
1361 if (optval) \
1362 in6p->in6p_flags |= (bit); \
1363 else \
1364 in6p->in6p_flags &= ~(bit); \
1365 } while (/*CONSTCOND*/ 0)
1366
1367 #ifdef RFC2292
1368 #define OPTSET2292(bit) \
1369 do { \
1370 in6p->in6p_flags |= IN6P_RFC2292; \
1371 if (optval) \
1372 in6p->in6p_flags |= (bit); \
1373 else \
1374 in6p->in6p_flags &= ~(bit); \
1375 } while (/*CONSTCOND*/ 0)
1376 #endif
1377
1378 #define OPTBIT(bit) (in6p->in6p_flags & (bit) ? 1 : 0)
1379
1380 case IPV6_RECVPKTINFO:
1381 #ifdef RFC2292
1382 /* cannot mix with RFC2292 */
1383 if (OPTBIT(IN6P_RFC2292)) {
1384 error = EINVAL;
1385 break;
1386 }
1387 #endif
1388 OPTSET(IN6P_PKTINFO);
1389 break;
1390
1391 case IPV6_HOPLIMIT:
1392 {
1393 struct ip6_pktopts **optp;
1394
1395 #ifdef RFC2292
1396 /* cannot mix with RFC2292 */
1397 if (OPTBIT(IN6P_RFC2292)) {
1398 error = EINVAL;
1399 break;
1400 }
1401 #endif
1402 optp = &in6p->in6p_outputopts;
1403 error = ip6_pcbopt(IPV6_HOPLIMIT,
1404 (u_char *)&optval,
1405 sizeof(optval),
1406 optp,
1407 kauth_cred_get(), uproto);
1408 break;
1409 }
1410
1411 case IPV6_RECVHOPLIMIT:
1412 #ifdef RFC2292
1413 /* cannot mix with RFC2292 */
1414 if (OPTBIT(IN6P_RFC2292)) {
1415 error = EINVAL;
1416 break;
1417 }
1418 #endif
1419 OPTSET(IN6P_HOPLIMIT);
1420 break;
1421
1422 case IPV6_RECVHOPOPTS:
1423 #ifdef RFC2292
1424 /* cannot mix with RFC2292 */
1425 if (OPTBIT(IN6P_RFC2292)) {
1426 error = EINVAL;
1427 break;
1428 }
1429 #endif
1430 OPTSET(IN6P_HOPOPTS);
1431 break;
1432
1433 case IPV6_RECVDSTOPTS:
1434 #ifdef RFC2292
1435 /* cannot mix with RFC2292 */
1436 if (OPTBIT(IN6P_RFC2292)) {
1437 error = EINVAL;
1438 break;
1439 }
1440 #endif
1441 OPTSET(IN6P_DSTOPTS);
1442 break;
1443
1444 case IPV6_RECVRTHDRDSTOPTS:
1445 #ifdef RFC2292
1446 /* cannot mix with RFC2292 */
1447 if (OPTBIT(IN6P_RFC2292)) {
1448 error = EINVAL;
1449 break;
1450 }
1451 #endif
1452 OPTSET(IN6P_RTHDRDSTOPTS);
1453 break;
1454
1455 case IPV6_RECVRTHDR:
1456 #ifdef RFC2292
1457 /* cannot mix with RFC2292 */
1458 if (OPTBIT(IN6P_RFC2292)) {
1459 error = EINVAL;
1460 break;
1461 }
1462 #endif
1463 OPTSET(IN6P_RTHDR);
1464 break;
1465
1466 case IPV6_FAITH:
1467 OPTSET(IN6P_FAITH);
1468 break;
1469
1470 case IPV6_RECVPATHMTU:
1471 /*
1472 * We ignore this option for TCP
1473 * sockets.
1474 * (RFC3542 leaves this case
1475 * unspecified.)
1476 */
1477 if (uproto != IPPROTO_TCP)
1478 OPTSET(IN6P_MTU);
1479 break;
1480
1481 case IPV6_V6ONLY:
1482 /*
1483 * make setsockopt(IPV6_V6ONLY)
1484 * available only prior to bind(2).
1485 * see ipng mailing list, Jun 22 2001.
1486 */
1487 if (in6p->in6p_lport ||
1488 !IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_laddr)) {
1489 error = EINVAL;
1490 break;
1491 }
1492 #ifdef INET6_BINDV6ONLY
1493 if (!optval)
1494 error = EINVAL;
1495 #else
1496 OPTSET(IN6P_IPV6_V6ONLY);
1497 #endif
1498 break;
1499 case IPV6_RECVTCLASS:
1500 #ifdef RFC2292
1501 /* cannot mix with RFC2292 XXX */
1502 if (OPTBIT(IN6P_RFC2292)) {
1503 error = EINVAL;
1504 break;
1505 }
1506 #endif
1507 OPTSET(IN6P_TCLASS);
1508 break;
1509
1510 }
1511 break;
1512
1513 case IPV6_OTCLASS:
1514 {
1515 struct ip6_pktopts **optp;
1516 u_int8_t tclass;
1517
1518 error = sockopt_get(sopt, &tclass, sizeof(tclass));
1519 if (error)
1520 break;
1521 optp = &in6p->in6p_outputopts;
1522 error = ip6_pcbopt(optname,
1523 (u_char *)&tclass,
1524 sizeof(tclass),
1525 optp,
1526 kauth_cred_get(), uproto);
1527 break;
1528 }
1529
1530 case IPV6_TCLASS:
1531 case IPV6_DONTFRAG:
1532 case IPV6_USE_MIN_MTU:
1533 case IPV6_PREFER_TEMPADDR:
1534 error = sockopt_getint(sopt, &optval);
1535 if (error)
1536 break;
1537 {
1538 struct ip6_pktopts **optp;
1539 optp = &in6p->in6p_outputopts;
1540 error = ip6_pcbopt(optname,
1541 (u_char *)&optval,
1542 sizeof(optval),
1543 optp,
1544 kauth_cred_get(), uproto);
1545 break;
1546 }
1547
1548 #ifdef RFC2292
1549 case IPV6_2292PKTINFO:
1550 case IPV6_2292HOPLIMIT:
1551 case IPV6_2292HOPOPTS:
1552 case IPV6_2292DSTOPTS:
1553 case IPV6_2292RTHDR:
1554 /* RFC 2292 */
1555 error = sockopt_getint(sopt, &optval);
1556 if (error)
1557 break;
1558
1559 switch (optname) {
1560 case IPV6_2292PKTINFO:
1561 OPTSET2292(IN6P_PKTINFO);
1562 break;
1563 case IPV6_2292HOPLIMIT:
1564 OPTSET2292(IN6P_HOPLIMIT);
1565 break;
1566 case IPV6_2292HOPOPTS:
1567 /*
1568 * Check super-user privilege.
1569 * See comments for IPV6_RECVHOPOPTS.
1570 */
1571 error =
1572 kauth_authorize_network(kauth_cred_get(),
1573 KAUTH_NETWORK_IPV6,
1574 KAUTH_REQ_NETWORK_IPV6_HOPBYHOP, NULL,
1575 NULL, NULL);
1576 if (error)
1577 return (error);
1578 OPTSET2292(IN6P_HOPOPTS);
1579 break;
1580 case IPV6_2292DSTOPTS:
1581 error =
1582 kauth_authorize_network(kauth_cred_get(),
1583 KAUTH_NETWORK_IPV6,
1584 KAUTH_REQ_NETWORK_IPV6_HOPBYHOP, NULL,
1585 NULL, NULL);
1586 if (error)
1587 return (error);
1588 OPTSET2292(IN6P_DSTOPTS|IN6P_RTHDRDSTOPTS); /* XXX */
1589 break;
1590 case IPV6_2292RTHDR:
1591 OPTSET2292(IN6P_RTHDR);
1592 break;
1593 }
1594 break;
1595 #endif
1596 case IPV6_PKTINFO:
1597 case IPV6_HOPOPTS:
1598 case IPV6_RTHDR:
1599 case IPV6_DSTOPTS:
1600 case IPV6_RTHDRDSTOPTS:
1601 case IPV6_NEXTHOP: {
1602 /* new advanced API (RFC3542) */
1603 void *optbuf;
1604 int optbuflen;
1605 struct ip6_pktopts **optp;
1606
1607 #ifdef RFC2292
1608 /* cannot mix with RFC2292 */
1609 if (OPTBIT(IN6P_RFC2292)) {
1610 error = EINVAL;
1611 break;
1612 }
1613 #endif
1614
1615 optbuflen = sopt->sopt_size;
1616 optbuf = malloc(optbuflen, M_IP6OPT, M_NOWAIT);
1617 if (optbuf == NULL) {
1618 error = ENOBUFS;
1619 break;
1620 }
1621
1622 error = sockopt_get(sopt, optbuf, optbuflen);
1623 if (error) {
1624 free(optbuf, M_IP6OPT);
1625 break;
1626 }
1627 optp = &in6p->in6p_outputopts;
1628 error = ip6_pcbopt(optname, optbuf, optbuflen,
1629 optp, kauth_cred_get(), uproto);
1630
1631 free(optbuf, M_IP6OPT);
1632 break;
1633 }
1634 #undef OPTSET
1635
1636 case IPV6_MULTICAST_IF:
1637 case IPV6_MULTICAST_HOPS:
1638 case IPV6_MULTICAST_LOOP:
1639 case IPV6_JOIN_GROUP:
1640 case IPV6_LEAVE_GROUP:
1641 error = ip6_setmoptions(sopt, in6p);
1642 break;
1643
1644 case IPV6_PORTRANGE:
1645 error = sockopt_getint(sopt, &optval);
1646 if (error)
1647 break;
1648
1649 switch (optval) {
1650 case IPV6_PORTRANGE_DEFAULT:
1651 in6p->in6p_flags &= ~(IN6P_LOWPORT);
1652 in6p->in6p_flags &= ~(IN6P_HIGHPORT);
1653 break;
1654
1655 case IPV6_PORTRANGE_HIGH:
1656 in6p->in6p_flags &= ~(IN6P_LOWPORT);
1657 in6p->in6p_flags |= IN6P_HIGHPORT;
1658 break;
1659
1660 case IPV6_PORTRANGE_LOW:
1661 in6p->in6p_flags &= ~(IN6P_HIGHPORT);
1662 in6p->in6p_flags |= IN6P_LOWPORT;
1663 break;
1664
1665 default:
1666 error = EINVAL;
1667 break;
1668 }
1669 break;
1670
1671 case IPV6_PORTALGO:
1672 error = sockopt_getint(sopt, &optval);
1673 if (error)
1674 break;
1675
1676 error = portalgo_algo_index_select(
1677 (struct inpcb_hdr *)in6p, optval);
1678 break;
1679
1680 #if defined(IPSEC)
1681 case IPV6_IPSEC_POLICY:
1682 if (ipsec_enabled) {
1683 error = ipsec6_set_policy(in6p, optname,
1684 sopt->sopt_data, sopt->sopt_size,
1685 kauth_cred_get());
1686 break;
1687 }
1688 /*FALLTHROUGH*/
1689 #endif /* IPSEC */
1690
1691 default:
1692 error = ENOPROTOOPT;
1693 break;
1694 }
1695 break;
1696
1697 case PRCO_GETOPT:
1698 switch (optname) {
1699 #ifdef RFC2292
1700 case IPV6_2292PKTOPTIONS:
1701 /*
1702 * RFC3542 (effectively) deprecated the
1703 * semantics of the 2292-style pktoptions.
1704 * Since it was not reliable in nature (i.e.,
1705 * applications had to expect the lack of some
1706 * information after all), it would make sense
1707 * to simplify this part by always returning
1708 * empty data.
1709 */
1710 break;
1711 #endif
1712
1713 case IPV6_RECVHOPOPTS:
1714 case IPV6_RECVDSTOPTS:
1715 case IPV6_RECVRTHDRDSTOPTS:
1716 case IPV6_UNICAST_HOPS:
1717 case IPV6_RECVPKTINFO:
1718 case IPV6_RECVHOPLIMIT:
1719 case IPV6_RECVRTHDR:
1720 case IPV6_RECVPATHMTU:
1721
1722 case IPV6_FAITH:
1723 case IPV6_V6ONLY:
1724 case IPV6_PORTRANGE:
1725 case IPV6_RECVTCLASS:
1726 switch (optname) {
1727
1728 case IPV6_RECVHOPOPTS:
1729 optval = OPTBIT(IN6P_HOPOPTS);
1730 break;
1731
1732 case IPV6_RECVDSTOPTS:
1733 optval = OPTBIT(IN6P_DSTOPTS);
1734 break;
1735
1736 case IPV6_RECVRTHDRDSTOPTS:
1737 optval = OPTBIT(IN6P_RTHDRDSTOPTS);
1738 break;
1739
1740 case IPV6_UNICAST_HOPS:
1741 optval = in6p->in6p_hops;
1742 break;
1743
1744 case IPV6_RECVPKTINFO:
1745 optval = OPTBIT(IN6P_PKTINFO);
1746 break;
1747
1748 case IPV6_RECVHOPLIMIT:
1749 optval = OPTBIT(IN6P_HOPLIMIT);
1750 break;
1751
1752 case IPV6_RECVRTHDR:
1753 optval = OPTBIT(IN6P_RTHDR);
1754 break;
1755
1756 case IPV6_RECVPATHMTU:
1757 optval = OPTBIT(IN6P_MTU);
1758 break;
1759
1760 case IPV6_FAITH:
1761 optval = OPTBIT(IN6P_FAITH);
1762 break;
1763
1764 case IPV6_V6ONLY:
1765 optval = OPTBIT(IN6P_IPV6_V6ONLY);
1766 break;
1767
1768 case IPV6_PORTRANGE:
1769 {
1770 int flags;
1771 flags = in6p->in6p_flags;
1772 if (flags & IN6P_HIGHPORT)
1773 optval = IPV6_PORTRANGE_HIGH;
1774 else if (flags & IN6P_LOWPORT)
1775 optval = IPV6_PORTRANGE_LOW;
1776 else
1777 optval = 0;
1778 break;
1779 }
1780 case IPV6_RECVTCLASS:
1781 optval = OPTBIT(IN6P_TCLASS);
1782 break;
1783
1784 }
1785 if (error)
1786 break;
1787 error = sockopt_setint(sopt, optval);
1788 break;
1789
1790 case IPV6_PATHMTU:
1791 {
1792 u_long pmtu = 0;
1793 struct ip6_mtuinfo mtuinfo;
1794 struct route *ro = &in6p->in6p_route;
1795
1796 if (!(so->so_state & SS_ISCONNECTED))
1797 return (ENOTCONN);
1798 /*
1799 * XXX: we dot not consider the case of source
1800 * routing, or optional information to specify
1801 * the outgoing interface.
1802 */
1803 error = ip6_getpmtu(ro, NULL, NULL,
1804 &in6p->in6p_faddr, &pmtu, NULL);
1805 if (error)
1806 break;
1807 if (pmtu > IPV6_MAXPACKET)
1808 pmtu = IPV6_MAXPACKET;
1809
1810 memset(&mtuinfo, 0, sizeof(mtuinfo));
1811 mtuinfo.ip6m_mtu = (u_int32_t)pmtu;
1812 optdata = (void *)&mtuinfo;
1813 optdatalen = sizeof(mtuinfo);
1814 if (optdatalen > MCLBYTES)
1815 return (EMSGSIZE); /* XXX */
1816 error = sockopt_set(sopt, optdata, optdatalen);
1817 break;
1818 }
1819
1820 #ifdef RFC2292
1821 case IPV6_2292PKTINFO:
1822 case IPV6_2292HOPLIMIT:
1823 case IPV6_2292HOPOPTS:
1824 case IPV6_2292RTHDR:
1825 case IPV6_2292DSTOPTS:
1826 switch (optname) {
1827 case IPV6_2292PKTINFO:
1828 optval = OPTBIT(IN6P_PKTINFO);
1829 break;
1830 case IPV6_2292HOPLIMIT:
1831 optval = OPTBIT(IN6P_HOPLIMIT);
1832 break;
1833 case IPV6_2292HOPOPTS:
1834 optval = OPTBIT(IN6P_HOPOPTS);
1835 break;
1836 case IPV6_2292RTHDR:
1837 optval = OPTBIT(IN6P_RTHDR);
1838 break;
1839 case IPV6_2292DSTOPTS:
1840 optval = OPTBIT(IN6P_DSTOPTS|IN6P_RTHDRDSTOPTS);
1841 break;
1842 }
1843 error = sockopt_setint(sopt, optval);
1844 break;
1845 #endif
1846 case IPV6_PKTINFO:
1847 case IPV6_HOPOPTS:
1848 case IPV6_RTHDR:
1849 case IPV6_DSTOPTS:
1850 case IPV6_RTHDRDSTOPTS:
1851 case IPV6_NEXTHOP:
1852 case IPV6_OTCLASS:
1853 case IPV6_TCLASS:
1854 case IPV6_DONTFRAG:
1855 case IPV6_USE_MIN_MTU:
1856 case IPV6_PREFER_TEMPADDR:
1857 error = ip6_getpcbopt(in6p->in6p_outputopts,
1858 optname, sopt);
1859 break;
1860
1861 case IPV6_MULTICAST_IF:
1862 case IPV6_MULTICAST_HOPS:
1863 case IPV6_MULTICAST_LOOP:
1864 case IPV6_JOIN_GROUP:
1865 case IPV6_LEAVE_GROUP:
1866 error = ip6_getmoptions(sopt, in6p);
1867 break;
1868
1869 case IPV6_PORTALGO:
1870 optval = ((struct inpcb_hdr *)in6p)->inph_portalgo;
1871 error = sockopt_setint(sopt, optval);
1872 break;
1873
1874 #if defined(IPSEC)
1875 case IPV6_IPSEC_POLICY:
1876 if (ipsec_used) {
1877 struct mbuf *m = NULL;
1878
1879 /*
1880 * XXX: this will return EINVAL as sopt is
1881 * empty
1882 */
1883 error = ipsec6_get_policy(in6p, sopt->sopt_data,
1884 sopt->sopt_size, &m);
1885 if (!error)
1886 error = sockopt_setmbuf(sopt, m);
1887 break;
1888 }
1889 /*FALLTHROUGH*/
1890 #endif /* IPSEC */
1891
1892 default:
1893 error = ENOPROTOOPT;
1894 break;
1895 }
1896 break;
1897 }
1898 return (error);
1899 }
1900
1901 int
1902 ip6_raw_ctloutput(int op, struct socket *so, struct sockopt *sopt)
1903 {
1904 int error = 0, optval;
1905 const int icmp6off = offsetof(struct icmp6_hdr, icmp6_cksum);
1906 struct in6pcb *in6p = sotoin6pcb(so);
1907 int level, optname;
1908
1909 KASSERT(sopt != NULL);
1910
1911 level = sopt->sopt_level;
1912 optname = sopt->sopt_name;
1913
1914 if (level != IPPROTO_IPV6) {
1915 return ENOPROTOOPT;
1916 }
1917
1918 switch (optname) {
1919 case IPV6_CHECKSUM:
1920 /*
1921 * For ICMPv6 sockets, no modification allowed for checksum
1922 * offset, permit "no change" values to help existing apps.
1923 *
1924 * XXX RFC3542 says: "An attempt to set IPV6_CHECKSUM
1925 * for an ICMPv6 socket will fail." The current
1926 * behavior does not meet RFC3542.
1927 */
1928 switch (op) {
1929 case PRCO_SETOPT:
1930 error = sockopt_getint(sopt, &optval);
1931 if (error)
1932 break;
1933 if ((optval % 2) != 0) {
1934 /* the API assumes even offset values */
1935 error = EINVAL;
1936 } else if (so->so_proto->pr_protocol ==
1937 IPPROTO_ICMPV6) {
1938 if (optval != icmp6off)
1939 error = EINVAL;
1940 } else
1941 in6p->in6p_cksum = optval;
1942 break;
1943
1944 case PRCO_GETOPT:
1945 if (so->so_proto->pr_protocol == IPPROTO_ICMPV6)
1946 optval = icmp6off;
1947 else
1948 optval = in6p->in6p_cksum;
1949
1950 error = sockopt_setint(sopt, optval);
1951 break;
1952
1953 default:
1954 error = EINVAL;
1955 break;
1956 }
1957 break;
1958
1959 default:
1960 error = ENOPROTOOPT;
1961 break;
1962 }
1963
1964 return (error);
1965 }
1966
1967 #ifdef RFC2292
1968 /*
1969 * Set up IP6 options in pcb for insertion in output packets or
1970 * specifying behavior of outgoing packets.
1971 */
1972 static int
1973 ip6_pcbopts(struct ip6_pktopts **pktopt, struct socket *so,
1974 struct sockopt *sopt)
1975 {
1976 struct ip6_pktopts *opt = *pktopt;
1977 struct mbuf *m;
1978 int error = 0;
1979
1980 /* turn off any old options. */
1981 if (opt) {
1982 #ifdef DIAGNOSTIC
1983 if (opt->ip6po_pktinfo || opt->ip6po_nexthop ||
1984 opt->ip6po_hbh || opt->ip6po_dest1 || opt->ip6po_dest2 ||
1985 opt->ip6po_rhinfo.ip6po_rhi_rthdr)
1986 printf("ip6_pcbopts: all specified options are cleared.\n");
1987 #endif
1988 ip6_clearpktopts(opt, -1);
1989 } else {
1990 opt = malloc(sizeof(*opt), M_IP6OPT, M_NOWAIT);
1991 if (opt == NULL)
1992 return (ENOBUFS);
1993 }
1994 *pktopt = NULL;
1995
1996 if (sopt == NULL || sopt->sopt_size == 0) {
1997 /*
1998 * Only turning off any previous options, regardless of
1999 * whether the opt is just created or given.
2000 */
2001 free(opt, M_IP6OPT);
2002 return (0);
2003 }
2004
2005 /* set options specified by user. */
2006 m = sockopt_getmbuf(sopt);
2007 if (m == NULL) {
2008 free(opt, M_IP6OPT);
2009 return (ENOBUFS);
2010 }
2011
2012 error = ip6_setpktopts(m, opt, NULL, kauth_cred_get(),
2013 so->so_proto->pr_protocol);
2014 m_freem(m);
2015 if (error != 0) {
2016 ip6_clearpktopts(opt, -1); /* XXX: discard all options */
2017 free(opt, M_IP6OPT);
2018 return (error);
2019 }
2020 *pktopt = opt;
2021 return (0);
2022 }
2023 #endif
2024
2025 /*
2026 * initialize ip6_pktopts. beware that there are non-zero default values in
2027 * the struct.
2028 */
2029 void
2030 ip6_initpktopts(struct ip6_pktopts *opt)
2031 {
2032
2033 memset(opt, 0, sizeof(*opt));
2034 opt->ip6po_hlim = -1; /* -1 means default hop limit */
2035 opt->ip6po_tclass = -1; /* -1 means default traffic class */
2036 opt->ip6po_minmtu = IP6PO_MINMTU_MCASTONLY;
2037 opt->ip6po_prefer_tempaddr = IP6PO_TEMPADDR_SYSTEM;
2038 }
2039
2040 #define sin6tosa(sin6) ((struct sockaddr *)(sin6)) /* XXX */
2041 static int
2042 ip6_pcbopt(int optname, u_char *buf, int len, struct ip6_pktopts **pktopt,
2043 kauth_cred_t cred, int uproto)
2044 {
2045 struct ip6_pktopts *opt;
2046
2047 if (*pktopt == NULL) {
2048 *pktopt = malloc(sizeof(struct ip6_pktopts), M_IP6OPT,
2049 M_NOWAIT);
2050 if (*pktopt == NULL)
2051 return (ENOBUFS);
2052
2053 ip6_initpktopts(*pktopt);
2054 }
2055 opt = *pktopt;
2056
2057 return (ip6_setpktopt(optname, buf, len, opt, cred, 1, 0, uproto));
2058 }
2059
2060 static int
2061 ip6_getpcbopt(struct ip6_pktopts *pktopt, int optname, struct sockopt *sopt)
2062 {
2063 void *optdata = NULL;
2064 int optdatalen = 0;
2065 struct ip6_ext *ip6e;
2066 int error = 0;
2067 struct in6_pktinfo null_pktinfo;
2068 int deftclass = 0, on;
2069 int defminmtu = IP6PO_MINMTU_MCASTONLY;
2070 int defpreftemp = IP6PO_TEMPADDR_SYSTEM;
2071
2072 switch (optname) {
2073 case IPV6_PKTINFO:
2074 if (pktopt && pktopt->ip6po_pktinfo)
2075 optdata = (void *)pktopt->ip6po_pktinfo;
2076 else {
2077 /* XXX: we don't have to do this every time... */
2078 memset(&null_pktinfo, 0, sizeof(null_pktinfo));
2079 optdata = (void *)&null_pktinfo;
2080 }
2081 optdatalen = sizeof(struct in6_pktinfo);
2082 break;
2083 case IPV6_OTCLASS:
2084 /* XXX */
2085 return (EINVAL);
2086 case IPV6_TCLASS:
2087 if (pktopt && pktopt->ip6po_tclass >= 0)
2088 optdata = (void *)&pktopt->ip6po_tclass;
2089 else
2090 optdata = (void *)&deftclass;
2091 optdatalen = sizeof(int);
2092 break;
2093 case IPV6_HOPOPTS:
2094 if (pktopt && pktopt->ip6po_hbh) {
2095 optdata = (void *)pktopt->ip6po_hbh;
2096 ip6e = (struct ip6_ext *)pktopt->ip6po_hbh;
2097 optdatalen = (ip6e->ip6e_len + 1) << 3;
2098 }
2099 break;
2100 case IPV6_RTHDR:
2101 if (pktopt && pktopt->ip6po_rthdr) {
2102 optdata = (void *)pktopt->ip6po_rthdr;
2103 ip6e = (struct ip6_ext *)pktopt->ip6po_rthdr;
2104 optdatalen = (ip6e->ip6e_len + 1) << 3;
2105 }
2106 break;
2107 case IPV6_RTHDRDSTOPTS:
2108 if (pktopt && pktopt->ip6po_dest1) {
2109 optdata = (void *)pktopt->ip6po_dest1;
2110 ip6e = (struct ip6_ext *)pktopt->ip6po_dest1;
2111 optdatalen = (ip6e->ip6e_len + 1) << 3;
2112 }
2113 break;
2114 case IPV6_DSTOPTS:
2115 if (pktopt && pktopt->ip6po_dest2) {
2116 optdata = (void *)pktopt->ip6po_dest2;
2117 ip6e = (struct ip6_ext *)pktopt->ip6po_dest2;
2118 optdatalen = (ip6e->ip6e_len + 1) << 3;
2119 }
2120 break;
2121 case IPV6_NEXTHOP:
2122 if (pktopt && pktopt->ip6po_nexthop) {
2123 optdata = (void *)pktopt->ip6po_nexthop;
2124 optdatalen = pktopt->ip6po_nexthop->sa_len;
2125 }
2126 break;
2127 case IPV6_USE_MIN_MTU:
2128 if (pktopt)
2129 optdata = (void *)&pktopt->ip6po_minmtu;
2130 else
2131 optdata = (void *)&defminmtu;
2132 optdatalen = sizeof(int);
2133 break;
2134 case IPV6_DONTFRAG:
2135 if (pktopt && ((pktopt->ip6po_flags) & IP6PO_DONTFRAG))
2136 on = 1;
2137 else
2138 on = 0;
2139 optdata = (void *)&on;
2140 optdatalen = sizeof(on);
2141 break;
2142 case IPV6_PREFER_TEMPADDR:
2143 if (pktopt)
2144 optdata = (void *)&pktopt->ip6po_prefer_tempaddr;
2145 else
2146 optdata = (void *)&defpreftemp;
2147 optdatalen = sizeof(int);
2148 break;
2149 default: /* should not happen */
2150 #ifdef DIAGNOSTIC
2151 panic("ip6_getpcbopt: unexpected option\n");
2152 #endif
2153 return (ENOPROTOOPT);
2154 }
2155
2156 error = sockopt_set(sopt, optdata, optdatalen);
2157
2158 return (error);
2159 }
2160
2161 void
2162 ip6_clearpktopts(struct ip6_pktopts *pktopt, int optname)
2163 {
2164 if (optname == -1 || optname == IPV6_PKTINFO) {
2165 if (pktopt->ip6po_pktinfo)
2166 free(pktopt->ip6po_pktinfo, M_IP6OPT);
2167 pktopt->ip6po_pktinfo = NULL;
2168 }
2169 if (optname == -1 || optname == IPV6_HOPLIMIT)
2170 pktopt->ip6po_hlim = -1;
2171 if (optname == -1 || optname == IPV6_TCLASS)
2172 pktopt->ip6po_tclass = -1;
2173 if (optname == -1 || optname == IPV6_NEXTHOP) {
2174 rtcache_free(&pktopt->ip6po_nextroute);
2175 if (pktopt->ip6po_nexthop)
2176 free(pktopt->ip6po_nexthop, M_IP6OPT);
2177 pktopt->ip6po_nexthop = NULL;
2178 }
2179 if (optname == -1 || optname == IPV6_HOPOPTS) {
2180 if (pktopt->ip6po_hbh)
2181 free(pktopt->ip6po_hbh, M_IP6OPT);
2182 pktopt->ip6po_hbh = NULL;
2183 }
2184 if (optname == -1 || optname == IPV6_RTHDRDSTOPTS) {
2185 if (pktopt->ip6po_dest1)
2186 free(pktopt->ip6po_dest1, M_IP6OPT);
2187 pktopt->ip6po_dest1 = NULL;
2188 }
2189 if (optname == -1 || optname == IPV6_RTHDR) {
2190 if (pktopt->ip6po_rhinfo.ip6po_rhi_rthdr)
2191 free(pktopt->ip6po_rhinfo.ip6po_rhi_rthdr, M_IP6OPT);
2192 pktopt->ip6po_rhinfo.ip6po_rhi_rthdr = NULL;
2193 rtcache_free(&pktopt->ip6po_route);
2194 }
2195 if (optname == -1 || optname == IPV6_DSTOPTS) {
2196 if (pktopt->ip6po_dest2)
2197 free(pktopt->ip6po_dest2, M_IP6OPT);
2198 pktopt->ip6po_dest2 = NULL;
2199 }
2200 }
2201
2202 #define PKTOPT_EXTHDRCPY(type) \
2203 do { \
2204 if (src->type) { \
2205 int hlen = (((struct ip6_ext *)src->type)->ip6e_len + 1) << 3;\
2206 dst->type = malloc(hlen, M_IP6OPT, canwait); \
2207 if (dst->type == NULL) \
2208 goto bad; \
2209 memcpy(dst->type, src->type, hlen); \
2210 } \
2211 } while (/*CONSTCOND*/ 0)
2212
2213 static int
2214 copypktopts(struct ip6_pktopts *dst, struct ip6_pktopts *src, int canwait)
2215 {
2216 dst->ip6po_hlim = src->ip6po_hlim;
2217 dst->ip6po_tclass = src->ip6po_tclass;
2218 dst->ip6po_flags = src->ip6po_flags;
2219 dst->ip6po_minmtu = src->ip6po_minmtu;
2220 dst->ip6po_prefer_tempaddr = src->ip6po_prefer_tempaddr;
2221 if (src->ip6po_pktinfo) {
2222 dst->ip6po_pktinfo = malloc(sizeof(*dst->ip6po_pktinfo),
2223 M_IP6OPT, canwait);
2224 if (dst->ip6po_pktinfo == NULL)
2225 goto bad;
2226 *dst->ip6po_pktinfo = *src->ip6po_pktinfo;
2227 }
2228 if (src->ip6po_nexthop) {
2229 dst->ip6po_nexthop = malloc(src->ip6po_nexthop->sa_len,
2230 M_IP6OPT, canwait);
2231 if (dst->ip6po_nexthop == NULL)
2232 goto bad;
2233 memcpy(dst->ip6po_nexthop, src->ip6po_nexthop,
2234 src->ip6po_nexthop->sa_len);
2235 }
2236 PKTOPT_EXTHDRCPY(ip6po_hbh);
2237 PKTOPT_EXTHDRCPY(ip6po_dest1);
2238 PKTOPT_EXTHDRCPY(ip6po_dest2);
2239 PKTOPT_EXTHDRCPY(ip6po_rthdr); /* not copy the cached route */
2240 return (0);
2241
2242 bad:
2243 if (dst->ip6po_pktinfo) free(dst->ip6po_pktinfo, M_IP6OPT);
2244 if (dst->ip6po_nexthop) free(dst->ip6po_nexthop, M_IP6OPT);
2245 if (dst->ip6po_hbh) free(dst->ip6po_hbh, M_IP6OPT);
2246 if (dst->ip6po_dest1) free(dst->ip6po_dest1, M_IP6OPT);
2247 if (dst->ip6po_dest2) free(dst->ip6po_dest2, M_IP6OPT);
2248 if (dst->ip6po_rthdr) free(dst->ip6po_rthdr, M_IP6OPT);
2249
2250 return (ENOBUFS);
2251 }
2252 #undef PKTOPT_EXTHDRCPY
2253
2254 struct ip6_pktopts *
2255 ip6_copypktopts(struct ip6_pktopts *src, int canwait)
2256 {
2257 int error;
2258 struct ip6_pktopts *dst;
2259
2260 dst = malloc(sizeof(*dst), M_IP6OPT, canwait);
2261 if (dst == NULL)
2262 return (NULL);
2263 ip6_initpktopts(dst);
2264
2265 if ((error = copypktopts(dst, src, canwait)) != 0) {
2266 free(dst, M_IP6OPT);
2267 return (NULL);
2268 }
2269
2270 return (dst);
2271 }
2272
2273 void
2274 ip6_freepcbopts(struct ip6_pktopts *pktopt)
2275 {
2276 if (pktopt == NULL)
2277 return;
2278
2279 ip6_clearpktopts(pktopt, -1);
2280
2281 free(pktopt, M_IP6OPT);
2282 }
2283
2284 int
2285 ip6_get_membership(const struct sockopt *sopt, struct ifnet **ifp, void *v,
2286 size_t l)
2287 {
2288 struct ipv6_mreq mreq;
2289 int error;
2290 struct in6_addr *ia = &mreq.ipv6mr_multiaddr;
2291 struct in_addr *ia4 = (void *)&ia->s6_addr32[3];
2292 error = sockopt_get(sopt, &mreq, sizeof(mreq));
2293 if (error != 0)
2294 return error;
2295
2296 if (IN6_IS_ADDR_UNSPECIFIED(ia)) {
2297 /*
2298 * We use the unspecified address to specify to accept
2299 * all multicast addresses. Only super user is allowed
2300 * to do this.
2301 */
2302 if (kauth_authorize_network(curlwp->l_cred, KAUTH_NETWORK_IPV6,
2303 KAUTH_REQ_NETWORK_IPV6_JOIN_MULTICAST, NULL, NULL, NULL))
2304 return EACCES;
2305 } else if (IN6_IS_ADDR_V4MAPPED(ia)) {
2306 // Don't bother if we are not going to use ifp.
2307 if (l == sizeof(*ia)) {
2308 memcpy(v, ia, l);
2309 return 0;
2310 }
2311 } else if (!IN6_IS_ADDR_MULTICAST(ia)) {
2312 return EINVAL;
2313 }
2314
2315 /*
2316 * If no interface was explicitly specified, choose an
2317 * appropriate one according to the given multicast address.
2318 */
2319 if (mreq.ipv6mr_interface == 0) {
2320 struct rtentry *rt;
2321 union {
2322 struct sockaddr dst;
2323 struct sockaddr_in dst4;
2324 struct sockaddr_in6 dst6;
2325 } u;
2326 struct route ro;
2327
2328 /*
2329 * Look up the routing table for the
2330 * address, and choose the outgoing interface.
2331 * XXX: is it a good approach?
2332 */
2333 memset(&ro, 0, sizeof(ro));
2334 if (IN6_IS_ADDR_V4MAPPED(ia))
2335 sockaddr_in_init(&u.dst4, ia4, 0);
2336 else
2337 sockaddr_in6_init(&u.dst6, ia, 0, 0, 0);
2338 error = rtcache_setdst(&ro, &u.dst);
2339 if (error != 0)
2340 return error;
2341 *ifp = (rt = rtcache_init(&ro)) != NULL ? rt->rt_ifp : NULL;
2342 rtcache_free(&ro);
2343 } else {
2344 /*
2345 * If the interface is specified, validate it.
2346 */
2347 if ((*ifp = if_byindex(mreq.ipv6mr_interface)) == NULL)
2348 return ENXIO; /* XXX EINVAL? */
2349 }
2350 if (sizeof(*ia) == l)
2351 memcpy(v, ia, l);
2352 else
2353 memcpy(v, ia4, l);
2354 return 0;
2355 }
2356
2357 /*
2358 * Set the IP6 multicast options in response to user setsockopt().
2359 */
2360 static int
2361 ip6_setmoptions(const struct sockopt *sopt, struct in6pcb *in6p)
2362 {
2363 int error = 0;
2364 u_int loop, ifindex;
2365 struct ipv6_mreq mreq;
2366 struct in6_addr ia;
2367 struct ifnet *ifp;
2368 struct ip6_moptions *im6o = in6p->in6p_moptions;
2369 struct in6_multi_mship *imm;
2370
2371 if (im6o == NULL) {
2372 /*
2373 * No multicast option buffer attached to the pcb;
2374 * allocate one and initialize to default values.
2375 */
2376 im6o = malloc(sizeof(*im6o), M_IPMOPTS, M_NOWAIT);
2377 if (im6o == NULL)
2378 return (ENOBUFS);
2379 in6p->in6p_moptions = im6o;
2380 im6o->im6o_multicast_ifp = NULL;
2381 im6o->im6o_multicast_hlim = ip6_defmcasthlim;
2382 im6o->im6o_multicast_loop = IPV6_DEFAULT_MULTICAST_LOOP;
2383 LIST_INIT(&im6o->im6o_memberships);
2384 }
2385
2386 switch (sopt->sopt_name) {
2387
2388 case IPV6_MULTICAST_IF:
2389 /*
2390 * Select the interface for outgoing multicast packets.
2391 */
2392 error = sockopt_get(sopt, &ifindex, sizeof(ifindex));
2393 if (error != 0)
2394 break;
2395
2396 if (ifindex != 0) {
2397 if ((ifp = if_byindex(ifindex)) == NULL) {
2398 error = ENXIO; /* XXX EINVAL? */
2399 break;
2400 }
2401 if ((ifp->if_flags & IFF_MULTICAST) == 0) {
2402 error = EADDRNOTAVAIL;
2403 break;
2404 }
2405 } else
2406 ifp = NULL;
2407 im6o->im6o_multicast_ifp = ifp;
2408 break;
2409
2410 case IPV6_MULTICAST_HOPS:
2411 {
2412 /*
2413 * Set the IP6 hoplimit for outgoing multicast packets.
2414 */
2415 int optval;
2416
2417 error = sockopt_getint(sopt, &optval);
2418 if (error != 0)
2419 break;
2420
2421 if (optval < -1 || optval >= 256)
2422 error = EINVAL;
2423 else if (optval == -1)
2424 im6o->im6o_multicast_hlim = ip6_defmcasthlim;
2425 else
2426 im6o->im6o_multicast_hlim = optval;
2427 break;
2428 }
2429
2430 case IPV6_MULTICAST_LOOP:
2431 /*
2432 * Set the loopback flag for outgoing multicast packets.
2433 * Must be zero or one.
2434 */
2435 error = sockopt_get(sopt, &loop, sizeof(loop));
2436 if (error != 0)
2437 break;
2438 if (loop > 1) {
2439 error = EINVAL;
2440 break;
2441 }
2442 im6o->im6o_multicast_loop = loop;
2443 break;
2444
2445 case IPV6_JOIN_GROUP:
2446 /*
2447 * Add a multicast group membership.
2448 * Group must be a valid IP6 multicast address.
2449 */
2450 if ((error = ip6_get_membership(sopt, &ifp, &ia, sizeof(ia))))
2451 return error;
2452
2453 if (IN6_IS_ADDR_V4MAPPED(&ia)) {
2454 error = ip_setmoptions(&in6p->in6p_v4moptions, sopt);
2455 break;
2456 }
2457 /*
2458 * See if we found an interface, and confirm that it
2459 * supports multicast
2460 */
2461 if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
2462 error = EADDRNOTAVAIL;
2463 break;
2464 }
2465
2466 if (in6_setscope(&ia, ifp, NULL)) {
2467 error = EADDRNOTAVAIL; /* XXX: should not happen */
2468 break;
2469 }
2470
2471 /*
2472 * See if the membership already exists.
2473 */
2474 for (imm = im6o->im6o_memberships.lh_first;
2475 imm != NULL; imm = imm->i6mm_chain.le_next)
2476 if (imm->i6mm_maddr->in6m_ifp == ifp &&
2477 IN6_ARE_ADDR_EQUAL(&imm->i6mm_maddr->in6m_addr,
2478 &ia))
2479 break;
2480 if (imm != NULL) {
2481 error = EADDRINUSE;
2482 break;
2483 }
2484 /*
2485 * Everything looks good; add a new record to the multicast
2486 * address list for the given interface.
2487 */
2488 imm = in6_joingroup(ifp, &ia, &error, 0);
2489 if (imm == NULL)
2490 break;
2491 LIST_INSERT_HEAD(&im6o->im6o_memberships, imm, i6mm_chain);
2492 break;
2493
2494 case IPV6_LEAVE_GROUP:
2495 /*
2496 * Drop a multicast group membership.
2497 * Group must be a valid IP6 multicast address.
2498 */
2499 error = sockopt_get(sopt, &mreq, sizeof(mreq));
2500 if (error != 0)
2501 break;
2502
2503 if (IN6_IS_ADDR_V4MAPPED(&mreq.ipv6mr_multiaddr)) {
2504 error = ip_setmoptions(&in6p->in6p_v4moptions, sopt);
2505 break;
2506 }
2507 /*
2508 * If an interface address was specified, get a pointer
2509 * to its ifnet structure.
2510 */
2511 if (mreq.ipv6mr_interface != 0) {
2512 if ((ifp = if_byindex(mreq.ipv6mr_interface)) == NULL) {
2513 error = ENXIO; /* XXX EINVAL? */
2514 break;
2515 }
2516 } else
2517 ifp = NULL;
2518
2519 /* Fill in the scope zone ID */
2520 if (ifp) {
2521 if (in6_setscope(&mreq.ipv6mr_multiaddr, ifp, NULL)) {
2522 /* XXX: should not happen */
2523 error = EADDRNOTAVAIL;
2524 break;
2525 }
2526 } else if (mreq.ipv6mr_interface != 0) {
2527 /*
2528 * XXX: This case would happens when the (positive)
2529 * index is in the valid range, but the corresponding
2530 * interface has been detached dynamically. The above
2531 * check probably avoids such case to happen here, but
2532 * we check it explicitly for safety.
2533 */
2534 error = EADDRNOTAVAIL;
2535 break;
2536 } else { /* ipv6mr_interface == 0 */
2537 struct sockaddr_in6 sa6_mc;
2538
2539 /*
2540 * The API spec says as follows:
2541 * If the interface index is specified as 0, the
2542 * system may choose a multicast group membership to
2543 * drop by matching the multicast address only.
2544 * On the other hand, we cannot disambiguate the scope
2545 * zone unless an interface is provided. Thus, we
2546 * check if there's ambiguity with the default scope
2547 * zone as the last resort.
2548 */
2549 sockaddr_in6_init(&sa6_mc, &mreq.ipv6mr_multiaddr,
2550 0, 0, 0);
2551 error = sa6_embedscope(&sa6_mc, ip6_use_defzone);
2552 if (error != 0)
2553 break;
2554 mreq.ipv6mr_multiaddr = sa6_mc.sin6_addr;
2555 }
2556
2557 /*
2558 * Find the membership in the membership list.
2559 */
2560 for (imm = im6o->im6o_memberships.lh_first;
2561 imm != NULL; imm = imm->i6mm_chain.le_next) {
2562 if ((ifp == NULL || imm->i6mm_maddr->in6m_ifp == ifp) &&
2563 IN6_ARE_ADDR_EQUAL(&imm->i6mm_maddr->in6m_addr,
2564 &mreq.ipv6mr_multiaddr))
2565 break;
2566 }
2567 if (imm == NULL) {
2568 /* Unable to resolve interface */
2569 error = EADDRNOTAVAIL;
2570 break;
2571 }
2572 /*
2573 * Give up the multicast address record to which the
2574 * membership points.
2575 */
2576 LIST_REMOVE(imm, i6mm_chain);
2577 in6_leavegroup(imm);
2578 break;
2579
2580 default:
2581 error = EOPNOTSUPP;
2582 break;
2583 }
2584
2585 /*
2586 * If all options have default values, no need to keep the mbuf.
2587 */
2588 if (im6o->im6o_multicast_ifp == NULL &&
2589 im6o->im6o_multicast_hlim == ip6_defmcasthlim &&
2590 im6o->im6o_multicast_loop == IPV6_DEFAULT_MULTICAST_LOOP &&
2591 im6o->im6o_memberships.lh_first == NULL) {
2592 free(in6p->in6p_moptions, M_IPMOPTS);
2593 in6p->in6p_moptions = NULL;
2594 }
2595
2596 return (error);
2597 }
2598
2599 /*
2600 * Return the IP6 multicast options in response to user getsockopt().
2601 */
2602 static int
2603 ip6_getmoptions(struct sockopt *sopt, struct in6pcb *in6p)
2604 {
2605 u_int optval;
2606 int error;
2607 struct ip6_moptions *im6o = in6p->in6p_moptions;
2608
2609 switch (sopt->sopt_name) {
2610 case IPV6_MULTICAST_IF:
2611 if (im6o == NULL || im6o->im6o_multicast_ifp == NULL)
2612 optval = 0;
2613 else
2614 optval = im6o->im6o_multicast_ifp->if_index;
2615
2616 error = sockopt_set(sopt, &optval, sizeof(optval));
2617 break;
2618
2619 case IPV6_MULTICAST_HOPS:
2620 if (im6o == NULL)
2621 optval = ip6_defmcasthlim;
2622 else
2623 optval = im6o->im6o_multicast_hlim;
2624
2625 error = sockopt_set(sopt, &optval, sizeof(optval));
2626 break;
2627
2628 case IPV6_MULTICAST_LOOP:
2629 if (im6o == NULL)
2630 optval = IPV6_DEFAULT_MULTICAST_LOOP;
2631 else
2632 optval = im6o->im6o_multicast_loop;
2633
2634 error = sockopt_set(sopt, &optval, sizeof(optval));
2635 break;
2636
2637 default:
2638 error = EOPNOTSUPP;
2639 }
2640
2641 return (error);
2642 }
2643
2644 /*
2645 * Discard the IP6 multicast options.
2646 */
2647 void
2648 ip6_freemoptions(struct ip6_moptions *im6o)
2649 {
2650 struct in6_multi_mship *imm;
2651
2652 if (im6o == NULL)
2653 return;
2654
2655 while ((imm = im6o->im6o_memberships.lh_first) != NULL) {
2656 LIST_REMOVE(imm, i6mm_chain);
2657 in6_leavegroup(imm);
2658 }
2659 free(im6o, M_IPMOPTS);
2660 }
2661
2662 /*
2663 * Set IPv6 outgoing packet options based on advanced API.
2664 */
2665 int
2666 ip6_setpktopts(struct mbuf *control, struct ip6_pktopts *opt,
2667 struct ip6_pktopts *stickyopt, kauth_cred_t cred, int uproto)
2668 {
2669 struct cmsghdr *cm = 0;
2670
2671 if (control == NULL || opt == NULL)
2672 return (EINVAL);
2673
2674 ip6_initpktopts(opt);
2675 if (stickyopt) {
2676 int error;
2677
2678 /*
2679 * If stickyopt is provided, make a local copy of the options
2680 * for this particular packet, then override them by ancillary
2681 * objects.
2682 * XXX: copypktopts() does not copy the cached route to a next
2683 * hop (if any). This is not very good in terms of efficiency,
2684 * but we can allow this since this option should be rarely
2685 * used.
2686 */
2687 if ((error = copypktopts(opt, stickyopt, M_NOWAIT)) != 0)
2688 return (error);
2689 }
2690
2691 /*
2692 * XXX: Currently, we assume all the optional information is stored
2693 * in a single mbuf.
2694 */
2695 if (control->m_next)
2696 return (EINVAL);
2697
2698 /* XXX if cm->cmsg_len is not aligned, control->m_len can become <0 */
2699 for (; control->m_len > 0; control->m_data += CMSG_ALIGN(cm->cmsg_len),
2700 control->m_len -= CMSG_ALIGN(cm->cmsg_len)) {
2701 int error;
2702
2703 if (control->m_len < CMSG_LEN(0))
2704 return (EINVAL);
2705
2706 cm = mtod(control, struct cmsghdr *);
2707 if (cm->cmsg_len == 0 || cm->cmsg_len > control->m_len)
2708 return (EINVAL);
2709 if (cm->cmsg_level != IPPROTO_IPV6)
2710 continue;
2711
2712 error = ip6_setpktopt(cm->cmsg_type, CMSG_DATA(cm),
2713 cm->cmsg_len - CMSG_LEN(0), opt, cred, 0, 1, uproto);
2714 if (error)
2715 return (error);
2716 }
2717
2718 return (0);
2719 }
2720
2721 /*
2722 * Set a particular packet option, as a sticky option or an ancillary data
2723 * item. "len" can be 0 only when it's a sticky option.
2724 * We have 4 cases of combination of "sticky" and "cmsg":
2725 * "sticky=0, cmsg=0": impossible
2726 * "sticky=0, cmsg=1": RFC2292 or RFC3542 ancillary data
2727 * "sticky=1, cmsg=0": RFC3542 socket option
2728 * "sticky=1, cmsg=1": RFC2292 socket option
2729 */
2730 static int
2731 ip6_setpktopt(int optname, u_char *buf, int len, struct ip6_pktopts *opt,
2732 kauth_cred_t cred, int sticky, int cmsg, int uproto)
2733 {
2734 int minmtupolicy;
2735 int error;
2736
2737 if (!sticky && !cmsg) {
2738 #ifdef DIAGNOSTIC
2739 printf("ip6_setpktopt: impossible case\n");
2740 #endif
2741 return (EINVAL);
2742 }
2743
2744 /*
2745 * IPV6_2292xxx is for backward compatibility to RFC2292, and should
2746 * not be specified in the context of RFC3542. Conversely,
2747 * RFC3542 types should not be specified in the context of RFC2292.
2748 */
2749 if (!cmsg) {
2750 switch (optname) {
2751 case IPV6_2292PKTINFO:
2752 case IPV6_2292HOPLIMIT:
2753 case IPV6_2292NEXTHOP:
2754 case IPV6_2292HOPOPTS:
2755 case IPV6_2292DSTOPTS:
2756 case IPV6_2292RTHDR:
2757 case IPV6_2292PKTOPTIONS:
2758 return (ENOPROTOOPT);
2759 }
2760 }
2761 if (sticky && cmsg) {
2762 switch (optname) {
2763 case IPV6_PKTINFO:
2764 case IPV6_HOPLIMIT:
2765 case IPV6_NEXTHOP:
2766 case IPV6_HOPOPTS:
2767 case IPV6_DSTOPTS:
2768 case IPV6_RTHDRDSTOPTS:
2769 case IPV6_RTHDR:
2770 case IPV6_USE_MIN_MTU:
2771 case IPV6_DONTFRAG:
2772 case IPV6_OTCLASS:
2773 case IPV6_TCLASS:
2774 case IPV6_PREFER_TEMPADDR: /* XXX not an RFC3542 option */
2775 return (ENOPROTOOPT);
2776 }
2777 }
2778
2779 switch (optname) {
2780 #ifdef RFC2292
2781 case IPV6_2292PKTINFO:
2782 #endif
2783 case IPV6_PKTINFO:
2784 {
2785 struct ifnet *ifp = NULL;
2786 struct in6_pktinfo *pktinfo;
2787
2788 if (len != sizeof(struct in6_pktinfo))
2789 return (EINVAL);
2790
2791 pktinfo = (struct in6_pktinfo *)buf;
2792
2793 /*
2794 * An application can clear any sticky IPV6_PKTINFO option by
2795 * doing a "regular" setsockopt with ipi6_addr being
2796 * in6addr_any and ipi6_ifindex being zero.
2797 * [RFC 3542, Section 6]
2798 */
2799 if (optname == IPV6_PKTINFO && opt->ip6po_pktinfo &&
2800 pktinfo->ipi6_ifindex == 0 &&
2801 IN6_IS_ADDR_UNSPECIFIED(&pktinfo->ipi6_addr)) {
2802 ip6_clearpktopts(opt, optname);
2803 break;
2804 }
2805
2806 if (uproto == IPPROTO_TCP && optname == IPV6_PKTINFO &&
2807 sticky && !IN6_IS_ADDR_UNSPECIFIED(&pktinfo->ipi6_addr)) {
2808 return (EINVAL);
2809 }
2810
2811 /* Validate the interface index if specified. */
2812 if (pktinfo->ipi6_ifindex) {
2813 ifp = if_byindex(pktinfo->ipi6_ifindex);
2814 if (ifp == NULL)
2815 return (ENXIO);
2816 }
2817
2818 /*
2819 * We store the address anyway, and let in6_selectsrc()
2820 * validate the specified address. This is because ipi6_addr
2821 * may not have enough information about its scope zone, and
2822 * we may need additional information (such as outgoing
2823 * interface or the scope zone of a destination address) to
2824 * disambiguate the scope.
2825 * XXX: the delay of the validation may confuse the
2826 * application when it is used as a sticky option.
2827 */
2828 if (opt->ip6po_pktinfo == NULL) {
2829 opt->ip6po_pktinfo = malloc(sizeof(*pktinfo),
2830 M_IP6OPT, M_NOWAIT);
2831 if (opt->ip6po_pktinfo == NULL)
2832 return (ENOBUFS);
2833 }
2834 memcpy(opt->ip6po_pktinfo, pktinfo, sizeof(*pktinfo));
2835 break;
2836 }
2837
2838 #ifdef RFC2292
2839 case IPV6_2292HOPLIMIT:
2840 #endif
2841 case IPV6_HOPLIMIT:
2842 {
2843 int *hlimp;
2844
2845 /*
2846 * RFC 3542 deprecated the usage of sticky IPV6_HOPLIMIT
2847 * to simplify the ordering among hoplimit options.
2848 */
2849 if (optname == IPV6_HOPLIMIT && sticky)
2850 return (ENOPROTOOPT);
2851
2852 if (len != sizeof(int))
2853 return (EINVAL);
2854 hlimp = (int *)buf;
2855 if (*hlimp < -1 || *hlimp > 255)
2856 return (EINVAL);
2857
2858 opt->ip6po_hlim = *hlimp;
2859 break;
2860 }
2861
2862 case IPV6_OTCLASS:
2863 if (len != sizeof(u_int8_t))
2864 return (EINVAL);
2865
2866 opt->ip6po_tclass = *(u_int8_t *)buf;
2867 break;
2868
2869 case IPV6_TCLASS:
2870 {
2871 int tclass;
2872
2873 if (len != sizeof(int))
2874 return (EINVAL);
2875 tclass = *(int *)buf;
2876 if (tclass < -1 || tclass > 255)
2877 return (EINVAL);
2878
2879 opt->ip6po_tclass = tclass;
2880 break;
2881 }
2882
2883 #ifdef RFC2292
2884 case IPV6_2292NEXTHOP:
2885 #endif
2886 case IPV6_NEXTHOP:
2887 error = kauth_authorize_network(cred, KAUTH_NETWORK_IPV6,
2888 KAUTH_REQ_NETWORK_IPV6_HOPBYHOP, NULL, NULL, NULL);
2889 if (error)
2890 return (error);
2891
2892 if (len == 0) { /* just remove the option */
2893 ip6_clearpktopts(opt, IPV6_NEXTHOP);
2894 break;
2895 }
2896
2897 /* check if cmsg_len is large enough for sa_len */
2898 if (len < sizeof(struct sockaddr) || len < *buf)
2899 return (EINVAL);
2900
2901 switch (((struct sockaddr *)buf)->sa_family) {
2902 case AF_INET6:
2903 {
2904 struct sockaddr_in6 *sa6 = (struct sockaddr_in6 *)buf;
2905
2906 if (sa6->sin6_len != sizeof(struct sockaddr_in6))
2907 return (EINVAL);
2908
2909 if (IN6_IS_ADDR_UNSPECIFIED(&sa6->sin6_addr) ||
2910 IN6_IS_ADDR_MULTICAST(&sa6->sin6_addr)) {
2911 return (EINVAL);
2912 }
2913 if ((error = sa6_embedscope(sa6, ip6_use_defzone))
2914 != 0) {
2915 return (error);
2916 }
2917 break;
2918 }
2919 case AF_LINK: /* eventually be supported? */
2920 default:
2921 return (EAFNOSUPPORT);
2922 }
2923
2924 /* turn off the previous option, then set the new option. */
2925 ip6_clearpktopts(opt, IPV6_NEXTHOP);
2926 opt->ip6po_nexthop = malloc(*buf, M_IP6OPT, M_NOWAIT);
2927 if (opt->ip6po_nexthop == NULL)
2928 return (ENOBUFS);
2929 memcpy(opt->ip6po_nexthop, buf, *buf);
2930 break;
2931
2932 #ifdef RFC2292
2933 case IPV6_2292HOPOPTS:
2934 #endif
2935 case IPV6_HOPOPTS:
2936 {
2937 struct ip6_hbh *hbh;
2938 int hbhlen;
2939
2940 /*
2941 * XXX: We don't allow a non-privileged user to set ANY HbH
2942 * options, since per-option restriction has too much
2943 * overhead.
2944 */
2945 error = kauth_authorize_network(cred, KAUTH_NETWORK_IPV6,
2946 KAUTH_REQ_NETWORK_IPV6_HOPBYHOP, NULL, NULL, NULL);
2947 if (error)
2948 return (error);
2949
2950 if (len == 0) {
2951 ip6_clearpktopts(opt, IPV6_HOPOPTS);
2952 break; /* just remove the option */
2953 }
2954
2955 /* message length validation */
2956 if (len < sizeof(struct ip6_hbh))
2957 return (EINVAL);
2958 hbh = (struct ip6_hbh *)buf;
2959 hbhlen = (hbh->ip6h_len + 1) << 3;
2960 if (len != hbhlen)
2961 return (EINVAL);
2962
2963 /* turn off the previous option, then set the new option. */
2964 ip6_clearpktopts(opt, IPV6_HOPOPTS);
2965 opt->ip6po_hbh = malloc(hbhlen, M_IP6OPT, M_NOWAIT);
2966 if (opt->ip6po_hbh == NULL)
2967 return (ENOBUFS);
2968 memcpy(opt->ip6po_hbh, hbh, hbhlen);
2969
2970 break;
2971 }
2972
2973 #ifdef RFC2292
2974 case IPV6_2292DSTOPTS:
2975 #endif
2976 case IPV6_DSTOPTS:
2977 case IPV6_RTHDRDSTOPTS:
2978 {
2979 struct ip6_dest *dest, **newdest = NULL;
2980 int destlen;
2981
2982 /* XXX: see the comment for IPV6_HOPOPTS */
2983 error = kauth_authorize_network(cred, KAUTH_NETWORK_IPV6,
2984 KAUTH_REQ_NETWORK_IPV6_HOPBYHOP, NULL, NULL, NULL);
2985 if (error)
2986 return (error);
2987
2988 if (len == 0) {
2989 ip6_clearpktopts(opt, optname);
2990 break; /* just remove the option */
2991 }
2992
2993 /* message length validation */
2994 if (len < sizeof(struct ip6_dest))
2995 return (EINVAL);
2996 dest = (struct ip6_dest *)buf;
2997 destlen = (dest->ip6d_len + 1) << 3;
2998 if (len != destlen)
2999 return (EINVAL);
3000 /*
3001 * Determine the position that the destination options header
3002 * should be inserted; before or after the routing header.
3003 */
3004 switch (optname) {
3005 case IPV6_2292DSTOPTS:
3006 /*
3007 * The old advanced API is ambiguous on this point.
3008 * Our approach is to determine the position based
3009 * according to the existence of a routing header.
3010 * Note, however, that this depends on the order of the
3011 * extension headers in the ancillary data; the 1st
3012 * part of the destination options header must appear
3013 * before the routing header in the ancillary data,
3014 * too.
3015 * RFC3542 solved the ambiguity by introducing
3016 * separate ancillary data or option types.
3017 */
3018 if (opt->ip6po_rthdr == NULL)
3019 newdest = &opt->ip6po_dest1;
3020 else
3021 newdest = &opt->ip6po_dest2;
3022 break;
3023 case IPV6_RTHDRDSTOPTS:
3024 newdest = &opt->ip6po_dest1;
3025 break;
3026 case IPV6_DSTOPTS:
3027 newdest = &opt->ip6po_dest2;
3028 break;
3029 }
3030
3031 /* turn off the previous option, then set the new option. */
3032 ip6_clearpktopts(opt, optname);
3033 *newdest = malloc(destlen, M_IP6OPT, M_NOWAIT);
3034 if (*newdest == NULL)
3035 return (ENOBUFS);
3036 memcpy(*newdest, dest, destlen);
3037
3038 break;
3039 }
3040
3041 #ifdef RFC2292
3042 case IPV6_2292RTHDR:
3043 #endif
3044 case IPV6_RTHDR:
3045 {
3046 struct ip6_rthdr *rth;
3047 int rthlen;
3048
3049 if (len == 0) {
3050 ip6_clearpktopts(opt, IPV6_RTHDR);
3051 break; /* just remove the option */
3052 }
3053
3054 /* message length validation */
3055 if (len < sizeof(struct ip6_rthdr))
3056 return (EINVAL);
3057 rth = (struct ip6_rthdr *)buf;
3058 rthlen = (rth->ip6r_len + 1) << 3;
3059 if (len != rthlen)
3060 return (EINVAL);
3061 switch (rth->ip6r_type) {
3062 case IPV6_RTHDR_TYPE_0:
3063 if (rth->ip6r_len == 0) /* must contain one addr */
3064 return (EINVAL);
3065 if (rth->ip6r_len % 2) /* length must be even */
3066 return (EINVAL);
3067 if (rth->ip6r_len / 2 != rth->ip6r_segleft)
3068 return (EINVAL);
3069 break;
3070 default:
3071 return (EINVAL); /* not supported */
3072 }
3073 /* turn off the previous option */
3074 ip6_clearpktopts(opt, IPV6_RTHDR);
3075 opt->ip6po_rthdr = malloc(rthlen, M_IP6OPT, M_NOWAIT);
3076 if (opt->ip6po_rthdr == NULL)
3077 return (ENOBUFS);
3078 memcpy(opt->ip6po_rthdr, rth, rthlen);
3079 break;
3080 }
3081
3082 case IPV6_USE_MIN_MTU:
3083 if (len != sizeof(int))
3084 return (EINVAL);
3085 minmtupolicy = *(int *)buf;
3086 if (minmtupolicy != IP6PO_MINMTU_MCASTONLY &&
3087 minmtupolicy != IP6PO_MINMTU_DISABLE &&
3088 minmtupolicy != IP6PO_MINMTU_ALL) {
3089 return (EINVAL);
3090 }
3091 opt->ip6po_minmtu = minmtupolicy;
3092 break;
3093
3094 case IPV6_DONTFRAG:
3095 if (len != sizeof(int))
3096 return (EINVAL);
3097
3098 if (uproto == IPPROTO_TCP || *(int *)buf == 0) {
3099 /*
3100 * we ignore this option for TCP sockets.
3101 * (RFC3542 leaves this case unspecified.)
3102 */
3103 opt->ip6po_flags &= ~IP6PO_DONTFRAG;
3104 } else
3105 opt->ip6po_flags |= IP6PO_DONTFRAG;
3106 break;
3107
3108 case IPV6_PREFER_TEMPADDR:
3109 {
3110 int preftemp;
3111
3112 if (len != sizeof(int))
3113 return (EINVAL);
3114 preftemp = *(int *)buf;
3115 switch (preftemp) {
3116 case IP6PO_TEMPADDR_SYSTEM:
3117 case IP6PO_TEMPADDR_NOTPREFER:
3118 case IP6PO_TEMPADDR_PREFER:
3119 break;
3120 default:
3121 return (EINVAL);
3122 }
3123 opt->ip6po_prefer_tempaddr = preftemp;
3124 break;
3125 }
3126
3127 default:
3128 return (ENOPROTOOPT);
3129 } /* end of switch */
3130
3131 return (0);
3132 }
3133
3134 /*
3135 * Routine called from ip6_output() to loop back a copy of an IP6 multicast
3136 * packet to the input queue of a specified interface. Note that this
3137 * calls the output routine of the loopback "driver", but with an interface
3138 * pointer that might NOT be lo0ifp -- easier than replicating that code here.
3139 */
3140 void
3141 ip6_mloopback(struct ifnet *ifp, struct mbuf *m,
3142 const struct sockaddr_in6 *dst)
3143 {
3144 struct mbuf *copym;
3145 struct ip6_hdr *ip6;
3146
3147 copym = m_copy(m, 0, M_COPYALL);
3148 if (copym == NULL)
3149 return;
3150
3151 /*
3152 * Make sure to deep-copy IPv6 header portion in case the data
3153 * is in an mbuf cluster, so that we can safely override the IPv6
3154 * header portion later.
3155 */
3156 if ((copym->m_flags & M_EXT) != 0 ||
3157 copym->m_len < sizeof(struct ip6_hdr)) {
3158 copym = m_pullup(copym, sizeof(struct ip6_hdr));
3159 if (copym == NULL)
3160 return;
3161 }
3162
3163 #ifdef DIAGNOSTIC
3164 if (copym->m_len < sizeof(*ip6)) {
3165 m_freem(copym);
3166 return;
3167 }
3168 #endif
3169
3170 ip6 = mtod(copym, struct ip6_hdr *);
3171 /*
3172 * clear embedded scope identifiers if necessary.
3173 * in6_clearscope will touch the addresses only when necessary.
3174 */
3175 in6_clearscope(&ip6->ip6_src);
3176 in6_clearscope(&ip6->ip6_dst);
3177
3178 (void)looutput(ifp, copym, (const struct sockaddr *)dst, NULL);
3179 }
3180
3181 /*
3182 * Chop IPv6 header off from the payload.
3183 */
3184 static int
3185 ip6_splithdr(struct mbuf *m, struct ip6_exthdrs *exthdrs)
3186 {
3187 struct mbuf *mh;
3188 struct ip6_hdr *ip6;
3189
3190 ip6 = mtod(m, struct ip6_hdr *);
3191 if (m->m_len > sizeof(*ip6)) {
3192 MGETHDR(mh, M_DONTWAIT, MT_HEADER);
3193 if (mh == 0) {
3194 m_freem(m);
3195 return ENOBUFS;
3196 }
3197 M_MOVE_PKTHDR(mh, m);
3198 MH_ALIGN(mh, sizeof(*ip6));
3199 m->m_len -= sizeof(*ip6);
3200 m->m_data += sizeof(*ip6);
3201 mh->m_next = m;
3202 m = mh;
3203 m->m_len = sizeof(*ip6);
3204 bcopy((void *)ip6, mtod(m, void *), sizeof(*ip6));
3205 }
3206 exthdrs->ip6e_ip6 = m;
3207 return 0;
3208 }
3209
3210 /*
3211 * Compute IPv6 extension header length.
3212 */
3213 int
3214 ip6_optlen(struct in6pcb *in6p)
3215 {
3216 int len;
3217
3218 if (!in6p->in6p_outputopts)
3219 return 0;
3220
3221 len = 0;
3222 #define elen(x) \
3223 (((struct ip6_ext *)(x)) ? (((struct ip6_ext *)(x))->ip6e_len + 1) << 3 : 0)
3224
3225 len += elen(in6p->in6p_outputopts->ip6po_hbh);
3226 len += elen(in6p->in6p_outputopts->ip6po_dest1);
3227 len += elen(in6p->in6p_outputopts->ip6po_rthdr);
3228 len += elen(in6p->in6p_outputopts->ip6po_dest2);
3229 return len;
3230 #undef elen
3231 }
3232