ip6_output.c revision 1.203.2.1 1 /* $NetBSD: ip6_output.c,v 1.203.2.1 2018/04/22 07:20:28 pgoyette Exp $ */
2 /* $KAME: ip6_output.c,v 1.172 2001/03/25 09:55:56 itojun Exp $ */
3
4 /*
5 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. Neither the name of the project nor the names of its contributors
17 * may be used to endorse or promote products derived from this software
18 * without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 */
32
33 /*
34 * Copyright (c) 1982, 1986, 1988, 1990, 1993
35 * The Regents of the University of California. All rights reserved.
36 *
37 * Redistribution and use in source and binary forms, with or without
38 * modification, are permitted provided that the following conditions
39 * are met:
40 * 1. Redistributions of source code must retain the above copyright
41 * notice, this list of conditions and the following disclaimer.
42 * 2. Redistributions in binary form must reproduce the above copyright
43 * notice, this list of conditions and the following disclaimer in the
44 * documentation and/or other materials provided with the distribution.
45 * 3. Neither the name of the University nor the names of its contributors
46 * may be used to endorse or promote products derived from this software
47 * without specific prior written permission.
48 *
49 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
50 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
53 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59 * SUCH DAMAGE.
60 *
61 * @(#)ip_output.c 8.3 (Berkeley) 1/21/94
62 */
63
64 #include <sys/cdefs.h>
65 __KERNEL_RCSID(0, "$NetBSD: ip6_output.c,v 1.203.2.1 2018/04/22 07:20:28 pgoyette Exp $");
66
67 #ifdef _KERNEL_OPT
68 #include "opt_inet.h"
69 #include "opt_inet6.h"
70 #include "opt_ipsec.h"
71 #endif
72
73 #include <sys/param.h>
74 #include <sys/malloc.h>
75 #include <sys/mbuf.h>
76 #include <sys/errno.h>
77 #include <sys/socket.h>
78 #include <sys/socketvar.h>
79 #include <sys/syslog.h>
80 #include <sys/systm.h>
81 #include <sys/proc.h>
82 #include <sys/kauth.h>
83
84 #include <net/if.h>
85 #include <net/route.h>
86 #include <net/pfil.h>
87
88 #include <netinet/in.h>
89 #include <netinet/in_var.h>
90 #include <netinet/ip6.h>
91 #include <netinet/ip_var.h>
92 #include <netinet/icmp6.h>
93 #include <netinet/in_offload.h>
94 #include <netinet/portalgo.h>
95 #include <netinet6/in6_offload.h>
96 #include <netinet6/ip6_var.h>
97 #include <netinet6/ip6_private.h>
98 #include <netinet6/in6_pcb.h>
99 #include <netinet6/nd6.h>
100 #include <netinet6/ip6protosw.h>
101 #include <netinet6/scope6_var.h>
102
103 #ifdef IPSEC
104 #include <netipsec/ipsec.h>
105 #include <netipsec/ipsec6.h>
106 #include <netipsec/key.h>
107 #endif
108
109
110 #include <net/net_osdep.h>
111
112 extern pfil_head_t *inet6_pfil_hook; /* XXX */
113
/*
 * Mbufs holding the individual pieces of an outgoing IPv6 header chain
 * while ip6_output() assembles it.  A NULL member means that header is
 * not present.
 */
struct ip6_exthdrs {
	struct mbuf *ip6e_ip6;		/* IPv6 header, once split from payload */
	struct mbuf *ip6e_hbh;		/* Hop-by-Hop options header */
	struct mbuf *ip6e_dest1;	/* Destination options header (1st part) */
	struct mbuf *ip6e_rthdr;	/* Routing header */
	struct mbuf *ip6e_dest2;	/* Destination options header (2nd part) */
};
121
122 static int ip6_pcbopt(int, u_char *, int, struct ip6_pktopts **,
123 kauth_cred_t, int);
124 static int ip6_getpcbopt(struct ip6_pktopts *, int, struct sockopt *);
125 static int ip6_setpktopt(int, u_char *, int, struct ip6_pktopts *, kauth_cred_t,
126 int, int, int);
127 static int ip6_setmoptions(const struct sockopt *, struct in6pcb *);
128 static int ip6_getmoptions(struct sockopt *, struct in6pcb *);
129 static int ip6_copyexthdr(struct mbuf **, void *, int);
130 static int ip6_insertfraghdr(struct mbuf *, struct mbuf *, int,
131 struct ip6_frag **);
132 static int ip6_insert_jumboopt(struct ip6_exthdrs *, u_int32_t);
133 static int ip6_splithdr(struct mbuf *, struct ip6_exthdrs *);
134 static int ip6_getpmtu(struct rtentry *, struct ifnet *, u_long *, int *);
135 static int copypktopts(struct ip6_pktopts *, struct ip6_pktopts *, int);
136 static int ip6_ifaddrvalid(const struct in6_addr *, const struct in6_addr *);
137 static int ip6_handle_rthdr(struct ip6_rthdr *, struct ip6_hdr *);
138
139 #ifdef RFC2292
140 static int ip6_pcbopts(struct ip6_pktopts **, struct socket *, struct sockopt *);
141 #endif
142
143 static int
144 ip6_handle_rthdr(struct ip6_rthdr *rh, struct ip6_hdr *ip6)
145 {
146 struct ip6_rthdr0 *rh0;
147 struct in6_addr *addr;
148 struct sockaddr_in6 sa;
149 int error = 0;
150
151 switch (rh->ip6r_type) {
152 case IPV6_RTHDR_TYPE_0:
153 rh0 = (struct ip6_rthdr0 *)rh;
154 addr = (struct in6_addr *)(rh0 + 1);
155
156 /*
157 * construct a sockaddr_in6 form of the first hop.
158 *
159 * XXX we may not have enough information about its scope zone;
160 * there is no standard API to pass the information from the
161 * application.
162 */
163 sockaddr_in6_init(&sa, addr, 0, 0, 0);
164 error = sa6_embedscope(&sa, ip6_use_defzone);
165 if (error != 0)
166 break;
167 memmove(&addr[0], &addr[1],
168 sizeof(struct in6_addr) * (rh0->ip6r0_segleft - 1));
169 addr[rh0->ip6r0_segleft - 1] = ip6->ip6_dst;
170 ip6->ip6_dst = sa.sin6_addr;
171 /* XXX */
172 in6_clearscope(addr + rh0->ip6r0_segleft - 1);
173 break;
174 default: /* is it possible? */
175 error = EINVAL;
176 }
177
178 return error;
179 }
180
181 /*
182 * Send an IP packet to a host.
183 */
184 int
185 ip6_if_output(struct ifnet * const ifp, struct ifnet * const origifp,
186 struct mbuf * const m, const struct sockaddr_in6 * const dst,
187 const struct rtentry *rt)
188 {
189 int error = 0;
190
191 if (rt != NULL) {
192 error = rt_check_reject_route(rt, ifp);
193 if (error != 0) {
194 m_freem(m);
195 return error;
196 }
197 }
198
199 if ((ifp->if_flags & IFF_LOOPBACK) != 0)
200 error = if_output_lock(ifp, origifp, m, sin6tocsa(dst), rt);
201 else
202 error = if_output_lock(ifp, ifp, m, sin6tocsa(dst), rt);
203 return error;
204 }
205
206 /*
207 * IP6 output. The packet in mbuf chain m contains a skeletal IP6
208 * header (with pri, len, nxt, hlim, src, dst).
209 *
210 * This function may modify ver and hlim only. The mbuf chain containing the
211 * packet will be freed. The mbuf opt, if present, will not be freed.
212 *
213 * Type of "mtu": rt_rmx.rmx_mtu is u_long, ifnet.ifr_mtu is int, and
214 * nd_ifinfo.linkmtu is u_int32_t. So we use u_long to hold largest one,
215 * which is rt_rmx.rmx_mtu.
216 */
217 int
218 ip6_output(
219 struct mbuf *m0,
220 struct ip6_pktopts *opt,
221 struct route *ro,
222 int flags,
223 struct ip6_moptions *im6o,
224 struct in6pcb *in6p,
225 struct ifnet **ifpp /* XXX: just for statistics */
226 )
227 {
228 struct ip6_hdr *ip6, *mhip6;
229 struct ifnet *ifp = NULL, *origifp = NULL;
230 struct mbuf *m = m0;
231 int tlen, len, off;
232 bool tso;
233 struct route ip6route;
234 struct rtentry *rt = NULL, *rt_pmtu;
235 const struct sockaddr_in6 *dst;
236 struct sockaddr_in6 src_sa, dst_sa;
237 int error = 0;
238 struct in6_ifaddr *ia = NULL;
239 u_long mtu;
240 int alwaysfrag, dontfrag;
241 u_int32_t optlen = 0, plen = 0, unfragpartlen = 0;
242 struct ip6_exthdrs exthdrs;
243 struct in6_addr finaldst, src0, dst0;
244 u_int32_t zone;
245 struct route *ro_pmtu = NULL;
246 int hdrsplit = 0;
247 int needipsec = 0;
248 #ifdef IPSEC
249 struct secpolicy *sp = NULL;
250 #endif
251 struct psref psref, psref_ia;
252 int bound = curlwp_bind();
253 bool release_psref_ia = false;
254
255 #ifdef DIAGNOSTIC
256 if ((m->m_flags & M_PKTHDR) == 0)
257 panic("ip6_output: no HDR");
258 if ((m->m_pkthdr.csum_flags &
259 (M_CSUM_TCPv4|M_CSUM_UDPv4|M_CSUM_TSOv4)) != 0) {
260 panic("ip6_output: IPv4 checksum offload flags: %d",
261 m->m_pkthdr.csum_flags);
262 }
263 if ((m->m_pkthdr.csum_flags & (M_CSUM_TCPv6|M_CSUM_UDPv6)) ==
264 (M_CSUM_TCPv6|M_CSUM_UDPv6)) {
265 panic("ip6_output: conflicting checksum offload flags: %d",
266 m->m_pkthdr.csum_flags);
267 }
268 #endif
269
270 M_CSUM_DATA_IPv6_HL_SET(m->m_pkthdr.csum_data, sizeof(struct ip6_hdr));
271
272 #define MAKE_EXTHDR(hp, mp) \
273 do { \
274 if (hp) { \
275 struct ip6_ext *eh = (struct ip6_ext *)(hp); \
276 error = ip6_copyexthdr((mp), (void *)(hp), \
277 ((eh)->ip6e_len + 1) << 3); \
278 if (error) \
279 goto freehdrs; \
280 } \
281 } while (/*CONSTCOND*/ 0)
282
283 memset(&exthdrs, 0, sizeof(exthdrs));
284 if (opt) {
285 /* Hop-by-Hop options header */
286 MAKE_EXTHDR(opt->ip6po_hbh, &exthdrs.ip6e_hbh);
287 /* Destination options header (1st part) */
288 MAKE_EXTHDR(opt->ip6po_dest1, &exthdrs.ip6e_dest1);
289 /* Routing header */
290 MAKE_EXTHDR(opt->ip6po_rthdr, &exthdrs.ip6e_rthdr);
291 /* Destination options header (2nd part) */
292 MAKE_EXTHDR(opt->ip6po_dest2, &exthdrs.ip6e_dest2);
293 }
294
295 /*
296 * Calculate the total length of the extension header chain.
297 * Keep the length of the unfragmentable part for fragmentation.
298 */
299 optlen = 0;
300 if (exthdrs.ip6e_hbh)
301 optlen += exthdrs.ip6e_hbh->m_len;
302 if (exthdrs.ip6e_dest1)
303 optlen += exthdrs.ip6e_dest1->m_len;
304 if (exthdrs.ip6e_rthdr)
305 optlen += exthdrs.ip6e_rthdr->m_len;
306 unfragpartlen = optlen + sizeof(struct ip6_hdr);
307 /* NOTE: we don't add AH/ESP length here. do that later. */
308 if (exthdrs.ip6e_dest2)
309 optlen += exthdrs.ip6e_dest2->m_len;
310
311 #ifdef IPSEC
312 if (ipsec_used) {
313 /* Check the security policy (SP) for the packet */
314 sp = ipsec6_check_policy(m, in6p, flags, &needipsec, &error);
315 if (error != 0) {
316 /*
317 * Hack: -EINVAL is used to signal that a packet
318 * should be silently discarded. This is typically
319 * because we asked key management for an SA and
320 * it was delayed (e.g. kicked up to IKE).
321 */
322 if (error == -EINVAL)
323 error = 0;
324 goto freehdrs;
325 }
326 }
327 #endif
328
329 if (needipsec &&
330 (m->m_pkthdr.csum_flags & (M_CSUM_UDPv6|M_CSUM_TCPv6)) != 0) {
331 in6_delayed_cksum(m);
332 m->m_pkthdr.csum_flags &= ~(M_CSUM_UDPv6|M_CSUM_TCPv6);
333 }
334
335 /*
336 * If we need IPsec, or there is at least one extension header,
337 * separate IP6 header from the payload.
338 */
339 if ((needipsec || optlen) && !hdrsplit) {
340 if ((error = ip6_splithdr(m, &exthdrs)) != 0) {
341 m = NULL;
342 goto freehdrs;
343 }
344 m = exthdrs.ip6e_ip6;
345 hdrsplit++;
346 }
347
348 /* adjust pointer */
349 ip6 = mtod(m, struct ip6_hdr *);
350
351 /* adjust mbuf packet header length */
352 m->m_pkthdr.len += optlen;
353 plen = m->m_pkthdr.len - sizeof(*ip6);
354
355 /* If this is a jumbo payload, insert a jumbo payload option. */
356 if (plen > IPV6_MAXPACKET) {
357 if (!hdrsplit) {
358 if ((error = ip6_splithdr(m, &exthdrs)) != 0) {
359 m = NULL;
360 goto freehdrs;
361 }
362 m = exthdrs.ip6e_ip6;
363 hdrsplit++;
364 }
365 /* adjust pointer */
366 ip6 = mtod(m, struct ip6_hdr *);
367 if ((error = ip6_insert_jumboopt(&exthdrs, plen)) != 0)
368 goto freehdrs;
369 optlen += 8; /* XXX JUMBOOPTLEN */
370 ip6->ip6_plen = 0;
371 } else
372 ip6->ip6_plen = htons(plen);
373
374 /*
375 * Concatenate headers and fill in next header fields.
376 * Here we have, on "m"
377 * IPv6 payload
378 * and we insert headers accordingly. Finally, we should be getting:
379 * IPv6 hbh dest1 rthdr ah* [esp* dest2 payload]
380 *
381 * during the header composing process, "m" points to IPv6 header.
382 * "mprev" points to an extension header prior to esp.
383 */
384 {
385 u_char *nexthdrp = &ip6->ip6_nxt;
386 struct mbuf *mprev = m;
387
388 /*
389 * we treat dest2 specially. this makes IPsec processing
390 * much easier. the goal here is to make mprev point the
391 * mbuf prior to dest2.
392 *
393 * result: IPv6 dest2 payload
394 * m and mprev will point to IPv6 header.
395 */
396 if (exthdrs.ip6e_dest2) {
397 if (!hdrsplit)
398 panic("assumption failed: hdr not split");
399 exthdrs.ip6e_dest2->m_next = m->m_next;
400 m->m_next = exthdrs.ip6e_dest2;
401 *mtod(exthdrs.ip6e_dest2, u_char *) = ip6->ip6_nxt;
402 ip6->ip6_nxt = IPPROTO_DSTOPTS;
403 }
404
405 #define MAKE_CHAIN(m, mp, p, i)\
406 do {\
407 if (m) {\
408 if (!hdrsplit) \
409 panic("assumption failed: hdr not split"); \
410 *mtod((m), u_char *) = *(p);\
411 *(p) = (i);\
412 p = mtod((m), u_char *);\
413 (m)->m_next = (mp)->m_next;\
414 (mp)->m_next = (m);\
415 (mp) = (m);\
416 }\
417 } while (/*CONSTCOND*/ 0)
418 /*
419 * result: IPv6 hbh dest1 rthdr dest2 payload
420 * m will point to IPv6 header. mprev will point to the
421 * extension header prior to dest2 (rthdr in the above case).
422 */
423 MAKE_CHAIN(exthdrs.ip6e_hbh, mprev, nexthdrp, IPPROTO_HOPOPTS);
424 MAKE_CHAIN(exthdrs.ip6e_dest1, mprev, nexthdrp,
425 IPPROTO_DSTOPTS);
426 MAKE_CHAIN(exthdrs.ip6e_rthdr, mprev, nexthdrp,
427 IPPROTO_ROUTING);
428
429 M_CSUM_DATA_IPv6_HL_SET(m->m_pkthdr.csum_data,
430 sizeof(struct ip6_hdr) + optlen);
431 }
432
433 /* Need to save for pmtu */
434 finaldst = ip6->ip6_dst;
435
436 /*
437 * If there is a routing header, replace destination address field
438 * with the first hop of the routing header.
439 */
440 if (exthdrs.ip6e_rthdr) {
441 struct ip6_rthdr *rh;
442
443 rh = mtod(exthdrs.ip6e_rthdr, struct ip6_rthdr *);
444
445 error = ip6_handle_rthdr(rh, ip6);
446 if (error != 0)
447 goto bad;
448 }
449
450 /* Source address validation */
451 if (IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_src) &&
452 (flags & IPV6_UNSPECSRC) == 0) {
453 error = EOPNOTSUPP;
454 IP6_STATINC(IP6_STAT_BADSCOPE);
455 goto bad;
456 }
457 if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_src)) {
458 error = EOPNOTSUPP;
459 IP6_STATINC(IP6_STAT_BADSCOPE);
460 goto bad;
461 }
462
463 IP6_STATINC(IP6_STAT_LOCALOUT);
464
465 /*
466 * Route packet.
467 */
468 /* initialize cached route */
469 if (ro == NULL) {
470 memset(&ip6route, 0, sizeof(ip6route));
471 ro = &ip6route;
472 }
473 ro_pmtu = ro;
474 if (opt && opt->ip6po_rthdr)
475 ro = &opt->ip6po_route;
476
477 /*
478 * if specified, try to fill in the traffic class field.
479 * do not override if a non-zero value is already set.
480 * we check the diffserv field and the ecn field separately.
481 */
482 if (opt && opt->ip6po_tclass >= 0) {
483 int mask = 0;
484
485 if ((ip6->ip6_flow & htonl(0xfc << 20)) == 0)
486 mask |= 0xfc;
487 if ((ip6->ip6_flow & htonl(0x03 << 20)) == 0)
488 mask |= 0x03;
489 if (mask != 0)
490 ip6->ip6_flow |= htonl((opt->ip6po_tclass & mask) << 20);
491 }
492
493 /* fill in or override the hop limit field, if necessary. */
494 if (opt && opt->ip6po_hlim != -1)
495 ip6->ip6_hlim = opt->ip6po_hlim & 0xff;
496 else if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
497 if (im6o != NULL)
498 ip6->ip6_hlim = im6o->im6o_multicast_hlim;
499 else
500 ip6->ip6_hlim = ip6_defmcasthlim;
501 }
502
503 #ifdef IPSEC
504 if (needipsec) {
505 int s = splsoftnet();
506 error = ipsec6_process_packet(m, sp->req);
507 splx(s);
508
509 /*
510 * Preserve KAME behaviour: ENOENT can be returned
511 * when an SA acquire is in progress. Don't propagate
512 * this to user-level; it confuses applications.
513 * XXX this will go away when the SADB is redone.
514 */
515 if (error == ENOENT)
516 error = 0;
517
518 goto done;
519 }
520 #endif
521
522 /* adjust pointer */
523 ip6 = mtod(m, struct ip6_hdr *);
524
525 sockaddr_in6_init(&dst_sa, &ip6->ip6_dst, 0, 0, 0);
526
527 /* We do not need a route for multicast */
528 if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
529 struct in6_pktinfo *pi = NULL;
530
531 /*
532 * If the outgoing interface for the address is specified by
533 * the caller, use it.
534 */
535 if (opt && (pi = opt->ip6po_pktinfo) != NULL) {
536 /* XXX boundary check is assumed to be already done. */
537 ifp = if_get_byindex(pi->ipi6_ifindex, &psref);
538 } else if (im6o != NULL) {
539 ifp = if_get_byindex(im6o->im6o_multicast_if_index,
540 &psref);
541 }
542 }
543
544 if (ifp == NULL) {
545 error = in6_selectroute(&dst_sa, opt, &ro, &rt, true);
546 if (error != 0)
547 goto bad;
548 ifp = if_get_byindex(rt->rt_ifp->if_index, &psref);
549 }
550
551 if (rt == NULL) {
552 /*
553 * If in6_selectroute() does not return a route entry,
554 * dst may not have been updated.
555 */
556 error = rtcache_setdst(ro, sin6tosa(&dst_sa));
557 if (error) {
558 goto bad;
559 }
560 }
561
562 /*
563 * then rt (for unicast) and ifp must be non-NULL valid values.
564 */
565 if ((flags & IPV6_FORWARDING) == 0) {
566 /* XXX: the FORWARDING flag can be set for mrouting. */
567 in6_ifstat_inc(ifp, ifs6_out_request);
568 }
569 if (rt != NULL) {
570 ia = (struct in6_ifaddr *)(rt->rt_ifa);
571 rt->rt_use++;
572 }
573
574 /*
575 * The outgoing interface must be in the zone of source and
576 * destination addresses. We should use ia_ifp to support the
577 * case of sending packets to an address of our own.
578 */
579 if (ia != NULL && ia->ia_ifp) {
580 origifp = ia->ia_ifp;
581 if (if_is_deactivated(origifp))
582 goto bad;
583 if_acquire(origifp, &psref_ia);
584 release_psref_ia = true;
585 } else
586 origifp = ifp;
587
588 src0 = ip6->ip6_src;
589 if (in6_setscope(&src0, origifp, &zone))
590 goto badscope;
591 sockaddr_in6_init(&src_sa, &ip6->ip6_src, 0, 0, 0);
592 if (sa6_recoverscope(&src_sa) || zone != src_sa.sin6_scope_id)
593 goto badscope;
594
595 dst0 = ip6->ip6_dst;
596 if (in6_setscope(&dst0, origifp, &zone))
597 goto badscope;
598 /* re-initialize to be sure */
599 sockaddr_in6_init(&dst_sa, &ip6->ip6_dst, 0, 0, 0);
600 if (sa6_recoverscope(&dst_sa) || zone != dst_sa.sin6_scope_id)
601 goto badscope;
602
603 /* scope check is done. */
604
605 /* Ensure we only send from a valid address. */
606 if ((ifp->if_flags & IFF_LOOPBACK) == 0 &&
607 (error = ip6_ifaddrvalid(&src0, &dst0)) != 0)
608 {
609 char ip6buf[INET6_ADDRSTRLEN];
610 nd6log(LOG_ERR,
611 "refusing to send from invalid address %s (pid %d)\n",
612 IN6_PRINT(ip6buf, &src0), curproc->p_pid);
613 IP6_STATINC(IP6_STAT_ODROPPED);
614 in6_ifstat_inc(origifp, ifs6_out_discard);
615 if (error == 1)
616 /*
617 * Address exists, but is tentative or detached.
618 * We can't send from it because it's invalid,
619 * so we drop the packet.
620 */
621 error = 0;
622 else
623 error = EADDRNOTAVAIL;
624 goto bad;
625 }
626
627 if (rt != NULL && (rt->rt_flags & RTF_GATEWAY) &&
628 !IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst))
629 dst = satocsin6(rt->rt_gateway);
630 else
631 dst = satocsin6(rtcache_getdst(ro));
632
633 /*
634 * XXXXXX: original code follows:
635 */
636 if (!IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst))
637 m->m_flags &= ~(M_BCAST | M_MCAST); /* just in case */
638 else {
639 bool ingroup;
640
641 m->m_flags = (m->m_flags & ~M_BCAST) | M_MCAST;
642
643 in6_ifstat_inc(ifp, ifs6_out_mcast);
644
645 /*
646 * Confirm that the outgoing interface supports multicast.
647 */
648 if (!(ifp->if_flags & IFF_MULTICAST)) {
649 IP6_STATINC(IP6_STAT_NOROUTE);
650 in6_ifstat_inc(ifp, ifs6_out_discard);
651 error = ENETUNREACH;
652 goto bad;
653 }
654
655 ingroup = in6_multi_group(&ip6->ip6_dst, ifp);
656 if (ingroup && (im6o == NULL || im6o->im6o_multicast_loop)) {
657 /*
658 * If we belong to the destination multicast group
659 * on the outgoing interface, and the caller did not
660 * forbid loopback, loop back a copy.
661 */
662 KASSERT(dst != NULL);
663 ip6_mloopback(ifp, m, dst);
664 } else {
665 /*
666 * If we are acting as a multicast router, perform
667 * multicast forwarding as if the packet had just
668 * arrived on the interface to which we are about
669 * to send. The multicast forwarding function
670 * recursively calls this function, using the
671 * IPV6_FORWARDING flag to prevent infinite recursion.
672 *
673 * Multicasts that are looped back by ip6_mloopback(),
674 * above, will be forwarded by the ip6_input() routine,
675 * if necessary.
676 */
677 if (ip6_mrouter && (flags & IPV6_FORWARDING) == 0) {
678 if (ip6_mforward(ip6, ifp, m) != 0) {
679 m_freem(m);
680 goto done;
681 }
682 }
683 }
684 /*
685 * Multicasts with a hoplimit of zero may be looped back,
686 * above, but must not be transmitted on a network.
687 * Also, multicasts addressed to the loopback interface
688 * are not sent -- the above call to ip6_mloopback() will
689 * loop back a copy if this host actually belongs to the
690 * destination group on the loopback interface.
691 */
692 if (ip6->ip6_hlim == 0 || (ifp->if_flags & IFF_LOOPBACK) ||
693 IN6_IS_ADDR_MC_INTFACELOCAL(&ip6->ip6_dst)) {
694 m_freem(m);
695 goto done;
696 }
697 }
698
699 /*
700 * Fill the outgoing inteface to tell the upper layer
701 * to increment per-interface statistics.
702 */
703 if (ifpp)
704 *ifpp = ifp;
705
706 /* Determine path MTU. */
707 /*
708 * ro_pmtu represent final destination while
709 * ro might represent immediate destination.
710 * Use ro_pmtu destination since MTU might differ.
711 */
712 if (ro_pmtu != ro) {
713 union {
714 struct sockaddr dst;
715 struct sockaddr_in6 dst6;
716 } u;
717
718 /* ro_pmtu may not have a cache */
719 sockaddr_in6_init(&u.dst6, &finaldst, 0, 0, 0);
720 rt_pmtu = rtcache_lookup(ro_pmtu, &u.dst);
721 } else
722 rt_pmtu = rt;
723 error = ip6_getpmtu(rt_pmtu, ifp, &mtu, &alwaysfrag);
724 if (rt_pmtu != NULL && rt_pmtu != rt)
725 rtcache_unref(rt_pmtu, ro_pmtu);
726 if (error != 0)
727 goto bad;
728
729 /*
730 * The caller of this function may specify to use the minimum MTU
731 * in some cases.
732 * An advanced API option (IPV6_USE_MIN_MTU) can also override MTU
733 * setting. The logic is a bit complicated; by default, unicast
734 * packets will follow path MTU while multicast packets will be sent at
735 * the minimum MTU. If IP6PO_MINMTU_ALL is specified, all packets
736 * including unicast ones will be sent at the minimum MTU. Multicast
737 * packets will always be sent at the minimum MTU unless
738 * IP6PO_MINMTU_DISABLE is explicitly specified.
739 * See RFC 3542 for more details.
740 */
741 if (mtu > IPV6_MMTU) {
742 if ((flags & IPV6_MINMTU))
743 mtu = IPV6_MMTU;
744 else if (opt && opt->ip6po_minmtu == IP6PO_MINMTU_ALL)
745 mtu = IPV6_MMTU;
746 else if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) &&
747 (opt == NULL ||
748 opt->ip6po_minmtu != IP6PO_MINMTU_DISABLE)) {
749 mtu = IPV6_MMTU;
750 }
751 }
752
753 /*
754 * clear embedded scope identifiers if necessary.
755 * in6_clearscope will touch the addresses only when necessary.
756 */
757 in6_clearscope(&ip6->ip6_src);
758 in6_clearscope(&ip6->ip6_dst);
759
760 /*
761 * If the outgoing packet contains a hop-by-hop options header,
762 * it must be examined and processed even by the source node.
763 * (RFC 2460, section 4.)
764 *
765 * XXX Is this really necessary?
766 */
767 if (ip6->ip6_nxt == IPPROTO_HOPOPTS) {
768 u_int32_t dummy1; /* XXX unused */
769 u_int32_t dummy2; /* XXX unused */
770 int hoff = sizeof(struct ip6_hdr);
771
772 if (ip6_hopopts_input(&dummy1, &dummy2, &m, &hoff)) {
773 /* m was already freed at this point */
774 error = EINVAL;
775 goto done;
776 }
777
778 ip6 = mtod(m, struct ip6_hdr *);
779 }
780
781 /*
782 * Run through list of hooks for output packets.
783 */
784 if ((error = pfil_run_hooks(inet6_pfil_hook, &m, ifp, PFIL_OUT)) != 0)
785 goto done;
786 if (m == NULL)
787 goto done;
788 ip6 = mtod(m, struct ip6_hdr *);
789
790 /*
791 * Send the packet to the outgoing interface.
792 * If necessary, do IPv6 fragmentation before sending.
793 *
794 * the logic here is rather complex:
795 * 1: normal case (dontfrag == 0, alwaysfrag == 0)
796 * 1-a: send as is if tlen <= path mtu
797 * 1-b: fragment if tlen > path mtu
798 *
799 * 2: if user asks us not to fragment (dontfrag == 1)
800 * 2-a: send as is if tlen <= interface mtu
801 * 2-b: error if tlen > interface mtu
802 *
803 * 3: if we always need to attach fragment header (alwaysfrag == 1)
804 * always fragment
805 *
806 * 4: if dontfrag == 1 && alwaysfrag == 1
807 * error, as we cannot handle this conflicting request
808 */
809 tlen = m->m_pkthdr.len;
810 tso = (m->m_pkthdr.csum_flags & M_CSUM_TSOv6) != 0;
811 if (opt && (opt->ip6po_flags & IP6PO_DONTFRAG))
812 dontfrag = 1;
813 else
814 dontfrag = 0;
815
816 if (dontfrag && alwaysfrag) { /* case 4 */
817 /* conflicting request - can't transmit */
818 error = EMSGSIZE;
819 goto bad;
820 }
821 if (dontfrag && (!tso && tlen > IN6_LINKMTU(ifp))) { /* case 2-b */
822 /*
823 * Even if the DONTFRAG option is specified, we cannot send the
824 * packet when the data length is larger than the MTU of the
825 * outgoing interface.
826 * Notify the error by sending IPV6_PATHMTU ancillary data as
827 * well as returning an error code (the latter is not described
828 * in the API spec.)
829 */
830 u_int32_t mtu32;
831 struct ip6ctlparam ip6cp;
832
833 mtu32 = (u_int32_t)mtu;
834 memset(&ip6cp, 0, sizeof(ip6cp));
835 ip6cp.ip6c_cmdarg = (void *)&mtu32;
836 pfctlinput2(PRC_MSGSIZE,
837 rtcache_getdst(ro_pmtu), &ip6cp);
838
839 error = EMSGSIZE;
840 goto bad;
841 }
842
843 /*
844 * transmit packet without fragmentation
845 */
846 if (dontfrag || (!alwaysfrag && (tlen <= mtu || tso))) {
847 /* case 1-a and 2-a */
848 struct in6_ifaddr *ia6;
849 int sw_csum;
850 int s;
851
852 ip6 = mtod(m, struct ip6_hdr *);
853 s = pserialize_read_enter();
854 ia6 = in6_ifawithifp(ifp, &ip6->ip6_src);
855 if (ia6) {
856 /* Record statistics for this interface address. */
857 ia6->ia_ifa.ifa_data.ifad_outbytes += m->m_pkthdr.len;
858 }
859 pserialize_read_exit(s);
860
861 sw_csum = m->m_pkthdr.csum_flags & ~ifp->if_csum_flags_tx;
862 if ((sw_csum & (M_CSUM_UDPv6|M_CSUM_TCPv6)) != 0) {
863 if (IN6_NEED_CHECKSUM(ifp,
864 sw_csum & (M_CSUM_UDPv6|M_CSUM_TCPv6))) {
865 in6_delayed_cksum(m);
866 }
867 m->m_pkthdr.csum_flags &= ~(M_CSUM_UDPv6|M_CSUM_TCPv6);
868 }
869
870 KASSERT(dst != NULL);
871 if (__predict_true(!tso ||
872 (ifp->if_capenable & IFCAP_TSOv6) != 0)) {
873 error = ip6_if_output(ifp, origifp, m, dst, rt);
874 } else {
875 error = ip6_tso_output(ifp, origifp, m, dst, rt);
876 }
877 goto done;
878 }
879
880 if (tso) {
881 error = EINVAL; /* XXX */
882 goto bad;
883 }
884
885 /*
886 * try to fragment the packet. case 1-b and 3
887 */
888 if (mtu < IPV6_MMTU) {
889 /* path MTU cannot be less than IPV6_MMTU */
890 error = EMSGSIZE;
891 in6_ifstat_inc(ifp, ifs6_out_fragfail);
892 goto bad;
893 } else if (ip6->ip6_plen == 0) {
894 /* jumbo payload cannot be fragmented */
895 error = EMSGSIZE;
896 in6_ifstat_inc(ifp, ifs6_out_fragfail);
897 goto bad;
898 } else {
899 const u_int32_t id = htonl(ip6_randomid());
900 struct mbuf **mnext, *m_frgpart;
901 const int hlen = unfragpartlen;
902 struct ip6_frag *ip6f;
903 u_char nextproto;
904 #if 0 /* see below */
905 struct ip6ctlparam ip6cp;
906 u_int32_t mtu32;
907 #endif
908
909 if (mtu > IPV6_MAXPACKET)
910 mtu = IPV6_MAXPACKET;
911
912 #if 0
913 /*
914 * It is believed this code is a leftover from the
915 * development of the IPV6_RECVPATHMTU sockopt and
916 * associated work to implement RFC3542.
917 * It's not entirely clear what the intent of the API
918 * is at this point, so disable this code for now.
919 * The IPV6_RECVPATHMTU sockopt and/or IPV6_DONTFRAG
920 * will send notifications if the application requests.
921 */
922
923 /* Notify a proper path MTU to applications. */
924 mtu32 = (u_int32_t)mtu;
925 memset(&ip6cp, 0, sizeof(ip6cp));
926 ip6cp.ip6c_cmdarg = (void *)&mtu32;
927 pfctlinput2(PRC_MSGSIZE,
928 rtcache_getdst(ro_pmtu), &ip6cp);
929 #endif
930
931 /*
932 * Must be able to put at least 8 bytes per fragment.
933 */
934 len = (mtu - hlen - sizeof(struct ip6_frag)) & ~7;
935 if (len < 8) {
936 error = EMSGSIZE;
937 in6_ifstat_inc(ifp, ifs6_out_fragfail);
938 goto bad;
939 }
940
941 mnext = &m->m_nextpkt;
942
943 /*
944 * Change the next header field of the last header in the
945 * unfragmentable part.
946 */
947 if (exthdrs.ip6e_rthdr) {
948 nextproto = *mtod(exthdrs.ip6e_rthdr, u_char *);
949 *mtod(exthdrs.ip6e_rthdr, u_char *) = IPPROTO_FRAGMENT;
950 } else if (exthdrs.ip6e_dest1) {
951 nextproto = *mtod(exthdrs.ip6e_dest1, u_char *);
952 *mtod(exthdrs.ip6e_dest1, u_char *) = IPPROTO_FRAGMENT;
953 } else if (exthdrs.ip6e_hbh) {
954 nextproto = *mtod(exthdrs.ip6e_hbh, u_char *);
955 *mtod(exthdrs.ip6e_hbh, u_char *) = IPPROTO_FRAGMENT;
956 } else {
957 nextproto = ip6->ip6_nxt;
958 ip6->ip6_nxt = IPPROTO_FRAGMENT;
959 }
960
961 if ((m->m_pkthdr.csum_flags & (M_CSUM_UDPv6|M_CSUM_TCPv6))
962 != 0) {
963 if (IN6_NEED_CHECKSUM(ifp,
964 m->m_pkthdr.csum_flags &
965 (M_CSUM_UDPv6|M_CSUM_TCPv6))) {
966 in6_delayed_cksum(m);
967 }
968 m->m_pkthdr.csum_flags &= ~(M_CSUM_UDPv6|M_CSUM_TCPv6);
969 }
970
971 /*
972 * Loop through length of segment after first fragment,
973 * make new header and copy data of each part and link onto
974 * chain.
975 */
976 m0 = m;
977 for (off = hlen; off < tlen; off += len) {
978 struct mbuf *mlast;
979
980 MGETHDR(m, M_DONTWAIT, MT_HEADER);
981 if (!m) {
982 error = ENOBUFS;
983 IP6_STATINC(IP6_STAT_ODROPPED);
984 goto sendorfree;
985 }
986 m_reset_rcvif(m);
987 m->m_flags = m0->m_flags & M_COPYFLAGS;
988 *mnext = m;
989 mnext = &m->m_nextpkt;
990 m->m_data += max_linkhdr;
991 mhip6 = mtod(m, struct ip6_hdr *);
992 *mhip6 = *ip6;
993 m->m_len = sizeof(*mhip6);
994
995 ip6f = NULL;
996 error = ip6_insertfraghdr(m0, m, hlen, &ip6f);
997 if (error) {
998 IP6_STATINC(IP6_STAT_ODROPPED);
999 goto sendorfree;
1000 }
1001
1002 /* Fill in the Frag6 Header */
1003 ip6f->ip6f_offlg = htons((u_int16_t)((off - hlen) & ~7));
1004 if (off + len >= tlen)
1005 len = tlen - off;
1006 else
1007 ip6f->ip6f_offlg |= IP6F_MORE_FRAG;
1008 ip6f->ip6f_reserved = 0;
1009 ip6f->ip6f_ident = id;
1010 ip6f->ip6f_nxt = nextproto;
1011
1012 mhip6->ip6_plen = htons((u_int16_t)(len + hlen +
1013 sizeof(*ip6f) - sizeof(struct ip6_hdr)));
1014 if ((m_frgpart = m_copy(m0, off, len)) == NULL) {
1015 error = ENOBUFS;
1016 IP6_STATINC(IP6_STAT_ODROPPED);
1017 goto sendorfree;
1018 }
1019 for (mlast = m; mlast->m_next; mlast = mlast->m_next)
1020 ;
1021 mlast->m_next = m_frgpart;
1022
1023 m->m_pkthdr.len = len + hlen + sizeof(*ip6f);
1024 m_reset_rcvif(m);
1025 IP6_STATINC(IP6_STAT_OFRAGMENTS);
1026 in6_ifstat_inc(ifp, ifs6_out_fragcreat);
1027 }
1028
1029 in6_ifstat_inc(ifp, ifs6_out_fragok);
1030 }
1031
1032 sendorfree:
1033 m = m0->m_nextpkt;
1034 m0->m_nextpkt = 0;
1035 m_freem(m0);
1036 for (m0 = m; m; m = m0) {
1037 m0 = m->m_nextpkt;
1038 m->m_nextpkt = 0;
1039 if (error == 0) {
1040 struct in6_ifaddr *ia6;
1041 int s;
1042 ip6 = mtod(m, struct ip6_hdr *);
1043 s = pserialize_read_enter();
1044 ia6 = in6_ifawithifp(ifp, &ip6->ip6_src);
1045 if (ia6) {
1046 /*
1047 * Record statistics for this interface
1048 * address.
1049 */
1050 ia6->ia_ifa.ifa_data.ifad_outbytes +=
1051 m->m_pkthdr.len;
1052 }
1053 pserialize_read_exit(s);
1054 KASSERT(dst != NULL);
1055 error = ip6_if_output(ifp, origifp, m, dst, rt);
1056 } else
1057 m_freem(m);
1058 }
1059
1060 if (error == 0)
1061 IP6_STATINC(IP6_STAT_FRAGMENTED);
1062
1063 done:
1064 rtcache_unref(rt, ro);
1065 if (ro == &ip6route)
1066 rtcache_free(&ip6route);
1067 #ifdef IPSEC
1068 if (sp != NULL)
1069 KEY_SP_UNREF(&sp);
1070 #endif
1071 if_put(ifp, &psref);
1072 if (release_psref_ia)
1073 if_put(origifp, &psref_ia);
1074 curlwp_bindx(bound);
1075
1076 return error;
1077
1078 freehdrs:
1079 m_freem(exthdrs.ip6e_hbh);
1080 m_freem(exthdrs.ip6e_dest1);
1081 m_freem(exthdrs.ip6e_rthdr);
1082 m_freem(exthdrs.ip6e_dest2);
1083 /* FALLTHROUGH */
1084 bad:
1085 m_freem(m);
1086 goto done;
1087
1088 badscope:
1089 IP6_STATINC(IP6_STAT_BADSCOPE);
1090 in6_ifstat_inc(origifp, ifs6_out_discard);
1091 if (error == 0)
1092 error = EHOSTUNREACH; /* XXX */
1093 goto bad;
1094 }
1095
1096 static int
1097 ip6_copyexthdr(struct mbuf **mp, void *hdr, int hlen)
1098 {
1099 struct mbuf *m;
1100
1101 if (hlen > MCLBYTES)
1102 return ENOBUFS; /* XXX */
1103
1104 MGET(m, M_DONTWAIT, MT_DATA);
1105 if (!m)
1106 return ENOBUFS;
1107
1108 if (hlen > MLEN) {
1109 MCLGET(m, M_DONTWAIT);
1110 if ((m->m_flags & M_EXT) == 0) {
1111 m_free(m);
1112 return ENOBUFS;
1113 }
1114 }
1115 m->m_len = hlen;
1116 if (hdr)
1117 memcpy(mtod(m, void *), hdr, hlen);
1118
1119 *mp = m;
1120 return 0;
1121 }
1122
1123 /*
1124 * Process a delayed payload checksum calculation.
1125 */
1126 void
1127 in6_delayed_cksum(struct mbuf *m)
1128 {
1129 uint16_t csum, offset;
1130
1131 KASSERT((m->m_pkthdr.csum_flags & (M_CSUM_UDPv6|M_CSUM_TCPv6)) != 0);
1132 KASSERT((~m->m_pkthdr.csum_flags & (M_CSUM_UDPv6|M_CSUM_TCPv6)) != 0);
1133 KASSERT((m->m_pkthdr.csum_flags
1134 & (M_CSUM_UDPv4|M_CSUM_TCPv4|M_CSUM_TSOv4)) == 0);
1135
1136 offset = M_CSUM_DATA_IPv6_HL(m->m_pkthdr.csum_data);
1137 csum = in6_cksum(m, 0, offset, m->m_pkthdr.len - offset);
1138 if (csum == 0 && (m->m_pkthdr.csum_flags & M_CSUM_UDPv6) != 0) {
1139 csum = 0xffff;
1140 }
1141
1142 offset += M_CSUM_DATA_IPv6_OFFSET(m->m_pkthdr.csum_data);
1143 if ((offset + sizeof(csum)) > m->m_len) {
1144 m_copyback(m, offset, sizeof(csum), &csum);
1145 } else {
1146 *(uint16_t *)(mtod(m, char *) + offset) = csum;
1147 }
1148 }
1149
1150 /*
1151 * Insert jumbo payload option.
1152 */
1153 static int
1154 ip6_insert_jumboopt(struct ip6_exthdrs *exthdrs, u_int32_t plen)
1155 {
1156 struct mbuf *mopt;
1157 u_int8_t *optbuf;
1158 u_int32_t v;
1159
1160 #define JUMBOOPTLEN 8 /* length of jumbo payload option and padding */
1161
1162 /*
1163 * If there is no hop-by-hop options header, allocate new one.
1164 * If there is one but it doesn't have enough space to store the
1165 * jumbo payload option, allocate a cluster to store the whole options.
1166 * Otherwise, use it to store the options.
1167 */
1168 if (exthdrs->ip6e_hbh == NULL) {
1169 MGET(mopt, M_DONTWAIT, MT_DATA);
1170 if (mopt == 0)
1171 return (ENOBUFS);
1172 mopt->m_len = JUMBOOPTLEN;
1173 optbuf = mtod(mopt, u_int8_t *);
1174 optbuf[1] = 0; /* = ((JUMBOOPTLEN) >> 3) - 1 */
1175 exthdrs->ip6e_hbh = mopt;
1176 } else {
1177 struct ip6_hbh *hbh;
1178
1179 mopt = exthdrs->ip6e_hbh;
1180 if (M_TRAILINGSPACE(mopt) < JUMBOOPTLEN) {
1181 const int oldoptlen = mopt->m_len;
1182 struct mbuf *n;
1183
1184 /*
1185 * Assumptions:
1186 * - exthdrs->ip6e_hbh is not referenced from places
1187 * other than exthdrs.
1188 * - exthdrs->ip6e_hbh is not an mbuf chain.
1189 */
1190 KASSERT(mopt->m_next == NULL);
1191
1192 /*
1193 * Give up if the whole (new) hbh header does not fit
1194 * even in an mbuf cluster.
1195 */
1196 if (oldoptlen + JUMBOOPTLEN > MCLBYTES)
1197 return ENOBUFS;
1198
1199 /*
1200 * At this point, we must always prepare a cluster.
1201 */
1202 MGET(n, M_DONTWAIT, MT_DATA);
1203 if (n) {
1204 MCLGET(n, M_DONTWAIT);
1205 if ((n->m_flags & M_EXT) == 0) {
1206 m_freem(n);
1207 n = NULL;
1208 }
1209 }
1210 if (!n)
1211 return ENOBUFS;
1212
1213 n->m_len = oldoptlen + JUMBOOPTLEN;
1214 bcopy(mtod(mopt, void *), mtod(n, void *),
1215 oldoptlen);
1216 optbuf = mtod(n, u_int8_t *) + oldoptlen;
1217 m_freem(mopt);
1218 mopt = exthdrs->ip6e_hbh = n;
1219 } else {
1220 optbuf = mtod(mopt, u_int8_t *) + mopt->m_len;
1221 mopt->m_len += JUMBOOPTLEN;
1222 }
1223 optbuf[0] = IP6OPT_PADN;
1224 optbuf[1] = 0;
1225
1226 /*
1227 * Adjust the header length according to the pad and
1228 * the jumbo payload option.
1229 */
1230 hbh = mtod(mopt, struct ip6_hbh *);
1231 hbh->ip6h_len += (JUMBOOPTLEN >> 3);
1232 }
1233
1234 /* fill in the option. */
1235 optbuf[2] = IP6OPT_JUMBO;
1236 optbuf[3] = 4;
1237 v = (u_int32_t)htonl(plen + JUMBOOPTLEN);
1238 memcpy(&optbuf[4], &v, sizeof(u_int32_t));
1239
1240 /* finally, adjust the packet header length */
1241 exthdrs->ip6e_ip6->m_pkthdr.len += JUMBOOPTLEN;
1242
1243 return 0;
1244 #undef JUMBOOPTLEN
1245 }
1246
1247 /*
1248 * Insert fragment header and copy unfragmentable header portions.
1249 *
1250 * *frghdrp will not be read, and it is guaranteed that either an
1251 * error is returned or that *frghdrp will point to space allocated
1252 * for the fragment header.
1253 *
1254 * On entry, m contains:
1255 * IPv6 Header
1256 * On exit, it contains:
1257 * IPv6 Header -> Unfragmentable Part -> Frag6 Header
1258 */
1259 static int
1260 ip6_insertfraghdr(struct mbuf *m0, struct mbuf *m, int hlen,
1261 struct ip6_frag **frghdrp)
1262 {
1263 struct mbuf *n, *mlast;
1264
1265 if (hlen > sizeof(struct ip6_hdr)) {
1266 n = m_copym(m0, sizeof(struct ip6_hdr),
1267 hlen - sizeof(struct ip6_hdr), M_DONTWAIT);
1268 if (n == NULL)
1269 return ENOBUFS;
1270 m->m_next = n;
1271 } else
1272 n = m;
1273
1274 /* Search for the last mbuf of unfragmentable part. */
1275 for (mlast = n; mlast->m_next; mlast = mlast->m_next)
1276 ;
1277
1278 if ((mlast->m_flags & M_EXT) == 0 &&
1279 M_TRAILINGSPACE(mlast) >= sizeof(struct ip6_frag)) {
1280 /* use the trailing space of the last mbuf for the fragment hdr */
1281 *frghdrp = (struct ip6_frag *)(mtod(mlast, char *) +
1282 mlast->m_len);
1283 mlast->m_len += sizeof(struct ip6_frag);
1284 } else {
1285 /* allocate a new mbuf for the fragment header */
1286 struct mbuf *mfrg;
1287
1288 MGET(mfrg, M_DONTWAIT, MT_DATA);
1289 if (mfrg == NULL)
1290 return ENOBUFS;
1291 mfrg->m_len = sizeof(struct ip6_frag);
1292 *frghdrp = mtod(mfrg, struct ip6_frag *);
1293 mlast->m_next = mfrg;
1294 }
1295
1296 return 0;
1297 }
1298
1299 static int
1300 ip6_getpmtu(struct rtentry *rt, struct ifnet *ifp, u_long *mtup,
1301 int *alwaysfragp)
1302 {
1303 u_int32_t mtu = 0;
1304 int alwaysfrag = 0;
1305 int error = 0;
1306
1307 if (rt != NULL) {
1308 u_int32_t ifmtu;
1309
1310 if (ifp == NULL)
1311 ifp = rt->rt_ifp;
1312 ifmtu = IN6_LINKMTU(ifp);
1313 mtu = rt->rt_rmx.rmx_mtu;
1314 if (mtu == 0)
1315 mtu = ifmtu;
1316 else if (mtu < IPV6_MMTU) {
1317 /*
1318 * RFC2460 section 5, last paragraph:
1319 * if we record ICMPv6 too big message with
1320 * mtu < IPV6_MMTU, transmit packets sized IPV6_MMTU
1321 * or smaller, with fragment header attached.
1322 * (fragment header is needed regardless from the
1323 * packet size, for translators to identify packets)
1324 */
1325 alwaysfrag = 1;
1326 mtu = IPV6_MMTU;
1327 } else if (mtu > ifmtu) {
1328 /*
1329 * The MTU on the route is larger than the MTU on
1330 * the interface! This shouldn't happen, unless the
1331 * MTU of the interface has been changed after the
1332 * interface was brought up. Change the MTU in the
1333 * route to match the interface MTU (as long as the
1334 * field isn't locked).
1335 */
1336 mtu = ifmtu;
1337 if (!(rt->rt_rmx.rmx_locks & RTV_MTU))
1338 rt->rt_rmx.rmx_mtu = mtu;
1339 }
1340 } else if (ifp) {
1341 mtu = IN6_LINKMTU(ifp);
1342 } else
1343 error = EHOSTUNREACH; /* XXX */
1344
1345 *mtup = mtu;
1346 if (alwaysfragp)
1347 *alwaysfragp = alwaysfrag;
1348 return (error);
1349 }
1350
1351 /*
 * IP6 socket option processing.
 *
 * op selects the operation (PRCO_SETOPT or PRCO_GETOPT), so is the
 * socket (asserted to be locked) and sopt carries the option level,
 * the option name and the option data.
 *
 * Level handling:
 *  - IPPROTO_IP: only the IPv4 multicast options listed in the first
 *    switch are accepted; they are passed to ip_getmoptions() or
 *    ip_setmoptions() using the pcb's in6p_v4moptions.  Any other
 *    option name yields ENOPROTOOPT, any other op yields EINVAL.
 *  - IPPROTO_IPV6: handled by the switch on op that follows.
 *  - any other level: ENOPROTOOPT.
 *
 * Returns 0 on success or an errno value.  Note that at the
 * IPPROTO_IPV6 level an op that is neither PRCO_SETOPT nor PRCO_GETOPT
 * matches no case and the function returns 0.
 *
 * The helper macros OPTSET, OPTSET2292 and OPTBIT are defined in the
 * middle of the PRCO_SETOPT switch.  OPTSET is undefined again after
 * the RFC3542 cases; OPTBIT stays defined and is also used by the
 * PRCO_GETOPT half.
 */
1354 int
1355 ip6_ctloutput(int op, struct socket *so, struct sockopt *sopt)
1356 {
1357 int optdatalen, uproto;
1358 void *optdata;
1359 struct in6pcb *in6p = sotoin6pcb(so);
1360 struct ip_moptions **mopts;
1361 int error, optval;
1362 int level, optname;
1363
1364 KASSERT(solocked(so));
1365 KASSERT(sopt != NULL);
1366
1367 level = sopt->sopt_level;
1368 optname = sopt->sopt_name;
1369
1370 error = optval = 0;
1371 uproto = (int)so->so_proto->pr_protocol;
1372
/*
 * First dispatch on the level.  Every path through the IPPROTO_IP case
 * returns directly; only IPPROTO_IPV6 continues below.
 */
1373 switch (level) {
1374 case IPPROTO_IP:
1375 switch (optname) {
1376 case IP_ADD_MEMBERSHIP:
1377 case IP_DROP_MEMBERSHIP:
1378 case IP_MULTICAST_IF:
1379 case IP_MULTICAST_LOOP:
1380 case IP_MULTICAST_TTL:
1381 mopts = &in6p->in6p_v4moptions;
1382 switch (op) {
1383 case PRCO_GETOPT:
1384 return ip_getmoptions(*mopts, sopt);
1385 case PRCO_SETOPT:
1386 return ip_setmoptions(mopts, sopt);
1387 default:
1388 return EINVAL;
1389 }
1390 default:
1391 return ENOPROTOOPT;
1392 }
1393 case IPPROTO_IPV6:
1394 break;
1395 default:
1396 return ENOPROTOOPT;
1397 }
1398 switch (op) {
1399 case PRCO_SETOPT:
1400 switch (optname) {
1401 #ifdef RFC2292
1402 case IPV6_2292PKTOPTIONS:
1403 error = ip6_pcbopts(&in6p->in6p_outputopts, so, sopt);
1404 break;
1405 #endif
1406
1407 /*
1408 * Use of some Hop-by-Hop options or some
1409 * Destination options, might require special
1410 * privilege. That is, normal applications
1411 * (without special privilege) might be forbidden
1412 * from setting certain options in outgoing packets,
1413 * and might never see certain options in received
1414 * packets. [RFC 2292 Section 6]
1415 * KAME specific note:
1416 * KAME prevents non-privileged users from sending or
1417 * receiving ANY hbh/dst options in order to avoid
1418 * overhead of parsing options in the kernel.
1419 */
1420 case IPV6_RECVHOPOPTS:
1421 case IPV6_RECVDSTOPTS:
1422 case IPV6_RECVRTHDRDSTOPTS:
1423 error = kauth_authorize_network(kauth_cred_get(),
1424 KAUTH_NETWORK_IPV6, KAUTH_REQ_NETWORK_IPV6_HOPBYHOP,
1425 NULL, NULL, NULL);
1426 if (error)
1427 break;
1428 /* FALLTHROUGH */
1429 case IPV6_UNICAST_HOPS:
1430 case IPV6_HOPLIMIT:
1431 case IPV6_FAITH:
1432
1433 case IPV6_RECVPKTINFO:
1434 case IPV6_RECVHOPLIMIT:
1435 case IPV6_RECVRTHDR:
1436 case IPV6_RECVPATHMTU:
1437 case IPV6_RECVTCLASS:
1438 case IPV6_V6ONLY:
/*
 * Every option in this group takes an int argument; fetch it once
 * and then dispatch on the option name again.
 */
1439 error = sockopt_getint(sopt, &optval);
1440 if (error)
1441 break;
1442 switch (optname) {
1443 case IPV6_UNICAST_HOPS:
1444 if (optval < -1 || optval >= 256)
1445 error = EINVAL;
1446 else {
1447 /* -1 = kernel default */
1448 in6p->in6p_hops = optval;
1449 }
1450 break;
/* OPTSET: set or clear (bit) in in6p_flags depending on optval. */
1451 #define OPTSET(bit) \
1452 do { \
1453 if (optval) \
1454 in6p->in6p_flags |= (bit); \
1455 else \
1456 in6p->in6p_flags &= ~(bit); \
1457 } while (/*CONSTCOND*/ 0)
1458
1459 #ifdef RFC2292
/*
 * OPTSET2292: like OPTSET, but additionally marks the pcb with
 * IN6P_RFC2292, which the RFC3542-style options below test for.
 */
1460 #define OPTSET2292(bit) \
1461 do { \
1462 in6p->in6p_flags |= IN6P_RFC2292; \
1463 if (optval) \
1464 in6p->in6p_flags |= (bit); \
1465 else \
1466 in6p->in6p_flags &= ~(bit); \
1467 } while (/*CONSTCOND*/ 0)
1468 #endif
1469
/* OPTBIT: 1 if any of (bit) is set in in6p_flags, else 0. */
1470 #define OPTBIT(bit) (in6p->in6p_flags & (bit) ? 1 : 0)
1471
1472 case IPV6_RECVPKTINFO:
1473 #ifdef RFC2292
1474 /* cannot mix with RFC2292 */
1475 if (OPTBIT(IN6P_RFC2292)) {
1476 error = EINVAL;
1477 break;
1478 }
1479 #endif
1480 OPTSET(IN6P_PKTINFO);
1481 break;
1482
1483 case IPV6_HOPLIMIT:
1484 {
1485 struct ip6_pktopts **optp;
1486
1487 #ifdef RFC2292
1488 /* cannot mix with RFC2292 */
1489 if (OPTBIT(IN6P_RFC2292)) {
1490 error = EINVAL;
1491 break;
1492 }
1493 #endif
/*
 * Unlike the flag options around it, this one is handed to
 * ip6_pcbopt() together with the pcb's in6p_outputopts.
 */
1494 optp = &in6p->in6p_outputopts;
1495 error = ip6_pcbopt(IPV6_HOPLIMIT,
1496 (u_char *)&optval,
1497 sizeof(optval),
1498 optp,
1499 kauth_cred_get(), uproto);
1500 break;
1501 }
1502
1503 case IPV6_RECVHOPLIMIT:
1504 #ifdef RFC2292
1505 /* cannot mix with RFC2292 */
1506 if (OPTBIT(IN6P_RFC2292)) {
1507 error = EINVAL;
1508 break;
1509 }
1510 #endif
1511 OPTSET(IN6P_HOPLIMIT);
1512 break;
1513
1514 case IPV6_RECVHOPOPTS:
1515 #ifdef RFC2292
1516 /* cannot mix with RFC2292 */
1517 if (OPTBIT(IN6P_RFC2292)) {
1518 error = EINVAL;
1519 break;
1520 }
1521 #endif
1522 OPTSET(IN6P_HOPOPTS);
1523 break;
1524
1525 case IPV6_RECVDSTOPTS:
1526 #ifdef RFC2292
1527 /* cannot mix with RFC2292 */
1528 if (OPTBIT(IN6P_RFC2292)) {
1529 error = EINVAL;
1530 break;
1531 }
1532 #endif
1533 OPTSET(IN6P_DSTOPTS);
1534 break;
1535
1536 case IPV6_RECVRTHDRDSTOPTS:
1537 #ifdef RFC2292
1538 /* cannot mix with RFC2292 */
1539 if (OPTBIT(IN6P_RFC2292)) {
1540 error = EINVAL;
1541 break;
1542 }
1543 #endif
1544 OPTSET(IN6P_RTHDRDSTOPTS);
1545 break;
1546
1547 case IPV6_RECVRTHDR:
1548 #ifdef RFC2292
1549 /* cannot mix with RFC2292 */
1550 if (OPTBIT(IN6P_RFC2292)) {
1551 error = EINVAL;
1552 break;
1553 }
1554 #endif
1555 OPTSET(IN6P_RTHDR);
1556 break;
1557
1558 case IPV6_FAITH:
1559 OPTSET(IN6P_FAITH);
1560 break;
1561
1562 case IPV6_RECVPATHMTU:
1563 /*
1564 * We ignore this option for TCP
1565 * sockets.
1566 * (RFC3542 leaves this case
1567 * unspecified.)
1568 */
1569 if (uproto != IPPROTO_TCP)
1570 OPTSET(IN6P_MTU);
1571 break;
1572
1573 case IPV6_V6ONLY:
1574 /*
1575 * make setsockopt(IPV6_V6ONLY)
1576 * available only prior to bind(2).
1577 * see ipng mailing list, Jun 22 2001.
1578 */
1579 if (in6p->in6p_lport ||
1580 !IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_laddr)) {
1581 error = EINVAL;
1582 break;
1583 }
/*
 * With INET6_BINDV6ONLY defined the flag is not modified here; a
 * zero value is rejected and a non-zero value is accepted as a no-op.
 */
1584 #ifdef INET6_BINDV6ONLY
1585 if (!optval)
1586 error = EINVAL;
1587 #else
1588 OPTSET(IN6P_IPV6_V6ONLY);
1589 #endif
1590 break;
1591 case IPV6_RECVTCLASS:
1592 #ifdef RFC2292
1593 /* cannot mix with RFC2292 XXX */
1594 if (OPTBIT(IN6P_RFC2292)) {
1595 error = EINVAL;
1596 break;
1597 }
1598 #endif
1599 OPTSET(IN6P_TCLASS);
1600 break;
1601
1602 }
1603 break;
1604
1605 case IPV6_OTCLASS:
1606 {
1607 struct ip6_pktopts **optp;
1608 u_int8_t tclass;
1609
/* This variant takes a single u_int8_t rather than an int. */
1610 error = sockopt_get(sopt, &tclass, sizeof(tclass));
1611 if (error)
1612 break;
1613 optp = &in6p->in6p_outputopts;
1614 error = ip6_pcbopt(optname,
1615 (u_char *)&tclass,
1616 sizeof(tclass),
1617 optp,
1618 kauth_cred_get(), uproto);
1619 break;
1620 }
1621
1622 case IPV6_TCLASS:
1623 case IPV6_DONTFRAG:
1624 case IPV6_USE_MIN_MTU:
1625 case IPV6_PREFER_TEMPADDR:
/* int-valued options handed straight to ip6_pcbopt(). */
1626 error = sockopt_getint(sopt, &optval);
1627 if (error)
1628 break;
1629 {
1630 struct ip6_pktopts **optp;
1631 optp = &in6p->in6p_outputopts;
1632 error = ip6_pcbopt(optname,
1633 (u_char *)&optval,
1634 sizeof(optval),
1635 optp,
1636 kauth_cred_get(), uproto);
1637 break;
1638 }
1639
1640 #ifdef RFC2292
1641 case IPV6_2292PKTINFO:
1642 case IPV6_2292HOPLIMIT:
1643 case IPV6_2292HOPOPTS:
1644 case IPV6_2292DSTOPTS:
1645 case IPV6_2292RTHDR:
1646 /* RFC 2292 */
1647 error = sockopt_getint(sopt, &optval);
1648 if (error)
1649 break;
1650
1651 switch (optname) {
1652 case IPV6_2292PKTINFO:
1653 OPTSET2292(IN6P_PKTINFO);
1654 break;
1655 case IPV6_2292HOPLIMIT:
1656 OPTSET2292(IN6P_HOPLIMIT);
1657 break;
1658 case IPV6_2292HOPOPTS:
1659 /*
1660 * Check super-user privilege.
1661 * See comments for IPV6_RECVHOPOPTS.
1662 */
1663 error =
1664 kauth_authorize_network(kauth_cred_get(),
1665 KAUTH_NETWORK_IPV6,
1666 KAUTH_REQ_NETWORK_IPV6_HOPBYHOP, NULL,
1667 NULL, NULL);
/*
 * Returns directly instead of using break; the result is the same
 * because the break path only leads to the final return (error).
 */
1668 if (error)
1669 return (error);
1670 OPTSET2292(IN6P_HOPOPTS);
1671 break;
1672 case IPV6_2292DSTOPTS:
1673 error =
1674 kauth_authorize_network(kauth_cred_get(),
1675 KAUTH_NETWORK_IPV6,
1676 KAUTH_REQ_NETWORK_IPV6_HOPBYHOP, NULL,
1677 NULL, NULL);
1678 if (error)
1679 return (error);
1680 OPTSET2292(IN6P_DSTOPTS|IN6P_RTHDRDSTOPTS); /* XXX */
1681 break;
1682 case IPV6_2292RTHDR:
1683 OPTSET2292(IN6P_RTHDR);
1684 break;
1685 }
1686 break;
1687 #endif
1688 case IPV6_PKTINFO:
1689 case IPV6_HOPOPTS:
1690 case IPV6_RTHDR:
1691 case IPV6_DSTOPTS:
1692 case IPV6_RTHDRDSTOPTS:
1693 case IPV6_NEXTHOP: {
1694 /* new advanced API (RFC3542) */
1695 void *optbuf;
1696 int optbuflen;
1697 struct ip6_pktopts **optp;
1698
1699 #ifdef RFC2292
1700 /* cannot mix with RFC2292 */
1701 if (OPTBIT(IN6P_RFC2292)) {
1702 error = EINVAL;
1703 break;
1704 }
1705 #endif
1706
/*
 * Copy the option data into a temporary buffer of sopt_size bytes,
 * pass it to ip6_pcbopt(), and free the temporary buffer on every
 * path once it has been allocated.
 */
1707 optbuflen = sopt->sopt_size;
1708 optbuf = malloc(optbuflen, M_IP6OPT, M_NOWAIT);
1709 if (optbuf == NULL) {
1710 error = ENOBUFS;
1711 break;
1712 }
1713
1714 error = sockopt_get(sopt, optbuf, optbuflen);
1715 if (error) {
1716 free(optbuf, M_IP6OPT);
1717 break;
1718 }
1719 optp = &in6p->in6p_outputopts;
1720 error = ip6_pcbopt(optname, optbuf, optbuflen,
1721 optp, kauth_cred_get(), uproto);
1722
1723 free(optbuf, M_IP6OPT);
1724 break;
1725 }
1726 #undef OPTSET
1727
1728 case IPV6_MULTICAST_IF:
1729 case IPV6_MULTICAST_HOPS:
1730 case IPV6_MULTICAST_LOOP:
1731 case IPV6_JOIN_GROUP:
1732 case IPV6_LEAVE_GROUP:
1733 error = ip6_setmoptions(sopt, in6p);
1734 break;
1735
1736 case IPV6_PORTRANGE:
1737 error = sockopt_getint(sopt, &optval);
1738 if (error)
1739 break;
1740
/*
 * IN6P_LOWPORT and IN6P_HIGHPORT are kept mutually exclusive: each
 * case clears the other flag before setting its own.
 */
1741 switch (optval) {
1742 case IPV6_PORTRANGE_DEFAULT:
1743 in6p->in6p_flags &= ~(IN6P_LOWPORT);
1744 in6p->in6p_flags &= ~(IN6P_HIGHPORT);
1745 break;
1746
1747 case IPV6_PORTRANGE_HIGH:
1748 in6p->in6p_flags &= ~(IN6P_LOWPORT);
1749 in6p->in6p_flags |= IN6P_HIGHPORT;
1750 break;
1751
1752 case IPV6_PORTRANGE_LOW:
1753 in6p->in6p_flags &= ~(IN6P_HIGHPORT);
1754 in6p->in6p_flags |= IN6P_LOWPORT;
1755 break;
1756
1757 default:
1758 error = EINVAL;
1759 break;
1760 }
1761 break;
1762
1763 case IPV6_PORTALGO:
1764 error = sockopt_getint(sopt, &optval);
1765 if (error)
1766 break;
1767
1768 error = portalgo_algo_index_select(
1769 (struct inpcb_hdr *)in6p, optval);
1770 break;
1771
1772 #if defined(IPSEC)
1773 case IPV6_IPSEC_POLICY:
/*
 * When ipsec_enabled is false this case falls through to the
 * default below and reports ENOPROTOOPT.
 */
1774 if (ipsec_enabled) {
1775 error = ipsec_set_policy(in6p, optname,
1776 sopt->sopt_data, sopt->sopt_size,
1777 kauth_cred_get());
1778 break;
1779 }
1780 /*FALLTHROUGH*/
1781 #endif /* IPSEC */
1782
1783 default:
1784 error = ENOPROTOOPT;
1785 break;
1786 }
1787 break;
1788
1789 case PRCO_GETOPT:
1790 switch (optname) {
1791 #ifdef RFC2292
1792 case IPV6_2292PKTOPTIONS:
1793 /*
1794 * RFC3542 (effectively) deprecated the
1795 * semantics of the 2292-style pktoptions.
1796 * Since it was not reliable in nature (i.e.,
1797 * applications had to expect the lack of some
1798 * information after all), it would make sense
1799 * to simplify this part by always returning
1800 * empty data.
1801 */
1802 break;
1803 #endif
1804
1805 case IPV6_RECVHOPOPTS:
1806 case IPV6_RECVDSTOPTS:
1807 case IPV6_RECVRTHDRDSTOPTS:
1808 case IPV6_UNICAST_HOPS:
1809 case IPV6_RECVPKTINFO:
1810 case IPV6_RECVHOPLIMIT:
1811 case IPV6_RECVRTHDR:
1812 case IPV6_RECVPATHMTU:
1813
1814 case IPV6_FAITH:
1815 case IPV6_V6ONLY:
1816 case IPV6_PORTRANGE:
1817 case IPV6_RECVTCLASS:
/*
 * int-valued options: compute optval from the pcb, then return it
 * with sockopt_setint() after the inner switch.
 */
1818 switch (optname) {
1819
1820 case IPV6_RECVHOPOPTS:
1821 optval = OPTBIT(IN6P_HOPOPTS);
1822 break;
1823
1824 case IPV6_RECVDSTOPTS:
1825 optval = OPTBIT(IN6P_DSTOPTS);
1826 break;
1827
1828 case IPV6_RECVRTHDRDSTOPTS:
1829 optval = OPTBIT(IN6P_RTHDRDSTOPTS);
1830 break;
1831
1832 case IPV6_UNICAST_HOPS:
1833 optval = in6p->in6p_hops;
1834 break;
1835
1836 case IPV6_RECVPKTINFO:
1837 optval = OPTBIT(IN6P_PKTINFO);
1838 break;
1839
1840 case IPV6_RECVHOPLIMIT:
1841 optval = OPTBIT(IN6P_HOPLIMIT);
1842 break;
1843
1844 case IPV6_RECVRTHDR:
1845 optval = OPTBIT(IN6P_RTHDR);
1846 break;
1847
1848 case IPV6_RECVPATHMTU:
1849 optval = OPTBIT(IN6P_MTU);
1850 break;
1851
1852 case IPV6_FAITH:
1853 optval = OPTBIT(IN6P_FAITH);
1854 break;
1855
1856 case IPV6_V6ONLY:
1857 optval = OPTBIT(IN6P_IPV6_V6ONLY);
1858 break;
1859
1860 case IPV6_PORTRANGE:
1861 {
1862 int flags;
1863 flags = in6p->in6p_flags;
1864 if (flags & IN6P_HIGHPORT)
1865 optval = IPV6_PORTRANGE_HIGH;
1866 else if (flags & IN6P_LOWPORT)
1867 optval = IPV6_PORTRANGE_LOW;
1868 else
1869 optval = 0;
1870 break;
1871 }
1872 case IPV6_RECVTCLASS:
1873 optval = OPTBIT(IN6P_TCLASS);
1874 break;
1875
1876 }
/*
 * Nothing on this path assigns error after its initialisation to 0,
 * so this check does not fire as the code stands.
 */
1877 if (error)
1878 break;
1879 error = sockopt_setint(sopt, optval);
1880 break;
1881
1882 case IPV6_PATHMTU:
1883 {
1884 u_long pmtu = 0;
1885 struct ip6_mtuinfo mtuinfo;
1886 struct route *ro = &in6p->in6p_route;
1887 struct rtentry *rt;
1888 union {
1889 struct sockaddr dst;
1890 struct sockaddr_in6 dst6;
1891 } u;
1892
1893 if (!(so->so_state & SS_ISCONNECTED))
1894 return (ENOTCONN);
1895 /*
1896 * XXX: we do not consider the case of source
1897 * routing, or optional information to specify
1898 * the outgoing interface.
1899 */
/*
 * Look up the route to the connected peer through the pcb's cached
 * route and ask ip6_getpmtu() for the MTU.  No interface is passed,
 * so the result depends on the route alone.
 */
1900 sockaddr_in6_init(&u.dst6, &in6p->in6p_faddr, 0, 0, 0);
1901 rt = rtcache_lookup(ro, &u.dst);
1902 error = ip6_getpmtu(rt, NULL, &pmtu, NULL);
1903 rtcache_unref(rt, ro);
1904 if (error)
1905 break;
1906 if (pmtu > IPV6_MAXPACKET)
1907 pmtu = IPV6_MAXPACKET;
1908
1909 memset(&mtuinfo, 0, sizeof(mtuinfo));
1910 mtuinfo.ip6m_mtu = (u_int32_t)pmtu;
1911 optdata = (void *)&mtuinfo;
1912 optdatalen = sizeof(mtuinfo);
/*
 * NOTE(review): optdatalen is sizeof(mtuinfo) here, so this size
 * check presumably never triggers -- confirm before relying on it.
 */
1913 if (optdatalen > MCLBYTES)
1914 return (EMSGSIZE); /* XXX */
1915 error = sockopt_set(sopt, optdata, optdatalen);
1916 break;
1917 }
1918
1919 #ifdef RFC2292
1920 case IPV6_2292PKTINFO:
1921 case IPV6_2292HOPLIMIT:
1922 case IPV6_2292HOPOPTS:
1923 case IPV6_2292RTHDR:
1924 case IPV6_2292DSTOPTS:
1925 switch (optname) {
1926 case IPV6_2292PKTINFO:
1927 optval = OPTBIT(IN6P_PKTINFO);
1928 break;
1929 case IPV6_2292HOPLIMIT:
1930 optval = OPTBIT(IN6P_HOPLIMIT);
1931 break;
1932 case IPV6_2292HOPOPTS:
1933 optval = OPTBIT(IN6P_HOPOPTS);
1934 break;
1935 case IPV6_2292RTHDR:
1936 optval = OPTBIT(IN6P_RTHDR);
1937 break;
1938 case IPV6_2292DSTOPTS:
/* Reports 1 if either of the two flags is set. */
1939 optval = OPTBIT(IN6P_DSTOPTS|IN6P_RTHDRDSTOPTS);
1940 break;
1941 }
1942 error = sockopt_setint(sopt, optval);
1943 break;
1944 #endif
1945 case IPV6_PKTINFO:
1946 case IPV6_HOPOPTS:
1947 case IPV6_RTHDR:
1948 case IPV6_DSTOPTS:
1949 case IPV6_RTHDRDSTOPTS:
1950 case IPV6_NEXTHOP:
1951 case IPV6_OTCLASS:
1952 case IPV6_TCLASS:
1953 case IPV6_DONTFRAG:
1954 case IPV6_USE_MIN_MTU:
1955 case IPV6_PREFER_TEMPADDR:
/* Options stored in in6p_outputopts are read back by ip6_getpcbopt(). */
1956 error = ip6_getpcbopt(in6p->in6p_outputopts,
1957 optname, sopt);
1958 break;
1959
1960 case IPV6_MULTICAST_IF:
1961 case IPV6_MULTICAST_HOPS:
1962 case IPV6_MULTICAST_LOOP:
1963 case IPV6_JOIN_GROUP:
1964 case IPV6_LEAVE_GROUP:
1965 error = ip6_getmoptions(sopt, in6p);
1966 break;
1967
1968 case IPV6_PORTALGO:
1969 optval = ((struct inpcb_hdr *)in6p)->inph_portalgo;
1970 error = sockopt_setint(sopt, optval);
1971 break;
1972
1973 #if defined(IPSEC)
1974 case IPV6_IPSEC_POLICY:
/*
 * The get path tests ipsec_used, whereas the set path above tests
 * ipsec_enabled.  When the test fails this falls through to the
 * default below (ENOPROTOOPT).
 */
1975 if (ipsec_used) {
1976 struct mbuf *m = NULL;
1977
1978 /*
1979 * XXX: this will return EINVAL as sopt is
1980 * empty
1981 */
1982 error = ipsec_get_policy(in6p, sopt->sopt_data,
1983 sopt->sopt_size, &m);
1984 if (!error)
1985 error = sockopt_setmbuf(sopt, m);
1986 break;
1987 }
1988 /*FALLTHROUGH*/
1989 #endif /* IPSEC */
1990
1991 default:
1992 error = ENOPROTOOPT;
1993 break;
1994 }
1995 break;
1996 }
1997 return (error);
1998 }
1999
2000 int
2001 ip6_raw_ctloutput(int op, struct socket *so, struct sockopt *sopt)
2002 {
2003 int error = 0, optval;
2004 const int icmp6off = offsetof(struct icmp6_hdr, icmp6_cksum);
2005 struct in6pcb *in6p = sotoin6pcb(so);
2006 int level, optname;
2007
2008 KASSERT(sopt != NULL);
2009
2010 level = sopt->sopt_level;
2011 optname = sopt->sopt_name;
2012
2013 if (level != IPPROTO_IPV6) {
2014 return ENOPROTOOPT;
2015 }
2016
2017 switch (optname) {
2018 case IPV6_CHECKSUM:
2019 /*
2020 * For ICMPv6 sockets, no modification allowed for checksum
2021 * offset, permit "no change" values to help existing apps.
2022 *
2023 * XXX RFC3542 says: "An attempt to set IPV6_CHECKSUM
2024 * for an ICMPv6 socket will fail." The current
2025 * behavior does not meet RFC3542.
2026 */
2027 switch (op) {
2028 case PRCO_SETOPT:
2029 error = sockopt_getint(sopt, &optval);
2030 if (error)
2031 break;
2032 if ((optval % 2) != 0) {
2033 /* the API assumes even offset values */
2034 error = EINVAL;
2035 } else if (so->so_proto->pr_protocol ==
2036 IPPROTO_ICMPV6) {
2037 if (optval != icmp6off)
2038 error = EINVAL;
2039 } else
2040 in6p->in6p_cksum = optval;
2041 break;
2042
2043 case PRCO_GETOPT:
2044 if (so->so_proto->pr_protocol == IPPROTO_ICMPV6)
2045 optval = icmp6off;
2046 else
2047 optval = in6p->in6p_cksum;
2048
2049 error = sockopt_setint(sopt, optval);
2050 break;
2051
2052 default:
2053 error = EINVAL;
2054 break;
2055 }
2056 break;
2057
2058 default:
2059 error = ENOPROTOOPT;
2060 break;
2061 }
2062
2063 return (error);
2064 }
2065
#ifdef RFC2292
/*
 * Set up IP6 options in pcb for insertion in output packets or
 * specifying behavior of outgoing packets.
 *
 * Any options already held in *pktopt are cleared first.  If sopt is
 * NULL or carries no data, the option structure is freed, *pktopt is
 * left NULL and 0 is returned.  Otherwise the option data is taken as
 * an mbuf and parsed by ip6_setpktopts(); on success *pktopt points to
 * the resulting options, on failure *pktopt is NULL.
 *
 * Returns 0 on success, ENOBUFS when memory or the option mbuf cannot
 * be obtained, or the error reported by ip6_setpktopts().
 */
static int
ip6_pcbopts(struct ip6_pktopts **pktopt, struct socket *so,
    struct sockopt *sopt)
{
	struct ip6_pktopts *opt;
	struct mbuf *m;
	int error;

	KASSERT(solocked(so));

	opt = *pktopt;
	if (opt == NULL) {
		/* Nothing set so far; get storage for the new options. */
		opt = malloc(sizeof(*opt), M_IP6OPT, M_NOWAIT);
		if (opt == NULL)
			return (ENOBUFS);
	} else {
		/* Turn off any old options. */
#ifdef DIAGNOSTIC
		if (opt->ip6po_pktinfo || opt->ip6po_nexthop ||
		    opt->ip6po_hbh || opt->ip6po_dest1 || opt->ip6po_dest2 ||
		    opt->ip6po_rhinfo.ip6po_rhi_rthdr)
			printf("ip6_pcbopts: all specified options are cleared.\n");
#endif
		ip6_clearpktopts(opt, -1);
	}
	*pktopt = NULL;

	if (sopt == NULL || sopt->sopt_size == 0) {
		/*
		 * Only turning off any previous options, regardless of
		 * whether the opt is just created or given.
		 */
		free(opt, M_IP6OPT);
		return (0);
	}

	/* Set options specified by user. */
	m = sockopt_getmbuf(sopt);
	if (m == NULL) {
		free(opt, M_IP6OPT);
		return (ENOBUFS);
	}

	error = ip6_setpktopts(m, opt, NULL, kauth_cred_get(),
	    so->so_proto->pr_protocol);
	m_freem(m);
	if (error == 0) {
		*pktopt = opt;
		return (0);
	}

	ip6_clearpktopts(opt, -1); /* XXX: discard all options */
	free(opt, M_IP6OPT);
	return (error);
}
#endif
2125
2126 /*
2127 * initialize ip6_pktopts. beware that there are non-zero default values in
2128 * the struct.
2129 */
2130 void
2131 ip6_initpktopts(struct ip6_pktopts *opt)
2132 {
2133
2134 memset(opt, 0, sizeof(*opt));
2135 opt->ip6po_hlim = -1; /* -1 means default hop limit */
2136 opt->ip6po_tclass = -1; /* -1 means default traffic class */
2137 opt->ip6po_minmtu = IP6PO_MINMTU_MCASTONLY;
2138 opt->ip6po_prefer_tempaddr = IP6PO_TEMPADDR_SYSTEM;
2139 }
2140
2141 #define sin6tosa(sin6) ((struct sockaddr *)(sin6)) /* XXX */
2142 static int
2143 ip6_pcbopt(int optname, u_char *buf, int len, struct ip6_pktopts **pktopt,
2144 kauth_cred_t cred, int uproto)
2145 {
2146 struct ip6_pktopts *opt;
2147
2148 if (*pktopt == NULL) {
2149 *pktopt = malloc(sizeof(struct ip6_pktopts), M_IP6OPT,
2150 M_NOWAIT);
2151 if (*pktopt == NULL)
2152 return (ENOBUFS);
2153
2154 ip6_initpktopts(*pktopt);
2155 }
2156 opt = *pktopt;
2157
2158 return (ip6_setpktopt(optname, buf, len, opt, cred, 1, 0, uproto));
2159 }
2160
2161 static int
2162 ip6_getpcbopt(struct ip6_pktopts *pktopt, int optname, struct sockopt *sopt)
2163 {
2164 void *optdata = NULL;
2165 int optdatalen = 0;
2166 struct ip6_ext *ip6e;
2167 int error = 0;
2168 struct in6_pktinfo null_pktinfo;
2169 int deftclass = 0, on;
2170 int defminmtu = IP6PO_MINMTU_MCASTONLY;
2171 int defpreftemp = IP6PO_TEMPADDR_SYSTEM;
2172
2173 switch (optname) {
2174 case IPV6_PKTINFO:
2175 if (pktopt && pktopt->ip6po_pktinfo)
2176 optdata = (void *)pktopt->ip6po_pktinfo;
2177 else {
2178 /* XXX: we don't have to do this every time... */
2179 memset(&null_pktinfo, 0, sizeof(null_pktinfo));
2180 optdata = (void *)&null_pktinfo;
2181 }
2182 optdatalen = sizeof(struct in6_pktinfo);
2183 break;
2184 case IPV6_OTCLASS:
2185 /* XXX */
2186 return (EINVAL);
2187 case IPV6_TCLASS:
2188 if (pktopt && pktopt->ip6po_tclass >= 0)
2189 optdata = (void *)&pktopt->ip6po_tclass;
2190 else
2191 optdata = (void *)&deftclass;
2192 optdatalen = sizeof(int);
2193 break;
2194 case IPV6_HOPOPTS:
2195 if (pktopt && pktopt->ip6po_hbh) {
2196 optdata = (void *)pktopt->ip6po_hbh;
2197 ip6e = (struct ip6_ext *)pktopt->ip6po_hbh;
2198 optdatalen = (ip6e->ip6e_len + 1) << 3;
2199 }
2200 break;
2201 case IPV6_RTHDR:
2202 if (pktopt && pktopt->ip6po_rthdr) {
2203 optdata = (void *)pktopt->ip6po_rthdr;
2204 ip6e = (struct ip6_ext *)pktopt->ip6po_rthdr;
2205 optdatalen = (ip6e->ip6e_len + 1) << 3;
2206 }
2207 break;
2208 case IPV6_RTHDRDSTOPTS:
2209 if (pktopt && pktopt->ip6po_dest1) {
2210 optdata = (void *)pktopt->ip6po_dest1;
2211 ip6e = (struct ip6_ext *)pktopt->ip6po_dest1;
2212 optdatalen = (ip6e->ip6e_len + 1) << 3;
2213 }
2214 break;
2215 case IPV6_DSTOPTS:
2216 if (pktopt && pktopt->ip6po_dest2) {
2217 optdata = (void *)pktopt->ip6po_dest2;
2218 ip6e = (struct ip6_ext *)pktopt->ip6po_dest2;
2219 optdatalen = (ip6e->ip6e_len + 1) << 3;
2220 }
2221 break;
2222 case IPV6_NEXTHOP:
2223 if (pktopt && pktopt->ip6po_nexthop) {
2224 optdata = (void *)pktopt->ip6po_nexthop;
2225 optdatalen = pktopt->ip6po_nexthop->sa_len;
2226 }
2227 break;
2228 case IPV6_USE_MIN_MTU:
2229 if (pktopt)
2230 optdata = (void *)&pktopt->ip6po_minmtu;
2231 else
2232 optdata = (void *)&defminmtu;
2233 optdatalen = sizeof(int);
2234 break;
2235 case IPV6_DONTFRAG:
2236 if (pktopt && ((pktopt->ip6po_flags) & IP6PO_DONTFRAG))
2237 on = 1;
2238 else
2239 on = 0;
2240 optdata = (void *)&on;
2241 optdatalen = sizeof(on);
2242 break;
2243 case IPV6_PREFER_TEMPADDR:
2244 if (pktopt)
2245 optdata = (void *)&pktopt->ip6po_prefer_tempaddr;
2246 else
2247 optdata = (void *)&defpreftemp;
2248 optdatalen = sizeof(int);
2249 break;
2250 default: /* should not happen */
2251 #ifdef DIAGNOSTIC
2252 panic("ip6_getpcbopt: unexpected option\n");
2253 #endif
2254 return (ENOPROTOOPT);
2255 }
2256
2257 error = sockopt_set(sopt, optdata, optdatalen);
2258
2259 return (error);
2260 }
2261
2262 void
2263 ip6_clearpktopts(struct ip6_pktopts *pktopt, int optname)
2264 {
2265 if (optname == -1 || optname == IPV6_PKTINFO) {
2266 if (pktopt->ip6po_pktinfo)
2267 free(pktopt->ip6po_pktinfo, M_IP6OPT);
2268 pktopt->ip6po_pktinfo = NULL;
2269 }
2270 if (optname == -1 || optname == IPV6_HOPLIMIT)
2271 pktopt->ip6po_hlim = -1;
2272 if (optname == -1 || optname == IPV6_TCLASS)
2273 pktopt->ip6po_tclass = -1;
2274 if (optname == -1 || optname == IPV6_NEXTHOP) {
2275 rtcache_free(&pktopt->ip6po_nextroute);
2276 if (pktopt->ip6po_nexthop)
2277 free(pktopt->ip6po_nexthop, M_IP6OPT);
2278 pktopt->ip6po_nexthop = NULL;
2279 }
2280 if (optname == -1 || optname == IPV6_HOPOPTS) {
2281 if (pktopt->ip6po_hbh)
2282 free(pktopt->ip6po_hbh, M_IP6OPT);
2283 pktopt->ip6po_hbh = NULL;
2284 }
2285 if (optname == -1 || optname == IPV6_RTHDRDSTOPTS) {
2286 if (pktopt->ip6po_dest1)
2287 free(pktopt->ip6po_dest1, M_IP6OPT);
2288 pktopt->ip6po_dest1 = NULL;
2289 }
2290 if (optname == -1 || optname == IPV6_RTHDR) {
2291 if (pktopt->ip6po_rhinfo.ip6po_rhi_rthdr)
2292 free(pktopt->ip6po_rhinfo.ip6po_rhi_rthdr, M_IP6OPT);
2293 pktopt->ip6po_rhinfo.ip6po_rhi_rthdr = NULL;
2294 rtcache_free(&pktopt->ip6po_route);
2295 }
2296 if (optname == -1 || optname == IPV6_DSTOPTS) {
2297 if (pktopt->ip6po_dest2)
2298 free(pktopt->ip6po_dest2, M_IP6OPT);
2299 pktopt->ip6po_dest2 = NULL;
2300 }
2301 }
2302
2303 #define PKTOPT_EXTHDRCPY(type) \
2304 do { \
2305 if (src->type) { \
2306 int hlen = (((struct ip6_ext *)src->type)->ip6e_len + 1) << 3;\
2307 dst->type = malloc(hlen, M_IP6OPT, canwait); \
2308 if (dst->type == NULL) \
2309 goto bad; \
2310 memcpy(dst->type, src->type, hlen); \
2311 } \
2312 } while (/*CONSTCOND*/ 0)
2313
2314 static int
2315 copypktopts(struct ip6_pktopts *dst, struct ip6_pktopts *src, int canwait)
2316 {
2317 dst->ip6po_hlim = src->ip6po_hlim;
2318 dst->ip6po_tclass = src->ip6po_tclass;
2319 dst->ip6po_flags = src->ip6po_flags;
2320 dst->ip6po_minmtu = src->ip6po_minmtu;
2321 dst->ip6po_prefer_tempaddr = src->ip6po_prefer_tempaddr;
2322 if (src->ip6po_pktinfo) {
2323 dst->ip6po_pktinfo = malloc(sizeof(*dst->ip6po_pktinfo),
2324 M_IP6OPT, canwait);
2325 if (dst->ip6po_pktinfo == NULL)
2326 goto bad;
2327 *dst->ip6po_pktinfo = *src->ip6po_pktinfo;
2328 }
2329 if (src->ip6po_nexthop) {
2330 dst->ip6po_nexthop = malloc(src->ip6po_nexthop->sa_len,
2331 M_IP6OPT, canwait);
2332 if (dst->ip6po_nexthop == NULL)
2333 goto bad;
2334 memcpy(dst->ip6po_nexthop, src->ip6po_nexthop,
2335 src->ip6po_nexthop->sa_len);
2336 }
2337 PKTOPT_EXTHDRCPY(ip6po_hbh);
2338 PKTOPT_EXTHDRCPY(ip6po_dest1);
2339 PKTOPT_EXTHDRCPY(ip6po_dest2);
2340 PKTOPT_EXTHDRCPY(ip6po_rthdr); /* not copy the cached route */
2341 return (0);
2342
2343 bad:
2344 if (dst->ip6po_pktinfo) free(dst->ip6po_pktinfo, M_IP6OPT);
2345 if (dst->ip6po_nexthop) free(dst->ip6po_nexthop, M_IP6OPT);
2346 if (dst->ip6po_hbh) free(dst->ip6po_hbh, M_IP6OPT);
2347 if (dst->ip6po_dest1) free(dst->ip6po_dest1, M_IP6OPT);
2348 if (dst->ip6po_dest2) free(dst->ip6po_dest2, M_IP6OPT);
2349 if (dst->ip6po_rthdr) free(dst->ip6po_rthdr, M_IP6OPT);
2350
2351 return (ENOBUFS);
2352 }
2353 #undef PKTOPT_EXTHDRCPY
2354
2355 struct ip6_pktopts *
2356 ip6_copypktopts(struct ip6_pktopts *src, int canwait)
2357 {
2358 int error;
2359 struct ip6_pktopts *dst;
2360
2361 dst = malloc(sizeof(*dst), M_IP6OPT, canwait);
2362 if (dst == NULL)
2363 return (NULL);
2364 ip6_initpktopts(dst);
2365
2366 if ((error = copypktopts(dst, src, canwait)) != 0) {
2367 free(dst, M_IP6OPT);
2368 return (NULL);
2369 }
2370
2371 return (dst);
2372 }
2373
2374 void
2375 ip6_freepcbopts(struct ip6_pktopts *pktopt)
2376 {
2377 if (pktopt == NULL)
2378 return;
2379
2380 ip6_clearpktopts(pktopt, -1);
2381
2382 free(pktopt, M_IP6OPT);
2383 }
2384
2385 int
2386 ip6_get_membership(const struct sockopt *sopt, struct ifnet **ifp,
2387 struct psref *psref, void *v, size_t l)
2388 {
2389 struct ipv6_mreq mreq;
2390 int error;
2391 struct in6_addr *ia = &mreq.ipv6mr_multiaddr;
2392 struct in_addr *ia4 = (void *)&ia->s6_addr32[3];
2393
2394 error = sockopt_get(sopt, &mreq, sizeof(mreq));
2395 if (error != 0)
2396 return error;
2397
2398 if (IN6_IS_ADDR_UNSPECIFIED(ia)) {
2399 /*
2400 * We use the unspecified address to specify to accept
2401 * all multicast addresses. Only super user is allowed
2402 * to do this.
2403 */
2404 if (kauth_authorize_network(curlwp->l_cred, KAUTH_NETWORK_IPV6,
2405 KAUTH_REQ_NETWORK_IPV6_JOIN_MULTICAST, NULL, NULL, NULL))
2406 return EACCES;
2407 } else if (IN6_IS_ADDR_V4MAPPED(ia)) {
2408 // Don't bother if we are not going to use ifp.
2409 if (l == sizeof(*ia)) {
2410 memcpy(v, ia, l);
2411 return 0;
2412 }
2413 } else if (!IN6_IS_ADDR_MULTICAST(ia)) {
2414 return EINVAL;
2415 }
2416
2417 /*
2418 * If no interface was explicitly specified, choose an
2419 * appropriate one according to the given multicast address.
2420 */
2421 if (mreq.ipv6mr_interface == 0) {
2422 struct rtentry *rt;
2423 union {
2424 struct sockaddr dst;
2425 struct sockaddr_in dst4;
2426 struct sockaddr_in6 dst6;
2427 } u;
2428 struct route ro;
2429
2430 /*
2431 * Look up the routing table for the
2432 * address, and choose the outgoing interface.
2433 * XXX: is it a good approach?
2434 */
2435 memset(&ro, 0, sizeof(ro));
2436 if (IN6_IS_ADDR_V4MAPPED(ia))
2437 sockaddr_in_init(&u.dst4, ia4, 0);
2438 else
2439 sockaddr_in6_init(&u.dst6, ia, 0, 0, 0);
2440 error = rtcache_setdst(&ro, &u.dst);
2441 if (error != 0)
2442 return error;
2443 rt = rtcache_init(&ro);
2444 *ifp = rt != NULL ?
2445 if_get_byindex(rt->rt_ifp->if_index, psref) : NULL;
2446 rtcache_unref(rt, &ro);
2447 rtcache_free(&ro);
2448 } else {
2449 /*
2450 * If the interface is specified, validate it.
2451 */
2452 *ifp = if_get_byindex(mreq.ipv6mr_interface, psref);
2453 if (*ifp == NULL)
2454 return ENXIO; /* XXX EINVAL? */
2455 }
2456 if (sizeof(*ia) == l)
2457 memcpy(v, ia, l);
2458 else
2459 memcpy(v, ia4, l);
2460 return 0;
2461 }
2462
2463 /*
2464 * Set the IP6 multicast options in response to user setsockopt().
2465 */
2466 static int
2467 ip6_setmoptions(const struct sockopt *sopt, struct in6pcb *in6p)
2468 {
2469 int error = 0;
2470 u_int loop, ifindex;
2471 struct ipv6_mreq mreq;
2472 struct in6_addr ia;
2473 struct ifnet *ifp;
2474 struct ip6_moptions *im6o = in6p->in6p_moptions;
2475 struct in6_multi_mship *imm;
2476
2477 KASSERT(in6p_locked(in6p));
2478
2479 if (im6o == NULL) {
2480 /*
2481 * No multicast option buffer attached to the pcb;
2482 * allocate one and initialize to default values.
2483 */
2484 im6o = malloc(sizeof(*im6o), M_IPMOPTS, M_NOWAIT);
2485 if (im6o == NULL)
2486 return (ENOBUFS);
2487 in6p->in6p_moptions = im6o;
2488 im6o->im6o_multicast_if_index = 0;
2489 im6o->im6o_multicast_hlim = ip6_defmcasthlim;
2490 im6o->im6o_multicast_loop = IPV6_DEFAULT_MULTICAST_LOOP;
2491 LIST_INIT(&im6o->im6o_memberships);
2492 }
2493
2494 switch (sopt->sopt_name) {
2495
2496 case IPV6_MULTICAST_IF: {
2497 int s;
2498 /*
2499 * Select the interface for outgoing multicast packets.
2500 */
2501 error = sockopt_get(sopt, &ifindex, sizeof(ifindex));
2502 if (error != 0)
2503 break;
2504
2505 s = pserialize_read_enter();
2506 if (ifindex != 0) {
2507 if ((ifp = if_byindex(ifindex)) == NULL) {
2508 pserialize_read_exit(s);
2509 error = ENXIO; /* XXX EINVAL? */
2510 break;
2511 }
2512 if ((ifp->if_flags & IFF_MULTICAST) == 0) {
2513 pserialize_read_exit(s);
2514 error = EADDRNOTAVAIL;
2515 break;
2516 }
2517 } else
2518 ifp = NULL;
2519 im6o->im6o_multicast_if_index = if_get_index(ifp);
2520 pserialize_read_exit(s);
2521 break;
2522 }
2523
2524 case IPV6_MULTICAST_HOPS:
2525 {
2526 /*
2527 * Set the IP6 hoplimit for outgoing multicast packets.
2528 */
2529 int optval;
2530
2531 error = sockopt_getint(sopt, &optval);
2532 if (error != 0)
2533 break;
2534
2535 if (optval < -1 || optval >= 256)
2536 error = EINVAL;
2537 else if (optval == -1)
2538 im6o->im6o_multicast_hlim = ip6_defmcasthlim;
2539 else
2540 im6o->im6o_multicast_hlim = optval;
2541 break;
2542 }
2543
2544 case IPV6_MULTICAST_LOOP:
2545 /*
2546 * Set the loopback flag for outgoing multicast packets.
2547 * Must be zero or one.
2548 */
2549 error = sockopt_get(sopt, &loop, sizeof(loop));
2550 if (error != 0)
2551 break;
2552 if (loop > 1) {
2553 error = EINVAL;
2554 break;
2555 }
2556 im6o->im6o_multicast_loop = loop;
2557 break;
2558
2559 case IPV6_JOIN_GROUP: {
2560 int bound;
2561 struct psref psref;
2562 /*
2563 * Add a multicast group membership.
2564 * Group must be a valid IP6 multicast address.
2565 */
2566 bound = curlwp_bind();
2567 ifp = NULL;
2568 error = ip6_get_membership(sopt, &ifp, &psref, &ia, sizeof(ia));
2569 if (error != 0) {
2570 KASSERT(ifp == NULL);
2571 curlwp_bindx(bound);
2572 return error;
2573 }
2574
2575 if (IN6_IS_ADDR_V4MAPPED(&ia)) {
2576 error = ip_setmoptions(&in6p->in6p_v4moptions, sopt);
2577 goto put_break;
2578 }
2579 /*
2580 * See if we found an interface, and confirm that it
2581 * supports multicast
2582 */
2583 if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
2584 error = EADDRNOTAVAIL;
2585 goto put_break;
2586 }
2587
2588 if (in6_setscope(&ia, ifp, NULL)) {
2589 error = EADDRNOTAVAIL; /* XXX: should not happen */
2590 goto put_break;
2591 }
2592
2593 /*
2594 * See if the membership already exists.
2595 */
2596 LIST_FOREACH(imm, &im6o->im6o_memberships, i6mm_chain) {
2597 if (imm->i6mm_maddr->in6m_ifp == ifp &&
2598 IN6_ARE_ADDR_EQUAL(&imm->i6mm_maddr->in6m_addr,
2599 &ia))
2600 goto put_break;
2601 }
2602 if (imm != NULL) {
2603 error = EADDRINUSE;
2604 goto put_break;
2605 }
2606 /*
2607 * Everything looks good; add a new record to the multicast
2608 * address list for the given interface.
2609 */
2610 IFNET_LOCK(ifp);
2611 imm = in6_joingroup(ifp, &ia, &error, 0);
2612 IFNET_UNLOCK(ifp);
2613 if (imm == NULL)
2614 goto put_break;
2615 LIST_INSERT_HEAD(&im6o->im6o_memberships, imm, i6mm_chain);
2616 put_break:
2617 if_put(ifp, &psref);
2618 curlwp_bindx(bound);
2619 break;
2620 }
2621
2622 case IPV6_LEAVE_GROUP: {
2623 struct ifnet *in6m_ifp;
2624 /*
2625 * Drop a multicast group membership.
2626 * Group must be a valid IP6 multicast address.
2627 */
2628 error = sockopt_get(sopt, &mreq, sizeof(mreq));
2629 if (error != 0)
2630 break;
2631
2632 if (IN6_IS_ADDR_V4MAPPED(&mreq.ipv6mr_multiaddr)) {
2633 error = ip_setmoptions(&in6p->in6p_v4moptions, sopt);
2634 break;
2635 }
2636 /*
2637 * If an interface address was specified, get a pointer
2638 * to its ifnet structure.
2639 */
2640 if (mreq.ipv6mr_interface != 0) {
2641 if ((ifp = if_byindex(mreq.ipv6mr_interface)) == NULL) {
2642 error = ENXIO; /* XXX EINVAL? */
2643 break;
2644 }
2645 } else
2646 ifp = NULL;
2647
2648 /* Fill in the scope zone ID */
2649 if (ifp) {
2650 if (in6_setscope(&mreq.ipv6mr_multiaddr, ifp, NULL)) {
2651 /* XXX: should not happen */
2652 error = EADDRNOTAVAIL;
2653 break;
2654 }
2655 } else if (mreq.ipv6mr_interface != 0) {
2656 /*
2657 * XXX: This case would happens when the (positive)
2658 * index is in the valid range, but the corresponding
2659 * interface has been detached dynamically. The above
2660 * check probably avoids such case to happen here, but
2661 * we check it explicitly for safety.
2662 */
2663 error = EADDRNOTAVAIL;
2664 break;
2665 } else { /* ipv6mr_interface == 0 */
2666 struct sockaddr_in6 sa6_mc;
2667
2668 /*
2669 * The API spec says as follows:
2670 * If the interface index is specified as 0, the
2671 * system may choose a multicast group membership to
2672 * drop by matching the multicast address only.
2673 * On the other hand, we cannot disambiguate the scope
2674 * zone unless an interface is provided. Thus, we
2675 * check if there's ambiguity with the default scope
2676 * zone as the last resort.
2677 */
2678 sockaddr_in6_init(&sa6_mc, &mreq.ipv6mr_multiaddr,
2679 0, 0, 0);
2680 error = sa6_embedscope(&sa6_mc, ip6_use_defzone);
2681 if (error != 0)
2682 break;
2683 mreq.ipv6mr_multiaddr = sa6_mc.sin6_addr;
2684 }
2685
2686 /*
2687 * Find the membership in the membership list.
2688 */
2689 LIST_FOREACH(imm, &im6o->im6o_memberships, i6mm_chain) {
2690 if ((ifp == NULL || imm->i6mm_maddr->in6m_ifp == ifp) &&
2691 IN6_ARE_ADDR_EQUAL(&imm->i6mm_maddr->in6m_addr,
2692 &mreq.ipv6mr_multiaddr))
2693 break;
2694 }
2695 if (imm == NULL) {
2696 /* Unable to resolve interface */
2697 error = EADDRNOTAVAIL;
2698 break;
2699 }
2700 /*
2701 * Give up the multicast address record to which the
2702 * membership points.
2703 */
2704 LIST_REMOVE(imm, i6mm_chain);
2705 in6m_ifp = imm->i6mm_maddr->in6m_ifp;
2706 IFNET_LOCK(in6m_ifp);
2707 in6_leavegroup(imm);
2708 /* in6m_ifp should not leave thanks to in6p_lock */
2709 IFNET_UNLOCK(in6m_ifp);
2710 break;
2711 }
2712
2713 default:
2714 error = EOPNOTSUPP;
2715 break;
2716 }
2717
2718 /*
2719 * If all options have default values, no need to keep the mbuf.
2720 */
2721 if (im6o->im6o_multicast_if_index == 0 &&
2722 im6o->im6o_multicast_hlim == ip6_defmcasthlim &&
2723 im6o->im6o_multicast_loop == IPV6_DEFAULT_MULTICAST_LOOP &&
2724 LIST_EMPTY(&im6o->im6o_memberships)) {
2725 free(in6p->in6p_moptions, M_IPMOPTS);
2726 in6p->in6p_moptions = NULL;
2727 }
2728
2729 return (error);
2730 }
2731
2732 /*
2733 * Return the IP6 multicast options in response to user getsockopt().
2734 */
2735 static int
2736 ip6_getmoptions(struct sockopt *sopt, struct in6pcb *in6p)
2737 {
2738 u_int optval;
2739 int error;
2740 struct ip6_moptions *im6o = in6p->in6p_moptions;
2741
2742 switch (sopt->sopt_name) {
2743 case IPV6_MULTICAST_IF:
2744 if (im6o == NULL || im6o->im6o_multicast_if_index == 0)
2745 optval = 0;
2746 else
2747 optval = im6o->im6o_multicast_if_index;
2748
2749 error = sockopt_set(sopt, &optval, sizeof(optval));
2750 break;
2751
2752 case IPV6_MULTICAST_HOPS:
2753 if (im6o == NULL)
2754 optval = ip6_defmcasthlim;
2755 else
2756 optval = im6o->im6o_multicast_hlim;
2757
2758 error = sockopt_set(sopt, &optval, sizeof(optval));
2759 break;
2760
2761 case IPV6_MULTICAST_LOOP:
2762 if (im6o == NULL)
2763 optval = IPV6_DEFAULT_MULTICAST_LOOP;
2764 else
2765 optval = im6o->im6o_multicast_loop;
2766
2767 error = sockopt_set(sopt, &optval, sizeof(optval));
2768 break;
2769
2770 default:
2771 error = EOPNOTSUPP;
2772 }
2773
2774 return (error);
2775 }
2776
2777 /*
2778 * Discard the IP6 multicast options.
2779 */
2780 void
2781 ip6_freemoptions(struct ip6_moptions *im6o)
2782 {
2783 struct in6_multi_mship *imm, *nimm;
2784
2785 if (im6o == NULL)
2786 return;
2787
2788 /* The owner of im6o (in6p) should be protected by solock */
2789 LIST_FOREACH_SAFE(imm, &im6o->im6o_memberships, i6mm_chain, nimm) {
2790 struct ifnet *ifp;
2791
2792 LIST_REMOVE(imm, i6mm_chain);
2793
2794 ifp = imm->i6mm_maddr->in6m_ifp;
2795 IFNET_LOCK(ifp);
2796 in6_leavegroup(imm);
2797 /* ifp should not leave thanks to solock */
2798 IFNET_UNLOCK(ifp);
2799 }
2800 free(im6o, M_IPMOPTS);
2801 }
2802
2803 /*
2804 * Set IPv6 outgoing packet options based on advanced API.
2805 */
2806 int
2807 ip6_setpktopts(struct mbuf *control, struct ip6_pktopts *opt,
2808 struct ip6_pktopts *stickyopt, kauth_cred_t cred, int uproto)
2809 {
2810 struct cmsghdr *cm = 0;
2811
2812 if (control == NULL || opt == NULL)
2813 return (EINVAL);
2814
2815 ip6_initpktopts(opt);
2816 if (stickyopt) {
2817 int error;
2818
2819 /*
2820 * If stickyopt is provided, make a local copy of the options
2821 * for this particular packet, then override them by ancillary
2822 * objects.
2823 * XXX: copypktopts() does not copy the cached route to a next
2824 * hop (if any). This is not very good in terms of efficiency,
2825 * but we can allow this since this option should be rarely
2826 * used.
2827 */
2828 if ((error = copypktopts(opt, stickyopt, M_NOWAIT)) != 0)
2829 return (error);
2830 }
2831
2832 /*
2833 * XXX: Currently, we assume all the optional information is stored
2834 * in a single mbuf.
2835 */
2836 if (control->m_next)
2837 return (EINVAL);
2838
2839 /* XXX if cm->cmsg_len is not aligned, control->m_len can become <0 */
2840 for (; control->m_len > 0; control->m_data += CMSG_ALIGN(cm->cmsg_len),
2841 control->m_len -= CMSG_ALIGN(cm->cmsg_len)) {
2842 int error;
2843
2844 if (control->m_len < CMSG_LEN(0))
2845 return (EINVAL);
2846
2847 cm = mtod(control, struct cmsghdr *);
2848 if (cm->cmsg_len == 0 || cm->cmsg_len > control->m_len)
2849 return (EINVAL);
2850 if (cm->cmsg_level != IPPROTO_IPV6)
2851 continue;
2852
2853 error = ip6_setpktopt(cm->cmsg_type, CMSG_DATA(cm),
2854 cm->cmsg_len - CMSG_LEN(0), opt, cred, 0, 1, uproto);
2855 if (error)
2856 return (error);
2857 }
2858
2859 return (0);
2860 }
2861
2862 /*
2863 * Set a particular packet option, as a sticky option or an ancillary data
2864 * item. "len" can be 0 only when it's a sticky option.
2865 * We have 4 cases of combination of "sticky" and "cmsg":
2866 * "sticky=0, cmsg=0": impossible
2867 * "sticky=0, cmsg=1": RFC2292 or RFC3542 ancillary data
2868 * "sticky=1, cmsg=0": RFC3542 socket option
2869 * "sticky=1, cmsg=1": RFC2292 socket option
2870 */
2871 static int
2872 ip6_setpktopt(int optname, u_char *buf, int len, struct ip6_pktopts *opt,
2873 kauth_cred_t cred, int sticky, int cmsg, int uproto)
2874 {
2875 int minmtupolicy;
2876 int error;
2877
2878 if (!sticky && !cmsg) {
2879 #ifdef DIAGNOSTIC
2880 printf("ip6_setpktopt: impossible case\n");
2881 #endif
2882 return (EINVAL);
2883 }
2884
2885 /*
2886 * IPV6_2292xxx is for backward compatibility to RFC2292, and should
2887 * not be specified in the context of RFC3542. Conversely,
2888 * RFC3542 types should not be specified in the context of RFC2292.
2889 */
2890 if (!cmsg) {
2891 switch (optname) {
2892 case IPV6_2292PKTINFO:
2893 case IPV6_2292HOPLIMIT:
2894 case IPV6_2292NEXTHOP:
2895 case IPV6_2292HOPOPTS:
2896 case IPV6_2292DSTOPTS:
2897 case IPV6_2292RTHDR:
2898 case IPV6_2292PKTOPTIONS:
2899 return (ENOPROTOOPT);
2900 }
2901 }
2902 if (sticky && cmsg) {
2903 switch (optname) {
2904 case IPV6_PKTINFO:
2905 case IPV6_HOPLIMIT:
2906 case IPV6_NEXTHOP:
2907 case IPV6_HOPOPTS:
2908 case IPV6_DSTOPTS:
2909 case IPV6_RTHDRDSTOPTS:
2910 case IPV6_RTHDR:
2911 case IPV6_USE_MIN_MTU:
2912 case IPV6_DONTFRAG:
2913 case IPV6_OTCLASS:
2914 case IPV6_TCLASS:
2915 case IPV6_PREFER_TEMPADDR: /* XXX not an RFC3542 option */
2916 return (ENOPROTOOPT);
2917 }
2918 }
2919
2920 switch (optname) {
2921 #ifdef RFC2292
2922 case IPV6_2292PKTINFO:
2923 #endif
2924 case IPV6_PKTINFO:
2925 {
2926 struct in6_pktinfo *pktinfo;
2927
2928 if (len != sizeof(struct in6_pktinfo))
2929 return (EINVAL);
2930
2931 pktinfo = (struct in6_pktinfo *)buf;
2932
2933 /*
2934 * An application can clear any sticky IPV6_PKTINFO option by
2935 * doing a "regular" setsockopt with ipi6_addr being
2936 * in6addr_any and ipi6_ifindex being zero.
2937 * [RFC 3542, Section 6]
2938 */
2939 if (optname == IPV6_PKTINFO && opt->ip6po_pktinfo &&
2940 pktinfo->ipi6_ifindex == 0 &&
2941 IN6_IS_ADDR_UNSPECIFIED(&pktinfo->ipi6_addr)) {
2942 ip6_clearpktopts(opt, optname);
2943 break;
2944 }
2945
2946 if (uproto == IPPROTO_TCP && optname == IPV6_PKTINFO &&
2947 sticky && !IN6_IS_ADDR_UNSPECIFIED(&pktinfo->ipi6_addr)) {
2948 return (EINVAL);
2949 }
2950
2951 /* Validate the interface index if specified. */
2952 if (pktinfo->ipi6_ifindex) {
2953 struct ifnet *ifp;
2954 int s = pserialize_read_enter();
2955 ifp = if_byindex(pktinfo->ipi6_ifindex);
2956 if (ifp == NULL) {
2957 pserialize_read_exit(s);
2958 return ENXIO;
2959 }
2960 pserialize_read_exit(s);
2961 }
2962
2963 /*
2964 * We store the address anyway, and let in6_selectsrc()
2965 * validate the specified address. This is because ipi6_addr
2966 * may not have enough information about its scope zone, and
2967 * we may need additional information (such as outgoing
2968 * interface or the scope zone of a destination address) to
2969 * disambiguate the scope.
2970 * XXX: the delay of the validation may confuse the
2971 * application when it is used as a sticky option.
2972 */
2973 if (opt->ip6po_pktinfo == NULL) {
2974 opt->ip6po_pktinfo = malloc(sizeof(*pktinfo),
2975 M_IP6OPT, M_NOWAIT);
2976 if (opt->ip6po_pktinfo == NULL)
2977 return (ENOBUFS);
2978 }
2979 memcpy(opt->ip6po_pktinfo, pktinfo, sizeof(*pktinfo));
2980 break;
2981 }
2982
2983 #ifdef RFC2292
2984 case IPV6_2292HOPLIMIT:
2985 #endif
2986 case IPV6_HOPLIMIT:
2987 {
2988 int *hlimp;
2989
2990 /*
2991 * RFC 3542 deprecated the usage of sticky IPV6_HOPLIMIT
2992 * to simplify the ordering among hoplimit options.
2993 */
2994 if (optname == IPV6_HOPLIMIT && sticky)
2995 return (ENOPROTOOPT);
2996
2997 if (len != sizeof(int))
2998 return (EINVAL);
2999 hlimp = (int *)buf;
3000 if (*hlimp < -1 || *hlimp > 255)
3001 return (EINVAL);
3002
3003 opt->ip6po_hlim = *hlimp;
3004 break;
3005 }
3006
3007 case IPV6_OTCLASS:
3008 if (len != sizeof(u_int8_t))
3009 return (EINVAL);
3010
3011 opt->ip6po_tclass = *(u_int8_t *)buf;
3012 break;
3013
3014 case IPV6_TCLASS:
3015 {
3016 int tclass;
3017
3018 if (len != sizeof(int))
3019 return (EINVAL);
3020 tclass = *(int *)buf;
3021 if (tclass < -1 || tclass > 255)
3022 return (EINVAL);
3023
3024 opt->ip6po_tclass = tclass;
3025 break;
3026 }
3027
3028 #ifdef RFC2292
3029 case IPV6_2292NEXTHOP:
3030 #endif
3031 case IPV6_NEXTHOP:
3032 error = kauth_authorize_network(cred, KAUTH_NETWORK_IPV6,
3033 KAUTH_REQ_NETWORK_IPV6_HOPBYHOP, NULL, NULL, NULL);
3034 if (error)
3035 return (error);
3036
3037 if (len == 0) { /* just remove the option */
3038 ip6_clearpktopts(opt, IPV6_NEXTHOP);
3039 break;
3040 }
3041
3042 /* check if cmsg_len is large enough for sa_len */
3043 if (len < sizeof(struct sockaddr) || len < *buf)
3044 return (EINVAL);
3045
3046 switch (((struct sockaddr *)buf)->sa_family) {
3047 case AF_INET6:
3048 {
3049 struct sockaddr_in6 *sa6 = (struct sockaddr_in6 *)buf;
3050
3051 if (sa6->sin6_len != sizeof(struct sockaddr_in6))
3052 return (EINVAL);
3053
3054 if (IN6_IS_ADDR_UNSPECIFIED(&sa6->sin6_addr) ||
3055 IN6_IS_ADDR_MULTICAST(&sa6->sin6_addr)) {
3056 return (EINVAL);
3057 }
3058 if ((error = sa6_embedscope(sa6, ip6_use_defzone))
3059 != 0) {
3060 return (error);
3061 }
3062 break;
3063 }
3064 case AF_LINK: /* eventually be supported? */
3065 default:
3066 return (EAFNOSUPPORT);
3067 }
3068
3069 /* turn off the previous option, then set the new option. */
3070 ip6_clearpktopts(opt, IPV6_NEXTHOP);
3071 opt->ip6po_nexthop = malloc(*buf, M_IP6OPT, M_NOWAIT);
3072 if (opt->ip6po_nexthop == NULL)
3073 return (ENOBUFS);
3074 memcpy(opt->ip6po_nexthop, buf, *buf);
3075 break;
3076
3077 #ifdef RFC2292
3078 case IPV6_2292HOPOPTS:
3079 #endif
3080 case IPV6_HOPOPTS:
3081 {
3082 struct ip6_hbh *hbh;
3083 int hbhlen;
3084
3085 /*
3086 * XXX: We don't allow a non-privileged user to set ANY HbH
3087 * options, since per-option restriction has too much
3088 * overhead.
3089 */
3090 error = kauth_authorize_network(cred, KAUTH_NETWORK_IPV6,
3091 KAUTH_REQ_NETWORK_IPV6_HOPBYHOP, NULL, NULL, NULL);
3092 if (error)
3093 return (error);
3094
3095 if (len == 0) {
3096 ip6_clearpktopts(opt, IPV6_HOPOPTS);
3097 break; /* just remove the option */
3098 }
3099
3100 /* message length validation */
3101 if (len < sizeof(struct ip6_hbh))
3102 return (EINVAL);
3103 hbh = (struct ip6_hbh *)buf;
3104 hbhlen = (hbh->ip6h_len + 1) << 3;
3105 if (len != hbhlen)
3106 return (EINVAL);
3107
3108 /* turn off the previous option, then set the new option. */
3109 ip6_clearpktopts(opt, IPV6_HOPOPTS);
3110 opt->ip6po_hbh = malloc(hbhlen, M_IP6OPT, M_NOWAIT);
3111 if (opt->ip6po_hbh == NULL)
3112 return (ENOBUFS);
3113 memcpy(opt->ip6po_hbh, hbh, hbhlen);
3114
3115 break;
3116 }
3117
3118 #ifdef RFC2292
3119 case IPV6_2292DSTOPTS:
3120 #endif
3121 case IPV6_DSTOPTS:
3122 case IPV6_RTHDRDSTOPTS:
3123 {
3124 struct ip6_dest *dest, **newdest = NULL;
3125 int destlen;
3126
3127 /* XXX: see the comment for IPV6_HOPOPTS */
3128 error = kauth_authorize_network(cred, KAUTH_NETWORK_IPV6,
3129 KAUTH_REQ_NETWORK_IPV6_HOPBYHOP, NULL, NULL, NULL);
3130 if (error)
3131 return (error);
3132
3133 if (len == 0) {
3134 ip6_clearpktopts(opt, optname);
3135 break; /* just remove the option */
3136 }
3137
3138 /* message length validation */
3139 if (len < sizeof(struct ip6_dest))
3140 return (EINVAL);
3141 dest = (struct ip6_dest *)buf;
3142 destlen = (dest->ip6d_len + 1) << 3;
3143 if (len != destlen)
3144 return (EINVAL);
3145 /*
3146 * Determine the position that the destination options header
3147 * should be inserted; before or after the routing header.
3148 */
3149 switch (optname) {
3150 case IPV6_2292DSTOPTS:
3151 /*
3152 * The old advanced API is ambiguous on this point.
3153 * Our approach is to determine the position based
3154 * according to the existence of a routing header.
3155 * Note, however, that this depends on the order of the
3156 * extension headers in the ancillary data; the 1st
3157 * part of the destination options header must appear
3158 * before the routing header in the ancillary data,
3159 * too.
3160 * RFC3542 solved the ambiguity by introducing
3161 * separate ancillary data or option types.
3162 */
3163 if (opt->ip6po_rthdr == NULL)
3164 newdest = &opt->ip6po_dest1;
3165 else
3166 newdest = &opt->ip6po_dest2;
3167 break;
3168 case IPV6_RTHDRDSTOPTS:
3169 newdest = &opt->ip6po_dest1;
3170 break;
3171 case IPV6_DSTOPTS:
3172 newdest = &opt->ip6po_dest2;
3173 break;
3174 }
3175
3176 /* turn off the previous option, then set the new option. */
3177 ip6_clearpktopts(opt, optname);
3178 *newdest = malloc(destlen, M_IP6OPT, M_NOWAIT);
3179 if (*newdest == NULL)
3180 return (ENOBUFS);
3181 memcpy(*newdest, dest, destlen);
3182
3183 break;
3184 }
3185
3186 #ifdef RFC2292
3187 case IPV6_2292RTHDR:
3188 #endif
3189 case IPV6_RTHDR:
3190 {
3191 struct ip6_rthdr *rth;
3192 int rthlen;
3193
3194 if (len == 0) {
3195 ip6_clearpktopts(opt, IPV6_RTHDR);
3196 break; /* just remove the option */
3197 }
3198
3199 /* message length validation */
3200 if (len < sizeof(struct ip6_rthdr))
3201 return (EINVAL);
3202 rth = (struct ip6_rthdr *)buf;
3203 rthlen = (rth->ip6r_len + 1) << 3;
3204 if (len != rthlen)
3205 return (EINVAL);
3206 switch (rth->ip6r_type) {
3207 case IPV6_RTHDR_TYPE_0:
3208 if (rth->ip6r_len == 0) /* must contain one addr */
3209 return (EINVAL);
3210 if (rth->ip6r_len % 2) /* length must be even */
3211 return (EINVAL);
3212 if (rth->ip6r_len / 2 != rth->ip6r_segleft)
3213 return (EINVAL);
3214 break;
3215 default:
3216 return (EINVAL); /* not supported */
3217 }
3218 /* turn off the previous option */
3219 ip6_clearpktopts(opt, IPV6_RTHDR);
3220 opt->ip6po_rthdr = malloc(rthlen, M_IP6OPT, M_NOWAIT);
3221 if (opt->ip6po_rthdr == NULL)
3222 return (ENOBUFS);
3223 memcpy(opt->ip6po_rthdr, rth, rthlen);
3224 break;
3225 }
3226
3227 case IPV6_USE_MIN_MTU:
3228 if (len != sizeof(int))
3229 return (EINVAL);
3230 minmtupolicy = *(int *)buf;
3231 if (minmtupolicy != IP6PO_MINMTU_MCASTONLY &&
3232 minmtupolicy != IP6PO_MINMTU_DISABLE &&
3233 minmtupolicy != IP6PO_MINMTU_ALL) {
3234 return (EINVAL);
3235 }
3236 opt->ip6po_minmtu = minmtupolicy;
3237 break;
3238
3239 case IPV6_DONTFRAG:
3240 if (len != sizeof(int))
3241 return (EINVAL);
3242
3243 if (uproto == IPPROTO_TCP || *(int *)buf == 0) {
3244 /*
3245 * we ignore this option for TCP sockets.
3246 * (RFC3542 leaves this case unspecified.)
3247 */
3248 opt->ip6po_flags &= ~IP6PO_DONTFRAG;
3249 } else
3250 opt->ip6po_flags |= IP6PO_DONTFRAG;
3251 break;
3252
3253 case IPV6_PREFER_TEMPADDR:
3254 {
3255 int preftemp;
3256
3257 if (len != sizeof(int))
3258 return (EINVAL);
3259 preftemp = *(int *)buf;
3260 switch (preftemp) {
3261 case IP6PO_TEMPADDR_SYSTEM:
3262 case IP6PO_TEMPADDR_NOTPREFER:
3263 case IP6PO_TEMPADDR_PREFER:
3264 break;
3265 default:
3266 return (EINVAL);
3267 }
3268 opt->ip6po_prefer_tempaddr = preftemp;
3269 break;
3270 }
3271
3272 default:
3273 return (ENOPROTOOPT);
3274 } /* end of switch */
3275
3276 return (0);
3277 }
3278
3279 /*
3280 * Routine called from ip6_output() to loop back a copy of an IP6 multicast
3281 * packet to the input queue of a specified interface. Note that this
3282 * calls the output routine of the loopback "driver", but with an interface
3283 * pointer that might NOT be lo0ifp -- easier than replicating that code here.
3284 */
3285 void
3286 ip6_mloopback(struct ifnet *ifp, struct mbuf *m,
3287 const struct sockaddr_in6 *dst)
3288 {
3289 struct mbuf *copym;
3290 struct ip6_hdr *ip6;
3291
3292 copym = m_copy(m, 0, M_COPYALL);
3293 if (copym == NULL)
3294 return;
3295
3296 /*
3297 * Make sure to deep-copy IPv6 header portion in case the data
3298 * is in an mbuf cluster, so that we can safely override the IPv6
3299 * header portion later.
3300 */
3301 if ((copym->m_flags & M_EXT) != 0 ||
3302 copym->m_len < sizeof(struct ip6_hdr)) {
3303 copym = m_pullup(copym, sizeof(struct ip6_hdr));
3304 if (copym == NULL)
3305 return;
3306 }
3307
3308 #ifdef DIAGNOSTIC
3309 if (copym->m_len < sizeof(*ip6)) {
3310 m_freem(copym);
3311 return;
3312 }
3313 #endif
3314
3315 ip6 = mtod(copym, struct ip6_hdr *);
3316 /*
3317 * clear embedded scope identifiers if necessary.
3318 * in6_clearscope will touch the addresses only when necessary.
3319 */
3320 in6_clearscope(&ip6->ip6_src);
3321 in6_clearscope(&ip6->ip6_dst);
3322
3323 (void)looutput(ifp, copym, (const struct sockaddr *)dst, NULL);
3324 }
3325
3326 /*
3327 * Chop IPv6 header off from the payload.
3328 */
3329 static int
3330 ip6_splithdr(struct mbuf *m, struct ip6_exthdrs *exthdrs)
3331 {
3332 struct mbuf *mh;
3333 struct ip6_hdr *ip6;
3334
3335 ip6 = mtod(m, struct ip6_hdr *);
3336 if (m->m_len > sizeof(*ip6)) {
3337 MGETHDR(mh, M_DONTWAIT, MT_HEADER);
3338 if (mh == NULL) {
3339 m_freem(m);
3340 return ENOBUFS;
3341 }
3342 M_MOVE_PKTHDR(mh, m);
3343 MH_ALIGN(mh, sizeof(*ip6));
3344 m->m_len -= sizeof(*ip6);
3345 m->m_data += sizeof(*ip6);
3346 mh->m_next = m;
3347 mh->m_len = sizeof(*ip6);
3348 memcpy(mtod(mh, void *), (void *)ip6, sizeof(*ip6));
3349 m = mh;
3350 }
3351 exthdrs->ip6e_ip6 = m;
3352 return 0;
3353 }
3354
3355 /*
3356 * Compute IPv6 extension header length.
3357 */
3358 int
3359 ip6_optlen(struct in6pcb *in6p)
3360 {
3361 int len;
3362
3363 if (!in6p->in6p_outputopts)
3364 return 0;
3365
3366 len = 0;
3367 #define elen(x) \
3368 (((struct ip6_ext *)(x)) ? (((struct ip6_ext *)(x))->ip6e_len + 1) << 3 : 0)
3369
3370 len += elen(in6p->in6p_outputopts->ip6po_hbh);
3371 len += elen(in6p->in6p_outputopts->ip6po_dest1);
3372 len += elen(in6p->in6p_outputopts->ip6po_rthdr);
3373 len += elen(in6p->in6p_outputopts->ip6po_dest2);
3374 return len;
3375 #undef elen
3376 }
3377
3378 /*
3379 * Ensure sending address is valid.
3380 * Returns 0 on success, -1 if an error should be sent back or 1
3381 * if the packet could be dropped without error (protocol dependent).
3382 */
3383 static int
3384 ip6_ifaddrvalid(const struct in6_addr *src, const struct in6_addr *dst)
3385 {
3386 struct sockaddr_in6 sin6;
3387 int s, error;
3388 struct ifaddr *ifa;
3389 struct in6_ifaddr *ia6;
3390
3391 if (IN6_IS_ADDR_UNSPECIFIED(src))
3392 return 0;
3393
3394 memset(&sin6, 0, sizeof(sin6));
3395 sin6.sin6_family = AF_INET6;
3396 sin6.sin6_len = sizeof(sin6);
3397 sin6.sin6_addr = *src;
3398
3399 s = pserialize_read_enter();
3400 ifa = ifa_ifwithaddr(sin6tosa(&sin6));
3401 if ((ia6 = ifatoia6(ifa)) == NULL ||
3402 ia6->ia6_flags & (IN6_IFF_ANYCAST | IN6_IFF_DUPLICATED))
3403 error = -1;
3404 else if (ia6->ia6_flags & IN6_IFF_TENTATIVE)
3405 error = 1;
3406 else if (ia6->ia6_flags & IN6_IFF_DETACHED &&
3407 (sin6.sin6_addr = *dst, ifa_ifwithaddr(sin6tosa(&sin6)) == NULL))
3408 /* Allow internal traffic to DETACHED addresses */
3409 error = 1;
3410 else
3411 error = 0;
3412 pserialize_read_exit(s);
3413
3414 return error;
3415 }
3416