ip6_output.c revision 1.140.8.2 1 /* $NetBSD: ip6_output.c,v 1.140.8.2 2012/04/05 21:33:46 mrg Exp $ */
2 /* $KAME: ip6_output.c,v 1.172 2001/03/25 09:55:56 itojun Exp $ */
3
4 /*
5 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. Neither the name of the project nor the names of its contributors
17 * may be used to endorse or promote products derived from this software
18 * without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 */
32
33 /*
34 * Copyright (c) 1982, 1986, 1988, 1990, 1993
35 * The Regents of the University of California. All rights reserved.
36 *
37 * Redistribution and use in source and binary forms, with or without
38 * modification, are permitted provided that the following conditions
39 * are met:
40 * 1. Redistributions of source code must retain the above copyright
41 * notice, this list of conditions and the following disclaimer.
42 * 2. Redistributions in binary form must reproduce the above copyright
43 * notice, this list of conditions and the following disclaimer in the
44 * documentation and/or other materials provided with the distribution.
45 * 3. Neither the name of the University nor the names of its contributors
46 * may be used to endorse or promote products derived from this software
47 * without specific prior written permission.
48 *
49 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
50 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
53 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59 * SUCH DAMAGE.
60 *
61 * @(#)ip_output.c 8.3 (Berkeley) 1/21/94
62 */
63
64 #include <sys/cdefs.h>
65 __KERNEL_RCSID(0, "$NetBSD: ip6_output.c,v 1.140.8.2 2012/04/05 21:33:46 mrg Exp $");
66
67 #include "opt_inet.h"
68 #include "opt_inet6.h"
69 #include "opt_ipsec.h"
70 #include "opt_pfil_hooks.h"
71
72 #include <sys/param.h>
73 #include <sys/malloc.h>
74 #include <sys/mbuf.h>
75 #include <sys/errno.h>
76 #include <sys/protosw.h>
77 #include <sys/socket.h>
78 #include <sys/socketvar.h>
79 #include <sys/systm.h>
80 #include <sys/proc.h>
81 #include <sys/kauth.h>
82
83 #include <net/if.h>
84 #include <net/route.h>
85 #ifdef PFIL_HOOKS
86 #include <net/pfil.h>
87 #endif
88
89 #include <netinet/in.h>
90 #include <netinet/in_var.h>
91 #include <netinet/ip6.h>
92 #include <netinet/icmp6.h>
93 #include <netinet/in_offload.h>
94 #include <netinet6/in6_offload.h>
95 #include <netinet6/ip6_var.h>
96 #include <netinet6/ip6_private.h>
97 #include <netinet6/in6_pcb.h>
98 #include <netinet6/nd6.h>
99 #include <netinet6/ip6protosw.h>
100 #include <netinet6/scope6_var.h>
101
102 #ifdef FAST_IPSEC
103 #include <netipsec/ipsec.h>
104 #include <netipsec/ipsec6.h>
105 #include <netipsec/key.h>
106 #include <netipsec/xform.h>
107 #endif
108
109
110 #include <net/net_osdep.h>
111
112 #ifdef PFIL_HOOKS
113 extern struct pfil_head inet6_pfil_hook; /* XXX */
114 #endif
115
/*
 * Working set of mbufs used by ip6_output() while it assembles an
 * outgoing packet.  Each member is either NULL or points to the mbuf
 * holding the corresponding piece of the header chain.
 */
struct ip6_exthdrs {
	struct mbuf	*ip6e_ip6;	/* IPv6 header, once split from payload */
	struct mbuf	*ip6e_hbh;	/* Hop-by-Hop options header */
	struct mbuf	*ip6e_dest1;	/* Destination options header (1st part) */
	struct mbuf	*ip6e_rthdr;	/* Routing header */
	struct mbuf	*ip6e_dest2;	/* Destination options header (2nd part) */
};
123
124 static int ip6_pcbopt(int, u_char *, int, struct ip6_pktopts **,
125 kauth_cred_t, int);
126 static int ip6_getpcbopt(struct ip6_pktopts *, int, struct sockopt *);
127 static int ip6_setpktopt(int, u_char *, int, struct ip6_pktopts *, kauth_cred_t,
128 int, int, int);
129 static int ip6_setmoptions(const struct sockopt *, struct ip6_moptions **);
130 static int ip6_getmoptions(struct sockopt *, struct ip6_moptions *);
131 static int ip6_copyexthdr(struct mbuf **, void *, int);
132 static int ip6_insertfraghdr(struct mbuf *, struct mbuf *, int,
133 struct ip6_frag **);
134 static int ip6_insert_jumboopt(struct ip6_exthdrs *, u_int32_t);
135 static int ip6_splithdr(struct mbuf *, struct ip6_exthdrs *);
136 static int ip6_getpmtu(struct route *, struct route *, struct ifnet *,
137 const struct in6_addr *, u_long *, int *);
138 static int copypktopts(struct ip6_pktopts *, struct ip6_pktopts *, int);
139
140 #ifdef RFC2292
141 static int ip6_pcbopts(struct ip6_pktopts **, struct socket *, struct sockopt *);
142 #endif
143
144 /*
145 * IP6 output. The packet in mbuf chain m contains a skeletal IP6
146 * header (with pri, len, nxt, hlim, src, dst).
147 * This function may modify ver and hlim only.
148 * The mbuf chain containing the packet will be freed.
149 * The mbuf opt, if present, will not be freed.
150 *
151 * type of "mtu": rt_rmx.rmx_mtu is u_long, ifnet.ifr_mtu is int, and
152 * nd_ifinfo.linkmtu is u_int32_t. so we use u_long to hold largest one,
153 * which is rt_rmx.rmx_mtu.
154 */
155 int
156 ip6_output(
157 struct mbuf *m0,
158 struct ip6_pktopts *opt,
159 struct route *ro,
160 int flags,
161 struct ip6_moptions *im6o,
162 struct socket *so,
163 struct ifnet **ifpp /* XXX: just for statistics */
164 )
165 {
166 struct ip6_hdr *ip6, *mhip6;
167 struct ifnet *ifp, *origifp;
168 struct mbuf *m = m0;
169 int hlen, tlen, len, off;
170 bool tso;
171 struct route ip6route;
172 struct rtentry *rt = NULL;
173 const struct sockaddr_in6 *dst = NULL;
174 struct sockaddr_in6 src_sa, dst_sa;
175 int error = 0;
176 struct in6_ifaddr *ia = NULL;
177 u_long mtu;
178 int alwaysfrag, dontfrag;
179 u_int32_t optlen = 0, plen = 0, unfragpartlen = 0;
180 struct ip6_exthdrs exthdrs;
181 struct in6_addr finaldst, src0, dst0;
182 u_int32_t zone;
183 struct route *ro_pmtu = NULL;
184 int hdrsplit = 0;
185 int needipsec = 0;
186 #ifdef FAST_IPSEC
187 struct secpolicy *sp = NULL;
188 int s;
189 #endif
190
191 memset(&ip6route, 0, sizeof(ip6route));
192
193 #ifdef DIAGNOSTIC
194 if ((m->m_flags & M_PKTHDR) == 0)
195 panic("ip6_output: no HDR");
196
197 if ((m->m_pkthdr.csum_flags &
198 (M_CSUM_TCPv4|M_CSUM_UDPv4|M_CSUM_TSOv4)) != 0) {
199 panic("ip6_output: IPv4 checksum offload flags: %d",
200 m->m_pkthdr.csum_flags);
201 }
202
203 if ((m->m_pkthdr.csum_flags & (M_CSUM_TCPv6|M_CSUM_UDPv6)) ==
204 (M_CSUM_TCPv6|M_CSUM_UDPv6)) {
205 panic("ip6_output: conflicting checksum offload flags: %d",
206 m->m_pkthdr.csum_flags);
207 }
208 #endif
209
210 M_CSUM_DATA_IPv6_HL_SET(m->m_pkthdr.csum_data, sizeof(struct ip6_hdr));
211
212 #define MAKE_EXTHDR(hp, mp) \
213 do { \
214 if (hp) { \
215 struct ip6_ext *eh = (struct ip6_ext *)(hp); \
216 error = ip6_copyexthdr((mp), (void *)(hp), \
217 ((eh)->ip6e_len + 1) << 3); \
218 if (error) \
219 goto freehdrs; \
220 } \
221 } while (/*CONSTCOND*/ 0)
222
223 memset(&exthdrs, 0, sizeof(exthdrs));
224 if (opt) {
225 /* Hop-by-Hop options header */
226 MAKE_EXTHDR(opt->ip6po_hbh, &exthdrs.ip6e_hbh);
227 /* Destination options header(1st part) */
228 MAKE_EXTHDR(opt->ip6po_dest1, &exthdrs.ip6e_dest1);
229 /* Routing header */
230 MAKE_EXTHDR(opt->ip6po_rthdr, &exthdrs.ip6e_rthdr);
231 /* Destination options header(2nd part) */
232 MAKE_EXTHDR(opt->ip6po_dest2, &exthdrs.ip6e_dest2);
233 }
234
235 /*
236 * Calculate the total length of the extension header chain.
237 * Keep the length of the unfragmentable part for fragmentation.
238 */
239 optlen = 0;
240 if (exthdrs.ip6e_hbh) optlen += exthdrs.ip6e_hbh->m_len;
241 if (exthdrs.ip6e_dest1) optlen += exthdrs.ip6e_dest1->m_len;
242 if (exthdrs.ip6e_rthdr) optlen += exthdrs.ip6e_rthdr->m_len;
243 unfragpartlen = optlen + sizeof(struct ip6_hdr);
244 /* NOTE: we don't add AH/ESP length here. do that later. */
245 if (exthdrs.ip6e_dest2) optlen += exthdrs.ip6e_dest2->m_len;
246
247 #ifdef FAST_IPSEC
248 /* Check the security policy (SP) for the packet */
249
250 sp = ipsec6_check_policy(m,so,flags,&needipsec,&error);
251 if (error != 0) {
252 /*
253 * Hack: -EINVAL is used to signal that a packet
254 * should be silently discarded. This is typically
255 * because we asked key management for an SA and
256 * it was delayed (e.g. kicked up to IKE).
257 */
258 if (error == -EINVAL)
259 error = 0;
260 goto freehdrs;
261 }
262 #endif /* FAST_IPSEC */
263
264
265 if (needipsec &&
266 (m->m_pkthdr.csum_flags & (M_CSUM_UDPv6|M_CSUM_TCPv6)) != 0) {
267 in6_delayed_cksum(m);
268 m->m_pkthdr.csum_flags &= ~(M_CSUM_UDPv6|M_CSUM_TCPv6);
269 }
270
271
272 /*
273 * If we need IPsec, or there is at least one extension header,
274 * separate IP6 header from the payload.
275 */
276 if ((needipsec || optlen) && !hdrsplit) {
277 if ((error = ip6_splithdr(m, &exthdrs)) != 0) {
278 m = NULL;
279 goto freehdrs;
280 }
281 m = exthdrs.ip6e_ip6;
282 hdrsplit++;
283 }
284
285 /* adjust pointer */
286 ip6 = mtod(m, struct ip6_hdr *);
287
288 /* adjust mbuf packet header length */
289 m->m_pkthdr.len += optlen;
290 plen = m->m_pkthdr.len - sizeof(*ip6);
291
292 /* If this is a jumbo payload, insert a jumbo payload option. */
293 if (plen > IPV6_MAXPACKET) {
294 if (!hdrsplit) {
295 if ((error = ip6_splithdr(m, &exthdrs)) != 0) {
296 m = NULL;
297 goto freehdrs;
298 }
299 m = exthdrs.ip6e_ip6;
300 hdrsplit++;
301 }
302 /* adjust pointer */
303 ip6 = mtod(m, struct ip6_hdr *);
304 if ((error = ip6_insert_jumboopt(&exthdrs, plen)) != 0)
305 goto freehdrs;
306 optlen += 8; /* XXX JUMBOOPTLEN */
307 ip6->ip6_plen = 0;
308 } else
309 ip6->ip6_plen = htons(plen);
310
311 /*
312 * Concatenate headers and fill in next header fields.
313 * Here we have, on "m"
314 * IPv6 payload
315 * and we insert headers accordingly. Finally, we should be getting:
316 * IPv6 hbh dest1 rthdr ah* [esp* dest2 payload]
317 *
318 * during the header composing process, "m" points to IPv6 header.
319 * "mprev" points to an extension header prior to esp.
320 */
321 {
322 u_char *nexthdrp = &ip6->ip6_nxt;
323 struct mbuf *mprev = m;
324
325 /*
326 * we treat dest2 specially. this makes IPsec processing
327 * much easier. the goal here is to make mprev point the
328 * mbuf prior to dest2.
329 *
330 * result: IPv6 dest2 payload
331 * m and mprev will point to IPv6 header.
332 */
333 if (exthdrs.ip6e_dest2) {
334 if (!hdrsplit)
335 panic("assumption failed: hdr not split");
336 exthdrs.ip6e_dest2->m_next = m->m_next;
337 m->m_next = exthdrs.ip6e_dest2;
338 *mtod(exthdrs.ip6e_dest2, u_char *) = ip6->ip6_nxt;
339 ip6->ip6_nxt = IPPROTO_DSTOPTS;
340 }
341
342 #define MAKE_CHAIN(m, mp, p, i)\
343 do {\
344 if (m) {\
345 if (!hdrsplit) \
346 panic("assumption failed: hdr not split"); \
347 *mtod((m), u_char *) = *(p);\
348 *(p) = (i);\
349 p = mtod((m), u_char *);\
350 (m)->m_next = (mp)->m_next;\
351 (mp)->m_next = (m);\
352 (mp) = (m);\
353 }\
354 } while (/*CONSTCOND*/ 0)
355 /*
356 * result: IPv6 hbh dest1 rthdr dest2 payload
357 * m will point to IPv6 header. mprev will point to the
358 * extension header prior to dest2 (rthdr in the above case).
359 */
360 MAKE_CHAIN(exthdrs.ip6e_hbh, mprev, nexthdrp, IPPROTO_HOPOPTS);
361 MAKE_CHAIN(exthdrs.ip6e_dest1, mprev, nexthdrp,
362 IPPROTO_DSTOPTS);
363 MAKE_CHAIN(exthdrs.ip6e_rthdr, mprev, nexthdrp,
364 IPPROTO_ROUTING);
365
366 M_CSUM_DATA_IPv6_HL_SET(m->m_pkthdr.csum_data,
367 sizeof(struct ip6_hdr) + optlen);
368 }
369
370 /*
371 * If there is a routing header, replace destination address field
372 * with the first hop of the routing header.
373 */
374 if (exthdrs.ip6e_rthdr) {
375 struct ip6_rthdr *rh;
376 struct ip6_rthdr0 *rh0;
377 struct in6_addr *addr;
378 struct sockaddr_in6 sa;
379
380 rh = (struct ip6_rthdr *)(mtod(exthdrs.ip6e_rthdr,
381 struct ip6_rthdr *));
382 finaldst = ip6->ip6_dst;
383 switch (rh->ip6r_type) {
384 case IPV6_RTHDR_TYPE_0:
385 rh0 = (struct ip6_rthdr0 *)rh;
386 addr = (struct in6_addr *)(rh0 + 1);
387
388 /*
389 * construct a sockaddr_in6 form of
390 * the first hop.
391 *
392 * XXX: we may not have enough
393 * information about its scope zone;
394 * there is no standard API to pass
395 * the information from the
396 * application.
397 */
398 sockaddr_in6_init(&sa, addr, 0, 0, 0);
399 if ((error = sa6_embedscope(&sa,
400 ip6_use_defzone)) != 0) {
401 goto bad;
402 }
403 ip6->ip6_dst = sa.sin6_addr;
404 (void)memmove(&addr[0], &addr[1],
405 sizeof(struct in6_addr) *
406 (rh0->ip6r0_segleft - 1));
407 addr[rh0->ip6r0_segleft - 1] = finaldst;
408 /* XXX */
409 in6_clearscope(addr + rh0->ip6r0_segleft - 1);
410 break;
411 default: /* is it possible? */
412 error = EINVAL;
413 goto bad;
414 }
415 }
416
417 /* Source address validation */
418 if (IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_src) &&
419 (flags & IPV6_UNSPECSRC) == 0) {
420 error = EOPNOTSUPP;
421 IP6_STATINC(IP6_STAT_BADSCOPE);
422 goto bad;
423 }
424 if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_src)) {
425 error = EOPNOTSUPP;
426 IP6_STATINC(IP6_STAT_BADSCOPE);
427 goto bad;
428 }
429
430 IP6_STATINC(IP6_STAT_LOCALOUT);
431
432 /*
433 * Route packet.
434 */
435 /* initialize cached route */
436 if (ro == NULL) {
437 ro = &ip6route;
438 }
439 ro_pmtu = ro;
440 if (opt && opt->ip6po_rthdr)
441 ro = &opt->ip6po_route;
442
443 /*
444 * if specified, try to fill in the traffic class field.
445 * do not override if a non-zero value is already set.
446 * we check the diffserv field and the ecn field separately.
447 */
448 if (opt && opt->ip6po_tclass >= 0) {
449 int mask = 0;
450
451 if ((ip6->ip6_flow & htonl(0xfc << 20)) == 0)
452 mask |= 0xfc;
453 if ((ip6->ip6_flow & htonl(0x03 << 20)) == 0)
454 mask |= 0x03;
455 if (mask != 0)
456 ip6->ip6_flow |= htonl((opt->ip6po_tclass & mask) << 20);
457 }
458
459 /* fill in or override the hop limit field, if necessary. */
460 if (opt && opt->ip6po_hlim != -1)
461 ip6->ip6_hlim = opt->ip6po_hlim & 0xff;
462 else if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
463 if (im6o != NULL)
464 ip6->ip6_hlim = im6o->im6o_multicast_hlim;
465 else
466 ip6->ip6_hlim = ip6_defmcasthlim;
467 }
468
469 #ifdef FAST_IPSEC
470 if (needipsec) {
471 s = splsoftnet();
472 error = ipsec6_process_packet(m,sp->req);
473
474 /*
475 * Preserve KAME behaviour: ENOENT can be returned
476 * when an SA acquire is in progress. Don't propagate
477 * this to user-level; it confuses applications.
478 * XXX this will go away when the SADB is redone.
479 */
480 if (error == ENOENT)
481 error = 0;
482 splx(s);
483 goto done;
484 }
485 #endif /* FAST_IPSEC */
486
487
488
489 /* adjust pointer */
490 ip6 = mtod(m, struct ip6_hdr *);
491
492 sockaddr_in6_init(&dst_sa, &ip6->ip6_dst, 0, 0, 0);
493 if ((error = in6_selectroute(&dst_sa, opt, im6o, ro,
494 &ifp, &rt, 0)) != 0) {
495 if (ifp != NULL)
496 in6_ifstat_inc(ifp, ifs6_out_discard);
497 goto bad;
498 }
499 if (rt == NULL) {
500 /*
501 * If in6_selectroute() does not return a route entry,
502 * dst may not have been updated.
503 */
504 error = rtcache_setdst(ro, sin6tosa(&dst_sa));
505 if (error) {
506 goto bad;
507 }
508 }
509
510 /*
511 * then rt (for unicast) and ifp must be non-NULL valid values.
512 */
513 if ((flags & IPV6_FORWARDING) == 0) {
514 /* XXX: the FORWARDING flag can be set for mrouting. */
515 in6_ifstat_inc(ifp, ifs6_out_request);
516 }
517 if (rt != NULL) {
518 ia = (struct in6_ifaddr *)(rt->rt_ifa);
519 rt->rt_use++;
520 }
521
522 /*
523 * The outgoing interface must be in the zone of source and
524 * destination addresses. We should use ia_ifp to support the
525 * case of sending packets to an address of our own.
526 */
527 if (ia != NULL && ia->ia_ifp)
528 origifp = ia->ia_ifp;
529 else
530 origifp = ifp;
531
532 src0 = ip6->ip6_src;
533 if (in6_setscope(&src0, origifp, &zone))
534 goto badscope;
535 sockaddr_in6_init(&src_sa, &ip6->ip6_src, 0, 0, 0);
536 if (sa6_recoverscope(&src_sa) || zone != src_sa.sin6_scope_id)
537 goto badscope;
538
539 dst0 = ip6->ip6_dst;
540 if (in6_setscope(&dst0, origifp, &zone))
541 goto badscope;
542 /* re-initialize to be sure */
543 sockaddr_in6_init(&dst_sa, &ip6->ip6_dst, 0, 0, 0);
544 if (sa6_recoverscope(&dst_sa) || zone != dst_sa.sin6_scope_id)
545 goto badscope;
546
547 /* scope check is done. */
548
549 if (rt == NULL || IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
550 if (dst == NULL)
551 dst = satocsin6(rtcache_getdst(ro));
552 KASSERT(dst != NULL);
553 } else if (opt && rtcache_validate(&opt->ip6po_nextroute) != NULL) {
554 /*
555 * The nexthop is explicitly specified by the
556 * application. We assume the next hop is an IPv6
557 * address.
558 */
559 dst = (struct sockaddr_in6 *)opt->ip6po_nexthop;
560 } else if ((rt->rt_flags & RTF_GATEWAY))
561 dst = (struct sockaddr_in6 *)rt->rt_gateway;
562 else if (dst == NULL)
563 dst = satocsin6(rtcache_getdst(ro));
564
565 /*
566 * XXXXXX: original code follows:
567 */
568 if (!IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst))
569 m->m_flags &= ~(M_BCAST | M_MCAST); /* just in case */
570 else {
571 struct in6_multi *in6m;
572
573 m->m_flags = (m->m_flags & ~M_BCAST) | M_MCAST;
574
575 in6_ifstat_inc(ifp, ifs6_out_mcast);
576
577 /*
578 * Confirm that the outgoing interface supports multicast.
579 */
580 if (!(ifp->if_flags & IFF_MULTICAST)) {
581 IP6_STATINC(IP6_STAT_NOROUTE);
582 in6_ifstat_inc(ifp, ifs6_out_discard);
583 error = ENETUNREACH;
584 goto bad;
585 }
586
587 IN6_LOOKUP_MULTI(ip6->ip6_dst, ifp, in6m);
588 if (in6m != NULL &&
589 (im6o == NULL || im6o->im6o_multicast_loop)) {
590 /*
591 * If we belong to the destination multicast group
592 * on the outgoing interface, and the caller did not
593 * forbid loopback, loop back a copy.
594 */
595 KASSERT(dst != NULL);
596 ip6_mloopback(ifp, m, dst);
597 } else {
598 /*
599 * If we are acting as a multicast router, perform
600 * multicast forwarding as if the packet had just
601 * arrived on the interface to which we are about
602 * to send. The multicast forwarding function
603 * recursively calls this function, using the
604 * IPV6_FORWARDING flag to prevent infinite recursion.
605 *
606 * Multicasts that are looped back by ip6_mloopback(),
607 * above, will be forwarded by the ip6_input() routine,
608 * if necessary.
609 */
610 if (ip6_mrouter && (flags & IPV6_FORWARDING) == 0) {
611 if (ip6_mforward(ip6, ifp, m) != 0) {
612 m_freem(m);
613 goto done;
614 }
615 }
616 }
617 /*
618 * Multicasts with a hoplimit of zero may be looped back,
619 * above, but must not be transmitted on a network.
620 * Also, multicasts addressed to the loopback interface
621 * are not sent -- the above call to ip6_mloopback() will
622 * loop back a copy if this host actually belongs to the
623 * destination group on the loopback interface.
624 */
625 if (ip6->ip6_hlim == 0 || (ifp->if_flags & IFF_LOOPBACK) ||
626 IN6_IS_ADDR_MC_INTFACELOCAL(&ip6->ip6_dst)) {
627 m_freem(m);
628 goto done;
629 }
630 }
631
632 /*
633 * Fill the outgoing inteface to tell the upper layer
634 * to increment per-interface statistics.
635 */
636 if (ifpp)
637 *ifpp = ifp;
638
639 /* Determine path MTU. */
640 if ((error = ip6_getpmtu(ro_pmtu, ro, ifp, &finaldst, &mtu,
641 &alwaysfrag)) != 0)
642 goto bad;
643
644 /*
645 * The caller of this function may specify to use the minimum MTU
646 * in some cases.
647 * An advanced API option (IPV6_USE_MIN_MTU) can also override MTU
648 * setting. The logic is a bit complicated; by default, unicast
649 * packets will follow path MTU while multicast packets will be sent at
650 * the minimum MTU. If IP6PO_MINMTU_ALL is specified, all packets
651 * including unicast ones will be sent at the minimum MTU. Multicast
652 * packets will always be sent at the minimum MTU unless
653 * IP6PO_MINMTU_DISABLE is explicitly specified.
654 * See RFC 3542 for more details.
655 */
656 if (mtu > IPV6_MMTU) {
657 if ((flags & IPV6_MINMTU))
658 mtu = IPV6_MMTU;
659 else if (opt && opt->ip6po_minmtu == IP6PO_MINMTU_ALL)
660 mtu = IPV6_MMTU;
661 else if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) &&
662 (opt == NULL ||
663 opt->ip6po_minmtu != IP6PO_MINMTU_DISABLE)) {
664 mtu = IPV6_MMTU;
665 }
666 }
667
668 /*
669 * clear embedded scope identifiers if necessary.
670 * in6_clearscope will touch the addresses only when necessary.
671 */
672 in6_clearscope(&ip6->ip6_src);
673 in6_clearscope(&ip6->ip6_dst);
674
675 /*
676 * If the outgoing packet contains a hop-by-hop options header,
677 * it must be examined and processed even by the source node.
678 * (RFC 2460, section 4.)
679 */
680 if (ip6->ip6_nxt == IPV6_HOPOPTS) {
681 u_int32_t dummy1; /* XXX unused */
682 u_int32_t dummy2; /* XXX unused */
683 int hoff = sizeof(struct ip6_hdr);
684
685 if (ip6_hopopts_input(&dummy1, &dummy2, &m, &hoff)) {
686 /* m was already freed at this point */
687 error = EINVAL;/* better error? */
688 goto done;
689 }
690
691 ip6 = mtod(m, struct ip6_hdr *);
692 }
693
694 #ifdef PFIL_HOOKS
695 /*
696 * Run through list of hooks for output packets.
697 */
698 if ((error = pfil_run_hooks(&inet6_pfil_hook, &m, ifp, PFIL_OUT)) != 0)
699 goto done;
700 if (m == NULL)
701 goto done;
702 ip6 = mtod(m, struct ip6_hdr *);
703 #endif /* PFIL_HOOKS */
704 /*
705 * Send the packet to the outgoing interface.
706 * If necessary, do IPv6 fragmentation before sending.
707 *
708 * the logic here is rather complex:
709 * 1: normal case (dontfrag == 0, alwaysfrag == 0)
710 * 1-a: send as is if tlen <= path mtu
711 * 1-b: fragment if tlen > path mtu
712 *
713 * 2: if user asks us not to fragment (dontfrag == 1)
714 * 2-a: send as is if tlen <= interface mtu
715 * 2-b: error if tlen > interface mtu
716 *
717 * 3: if we always need to attach fragment header (alwaysfrag == 1)
718 * always fragment
719 *
720 * 4: if dontfrag == 1 && alwaysfrag == 1
721 * error, as we cannot handle this conflicting request
722 */
723 tlen = m->m_pkthdr.len;
724 tso = (m->m_pkthdr.csum_flags & M_CSUM_TSOv6) != 0;
725 if (opt && (opt->ip6po_flags & IP6PO_DONTFRAG))
726 dontfrag = 1;
727 else
728 dontfrag = 0;
729
730 if (dontfrag && alwaysfrag) { /* case 4 */
731 /* conflicting request - can't transmit */
732 error = EMSGSIZE;
733 goto bad;
734 }
735 if (dontfrag && (!tso && tlen > IN6_LINKMTU(ifp))) { /* case 2-b */
736 /*
737 * Even if the DONTFRAG option is specified, we cannot send the
738 * packet when the data length is larger than the MTU of the
739 * outgoing interface.
740 * Notify the error by sending IPV6_PATHMTU ancillary data as
741 * well as returning an error code (the latter is not described
742 * in the API spec.)
743 */
744 u_int32_t mtu32;
745 struct ip6ctlparam ip6cp;
746
747 mtu32 = (u_int32_t)mtu;
748 memset(&ip6cp, 0, sizeof(ip6cp));
749 ip6cp.ip6c_cmdarg = (void *)&mtu32;
750 pfctlinput2(PRC_MSGSIZE,
751 rtcache_getdst(ro_pmtu), &ip6cp);
752
753 error = EMSGSIZE;
754 goto bad;
755 }
756
757 /*
758 * transmit packet without fragmentation
759 */
760 if (dontfrag || (!alwaysfrag && (tlen <= mtu || tso))) {
761 /* case 1-a and 2-a */
762 struct in6_ifaddr *ia6;
763 int sw_csum;
764
765 ip6 = mtod(m, struct ip6_hdr *);
766 ia6 = in6_ifawithifp(ifp, &ip6->ip6_src);
767 if (ia6) {
768 /* Record statistics for this interface address. */
769 ia6->ia_ifa.ifa_data.ifad_outbytes += m->m_pkthdr.len;
770 }
771
772 sw_csum = m->m_pkthdr.csum_flags & ~ifp->if_csum_flags_tx;
773 if ((sw_csum & (M_CSUM_UDPv6|M_CSUM_TCPv6)) != 0) {
774 if (IN6_NEED_CHECKSUM(ifp,
775 sw_csum & (M_CSUM_UDPv6|M_CSUM_TCPv6))) {
776 in6_delayed_cksum(m);
777 }
778 m->m_pkthdr.csum_flags &= ~(M_CSUM_UDPv6|M_CSUM_TCPv6);
779 }
780
781 KASSERT(dst != NULL);
782 if (__predict_true(!tso ||
783 (ifp->if_capenable & IFCAP_TSOv6) != 0)) {
784 error = nd6_output(ifp, origifp, m, dst, rt);
785 } else {
786 error = ip6_tso_output(ifp, origifp, m, dst, rt);
787 }
788 goto done;
789 }
790
791 if (tso) {
792 error = EINVAL; /* XXX */
793 goto bad;
794 }
795
796 /*
797 * try to fragment the packet. case 1-b and 3
798 */
799 if (mtu < IPV6_MMTU) {
800 /* path MTU cannot be less than IPV6_MMTU */
801 error = EMSGSIZE;
802 in6_ifstat_inc(ifp, ifs6_out_fragfail);
803 goto bad;
804 } else if (ip6->ip6_plen == 0) {
805 /* jumbo payload cannot be fragmented */
806 error = EMSGSIZE;
807 in6_ifstat_inc(ifp, ifs6_out_fragfail);
808 goto bad;
809 } else {
810 struct mbuf **mnext, *m_frgpart;
811 struct ip6_frag *ip6f;
812 u_int32_t id = htonl(ip6_randomid());
813 u_char nextproto;
814 #if 0 /* see below */
815 struct ip6ctlparam ip6cp;
816 u_int32_t mtu32;
817 #endif
818
819 /*
820 * Too large for the destination or interface;
821 * fragment if possible.
822 * Must be able to put at least 8 bytes per fragment.
823 */
824 hlen = unfragpartlen;
825 if (mtu > IPV6_MAXPACKET)
826 mtu = IPV6_MAXPACKET;
827
828 #if 0
829 /*
830 * It is believed this code is a leftover from the
831 * development of the IPV6_RECVPATHMTU sockopt and
832 * associated work to implement RFC3542.
833 * It's not entirely clear what the intent of the API
834 * is at this point, so disable this code for now.
835 * The IPV6_RECVPATHMTU sockopt and/or IPV6_DONTFRAG
836 * will send notifications if the application requests.
837 */
838
839 /* Notify a proper path MTU to applications. */
840 mtu32 = (u_int32_t)mtu;
841 memset(&ip6cp, 0, sizeof(ip6cp));
842 ip6cp.ip6c_cmdarg = (void *)&mtu32;
843 pfctlinput2(PRC_MSGSIZE,
844 rtcache_getdst(ro_pmtu), &ip6cp);
845 #endif
846
847 len = (mtu - hlen - sizeof(struct ip6_frag)) & ~7;
848 if (len < 8) {
849 error = EMSGSIZE;
850 in6_ifstat_inc(ifp, ifs6_out_fragfail);
851 goto bad;
852 }
853
854 mnext = &m->m_nextpkt;
855
856 /*
857 * Change the next header field of the last header in the
858 * unfragmentable part.
859 */
860 if (exthdrs.ip6e_rthdr) {
861 nextproto = *mtod(exthdrs.ip6e_rthdr, u_char *);
862 *mtod(exthdrs.ip6e_rthdr, u_char *) = IPPROTO_FRAGMENT;
863 } else if (exthdrs.ip6e_dest1) {
864 nextproto = *mtod(exthdrs.ip6e_dest1, u_char *);
865 *mtod(exthdrs.ip6e_dest1, u_char *) = IPPROTO_FRAGMENT;
866 } else if (exthdrs.ip6e_hbh) {
867 nextproto = *mtod(exthdrs.ip6e_hbh, u_char *);
868 *mtod(exthdrs.ip6e_hbh, u_char *) = IPPROTO_FRAGMENT;
869 } else {
870 nextproto = ip6->ip6_nxt;
871 ip6->ip6_nxt = IPPROTO_FRAGMENT;
872 }
873
874 if ((m->m_pkthdr.csum_flags & (M_CSUM_UDPv6|M_CSUM_TCPv6))
875 != 0) {
876 if (IN6_NEED_CHECKSUM(ifp,
877 m->m_pkthdr.csum_flags &
878 (M_CSUM_UDPv6|M_CSUM_TCPv6))) {
879 in6_delayed_cksum(m);
880 }
881 m->m_pkthdr.csum_flags &= ~(M_CSUM_UDPv6|M_CSUM_TCPv6);
882 }
883
884 /*
885 * Loop through length of segment after first fragment,
886 * make new header and copy data of each part and link onto
887 * chain.
888 */
889 m0 = m;
890 for (off = hlen; off < tlen; off += len) {
891 struct mbuf *mlast;
892
893 MGETHDR(m, M_DONTWAIT, MT_HEADER);
894 if (!m) {
895 error = ENOBUFS;
896 IP6_STATINC(IP6_STAT_ODROPPED);
897 goto sendorfree;
898 }
899 m->m_pkthdr.rcvif = NULL;
900 m->m_flags = m0->m_flags & M_COPYFLAGS;
901 *mnext = m;
902 mnext = &m->m_nextpkt;
903 m->m_data += max_linkhdr;
904 mhip6 = mtod(m, struct ip6_hdr *);
905 *mhip6 = *ip6;
906 m->m_len = sizeof(*mhip6);
907 error = ip6_insertfraghdr(m0, m, hlen, &ip6f);
908 if (error) {
909 IP6_STATINC(IP6_STAT_ODROPPED);
910 goto sendorfree;
911 }
912 ip6f->ip6f_offlg = htons((u_int16_t)((off - hlen) & ~7));
913 if (off + len >= tlen)
914 len = tlen - off;
915 else
916 ip6f->ip6f_offlg |= IP6F_MORE_FRAG;
917 mhip6->ip6_plen = htons((u_int16_t)(len + hlen +
918 sizeof(*ip6f) - sizeof(struct ip6_hdr)));
919 if ((m_frgpart = m_copy(m0, off, len)) == 0) {
920 error = ENOBUFS;
921 IP6_STATINC(IP6_STAT_ODROPPED);
922 goto sendorfree;
923 }
924 for (mlast = m; mlast->m_next; mlast = mlast->m_next)
925 ;
926 mlast->m_next = m_frgpart;
927 m->m_pkthdr.len = len + hlen + sizeof(*ip6f);
928 m->m_pkthdr.rcvif = NULL;
929 ip6f->ip6f_reserved = 0;
930 ip6f->ip6f_ident = id;
931 ip6f->ip6f_nxt = nextproto;
932 IP6_STATINC(IP6_STAT_OFRAGMENTS);
933 in6_ifstat_inc(ifp, ifs6_out_fragcreat);
934 }
935
936 in6_ifstat_inc(ifp, ifs6_out_fragok);
937 }
938
939 /*
940 * Remove leading garbages.
941 */
942 sendorfree:
943 m = m0->m_nextpkt;
944 m0->m_nextpkt = 0;
945 m_freem(m0);
946 for (m0 = m; m; m = m0) {
947 m0 = m->m_nextpkt;
948 m->m_nextpkt = 0;
949 if (error == 0) {
950 struct in6_ifaddr *ia6;
951 ip6 = mtod(m, struct ip6_hdr *);
952 ia6 = in6_ifawithifp(ifp, &ip6->ip6_src);
953 if (ia6) {
954 /*
955 * Record statistics for this interface
956 * address.
957 */
958 ia6->ia_ifa.ifa_data.ifad_outbytes +=
959 m->m_pkthdr.len;
960 }
961 KASSERT(dst != NULL);
962 error = nd6_output(ifp, origifp, m, dst, rt);
963 } else
964 m_freem(m);
965 }
966
967 if (error == 0)
968 IP6_STATINC(IP6_STAT_FRAGMENTED);
969
970 done:
971 rtcache_free(&ip6route);
972
973 #ifdef FAST_IPSEC
974 if (sp != NULL)
975 KEY_FREESP(&sp);
976 #endif /* FAST_IPSEC */
977
978
979 return (error);
980
981 freehdrs:
982 m_freem(exthdrs.ip6e_hbh); /* m_freem will check if mbuf is 0 */
983 m_freem(exthdrs.ip6e_dest1);
984 m_freem(exthdrs.ip6e_rthdr);
985 m_freem(exthdrs.ip6e_dest2);
986 /* FALLTHROUGH */
987 bad:
988 m_freem(m);
989 goto done;
990 badscope:
991 IP6_STATINC(IP6_STAT_BADSCOPE);
992 in6_ifstat_inc(origifp, ifs6_out_discard);
993 if (error == 0)
994 error = EHOSTUNREACH; /* XXX */
995 goto bad;
996 }
997
998 static int
999 ip6_copyexthdr(struct mbuf **mp, void *hdr, int hlen)
1000 {
1001 struct mbuf *m;
1002
1003 if (hlen > MCLBYTES)
1004 return (ENOBUFS); /* XXX */
1005
1006 MGET(m, M_DONTWAIT, MT_DATA);
1007 if (!m)
1008 return (ENOBUFS);
1009
1010 if (hlen > MLEN) {
1011 MCLGET(m, M_DONTWAIT);
1012 if ((m->m_flags & M_EXT) == 0) {
1013 m_free(m);
1014 return (ENOBUFS);
1015 }
1016 }
1017 m->m_len = hlen;
1018 if (hdr)
1019 bcopy(hdr, mtod(m, void *), hlen);
1020
1021 *mp = m;
1022 return (0);
1023 }
1024
1025 /*
1026 * Process a delayed payload checksum calculation.
1027 */
1028 void
1029 in6_delayed_cksum(struct mbuf *m)
1030 {
1031 uint16_t csum, offset;
1032
1033 KASSERT((m->m_pkthdr.csum_flags & (M_CSUM_UDPv6|M_CSUM_TCPv6)) != 0);
1034 KASSERT((~m->m_pkthdr.csum_flags & (M_CSUM_UDPv6|M_CSUM_TCPv6)) != 0);
1035 KASSERT((m->m_pkthdr.csum_flags
1036 & (M_CSUM_UDPv4|M_CSUM_TCPv4|M_CSUM_TSOv4)) == 0);
1037
1038 offset = M_CSUM_DATA_IPv6_HL(m->m_pkthdr.csum_data);
1039 csum = in6_cksum(m, 0, offset, m->m_pkthdr.len - offset);
1040 if (csum == 0 && (m->m_pkthdr.csum_flags & M_CSUM_UDPv6) != 0) {
1041 csum = 0xffff;
1042 }
1043
1044 offset += M_CSUM_DATA_IPv6_OFFSET(m->m_pkthdr.csum_data);
1045 if ((offset + sizeof(csum)) > m->m_len) {
1046 m_copyback(m, offset, sizeof(csum), &csum);
1047 } else {
1048 *(uint16_t *)(mtod(m, char *) + offset) = csum;
1049 }
1050 }
1051
1052 /*
1053 * Insert jumbo payload option.
1054 */
1055 static int
1056 ip6_insert_jumboopt(struct ip6_exthdrs *exthdrs, u_int32_t plen)
1057 {
1058 struct mbuf *mopt;
1059 u_int8_t *optbuf;
1060 u_int32_t v;
1061
1062 #define JUMBOOPTLEN 8 /* length of jumbo payload option and padding */
1063
1064 /*
1065 * If there is no hop-by-hop options header, allocate new one.
1066 * If there is one but it doesn't have enough space to store the
1067 * jumbo payload option, allocate a cluster to store the whole options.
1068 * Otherwise, use it to store the options.
1069 */
1070 if (exthdrs->ip6e_hbh == 0) {
1071 MGET(mopt, M_DONTWAIT, MT_DATA);
1072 if (mopt == 0)
1073 return (ENOBUFS);
1074 mopt->m_len = JUMBOOPTLEN;
1075 optbuf = mtod(mopt, u_int8_t *);
1076 optbuf[1] = 0; /* = ((JUMBOOPTLEN) >> 3) - 1 */
1077 exthdrs->ip6e_hbh = mopt;
1078 } else {
1079 struct ip6_hbh *hbh;
1080
1081 mopt = exthdrs->ip6e_hbh;
1082 if (M_TRAILINGSPACE(mopt) < JUMBOOPTLEN) {
1083 /*
1084 * XXX assumption:
1085 * - exthdrs->ip6e_hbh is not referenced from places
1086 * other than exthdrs.
1087 * - exthdrs->ip6e_hbh is not an mbuf chain.
1088 */
1089 int oldoptlen = mopt->m_len;
1090 struct mbuf *n;
1091
1092 /*
1093 * XXX: give up if the whole (new) hbh header does
1094 * not fit even in an mbuf cluster.
1095 */
1096 if (oldoptlen + JUMBOOPTLEN > MCLBYTES)
1097 return (ENOBUFS);
1098
1099 /*
1100 * As a consequence, we must always prepare a cluster
1101 * at this point.
1102 */
1103 MGET(n, M_DONTWAIT, MT_DATA);
1104 if (n) {
1105 MCLGET(n, M_DONTWAIT);
1106 if ((n->m_flags & M_EXT) == 0) {
1107 m_freem(n);
1108 n = NULL;
1109 }
1110 }
1111 if (!n)
1112 return (ENOBUFS);
1113 n->m_len = oldoptlen + JUMBOOPTLEN;
1114 bcopy(mtod(mopt, void *), mtod(n, void *),
1115 oldoptlen);
1116 optbuf = mtod(n, u_int8_t *) + oldoptlen;
1117 m_freem(mopt);
1118 mopt = exthdrs->ip6e_hbh = n;
1119 } else {
1120 optbuf = mtod(mopt, u_int8_t *) + mopt->m_len;
1121 mopt->m_len += JUMBOOPTLEN;
1122 }
1123 optbuf[0] = IP6OPT_PADN;
1124 optbuf[1] = 0;
1125
1126 /*
1127 * Adjust the header length according to the pad and
1128 * the jumbo payload option.
1129 */
1130 hbh = mtod(mopt, struct ip6_hbh *);
1131 hbh->ip6h_len += (JUMBOOPTLEN >> 3);
1132 }
1133
1134 /* fill in the option. */
1135 optbuf[2] = IP6OPT_JUMBO;
1136 optbuf[3] = 4;
1137 v = (u_int32_t)htonl(plen + JUMBOOPTLEN);
1138 bcopy(&v, &optbuf[4], sizeof(u_int32_t));
1139
1140 /* finally, adjust the packet header length */
1141 exthdrs->ip6e_ip6->m_pkthdr.len += JUMBOOPTLEN;
1142
1143 return (0);
1144 #undef JUMBOOPTLEN
1145 }
1146
1147 /*
1148 * Insert fragment header and copy unfragmentable header portions.
1149 */
1150 static int
1151 ip6_insertfraghdr(struct mbuf *m0, struct mbuf *m, int hlen,
1152 struct ip6_frag **frghdrp)
1153 {
1154 struct mbuf *n, *mlast;
1155
1156 if (hlen > sizeof(struct ip6_hdr)) {
1157 n = m_copym(m0, sizeof(struct ip6_hdr),
1158 hlen - sizeof(struct ip6_hdr), M_DONTWAIT);
1159 if (n == 0)
1160 return (ENOBUFS);
1161 m->m_next = n;
1162 } else
1163 n = m;
1164
1165 /* Search for the last mbuf of unfragmentable part. */
1166 for (mlast = n; mlast->m_next; mlast = mlast->m_next)
1167 ;
1168
1169 if ((mlast->m_flags & M_EXT) == 0 &&
1170 M_TRAILINGSPACE(mlast) >= sizeof(struct ip6_frag)) {
1171 /* use the trailing space of the last mbuf for the fragment hdr */
1172 *frghdrp = (struct ip6_frag *)(mtod(mlast, char *) +
1173 mlast->m_len);
1174 mlast->m_len += sizeof(struct ip6_frag);
1175 m->m_pkthdr.len += sizeof(struct ip6_frag);
1176 } else {
1177 /* allocate a new mbuf for the fragment header */
1178 struct mbuf *mfrg;
1179
1180 MGET(mfrg, M_DONTWAIT, MT_DATA);
1181 if (mfrg == 0)
1182 return (ENOBUFS);
1183 mfrg->m_len = sizeof(struct ip6_frag);
1184 *frghdrp = mtod(mfrg, struct ip6_frag *);
1185 mlast->m_next = mfrg;
1186 }
1187
1188 return (0);
1189 }
1190
1191 static int
1192 ip6_getpmtu(struct route *ro_pmtu, struct route *ro, struct ifnet *ifp,
1193 const struct in6_addr *dst, u_long *mtup, int *alwaysfragp)
1194 {
1195 struct rtentry *rt;
1196 u_int32_t mtu = 0;
1197 int alwaysfrag = 0;
1198 int error = 0;
1199
1200 if (ro_pmtu != ro) {
1201 union {
1202 struct sockaddr dst;
1203 struct sockaddr_in6 dst6;
1204 } u;
1205
1206 /* The first hop and the final destination may differ. */
1207 sockaddr_in6_init(&u.dst6, dst, 0, 0, 0);
1208 rt = rtcache_lookup(ro_pmtu, &u.dst);
1209 } else
1210 rt = rtcache_validate(ro_pmtu);
1211 if (rt != NULL) {
1212 u_int32_t ifmtu;
1213
1214 if (ifp == NULL)
1215 ifp = rt->rt_ifp;
1216 ifmtu = IN6_LINKMTU(ifp);
1217 mtu = rt->rt_rmx.rmx_mtu;
1218 if (mtu == 0)
1219 mtu = ifmtu;
1220 else if (mtu < IPV6_MMTU) {
1221 /*
1222 * RFC2460 section 5, last paragraph:
1223 * if we record ICMPv6 too big message with
1224 * mtu < IPV6_MMTU, transmit packets sized IPV6_MMTU
1225 * or smaller, with fragment header attached.
1226 * (fragment header is needed regardless from the
1227 * packet size, for translators to identify packets)
1228 */
1229 alwaysfrag = 1;
1230 mtu = IPV6_MMTU;
1231 } else if (mtu > ifmtu) {
1232 /*
1233 * The MTU on the route is larger than the MTU on
1234 * the interface! This shouldn't happen, unless the
1235 * MTU of the interface has been changed after the
1236 * interface was brought up. Change the MTU in the
1237 * route to match the interface MTU (as long as the
1238 * field isn't locked).
1239 */
1240 mtu = ifmtu;
1241 if (!(rt->rt_rmx.rmx_locks & RTV_MTU))
1242 rt->rt_rmx.rmx_mtu = mtu;
1243 }
1244 } else if (ifp) {
1245 mtu = IN6_LINKMTU(ifp);
1246 } else
1247 error = EHOSTUNREACH; /* XXX */
1248
1249 *mtup = mtu;
1250 if (alwaysfragp)
1251 *alwaysfragp = alwaysfrag;
1252 return (error);
1253 }
1254
1255 /*
1256 * IP6 socket option processing.
1257 */
1258 int
1259 ip6_ctloutput(int op, struct socket *so, struct sockopt *sopt)
1260 {
1261 int optdatalen, uproto;
1262 void *optdata;
1263 struct in6pcb *in6p = sotoin6pcb(so);
1264 int error, optval;
1265 int level, optname;
1266
1267 KASSERT(sopt != NULL);
1268
1269 level = sopt->sopt_level;
1270 optname = sopt->sopt_name;
1271
1272 error = optval = 0;
1273 uproto = (int)so->so_proto->pr_protocol;
1274
1275 if (level != IPPROTO_IPV6) {
1276 return ENOPROTOOPT;
1277 }
1278 switch (op) {
1279 case PRCO_SETOPT:
1280 switch (optname) {
1281 #ifdef RFC2292
1282 case IPV6_2292PKTOPTIONS:
1283 error = ip6_pcbopts(&in6p->in6p_outputopts, so, sopt);
1284 break;
1285 #endif
1286
1287 /*
1288 * Use of some Hop-by-Hop options or some
1289 * Destination options, might require special
1290 * privilege. That is, normal applications
1291 * (without special privilege) might be forbidden
1292 * from setting certain options in outgoing packets,
1293 * and might never see certain options in received
1294 * packets. [RFC 2292 Section 6]
1295 * KAME specific note:
1296 * KAME prevents non-privileged users from sending or
1297 * receiving ANY hbh/dst options in order to avoid
1298 * overhead of parsing options in the kernel.
1299 */
1300 case IPV6_RECVHOPOPTS:
1301 case IPV6_RECVDSTOPTS:
1302 case IPV6_RECVRTHDRDSTOPTS:
1303 error = kauth_authorize_network(kauth_cred_get(),
1304 KAUTH_NETWORK_IPV6, KAUTH_REQ_NETWORK_IPV6_HOPBYHOP,
1305 NULL, NULL, NULL);
1306 if (error)
1307 break;
1308 /* FALLTHROUGH */
1309 case IPV6_UNICAST_HOPS:
1310 case IPV6_HOPLIMIT:
1311 case IPV6_FAITH:
1312
1313 case IPV6_RECVPKTINFO:
1314 case IPV6_RECVHOPLIMIT:
1315 case IPV6_RECVRTHDR:
1316 case IPV6_RECVPATHMTU:
1317 case IPV6_RECVTCLASS:
1318 case IPV6_V6ONLY:
1319 error = sockopt_getint(sopt, &optval);
1320 if (error)
1321 break;
1322 switch (optname) {
1323 case IPV6_UNICAST_HOPS:
1324 if (optval < -1 || optval >= 256)
1325 error = EINVAL;
1326 else {
1327 /* -1 = kernel default */
1328 in6p->in6p_hops = optval;
1329 }
1330 break;
1331 #define OPTSET(bit) \
1332 do { \
1333 if (optval) \
1334 in6p->in6p_flags |= (bit); \
1335 else \
1336 in6p->in6p_flags &= ~(bit); \
1337 } while (/*CONSTCOND*/ 0)
1338
1339 #ifdef RFC2292
1340 #define OPTSET2292(bit) \
1341 do { \
1342 in6p->in6p_flags |= IN6P_RFC2292; \
1343 if (optval) \
1344 in6p->in6p_flags |= (bit); \
1345 else \
1346 in6p->in6p_flags &= ~(bit); \
1347 } while (/*CONSTCOND*/ 0)
1348 #endif
1349
1350 #define OPTBIT(bit) (in6p->in6p_flags & (bit) ? 1 : 0)
1351
1352 case IPV6_RECVPKTINFO:
1353 #ifdef RFC2292
1354 /* cannot mix with RFC2292 */
1355 if (OPTBIT(IN6P_RFC2292)) {
1356 error = EINVAL;
1357 break;
1358 }
1359 #endif
1360 OPTSET(IN6P_PKTINFO);
1361 break;
1362
1363 case IPV6_HOPLIMIT:
1364 {
1365 struct ip6_pktopts **optp;
1366
1367 #ifdef RFC2292
1368 /* cannot mix with RFC2292 */
1369 if (OPTBIT(IN6P_RFC2292)) {
1370 error = EINVAL;
1371 break;
1372 }
1373 #endif
1374 optp = &in6p->in6p_outputopts;
1375 error = ip6_pcbopt(IPV6_HOPLIMIT,
1376 (u_char *)&optval,
1377 sizeof(optval),
1378 optp,
1379 kauth_cred_get(), uproto);
1380 break;
1381 }
1382
1383 case IPV6_RECVHOPLIMIT:
1384 #ifdef RFC2292
1385 /* cannot mix with RFC2292 */
1386 if (OPTBIT(IN6P_RFC2292)) {
1387 error = EINVAL;
1388 break;
1389 }
1390 #endif
1391 OPTSET(IN6P_HOPLIMIT);
1392 break;
1393
1394 case IPV6_RECVHOPOPTS:
1395 #ifdef RFC2292
1396 /* cannot mix with RFC2292 */
1397 if (OPTBIT(IN6P_RFC2292)) {
1398 error = EINVAL;
1399 break;
1400 }
1401 #endif
1402 OPTSET(IN6P_HOPOPTS);
1403 break;
1404
1405 case IPV6_RECVDSTOPTS:
1406 #ifdef RFC2292
1407 /* cannot mix with RFC2292 */
1408 if (OPTBIT(IN6P_RFC2292)) {
1409 error = EINVAL;
1410 break;
1411 }
1412 #endif
1413 OPTSET(IN6P_DSTOPTS);
1414 break;
1415
1416 case IPV6_RECVRTHDRDSTOPTS:
1417 #ifdef RFC2292
1418 /* cannot mix with RFC2292 */
1419 if (OPTBIT(IN6P_RFC2292)) {
1420 error = EINVAL;
1421 break;
1422 }
1423 #endif
1424 OPTSET(IN6P_RTHDRDSTOPTS);
1425 break;
1426
1427 case IPV6_RECVRTHDR:
1428 #ifdef RFC2292
1429 /* cannot mix with RFC2292 */
1430 if (OPTBIT(IN6P_RFC2292)) {
1431 error = EINVAL;
1432 break;
1433 }
1434 #endif
1435 OPTSET(IN6P_RTHDR);
1436 break;
1437
1438 case IPV6_FAITH:
1439 OPTSET(IN6P_FAITH);
1440 break;
1441
1442 case IPV6_RECVPATHMTU:
1443 /*
1444 * We ignore this option for TCP
1445 * sockets.
1446 * (RFC3542 leaves this case
1447 * unspecified.)
1448 */
1449 if (uproto != IPPROTO_TCP)
1450 OPTSET(IN6P_MTU);
1451 break;
1452
1453 case IPV6_V6ONLY:
1454 /*
1455 * make setsockopt(IPV6_V6ONLY)
1456 * available only prior to bind(2).
1457 * see ipng mailing list, Jun 22 2001.
1458 */
1459 if (in6p->in6p_lport ||
1460 !IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_laddr)) {
1461 error = EINVAL;
1462 break;
1463 }
1464 #ifdef INET6_BINDV6ONLY
1465 if (!optval)
1466 error = EINVAL;
1467 #else
1468 OPTSET(IN6P_IPV6_V6ONLY);
1469 #endif
1470 break;
1471 case IPV6_RECVTCLASS:
1472 #ifdef RFC2292
1473 /* cannot mix with RFC2292 XXX */
1474 if (OPTBIT(IN6P_RFC2292)) {
1475 error = EINVAL;
1476 break;
1477 }
1478 #endif
1479 OPTSET(IN6P_TCLASS);
1480 break;
1481
1482 }
1483 break;
1484
1485 case IPV6_OTCLASS:
1486 {
1487 struct ip6_pktopts **optp;
1488 u_int8_t tclass;
1489
1490 error = sockopt_get(sopt, &tclass, sizeof(tclass));
1491 if (error)
1492 break;
1493 optp = &in6p->in6p_outputopts;
1494 error = ip6_pcbopt(optname,
1495 (u_char *)&tclass,
1496 sizeof(tclass),
1497 optp,
1498 kauth_cred_get(), uproto);
1499 break;
1500 }
1501
1502 case IPV6_TCLASS:
1503 case IPV6_DONTFRAG:
1504 case IPV6_USE_MIN_MTU:
1505 error = sockopt_getint(sopt, &optval);
1506 if (error)
1507 break;
1508 {
1509 struct ip6_pktopts **optp;
1510 optp = &in6p->in6p_outputopts;
1511 error = ip6_pcbopt(optname,
1512 (u_char *)&optval,
1513 sizeof(optval),
1514 optp,
1515 kauth_cred_get(), uproto);
1516 break;
1517 }
1518
1519 #ifdef RFC2292
1520 case IPV6_2292PKTINFO:
1521 case IPV6_2292HOPLIMIT:
1522 case IPV6_2292HOPOPTS:
1523 case IPV6_2292DSTOPTS:
1524 case IPV6_2292RTHDR:
1525 /* RFC 2292 */
1526 error = sockopt_getint(sopt, &optval);
1527 if (error)
1528 break;
1529
1530 switch (optname) {
1531 case IPV6_2292PKTINFO:
1532 OPTSET2292(IN6P_PKTINFO);
1533 break;
1534 case IPV6_2292HOPLIMIT:
1535 OPTSET2292(IN6P_HOPLIMIT);
1536 break;
1537 case IPV6_2292HOPOPTS:
1538 /*
1539 * Check super-user privilege.
1540 * See comments for IPV6_RECVHOPOPTS.
1541 */
1542 error =
1543 kauth_authorize_network(kauth_cred_get(),
1544 KAUTH_NETWORK_IPV6,
1545 KAUTH_REQ_NETWORK_IPV6_HOPBYHOP, NULL,
1546 NULL, NULL);
1547 if (error)
1548 return (error);
1549 OPTSET2292(IN6P_HOPOPTS);
1550 break;
1551 case IPV6_2292DSTOPTS:
1552 error =
1553 kauth_authorize_network(kauth_cred_get(),
1554 KAUTH_NETWORK_IPV6,
1555 KAUTH_REQ_NETWORK_IPV6_HOPBYHOP, NULL,
1556 NULL, NULL);
1557 if (error)
1558 return (error);
1559 OPTSET2292(IN6P_DSTOPTS|IN6P_RTHDRDSTOPTS); /* XXX */
1560 break;
1561 case IPV6_2292RTHDR:
1562 OPTSET2292(IN6P_RTHDR);
1563 break;
1564 }
1565 break;
1566 #endif
1567 case IPV6_PKTINFO:
1568 case IPV6_HOPOPTS:
1569 case IPV6_RTHDR:
1570 case IPV6_DSTOPTS:
1571 case IPV6_RTHDRDSTOPTS:
1572 case IPV6_NEXTHOP: {
1573 /* new advanced API (RFC3542) */
1574 void *optbuf;
1575 int optbuflen;
1576 struct ip6_pktopts **optp;
1577
1578 #ifdef RFC2292
1579 /* cannot mix with RFC2292 */
1580 if (OPTBIT(IN6P_RFC2292)) {
1581 error = EINVAL;
1582 break;
1583 }
1584 #endif
1585
1586 optbuflen = sopt->sopt_size;
1587 optbuf = malloc(optbuflen, M_IP6OPT, M_NOWAIT);
1588 if (optbuf == NULL) {
1589 error = ENOBUFS;
1590 break;
1591 }
1592
1593 sockopt_get(sopt, optbuf, optbuflen);
1594 optp = &in6p->in6p_outputopts;
1595 error = ip6_pcbopt(optname, optbuf, optbuflen,
1596 optp, kauth_cred_get(), uproto);
1597 break;
1598 }
1599 #undef OPTSET
1600
1601 case IPV6_MULTICAST_IF:
1602 case IPV6_MULTICAST_HOPS:
1603 case IPV6_MULTICAST_LOOP:
1604 case IPV6_JOIN_GROUP:
1605 case IPV6_LEAVE_GROUP:
1606 error = ip6_setmoptions(sopt, &in6p->in6p_moptions);
1607 break;
1608
1609 case IPV6_PORTRANGE:
1610 error = sockopt_getint(sopt, &optval);
1611 if (error)
1612 break;
1613
1614 switch (optval) {
1615 case IPV6_PORTRANGE_DEFAULT:
1616 in6p->in6p_flags &= ~(IN6P_LOWPORT);
1617 in6p->in6p_flags &= ~(IN6P_HIGHPORT);
1618 break;
1619
1620 case IPV6_PORTRANGE_HIGH:
1621 in6p->in6p_flags &= ~(IN6P_LOWPORT);
1622 in6p->in6p_flags |= IN6P_HIGHPORT;
1623 break;
1624
1625 case IPV6_PORTRANGE_LOW:
1626 in6p->in6p_flags &= ~(IN6P_HIGHPORT);
1627 in6p->in6p_flags |= IN6P_LOWPORT;
1628 break;
1629
1630 default:
1631 error = EINVAL;
1632 break;
1633 }
1634 break;
1635
1636
1637 #if defined(FAST_IPSEC)
1638 case IPV6_IPSEC_POLICY:
1639 error = ipsec6_set_policy(in6p, optname,
1640 sopt->sopt_data, sopt->sopt_size, kauth_cred_get());
1641 break;
1642 #endif /* IPSEC */
1643
1644 default:
1645 error = ENOPROTOOPT;
1646 break;
1647 }
1648 break;
1649
1650 case PRCO_GETOPT:
1651 switch (optname) {
1652 #ifdef RFC2292
1653 case IPV6_2292PKTOPTIONS:
1654 /*
1655 * RFC3542 (effectively) deprecated the
1656 * semantics of the 2292-style pktoptions.
1657 * Since it was not reliable in nature (i.e.,
1658 * applications had to expect the lack of some
1659 * information after all), it would make sense
1660 * to simplify this part by always returning
1661 * empty data.
1662 */
1663 break;
1664 #endif
1665
1666 case IPV6_RECVHOPOPTS:
1667 case IPV6_RECVDSTOPTS:
1668 case IPV6_RECVRTHDRDSTOPTS:
1669 case IPV6_UNICAST_HOPS:
1670 case IPV6_RECVPKTINFO:
1671 case IPV6_RECVHOPLIMIT:
1672 case IPV6_RECVRTHDR:
1673 case IPV6_RECVPATHMTU:
1674
1675 case IPV6_FAITH:
1676 case IPV6_V6ONLY:
1677 case IPV6_PORTRANGE:
1678 case IPV6_RECVTCLASS:
1679 switch (optname) {
1680
1681 case IPV6_RECVHOPOPTS:
1682 optval = OPTBIT(IN6P_HOPOPTS);
1683 break;
1684
1685 case IPV6_RECVDSTOPTS:
1686 optval = OPTBIT(IN6P_DSTOPTS);
1687 break;
1688
1689 case IPV6_RECVRTHDRDSTOPTS:
1690 optval = OPTBIT(IN6P_RTHDRDSTOPTS);
1691 break;
1692
1693 case IPV6_UNICAST_HOPS:
1694 optval = in6p->in6p_hops;
1695 break;
1696
1697 case IPV6_RECVPKTINFO:
1698 optval = OPTBIT(IN6P_PKTINFO);
1699 break;
1700
1701 case IPV6_RECVHOPLIMIT:
1702 optval = OPTBIT(IN6P_HOPLIMIT);
1703 break;
1704
1705 case IPV6_RECVRTHDR:
1706 optval = OPTBIT(IN6P_RTHDR);
1707 break;
1708
1709 case IPV6_RECVPATHMTU:
1710 optval = OPTBIT(IN6P_MTU);
1711 break;
1712
1713 case IPV6_FAITH:
1714 optval = OPTBIT(IN6P_FAITH);
1715 break;
1716
1717 case IPV6_V6ONLY:
1718 optval = OPTBIT(IN6P_IPV6_V6ONLY);
1719 break;
1720
1721 case IPV6_PORTRANGE:
1722 {
1723 int flags;
1724 flags = in6p->in6p_flags;
1725 if (flags & IN6P_HIGHPORT)
1726 optval = IPV6_PORTRANGE_HIGH;
1727 else if (flags & IN6P_LOWPORT)
1728 optval = IPV6_PORTRANGE_LOW;
1729 else
1730 optval = 0;
1731 break;
1732 }
1733 case IPV6_RECVTCLASS:
1734 optval = OPTBIT(IN6P_TCLASS);
1735 break;
1736
1737 }
1738 if (error)
1739 break;
1740 error = sockopt_setint(sopt, optval);
1741 break;
1742
1743 case IPV6_PATHMTU:
1744 {
1745 u_long pmtu = 0;
1746 struct ip6_mtuinfo mtuinfo;
1747 struct route *ro = &in6p->in6p_route;
1748
1749 if (!(so->so_state & SS_ISCONNECTED))
1750 return (ENOTCONN);
1751 /*
1752 * XXX: we dot not consider the case of source
1753 * routing, or optional information to specify
1754 * the outgoing interface.
1755 */
1756 error = ip6_getpmtu(ro, NULL, NULL,
1757 &in6p->in6p_faddr, &pmtu, NULL);
1758 if (error)
1759 break;
1760 if (pmtu > IPV6_MAXPACKET)
1761 pmtu = IPV6_MAXPACKET;
1762
1763 memset(&mtuinfo, 0, sizeof(mtuinfo));
1764 mtuinfo.ip6m_mtu = (u_int32_t)pmtu;
1765 optdata = (void *)&mtuinfo;
1766 optdatalen = sizeof(mtuinfo);
1767 if (optdatalen > MCLBYTES)
1768 return (EMSGSIZE); /* XXX */
1769 error = sockopt_set(sopt, optdata, optdatalen);
1770 break;
1771 }
1772
1773 #ifdef RFC2292
1774 case IPV6_2292PKTINFO:
1775 case IPV6_2292HOPLIMIT:
1776 case IPV6_2292HOPOPTS:
1777 case IPV6_2292RTHDR:
1778 case IPV6_2292DSTOPTS:
1779 switch (optname) {
1780 case IPV6_2292PKTINFO:
1781 optval = OPTBIT(IN6P_PKTINFO);
1782 break;
1783 case IPV6_2292HOPLIMIT:
1784 optval = OPTBIT(IN6P_HOPLIMIT);
1785 break;
1786 case IPV6_2292HOPOPTS:
1787 optval = OPTBIT(IN6P_HOPOPTS);
1788 break;
1789 case IPV6_2292RTHDR:
1790 optval = OPTBIT(IN6P_RTHDR);
1791 break;
1792 case IPV6_2292DSTOPTS:
1793 optval = OPTBIT(IN6P_DSTOPTS|IN6P_RTHDRDSTOPTS);
1794 break;
1795 }
1796 error = sockopt_setint(sopt, optval);
1797 break;
1798 #endif
1799 case IPV6_PKTINFO:
1800 case IPV6_HOPOPTS:
1801 case IPV6_RTHDR:
1802 case IPV6_DSTOPTS:
1803 case IPV6_RTHDRDSTOPTS:
1804 case IPV6_NEXTHOP:
1805 case IPV6_OTCLASS:
1806 case IPV6_TCLASS:
1807 case IPV6_DONTFRAG:
1808 case IPV6_USE_MIN_MTU:
1809 error = ip6_getpcbopt(in6p->in6p_outputopts,
1810 optname, sopt);
1811 break;
1812
1813 case IPV6_MULTICAST_IF:
1814 case IPV6_MULTICAST_HOPS:
1815 case IPV6_MULTICAST_LOOP:
1816 case IPV6_JOIN_GROUP:
1817 case IPV6_LEAVE_GROUP:
1818 error = ip6_getmoptions(sopt, in6p->in6p_moptions);
1819 break;
1820
1821 #if defined(FAST_IPSEC)
1822 case IPV6_IPSEC_POLICY:
1823 {
1824 struct mbuf *m = NULL;
1825
1826 /* XXX this will return EINVAL as sopt is empty */
1827 error = ipsec6_get_policy(in6p, sopt->sopt_data,
1828 sopt->sopt_size, &m);
1829 if (!error)
1830 error = sockopt_setmbuf(sopt, m);
1831
1832 break;
1833 }
1834 #endif /* IPSEC */
1835
1836 default:
1837 error = ENOPROTOOPT;
1838 break;
1839 }
1840 break;
1841 }
1842 return (error);
1843 }
1844
1845 int
1846 ip6_raw_ctloutput(int op, struct socket *so, struct sockopt *sopt)
1847 {
1848 int error = 0, optval;
1849 const int icmp6off = offsetof(struct icmp6_hdr, icmp6_cksum);
1850 struct in6pcb *in6p = sotoin6pcb(so);
1851 int level, optname;
1852
1853 KASSERT(sopt != NULL);
1854
1855 level = sopt->sopt_level;
1856 optname = sopt->sopt_name;
1857
1858 if (level != IPPROTO_IPV6) {
1859 return ENOPROTOOPT;
1860 }
1861
1862 switch (optname) {
1863 case IPV6_CHECKSUM:
1864 /*
1865 * For ICMPv6 sockets, no modification allowed for checksum
1866 * offset, permit "no change" values to help existing apps.
1867 *
1868 * XXX RFC3542 says: "An attempt to set IPV6_CHECKSUM
1869 * for an ICMPv6 socket will fail." The current
1870 * behavior does not meet RFC3542.
1871 */
1872 switch (op) {
1873 case PRCO_SETOPT:
1874 error = sockopt_getint(sopt, &optval);
1875 if (error)
1876 break;
1877 if ((optval % 2) != 0) {
1878 /* the API assumes even offset values */
1879 error = EINVAL;
1880 } else if (so->so_proto->pr_protocol ==
1881 IPPROTO_ICMPV6) {
1882 if (optval != icmp6off)
1883 error = EINVAL;
1884 } else
1885 in6p->in6p_cksum = optval;
1886 break;
1887
1888 case PRCO_GETOPT:
1889 if (so->so_proto->pr_protocol == IPPROTO_ICMPV6)
1890 optval = icmp6off;
1891 else
1892 optval = in6p->in6p_cksum;
1893
1894 error = sockopt_setint(sopt, optval);
1895 break;
1896
1897 default:
1898 error = EINVAL;
1899 break;
1900 }
1901 break;
1902
1903 default:
1904 error = ENOPROTOOPT;
1905 break;
1906 }
1907
1908 return (error);
1909 }
1910
#ifdef RFC2292
/*
 * Set up IP6 options in pcb for insertion in output packets or
 * specifying behavior of outgoing packets.
 *
 * Any options already held in *pktopt are cleared first.  With empty (or
 * absent) option data the structure is released and *pktopt ends up NULL;
 * otherwise the options parsed from sopt replace the old ones.  On any
 * failure *pktopt is left NULL and the structure is released.
 *
 * Returns 0 on success or an errno value.
 */
static int
ip6_pcbopts(struct ip6_pktopts **pktopt, struct socket *so,
    struct sockopt *sopt)
{
	struct ip6_pktopts *opt = *pktopt;
	struct mbuf *m;
	int error = 0;

	if (opt == NULL) {
		/* nothing attached yet: get a structure to fill in */
		opt = malloc(sizeof(*opt), M_IP6OPT, M_NOWAIT);
		if (opt == NULL)
			return (ENOBUFS);
	} else {
		/* turn off any old options. */
#ifdef DIAGNOSTIC
		if (opt->ip6po_pktinfo || opt->ip6po_nexthop ||
		    opt->ip6po_hbh || opt->ip6po_dest1 || opt->ip6po_dest2 ||
		    opt->ip6po_rhinfo.ip6po_rhi_rthdr)
			printf("ip6_pcbopts: all specified options are cleared.\n");
#endif
		ip6_clearpktopts(opt, -1);
	}
	/* detach from the pcb until the new options are known to be good */
	*pktopt = NULL;

	if (sopt == NULL || sopt->sopt_size == 0) {
		/*
		 * Only turning off any previous options, regardless of
		 * whether the opt is just created or given.
		 */
		free(opt, M_IP6OPT);
		return (0);
	}

	/* set options specified by user. */
	m = sockopt_getmbuf(sopt);
	if (m == NULL) {
		free(opt, M_IP6OPT);
		return (ENOBUFS);
	}

	error = ip6_setpktopts(m, opt, NULL, kauth_cred_get(),
	    so->so_proto->pr_protocol);
	m_freem(m);
	if (error == 0) {
		*pktopt = opt;
		return (0);
	}

	ip6_clearpktopts(opt, -1); /* XXX: discard all options */
	free(opt, M_IP6OPT);
	return (error);
}
#endif
1968
1969 /*
1970 * initialize ip6_pktopts. beware that there are non-zero default values in
1971 * the struct.
1972 */
1973 void
1974 ip6_initpktopts(struct ip6_pktopts *opt)
1975 {
1976
1977 memset(opt, 0, sizeof(*opt));
1978 opt->ip6po_hlim = -1; /* -1 means default hop limit */
1979 opt->ip6po_tclass = -1; /* -1 means default traffic class */
1980 opt->ip6po_minmtu = IP6PO_MINMTU_MCASTONLY;
1981 }
1982
1983 #define sin6tosa(sin6) ((struct sockaddr *)(sin6)) /* XXX */
1984 static int
1985 ip6_pcbopt(int optname, u_char *buf, int len, struct ip6_pktopts **pktopt,
1986 kauth_cred_t cred, int uproto)
1987 {
1988 struct ip6_pktopts *opt;
1989
1990 if (*pktopt == NULL) {
1991 *pktopt = malloc(sizeof(struct ip6_pktopts), M_IP6OPT,
1992 M_NOWAIT);
1993 if (*pktopt == NULL)
1994 return (ENOBUFS);
1995
1996 ip6_initpktopts(*pktopt);
1997 }
1998 opt = *pktopt;
1999
2000 return (ip6_setpktopt(optname, buf, len, opt, cred, 1, 0, uproto));
2001 }
2002
2003 static int
2004 ip6_getpcbopt(struct ip6_pktopts *pktopt, int optname, struct sockopt *sopt)
2005 {
2006 void *optdata = NULL;
2007 int optdatalen = 0;
2008 struct ip6_ext *ip6e;
2009 int error = 0;
2010 struct in6_pktinfo null_pktinfo;
2011 int deftclass = 0, on;
2012 int defminmtu = IP6PO_MINMTU_MCASTONLY;
2013
2014 switch (optname) {
2015 case IPV6_PKTINFO:
2016 if (pktopt && pktopt->ip6po_pktinfo)
2017 optdata = (void *)pktopt->ip6po_pktinfo;
2018 else {
2019 /* XXX: we don't have to do this every time... */
2020 memset(&null_pktinfo, 0, sizeof(null_pktinfo));
2021 optdata = (void *)&null_pktinfo;
2022 }
2023 optdatalen = sizeof(struct in6_pktinfo);
2024 break;
2025 case IPV6_OTCLASS:
2026 /* XXX */
2027 return (EINVAL);
2028 case IPV6_TCLASS:
2029 if (pktopt && pktopt->ip6po_tclass >= 0)
2030 optdata = (void *)&pktopt->ip6po_tclass;
2031 else
2032 optdata = (void *)&deftclass;
2033 optdatalen = sizeof(int);
2034 break;
2035 case IPV6_HOPOPTS:
2036 if (pktopt && pktopt->ip6po_hbh) {
2037 optdata = (void *)pktopt->ip6po_hbh;
2038 ip6e = (struct ip6_ext *)pktopt->ip6po_hbh;
2039 optdatalen = (ip6e->ip6e_len + 1) << 3;
2040 }
2041 break;
2042 case IPV6_RTHDR:
2043 if (pktopt && pktopt->ip6po_rthdr) {
2044 optdata = (void *)pktopt->ip6po_rthdr;
2045 ip6e = (struct ip6_ext *)pktopt->ip6po_rthdr;
2046 optdatalen = (ip6e->ip6e_len + 1) << 3;
2047 }
2048 break;
2049 case IPV6_RTHDRDSTOPTS:
2050 if (pktopt && pktopt->ip6po_dest1) {
2051 optdata = (void *)pktopt->ip6po_dest1;
2052 ip6e = (struct ip6_ext *)pktopt->ip6po_dest1;
2053 optdatalen = (ip6e->ip6e_len + 1) << 3;
2054 }
2055 break;
2056 case IPV6_DSTOPTS:
2057 if (pktopt && pktopt->ip6po_dest2) {
2058 optdata = (void *)pktopt->ip6po_dest2;
2059 ip6e = (struct ip6_ext *)pktopt->ip6po_dest2;
2060 optdatalen = (ip6e->ip6e_len + 1) << 3;
2061 }
2062 break;
2063 case IPV6_NEXTHOP:
2064 if (pktopt && pktopt->ip6po_nexthop) {
2065 optdata = (void *)pktopt->ip6po_nexthop;
2066 optdatalen = pktopt->ip6po_nexthop->sa_len;
2067 }
2068 break;
2069 case IPV6_USE_MIN_MTU:
2070 if (pktopt)
2071 optdata = (void *)&pktopt->ip6po_minmtu;
2072 else
2073 optdata = (void *)&defminmtu;
2074 optdatalen = sizeof(int);
2075 break;
2076 case IPV6_DONTFRAG:
2077 if (pktopt && ((pktopt->ip6po_flags) & IP6PO_DONTFRAG))
2078 on = 1;
2079 else
2080 on = 0;
2081 optdata = (void *)&on;
2082 optdatalen = sizeof(on);
2083 break;
2084 default: /* should not happen */
2085 #ifdef DIAGNOSTIC
2086 panic("ip6_getpcbopt: unexpected option\n");
2087 #endif
2088 return (ENOPROTOOPT);
2089 }
2090
2091 error = sockopt_set(sopt, optdata, optdatalen);
2092
2093 return (error);
2094 }
2095
2096 void
2097 ip6_clearpktopts(struct ip6_pktopts *pktopt, int optname)
2098 {
2099 if (optname == -1 || optname == IPV6_PKTINFO) {
2100 if (pktopt->ip6po_pktinfo)
2101 free(pktopt->ip6po_pktinfo, M_IP6OPT);
2102 pktopt->ip6po_pktinfo = NULL;
2103 }
2104 if (optname == -1 || optname == IPV6_HOPLIMIT)
2105 pktopt->ip6po_hlim = -1;
2106 if (optname == -1 || optname == IPV6_TCLASS)
2107 pktopt->ip6po_tclass = -1;
2108 if (optname == -1 || optname == IPV6_NEXTHOP) {
2109 rtcache_free(&pktopt->ip6po_nextroute);
2110 if (pktopt->ip6po_nexthop)
2111 free(pktopt->ip6po_nexthop, M_IP6OPT);
2112 pktopt->ip6po_nexthop = NULL;
2113 }
2114 if (optname == -1 || optname == IPV6_HOPOPTS) {
2115 if (pktopt->ip6po_hbh)
2116 free(pktopt->ip6po_hbh, M_IP6OPT);
2117 pktopt->ip6po_hbh = NULL;
2118 }
2119 if (optname == -1 || optname == IPV6_RTHDRDSTOPTS) {
2120 if (pktopt->ip6po_dest1)
2121 free(pktopt->ip6po_dest1, M_IP6OPT);
2122 pktopt->ip6po_dest1 = NULL;
2123 }
2124 if (optname == -1 || optname == IPV6_RTHDR) {
2125 if (pktopt->ip6po_rhinfo.ip6po_rhi_rthdr)
2126 free(pktopt->ip6po_rhinfo.ip6po_rhi_rthdr, M_IP6OPT);
2127 pktopt->ip6po_rhinfo.ip6po_rhi_rthdr = NULL;
2128 rtcache_free(&pktopt->ip6po_route);
2129 }
2130 if (optname == -1 || optname == IPV6_DSTOPTS) {
2131 if (pktopt->ip6po_dest2)
2132 free(pktopt->ip6po_dest2, M_IP6OPT);
2133 pktopt->ip6po_dest2 = NULL;
2134 }
2135 }
2136
2137 #define PKTOPT_EXTHDRCPY(type) \
2138 do { \
2139 if (src->type) { \
2140 int hlen = (((struct ip6_ext *)src->type)->ip6e_len + 1) << 3;\
2141 dst->type = malloc(hlen, M_IP6OPT, canwait); \
2142 if (dst->type == NULL) \
2143 goto bad; \
2144 memcpy(dst->type, src->type, hlen); \
2145 } \
2146 } while (/*CONSTCOND*/ 0)
2147
2148 static int
2149 copypktopts(struct ip6_pktopts *dst, struct ip6_pktopts *src, int canwait)
2150 {
2151 dst->ip6po_hlim = src->ip6po_hlim;
2152 dst->ip6po_tclass = src->ip6po_tclass;
2153 dst->ip6po_flags = src->ip6po_flags;
2154 if (src->ip6po_pktinfo) {
2155 dst->ip6po_pktinfo = malloc(sizeof(*dst->ip6po_pktinfo),
2156 M_IP6OPT, canwait);
2157 if (dst->ip6po_pktinfo == NULL)
2158 goto bad;
2159 *dst->ip6po_pktinfo = *src->ip6po_pktinfo;
2160 }
2161 if (src->ip6po_nexthop) {
2162 dst->ip6po_nexthop = malloc(src->ip6po_nexthop->sa_len,
2163 M_IP6OPT, canwait);
2164 if (dst->ip6po_nexthop == NULL)
2165 goto bad;
2166 memcpy(dst->ip6po_nexthop, src->ip6po_nexthop,
2167 src->ip6po_nexthop->sa_len);
2168 }
2169 PKTOPT_EXTHDRCPY(ip6po_hbh);
2170 PKTOPT_EXTHDRCPY(ip6po_dest1);
2171 PKTOPT_EXTHDRCPY(ip6po_dest2);
2172 PKTOPT_EXTHDRCPY(ip6po_rthdr); /* not copy the cached route */
2173 return (0);
2174
2175 bad:
2176 if (dst->ip6po_pktinfo) free(dst->ip6po_pktinfo, M_IP6OPT);
2177 if (dst->ip6po_nexthop) free(dst->ip6po_nexthop, M_IP6OPT);
2178 if (dst->ip6po_hbh) free(dst->ip6po_hbh, M_IP6OPT);
2179 if (dst->ip6po_dest1) free(dst->ip6po_dest1, M_IP6OPT);
2180 if (dst->ip6po_dest2) free(dst->ip6po_dest2, M_IP6OPT);
2181 if (dst->ip6po_rthdr) free(dst->ip6po_rthdr, M_IP6OPT);
2182
2183 return (ENOBUFS);
2184 }
2185 #undef PKTOPT_EXTHDRCPY
2186
2187 struct ip6_pktopts *
2188 ip6_copypktopts(struct ip6_pktopts *src, int canwait)
2189 {
2190 int error;
2191 struct ip6_pktopts *dst;
2192
2193 dst = malloc(sizeof(*dst), M_IP6OPT, canwait);
2194 if (dst == NULL)
2195 return (NULL);
2196 ip6_initpktopts(dst);
2197
2198 if ((error = copypktopts(dst, src, canwait)) != 0) {
2199 free(dst, M_IP6OPT);
2200 return (NULL);
2201 }
2202
2203 return (dst);
2204 }
2205
2206 void
2207 ip6_freepcbopts(struct ip6_pktopts *pktopt)
2208 {
2209 if (pktopt == NULL)
2210 return;
2211
2212 ip6_clearpktopts(pktopt, -1);
2213
2214 free(pktopt, M_IP6OPT);
2215 }
2216
2217 /*
2218 * Set the IP6 multicast options in response to user setsockopt().
2219 */
2220 static int
2221 ip6_setmoptions(const struct sockopt *sopt, struct ip6_moptions **im6op)
2222 {
2223 int error = 0;
2224 u_int loop, ifindex;
2225 struct ipv6_mreq mreq;
2226 struct ifnet *ifp;
2227 struct ip6_moptions *im6o = *im6op;
2228 struct route ro;
2229 struct in6_multi_mship *imm;
2230 struct lwp *l = curlwp; /* XXX */
2231
2232 if (im6o == NULL) {
2233 /*
2234 * No multicast option buffer attached to the pcb;
2235 * allocate one and initialize to default values.
2236 */
2237 im6o = malloc(sizeof(*im6o), M_IPMOPTS, M_NOWAIT);
2238 if (im6o == NULL)
2239 return (ENOBUFS);
2240
2241 *im6op = im6o;
2242 im6o->im6o_multicast_ifp = NULL;
2243 im6o->im6o_multicast_hlim = ip6_defmcasthlim;
2244 im6o->im6o_multicast_loop = IPV6_DEFAULT_MULTICAST_LOOP;
2245 LIST_INIT(&im6o->im6o_memberships);
2246 }
2247
2248 switch (sopt->sopt_name) {
2249
2250 case IPV6_MULTICAST_IF:
2251 /*
2252 * Select the interface for outgoing multicast packets.
2253 */
2254 error = sockopt_get(sopt, &ifindex, sizeof(ifindex));
2255 if (error != 0)
2256 break;
2257
2258 if (ifindex != 0) {
2259 if (if_indexlim <= ifindex || !ifindex2ifnet[ifindex]) {
2260 error = ENXIO; /* XXX EINVAL? */
2261 break;
2262 }
2263 ifp = ifindex2ifnet[ifindex];
2264 if ((ifp->if_flags & IFF_MULTICAST) == 0) {
2265 error = EADDRNOTAVAIL;
2266 break;
2267 }
2268 } else
2269 ifp = NULL;
2270 im6o->im6o_multicast_ifp = ifp;
2271 break;
2272
2273 case IPV6_MULTICAST_HOPS:
2274 {
2275 /*
2276 * Set the IP6 hoplimit for outgoing multicast packets.
2277 */
2278 int optval;
2279
2280 error = sockopt_getint(sopt, &optval);
2281 if (error != 0)
2282 break;
2283
2284 if (optval < -1 || optval >= 256)
2285 error = EINVAL;
2286 else if (optval == -1)
2287 im6o->im6o_multicast_hlim = ip6_defmcasthlim;
2288 else
2289 im6o->im6o_multicast_hlim = optval;
2290 break;
2291 }
2292
2293 case IPV6_MULTICAST_LOOP:
2294 /*
2295 * Set the loopback flag for outgoing multicast packets.
2296 * Must be zero or one.
2297 */
2298 error = sockopt_get(sopt, &loop, sizeof(loop));
2299 if (error != 0)
2300 break;
2301 if (loop > 1) {
2302 error = EINVAL;
2303 break;
2304 }
2305 im6o->im6o_multicast_loop = loop;
2306 break;
2307
2308 case IPV6_JOIN_GROUP:
2309 /*
2310 * Add a multicast group membership.
2311 * Group must be a valid IP6 multicast address.
2312 */
2313 error = sockopt_get(sopt, &mreq, sizeof(mreq));
2314 if (error != 0)
2315 break;
2316
2317 if (IN6_IS_ADDR_UNSPECIFIED(&mreq.ipv6mr_multiaddr)) {
2318 /*
2319 * We use the unspecified address to specify to accept
2320 * all multicast addresses. Only super user is allowed
2321 * to do this.
2322 */
2323 if (kauth_authorize_network(l->l_cred, KAUTH_NETWORK_IPV6,
2324 KAUTH_REQ_NETWORK_IPV6_JOIN_MULTICAST, NULL, NULL, NULL))
2325 {
2326 error = EACCES;
2327 break;
2328 }
2329 } else if (!IN6_IS_ADDR_MULTICAST(&mreq.ipv6mr_multiaddr)) {
2330 error = EINVAL;
2331 break;
2332 }
2333
2334 /*
2335 * If no interface was explicitly specified, choose an
2336 * appropriate one according to the given multicast address.
2337 */
2338 if (mreq.ipv6mr_interface == 0) {
2339 struct rtentry *rt;
2340 union {
2341 struct sockaddr dst;
2342 struct sockaddr_in6 dst6;
2343 } u;
2344
2345 /*
2346 * Look up the routing table for the
2347 * address, and choose the outgoing interface.
2348 * XXX: is it a good approach?
2349 */
2350 memset(&ro, 0, sizeof(ro));
2351 sockaddr_in6_init(&u.dst6, &mreq.ipv6mr_multiaddr, 0,
2352 0, 0);
2353 rtcache_setdst(&ro, &u.dst);
2354 ifp = (rt = rtcache_init(&ro)) != NULL ? rt->rt_ifp
2355 : NULL;
2356 rtcache_free(&ro);
2357 } else {
2358 /*
2359 * If the interface is specified, validate it.
2360 */
2361 if (if_indexlim <= mreq.ipv6mr_interface ||
2362 !ifindex2ifnet[mreq.ipv6mr_interface]) {
2363 error = ENXIO; /* XXX EINVAL? */
2364 break;
2365 }
2366 ifp = ifindex2ifnet[mreq.ipv6mr_interface];
2367 }
2368
2369 /*
2370 * See if we found an interface, and confirm that it
2371 * supports multicast
2372 */
2373 if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
2374 error = EADDRNOTAVAIL;
2375 break;
2376 }
2377
2378 if (in6_setscope(&mreq.ipv6mr_multiaddr, ifp, NULL)) {
2379 error = EADDRNOTAVAIL; /* XXX: should not happen */
2380 break;
2381 }
2382
2383 /*
2384 * See if the membership already exists.
2385 */
2386 for (imm = im6o->im6o_memberships.lh_first;
2387 imm != NULL; imm = imm->i6mm_chain.le_next)
2388 if (imm->i6mm_maddr->in6m_ifp == ifp &&
2389 IN6_ARE_ADDR_EQUAL(&imm->i6mm_maddr->in6m_addr,
2390 &mreq.ipv6mr_multiaddr))
2391 break;
2392 if (imm != NULL) {
2393 error = EADDRINUSE;
2394 break;
2395 }
2396 /*
2397 * Everything looks good; add a new record to the multicast
2398 * address list for the given interface.
2399 */
2400 imm = in6_joingroup(ifp, &mreq.ipv6mr_multiaddr, &error, 0);
2401 if (imm == NULL)
2402 break;
2403 LIST_INSERT_HEAD(&im6o->im6o_memberships, imm, i6mm_chain);
2404 break;
2405
2406 case IPV6_LEAVE_GROUP:
2407 /*
2408 * Drop a multicast group membership.
2409 * Group must be a valid IP6 multicast address.
2410 */
2411 error = sockopt_get(sopt, &mreq, sizeof(mreq));
2412 if (error != 0)
2413 break;
2414
2415 /*
2416 * If an interface address was specified, get a pointer
2417 * to its ifnet structure.
2418 */
2419 if (mreq.ipv6mr_interface != 0) {
2420 if (if_indexlim <= mreq.ipv6mr_interface ||
2421 !ifindex2ifnet[mreq.ipv6mr_interface]) {
2422 error = ENXIO; /* XXX EINVAL? */
2423 break;
2424 }
2425 ifp = ifindex2ifnet[mreq.ipv6mr_interface];
2426 } else
2427 ifp = NULL;
2428
2429 /* Fill in the scope zone ID */
2430 if (ifp) {
2431 if (in6_setscope(&mreq.ipv6mr_multiaddr, ifp, NULL)) {
2432 /* XXX: should not happen */
2433 error = EADDRNOTAVAIL;
2434 break;
2435 }
2436 } else if (mreq.ipv6mr_interface != 0) {
2437 /*
2438 * XXX: This case would happens when the (positive)
2439 * index is in the valid range, but the corresponding
2440 * interface has been detached dynamically. The above
2441 * check probably avoids such case to happen here, but
2442 * we check it explicitly for safety.
2443 */
2444 error = EADDRNOTAVAIL;
2445 break;
2446 } else { /* ipv6mr_interface == 0 */
2447 struct sockaddr_in6 sa6_mc;
2448
2449 /*
2450 * The API spec says as follows:
2451 * If the interface index is specified as 0, the
2452 * system may choose a multicast group membership to
2453 * drop by matching the multicast address only.
2454 * On the other hand, we cannot disambiguate the scope
2455 * zone unless an interface is provided. Thus, we
2456 * check if there's ambiguity with the default scope
2457 * zone as the last resort.
2458 */
2459 sockaddr_in6_init(&sa6_mc, &mreq.ipv6mr_multiaddr,
2460 0, 0, 0);
2461 error = sa6_embedscope(&sa6_mc, ip6_use_defzone);
2462 if (error != 0)
2463 break;
2464 mreq.ipv6mr_multiaddr = sa6_mc.sin6_addr;
2465 }
2466
2467 /*
2468 * Find the membership in the membership list.
2469 */
2470 for (imm = im6o->im6o_memberships.lh_first;
2471 imm != NULL; imm = imm->i6mm_chain.le_next) {
2472 if ((ifp == NULL || imm->i6mm_maddr->in6m_ifp == ifp) &&
2473 IN6_ARE_ADDR_EQUAL(&imm->i6mm_maddr->in6m_addr,
2474 &mreq.ipv6mr_multiaddr))
2475 break;
2476 }
2477 if (imm == NULL) {
2478 /* Unable to resolve interface */
2479 error = EADDRNOTAVAIL;
2480 break;
2481 }
2482 /*
2483 * Give up the multicast address record to which the
2484 * membership points.
2485 */
2486 LIST_REMOVE(imm, i6mm_chain);
2487 in6_leavegroup(imm);
2488 break;
2489
2490 default:
2491 error = EOPNOTSUPP;
2492 break;
2493 }
2494
2495 /*
2496 * If all options have default values, no need to keep the mbuf.
2497 */
2498 if (im6o->im6o_multicast_ifp == NULL &&
2499 im6o->im6o_multicast_hlim == ip6_defmcasthlim &&
2500 im6o->im6o_multicast_loop == IPV6_DEFAULT_MULTICAST_LOOP &&
2501 im6o->im6o_memberships.lh_first == NULL) {
2502 free(*im6op, M_IPMOPTS);
2503 *im6op = NULL;
2504 }
2505
2506 return (error);
2507 }
2508
2509 /*
2510 * Return the IP6 multicast options in response to user getsockopt().
2511 */
2512 static int
2513 ip6_getmoptions(struct sockopt *sopt, struct ip6_moptions *im6o)
2514 {
2515 u_int optval;
2516 int error;
2517
2518 switch (sopt->sopt_name) {
2519 case IPV6_MULTICAST_IF:
2520 if (im6o == NULL || im6o->im6o_multicast_ifp == NULL)
2521 optval = 0;
2522 else
2523 optval = im6o->im6o_multicast_ifp->if_index;
2524
2525 error = sockopt_set(sopt, &optval, sizeof(optval));
2526 break;
2527
2528 case IPV6_MULTICAST_HOPS:
2529 if (im6o == NULL)
2530 optval = ip6_defmcasthlim;
2531 else
2532 optval = im6o->im6o_multicast_hlim;
2533
2534 error = sockopt_set(sopt, &optval, sizeof(optval));
2535 break;
2536
2537 case IPV6_MULTICAST_LOOP:
2538 if (im6o == NULL)
2539 optval = ip6_defmcasthlim;
2540 else
2541 optval = im6o->im6o_multicast_loop;
2542
2543 error = sockopt_set(sopt, &optval, sizeof(optval));
2544 break;
2545
2546 default:
2547 error = EOPNOTSUPP;
2548 }
2549
2550 return (error);
2551 }
2552
2553 /*
2554 * Discard the IP6 multicast options.
2555 */
2556 void
2557 ip6_freemoptions(struct ip6_moptions *im6o)
2558 {
2559 struct in6_multi_mship *imm;
2560
2561 if (im6o == NULL)
2562 return;
2563
2564 while ((imm = im6o->im6o_memberships.lh_first) != NULL) {
2565 LIST_REMOVE(imm, i6mm_chain);
2566 in6_leavegroup(imm);
2567 }
2568 free(im6o, M_IPMOPTS);
2569 }
2570
2571 /*
2572 * Set IPv6 outgoing packet options based on advanced API.
2573 */
2574 int
2575 ip6_setpktopts(struct mbuf *control, struct ip6_pktopts *opt,
2576 struct ip6_pktopts *stickyopt, kauth_cred_t cred, int uproto)
2577 {
2578 struct cmsghdr *cm = 0;
2579
2580 if (control == NULL || opt == NULL)
2581 return (EINVAL);
2582
2583 ip6_initpktopts(opt);
2584 if (stickyopt) {
2585 int error;
2586
2587 /*
2588 * If stickyopt is provided, make a local copy of the options
2589 * for this particular packet, then override them by ancillary
2590 * objects.
2591 * XXX: copypktopts() does not copy the cached route to a next
2592 * hop (if any). This is not very good in terms of efficiency,
2593 * but we can allow this since this option should be rarely
2594 * used.
2595 */
2596 if ((error = copypktopts(opt, stickyopt, M_NOWAIT)) != 0)
2597 return (error);
2598 }
2599
2600 /*
2601 * XXX: Currently, we assume all the optional information is stored
2602 * in a single mbuf.
2603 */
2604 if (control->m_next)
2605 return (EINVAL);
2606
2607 /* XXX if cm->cmsg_len is not aligned, control->m_len can become <0 */
2608 for (; control->m_len > 0; control->m_data += CMSG_ALIGN(cm->cmsg_len),
2609 control->m_len -= CMSG_ALIGN(cm->cmsg_len)) {
2610 int error;
2611
2612 if (control->m_len < CMSG_LEN(0))
2613 return (EINVAL);
2614
2615 cm = mtod(control, struct cmsghdr *);
2616 if (cm->cmsg_len == 0 || cm->cmsg_len > control->m_len)
2617 return (EINVAL);
2618 if (cm->cmsg_level != IPPROTO_IPV6)
2619 continue;
2620
2621 error = ip6_setpktopt(cm->cmsg_type, CMSG_DATA(cm),
2622 cm->cmsg_len - CMSG_LEN(0), opt, cred, 0, 1, uproto);
2623 if (error)
2624 return (error);
2625 }
2626
2627 return (0);
2628 }
2629
2630 /*
2631 * Set a particular packet option, as a sticky option or an ancillary data
2632 * item. "len" can be 0 only when it's a sticky option.
2633 * We have 4 cases of combination of "sticky" and "cmsg":
2634 * "sticky=0, cmsg=0": impossible
2635 * "sticky=0, cmsg=1": RFC2292 or RFC3542 ancillary data
2636 * "sticky=1, cmsg=0": RFC3542 socket option
2637 * "sticky=1, cmsg=1": RFC2292 socket option
2638 */
2639 static int
2640 ip6_setpktopt(int optname, u_char *buf, int len, struct ip6_pktopts *opt,
2641 kauth_cred_t cred, int sticky, int cmsg, int uproto)
2642 {
2643 int minmtupolicy;
2644 int error;
2645
2646 if (!sticky && !cmsg) {
2647 #ifdef DIAGNOSTIC
2648 printf("ip6_setpktopt: impossible case\n");
2649 #endif
2650 return (EINVAL);
2651 }
2652
2653 /*
2654 * IPV6_2292xxx is for backward compatibility to RFC2292, and should
2655 * not be specified in the context of RFC3542. Conversely,
2656 * RFC3542 types should not be specified in the context of RFC2292.
2657 */
2658 if (!cmsg) {
2659 switch (optname) {
2660 case IPV6_2292PKTINFO:
2661 case IPV6_2292HOPLIMIT:
2662 case IPV6_2292NEXTHOP:
2663 case IPV6_2292HOPOPTS:
2664 case IPV6_2292DSTOPTS:
2665 case IPV6_2292RTHDR:
2666 case IPV6_2292PKTOPTIONS:
2667 return (ENOPROTOOPT);
2668 }
2669 }
2670 if (sticky && cmsg) {
2671 switch (optname) {
2672 case IPV6_PKTINFO:
2673 case IPV6_HOPLIMIT:
2674 case IPV6_NEXTHOP:
2675 case IPV6_HOPOPTS:
2676 case IPV6_DSTOPTS:
2677 case IPV6_RTHDRDSTOPTS:
2678 case IPV6_RTHDR:
2679 case IPV6_USE_MIN_MTU:
2680 case IPV6_DONTFRAG:
2681 case IPV6_OTCLASS:
2682 case IPV6_TCLASS:
2683 return (ENOPROTOOPT);
2684 }
2685 }
2686
2687 switch (optname) {
2688 #ifdef RFC2292
2689 case IPV6_2292PKTINFO:
2690 #endif
2691 case IPV6_PKTINFO:
2692 {
2693 struct ifnet *ifp = NULL;
2694 struct in6_pktinfo *pktinfo;
2695
2696 if (len != sizeof(struct in6_pktinfo))
2697 return (EINVAL);
2698
2699 pktinfo = (struct in6_pktinfo *)buf;
2700
2701 /*
2702 * An application can clear any sticky IPV6_PKTINFO option by
2703 * doing a "regular" setsockopt with ipi6_addr being
2704 * in6addr_any and ipi6_ifindex being zero.
2705 * [RFC 3542, Section 6]
2706 */
2707 if (optname == IPV6_PKTINFO && opt->ip6po_pktinfo &&
2708 pktinfo->ipi6_ifindex == 0 &&
2709 IN6_IS_ADDR_UNSPECIFIED(&pktinfo->ipi6_addr)) {
2710 ip6_clearpktopts(opt, optname);
2711 break;
2712 }
2713
2714 if (uproto == IPPROTO_TCP && optname == IPV6_PKTINFO &&
2715 sticky && !IN6_IS_ADDR_UNSPECIFIED(&pktinfo->ipi6_addr)) {
2716 return (EINVAL);
2717 }
2718
2719 /* validate the interface index if specified. */
2720 if (pktinfo->ipi6_ifindex >= if_indexlim) {
2721 return (ENXIO);
2722 }
2723 if (pktinfo->ipi6_ifindex) {
2724 ifp = ifindex2ifnet[pktinfo->ipi6_ifindex];
2725 if (ifp == NULL)
2726 return (ENXIO);
2727 }
2728
2729 /*
2730 * We store the address anyway, and let in6_selectsrc()
2731 * validate the specified address. This is because ipi6_addr
2732 * may not have enough information about its scope zone, and
2733 * we may need additional information (such as outgoing
2734 * interface or the scope zone of a destination address) to
2735 * disambiguate the scope.
2736 * XXX: the delay of the validation may confuse the
2737 * application when it is used as a sticky option.
2738 */
2739 if (opt->ip6po_pktinfo == NULL) {
2740 opt->ip6po_pktinfo = malloc(sizeof(*pktinfo),
2741 M_IP6OPT, M_NOWAIT);
2742 if (opt->ip6po_pktinfo == NULL)
2743 return (ENOBUFS);
2744 }
2745 memcpy(opt->ip6po_pktinfo, pktinfo, sizeof(*pktinfo));
2746 break;
2747 }
2748
2749 #ifdef RFC2292
2750 case IPV6_2292HOPLIMIT:
2751 #endif
2752 case IPV6_HOPLIMIT:
2753 {
2754 int *hlimp;
2755
2756 /*
2757 * RFC 3542 deprecated the usage of sticky IPV6_HOPLIMIT
2758 * to simplify the ordering among hoplimit options.
2759 */
2760 if (optname == IPV6_HOPLIMIT && sticky)
2761 return (ENOPROTOOPT);
2762
2763 if (len != sizeof(int))
2764 return (EINVAL);
2765 hlimp = (int *)buf;
2766 if (*hlimp < -1 || *hlimp > 255)
2767 return (EINVAL);
2768
2769 opt->ip6po_hlim = *hlimp;
2770 break;
2771 }
2772
2773 case IPV6_OTCLASS:
2774 if (len != sizeof(u_int8_t))
2775 return (EINVAL);
2776
2777 opt->ip6po_tclass = *(u_int8_t *)buf;
2778 break;
2779
2780 case IPV6_TCLASS:
2781 {
2782 int tclass;
2783
2784 if (len != sizeof(int))
2785 return (EINVAL);
2786 tclass = *(int *)buf;
2787 if (tclass < -1 || tclass > 255)
2788 return (EINVAL);
2789
2790 opt->ip6po_tclass = tclass;
2791 break;
2792 }
2793
2794 #ifdef RFC2292
2795 case IPV6_2292NEXTHOP:
2796 #endif
2797 case IPV6_NEXTHOP:
2798 error = kauth_authorize_network(cred, KAUTH_NETWORK_IPV6,
2799 KAUTH_REQ_NETWORK_IPV6_HOPBYHOP, NULL, NULL, NULL);
2800 if (error)
2801 return (error);
2802
2803 if (len == 0) { /* just remove the option */
2804 ip6_clearpktopts(opt, IPV6_NEXTHOP);
2805 break;
2806 }
2807
2808 /* check if cmsg_len is large enough for sa_len */
2809 if (len < sizeof(struct sockaddr) || len < *buf)
2810 return (EINVAL);
2811
2812 switch (((struct sockaddr *)buf)->sa_family) {
2813 case AF_INET6:
2814 {
2815 struct sockaddr_in6 *sa6 = (struct sockaddr_in6 *)buf;
2816
2817 if (sa6->sin6_len != sizeof(struct sockaddr_in6))
2818 return (EINVAL);
2819
2820 if (IN6_IS_ADDR_UNSPECIFIED(&sa6->sin6_addr) ||
2821 IN6_IS_ADDR_MULTICAST(&sa6->sin6_addr)) {
2822 return (EINVAL);
2823 }
2824 if ((error = sa6_embedscope(sa6, ip6_use_defzone))
2825 != 0) {
2826 return (error);
2827 }
2828 break;
2829 }
2830 case AF_LINK: /* eventually be supported? */
2831 default:
2832 return (EAFNOSUPPORT);
2833 }
2834
2835 /* turn off the previous option, then set the new option. */
2836 ip6_clearpktopts(opt, IPV6_NEXTHOP);
2837 opt->ip6po_nexthop = malloc(*buf, M_IP6OPT, M_NOWAIT);
2838 if (opt->ip6po_nexthop == NULL)
2839 return (ENOBUFS);
2840 memcpy(opt->ip6po_nexthop, buf, *buf);
2841 break;
2842
2843 #ifdef RFC2292
2844 case IPV6_2292HOPOPTS:
2845 #endif
2846 case IPV6_HOPOPTS:
2847 {
2848 struct ip6_hbh *hbh;
2849 int hbhlen;
2850
2851 /*
2852 * XXX: We don't allow a non-privileged user to set ANY HbH
2853 * options, since per-option restriction has too much
2854 * overhead.
2855 */
2856 error = kauth_authorize_network(cred, KAUTH_NETWORK_IPV6,
2857 KAUTH_REQ_NETWORK_IPV6_HOPBYHOP, NULL, NULL, NULL);
2858 if (error)
2859 return (error);
2860
2861 if (len == 0) {
2862 ip6_clearpktopts(opt, IPV6_HOPOPTS);
2863 break; /* just remove the option */
2864 }
2865
2866 /* message length validation */
2867 if (len < sizeof(struct ip6_hbh))
2868 return (EINVAL);
2869 hbh = (struct ip6_hbh *)buf;
2870 hbhlen = (hbh->ip6h_len + 1) << 3;
2871 if (len != hbhlen)
2872 return (EINVAL);
2873
2874 /* turn off the previous option, then set the new option. */
2875 ip6_clearpktopts(opt, IPV6_HOPOPTS);
2876 opt->ip6po_hbh = malloc(hbhlen, M_IP6OPT, M_NOWAIT);
2877 if (opt->ip6po_hbh == NULL)
2878 return (ENOBUFS);
2879 memcpy(opt->ip6po_hbh, hbh, hbhlen);
2880
2881 break;
2882 }
2883
2884 #ifdef RFC2292
2885 case IPV6_2292DSTOPTS:
2886 #endif
2887 case IPV6_DSTOPTS:
2888 case IPV6_RTHDRDSTOPTS:
2889 {
2890 struct ip6_dest *dest, **newdest = NULL;
2891 int destlen;
2892
2893 /* XXX: see the comment for IPV6_HOPOPTS */
2894 error = kauth_authorize_network(cred, KAUTH_NETWORK_IPV6,
2895 KAUTH_REQ_NETWORK_IPV6_HOPBYHOP, NULL, NULL, NULL);
2896 if (error)
2897 return (error);
2898
2899 if (len == 0) {
2900 ip6_clearpktopts(opt, optname);
2901 break; /* just remove the option */
2902 }
2903
2904 /* message length validation */
2905 if (len < sizeof(struct ip6_dest))
2906 return (EINVAL);
2907 dest = (struct ip6_dest *)buf;
2908 destlen = (dest->ip6d_len + 1) << 3;
2909 if (len != destlen)
2910 return (EINVAL);
2911 /*
2912 * Determine the position that the destination options header
2913 * should be inserted; before or after the routing header.
2914 */
2915 switch (optname) {
2916 case IPV6_2292DSTOPTS:
2917 /*
2918 * The old advanced API is ambiguous on this point.
2919 * Our approach is to determine the position based
2920 * according to the existence of a routing header.
2921 * Note, however, that this depends on the order of the
2922 * extension headers in the ancillary data; the 1st
2923 * part of the destination options header must appear
2924 * before the routing header in the ancillary data,
2925 * too.
2926 * RFC3542 solved the ambiguity by introducing
2927 * separate ancillary data or option types.
2928 */
2929 if (opt->ip6po_rthdr == NULL)
2930 newdest = &opt->ip6po_dest1;
2931 else
2932 newdest = &opt->ip6po_dest2;
2933 break;
2934 case IPV6_RTHDRDSTOPTS:
2935 newdest = &opt->ip6po_dest1;
2936 break;
2937 case IPV6_DSTOPTS:
2938 newdest = &opt->ip6po_dest2;
2939 break;
2940 }
2941
2942 /* turn off the previous option, then set the new option. */
2943 ip6_clearpktopts(opt, optname);
2944 *newdest = malloc(destlen, M_IP6OPT, M_NOWAIT);
2945 if (*newdest == NULL)
2946 return (ENOBUFS);
2947 memcpy(*newdest, dest, destlen);
2948
2949 break;
2950 }
2951
2952 #ifdef RFC2292
2953 case IPV6_2292RTHDR:
2954 #endif
2955 case IPV6_RTHDR:
2956 {
2957 struct ip6_rthdr *rth;
2958 int rthlen;
2959
2960 if (len == 0) {
2961 ip6_clearpktopts(opt, IPV6_RTHDR);
2962 break; /* just remove the option */
2963 }
2964
2965 /* message length validation */
2966 if (len < sizeof(struct ip6_rthdr))
2967 return (EINVAL);
2968 rth = (struct ip6_rthdr *)buf;
2969 rthlen = (rth->ip6r_len + 1) << 3;
2970 if (len != rthlen)
2971 return (EINVAL);
2972 switch (rth->ip6r_type) {
2973 case IPV6_RTHDR_TYPE_0:
2974 if (rth->ip6r_len == 0) /* must contain one addr */
2975 return (EINVAL);
2976 if (rth->ip6r_len % 2) /* length must be even */
2977 return (EINVAL);
2978 if (rth->ip6r_len / 2 != rth->ip6r_segleft)
2979 return (EINVAL);
2980 break;
2981 default:
2982 return (EINVAL); /* not supported */
2983 }
2984 /* turn off the previous option */
2985 ip6_clearpktopts(opt, IPV6_RTHDR);
2986 opt->ip6po_rthdr = malloc(rthlen, M_IP6OPT, M_NOWAIT);
2987 if (opt->ip6po_rthdr == NULL)
2988 return (ENOBUFS);
2989 memcpy(opt->ip6po_rthdr, rth, rthlen);
2990 break;
2991 }
2992
2993 case IPV6_USE_MIN_MTU:
2994 if (len != sizeof(int))
2995 return (EINVAL);
2996 minmtupolicy = *(int *)buf;
2997 if (minmtupolicy != IP6PO_MINMTU_MCASTONLY &&
2998 minmtupolicy != IP6PO_MINMTU_DISABLE &&
2999 minmtupolicy != IP6PO_MINMTU_ALL) {
3000 return (EINVAL);
3001 }
3002 opt->ip6po_minmtu = minmtupolicy;
3003 break;
3004
3005 case IPV6_DONTFRAG:
3006 if (len != sizeof(int))
3007 return (EINVAL);
3008
3009 if (uproto == IPPROTO_TCP || *(int *)buf == 0) {
3010 /*
3011 * we ignore this option for TCP sockets.
3012 * (RFC3542 leaves this case unspecified.)
3013 */
3014 opt->ip6po_flags &= ~IP6PO_DONTFRAG;
3015 } else
3016 opt->ip6po_flags |= IP6PO_DONTFRAG;
3017 break;
3018
3019 default:
3020 return (ENOPROTOOPT);
3021 } /* end of switch */
3022
3023 return (0);
3024 }
3025
3026 /*
3027 * Routine called from ip6_output() to loop back a copy of an IP6 multicast
3028 * packet to the input queue of a specified interface. Note that this
3029 * calls the output routine of the loopback "driver", but with an interface
3030 * pointer that might NOT be lo0ifp -- easier than replicating that code here.
3031 */
3032 void
3033 ip6_mloopback(struct ifnet *ifp, struct mbuf *m,
3034 const struct sockaddr_in6 *dst)
3035 {
3036 struct mbuf *copym;
3037 struct ip6_hdr *ip6;
3038
3039 copym = m_copy(m, 0, M_COPYALL);
3040 if (copym == NULL)
3041 return;
3042
3043 /*
3044 * Make sure to deep-copy IPv6 header portion in case the data
3045 * is in an mbuf cluster, so that we can safely override the IPv6
3046 * header portion later.
3047 */
3048 if ((copym->m_flags & M_EXT) != 0 ||
3049 copym->m_len < sizeof(struct ip6_hdr)) {
3050 copym = m_pullup(copym, sizeof(struct ip6_hdr));
3051 if (copym == NULL)
3052 return;
3053 }
3054
3055 #ifdef DIAGNOSTIC
3056 if (copym->m_len < sizeof(*ip6)) {
3057 m_freem(copym);
3058 return;
3059 }
3060 #endif
3061
3062 ip6 = mtod(copym, struct ip6_hdr *);
3063 /*
3064 * clear embedded scope identifiers if necessary.
3065 * in6_clearscope will touch the addresses only when necessary.
3066 */
3067 in6_clearscope(&ip6->ip6_src);
3068 in6_clearscope(&ip6->ip6_dst);
3069
3070 (void)looutput(ifp, copym, (const struct sockaddr *)dst, NULL);
3071 }
3072
3073 /*
3074 * Chop IPv6 header off from the payload.
3075 */
3076 static int
3077 ip6_splithdr(struct mbuf *m, struct ip6_exthdrs *exthdrs)
3078 {
3079 struct mbuf *mh;
3080 struct ip6_hdr *ip6;
3081
3082 ip6 = mtod(m, struct ip6_hdr *);
3083 if (m->m_len > sizeof(*ip6)) {
3084 MGETHDR(mh, M_DONTWAIT, MT_HEADER);
3085 if (mh == 0) {
3086 m_freem(m);
3087 return ENOBUFS;
3088 }
3089 M_MOVE_PKTHDR(mh, m);
3090 MH_ALIGN(mh, sizeof(*ip6));
3091 m->m_len -= sizeof(*ip6);
3092 m->m_data += sizeof(*ip6);
3093 mh->m_next = m;
3094 m = mh;
3095 m->m_len = sizeof(*ip6);
3096 bcopy((void *)ip6, mtod(m, void *), sizeof(*ip6));
3097 }
3098 exthdrs->ip6e_ip6 = m;
3099 return 0;
3100 }
3101
3102 /*
3103 * Compute IPv6 extension header length.
3104 */
3105 int
3106 ip6_optlen(struct in6pcb *in6p)
3107 {
3108 int len;
3109
3110 if (!in6p->in6p_outputopts)
3111 return 0;
3112
3113 len = 0;
3114 #define elen(x) \
3115 (((struct ip6_ext *)(x)) ? (((struct ip6_ext *)(x))->ip6e_len + 1) << 3 : 0)
3116
3117 len += elen(in6p->in6p_outputopts->ip6po_hbh);
3118 len += elen(in6p->in6p_outputopts->ip6po_dest1);
3119 len += elen(in6p->in6p_outputopts->ip6po_rthdr);
3120 len += elen(in6p->in6p_outputopts->ip6po_dest2);
3121 return len;
3122 #undef elen
3123 }
3124