ip6_output.c revision 1.105 1 /* $NetBSD: ip6_output.c,v 1.105 2006/11/23 19:41:58 yamt Exp $ */
2 /* $KAME: ip6_output.c,v 1.172 2001/03/25 09:55:56 itojun Exp $ */
3
4 /*
5 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. Neither the name of the project nor the names of its contributors
17 * may be used to endorse or promote products derived from this software
18 * without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 */
32
33 /*
34 * Copyright (c) 1982, 1986, 1988, 1990, 1993
35 * The Regents of the University of California. All rights reserved.
36 *
37 * Redistribution and use in source and binary forms, with or without
38 * modification, are permitted provided that the following conditions
39 * are met:
40 * 1. Redistributions of source code must retain the above copyright
41 * notice, this list of conditions and the following disclaimer.
42 * 2. Redistributions in binary form must reproduce the above copyright
43 * notice, this list of conditions and the following disclaimer in the
44 * documentation and/or other materials provided with the distribution.
45 * 3. Neither the name of the University nor the names of its contributors
46 * may be used to endorse or promote products derived from this software
47 * without specific prior written permission.
48 *
49 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
50 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
53 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59 * SUCH DAMAGE.
60 *
61 * @(#)ip_output.c 8.3 (Berkeley) 1/21/94
62 */
63
64 #include <sys/cdefs.h>
65 __KERNEL_RCSID(0, "$NetBSD: ip6_output.c,v 1.105 2006/11/23 19:41:58 yamt Exp $");
66
67 #include "opt_inet.h"
68 #include "opt_inet6.h"
69 #include "opt_ipsec.h"
70 #include "opt_pfil_hooks.h"
71
72 #include <sys/param.h>
73 #include <sys/malloc.h>
74 #include <sys/mbuf.h>
75 #include <sys/errno.h>
76 #include <sys/protosw.h>
77 #include <sys/socket.h>
78 #include <sys/socketvar.h>
79 #include <sys/systm.h>
80 #include <sys/proc.h>
81 #include <sys/kauth.h>
82
83 #include <net/if.h>
84 #include <net/route.h>
85 #ifdef PFIL_HOOKS
86 #include <net/pfil.h>
87 #endif
88
89 #include <netinet/in.h>
90 #include <netinet/in_var.h>
91 #include <netinet/ip6.h>
92 #include <netinet/icmp6.h>
93 #include <netinet/in_offload.h>
94 #include <netinet6/in6_offload.h>
95 #include <netinet6/ip6_var.h>
96 #include <netinet6/in6_pcb.h>
97 #include <netinet6/nd6.h>
98 #include <netinet6/ip6protosw.h>
99 #include <netinet6/scope6_var.h>
100
101 #ifdef IPSEC
102 #include <netinet6/ipsec.h>
103 #include <netkey/key.h>
104 #endif /* IPSEC */
105
106 #include <net/net_osdep.h>
107
108 #ifdef PFIL_HOOKS
109 extern struct pfil_head inet6_pfil_hook; /* XXX */
110 #endif
111
/*
 * Working set of mbufs used by ip6_output() while it assembles the
 * outgoing header chain.  Each member is either NULL (header absent)
 * or points at the mbuf holding that header.
 */
struct ip6_exthdrs {
	struct mbuf *ip6e_ip6;		/* the IPv6 header itself */
	struct mbuf *ip6e_hbh;		/* Hop-by-Hop options header */
	struct mbuf *ip6e_dest1;	/* Destination options header (1st part) */
	struct mbuf *ip6e_rthdr;	/* Routing header */
	struct mbuf *ip6e_dest2;	/* Destination options header (2nd part) */
};
119
120 static int ip6_pcbopt __P((int, u_char *, int, struct ip6_pktopts **,
121 int, int));
122 static int ip6_getpcbopt __P((struct ip6_pktopts *, int, struct mbuf **));
123 static int ip6_setpktopt __P((int, u_char *, int, struct ip6_pktopts *, int,
124 int, int, int));
125 static int ip6_setmoptions __P((int, struct ip6_moptions **, struct mbuf *));
126 static int ip6_getmoptions __P((int, struct ip6_moptions *, struct mbuf **));
127 static int ip6_copyexthdr __P((struct mbuf **, caddr_t, int));
128 static int ip6_insertfraghdr __P((struct mbuf *, struct mbuf *, int,
129 struct ip6_frag **));
130 static int ip6_insert_jumboopt __P((struct ip6_exthdrs *, u_int32_t));
131 static int ip6_splithdr __P((struct mbuf *, struct ip6_exthdrs *));
132 static int ip6_getpmtu __P((struct route_in6 *, struct route_in6 *,
133 struct ifnet *, struct in6_addr *, u_long *, int *));
134 static int copypktopts __P((struct ip6_pktopts *, struct ip6_pktopts *, int));
135
136 #ifdef RFC2292
137 static int ip6_pcbopts __P((struct ip6_pktopts **, struct mbuf *,
138 struct socket *));
139 #endif
140
/*
 * IN6_NEED_CHECKSUM(ifp, csum_flags): non-zero when a software checksum
 * should be computed for a packet leaving through ifp.
 *
 * True when ifp is not a loopback interface.  On a loopback interface it
 * is true only if csum_flags has M_CSUM_UDPv6 set and
 * udp_do_loopback_cksum is non-zero, or has M_CSUM_TCPv6 set and
 * tcp_do_loopback_cksum is non-zero.  The non-loopback outcome is hinted
 * as the likely one via __predict_true().
 *
 * Note: csum_flags is evaluated twice; pass an expression without side
 * effects.
 */
#define	IN6_NEED_CHECKSUM(ifp, csum_flags) \
	(__predict_true(((ifp)->if_flags & IFF_LOOPBACK) == 0 || \
	(((csum_flags) & M_CSUM_UDPv6) != 0 && udp_do_loopback_cksum) || \
	(((csum_flags) & M_CSUM_TCPv6) != 0 && tcp_do_loopback_cksum)))
145
/*
 * Argument bundle carried through tcp6_segment() to
 * ip6_tso_output_callback(): everything nd6_output() needs apart from
 * the mbuf itself.
 */
struct ip6_tso_output_args {
	struct ifnet *ifp;		/* outgoing interface */
	struct ifnet *origifp;		/* original interface, as passed to nd6_output() */
	struct sockaddr_in6 *dst;	/* destination handed to nd6_output() */
	struct rtentry *rt;		/* route entry handed to nd6_output() */
};
152
/* Forward declarations for the software TSO output path. */
static int ip6_tso_output_callback(void *, struct mbuf *);
static int ip6_tso_output(struct ifnet *, struct ifnet *, struct mbuf *,
	struct sockaddr_in6 *, struct rtentry *);
156
157 static int
158 ip6_tso_output_callback(void *vp, struct mbuf *m)
159 {
160 struct ip6_tso_output_args *args = vp;
161
162 return nd6_output(args->ifp, args->origifp, m, args->dst, args->rt);
163 }
164
165 static int
166 ip6_tso_output(struct ifnet *ifp, struct ifnet *origifp, struct mbuf *m,
167 struct sockaddr_in6 *dst, struct rtentry *rt)
168 {
169 struct ip6_tso_output_args args;
170
171 args.ifp = ifp;
172 args.origifp = origifp;
173 args.dst = dst;
174 args.rt = rt;
175
176 return tcp6_segment(m, ip6_tso_output_callback, &args);
177 }
178
/*
 * IP6 output. The packet in mbuf chain m contains a skeletal IP6
 * header (with pri, len, nxt, hlim, src, dst).
 * This function may modify ver and hlim only.
 * The mbuf chain containing the packet will be freed.
 * The mbuf opt, if present, will not be freed.
 *
 * Type of "mtu": rt_rmx.rmx_mtu is u_long, ifnet.ifr_mtu is int, and
 * nd_ifinfo.linkmtu is u_int32_t, so we use u_long to hold the largest
 * one, which is rt_rmx.rmx_mtu.
 */
190 int
191 ip6_output(
192 struct mbuf *m0,
193 struct ip6_pktopts *opt,
194 struct route_in6 *ro,
195 int flags,
196 struct ip6_moptions *im6o,
197 struct socket *so,
198 struct ifnet **ifpp /* XXX: just for statistics */
199 )
200 {
201 struct ip6_hdr *ip6, *mhip6;
202 struct ifnet *ifp, *origifp;
203 struct mbuf *m = m0;
204 int hlen, tlen, len, off;
205 boolean_t tso;
206 struct route_in6 ip6route;
207 struct rtentry *rt = NULL;
208 struct sockaddr_in6 *dst, src_sa, dst_sa;
209 int error = 0;
210 struct in6_ifaddr *ia = NULL;
211 u_long mtu;
212 int alwaysfrag, dontfrag;
213 u_int32_t optlen = 0, plen = 0, unfragpartlen = 0;
214 struct ip6_exthdrs exthdrs;
215 struct in6_addr finaldst, src0, dst0;
216 u_int32_t zone;
217 struct route_in6 *ro_pmtu = NULL;
218 int hdrsplit = 0;
219 int needipsec = 0;
220 #ifdef IPSEC
221 int needipsectun = 0;
222 struct secpolicy *sp = NULL;
223
224 ip6 = mtod(m, struct ip6_hdr *);
225 #endif /* IPSEC */
226
227 #ifdef DIAGNOSTIC
228 if ((m->m_flags & M_PKTHDR) == 0)
229 panic("ip6_output: no HDR");
230
231 if ((m->m_pkthdr.csum_flags &
232 (M_CSUM_TCPv4|M_CSUM_UDPv4|M_CSUM_TSOv4)) != 0) {
233 panic("ip6_output: IPv4 checksum offload flags: %d",
234 m->m_pkthdr.csum_flags);
235 }
236
237 if ((m->m_pkthdr.csum_flags & (M_CSUM_TCPv6|M_CSUM_UDPv6)) ==
238 (M_CSUM_TCPv6|M_CSUM_UDPv6)) {
239 panic("ip6_output: conflicting checksum offload flags: %d",
240 m->m_pkthdr.csum_flags);
241 }
242 #endif
243
244 M_CSUM_DATA_IPv6_HL_SET(m->m_pkthdr.csum_data, sizeof(struct ip6_hdr));
245
246 #define MAKE_EXTHDR(hp, mp) \
247 do { \
248 if (hp) { \
249 struct ip6_ext *eh = (struct ip6_ext *)(hp); \
250 error = ip6_copyexthdr((mp), (caddr_t)(hp), \
251 ((eh)->ip6e_len + 1) << 3); \
252 if (error) \
253 goto freehdrs; \
254 } \
255 } while (/*CONSTCOND*/ 0)
256
257 bzero(&exthdrs, sizeof(exthdrs));
258 if (opt) {
259 /* Hop-by-Hop options header */
260 MAKE_EXTHDR(opt->ip6po_hbh, &exthdrs.ip6e_hbh);
261 /* Destination options header(1st part) */
262 MAKE_EXTHDR(opt->ip6po_dest1, &exthdrs.ip6e_dest1);
263 /* Routing header */
264 MAKE_EXTHDR(opt->ip6po_rthdr, &exthdrs.ip6e_rthdr);
265 /* Destination options header(2nd part) */
266 MAKE_EXTHDR(opt->ip6po_dest2, &exthdrs.ip6e_dest2);
267 }
268
269 #ifdef IPSEC
270 if ((flags & IPV6_FORWARDING) != 0) {
271 needipsec = 0;
272 goto skippolicycheck;
273 }
274
275 /* get a security policy for this packet */
276 if (so == NULL)
277 sp = ipsec6_getpolicybyaddr(m, IPSEC_DIR_OUTBOUND, 0, &error);
278 else {
279 if (IPSEC_PCB_SKIP_IPSEC(sotoinpcb_hdr(so)->inph_sp,
280 IPSEC_DIR_OUTBOUND)) {
281 needipsec = 0;
282 goto skippolicycheck;
283 }
284 sp = ipsec6_getpolicybysock(m, IPSEC_DIR_OUTBOUND, so, &error);
285 }
286
287 if (sp == NULL) {
288 ipsec6stat.out_inval++;
289 goto freehdrs;
290 }
291
292 error = 0;
293
294 /* check policy */
295 switch (sp->policy) {
296 case IPSEC_POLICY_DISCARD:
297 /*
298 * This packet is just discarded.
299 */
300 ipsec6stat.out_polvio++;
301 goto freehdrs;
302
303 case IPSEC_POLICY_BYPASS:
304 case IPSEC_POLICY_NONE:
305 /* no need to do IPsec. */
306 needipsec = 0;
307 break;
308
309 case IPSEC_POLICY_IPSEC:
310 if (sp->req == NULL) {
311 /* XXX should be panic ? */
312 printf("ip6_output: No IPsec request specified.\n");
313 error = EINVAL;
314 goto freehdrs;
315 }
316 needipsec = 1;
317 break;
318
319 case IPSEC_POLICY_ENTRUST:
320 default:
321 printf("ip6_output: Invalid policy found. %d\n", sp->policy);
322 }
323
324 skippolicycheck:;
325 #endif /* IPSEC */
326
327 if (needipsec &&
328 (m->m_pkthdr.csum_flags & (M_CSUM_UDPv6|M_CSUM_TCPv6)) != 0) {
329 in6_delayed_cksum(m);
330 m->m_pkthdr.csum_flags &= ~(M_CSUM_UDPv6|M_CSUM_TCPv6);
331 }
332
333 /*
334 * Calculate the total length of the extension header chain.
335 * Keep the length of the unfragmentable part for fragmentation.
336 */
337 optlen = 0;
338 if (exthdrs.ip6e_hbh) optlen += exthdrs.ip6e_hbh->m_len;
339 if (exthdrs.ip6e_dest1) optlen += exthdrs.ip6e_dest1->m_len;
340 if (exthdrs.ip6e_rthdr) optlen += exthdrs.ip6e_rthdr->m_len;
341 unfragpartlen = optlen + sizeof(struct ip6_hdr);
342 /* NOTE: we don't add AH/ESP length here. do that later. */
343 if (exthdrs.ip6e_dest2) optlen += exthdrs.ip6e_dest2->m_len;
344
345 /*
346 * If we need IPsec, or there is at least one extension header,
347 * separate IP6 header from the payload.
348 */
349 if ((needipsec || optlen) && !hdrsplit) {
350 if ((error = ip6_splithdr(m, &exthdrs)) != 0) {
351 m = NULL;
352 goto freehdrs;
353 }
354 m = exthdrs.ip6e_ip6;
355 hdrsplit++;
356 }
357
358 /* adjust pointer */
359 ip6 = mtod(m, struct ip6_hdr *);
360
361 /* adjust mbuf packet header length */
362 m->m_pkthdr.len += optlen;
363 plen = m->m_pkthdr.len - sizeof(*ip6);
364
365 /* If this is a jumbo payload, insert a jumbo payload option. */
366 if (plen > IPV6_MAXPACKET) {
367 if (!hdrsplit) {
368 if ((error = ip6_splithdr(m, &exthdrs)) != 0) {
369 m = NULL;
370 goto freehdrs;
371 }
372 m = exthdrs.ip6e_ip6;
373 hdrsplit++;
374 }
375 /* adjust pointer */
376 ip6 = mtod(m, struct ip6_hdr *);
377 if ((error = ip6_insert_jumboopt(&exthdrs, plen)) != 0)
378 goto freehdrs;
379 optlen += 8; /* XXX JUMBOOPTLEN */
380 ip6->ip6_plen = 0;
381 } else
382 ip6->ip6_plen = htons(plen);
383
384 /*
385 * Concatenate headers and fill in next header fields.
386 * Here we have, on "m"
387 * IPv6 payload
388 * and we insert headers accordingly. Finally, we should be getting:
389 * IPv6 hbh dest1 rthdr ah* [esp* dest2 payload]
390 *
391 * during the header composing process, "m" points to IPv6 header.
392 * "mprev" points to an extension header prior to esp.
393 */
394 {
395 u_char *nexthdrp = &ip6->ip6_nxt;
396 struct mbuf *mprev = m;
397
398 /*
399 * we treat dest2 specially. this makes IPsec processing
400 * much easier. the goal here is to make mprev point the
401 * mbuf prior to dest2.
402 *
403 * result: IPv6 dest2 payload
404 * m and mprev will point to IPv6 header.
405 */
406 if (exthdrs.ip6e_dest2) {
407 if (!hdrsplit)
408 panic("assumption failed: hdr not split");
409 exthdrs.ip6e_dest2->m_next = m->m_next;
410 m->m_next = exthdrs.ip6e_dest2;
411 *mtod(exthdrs.ip6e_dest2, u_char *) = ip6->ip6_nxt;
412 ip6->ip6_nxt = IPPROTO_DSTOPTS;
413 }
414
415 #define MAKE_CHAIN(m, mp, p, i)\
416 do {\
417 if (m) {\
418 if (!hdrsplit) \
419 panic("assumption failed: hdr not split"); \
420 *mtod((m), u_char *) = *(p);\
421 *(p) = (i);\
422 p = mtod((m), u_char *);\
423 (m)->m_next = (mp)->m_next;\
424 (mp)->m_next = (m);\
425 (mp) = (m);\
426 }\
427 } while (/*CONSTCOND*/ 0)
428 /*
429 * result: IPv6 hbh dest1 rthdr dest2 payload
430 * m will point to IPv6 header. mprev will point to the
431 * extension header prior to dest2 (rthdr in the above case).
432 */
433 MAKE_CHAIN(exthdrs.ip6e_hbh, mprev, nexthdrp, IPPROTO_HOPOPTS);
434 MAKE_CHAIN(exthdrs.ip6e_dest1, mprev, nexthdrp,
435 IPPROTO_DSTOPTS);
436 MAKE_CHAIN(exthdrs.ip6e_rthdr, mprev, nexthdrp,
437 IPPROTO_ROUTING);
438
439 M_CSUM_DATA_IPv6_HL_SET(m->m_pkthdr.csum_data,
440 sizeof(struct ip6_hdr) + optlen);
441
442 #ifdef IPSEC
443 if (!needipsec)
444 goto skip_ipsec2;
445
446 /*
447 * pointers after IPsec headers are not valid any more.
448 * other pointers need a great care too.
449 * (IPsec routines should not mangle mbufs prior to AH/ESP)
450 */
451 exthdrs.ip6e_dest2 = NULL;
452
453 {
454 struct ip6_rthdr *rh = NULL;
455 int segleft_org = 0;
456 struct ipsec_output_state state;
457
458 if (exthdrs.ip6e_rthdr) {
459 rh = mtod(exthdrs.ip6e_rthdr, struct ip6_rthdr *);
460 segleft_org = rh->ip6r_segleft;
461 rh->ip6r_segleft = 0;
462 }
463
464 bzero(&state, sizeof(state));
465 state.m = m;
466 error = ipsec6_output_trans(&state, nexthdrp, mprev, sp, flags,
467 &needipsectun);
468 m = state.m;
469 if (error) {
470 /* mbuf is already reclaimed in ipsec6_output_trans. */
471 m = NULL;
472 switch (error) {
473 case EHOSTUNREACH:
474 case ENETUNREACH:
475 case EMSGSIZE:
476 case ENOBUFS:
477 case ENOMEM:
478 break;
479 default:
480 printf("ip6_output (ipsec): error code %d\n", error);
481 /* FALLTHROUGH */
482 case ENOENT:
483 /* don't show these error codes to the user */
484 error = 0;
485 break;
486 }
487 goto bad;
488 }
489 if (exthdrs.ip6e_rthdr) {
490 /* ah6_output doesn't modify mbuf chain */
491 rh->ip6r_segleft = segleft_org;
492 }
493 }
494 skip_ipsec2:;
495 #endif
496 }
497
498 /*
499 * If there is a routing header, replace destination address field
500 * with the first hop of the routing header.
501 */
502 if (exthdrs.ip6e_rthdr) {
503 struct ip6_rthdr *rh;
504 struct ip6_rthdr0 *rh0;
505 struct in6_addr *addr;
506 struct sockaddr_in6 sa;
507
508 rh = (struct ip6_rthdr *)(mtod(exthdrs.ip6e_rthdr,
509 struct ip6_rthdr *));
510 finaldst = ip6->ip6_dst;
511 switch (rh->ip6r_type) {
512 case IPV6_RTHDR_TYPE_0:
513 rh0 = (struct ip6_rthdr0 *)rh;
514 addr = (struct in6_addr *)(rh0 + 1);
515
516 /*
517 * construct a sockaddr_in6 form of
518 * the first hop.
519 *
520 * XXX: we may not have enough
521 * information about its scope zone;
522 * there is no standard API to pass
523 * the information from the
524 * application.
525 */
526 bzero(&sa, sizeof(sa));
527 sa.sin6_family = AF_INET6;
528 sa.sin6_len = sizeof(sa);
529 sa.sin6_addr = addr[0];
530 if ((error = sa6_embedscope(&sa,
531 ip6_use_defzone)) != 0) {
532 goto bad;
533 }
534 ip6->ip6_dst = sa.sin6_addr;
535 (void)memmove(&addr[0], &addr[1],
536 sizeof(struct in6_addr) *
537 (rh0->ip6r0_segleft - 1));
538 addr[rh0->ip6r0_segleft - 1] = finaldst;
539 /* XXX */
540 in6_clearscope(addr + rh0->ip6r0_segleft - 1);
541 break;
542 default: /* is it possible? */
543 error = EINVAL;
544 goto bad;
545 }
546 }
547
548 /* Source address validation */
549 if (IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_src) &&
550 (flags & IPV6_UNSPECSRC) == 0) {
551 error = EOPNOTSUPP;
552 ip6stat.ip6s_badscope++;
553 goto bad;
554 }
555 if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_src)) {
556 error = EOPNOTSUPP;
557 ip6stat.ip6s_badscope++;
558 goto bad;
559 }
560
561 ip6stat.ip6s_localout++;
562
563 /*
564 * Route packet.
565 */
566 /* initialize cached route */
567 if (ro == 0) {
568 ro = &ip6route;
569 bzero((caddr_t)ro, sizeof(*ro));
570 }
571 ro_pmtu = ro;
572 if (opt && opt->ip6po_rthdr)
573 ro = &opt->ip6po_route;
574 dst = (struct sockaddr_in6 *)&ro->ro_dst;
575
576 /*
577 * if specified, try to fill in the traffic class field.
578 * do not override if a non-zero value is already set.
579 * we check the diffserv field and the ecn field separately.
580 */
581 if (opt && opt->ip6po_tclass >= 0) {
582 int mask = 0;
583
584 if ((ip6->ip6_flow & htonl(0xfc << 20)) == 0)
585 mask |= 0xfc;
586 if ((ip6->ip6_flow & htonl(0x03 << 20)) == 0)
587 mask |= 0x03;
588 if (mask != 0)
589 ip6->ip6_flow |= htonl((opt->ip6po_tclass & mask) << 20);
590 }
591
592 /* fill in or override the hop limit field, if necessary. */
593 if (opt && opt->ip6po_hlim != -1)
594 ip6->ip6_hlim = opt->ip6po_hlim & 0xff;
595 else if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
596 if (im6o != NULL)
597 ip6->ip6_hlim = im6o->im6o_multicast_hlim;
598 else
599 ip6->ip6_hlim = ip6_defmcasthlim;
600 }
601
602 #ifdef IPSEC
603 if (needipsec && needipsectun) {
604 struct ipsec_output_state state;
605
606 /*
607 * All the extension headers will become inaccessible
608 * (since they can be encrypted).
609 * Don't panic, we need no more updates to extension headers
610 * on inner IPv6 packet (since they are now encapsulated).
611 *
612 * IPv6 [ESP|AH] IPv6 [extension headers] payload
613 */
614 bzero(&exthdrs, sizeof(exthdrs));
615 exthdrs.ip6e_ip6 = m;
616
617 bzero(&state, sizeof(state));
618 state.m = m;
619 state.ro = (struct route *)ro;
620 state.dst = (struct sockaddr *)dst;
621
622 error = ipsec6_output_tunnel(&state, sp, flags);
623
624 m = state.m;
625 ro_pmtu = ro = (struct route_in6 *)state.ro;
626 dst = (struct sockaddr_in6 *)state.dst;
627 if (error) {
628 /* mbuf is already reclaimed in ipsec6_output_tunnel. */
629 m0 = m = NULL;
630 m = NULL;
631 switch (error) {
632 case EHOSTUNREACH:
633 case ENETUNREACH:
634 case EMSGSIZE:
635 case ENOBUFS:
636 case ENOMEM:
637 break;
638 default:
639 printf("ip6_output (ipsec): error code %d\n", error);
640 /* FALLTHROUGH */
641 case ENOENT:
642 /* don't show these error codes to the user */
643 error = 0;
644 break;
645 }
646 goto bad;
647 }
648
649 exthdrs.ip6e_ip6 = m;
650 }
651 #endif /* IPSEC */
652
653 /* adjust pointer */
654 ip6 = mtod(m, struct ip6_hdr *);
655
656 bzero(&dst_sa, sizeof(dst_sa));
657 dst_sa.sin6_family = AF_INET6;
658 dst_sa.sin6_len = sizeof(dst_sa);
659 dst_sa.sin6_addr = ip6->ip6_dst;
660 if ((error = in6_selectroute(&dst_sa, opt, im6o, ro, &ifp, &rt, 0))
661 != 0) {
662 switch (error) {
663 case EHOSTUNREACH:
664 ip6stat.ip6s_noroute++;
665 break;
666 case EADDRNOTAVAIL:
667 default:
668 break; /* XXX statistics? */
669 }
670 if (ifp != NULL)
671 in6_ifstat_inc(ifp, ifs6_out_discard);
672 goto bad;
673 }
674 if (rt == NULL) {
675 /*
676 * If in6_selectroute() does not return a route entry,
677 * dst may not have been updated.
678 */
679 *dst = dst_sa; /* XXX */
680 }
681
682 /*
683 * then rt (for unicast) and ifp must be non-NULL valid values.
684 */
685 if ((flags & IPV6_FORWARDING) == 0) {
686 /* XXX: the FORWARDING flag can be set for mrouting. */
687 in6_ifstat_inc(ifp, ifs6_out_request);
688 }
689 if (rt != NULL) {
690 ia = (struct in6_ifaddr *)(rt->rt_ifa);
691 rt->rt_use++;
692 }
693
694 /*
695 * The outgoing interface must be in the zone of source and
696 * destination addresses. We should use ia_ifp to support the
697 * case of sending packets to an address of our own.
698 */
699 if (ia != NULL && ia->ia_ifp)
700 origifp = ia->ia_ifp;
701 else
702 origifp = ifp;
703
704 src0 = ip6->ip6_src;
705 if (in6_setscope(&src0, origifp, &zone))
706 goto badscope;
707 bzero(&src_sa, sizeof(src_sa));
708 src_sa.sin6_family = AF_INET6;
709 src_sa.sin6_len = sizeof(src_sa);
710 src_sa.sin6_addr = ip6->ip6_src;
711 if (sa6_recoverscope(&src_sa) || zone != src_sa.sin6_scope_id)
712 goto badscope;
713
714 dst0 = ip6->ip6_dst;
715 if (in6_setscope(&dst0, origifp, &zone))
716 goto badscope;
717 /* re-initialize to be sure */
718 bzero(&dst_sa, sizeof(dst_sa));
719 dst_sa.sin6_family = AF_INET6;
720 dst_sa.sin6_len = sizeof(dst_sa);
721 dst_sa.sin6_addr = ip6->ip6_dst;
722 if (sa6_recoverscope(&dst_sa) || zone != dst_sa.sin6_scope_id)
723 goto badscope;
724
725 /* scope check is done. */
726 goto routefound;
727
728 badscope:
729 ip6stat.ip6s_badscope++;
730 in6_ifstat_inc(origifp, ifs6_out_discard);
731 if (error == 0)
732 error = EHOSTUNREACH; /* XXX */
733 goto bad;
734
735 routefound:
736 if (rt && !IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
737 if (opt && opt->ip6po_nextroute.ro_rt) {
738 /*
739 * The nexthop is explicitly specified by the
740 * application. We assume the next hop is an IPv6
741 * address.
742 */
743 dst = (struct sockaddr_in6 *)opt->ip6po_nexthop;
744 } else if ((rt->rt_flags & RTF_GATEWAY))
745 dst = (struct sockaddr_in6 *)rt->rt_gateway;
746 }
747
748 /*
749 * XXXXXX: original code follows:
750 */
751 if (!IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst))
752 m->m_flags &= ~(M_BCAST | M_MCAST); /* just in case */
753 else {
754 struct in6_multi *in6m;
755
756 m->m_flags = (m->m_flags & ~M_BCAST) | M_MCAST;
757
758 in6_ifstat_inc(ifp, ifs6_out_mcast);
759
760 /*
761 * Confirm that the outgoing interface supports multicast.
762 */
763 if (!(ifp->if_flags & IFF_MULTICAST)) {
764 ip6stat.ip6s_noroute++;
765 in6_ifstat_inc(ifp, ifs6_out_discard);
766 error = ENETUNREACH;
767 goto bad;
768 }
769
770 IN6_LOOKUP_MULTI(ip6->ip6_dst, ifp, in6m);
771 if (in6m != NULL &&
772 (im6o == NULL || im6o->im6o_multicast_loop)) {
773 /*
774 * If we belong to the destination multicast group
775 * on the outgoing interface, and the caller did not
776 * forbid loopback, loop back a copy.
777 */
778 ip6_mloopback(ifp, m, dst);
779 } else {
780 /*
781 * If we are acting as a multicast router, perform
782 * multicast forwarding as if the packet had just
783 * arrived on the interface to which we are about
784 * to send. The multicast forwarding function
785 * recursively calls this function, using the
786 * IPV6_FORWARDING flag to prevent infinite recursion.
787 *
788 * Multicasts that are looped back by ip6_mloopback(),
789 * above, will be forwarded by the ip6_input() routine,
790 * if necessary.
791 */
792 if (ip6_mrouter && (flags & IPV6_FORWARDING) == 0) {
793 if (ip6_mforward(ip6, ifp, m) != 0) {
794 m_freem(m);
795 goto done;
796 }
797 }
798 }
799 /*
800 * Multicasts with a hoplimit of zero may be looped back,
801 * above, but must not be transmitted on a network.
802 * Also, multicasts addressed to the loopback interface
803 * are not sent -- the above call to ip6_mloopback() will
804 * loop back a copy if this host actually belongs to the
805 * destination group on the loopback interface.
806 */
807 if (ip6->ip6_hlim == 0 || (ifp->if_flags & IFF_LOOPBACK) ||
808 IN6_IS_ADDR_MC_INTFACELOCAL(&ip6->ip6_dst)) {
809 m_freem(m);
810 goto done;
811 }
812 }
813
814 /*
815 * Fill the outgoing inteface to tell the upper layer
816 * to increment per-interface statistics.
817 */
818 if (ifpp)
819 *ifpp = ifp;
820
821 /* Determine path MTU. */
822 if ((error = ip6_getpmtu(ro_pmtu, ro, ifp, &finaldst, &mtu,
823 &alwaysfrag)) != 0)
824 goto bad;
825 #ifdef IPSEC
826 if (needipsectun)
827 mtu = IPV6_MMTU;
828 #endif
829
830 /*
831 * The caller of this function may specify to use the minimum MTU
832 * in some cases.
833 * An advanced API option (IPV6_USE_MIN_MTU) can also override MTU
834 * setting. The logic is a bit complicated; by default, unicast
835 * packets will follow path MTU while multicast packets will be sent at
836 * the minimum MTU. If IP6PO_MINMTU_ALL is specified, all packets
837 * including unicast ones will be sent at the minimum MTU. Multicast
838 * packets will always be sent at the minimum MTU unless
839 * IP6PO_MINMTU_DISABLE is explicitly specified.
840 * See RFC 3542 for more details.
841 */
842 if (mtu > IPV6_MMTU) {
843 if ((flags & IPV6_MINMTU))
844 mtu = IPV6_MMTU;
845 else if (opt && opt->ip6po_minmtu == IP6PO_MINMTU_ALL)
846 mtu = IPV6_MMTU;
847 else if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) &&
848 (opt == NULL ||
849 opt->ip6po_minmtu != IP6PO_MINMTU_DISABLE)) {
850 mtu = IPV6_MMTU;
851 }
852 }
853
854 /*
855 * clear embedded scope identifiers if necessary.
856 * in6_clearscope will touch the addresses only when necessary.
857 */
858 in6_clearscope(&ip6->ip6_src);
859 in6_clearscope(&ip6->ip6_dst);
860
861 /*
862 * If the outgoing packet contains a hop-by-hop options header,
863 * it must be examined and processed even by the source node.
864 * (RFC 2460, section 4.)
865 */
866 if (exthdrs.ip6e_hbh) {
867 struct ip6_hbh *hbh = mtod(exthdrs.ip6e_hbh, struct ip6_hbh *);
868 u_int32_t dummy1; /* XXX unused */
869 u_int32_t dummy2; /* XXX unused */
870
871 /*
872 * XXX: if we have to send an ICMPv6 error to the sender,
873 * we need the M_LOOP flag since icmp6_error() expects
874 * the IPv6 and the hop-by-hop options header are
875 * continuous unless the flag is set.
876 */
877 m->m_flags |= M_LOOP;
878 m->m_pkthdr.rcvif = ifp;
879 if (ip6_process_hopopts(m, (u_int8_t *)(hbh + 1),
880 ((hbh->ip6h_len + 1) << 3) - sizeof(struct ip6_hbh),
881 &dummy1, &dummy2) < 0) {
882 /* m was already freed at this point */
883 error = EINVAL;/* better error? */
884 goto done;
885 }
886 m->m_flags &= ~M_LOOP; /* XXX */
887 m->m_pkthdr.rcvif = NULL;
888 }
889
890 #ifdef PFIL_HOOKS
891 /*
892 * Run through list of hooks for output packets.
893 */
894 if ((error = pfil_run_hooks(&inet6_pfil_hook, &m, ifp, PFIL_OUT)) != 0)
895 goto done;
896 if (m == NULL)
897 goto done;
898 ip6 = mtod(m, struct ip6_hdr *);
899 #endif /* PFIL_HOOKS */
900 /*
901 * Send the packet to the outgoing interface.
902 * If necessary, do IPv6 fragmentation before sending.
903 *
904 * the logic here is rather complex:
905 * 1: normal case (dontfrag == 0, alwaysfrag == 0)
906 * 1-a: send as is if tlen <= path mtu
907 * 1-b: fragment if tlen > path mtu
908 *
909 * 2: if user asks us not to fragment (dontfrag == 1)
910 * 2-a: send as is if tlen <= interface mtu
911 * 2-b: error if tlen > interface mtu
912 *
913 * 3: if we always need to attach fragment header (alwaysfrag == 1)
914 * always fragment
915 *
916 * 4: if dontfrag == 1 && alwaysfrag == 1
917 * error, as we cannot handle this conflicting request
918 */
919 tlen = m->m_pkthdr.len;
920 tso = (m->m_pkthdr.csum_flags & M_CSUM_TSOv6) != 0;
921 if (opt && (opt->ip6po_flags & IP6PO_DONTFRAG))
922 dontfrag = 1;
923 else
924 dontfrag = 0;
925
926 if (dontfrag && alwaysfrag) { /* case 4 */
927 /* conflicting request - can't transmit */
928 error = EMSGSIZE;
929 goto bad;
930 }
931 if (dontfrag && (!tso && tlen > IN6_LINKMTU(ifp))) { /* case 2-b */
932 /*
933 * Even if the DONTFRAG option is specified, we cannot send the
934 * packet when the data length is larger than the MTU of the
935 * outgoing interface.
936 * Notify the error by sending IPV6_PATHMTU ancillary data as
937 * well as returning an error code (the latter is not described
938 * in the API spec.)
939 */
940 u_int32_t mtu32;
941 struct ip6ctlparam ip6cp;
942
943 mtu32 = (u_int32_t)mtu;
944 bzero(&ip6cp, sizeof(ip6cp));
945 ip6cp.ip6c_cmdarg = (void *)&mtu32;
946 pfctlinput2(PRC_MSGSIZE, (struct sockaddr *)&ro_pmtu->ro_dst,
947 (void *)&ip6cp);
948
949 error = EMSGSIZE;
950 goto bad;
951 }
952
953 /*
954 * transmit packet without fragmentation
955 */
956 if (dontfrag || (!alwaysfrag && (tlen <= mtu || tso))) {
957 /* case 1-a and 2-a */
958 struct in6_ifaddr *ia6;
959 int sw_csum;
960
961 ip6 = mtod(m, struct ip6_hdr *);
962 ia6 = in6_ifawithifp(ifp, &ip6->ip6_src);
963 if (ia6) {
964 /* Record statistics for this interface address. */
965 ia6->ia_ifa.ifa_data.ifad_outbytes += m->m_pkthdr.len;
966 }
967 #ifdef IPSEC
968 /* clean ipsec history once it goes out of the node */
969 ipsec_delaux(m);
970 #endif
971
972 sw_csum = m->m_pkthdr.csum_flags & ~ifp->if_csum_flags_tx;
973 if ((sw_csum & (M_CSUM_UDPv6|M_CSUM_TCPv6)) != 0) {
974 if (IN6_NEED_CHECKSUM(ifp,
975 sw_csum & (M_CSUM_UDPv6|M_CSUM_TCPv6))) {
976 in6_delayed_cksum(m);
977 }
978 m->m_pkthdr.csum_flags &= ~(M_CSUM_UDPv6|M_CSUM_TCPv6);
979 }
980
981 if (__predict_true(!tso ||
982 (ifp->if_capenable & IFCAP_TSOv6) != 0)) {
983 error = nd6_output(ifp, origifp, m, dst, rt);
984 } else {
985 error = ip6_tso_output(ifp, origifp, m, dst, rt);
986 }
987 goto done;
988 }
989
990 if (tso) {
991 error = EINVAL; /* XXX */
992 goto bad;
993 }
994
995 /*
996 * try to fragment the packet. case 1-b and 3
997 */
998 if (mtu < IPV6_MMTU) {
999 /* path MTU cannot be less than IPV6_MMTU */
1000 error = EMSGSIZE;
1001 in6_ifstat_inc(ifp, ifs6_out_fragfail);
1002 goto bad;
1003 } else if (ip6->ip6_plen == 0) {
1004 /* jumbo payload cannot be fragmented */
1005 error = EMSGSIZE;
1006 in6_ifstat_inc(ifp, ifs6_out_fragfail);
1007 goto bad;
1008 } else {
1009 struct mbuf **mnext, *m_frgpart;
1010 struct ip6_frag *ip6f;
1011 u_int32_t id = htonl(ip6_randomid());
1012 u_char nextproto;
1013 #if 0 /* see below */
1014 struct ip6ctlparam ip6cp;
1015 u_int32_t mtu32;
1016 #endif
1017
1018 /*
1019 * Too large for the destination or interface;
1020 * fragment if possible.
1021 * Must be able to put at least 8 bytes per fragment.
1022 */
1023 hlen = unfragpartlen;
1024 if (mtu > IPV6_MAXPACKET)
1025 mtu = IPV6_MAXPACKET;
1026
1027 #if 0
1028 /*
1029 * It is believed this code is a leftover from the
1030 * development of the IPV6_RECVPATHMTU sockopt and
1031 * associated work to implement RFC3542.
1032 * It's not entirely clear what the intent of the API
1033 * is at this point, so disable this code for now.
1034 * The IPV6_RECVPATHMTU sockopt and/or IPV6_DONTFRAG
1035 * will send notifications if the application requests.
1036 */
1037
1038 /* Notify a proper path MTU to applications. */
1039 mtu32 = (u_int32_t)mtu;
1040 bzero(&ip6cp, sizeof(ip6cp));
1041 ip6cp.ip6c_cmdarg = (void *)&mtu32;
1042 pfctlinput2(PRC_MSGSIZE, (struct sockaddr *)&ro_pmtu->ro_dst,
1043 (void *)&ip6cp);
1044 #endif
1045
1046 len = (mtu - hlen - sizeof(struct ip6_frag)) & ~7;
1047 if (len < 8) {
1048 error = EMSGSIZE;
1049 in6_ifstat_inc(ifp, ifs6_out_fragfail);
1050 goto bad;
1051 }
1052
1053 mnext = &m->m_nextpkt;
1054
1055 /*
1056 * Change the next header field of the last header in the
1057 * unfragmentable part.
1058 */
1059 if (exthdrs.ip6e_rthdr) {
1060 nextproto = *mtod(exthdrs.ip6e_rthdr, u_char *);
1061 *mtod(exthdrs.ip6e_rthdr, u_char *) = IPPROTO_FRAGMENT;
1062 } else if (exthdrs.ip6e_dest1) {
1063 nextproto = *mtod(exthdrs.ip6e_dest1, u_char *);
1064 *mtod(exthdrs.ip6e_dest1, u_char *) = IPPROTO_FRAGMENT;
1065 } else if (exthdrs.ip6e_hbh) {
1066 nextproto = *mtod(exthdrs.ip6e_hbh, u_char *);
1067 *mtod(exthdrs.ip6e_hbh, u_char *) = IPPROTO_FRAGMENT;
1068 } else {
1069 nextproto = ip6->ip6_nxt;
1070 ip6->ip6_nxt = IPPROTO_FRAGMENT;
1071 }
1072
1073 if ((m->m_pkthdr.csum_flags & (M_CSUM_UDPv6|M_CSUM_TCPv6))
1074 != 0) {
1075 if (IN6_NEED_CHECKSUM(ifp,
1076 m->m_pkthdr.csum_flags &
1077 (M_CSUM_UDPv6|M_CSUM_TCPv6))) {
1078 in6_delayed_cksum(m);
1079 }
1080 m->m_pkthdr.csum_flags &= ~(M_CSUM_UDPv6|M_CSUM_TCPv6);
1081 }
1082
1083 /*
1084 * Loop through length of segment after first fragment,
1085 * make new header and copy data of each part and link onto
1086 * chain.
1087 */
1088 m0 = m;
1089 for (off = hlen; off < tlen; off += len) {
1090 struct mbuf *mlast;
1091
1092 MGETHDR(m, M_DONTWAIT, MT_HEADER);
1093 if (!m) {
1094 error = ENOBUFS;
1095 ip6stat.ip6s_odropped++;
1096 goto sendorfree;
1097 }
1098 m->m_pkthdr.rcvif = NULL;
1099 m->m_flags = m0->m_flags & M_COPYFLAGS;
1100 *mnext = m;
1101 mnext = &m->m_nextpkt;
1102 m->m_data += max_linkhdr;
1103 mhip6 = mtod(m, struct ip6_hdr *);
1104 *mhip6 = *ip6;
1105 m->m_len = sizeof(*mhip6);
1106 error = ip6_insertfraghdr(m0, m, hlen, &ip6f);
1107 if (error) {
1108 ip6stat.ip6s_odropped++;
1109 goto sendorfree;
1110 }
1111 ip6f->ip6f_offlg = htons((u_int16_t)((off - hlen) & ~7));
1112 if (off + len >= tlen)
1113 len = tlen - off;
1114 else
1115 ip6f->ip6f_offlg |= IP6F_MORE_FRAG;
1116 mhip6->ip6_plen = htons((u_int16_t)(len + hlen +
1117 sizeof(*ip6f) - sizeof(struct ip6_hdr)));
1118 if ((m_frgpart = m_copy(m0, off, len)) == 0) {
1119 error = ENOBUFS;
1120 ip6stat.ip6s_odropped++;
1121 goto sendorfree;
1122 }
1123 for (mlast = m; mlast->m_next; mlast = mlast->m_next)
1124 ;
1125 mlast->m_next = m_frgpart;
1126 m->m_pkthdr.len = len + hlen + sizeof(*ip6f);
1127 m->m_pkthdr.rcvif = (struct ifnet *)0;
1128 ip6f->ip6f_reserved = 0;
1129 ip6f->ip6f_ident = id;
1130 ip6f->ip6f_nxt = nextproto;
1131 ip6stat.ip6s_ofragments++;
1132 in6_ifstat_inc(ifp, ifs6_out_fragcreat);
1133 }
1134
1135 in6_ifstat_inc(ifp, ifs6_out_fragok);
1136 }
1137
1138 /*
1139 * Remove leading garbages.
1140 */
1141 sendorfree:
1142 m = m0->m_nextpkt;
1143 m0->m_nextpkt = 0;
1144 m_freem(m0);
1145 for (m0 = m; m; m = m0) {
1146 m0 = m->m_nextpkt;
1147 m->m_nextpkt = 0;
1148 if (error == 0) {
1149 struct in6_ifaddr *ia6;
1150 ip6 = mtod(m, struct ip6_hdr *);
1151 ia6 = in6_ifawithifp(ifp, &ip6->ip6_src);
1152 if (ia6) {
1153 /*
1154 * Record statistics for this interface
1155 * address.
1156 */
1157 ia6->ia_ifa.ifa_data.ifad_outbytes +=
1158 m->m_pkthdr.len;
1159 }
1160 #ifdef IPSEC
1161 /* clean ipsec history once it goes out of the node */
1162 ipsec_delaux(m);
1163 #endif
1164 error = nd6_output(ifp, origifp, m, dst, rt);
1165 } else
1166 m_freem(m);
1167 }
1168
1169 if (error == 0)
1170 ip6stat.ip6s_fragmented++;
1171
1172 done:
1173 if (ro == &ip6route && ro->ro_rt) { /* brace necessary for RTFREE */
1174 RTFREE(ro->ro_rt);
1175 } else if (ro_pmtu == &ip6route && ro_pmtu->ro_rt) {
1176 RTFREE(ro_pmtu->ro_rt);
1177 }
1178
1179 #ifdef IPSEC
1180 if (sp != NULL)
1181 key_freesp(sp);
1182 #endif /* IPSEC */
1183
1184 return (error);
1185
1186 freehdrs:
1187 m_freem(exthdrs.ip6e_hbh); /* m_freem will check if mbuf is 0 */
1188 m_freem(exthdrs.ip6e_dest1);
1189 m_freem(exthdrs.ip6e_rthdr);
1190 m_freem(exthdrs.ip6e_dest2);
1191 /* FALLTHROUGH */
1192 bad:
1193 m_freem(m);
1194 goto done;
1195 }
1196
1197 static int
1198 ip6_copyexthdr(mp, hdr, hlen)
1199 struct mbuf **mp;
1200 caddr_t hdr;
1201 int hlen;
1202 {
1203 struct mbuf *m;
1204
1205 if (hlen > MCLBYTES)
1206 return (ENOBUFS); /* XXX */
1207
1208 MGET(m, M_DONTWAIT, MT_DATA);
1209 if (!m)
1210 return (ENOBUFS);
1211
1212 if (hlen > MLEN) {
1213 MCLGET(m, M_DONTWAIT);
1214 if ((m->m_flags & M_EXT) == 0) {
1215 m_free(m);
1216 return (ENOBUFS);
1217 }
1218 }
1219 m->m_len = hlen;
1220 if (hdr)
1221 bcopy(hdr, mtod(m, caddr_t), hlen);
1222
1223 *mp = m;
1224 return (0);
1225 }
1226
1227 /*
1228 * Process a delayed payload checksum calculation.
1229 */
1230 void
1231 in6_delayed_cksum(struct mbuf *m)
1232 {
1233 uint16_t csum, offset;
1234
1235 KASSERT((m->m_pkthdr.csum_flags & (M_CSUM_UDPv6|M_CSUM_TCPv6)) != 0);
1236 KASSERT((~m->m_pkthdr.csum_flags & (M_CSUM_UDPv6|M_CSUM_TCPv6)) != 0);
1237 KASSERT((m->m_pkthdr.csum_flags
1238 & (M_CSUM_UDPv4|M_CSUM_TCPv4|M_CSUM_TSOv4)) == 0);
1239
1240 offset = M_CSUM_DATA_IPv6_HL(m->m_pkthdr.csum_data);
1241 csum = in6_cksum(m, 0, offset, m->m_pkthdr.len - offset);
1242 if (csum == 0 && (m->m_pkthdr.csum_flags & M_CSUM_UDPv6) != 0) {
1243 csum = 0xffff;
1244 }
1245
1246 offset += M_CSUM_DATA_IPv6_OFFSET(m->m_pkthdr.csum_data);
1247 if ((offset + sizeof(csum)) > m->m_len) {
1248 m_copyback(m, offset, sizeof(csum), &csum);
1249 } else {
1250 *(uint16_t *)(mtod(m, caddr_t) + offset) = csum;
1251 }
1252 }
1253
1254 /*
1255 * Insert jumbo payload option.
1256 */
1257 static int
1258 ip6_insert_jumboopt(exthdrs, plen)
1259 struct ip6_exthdrs *exthdrs;
1260 u_int32_t plen;
1261 {
1262 struct mbuf *mopt;
1263 u_int8_t *optbuf;
1264 u_int32_t v;
1265
1266 #define JUMBOOPTLEN 8 /* length of jumbo payload option and padding */
1267
1268 /*
1269 * If there is no hop-by-hop options header, allocate new one.
1270 * If there is one but it doesn't have enough space to store the
1271 * jumbo payload option, allocate a cluster to store the whole options.
1272 * Otherwise, use it to store the options.
1273 */
1274 if (exthdrs->ip6e_hbh == 0) {
1275 MGET(mopt, M_DONTWAIT, MT_DATA);
1276 if (mopt == 0)
1277 return (ENOBUFS);
1278 mopt->m_len = JUMBOOPTLEN;
1279 optbuf = mtod(mopt, u_int8_t *);
1280 optbuf[1] = 0; /* = ((JUMBOOPTLEN) >> 3) - 1 */
1281 exthdrs->ip6e_hbh = mopt;
1282 } else {
1283 struct ip6_hbh *hbh;
1284
1285 mopt = exthdrs->ip6e_hbh;
1286 if (M_TRAILINGSPACE(mopt) < JUMBOOPTLEN) {
1287 /*
1288 * XXX assumption:
1289 * - exthdrs->ip6e_hbh is not referenced from places
1290 * other than exthdrs.
1291 * - exthdrs->ip6e_hbh is not an mbuf chain.
1292 */
1293 int oldoptlen = mopt->m_len;
1294 struct mbuf *n;
1295
1296 /*
1297 * XXX: give up if the whole (new) hbh header does
1298 * not fit even in an mbuf cluster.
1299 */
1300 if (oldoptlen + JUMBOOPTLEN > MCLBYTES)
1301 return (ENOBUFS);
1302
1303 /*
1304 * As a consequence, we must always prepare a cluster
1305 * at this point.
1306 */
1307 MGET(n, M_DONTWAIT, MT_DATA);
1308 if (n) {
1309 MCLGET(n, M_DONTWAIT);
1310 if ((n->m_flags & M_EXT) == 0) {
1311 m_freem(n);
1312 n = NULL;
1313 }
1314 }
1315 if (!n)
1316 return (ENOBUFS);
1317 n->m_len = oldoptlen + JUMBOOPTLEN;
1318 bcopy(mtod(mopt, caddr_t), mtod(n, caddr_t),
1319 oldoptlen);
1320 optbuf = mtod(n, u_int8_t *) + oldoptlen;
1321 m_freem(mopt);
1322 mopt = exthdrs->ip6e_hbh = n;
1323 } else {
1324 optbuf = mtod(mopt, u_int8_t *) + mopt->m_len;
1325 mopt->m_len += JUMBOOPTLEN;
1326 }
1327 optbuf[0] = IP6OPT_PADN;
1328 optbuf[1] = 0;
1329
1330 /*
1331 * Adjust the header length according to the pad and
1332 * the jumbo payload option.
1333 */
1334 hbh = mtod(mopt, struct ip6_hbh *);
1335 hbh->ip6h_len += (JUMBOOPTLEN >> 3);
1336 }
1337
1338 /* fill in the option. */
1339 optbuf[2] = IP6OPT_JUMBO;
1340 optbuf[3] = 4;
1341 v = (u_int32_t)htonl(plen + JUMBOOPTLEN);
1342 bcopy(&v, &optbuf[4], sizeof(u_int32_t));
1343
1344 /* finally, adjust the packet header length */
1345 exthdrs->ip6e_ip6->m_pkthdr.len += JUMBOOPTLEN;
1346
1347 return (0);
1348 #undef JUMBOOPTLEN
1349 }
1350
1351 /*
1352 * Insert fragment header and copy unfragmentable header portions.
1353 */
1354 static int
1355 ip6_insertfraghdr(m0, m, hlen, frghdrp)
1356 struct mbuf *m0, *m;
1357 int hlen;
1358 struct ip6_frag **frghdrp;
1359 {
1360 struct mbuf *n, *mlast;
1361
1362 if (hlen > sizeof(struct ip6_hdr)) {
1363 n = m_copym(m0, sizeof(struct ip6_hdr),
1364 hlen - sizeof(struct ip6_hdr), M_DONTWAIT);
1365 if (n == 0)
1366 return (ENOBUFS);
1367 m->m_next = n;
1368 } else
1369 n = m;
1370
1371 /* Search for the last mbuf of unfragmentable part. */
1372 for (mlast = n; mlast->m_next; mlast = mlast->m_next)
1373 ;
1374
1375 if ((mlast->m_flags & M_EXT) == 0 &&
1376 M_TRAILINGSPACE(mlast) >= sizeof(struct ip6_frag)) {
1377 /* use the trailing space of the last mbuf for the fragment hdr */
1378 *frghdrp = (struct ip6_frag *)(mtod(mlast, caddr_t) +
1379 mlast->m_len);
1380 mlast->m_len += sizeof(struct ip6_frag);
1381 m->m_pkthdr.len += sizeof(struct ip6_frag);
1382 } else {
1383 /* allocate a new mbuf for the fragment header */
1384 struct mbuf *mfrg;
1385
1386 MGET(mfrg, M_DONTWAIT, MT_DATA);
1387 if (mfrg == 0)
1388 return (ENOBUFS);
1389 mfrg->m_len = sizeof(struct ip6_frag);
1390 *frghdrp = mtod(mfrg, struct ip6_frag *);
1391 mlast->m_next = mfrg;
1392 }
1393
1394 return (0);
1395 }
1396
1397 static int
1398 ip6_getpmtu(ro_pmtu, ro, ifp, dst, mtup, alwaysfragp)
1399 struct route_in6 *ro_pmtu, *ro;
1400 struct ifnet *ifp;
1401 struct in6_addr *dst;
1402 u_long *mtup;
1403 int *alwaysfragp;
1404 {
1405 u_int32_t mtu = 0;
1406 int alwaysfrag = 0;
1407 int error = 0;
1408
1409 if (ro_pmtu != ro) {
1410 /* The first hop and the final destination may differ. */
1411 struct sockaddr_in6 *sa6_dst =
1412 (struct sockaddr_in6 *)&ro_pmtu->ro_dst;
1413 if (ro_pmtu->ro_rt &&
1414 ((ro_pmtu->ro_rt->rt_flags & RTF_UP) == 0 ||
1415 !IN6_ARE_ADDR_EQUAL(&sa6_dst->sin6_addr, dst))) {
1416 RTFREE(ro_pmtu->ro_rt);
1417 ro_pmtu->ro_rt = (struct rtentry *)NULL;
1418 }
1419 if (ro_pmtu->ro_rt == NULL) {
1420 bzero(sa6_dst, sizeof(*sa6_dst)); /* for safety */
1421 sa6_dst->sin6_family = AF_INET6;
1422 sa6_dst->sin6_len = sizeof(struct sockaddr_in6);
1423 sa6_dst->sin6_addr = *dst;
1424
1425 rtalloc((struct route *)ro_pmtu);
1426 }
1427 }
1428 if (ro_pmtu->ro_rt) {
1429 u_int32_t ifmtu;
1430
1431 if (ifp == NULL)
1432 ifp = ro_pmtu->ro_rt->rt_ifp;
1433 ifmtu = IN6_LINKMTU(ifp);
1434 mtu = ro_pmtu->ro_rt->rt_rmx.rmx_mtu;
1435 if (mtu == 0)
1436 mtu = ifmtu;
1437 else if (mtu < IPV6_MMTU) {
1438 /*
1439 * RFC2460 section 5, last paragraph:
1440 * if we record ICMPv6 too big message with
1441 * mtu < IPV6_MMTU, transmit packets sized IPV6_MMTU
1442 * or smaller, with fragment header attached.
1443 * (fragment header is needed regardless from the
1444 * packet size, for translators to identify packets)
1445 */
1446 alwaysfrag = 1;
1447 mtu = IPV6_MMTU;
1448 } else if (mtu > ifmtu) {
1449 /*
1450 * The MTU on the route is larger than the MTU on
1451 * the interface! This shouldn't happen, unless the
1452 * MTU of the interface has been changed after the
1453 * interface was brought up. Change the MTU in the
1454 * route to match the interface MTU (as long as the
1455 * field isn't locked).
1456 */
1457 mtu = ifmtu;
1458 if (!(ro_pmtu->ro_rt->rt_rmx.rmx_locks & RTV_MTU))
1459 ro_pmtu->ro_rt->rt_rmx.rmx_mtu = mtu;
1460 }
1461 } else if (ifp) {
1462 mtu = IN6_LINKMTU(ifp);
1463 } else
1464 error = EHOSTUNREACH; /* XXX */
1465
1466 *mtup = mtu;
1467 if (alwaysfragp)
1468 *alwaysfragp = alwaysfrag;
1469 return (error);
1470 }
1471
1472 /*
1473 * IP6 socket option processing.
1474 */
1475 int
1476 ip6_ctloutput(op, so, level, optname, mp)
1477 int op;
1478 struct socket *so;
1479 int level, optname;
1480 struct mbuf **mp;
1481 {
1482 int privileged, optdatalen, uproto;
1483 void *optdata;
1484 struct in6pcb *in6p = sotoin6pcb(so);
1485 struct mbuf *m = *mp;
1486 int error, optval;
1487 int optlen;
1488 struct lwp *l = curlwp; /* XXX */
1489
1490 optlen = m ? m->m_len : 0;
1491 error = optval = 0;
1492 privileged = (l == 0 || kauth_authorize_generic(l->l_cred,
1493 KAUTH_GENERIC_ISSUSER, &l->l_acflag)) ? 0 : 1;
1494 uproto = (int)so->so_proto->pr_protocol;
1495
1496 if (level == IPPROTO_IPV6) {
1497 switch (op) {
1498 case PRCO_SETOPT:
1499 switch (optname) {
1500 #ifdef RFC2292
1501 case IPV6_2292PKTOPTIONS:
1502 /* m is freed in ip6_pcbopts */
1503 error = ip6_pcbopts(&in6p->in6p_outputopts,
1504 m, so);
1505 break;
1506 #endif
1507
1508 /*
1509 * Use of some Hop-by-Hop options or some
1510 * Destination options, might require special
1511 * privilege. That is, normal applications
1512 * (without special privilege) might be forbidden
1513 * from setting certain options in outgoing packets,
1514 * and might never see certain options in received
1515 * packets. [RFC 2292 Section 6]
1516 * KAME specific note:
1517 * KAME prevents non-privileged users from sending or
1518 * receiving ANY hbh/dst options in order to avoid
1519 * overhead of parsing options in the kernel.
1520 */
1521 case IPV6_RECVHOPOPTS:
1522 case IPV6_RECVDSTOPTS:
1523 case IPV6_RECVRTHDRDSTOPTS:
1524 if (!privileged) {
1525 error = EPERM;
1526 break;
1527 }
1528 /* FALLTHROUGH */
1529 case IPV6_UNICAST_HOPS:
1530 case IPV6_HOPLIMIT:
1531 case IPV6_FAITH:
1532
1533 case IPV6_RECVPKTINFO:
1534 case IPV6_RECVHOPLIMIT:
1535 case IPV6_RECVRTHDR:
1536 case IPV6_RECVPATHMTU:
1537 case IPV6_RECVTCLASS:
1538 case IPV6_V6ONLY:
1539 if (optlen != sizeof(int)) {
1540 error = EINVAL;
1541 break;
1542 }
1543 optval = *mtod(m, int *);
1544 switch (optname) {
1545
1546 case IPV6_UNICAST_HOPS:
1547 if (optval < -1 || optval >= 256)
1548 error = EINVAL;
1549 else {
1550 /* -1 = kernel default */
1551 in6p->in6p_hops = optval;
1552 }
1553 break;
1554 #define OPTSET(bit) \
1555 do { \
1556 if (optval) \
1557 in6p->in6p_flags |= (bit); \
1558 else \
1559 in6p->in6p_flags &= ~(bit); \
1560 } while (/*CONSTCOND*/ 0)
1561
1562 #ifdef RFC2292
1563 #define OPTSET2292(bit) \
1564 do { \
1565 in6p->in6p_flags |= IN6P_RFC2292; \
1566 if (optval) \
1567 in6p->in6p_flags |= (bit); \
1568 else \
1569 in6p->in6p_flags &= ~(bit); \
1570 } while (/*CONSTCOND*/ 0)
1571 #endif
1572
1573 #define OPTBIT(bit) (in6p->in6p_flags & (bit) ? 1 : 0)
1574
1575 case IPV6_RECVPKTINFO:
1576 #ifdef RFC2292
1577 /* cannot mix with RFC2292 */
1578 if (OPTBIT(IN6P_RFC2292)) {
1579 error = EINVAL;
1580 break;
1581 }
1582 #endif
1583 OPTSET(IN6P_PKTINFO);
1584 break;
1585
1586 case IPV6_HOPLIMIT:
1587 {
1588 struct ip6_pktopts **optp;
1589
1590 #ifdef RFC2292
1591 /* cannot mix with RFC2292 */
1592 if (OPTBIT(IN6P_RFC2292)) {
1593 error = EINVAL;
1594 break;
1595 }
1596 #endif
1597 optp = &in6p->in6p_outputopts;
1598 error = ip6_pcbopt(IPV6_HOPLIMIT,
1599 (u_char *)&optval,
1600 sizeof(optval),
1601 optp,
1602 privileged, uproto);
1603 break;
1604 }
1605
1606 case IPV6_RECVHOPLIMIT:
1607 #ifdef RFC2292
1608 /* cannot mix with RFC2292 */
1609 if (OPTBIT(IN6P_RFC2292)) {
1610 error = EINVAL;
1611 break;
1612 }
1613 #endif
1614 OPTSET(IN6P_HOPLIMIT);
1615 break;
1616
1617 case IPV6_RECVHOPOPTS:
1618 #ifdef RFC2292
1619 /* cannot mix with RFC2292 */
1620 if (OPTBIT(IN6P_RFC2292)) {
1621 error = EINVAL;
1622 break;
1623 }
1624 #endif
1625 OPTSET(IN6P_HOPOPTS);
1626 break;
1627
1628 case IPV6_RECVDSTOPTS:
1629 #ifdef RFC2292
1630 /* cannot mix with RFC2292 */
1631 if (OPTBIT(IN6P_RFC2292)) {
1632 error = EINVAL;
1633 break;
1634 }
1635 #endif
1636 OPTSET(IN6P_DSTOPTS);
1637 break;
1638
1639 case IPV6_RECVRTHDRDSTOPTS:
1640 #ifdef RFC2292
1641 /* cannot mix with RFC2292 */
1642 if (OPTBIT(IN6P_RFC2292)) {
1643 error = EINVAL;
1644 break;
1645 }
1646 #endif
1647 OPTSET(IN6P_RTHDRDSTOPTS);
1648 break;
1649
1650 case IPV6_RECVRTHDR:
1651 #ifdef RFC2292
1652 /* cannot mix with RFC2292 */
1653 if (OPTBIT(IN6P_RFC2292)) {
1654 error = EINVAL;
1655 break;
1656 }
1657 #endif
1658 OPTSET(IN6P_RTHDR);
1659 break;
1660
1661 case IPV6_FAITH:
1662 OPTSET(IN6P_FAITH);
1663 break;
1664
1665 case IPV6_RECVPATHMTU:
1666 /*
1667 * We ignore this option for TCP
1668 * sockets.
1669 * (RFC3542 leaves this case
1670 * unspecified.)
1671 */
1672 if (uproto != IPPROTO_TCP)
1673 OPTSET(IN6P_MTU);
1674 break;
1675
1676 case IPV6_V6ONLY:
1677 /*
1678 * make setsockopt(IPV6_V6ONLY)
1679 * available only prior to bind(2).
1680 * see ipng mailing list, Jun 22 2001.
1681 */
1682 if (in6p->in6p_lport ||
1683 !IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_laddr)) {
1684 error = EINVAL;
1685 break;
1686 }
1687 #ifdef INET6_BINDV6ONLY
1688 if (!optval)
1689 error = EINVAL;
1690 #else
1691 OPTSET(IN6P_IPV6_V6ONLY);
1692 #endif
1693 break;
1694 case IPV6_RECVTCLASS:
1695 #ifdef RFC2292
1696 /* cannot mix with RFC2292 XXX */
1697 if (OPTBIT(IN6P_RFC2292)) {
1698 error = EINVAL;
1699 break;
1700 }
1701 #endif
1702 OPTSET(IN6P_TCLASS);
1703 break;
1704
1705 }
1706 break;
1707
1708 case IPV6_OTCLASS:
1709 {
1710 struct ip6_pktopts **optp;
1711 u_int8_t tclass;
1712
1713 if (optlen != sizeof(tclass)) {
1714 error = EINVAL;
1715 break;
1716 }
1717 tclass = *mtod(m, u_int8_t *);
1718 optp = &in6p->in6p_outputopts;
1719 error = ip6_pcbopt(optname,
1720 (u_char *)&tclass,
1721 sizeof(tclass),
1722 optp,
1723 privileged, uproto);
1724 break;
1725 }
1726
1727 case IPV6_TCLASS:
1728 case IPV6_DONTFRAG:
1729 case IPV6_USE_MIN_MTU:
1730 if (optlen != sizeof(optval)) {
1731 error = EINVAL;
1732 break;
1733 }
1734 optval = *mtod(m, int *);
1735 {
1736 struct ip6_pktopts **optp;
1737 optp = &in6p->in6p_outputopts;
1738 error = ip6_pcbopt(optname,
1739 (u_char *)&optval,
1740 sizeof(optval),
1741 optp,
1742 privileged, uproto);
1743 break;
1744 }
1745
1746 #ifdef RFC2292
1747 case IPV6_2292PKTINFO:
1748 case IPV6_2292HOPLIMIT:
1749 case IPV6_2292HOPOPTS:
1750 case IPV6_2292DSTOPTS:
1751 case IPV6_2292RTHDR:
1752 /* RFC 2292 */
1753 if (optlen != sizeof(int)) {
1754 error = EINVAL;
1755 break;
1756 }
1757 optval = *mtod(m, int *);
1758 switch (optname) {
1759 case IPV6_2292PKTINFO:
1760 OPTSET2292(IN6P_PKTINFO);
1761 break;
1762 case IPV6_2292HOPLIMIT:
1763 OPTSET2292(IN6P_HOPLIMIT);
1764 break;
1765 case IPV6_2292HOPOPTS:
1766 /*
1767 * Check super-user privilege.
1768 * See comments for IPV6_RECVHOPOPTS.
1769 */
1770 if (!privileged)
1771 return (EPERM);
1772 OPTSET2292(IN6P_HOPOPTS);
1773 break;
1774 case IPV6_2292DSTOPTS:
1775 if (!privileged)
1776 return (EPERM);
1777 OPTSET2292(IN6P_DSTOPTS|IN6P_RTHDRDSTOPTS); /* XXX */
1778 break;
1779 case IPV6_2292RTHDR:
1780 OPTSET2292(IN6P_RTHDR);
1781 break;
1782 }
1783 break;
1784 #endif
1785 case IPV6_PKTINFO:
1786 case IPV6_HOPOPTS:
1787 case IPV6_RTHDR:
1788 case IPV6_DSTOPTS:
1789 case IPV6_RTHDRDSTOPTS:
1790 case IPV6_NEXTHOP:
1791 {
1792 /* new advanced API (RFC3542) */
1793 u_char *optbuf;
1794 int optbuflen;
1795 struct ip6_pktopts **optp;
1796
1797 #ifdef RFC2292
1798 /* cannot mix with RFC2292 */
1799 if (OPTBIT(IN6P_RFC2292)) {
1800 error = EINVAL;
1801 break;
1802 }
1803 #endif
1804
1805 if (m && m->m_next) {
1806 error = EINVAL; /* XXX */
1807 break;
1808 }
1809 if (m) {
1810 optbuf = mtod(m, u_char *);
1811 optbuflen = m->m_len;
1812 } else {
1813 optbuf = NULL;
1814 optbuflen = 0;
1815 }
1816 optp = &in6p->in6p_outputopts;
1817 error = ip6_pcbopt(optname,
1818 optbuf, optbuflen,
1819 optp, privileged, uproto);
1820 break;
1821 }
1822 #undef OPTSET
1823
1824 case IPV6_MULTICAST_IF:
1825 case IPV6_MULTICAST_HOPS:
1826 case IPV6_MULTICAST_LOOP:
1827 case IPV6_JOIN_GROUP:
1828 case IPV6_LEAVE_GROUP:
1829 error = ip6_setmoptions(optname,
1830 &in6p->in6p_moptions, m);
1831 break;
1832
1833 case IPV6_PORTRANGE:
1834 optval = *mtod(m, int *);
1835
1836 switch (optval) {
1837 case IPV6_PORTRANGE_DEFAULT:
1838 in6p->in6p_flags &= ~(IN6P_LOWPORT);
1839 in6p->in6p_flags &= ~(IN6P_HIGHPORT);
1840 break;
1841
1842 case IPV6_PORTRANGE_HIGH:
1843 in6p->in6p_flags &= ~(IN6P_LOWPORT);
1844 in6p->in6p_flags |= IN6P_HIGHPORT;
1845 break;
1846
1847 case IPV6_PORTRANGE_LOW:
1848 in6p->in6p_flags &= ~(IN6P_HIGHPORT);
1849 in6p->in6p_flags |= IN6P_LOWPORT;
1850 break;
1851
1852 default:
1853 error = EINVAL;
1854 break;
1855 }
1856 break;
1857
1858 #ifdef IPSEC
1859 case IPV6_IPSEC_POLICY:
1860 {
1861 caddr_t req = NULL;
1862 size_t len = 0;
1863 if (m) {
1864 req = mtod(m, caddr_t);
1865 len = m->m_len;
1866 }
1867 error = ipsec6_set_policy(in6p, optname, req,
1868 len, privileged);
1869 }
1870 break;
1871 #endif /* IPSEC */
1872
1873 default:
1874 error = ENOPROTOOPT;
1875 break;
1876 }
1877 if (m)
1878 (void)m_free(m);
1879 break;
1880
1881 case PRCO_GETOPT:
1882 switch (optname) {
1883 #ifdef RFC2292
1884 case IPV6_2292PKTOPTIONS:
1885 /*
1886 * RFC3542 (effectively) deprecated the
1887 * semantics of the 2292-style pktoptions.
1888 * Since it was not reliable in nature (i.e.,
1889 * applications had to expect the lack of some
1890 * information after all), it would make sense
1891 * to simplify this part by always returning
1892 * empty data.
1893 */
1894 *mp = m_get(M_WAIT, MT_SOOPTS);
1895 (*mp)->m_len = 0;
1896 break;
1897 #endif
1898
1899 case IPV6_RECVHOPOPTS:
1900 case IPV6_RECVDSTOPTS:
1901 case IPV6_RECVRTHDRDSTOPTS:
1902 case IPV6_UNICAST_HOPS:
1903 case IPV6_RECVPKTINFO:
1904 case IPV6_RECVHOPLIMIT:
1905 case IPV6_RECVRTHDR:
1906 case IPV6_RECVPATHMTU:
1907
1908 case IPV6_FAITH:
1909 case IPV6_V6ONLY:
1910 case IPV6_PORTRANGE:
1911 case IPV6_RECVTCLASS:
1912 switch (optname) {
1913
1914 case IPV6_RECVHOPOPTS:
1915 optval = OPTBIT(IN6P_HOPOPTS);
1916 break;
1917
1918 case IPV6_RECVDSTOPTS:
1919 optval = OPTBIT(IN6P_DSTOPTS);
1920 break;
1921
1922 case IPV6_RECVRTHDRDSTOPTS:
1923 optval = OPTBIT(IN6P_RTHDRDSTOPTS);
1924 break;
1925
1926 case IPV6_UNICAST_HOPS:
1927 optval = in6p->in6p_hops;
1928 break;
1929
1930 case IPV6_RECVPKTINFO:
1931 optval = OPTBIT(IN6P_PKTINFO);
1932 break;
1933
1934 case IPV6_RECVHOPLIMIT:
1935 optval = OPTBIT(IN6P_HOPLIMIT);
1936 break;
1937
1938 case IPV6_RECVRTHDR:
1939 optval = OPTBIT(IN6P_RTHDR);
1940 break;
1941
1942 case IPV6_RECVPATHMTU:
1943 optval = OPTBIT(IN6P_MTU);
1944 break;
1945
1946 case IPV6_FAITH:
1947 optval = OPTBIT(IN6P_FAITH);
1948 break;
1949
1950 case IPV6_V6ONLY:
1951 optval = OPTBIT(IN6P_IPV6_V6ONLY);
1952 break;
1953
1954 case IPV6_PORTRANGE:
1955 {
1956 int flags;
1957 flags = in6p->in6p_flags;
1958 if (flags & IN6P_HIGHPORT)
1959 optval = IPV6_PORTRANGE_HIGH;
1960 else if (flags & IN6P_LOWPORT)
1961 optval = IPV6_PORTRANGE_LOW;
1962 else
1963 optval = 0;
1964 break;
1965 }
1966 case IPV6_RECVTCLASS:
1967 optval = OPTBIT(IN6P_TCLASS);
1968 break;
1969
1970 }
1971 if (error)
1972 break;
1973 *mp = m = m_get(M_WAIT, MT_SOOPTS);
1974 m->m_len = sizeof(int);
1975 *mtod(m, int *) = optval;
1976 break;
1977
1978 case IPV6_PATHMTU:
1979 {
1980 u_long pmtu = 0;
1981 struct ip6_mtuinfo mtuinfo;
1982 struct route_in6 *ro = (struct route_in6 *)&in6p
1983 ->in6p_route;
1984
1985 if (!(so->so_state & SS_ISCONNECTED))
1986 return (ENOTCONN);
1987 /*
1988 * XXX: we dot not consider the case of source
1989 * routing, or optional information to specify
1990 * the outgoing interface.
1991 */
1992 error = ip6_getpmtu(ro, NULL, NULL,
1993 &in6p->in6p_faddr, &pmtu, NULL);
1994 if (error)
1995 break;
1996 if (pmtu > IPV6_MAXPACKET)
1997 pmtu = IPV6_MAXPACKET;
1998
1999 memset(&mtuinfo, 0, sizeof(mtuinfo));
2000 mtuinfo.ip6m_mtu = (u_int32_t)pmtu;
2001 optdata = (void *)&mtuinfo;
2002 optdatalen = sizeof(mtuinfo);
2003 if (optdatalen > MCLBYTES)
2004 return (EMSGSIZE); /* XXX */
2005 *mp = m = m_get(M_WAIT, MT_SOOPTS);
2006 if (optdatalen > MLEN)
2007 MCLGET(m, M_WAIT);
2008 m->m_len = optdatalen;
2009 memcpy(mtod(m, void *), optdata, optdatalen);
2010 break;
2011 }
2012
2013 #ifdef RFC2292
2014 case IPV6_2292PKTINFO:
2015 case IPV6_2292HOPLIMIT:
2016 case IPV6_2292HOPOPTS:
2017 case IPV6_2292RTHDR:
2018 case IPV6_2292DSTOPTS:
2019 switch (optname) {
2020 case IPV6_2292PKTINFO:
2021 optval = OPTBIT(IN6P_PKTINFO);
2022 break;
2023 case IPV6_2292HOPLIMIT:
2024 optval = OPTBIT(IN6P_HOPLIMIT);
2025 break;
2026 case IPV6_2292HOPOPTS:
2027 optval = OPTBIT(IN6P_HOPOPTS);
2028 break;
2029 case IPV6_2292RTHDR:
2030 optval = OPTBIT(IN6P_RTHDR);
2031 break;
2032 case IPV6_2292DSTOPTS:
2033 optval = OPTBIT(IN6P_DSTOPTS|IN6P_RTHDRDSTOPTS);
2034 break;
2035 }
2036 *mp = m = m_get(M_WAIT, MT_SOOPTS);
2037 m->m_len = sizeof(int);
2038 *mtod(m, int *) = optval;
2039 break;
2040 #endif
2041 case IPV6_PKTINFO:
2042 case IPV6_HOPOPTS:
2043 case IPV6_RTHDR:
2044 case IPV6_DSTOPTS:
2045 case IPV6_RTHDRDSTOPTS:
2046 case IPV6_NEXTHOP:
2047 case IPV6_OTCLASS:
2048 case IPV6_TCLASS:
2049 case IPV6_DONTFRAG:
2050 case IPV6_USE_MIN_MTU:
2051 error = ip6_getpcbopt(in6p->in6p_outputopts,
2052 optname, mp);
2053 break;
2054
2055 case IPV6_MULTICAST_IF:
2056 case IPV6_MULTICAST_HOPS:
2057 case IPV6_MULTICAST_LOOP:
2058 case IPV6_JOIN_GROUP:
2059 case IPV6_LEAVE_GROUP:
2060 error = ip6_getmoptions(optname,
2061 in6p->in6p_moptions, mp);
2062 break;
2063
2064 #ifdef IPSEC
2065 case IPV6_IPSEC_POLICY:
2066 {
2067 caddr_t req = NULL;
2068 size_t len = 0;
2069 if (m) {
2070 req = mtod(m, caddr_t);
2071 len = m->m_len;
2072 }
2073 error = ipsec6_get_policy(in6p, req, len, mp);
2074 break;
2075 }
2076 #endif /* IPSEC */
2077
2078
2079
2080
2081 default:
2082 error = ENOPROTOOPT;
2083 break;
2084 }
2085 break;
2086 }
2087 } else {
2088 error = EINVAL;
2089 if (op == PRCO_SETOPT && *mp)
2090 (void)m_free(*mp);
2091 }
2092 return (error);
2093 }
2094
2095 int
2096 ip6_raw_ctloutput(op, so, level, optname, mp)
2097 int op;
2098 struct socket *so;
2099 int level, optname;
2100 struct mbuf **mp;
2101 {
2102 int error = 0, optval, optlen;
2103 const int icmp6off = offsetof(struct icmp6_hdr, icmp6_cksum);
2104 struct in6pcb *in6p = sotoin6pcb(so);
2105 struct mbuf *m = *mp;
2106
2107 optlen = m ? m->m_len : 0;
2108
2109 if (level != IPPROTO_IPV6) {
2110 if (op == PRCO_SETOPT && *mp)
2111 (void)m_free(*mp);
2112 return (EINVAL);
2113 }
2114
2115 switch (optname) {
2116 case IPV6_CHECKSUM:
2117 /*
2118 * For ICMPv6 sockets, no modification allowed for checksum
2119 * offset, permit "no change" values to help existing apps.
2120 *
2121 * XXX RFC3542 says: "An attempt to set IPV6_CHECKSUM
2122 * for an ICMPv6 socket will fail." The current
2123 * behavior does not meet RFC3542.
2124 */
2125 switch (op) {
2126 case PRCO_SETOPT:
2127 if (optlen != sizeof(int)) {
2128 error = EINVAL;
2129 break;
2130 }
2131 optval = *mtod(m, int *);
2132 if ((optval % 2) != 0) {
2133 /* the API assumes even offset values */
2134 error = EINVAL;
2135 } else if (so->so_proto->pr_protocol ==
2136 IPPROTO_ICMPV6) {
2137 if (optval != icmp6off)
2138 error = EINVAL;
2139 } else
2140 in6p->in6p_cksum = optval;
2141 break;
2142
2143 case PRCO_GETOPT:
2144 if (so->so_proto->pr_protocol == IPPROTO_ICMPV6)
2145 optval = icmp6off;
2146 else
2147 optval = in6p->in6p_cksum;
2148
2149 *mp = m = m_get(M_WAIT, MT_SOOPTS);
2150 m->m_len = sizeof(int);
2151 *mtod(m, int *) = optval;
2152 break;
2153
2154 default:
2155 error = EINVAL;
2156 break;
2157 }
2158 break;
2159
2160 default:
2161 error = ENOPROTOOPT;
2162 break;
2163 }
2164
2165 if (op == PRCO_SETOPT && m)
2166 (void)m_free(m);
2167
2168 return (error);
2169 }
2170
#ifdef RFC2292
/*
 * Set up IP6 options in pcb for insertion in output packets or
 * specifying behavior of outgoing packets.
 *
 * Any options currently held in *pktopt are cleared first.  If m is NULL
 * or empty, that is all that happens: the structure is freed, *pktopt is
 * left NULL and 0 is returned.  Otherwise the options in m are parsed with
 * ip6_setpktopts(); on success *pktopt points at the result, on failure
 * the structure is freed, *pktopt stays NULL and the error is returned.
 *
 * This function does not free m.
 */
static int
ip6_pcbopts(pktopt, m, so)
	struct ip6_pktopts **pktopt;
	struct mbuf *m;
	struct socket *so;
{
	struct ip6_pktopts *opt = *pktopt;
	struct lwp *l = curlwp;	/* XXX */
	int error;
	int priv;

	if (opt == NULL) {
		opt = malloc(sizeof(*opt), M_IP6OPT, M_WAITOK);
	} else {
		/* turn off any old options. */
#ifdef DIAGNOSTIC
		if (opt->ip6po_pktinfo || opt->ip6po_nexthop ||
		    opt->ip6po_hbh || opt->ip6po_dest1 || opt->ip6po_dest2 ||
		    opt->ip6po_rhinfo.ip6po_rhi_rthdr)
			printf("ip6_pcbopts: all specified options are cleared.\n");
#endif
		ip6_clearpktopts(opt, -1);
	}
	*pktopt = NULL;

	if (m == NULL || m->m_len == 0) {
		/*
		 * Only turning off any previous options, regardless of
		 * whether the opt is just created or given.
		 */
		free(opt, M_IP6OPT);
		return (0);
	}

	/* set options specified by user. */
	priv = (l != NULL && kauth_authorize_generic(l->l_cred,
	    KAUTH_GENERIC_ISSUSER, &l->l_acflag) == 0) ? 1 : 0;
	error = ip6_setpktopts(m, opt, NULL, priv,
	    so->so_proto->pr_protocol);
	if (error != 0) {
		ip6_clearpktopts(opt, -1); /* XXX: discard all options */
		free(opt, M_IP6OPT);
		return (error);
	}

	*pktopt = opt;
	return (0);
}
#endif
2223
2224 /*
2225 * initialize ip6_pktopts. beware that there are non-zero default values in
2226 * the struct.
2227 */
2228 void
2229 ip6_initpktopts(struct ip6_pktopts *opt)
2230 {
2231
2232 memset(opt, 0, sizeof(*opt));
2233 opt->ip6po_hlim = -1; /* -1 means default hop limit */
2234 opt->ip6po_tclass = -1; /* -1 means default traffic class */
2235 opt->ip6po_minmtu = IP6PO_MINMTU_MCASTONLY;
2236 }
2237
2238 #define sin6tosa(sin6) ((struct sockaddr *)(sin6)) /* XXX */
2239 static int
2240 ip6_pcbopt(int optname, u_char *buf, int len, struct ip6_pktopts **pktopt,
2241 int priv, int uproto)
2242 {
2243 struct ip6_pktopts *opt;
2244
2245 if (*pktopt == NULL) {
2246 *pktopt = malloc(sizeof(struct ip6_pktopts), M_IP6OPT,
2247 M_WAITOK);
2248 ip6_initpktopts(*pktopt);
2249 }
2250 opt = *pktopt;
2251
2252 return (ip6_setpktopt(optname, buf, len, opt, priv, 1, 0, uproto));
2253 }
2254
2255 static int
2256 ip6_getpcbopt(struct ip6_pktopts *pktopt, int optname, struct mbuf **mp)
2257 {
2258 void *optdata = NULL;
2259 int optdatalen = 0;
2260 struct ip6_ext *ip6e;
2261 int error = 0;
2262 struct in6_pktinfo null_pktinfo;
2263 int deftclass = 0, on;
2264 int defminmtu = IP6PO_MINMTU_MCASTONLY;
2265 struct mbuf *m;
2266
2267 switch (optname) {
2268 case IPV6_PKTINFO:
2269 if (pktopt && pktopt->ip6po_pktinfo)
2270 optdata = (void *)pktopt->ip6po_pktinfo;
2271 else {
2272 /* XXX: we don't have to do this every time... */
2273 memset(&null_pktinfo, 0, sizeof(null_pktinfo));
2274 optdata = (void *)&null_pktinfo;
2275 }
2276 optdatalen = sizeof(struct in6_pktinfo);
2277 break;
2278 case IPV6_OTCLASS:
2279 /* XXX */
2280 return (EINVAL);
2281 case IPV6_TCLASS:
2282 if (pktopt && pktopt->ip6po_tclass >= 0)
2283 optdata = (void *)&pktopt->ip6po_tclass;
2284 else
2285 optdata = (void *)&deftclass;
2286 optdatalen = sizeof(int);
2287 break;
2288 case IPV6_HOPOPTS:
2289 if (pktopt && pktopt->ip6po_hbh) {
2290 optdata = (void *)pktopt->ip6po_hbh;
2291 ip6e = (struct ip6_ext *)pktopt->ip6po_hbh;
2292 optdatalen = (ip6e->ip6e_len + 1) << 3;
2293 }
2294 break;
2295 case IPV6_RTHDR:
2296 if (pktopt && pktopt->ip6po_rthdr) {
2297 optdata = (void *)pktopt->ip6po_rthdr;
2298 ip6e = (struct ip6_ext *)pktopt->ip6po_rthdr;
2299 optdatalen = (ip6e->ip6e_len + 1) << 3;
2300 }
2301 break;
2302 case IPV6_RTHDRDSTOPTS:
2303 if (pktopt && pktopt->ip6po_dest1) {
2304 optdata = (void *)pktopt->ip6po_dest1;
2305 ip6e = (struct ip6_ext *)pktopt->ip6po_dest1;
2306 optdatalen = (ip6e->ip6e_len + 1) << 3;
2307 }
2308 break;
2309 case IPV6_DSTOPTS:
2310 if (pktopt && pktopt->ip6po_dest2) {
2311 optdata = (void *)pktopt->ip6po_dest2;
2312 ip6e = (struct ip6_ext *)pktopt->ip6po_dest2;
2313 optdatalen = (ip6e->ip6e_len + 1) << 3;
2314 }
2315 break;
2316 case IPV6_NEXTHOP:
2317 if (pktopt && pktopt->ip6po_nexthop) {
2318 optdata = (void *)pktopt->ip6po_nexthop;
2319 optdatalen = pktopt->ip6po_nexthop->sa_len;
2320 }
2321 break;
2322 case IPV6_USE_MIN_MTU:
2323 if (pktopt)
2324 optdata = (void *)&pktopt->ip6po_minmtu;
2325 else
2326 optdata = (void *)&defminmtu;
2327 optdatalen = sizeof(int);
2328 break;
2329 case IPV6_DONTFRAG:
2330 if (pktopt && ((pktopt->ip6po_flags) & IP6PO_DONTFRAG))
2331 on = 1;
2332 else
2333 on = 0;
2334 optdata = (void *)&on;
2335 optdatalen = sizeof(on);
2336 break;
2337 default: /* should not happen */
2338 #ifdef DIAGNOSTIC
2339 panic("ip6_getpcbopt: unexpected option\n");
2340 #endif
2341 return (ENOPROTOOPT);
2342 }
2343
2344 if (optdatalen > MCLBYTES)
2345 return (EMSGSIZE); /* XXX */
2346 *mp = m = m_get(M_WAIT, MT_SOOPTS);
2347 if (optdatalen > MLEN)
2348 MCLGET(m, M_WAIT);
2349 m->m_len = optdatalen;
2350 if (optdatalen)
2351 memcpy(mtod(m, void *), optdata, optdatalen);
2352
2353 return (error);
2354 }
2355
2356 void
2357 ip6_clearpktopts(struct ip6_pktopts *pktopt, int optname)
2358 {
2359 if (optname == -1 || optname == IPV6_PKTINFO) {
2360 if (pktopt->ip6po_pktinfo)
2361 free(pktopt->ip6po_pktinfo, M_IP6OPT);
2362 pktopt->ip6po_pktinfo = NULL;
2363 }
2364 if (optname == -1 || optname == IPV6_HOPLIMIT)
2365 pktopt->ip6po_hlim = -1;
2366 if (optname == -1 || optname == IPV6_TCLASS)
2367 pktopt->ip6po_tclass = -1;
2368 if (optname == -1 || optname == IPV6_NEXTHOP) {
2369 if (pktopt->ip6po_nextroute.ro_rt) {
2370 RTFREE(pktopt->ip6po_nextroute.ro_rt);
2371 pktopt->ip6po_nextroute.ro_rt = NULL;
2372 }
2373 if (pktopt->ip6po_nexthop)
2374 free(pktopt->ip6po_nexthop, M_IP6OPT);
2375 pktopt->ip6po_nexthop = NULL;
2376 }
2377 if (optname == -1 || optname == IPV6_HOPOPTS) {
2378 if (pktopt->ip6po_hbh)
2379 free(pktopt->ip6po_hbh, M_IP6OPT);
2380 pktopt->ip6po_hbh = NULL;
2381 }
2382 if (optname == -1 || optname == IPV6_RTHDRDSTOPTS) {
2383 if (pktopt->ip6po_dest1)
2384 free(pktopt->ip6po_dest1, M_IP6OPT);
2385 pktopt->ip6po_dest1 = NULL;
2386 }
2387 if (optname == -1 || optname == IPV6_RTHDR) {
2388 if (pktopt->ip6po_rhinfo.ip6po_rhi_rthdr)
2389 free(pktopt->ip6po_rhinfo.ip6po_rhi_rthdr, M_IP6OPT);
2390 pktopt->ip6po_rhinfo.ip6po_rhi_rthdr = NULL;
2391 if (pktopt->ip6po_route.ro_rt) {
2392 RTFREE(pktopt->ip6po_route.ro_rt);
2393 pktopt->ip6po_route.ro_rt = NULL;
2394 }
2395 }
2396 if (optname == -1 || optname == IPV6_DSTOPTS) {
2397 if (pktopt->ip6po_dest2)
2398 free(pktopt->ip6po_dest2, M_IP6OPT);
2399 pktopt->ip6po_dest2 = NULL;
2400 }
2401 }
2402
2403 #define PKTOPT_EXTHDRCPY(type) \
2404 do { \
2405 if (src->type) { \
2406 int hlen = (((struct ip6_ext *)src->type)->ip6e_len + 1) << 3;\
2407 dst->type = malloc(hlen, M_IP6OPT, canwait); \
2408 if (dst->type == NULL && canwait == M_NOWAIT) \
2409 goto bad; \
2410 memcpy(dst->type, src->type, hlen); \
2411 } \
2412 } while (/*CONSTCOND*/ 0)
2413
2414 static int
2415 copypktopts(struct ip6_pktopts *dst, struct ip6_pktopts *src, int canwait)
2416 {
2417 dst->ip6po_hlim = src->ip6po_hlim;
2418 dst->ip6po_tclass = src->ip6po_tclass;
2419 dst->ip6po_flags = src->ip6po_flags;
2420 if (src->ip6po_pktinfo) {
2421 dst->ip6po_pktinfo = malloc(sizeof(*dst->ip6po_pktinfo),
2422 M_IP6OPT, canwait);
2423 if (dst->ip6po_pktinfo == NULL && canwait == M_NOWAIT)
2424 goto bad;
2425 *dst->ip6po_pktinfo = *src->ip6po_pktinfo;
2426 }
2427 if (src->ip6po_nexthop) {
2428 dst->ip6po_nexthop = malloc(src->ip6po_nexthop->sa_len,
2429 M_IP6OPT, canwait);
2430 if (dst->ip6po_nexthop == NULL && canwait == M_NOWAIT)
2431 goto bad;
2432 memcpy(dst->ip6po_nexthop, src->ip6po_nexthop,
2433 src->ip6po_nexthop->sa_len);
2434 }
2435 PKTOPT_EXTHDRCPY(ip6po_hbh);
2436 PKTOPT_EXTHDRCPY(ip6po_dest1);
2437 PKTOPT_EXTHDRCPY(ip6po_dest2);
2438 PKTOPT_EXTHDRCPY(ip6po_rthdr); /* not copy the cached route */
2439 return (0);
2440
2441 bad:
2442 if (dst->ip6po_pktinfo) free(dst->ip6po_pktinfo, M_IP6OPT);
2443 if (dst->ip6po_nexthop) free(dst->ip6po_nexthop, M_IP6OPT);
2444 if (dst->ip6po_hbh) free(dst->ip6po_hbh, M_IP6OPT);
2445 if (dst->ip6po_dest1) free(dst->ip6po_dest1, M_IP6OPT);
2446 if (dst->ip6po_dest2) free(dst->ip6po_dest2, M_IP6OPT);
2447 if (dst->ip6po_rthdr) free(dst->ip6po_rthdr, M_IP6OPT);
2448
2449 return (ENOBUFS);
2450 }
2451 #undef PKTOPT_EXTHDRCPY
2452
2453 struct ip6_pktopts *
2454 ip6_copypktopts(struct ip6_pktopts *src, int canwait)
2455 {
2456 int error;
2457 struct ip6_pktopts *dst;
2458
2459 dst = malloc(sizeof(*dst), M_IP6OPT, canwait);
2460 if (dst == NULL && canwait == M_NOWAIT)
2461 return (NULL);
2462 ip6_initpktopts(dst);
2463
2464 if ((error = copypktopts(dst, src, canwait)) != 0) {
2465 free(dst, M_IP6OPT);
2466 return (NULL);
2467 }
2468
2469 return (dst);
2470 }
2471
2472 void
2473 ip6_freepcbopts(struct ip6_pktopts *pktopt)
2474 {
2475 if (pktopt == NULL)
2476 return;
2477
2478 ip6_clearpktopts(pktopt, -1);
2479
2480 free(pktopt, M_IP6OPT);
2481 }
2482
2483 /*
2484 * Set the IP6 multicast options in response to user setsockopt().
2485 */
2486 static int
2487 ip6_setmoptions(optname, im6op, m)
2488 int optname;
2489 struct ip6_moptions **im6op;
2490 struct mbuf *m;
2491 {
2492 int error = 0;
2493 u_int loop, ifindex;
2494 struct ipv6_mreq *mreq;
2495 struct ifnet *ifp;
2496 struct ip6_moptions *im6o = *im6op;
2497 struct route_in6 ro;
2498 struct in6_multi_mship *imm;
2499 struct lwp *l = curlwp; /* XXX */
2500
2501 if (im6o == NULL) {
2502 /*
2503 * No multicast option buffer attached to the pcb;
2504 * allocate one and initialize to default values.
2505 */
2506 im6o = (struct ip6_moptions *)
2507 malloc(sizeof(*im6o), M_IPMOPTS, M_WAITOK);
2508
2509 if (im6o == NULL)
2510 return (ENOBUFS);
2511 *im6op = im6o;
2512 im6o->im6o_multicast_ifp = NULL;
2513 im6o->im6o_multicast_hlim = ip6_defmcasthlim;
2514 im6o->im6o_multicast_loop = IPV6_DEFAULT_MULTICAST_LOOP;
2515 LIST_INIT(&im6o->im6o_memberships);
2516 }
2517
2518 switch (optname) {
2519
2520 case IPV6_MULTICAST_IF:
2521 /*
2522 * Select the interface for outgoing multicast packets.
2523 */
2524 if (m == NULL || m->m_len != sizeof(u_int)) {
2525 error = EINVAL;
2526 break;
2527 }
2528 bcopy(mtod(m, u_int *), &ifindex, sizeof(ifindex));
2529 if (ifindex != 0) {
2530 if (if_indexlim <= ifindex || !ifindex2ifnet[ifindex]) {
2531 error = ENXIO; /* XXX EINVAL? */
2532 break;
2533 }
2534 ifp = ifindex2ifnet[ifindex];
2535 if ((ifp->if_flags & IFF_MULTICAST) == 0) {
2536 error = EADDRNOTAVAIL;
2537 break;
2538 }
2539 } else
2540 ifp = NULL;
2541 im6o->im6o_multicast_ifp = ifp;
2542 break;
2543
2544 case IPV6_MULTICAST_HOPS:
2545 {
2546 /*
2547 * Set the IP6 hoplimit for outgoing multicast packets.
2548 */
2549 int optval;
2550 if (m == NULL || m->m_len != sizeof(int)) {
2551 error = EINVAL;
2552 break;
2553 }
2554 bcopy(mtod(m, u_int *), &optval, sizeof(optval));
2555 if (optval < -1 || optval >= 256)
2556 error = EINVAL;
2557 else if (optval == -1)
2558 im6o->im6o_multicast_hlim = ip6_defmcasthlim;
2559 else
2560 im6o->im6o_multicast_hlim = optval;
2561 break;
2562 }
2563
2564 case IPV6_MULTICAST_LOOP:
2565 /*
2566 * Set the loopback flag for outgoing multicast packets.
2567 * Must be zero or one.
2568 */
2569 if (m == NULL || m->m_len != sizeof(u_int)) {
2570 error = EINVAL;
2571 break;
2572 }
2573 bcopy(mtod(m, u_int *), &loop, sizeof(loop));
2574 if (loop > 1) {
2575 error = EINVAL;
2576 break;
2577 }
2578 im6o->im6o_multicast_loop = loop;
2579 break;
2580
2581 case IPV6_JOIN_GROUP:
2582 /*
2583 * Add a multicast group membership.
2584 * Group must be a valid IP6 multicast address.
2585 */
2586 if (m == NULL || m->m_len != sizeof(struct ipv6_mreq)) {
2587 error = EINVAL;
2588 break;
2589 }
2590 mreq = mtod(m, struct ipv6_mreq *);
2591 if (IN6_IS_ADDR_UNSPECIFIED(&mreq->ipv6mr_multiaddr)) {
2592 /*
2593 * We use the unspecified address to specify to accept
2594 * all multicast addresses. Only super user is allowed
2595 * to do this.
2596 */
2597 if (kauth_authorize_generic(l->l_cred,
2598 KAUTH_GENERIC_ISSUSER, &l->l_acflag))
2599 {
2600 error = EACCES;
2601 break;
2602 }
2603 } else if (!IN6_IS_ADDR_MULTICAST(&mreq->ipv6mr_multiaddr)) {
2604 error = EINVAL;
2605 break;
2606 }
2607
2608 /*
2609 * If no interface was explicitly specified, choose an
2610 * appropriate one according to the given multicast address.
2611 */
2612 if (mreq->ipv6mr_interface == 0) {
2613 struct sockaddr_in6 *dst;
2614
2615 /*
2616 * Look up the routing table for the
2617 * address, and choose the outgoing interface.
2618 * XXX: is it a good approach?
2619 */
2620 ro.ro_rt = NULL;
2621 dst = (struct sockaddr_in6 *)&ro.ro_dst;
2622 bzero(dst, sizeof(*dst));
2623 dst->sin6_family = AF_INET6;
2624 dst->sin6_len = sizeof(*dst);
2625 dst->sin6_addr = mreq->ipv6mr_multiaddr;
2626 rtalloc((struct route *)&ro);
2627 if (ro.ro_rt == NULL) {
2628 error = EADDRNOTAVAIL;
2629 break;
2630 }
2631 ifp = ro.ro_rt->rt_ifp;
2632 rtfree(ro.ro_rt);
2633 } else {
2634 /*
2635 * If the interface is specified, validate it.
2636 */
2637 if (if_indexlim <= mreq->ipv6mr_interface ||
2638 !ifindex2ifnet[mreq->ipv6mr_interface]) {
2639 error = ENXIO; /* XXX EINVAL? */
2640 break;
2641 }
2642 ifp = ifindex2ifnet[mreq->ipv6mr_interface];
2643 }
2644
2645 /*
2646 * See if we found an interface, and confirm that it
2647 * supports multicast
2648 */
2649 if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
2650 error = EADDRNOTAVAIL;
2651 break;
2652 }
2653
2654 if (in6_setscope(&mreq->ipv6mr_multiaddr, ifp, NULL)) {
2655 error = EADDRNOTAVAIL; /* XXX: should not happen */
2656 break;
2657 }
2658
2659 /*
2660 * See if the membership already exists.
2661 */
2662 for (imm = im6o->im6o_memberships.lh_first;
2663 imm != NULL; imm = imm->i6mm_chain.le_next)
2664 if (imm->i6mm_maddr->in6m_ifp == ifp &&
2665 IN6_ARE_ADDR_EQUAL(&imm->i6mm_maddr->in6m_addr,
2666 &mreq->ipv6mr_multiaddr))
2667 break;
2668 if (imm != NULL) {
2669 error = EADDRINUSE;
2670 break;
2671 }
2672 /*
2673 * Everything looks good; add a new record to the multicast
2674 * address list for the given interface.
2675 */
2676 imm = in6_joingroup(ifp, &mreq->ipv6mr_multiaddr, &error, 0);
2677 if (imm == NULL)
2678 break;
2679 LIST_INSERT_HEAD(&im6o->im6o_memberships, imm, i6mm_chain);
2680 break;
2681
2682 case IPV6_LEAVE_GROUP:
2683 /*
2684 * Drop a multicast group membership.
2685 * Group must be a valid IP6 multicast address.
2686 */
2687 if (m == NULL || m->m_len != sizeof(struct ipv6_mreq)) {
2688 error = EINVAL;
2689 break;
2690 }
2691 mreq = mtod(m, struct ipv6_mreq *);
2692
2693 /*
2694 * If an interface address was specified, get a pointer
2695 * to its ifnet structure.
2696 */
2697 if (mreq->ipv6mr_interface != 0) {
2698 if (if_indexlim <= mreq->ipv6mr_interface ||
2699 !ifindex2ifnet[mreq->ipv6mr_interface]) {
2700 error = ENXIO; /* XXX EINVAL? */
2701 break;
2702 }
2703 ifp = ifindex2ifnet[mreq->ipv6mr_interface];
2704 } else
2705 ifp = NULL;
2706
2707 /* Fill in the scope zone ID */
2708 if (ifp) {
2709 if (in6_setscope(&mreq->ipv6mr_multiaddr, ifp, NULL)) {
2710 /* XXX: should not happen */
2711 error = EADDRNOTAVAIL;
2712 break;
2713 }
2714 } else if (mreq->ipv6mr_interface != 0) {
2715 /*
2716 * XXX: This case would happens when the (positive)
2717 * index is in the valid range, but the corresponding
2718 * interface has been detached dynamically. The above
2719 * check probably avoids such case to happen here, but
2720 * we check it explicitly for safety.
2721 */
2722 error = EADDRNOTAVAIL;
2723 break;
2724 } else { /* ipv6mr_interface == 0 */
2725 struct sockaddr_in6 sa6_mc;
2726
2727 /*
2728 * The API spec says as follows:
2729 * If the interface index is specified as 0, the
2730 * system may choose a multicast group membership to
2731 * drop by matching the multicast address only.
2732 * On the other hand, we cannot disambiguate the scope
2733 * zone unless an interface is provided. Thus, we
2734 * check if there's ambiguity with the default scope
2735 * zone as the last resort.
2736 */
2737 bzero(&sa6_mc, sizeof(sa6_mc));
2738 sa6_mc.sin6_family = AF_INET6;
2739 sa6_mc.sin6_len = sizeof(sa6_mc);
2740 sa6_mc.sin6_addr = mreq->ipv6mr_multiaddr;
2741 error = sa6_embedscope(&sa6_mc, ip6_use_defzone);
2742 if (error != 0)
2743 break;
2744 mreq->ipv6mr_multiaddr = sa6_mc.sin6_addr;
2745 }
2746
2747 /*
2748 * Find the membership in the membership list.
2749 */
2750 for (imm = im6o->im6o_memberships.lh_first;
2751 imm != NULL; imm = imm->i6mm_chain.le_next) {
2752 if ((ifp == NULL || imm->i6mm_maddr->in6m_ifp == ifp) &&
2753 IN6_ARE_ADDR_EQUAL(&imm->i6mm_maddr->in6m_addr,
2754 &mreq->ipv6mr_multiaddr))
2755 break;
2756 }
2757 if (imm == NULL) {
2758 /* Unable to resolve interface */
2759 error = EADDRNOTAVAIL;
2760 break;
2761 }
2762 /*
2763 * Give up the multicast address record to which the
2764 * membership points.
2765 */
2766 LIST_REMOVE(imm, i6mm_chain);
2767 in6_leavegroup(imm);
2768 break;
2769
2770 default:
2771 error = EOPNOTSUPP;
2772 break;
2773 }
2774
2775 /*
2776 * If all options have default values, no need to keep the mbuf.
2777 */
2778 if (im6o->im6o_multicast_ifp == NULL &&
2779 im6o->im6o_multicast_hlim == ip6_defmcasthlim &&
2780 im6o->im6o_multicast_loop == IPV6_DEFAULT_MULTICAST_LOOP &&
2781 im6o->im6o_memberships.lh_first == NULL) {
2782 free(*im6op, M_IPMOPTS);
2783 *im6op = NULL;
2784 }
2785
2786 return (error);
2787 }
2788
2789 /*
2790 * Return the IP6 multicast options in response to user getsockopt().
2791 */
2792 static int
2793 ip6_getmoptions(optname, im6o, mp)
2794 int optname;
2795 struct ip6_moptions *im6o;
2796 struct mbuf **mp;
2797 {
2798 u_int *hlim, *loop, *ifindex;
2799
2800 *mp = m_get(M_WAIT, MT_SOOPTS);
2801
2802 switch (optname) {
2803
2804 case IPV6_MULTICAST_IF:
2805 ifindex = mtod(*mp, u_int *);
2806 (*mp)->m_len = sizeof(u_int);
2807 if (im6o == NULL || im6o->im6o_multicast_ifp == NULL)
2808 *ifindex = 0;
2809 else
2810 *ifindex = im6o->im6o_multicast_ifp->if_index;
2811 return (0);
2812
2813 case IPV6_MULTICAST_HOPS:
2814 hlim = mtod(*mp, u_int *);
2815 (*mp)->m_len = sizeof(u_int);
2816 if (im6o == NULL)
2817 *hlim = ip6_defmcasthlim;
2818 else
2819 *hlim = im6o->im6o_multicast_hlim;
2820 return (0);
2821
2822 case IPV6_MULTICAST_LOOP:
2823 loop = mtod(*mp, u_int *);
2824 (*mp)->m_len = sizeof(u_int);
2825 if (im6o == NULL)
2826 *loop = ip6_defmcasthlim;
2827 else
2828 *loop = im6o->im6o_multicast_loop;
2829 return (0);
2830
2831 default:
2832 return (EOPNOTSUPP);
2833 }
2834 }
2835
2836 /*
2837 * Discard the IP6 multicast options.
2838 */
2839 void
2840 ip6_freemoptions(im6o)
2841 struct ip6_moptions *im6o;
2842 {
2843 struct in6_multi_mship *imm;
2844
2845 if (im6o == NULL)
2846 return;
2847
2848 while ((imm = im6o->im6o_memberships.lh_first) != NULL) {
2849 LIST_REMOVE(imm, i6mm_chain);
2850 in6_leavegroup(imm);
2851 }
2852 free(im6o, M_IPMOPTS);
2853 }
2854
2855 /*
2856 * Set IPv6 outgoing packet options based on advanced API.
2857 */
2858 int
2859 ip6_setpktopts(control, opt, stickyopt, priv, uproto)
2860 struct mbuf *control;
2861 struct ip6_pktopts *opt, *stickyopt;
2862 int priv, uproto;
2863 {
2864 struct cmsghdr *cm = 0;
2865
2866 if (control == NULL || opt == NULL)
2867 return (EINVAL);
2868
2869 ip6_initpktopts(opt);
2870 if (stickyopt) {
2871 int error;
2872
2873 /*
2874 * If stickyopt is provided, make a local copy of the options
2875 * for this particular packet, then override them by ancillary
2876 * objects.
2877 * XXX: copypktopts() does not copy the cached route to a next
2878 * hop (if any). This is not very good in terms of efficiency,
2879 * but we can allow this since this option should be rarely
2880 * used.
2881 */
2882 if ((error = copypktopts(opt, stickyopt, M_NOWAIT)) != 0)
2883 return (error);
2884 }
2885
2886 /*
2887 * XXX: Currently, we assume all the optional information is stored
2888 * in a single mbuf.
2889 */
2890 if (control->m_next)
2891 return (EINVAL);
2892
2893 for (; control->m_len; control->m_data += CMSG_ALIGN(cm->cmsg_len),
2894 control->m_len -= CMSG_ALIGN(cm->cmsg_len)) {
2895 int error;
2896
2897 if (control->m_len < CMSG_LEN(0))
2898 return (EINVAL);
2899
2900 cm = mtod(control, struct cmsghdr *);
2901 if (cm->cmsg_len == 0 || cm->cmsg_len > control->m_len)
2902 return (EINVAL);
2903 if (cm->cmsg_level != IPPROTO_IPV6)
2904 continue;
2905
2906 error = ip6_setpktopt(cm->cmsg_type, CMSG_DATA(cm),
2907 cm->cmsg_len - CMSG_LEN(0), opt, priv, 0, 1, uproto);
2908 if (error)
2909 return (error);
2910 }
2911
2912 return (0);
2913 }
2914
2915 /*
2916 * Set a particular packet option, as a sticky option or an ancillary data
2917 * item. "len" can be 0 only when it's a sticky option.
2918 * We have 4 cases of combination of "sticky" and "cmsg":
2919 * "sticky=0, cmsg=0": impossible
2920 * "sticky=0, cmsg=1": RFC2292 or RFC3542 ancillary data
2921 * "sticky=1, cmsg=0": RFC3542 socket option
2922 * "sticky=1, cmsg=1": RFC2292 socket option
2923 */
2924 static int
2925 ip6_setpktopt(int optname, u_char *buf, int len, struct ip6_pktopts *opt,
2926 int priv, int sticky, int cmsg, int uproto)
2927 {
2928 int minmtupolicy;
2929
2930 if (!sticky && !cmsg) {
2931 #ifdef DIAGNOSTIC
2932 printf("ip6_setpktopt: impossible case\n");
2933 #endif
2934 return (EINVAL);
2935 }
2936
2937 /*
2938 * IPV6_2292xxx is for backward compatibility to RFC2292, and should
2939 * not be specified in the context of RFC3542. Conversely,
2940 * RFC3542 types should not be specified in the context of RFC2292.
2941 */
2942 if (!cmsg) {
2943 switch (optname) {
2944 case IPV6_2292PKTINFO:
2945 case IPV6_2292HOPLIMIT:
2946 case IPV6_2292NEXTHOP:
2947 case IPV6_2292HOPOPTS:
2948 case IPV6_2292DSTOPTS:
2949 case IPV6_2292RTHDR:
2950 case IPV6_2292PKTOPTIONS:
2951 return (ENOPROTOOPT);
2952 }
2953 }
2954 if (sticky && cmsg) {
2955 switch (optname) {
2956 case IPV6_PKTINFO:
2957 case IPV6_HOPLIMIT:
2958 case IPV6_NEXTHOP:
2959 case IPV6_HOPOPTS:
2960 case IPV6_DSTOPTS:
2961 case IPV6_RTHDRDSTOPTS:
2962 case IPV6_RTHDR:
2963 case IPV6_USE_MIN_MTU:
2964 case IPV6_DONTFRAG:
2965 case IPV6_OTCLASS:
2966 case IPV6_TCLASS:
2967 return (ENOPROTOOPT);
2968 }
2969 }
2970
2971 switch (optname) {
2972 #ifdef RFC2292
2973 case IPV6_2292PKTINFO:
2974 #endif
2975 case IPV6_PKTINFO:
2976 {
2977 struct ifnet *ifp = NULL;
2978 struct in6_pktinfo *pktinfo;
2979
2980 if (len != sizeof(struct in6_pktinfo))
2981 return (EINVAL);
2982
2983 pktinfo = (struct in6_pktinfo *)buf;
2984
2985 /*
2986 * An application can clear any sticky IPV6_PKTINFO option by
2987 * doing a "regular" setsockopt with ipi6_addr being
2988 * in6addr_any and ipi6_ifindex being zero.
2989 * [RFC 3542, Section 6]
2990 */
2991 if (optname == IPV6_PKTINFO && opt->ip6po_pktinfo &&
2992 pktinfo->ipi6_ifindex == 0 &&
2993 IN6_IS_ADDR_UNSPECIFIED(&pktinfo->ipi6_addr)) {
2994 ip6_clearpktopts(opt, optname);
2995 break;
2996 }
2997
2998 if (uproto == IPPROTO_TCP && optname == IPV6_PKTINFO &&
2999 sticky && !IN6_IS_ADDR_UNSPECIFIED(&pktinfo->ipi6_addr)) {
3000 return (EINVAL);
3001 }
3002
3003 /* validate the interface index if specified. */
3004 if (pktinfo->ipi6_ifindex >= if_indexlim) {
3005 return (ENXIO);
3006 }
3007 if (pktinfo->ipi6_ifindex) {
3008 ifp = ifindex2ifnet[pktinfo->ipi6_ifindex];
3009 if (ifp == NULL)
3010 return (ENXIO);
3011 }
3012
3013 /*
3014 * We store the address anyway, and let in6_selectsrc()
3015 * validate the specified address. This is because ipi6_addr
3016 * may not have enough information about its scope zone, and
3017 * we may need additional information (such as outgoing
3018 * interface or the scope zone of a destination address) to
3019 * disambiguate the scope.
3020 * XXX: the delay of the validation may confuse the
3021 * application when it is used as a sticky option.
3022 */
3023 if (opt->ip6po_pktinfo == NULL) {
3024 opt->ip6po_pktinfo = malloc(sizeof(*pktinfo),
3025 M_IP6OPT, M_NOWAIT);
3026 if (opt->ip6po_pktinfo == NULL)
3027 return (ENOBUFS);
3028 }
3029 memcpy(opt->ip6po_pktinfo, pktinfo, sizeof(*pktinfo));
3030 break;
3031 }
3032
3033 #ifdef RFC2292
3034 case IPV6_2292HOPLIMIT:
3035 #endif
3036 case IPV6_HOPLIMIT:
3037 {
3038 int *hlimp;
3039
3040 /*
3041 * RFC 3542 deprecated the usage of sticky IPV6_HOPLIMIT
3042 * to simplify the ordering among hoplimit options.
3043 */
3044 if (optname == IPV6_HOPLIMIT && sticky)
3045 return (ENOPROTOOPT);
3046
3047 if (len != sizeof(int))
3048 return (EINVAL);
3049 hlimp = (int *)buf;
3050 if (*hlimp < -1 || *hlimp > 255)
3051 return (EINVAL);
3052
3053 opt->ip6po_hlim = *hlimp;
3054 break;
3055 }
3056
3057 case IPV6_OTCLASS:
3058 if (len != sizeof(u_int8_t))
3059 return (EINVAL);
3060
3061 opt->ip6po_tclass = *(u_int8_t *)buf;
3062 break;
3063
3064 case IPV6_TCLASS:
3065 {
3066 int tclass;
3067
3068 if (len != sizeof(int))
3069 return (EINVAL);
3070 tclass = *(int *)buf;
3071 if (tclass < -1 || tclass > 255)
3072 return (EINVAL);
3073
3074 opt->ip6po_tclass = tclass;
3075 break;
3076 }
3077
3078 #ifdef RFC2292
3079 case IPV6_2292NEXTHOP:
3080 #endif
3081 case IPV6_NEXTHOP:
3082 if (!priv)
3083 return (EPERM);
3084
3085 if (len == 0) { /* just remove the option */
3086 ip6_clearpktopts(opt, IPV6_NEXTHOP);
3087 break;
3088 }
3089
3090 /* check if cmsg_len is large enough for sa_len */
3091 if (len < sizeof(struct sockaddr) || len < *buf)
3092 return (EINVAL);
3093
3094 switch (((struct sockaddr *)buf)->sa_family) {
3095 case AF_INET6:
3096 {
3097 struct sockaddr_in6 *sa6 = (struct sockaddr_in6 *)buf;
3098 int error;
3099
3100 if (sa6->sin6_len != sizeof(struct sockaddr_in6))
3101 return (EINVAL);
3102
3103 if (IN6_IS_ADDR_UNSPECIFIED(&sa6->sin6_addr) ||
3104 IN6_IS_ADDR_MULTICAST(&sa6->sin6_addr)) {
3105 return (EINVAL);
3106 }
3107 if ((error = sa6_embedscope(sa6, ip6_use_defzone))
3108 != 0) {
3109 return (error);
3110 }
3111 break;
3112 }
3113 case AF_LINK: /* eventually be supported? */
3114 default:
3115 return (EAFNOSUPPORT);
3116 }
3117
3118 /* turn off the previous option, then set the new option. */
3119 ip6_clearpktopts(opt, IPV6_NEXTHOP);
3120 opt->ip6po_nexthop = malloc(*buf, M_IP6OPT, M_NOWAIT);
3121 if (opt->ip6po_nexthop == NULL)
3122 return (ENOBUFS);
3123 memcpy(opt->ip6po_nexthop, buf, *buf);
3124 break;
3125
3126 #ifdef RFC2292
3127 case IPV6_2292HOPOPTS:
3128 #endif
3129 case IPV6_HOPOPTS:
3130 {
3131 struct ip6_hbh *hbh;
3132 int hbhlen;
3133
3134 /*
3135 * XXX: We don't allow a non-privileged user to set ANY HbH
3136 * options, since per-option restriction has too much
3137 * overhead.
3138 */
3139 if (!priv)
3140 return (EPERM);
3141
3142 if (len == 0) {
3143 ip6_clearpktopts(opt, IPV6_HOPOPTS);
3144 break; /* just remove the option */
3145 }
3146
3147 /* message length validation */
3148 if (len < sizeof(struct ip6_hbh))
3149 return (EINVAL);
3150 hbh = (struct ip6_hbh *)buf;
3151 hbhlen = (hbh->ip6h_len + 1) << 3;
3152 if (len != hbhlen)
3153 return (EINVAL);
3154
3155 /* turn off the previous option, then set the new option. */
3156 ip6_clearpktopts(opt, IPV6_HOPOPTS);
3157 opt->ip6po_hbh = malloc(hbhlen, M_IP6OPT, M_NOWAIT);
3158 if (opt->ip6po_hbh == NULL)
3159 return (ENOBUFS);
3160 memcpy(opt->ip6po_hbh, hbh, hbhlen);
3161
3162 break;
3163 }
3164
3165 #ifdef RFC2292
3166 case IPV6_2292DSTOPTS:
3167 #endif
3168 case IPV6_DSTOPTS:
3169 case IPV6_RTHDRDSTOPTS:
3170 {
3171 struct ip6_dest *dest, **newdest = NULL;
3172 int destlen;
3173
3174 if (!priv) /* XXX: see the comment for IPV6_HOPOPTS */
3175 return (EPERM);
3176
3177 if (len == 0) {
3178 ip6_clearpktopts(opt, optname);
3179 break; /* just remove the option */
3180 }
3181
3182 /* message length validation */
3183 if (len < sizeof(struct ip6_dest))
3184 return (EINVAL);
3185 dest = (struct ip6_dest *)buf;
3186 destlen = (dest->ip6d_len + 1) << 3;
3187 if (len != destlen)
3188 return (EINVAL);
3189 /*
3190 * Determine the position that the destination options header
3191 * should be inserted; before or after the routing header.
3192 */
3193 switch (optname) {
3194 case IPV6_2292DSTOPTS:
3195 /*
3196 * The old advanced API is ambiguous on this point.
3197 * Our approach is to determine the position based
3198 * according to the existence of a routing header.
3199 * Note, however, that this depends on the order of the
3200 * extension headers in the ancillary data; the 1st
3201 * part of the destination options header must appear
3202 * before the routing header in the ancillary data,
3203 * too.
3204 * RFC3542 solved the ambiguity by introducing
3205 * separate ancillary data or option types.
3206 */
3207 if (opt->ip6po_rthdr == NULL)
3208 newdest = &opt->ip6po_dest1;
3209 else
3210 newdest = &opt->ip6po_dest2;
3211 break;
3212 case IPV6_RTHDRDSTOPTS:
3213 newdest = &opt->ip6po_dest1;
3214 break;
3215 case IPV6_DSTOPTS:
3216 newdest = &opt->ip6po_dest2;
3217 break;
3218 }
3219
3220 /* turn off the previous option, then set the new option. */
3221 ip6_clearpktopts(opt, optname);
3222 *newdest = malloc(destlen, M_IP6OPT, M_NOWAIT);
3223 if (*newdest == NULL)
3224 return (ENOBUFS);
3225 memcpy(*newdest, dest, destlen);
3226
3227 break;
3228 }
3229
3230 #ifdef RFC2292
3231 case IPV6_2292RTHDR:
3232 #endif
3233 case IPV6_RTHDR:
3234 {
3235 struct ip6_rthdr *rth;
3236 int rthlen;
3237
3238 if (len == 0) {
3239 ip6_clearpktopts(opt, IPV6_RTHDR);
3240 break; /* just remove the option */
3241 }
3242
3243 /* message length validation */
3244 if (len < sizeof(struct ip6_rthdr))
3245 return (EINVAL);
3246 rth = (struct ip6_rthdr *)buf;
3247 rthlen = (rth->ip6r_len + 1) << 3;
3248 if (len != rthlen)
3249 return (EINVAL);
3250 switch (rth->ip6r_type) {
3251 case IPV6_RTHDR_TYPE_0:
3252 if (rth->ip6r_len == 0) /* must contain one addr */
3253 return (EINVAL);
3254 if (rth->ip6r_len % 2) /* length must be even */
3255 return (EINVAL);
3256 if (rth->ip6r_len / 2 != rth->ip6r_segleft)
3257 return (EINVAL);
3258 break;
3259 default:
3260 return (EINVAL); /* not supported */
3261 }
3262 /* turn off the previous option */
3263 ip6_clearpktopts(opt, IPV6_RTHDR);
3264 opt->ip6po_rthdr = malloc(rthlen, M_IP6OPT, M_NOWAIT);
3265 if (opt->ip6po_rthdr == NULL)
3266 return (ENOBUFS);
3267 memcpy(opt->ip6po_rthdr, rth, rthlen);
3268 break;
3269 }
3270
3271 case IPV6_USE_MIN_MTU:
3272 if (len != sizeof(int))
3273 return (EINVAL);
3274 minmtupolicy = *(int *)buf;
3275 if (minmtupolicy != IP6PO_MINMTU_MCASTONLY &&
3276 minmtupolicy != IP6PO_MINMTU_DISABLE &&
3277 minmtupolicy != IP6PO_MINMTU_ALL) {
3278 return (EINVAL);
3279 }
3280 opt->ip6po_minmtu = minmtupolicy;
3281 break;
3282
3283 case IPV6_DONTFRAG:
3284 if (len != sizeof(int))
3285 return (EINVAL);
3286
3287 if (uproto == IPPROTO_TCP || *(int *)buf == 0) {
3288 /*
3289 * we ignore this option for TCP sockets.
3290 * (RFC3542 leaves this case unspecified.)
3291 */
3292 opt->ip6po_flags &= ~IP6PO_DONTFRAG;
3293 } else
3294 opt->ip6po_flags |= IP6PO_DONTFRAG;
3295 break;
3296
3297 default:
3298 return (ENOPROTOOPT);
3299 } /* end of switch */
3300
3301 return (0);
3302 }
3303
3304 /*
3305 * Routine called from ip6_output() to loop back a copy of an IP6 multicast
3306 * packet to the input queue of a specified interface. Note that this
3307 * calls the output routine of the loopback "driver", but with an interface
3308 * pointer that might NOT be lo0ifp -- easier than replicating that code here.
3309 */
3310 void
3311 ip6_mloopback(ifp, m, dst)
3312 struct ifnet *ifp;
3313 struct mbuf *m;
3314 struct sockaddr_in6 *dst;
3315 {
3316 struct mbuf *copym;
3317 struct ip6_hdr *ip6;
3318
3319 copym = m_copy(m, 0, M_COPYALL);
3320 if (copym == NULL)
3321 return;
3322
3323 /*
3324 * Make sure to deep-copy IPv6 header portion in case the data
3325 * is in an mbuf cluster, so that we can safely override the IPv6
3326 * header portion later.
3327 */
3328 if ((copym->m_flags & M_EXT) != 0 ||
3329 copym->m_len < sizeof(struct ip6_hdr)) {
3330 copym = m_pullup(copym, sizeof(struct ip6_hdr));
3331 if (copym == NULL)
3332 return;
3333 }
3334
3335 #ifdef DIAGNOSTIC
3336 if (copym->m_len < sizeof(*ip6)) {
3337 m_freem(copym);
3338 return;
3339 }
3340 #endif
3341
3342 ip6 = mtod(copym, struct ip6_hdr *);
3343 /*
3344 * clear embedded scope identifiers if necessary.
3345 * in6_clearscope will touch the addresses only when necessary.
3346 */
3347 in6_clearscope(&ip6->ip6_src);
3348 in6_clearscope(&ip6->ip6_dst);
3349
3350 (void)looutput(ifp, copym, (struct sockaddr *)dst, NULL);
3351 }
3352
3353 /*
3354 * Chop IPv6 header off from the payload.
3355 */
3356 static int
3357 ip6_splithdr(m, exthdrs)
3358 struct mbuf *m;
3359 struct ip6_exthdrs *exthdrs;
3360 {
3361 struct mbuf *mh;
3362 struct ip6_hdr *ip6;
3363
3364 ip6 = mtod(m, struct ip6_hdr *);
3365 if (m->m_len > sizeof(*ip6)) {
3366 MGETHDR(mh, M_DONTWAIT, MT_HEADER);
3367 if (mh == 0) {
3368 m_freem(m);
3369 return ENOBUFS;
3370 }
3371 M_MOVE_PKTHDR(mh, m);
3372 MH_ALIGN(mh, sizeof(*ip6));
3373 m->m_len -= sizeof(*ip6);
3374 m->m_data += sizeof(*ip6);
3375 mh->m_next = m;
3376 m = mh;
3377 m->m_len = sizeof(*ip6);
3378 bcopy((caddr_t)ip6, mtod(m, caddr_t), sizeof(*ip6));
3379 }
3380 exthdrs->ip6e_ip6 = m;
3381 return 0;
3382 }
3383
3384 /*
3385 * Compute IPv6 extension header length.
3386 */
3387 int
3388 ip6_optlen(in6p)
3389 struct in6pcb *in6p;
3390 {
3391 int len;
3392
3393 if (!in6p->in6p_outputopts)
3394 return 0;
3395
3396 len = 0;
3397 #define elen(x) \
3398 (((struct ip6_ext *)(x)) ? (((struct ip6_ext *)(x))->ip6e_len + 1) << 3 : 0)
3399
3400 len += elen(in6p->in6p_outputopts->ip6po_hbh);
3401 len += elen(in6p->in6p_outputopts->ip6po_dest1);
3402 len += elen(in6p->in6p_outputopts->ip6po_rthdr);
3403 len += elen(in6p->in6p_outputopts->ip6po_dest2);
3404 return len;
3405 #undef elen
3406 }
3407