ip6_output.c revision 1.126.2.1 1 /* $NetBSD: ip6_output.c,v 1.126.2.1 2008/02/22 02:53:33 keiichi Exp $ */
2 /* $KAME: ip6_output.c,v 1.172 2001/03/25 09:55:56 itojun Exp $ */
3
4 /*
5 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. Neither the name of the project nor the names of its contributors
17 * may be used to endorse or promote products derived from this software
18 * without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 */
32
33 /*
34 * Copyright (c) 1982, 1986, 1988, 1990, 1993
35 * The Regents of the University of California. All rights reserved.
36 *
37 * Redistribution and use in source and binary forms, with or without
38 * modification, are permitted provided that the following conditions
39 * are met:
40 * 1. Redistributions of source code must retain the above copyright
41 * notice, this list of conditions and the following disclaimer.
42 * 2. Redistributions in binary form must reproduce the above copyright
43 * notice, this list of conditions and the following disclaimer in the
44 * documentation and/or other materials provided with the distribution.
45 * 3. Neither the name of the University nor the names of its contributors
46 * may be used to endorse or promote products derived from this software
47 * without specific prior written permission.
48 *
49 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
50 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
53 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59 * SUCH DAMAGE.
60 *
61 * @(#)ip_output.c 8.3 (Berkeley) 1/21/94
62 */
63
64 #include <sys/cdefs.h>
65 __KERNEL_RCSID(0, "$NetBSD: ip6_output.c,v 1.126.2.1 2008/02/22 02:53:33 keiichi Exp $");
66
67 #include "opt_inet.h"
68 #include "opt_inet6.h"
69 #include "opt_ipsec.h"
70 #include "opt_pfil_hooks.h"
71
72 #include <sys/param.h>
73 #include <sys/malloc.h>
74 #include <sys/mbuf.h>
75 #include <sys/errno.h>
76 #include <sys/protosw.h>
77 #include <sys/socket.h>
78 #include <sys/socketvar.h>
79 #include <sys/systm.h>
80 #include <sys/proc.h>
81 #include <sys/kauth.h>
82
83 #include <net/if.h>
84 #include <net/route.h>
85 #ifdef PFIL_HOOKS
86 #include <net/pfil.h>
87 #endif
88
89 #include <netinet/in.h>
90 #include <netinet/in_var.h>
91 #include <netinet/ip6.h>
92 #include <netinet/icmp6.h>
93 #include <netinet/in_offload.h>
94 #include <netinet6/in6_offload.h>
95 #include <netinet6/ip6_var.h>
96 #include <netinet6/in6_pcb.h>
97 #include <netinet6/nd6.h>
98 #include <netinet6/ip6protosw.h>
99 #include <netinet6/scope6_var.h>
100
101 #ifdef IPSEC
102 #include <netinet6/ipsec.h>
103 #include <netkey/key.h>
104 #endif /* IPSEC */
105
106 #ifdef MOBILE_IPV6
107 #include <net/mipsock.h>
108 #include <netinet6/mip6.h>
109 #include <netinet6/mip6_var.h>
110 #include "mip.h"
111 #if NMIP > 0
112 #include <netinet/ip6mh.h>
113 #endif /* NMIP > 0*/
114 #endif /* MOBILE_IPV6 */
115
116 #ifdef FAST_IPSEC
117 #include <netipsec/ipsec.h>
118 #include <netipsec/ipsec6.h>
119 #include <netipsec/key.h>
120 #include <netipsec/xform.h>
121 #endif
122
123 #include <net/net_osdep.h>
124
125 #ifdef PFIL_HOOKS
126 extern struct pfil_head inet6_pfil_hook; /* XXX */
127 #endif
128
129 struct ip6_exthdrs { /* one mbuf slot per header that ip6_output() chains into the outgoing packet; the struct is zeroed first, so a NULL slot means "header not present" */
130 struct mbuf *ip6e_ip6; /* mbuf holding the IPv6 header itself (read back after ip6_splithdr()) */
131 struct mbuf *ip6e_hbh; /* Hop-by-Hop options header */
132 struct mbuf *ip6e_dest1; /* Destination options header (1st part), chained before the routing header */
133 struct mbuf *ip6e_rthdr; /* Routing header */
134 #ifdef MOBILE_IPV6
135 struct mbuf *ip6e_rthdr2; /* Type 2 Routing header */
136 #endif /* MOBILE_IPV6 */
137 #if defined(MOBILE_IPV6) && NMIP > 0
138 struct mbuf *ip6e_hoa; /* Home Address Destination options header */
139 #endif /* MOBILE_IPV6 && NMIP > 0 */
140 struct mbuf *ip6e_dest2; /* Destination options header (2nd part), placed directly before the payload */
141 };
142
143 static int ip6_pcbopt(int, u_char *, int, struct ip6_pktopts **, /* forward declarations of the file-local (static) helpers follow */
144 int, int);
145 static int ip6_getpcbopt(struct ip6_pktopts *, int, struct mbuf **);
146 static int ip6_setpktopt(int, u_char *, int, struct ip6_pktopts *, int,
147 int, int, int);
148 static int ip6_setmoptions(int, struct ip6_moptions **, struct mbuf *);
149 static int ip6_getmoptions(int, struct ip6_moptions *, struct mbuf **);
150 static int ip6_copyexthdr(struct mbuf **, void *, int); /* as called by MAKE_EXTHDR: (mbuf slot to fill, source header, header length in bytes); nonzero return is an error */
151 static int ip6_insertfraghdr(struct mbuf *, struct mbuf *, int,
152 struct ip6_frag **);
153 static int ip6_insert_jumboopt(struct ip6_exthdrs *, u_int32_t); /* ip6_output() calls this with the payload length when it exceeds IPV6_MAXPACKET */
154 static int ip6_splithdr(struct mbuf *, struct ip6_exthdrs *); /* on success ip6_output() takes the header mbuf from ip6e_ip6; on failure it treats the packet mbuf as gone */
155 static int ip6_getpmtu(struct route *, struct route *, struct ifnet *, /* ip6_output() passes (ro_pmtu, ro, ifp, &finaldst, &mtu, &alwaysfrag) */
156 const struct in6_addr *, u_long *, int *);
157 static int copypktopts(struct ip6_pktopts *, struct ip6_pktopts *, int);
158
159 #ifdef RFC2292
160 static int ip6_pcbopts(struct ip6_pktopts **, struct mbuf *,
161 struct socket *);
162 #endif
163
164 #define IN6_NEED_CHECKSUM(ifp, csum_flags) \
165 (__predict_true(((ifp)->if_flags & IFF_LOOPBACK) == 0 || \
166 (((csum_flags) & M_CSUM_UDPv6) != 0 && udp_do_loopback_cksum) || \
167 (((csum_flags) & M_CSUM_TCPv6) != 0 && tcp_do_loopback_cksum))) /* nonzero when ifp is not a loopback interface; on loopback, nonzero only if csum_flags has M_CSUM_UDPv6 (resp. M_CSUM_TCPv6) set and udp_do_loopback_cksum (resp. tcp_do_loopback_cksum) is nonzero. The result is hinted as likely true; csum_flags may be evaluated twice. */
168
169 /*
170 * IP6 output. The packet in mbuf chain m contains a skeletal IP6
171 * header (with pri, len, nxt, hlim, src, dst).
172 * This function may modify ver and hlim only.
173 * The mbuf chain containing the packet will be freed.
174 * The mbuf opt, if present, will not be freed.
175 *
176 * type of "mtu": rt_rmx.rmx_mtu is u_long, ifnet.ifr_mtu is int, and
177 * nd_ifinfo.linkmtu is u_int32_t. so we use u_long to hold largest one,
178 * which is rt_rmx.rmx_mtu.
179 */
180 int
181 ip6_output(
182 struct mbuf *m0,
183 struct ip6_pktopts *opt,
184 struct route *ro,
185 int flags,
186 struct ip6_moptions *im6o,
187 struct socket *so,
188 struct ifnet **ifpp /* XXX: just for statistics */
189 )
190 {
191 struct ip6_hdr *ip6 = NULL, *mhip6;
192 struct ifnet *ifp, *origifp;
193 struct mbuf *m = m0;
194 int hlen, tlen, len, off;
195 bool tso;
196 struct route ip6route;
197 struct rtentry *rt = NULL;
198 const struct sockaddr_in6 *dst = NULL;
199 struct sockaddr_in6 src_sa, dst_sa;
200 int error = 0;
201 struct in6_ifaddr *ia = NULL;
202 u_long mtu;
203 int alwaysfrag, dontfrag;
204 u_int32_t optlen = 0, plen = 0, unfragpartlen = 0;
205 struct ip6_exthdrs exthdrs;
206 struct in6_addr finaldst, src0, dst0;
207 u_int32_t zone;
208 struct route *ro_pmtu = NULL;
209 int hdrsplit = 0;
210 int needipsec = 0;
211 #ifdef IPSEC
212 int needipsectun = 0;
213 struct secpolicy *sp = NULL;
214
215 ip6 = mtod(m, struct ip6_hdr *);
216 #endif /* IPSEC */
217 #ifdef MOBILE_IPV6
218 struct mip6_bc_internal *mbc;
219 #if NMIP > 0
220 struct mip6_bul_internal *mbul = NULL;
221 #endif /* NMIP > 0 */
222 #endif /* MOBILE_IPV6 */
223 #ifdef FAST_IPSEC
224 struct secpolicy *sp = NULL;
225 int s;
226 #endif
227
228
229 memset(&ip6route, 0, sizeof(ip6route));
230
231 #ifdef DIAGNOSTIC
232 if ((m->m_flags & M_PKTHDR) == 0)
233 panic("ip6_output: no HDR");
234
235 if ((m->m_pkthdr.csum_flags &
236 (M_CSUM_TCPv4|M_CSUM_UDPv4|M_CSUM_TSOv4)) != 0) {
237 panic("ip6_output: IPv4 checksum offload flags: %d",
238 m->m_pkthdr.csum_flags);
239 }
240
241 if ((m->m_pkthdr.csum_flags & (M_CSUM_TCPv6|M_CSUM_UDPv6)) ==
242 (M_CSUM_TCPv6|M_CSUM_UDPv6)) {
243 panic("ip6_output: conflicting checksum offload flags: %d",
244 m->m_pkthdr.csum_flags);
245 }
246 #endif
247
248 M_CSUM_DATA_IPv6_HL_SET(m->m_pkthdr.csum_data, sizeof(struct ip6_hdr));
249
250 #define MAKE_EXTHDR(hp, mp) \
251 do { \
252 if (hp) { \
253 struct ip6_ext *eh = (struct ip6_ext *)(hp); \
254 error = ip6_copyexthdr((mp), (void *)(hp), \
255 ((eh)->ip6e_len + 1) << 3); \
256 if (error) \
257 goto freehdrs; \
258 } \
259 } while (/*CONSTCOND*/ 0)
260
261 bzero(&exthdrs, sizeof(exthdrs));
262 if (opt) {
263 /* Hop-by-Hop options header */
264 MAKE_EXTHDR(opt->ip6po_hbh, &exthdrs.ip6e_hbh);
265 /* Destination options header(1st part) */
266 if (opt->ip6po_rthdr
267 #ifdef MOBILE_IPV6
268 || opt->ip6po_rthdr2
269 #endif /* MOBILE_IPV6 */
270 ) {
271 /*
272 * Destination options header(1st part)
273 * This only makes sense with a routing header.
274 * See Section 9.2 of RFC 3542.
275 * Disabling this part just for MIP6 convenience is
276 * a bad idea. We need to think carefully about a
277 * way to make the advanced API coexist with MIP6
278 * options, which might automatically be inserted in
279 * the kernel.
280 */
281 MAKE_EXTHDR(opt->ip6po_dest1, &exthdrs.ip6e_dest1);
282 }
283 /* Routing header */
284 MAKE_EXTHDR(opt->ip6po_rthdr, &exthdrs.ip6e_rthdr);
285 #ifdef MOBILE_IPV6
286 /* Type 2 Routing header */
287 MAKE_EXTHDR(opt->ip6po_rthdr2, &exthdrs.ip6e_rthdr2);
288 #if NMIP > 0
289 /* Home Address Destination options header */
290 MAKE_EXTHDR(opt->ip6po_hoa, &exthdrs.ip6e_hoa);
291 #endif /* NMIP > 0*/
292 #endif /* MOBILE_IPV6 */
293 /* Destination options header(2nd part) */
294 MAKE_EXTHDR(opt->ip6po_dest2, &exthdrs.ip6e_dest2);
295 }
296
297 #ifdef MOBILE_IPV6
298 /* Find binding cache entry */
299 /* XXX need policy to determine bid for MCOA*/
300 ip6 = mtod(m, struct ip6_hdr *); /* in case of !IPSEC */
301 mbc = mip6_bce_get(&ip6->ip6_dst, &ip6->ip6_src, NULL, 0);
302 /*
303 * If a node has a corresponding binding cache, put a Type 2
304 * Routing Header to directly deliver the packet. Except, a
305 * caller didn't specify a Type 2 Routing Header explicitly.
306 */
307 if ((mbc != NULL) && (ip6->ip6_nxt != IPPROTO_MH) &&
308 (exthdrs.ip6e_rthdr2 == NULL)) {
309 struct ip6_rthdr2 *rthdr2;
310
311 rthdr2 = mip6_create_rthdr2(&mbc->mbc_coa);
312 if (rthdr2 == NULL)
313 goto freehdrs;
314
315 MAKE_EXTHDR(rthdr2, &exthdrs.ip6e_rthdr2);
316 free(rthdr2, M_IP6OPT);
317 }
318 #endif /* MOBILE_IPV6 */
319
320 #if defined(MOBILE_IPV6) && NMIP > 0
321 /*
322 * If a correspondent binding update list is found and its
323 * status is BOUND, a packet is sent directly to the
324 * destination with a Home Address Option. Except a caller
325 * didn't specify a Home Address Option explicitly.
326 */
327 mbul = mip6_bul_get(&ip6->ip6_src, &ip6->ip6_dst, 0/* XXX */);
328 /*
329 * Route Optimization: appending a HoA option.
330 */
331 if ((mbul != NULL) && (exthdrs.ip6e_hoa == NULL)) {
332 u_int8_t *hoa_opt;
333
334 if (mbul->mbul_state & MIP6_BUL_STATE_NEEDTUNNEL)
335 goto skip_hoa;
336
337 if (ip6->ip6_nxt == IPPROTO_MH) {
338 #if 0
339 m_copydata(m, sizeof(struct ip6_hdr),
340 sizeof(struct ip6_mh), (void *)&mh);
341 if (mh.ip6mh_type != IP6_MH_TYPE_BU)
342 #endif /* 0 */
343 goto skip_hoa;
344 }
345
346 hoa_opt = mip6_create_hoa_opt(&mbul->mbul_coa);
347 if (hoa_opt == NULL)
348 goto freehdrs;
349
350 MAKE_EXTHDR(hoa_opt, &exthdrs.ip6e_hoa);
351 free(hoa_opt, M_IP6OPT);
352 }
353 skip_hoa:
354 #endif /* MOBILE_IPV6 && NMIP > 0 */
355
356 #ifdef IPSEC
357 if ((flags & IPV6_FORWARDING) != 0) {
358 needipsec = 0;
359 goto skippolicycheck;
360 }
361
362 /* get a security policy for this packet */
363 if (so == NULL)
364 sp = ipsec6_getpolicybyaddr(m, IPSEC_DIR_OUTBOUND, 0, &error);
365 else {
366 if (IPSEC_PCB_SKIP_IPSEC(sotoinpcb_hdr(so)->inph_sp,
367 IPSEC_DIR_OUTBOUND)) {
368 needipsec = 0;
369 goto skippolicycheck;
370 }
371 sp = ipsec6_getpolicybysock(m, IPSEC_DIR_OUTBOUND, so, &error);
372 }
373
374 if (sp == NULL) {
375 ipsec6stat.out_inval++;
376 goto freehdrs;
377 }
378
379 error = 0;
380
381 /* check policy */
382 switch (sp->policy) {
383 case IPSEC_POLICY_DISCARD:
384 /*
385 * This packet is just discarded.
386 */
387 ipsec6stat.out_polvio++;
388 goto freehdrs;
389
390 case IPSEC_POLICY_BYPASS:
391 case IPSEC_POLICY_NONE:
392 /* no need to do IPsec. */
393 needipsec = 0;
394 break;
395
396 case IPSEC_POLICY_IPSEC:
397 if (sp->req == NULL) {
398 /* XXX should be panic ? */
399 printf("ip6_output: No IPsec request specified.\n");
400 error = EINVAL;
401 goto freehdrs;
402 }
403 needipsec = 1;
404 break;
405
406 case IPSEC_POLICY_ENTRUST:
407 default:
408 printf("ip6_output: Invalid policy found. %d\n", sp->policy);
409 }
410
411 skippolicycheck:;
412 #endif /* IPSEC */
413
414 /*
415 * Calculate the total length of the extension header chain.
416 * Keep the length of the unfragmentable part for fragmentation.
417 */
418 optlen = 0;
419 if (exthdrs.ip6e_hbh) optlen += exthdrs.ip6e_hbh->m_len;
420 if (exthdrs.ip6e_dest1) optlen += exthdrs.ip6e_dest1->m_len;
421 if (exthdrs.ip6e_rthdr) optlen += exthdrs.ip6e_rthdr->m_len;
422 #ifdef MOBILE_IPV6
423 if (exthdrs.ip6e_rthdr2) optlen += exthdrs.ip6e_rthdr2->m_len;
424 #if NMIP > 0
425 if (exthdrs.ip6e_hoa) optlen += exthdrs.ip6e_hoa->m_len;
426 #endif /* NMIP > 0 */
427 #endif /* MOBILE_IPV6 */
428 unfragpartlen = optlen + sizeof(struct ip6_hdr);
429 /* NOTE: we don't add AH/ESP length here. do that later. */
430 if (exthdrs.ip6e_dest2) optlen += exthdrs.ip6e_dest2->m_len;
431
432 #ifdef FAST_IPSEC
433 /* Check the security policy (SP) for the packet */
434
435 #ifndef MOBILE_IPV6 /* XXX */
436 /* XXX For the moment, we don't support packets with extension headers */
437 if (optlen !=0)
438 goto freehdrs;
439 #endif /* !MOBILE_IPV6 */
440
441 sp = ipsec6_check_policy(m,so,flags,&needipsec,&error);
442 if (error != 0) {
443 /*
444 * Hack: -EINVAL is used to signal that a packet
445 * should be silently discarded. This is typically
446 * because we asked key management for an SA and
447 * it was delayed (e.g. kicked up to IKE).
448 */
449 if (error == -EINVAL)
450 error = 0;
451 goto freehdrs;
452 }
453 #endif /* FAST_IPSEC */
454
455
456 if (needipsec &&
457 (m->m_pkthdr.csum_flags & (M_CSUM_UDPv6|M_CSUM_TCPv6)) != 0) {
458 in6_delayed_cksum(m);
459 m->m_pkthdr.csum_flags &= ~(M_CSUM_UDPv6|M_CSUM_TCPv6);
460 }
461
462
463 /*
464 * If we need IPsec, or there is at least one extension header,
465 * separate IP6 header from the payload.
466 */
467 if ((needipsec || optlen) && !hdrsplit) {
468 if ((error = ip6_splithdr(m, &exthdrs)) != 0) {
469 m = NULL;
470 goto freehdrs;
471 }
472 m = exthdrs.ip6e_ip6;
473 hdrsplit++;
474 }
475
476 /* adjust pointer */
477 ip6 = mtod(m, struct ip6_hdr *);
478
479 /* adjust mbuf packet header length */
480 m->m_pkthdr.len += optlen;
481 plen = m->m_pkthdr.len - sizeof(*ip6);
482
483 /* If this is a jumbo payload, insert a jumbo payload option. */
484 if (plen > IPV6_MAXPACKET) {
485 if (!hdrsplit) {
486 if ((error = ip6_splithdr(m, &exthdrs)) != 0) {
487 m = NULL;
488 goto freehdrs;
489 }
490 m = exthdrs.ip6e_ip6;
491 hdrsplit++;
492 }
493 /* adjust pointer */
494 ip6 = mtod(m, struct ip6_hdr *);
495 if ((error = ip6_insert_jumboopt(&exthdrs, plen)) != 0)
496 goto freehdrs;
497 optlen += 8; /* XXX JUMBOOPTLEN */
498 ip6->ip6_plen = 0;
499 } else
500 ip6->ip6_plen = htons(plen);
501
502 /*
503 * Concatenate headers and fill in next header fields.
504 * Here we have, on "m"
505 * IPv6 payload
506 * and we insert headers accordingly. Finally, we should be getting:
507 * IPv6 hbh dest1 rthdr ah* [esp* dest2 payload]
508 *
509 * during the header composing process, "m" points to IPv6 header.
510 * "mprev" points to an extension header prior to esp.
511 */
512 {
513 u_char *nexthdrp = &ip6->ip6_nxt;
514 struct mbuf *mprev = m;
515
516 /*
517 * we treat dest2 specially. this makes IPsec processing
518 * much easier. the goal here is to make mprev point the
519 * mbuf prior to dest2.
520 *
521 * result: IPv6 dest2 payload
522 * m and mprev will point to IPv6 header.
523 */
524 if (exthdrs.ip6e_dest2) {
525 if (!hdrsplit)
526 panic("assumption failed: hdr not split");
527 exthdrs.ip6e_dest2->m_next = m->m_next;
528 m->m_next = exthdrs.ip6e_dest2;
529 *mtod(exthdrs.ip6e_dest2, u_char *) = ip6->ip6_nxt;
530 ip6->ip6_nxt = IPPROTO_DSTOPTS;
531 }
532
533 #define MAKE_CHAIN(m, mp, p, i)\
534 do {\
535 if (m) {\
536 if (!hdrsplit) \
537 panic("assumption failed: hdr not split"); \
538 *mtod((m), u_char *) = *(p);\
539 *(p) = (i);\
540 p = mtod((m), u_char *);\
541 (m)->m_next = (mp)->m_next;\
542 (mp)->m_next = (m);\
543 (mp) = (m);\
544 }\
545 } while (/*CONSTCOND*/ 0)
546 /*
547 * result: IPv6 hbh dest1 rthdr dest2 payload
548 * m will point to IPv6 header. mprev will point to the
549 * extension header prior to dest2 (rthdr in the above case).
550 */
551 MAKE_CHAIN(exthdrs.ip6e_hbh, mprev, nexthdrp, IPPROTO_HOPOPTS);
552 MAKE_CHAIN(exthdrs.ip6e_dest1, mprev, nexthdrp,
553 IPPROTO_DSTOPTS);
554 MAKE_CHAIN(exthdrs.ip6e_rthdr, mprev, nexthdrp,
555 IPPROTO_ROUTING);
556 #ifdef MOBILE_IPV6
557 /* a type 2 routing header for route optimization. */
558 MAKE_CHAIN(exthdrs.ip6e_rthdr2, mprev, nexthdrp,
559 IPPROTO_ROUTING);
560 #if NMIP > 0
561 /*
562 * MIP6 homeaddress destination option must reside
563 * after rthdr and before ah/esp/frag hdr.
564 * this order is not recommended in the ipv6 spec of course.
565 * result: IPv6 hbh dest1 rthdr ha dest2 payload.
566 */
567 MAKE_CHAIN(exthdrs.ip6e_hoa, mprev, nexthdrp, IPPROTO_DSTOPTS);
568 #endif /* NMIP > 0 */
569 #endif /* MOBILE_IPV6 */
570 M_CSUM_DATA_IPv6_HL_SET(m->m_pkthdr.csum_data,
571 sizeof(struct ip6_hdr) + optlen);
572
573 #ifdef IPSEC
574 if (!needipsec)
575 goto skip_ipsec2;
576
577 /*
578 * pointers after IPsec headers are not valid any more.
579 * other pointers need a great care too.
580 * (IPsec routines should not mangle mbufs prior to AH/ESP)
581 */
582 exthdrs.ip6e_dest2 = NULL;
583
584 {
585 struct ip6_rthdr *rh = NULL;
586 int segleft_org = 0;
587 #ifdef MOBILE_IPV6
588 int segleft2_org = 0;
589 #endif /* MOBILE_IPV6 */
590 struct ipsec_output_state state;
591
592 if (exthdrs.ip6e_rthdr) {
593 rh = mtod(exthdrs.ip6e_rthdr, struct ip6_rthdr *);
594 segleft_org = rh->ip6r_segleft;
595 rh->ip6r_segleft = 0;
596 }
597
598 #ifdef MOBILE_IPV6
599 if (exthdrs.ip6e_rthdr2) {
600 rh = mtod(exthdrs.ip6e_rthdr2, struct ip6_rthdr *);
601 segleft2_org = rh->ip6r_segleft;
602 rh->ip6r_segleft = 0;
603 }
604 #endif /* MOBILE_IPV6 */
605
606 bzero(&state, sizeof(state));
607 state.m = m;
608 error = ipsec6_output_trans(&state, nexthdrp, mprev, sp, flags,
609 &needipsectun);
610 m = state.m;
611 if (error) {
612 rh = mtod(exthdrs.ip6e_rthdr, struct ip6_rthdr *);
613 /* mbuf is already reclaimed in ipsec6_output_trans. */
614 m = NULL;
615 switch (error) {
616 case EHOSTUNREACH:
617 case ENETUNREACH:
618 case EMSGSIZE:
619 case ENOBUFS:
620 case ENOMEM:
621 break;
622 default:
623 printf("ip6_output (ipsec): error code %d\n", error);
624 /* FALLTHROUGH */
625 case ENOENT:
626 /* don't show these error codes to the user */
627 error = 0;
628 break;
629 }
630 goto bad;
631 }
632 if (exthdrs.ip6e_rthdr) {
633 /* ah6_output doesn't modify mbuf chain */
634 rh->ip6r_segleft = segleft_org;
635 }
636 #ifdef MOBILE_IPV6
637 if (exthdrs.ip6e_rthdr2) {
638 /* ah6_output doesn't modify mbuf chain */
639 rh->ip6r_segleft = segleft2_org;
640 }
641 #endif /* MOBILE_IPV6 */
642 }
643 skip_ipsec2:;
644 #endif
645 }
646
647 #if defined(MOBILE_IPV6) && NMIP > 0
648 /* Swap HoA and CoA */
649 if (exthdrs.ip6e_hoa) {
650 struct ip6_opt_home_address *hoaopt = NULL;
651 struct in6_addr tmpaddr;
652
653 bzero(&tmpaddr, sizeof(tmpaddr));
654
655 hoaopt = mip6_search_hoa_in_destopt(mtod(exthdrs.ip6e_hoa, void *));
656 if (hoaopt == NULL)
657 goto freehdrs;
658
659 if (mip6_ifa_ifwithin6addr(&ip6->ip6_src) == NULL)
660 goto freehdrs;
661
662 ip6 = mtod(m, struct ip6_hdr *);
663 bcopy(&ip6->ip6_src, &tmpaddr, sizeof(ip6->ip6_src));
664 bcopy(hoaopt->ip6oh_addr,
665 &ip6->ip6_src, sizeof(hoaopt->ip6oh_addr));
666 bcopy(&tmpaddr, hoaopt->ip6oh_addr, sizeof(tmpaddr));
667 }
668 #endif /* MOBILE_IPV6 && NMIP > 0 */
669
670 /*
671 * If there is a routing header, replace destination address field
672 * with the first hop of the routing header.
673 */
674 {
675 struct ip6_rthdr *rh = NULL;
676
677 if (exthdrs.ip6e_rthdr)
678 rh = (struct ip6_rthdr *)(mtod(exthdrs.ip6e_rthdr,
679 struct ip6_rthdr *));
680 #ifdef MOBILE_IPV6
681 else if (exthdrs.ip6e_rthdr2)
682 rh = (struct ip6_rthdr *)(mtod(exthdrs.ip6e_rthdr2,
683 struct ip6_rthdr *));
684 #endif /* MOBILE_IPV6 */
685 if (rh) {
686 struct ip6_rthdr0 *rh0;
687 struct in6_addr *addr;
688 struct sockaddr_in6 sa;
689
690 finaldst = ip6->ip6_dst;
691 switch (rh->ip6r_type) {
692 case IPV6_RTHDR_TYPE_0:
693 #ifdef MOBILE_IPV6
694 case IPV6_RTHDR_TYPE_2:
695 #endif /* MOBILE_IPV6 */
696 rh0 = (struct ip6_rthdr0 *)rh;
697 addr = (struct in6_addr *)(rh0 + 1);
698
699 /*
700 * construct a sockaddr_in6 form of
701 * the first hop.
702 *
703 * XXX: we may not have enough
704 * information about its scope zone;
705 * there is no standard API to pass
706 * the information from the
707 * application.
708 */
709 bzero(&sa, sizeof(sa));
710 sa.sin6_family = AF_INET6;
711 sa.sin6_len = sizeof(sa);
712 sa.sin6_addr = addr[0];
713 if ((error = sa6_embedscope(&sa,
714 ip6_use_defzone)) != 0) {
715 goto bad;
716 }
717 ip6->ip6_dst = sa.sin6_addr;
718 (void)memmove(&addr[0], &addr[1],
719 sizeof(struct in6_addr) *
720 (rh0->ip6r0_segleft - 1));
721 addr[rh0->ip6r0_segleft - 1] = finaldst;
722 /* XXX */
723 in6_clearscope(addr + rh0->ip6r0_segleft - 1);
724 break;
725 default: /* is it possible? */
726 error = EINVAL;
727 goto bad;
728 }
729 }
730 }
731
732 /* Source address validation */
733 if (IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_src) &&
734 (flags & IPV6_UNSPECSRC) == 0) {
735 error = EOPNOTSUPP;
736 ip6stat.ip6s_badscope++;
737 goto bad;
738 }
739 if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_src)) {
740 error = EOPNOTSUPP;
741 ip6stat.ip6s_badscope++;
742 goto bad;
743 }
744
745 ip6stat.ip6s_localout++;
746
747 /*
748 * Route packet.
749 */
750 /* initialize cached route */
751 if (ro == NULL) {
752 ro = &ip6route;
753 }
754 ro_pmtu = ro;
755 if (opt && opt->ip6po_rthdr)
756 ro = &opt->ip6po_route;
757 #ifdef MOBILE_IPV6
758 if (exthdrs.ip6e_rthdr2) {
759 ro = &ip6route;
760 bzero((void *)ro, sizeof(*ro));
761 ro_pmtu = ro;
762 }
763 #endif /* MOBILE_IPV6 */
764
765 /*
766 * if specified, try to fill in the traffic class field.
767 * do not override if a non-zero value is already set.
768 * we check the diffserv field and the ecn field separately.
769 */
770 if (opt && opt->ip6po_tclass >= 0) {
771 int mask = 0;
772
773 if ((ip6->ip6_flow & htonl(0xfc << 20)) == 0)
774 mask |= 0xfc;
775 if ((ip6->ip6_flow & htonl(0x03 << 20)) == 0)
776 mask |= 0x03;
777 if (mask != 0)
778 ip6->ip6_flow |= htonl((opt->ip6po_tclass & mask) << 20);
779 }
780
781 /* fill in or override the hop limit field, if necessary. */
782 if (opt && opt->ip6po_hlim != -1)
783 ip6->ip6_hlim = opt->ip6po_hlim & 0xff;
784 else if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
785 if (im6o != NULL)
786 ip6->ip6_hlim = im6o->im6o_multicast_hlim;
787 else
788 ip6->ip6_hlim = ip6_defmcasthlim;
789 }
790
791 #ifdef IPSEC
792 if (needipsec && needipsectun
793 #if defined(MOBILE_IPV6) && NMIP > 0
794 && !((opt && opt->ip6po_hoa) || exthdrs.ip6e_hoa)
795 #endif /* MOBILE_IPV6 && NMIP > 0 */
796 ) {
797 struct ipsec_output_state state;
798
799 /*
800 * All the extension headers will become inaccessible
801 * (since they can be encrypted).
802 * Don't panic, we need no more updates to extension headers
803 * on inner IPv6 packet (since they are now encapsulated).
804 *
805 * IPv6 [ESP|AH] IPv6 [extension headers] payload
806 */
807 bzero(&exthdrs, sizeof(exthdrs));
808 exthdrs.ip6e_ip6 = m;
809
810 bzero(&state, sizeof(state));
811 state.m = m;
812 state.ro = ro;
813 state.dst = rtcache_getdst(ro);
814
815 error = ipsec6_output_tunnel(&state, sp, flags);
816
817 m = state.m;
818 ro_pmtu = ro = state.ro;
819 dst = satocsin6(state.dst);
820 if (error) {
821 /* mbuf is already reclaimed in ipsec6_output_tunnel. */
822 m0 = m = NULL;
823 m = NULL;
824 switch (error) {
825 case EHOSTUNREACH:
826 case ENETUNREACH:
827 case EMSGSIZE:
828 case ENOBUFS:
829 case ENOMEM:
830 break;
831 default:
832 printf("ip6_output (ipsec): error code %d\n", error);
833 /* FALLTHROUGH */
834 case ENOENT:
835 /* don't show these error codes to the user */
836 error = 0;
837 break;
838 }
839 goto bad;
840 }
841
842 exthdrs.ip6e_ip6 = m;
843 }
844 #endif /* IPSEC */
845 #ifdef FAST_IPSEC
846 if (needipsec) {
847 s = splsoftnet();
848 error = ipsec6_process_packet(m,sp->req);
849
850 /*
851 * Preserve KAME behaviour: ENOENT can be returned
852 * when an SA acquire is in progress. Don't propagate
853 * this to user-level; it confuses applications.
854 * XXX this will go away when the SADB is redone.
855 */
856 if (error == ENOENT)
857 error = 0;
858 splx(s);
859 goto done;
860 }
861 #endif /* FAST_IPSEC */
862
863
864
865 /* adjust pointer */
866 ip6 = mtod(m, struct ip6_hdr *);
867
868 sockaddr_in6_init(&dst_sa, &ip6->ip6_dst, 0, 0, 0);
869 if ((error = in6_selectroute(&dst_sa, opt, im6o, ro,
870 &ifp, &rt, 0)) != 0) {
871 if (ifp != NULL)
872 in6_ifstat_inc(ifp, ifs6_out_discard);
873 goto bad;
874 }
875 if (rt == NULL) {
876 /*
877 * If in6_selectroute() does not return a route entry,
878 * dst may not have been updated.
879 */
880 rtcache_setdst(ro, sin6tosa(&dst_sa));
881 }
882
883 /*
884 * then rt (for unicast) and ifp must be non-NULL valid values.
885 */
886 if ((flags & IPV6_FORWARDING) == 0) {
887 /* XXX: the FORWARDING flag can be set for mrouting. */
888 in6_ifstat_inc(ifp, ifs6_out_request);
889 }
890 if (rt != NULL) {
891 ia = (struct in6_ifaddr *)(rt->rt_ifa);
892 rt->rt_use++;
893 }
894
895 /*
896 * The outgoing interface must be in the zone of source and
897 * destination addresses. We should use ia_ifp to support the
898 * case of sending packets to an address of our own.
899 */
900 if (ia != NULL && ia->ia_ifp)
901 origifp = ia->ia_ifp;
902 else
903 origifp = ifp;
904
905 src0 = ip6->ip6_src;
906 if (in6_setscope(&src0, origifp, &zone))
907 goto badscope;
908 sockaddr_in6_init(&src_sa, &ip6->ip6_src, 0, 0, 0);
909 if (sa6_recoverscope(&src_sa) || zone != src_sa.sin6_scope_id)
910 goto badscope;
911
912 dst0 = ip6->ip6_dst;
913 if (in6_setscope(&dst0, origifp, &zone))
914 goto badscope;
915 /* re-initialize to be sure */
916 sockaddr_in6_init(&dst_sa, &ip6->ip6_dst, 0, 0, 0);
917 if (sa6_recoverscope(&dst_sa) || zone != dst_sa.sin6_scope_id)
918 goto badscope;
919
920 /* scope check is done. */
921
922 if (rt == NULL || IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
923 if (dst == NULL)
924 dst = satocsin6(rtcache_getdst(ro));
925 KASSERT(dst != NULL);
926 } else if (opt && rtcache_validate(&opt->ip6po_nextroute) != NULL) {
927 /*
928 * The nexthop is explicitly specified by the
929 * application. We assume the next hop is an IPv6
930 * address.
931 */
932 dst = (struct sockaddr_in6 *)opt->ip6po_nexthop;
933 } else if ((rt->rt_flags & RTF_GATEWAY))
934 dst = (struct sockaddr_in6 *)rt->rt_gateway;
935 else if (dst == NULL)
936 dst = satocsin6(rtcache_getdst(ro));
937
938 /*
939 * XXXXXX: original code follows:
940 */
941 if (!IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst))
942 m->m_flags &= ~(M_BCAST | M_MCAST); /* just in case */
943 else {
944 struct in6_multi *in6m;
945
946 m->m_flags = (m->m_flags & ~M_BCAST) | M_MCAST;
947
948 in6_ifstat_inc(ifp, ifs6_out_mcast);
949
950 /*
951 * Confirm that the outgoing interface supports multicast.
952 */
953 if (!(ifp->if_flags & IFF_MULTICAST)) {
954 ip6stat.ip6s_noroute++;
955 in6_ifstat_inc(ifp, ifs6_out_discard);
956 error = ENETUNREACH;
957 goto bad;
958 }
959
960 IN6_LOOKUP_MULTI(ip6->ip6_dst, ifp, in6m);
961 if (in6m != NULL &&
962 (im6o == NULL || im6o->im6o_multicast_loop)) {
963 /*
964 * If we belong to the destination multicast group
965 * on the outgoing interface, and the caller did not
966 * forbid loopback, loop back a copy.
967 */
968 KASSERT(dst != NULL);
969 ip6_mloopback(ifp, m, dst);
970 } else {
971 /*
972 * If we are acting as a multicast router, perform
973 * multicast forwarding as if the packet had just
974 * arrived on the interface to which we are about
975 * to send. The multicast forwarding function
976 * recursively calls this function, using the
977 * IPV6_FORWARDING flag to prevent infinite recursion.
978 *
979 * Multicasts that are looped back by ip6_mloopback(),
980 * above, will be forwarded by the ip6_input() routine,
981 * if necessary.
982 */
983 if (ip6_mrouter && (flags & IPV6_FORWARDING) == 0) {
984 if (ip6_mforward(ip6, ifp, m) != 0) {
985 m_freem(m);
986 goto done;
987 }
988 }
989 }
990 /*
991 * Multicasts with a hoplimit of zero may be looped back,
992 * above, but must not be transmitted on a network.
993 * Also, multicasts addressed to the loopback interface
994 * are not sent -- the above call to ip6_mloopback() will
995 * loop back a copy if this host actually belongs to the
996 * destination group on the loopback interface.
997 */
998 if (ip6->ip6_hlim == 0 || (ifp->if_flags & IFF_LOOPBACK) ||
999 IN6_IS_ADDR_MC_INTFACELOCAL(&ip6->ip6_dst)) {
1000 m_freem(m);
1001 goto done;
1002 }
1003 }
1004
1005 /*
1006 * Fill the outgoing interface to tell the upper layer
1007 * to increment per-interface statistics.
1008 */
1009 if (ifpp)
1010 *ifpp = ifp;
1011
1012 /* Determine path MTU. */
1013 if ((error = ip6_getpmtu(ro_pmtu, ro, ifp, &finaldst, &mtu,
1014 &alwaysfrag)) != 0)
1015 goto bad;
1016 #ifdef IPSEC
1017 if (needipsectun)
1018 mtu = IPV6_MMTU;
1019 #endif
1020
1021 /*
1022 * The caller of this function may specify to use the minimum MTU
1023 * in some cases.
1024 * An advanced API option (IPV6_USE_MIN_MTU) can also override MTU
1025 * setting. The logic is a bit complicated; by default, unicast
1026 * packets will follow path MTU while multicast packets will be sent at
1027 * the minimum MTU. If IP6PO_MINMTU_ALL is specified, all packets
1028 * including unicast ones will be sent at the minimum MTU. Multicast
1029 * packets will always be sent at the minimum MTU unless
1030 * IP6PO_MINMTU_DISABLE is explicitly specified.
1031 * See RFC 3542 for more details.
1032 */
1033 if (mtu > IPV6_MMTU) {
1034 if ((flags & IPV6_MINMTU))
1035 mtu = IPV6_MMTU;
1036 else if (opt && opt->ip6po_minmtu == IP6PO_MINMTU_ALL)
1037 mtu = IPV6_MMTU;
1038 else if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) &&
1039 (opt == NULL ||
1040 opt->ip6po_minmtu != IP6PO_MINMTU_DISABLE)) {
1041 mtu = IPV6_MMTU;
1042 }
1043 }
1044
1045 /*
1046 * clear embedded scope identifiers if necessary.
1047 * in6_clearscope will touch the addresses only when necessary.
1048 */
1049 in6_clearscope(&ip6->ip6_src);
1050 in6_clearscope(&ip6->ip6_dst);
1051
1052 /*
1053 * If the outgoing packet contains a hop-by-hop options header,
1054 * it must be examined and processed even by the source node.
1055 * (RFC 2460, section 4.)
1056 */
1057 if (exthdrs.ip6e_hbh) {
1058 struct ip6_hbh *hbh = mtod(exthdrs.ip6e_hbh, struct ip6_hbh *);
1059 u_int32_t dummy1; /* XXX unused */
1060 u_int32_t dummy2; /* XXX unused */
1061
1062 /*
1063 * XXX: if we have to send an ICMPv6 error to the sender,
1064 * we need the M_LOOP flag since icmp6_error() expects
1065 * the IPv6 and the hop-by-hop options header are
1066 * continuous unless the flag is set.
1067 */
1068 m->m_flags |= M_LOOP;
1069 m->m_pkthdr.rcvif = ifp;
1070 if (ip6_process_hopopts(m, (u_int8_t *)(hbh + 1),
1071 ((hbh->ip6h_len + 1) << 3) - sizeof(struct ip6_hbh),
1072 &dummy1, &dummy2) < 0) {
1073 /* m was already freed at this point */
1074 error = EINVAL;/* better error? */
1075 goto done;
1076 }
1077 m->m_flags &= ~M_LOOP; /* XXX */
1078 m->m_pkthdr.rcvif = NULL;
1079 }
1080
1081 #ifdef PFIL_HOOKS
1082 /*
1083 * Run through list of hooks for output packets.
1084 */
1085 if ((error = pfil_run_hooks(&inet6_pfil_hook, &m, ifp, PFIL_OUT)) != 0)
1086 goto done;
1087 if (m == NULL)
1088 goto done;
1089 ip6 = mtod(m, struct ip6_hdr *);
1090 #endif /* PFIL_HOOKS */
1091 /*
1092 * Send the packet to the outgoing interface.
1093 * If necessary, do IPv6 fragmentation before sending.
1094 *
1095 * the logic here is rather complex:
1096 * 1: normal case (dontfrag == 0, alwaysfrag == 0)
1097 * 1-a: send as is if tlen <= path mtu
1098 * 1-b: fragment if tlen > path mtu
1099 *
1100 * 2: if user asks us not to fragment (dontfrag == 1)
1101 * 2-a: send as is if tlen <= interface mtu
1102 * 2-b: error if tlen > interface mtu
1103 *
1104 * 3: if we always need to attach fragment header (alwaysfrag == 1)
1105 * always fragment
1106 *
1107 * 4: if dontfrag == 1 && alwaysfrag == 1
1108 * error, as we cannot handle this conflicting request
1109 */
1110 tlen = m->m_pkthdr.len;
1111 tso = (m->m_pkthdr.csum_flags & M_CSUM_TSOv6) != 0;
1112 if (opt && (opt->ip6po_flags & IP6PO_DONTFRAG))
1113 dontfrag = 1;
1114 else
1115 dontfrag = 0;
1116
1117 if (dontfrag && alwaysfrag) { /* case 4 */
1118 /* conflicting request - can't transmit */
1119 error = EMSGSIZE;
1120 goto bad;
1121 }
1122 if (dontfrag && (!tso && tlen > IN6_LINKMTU(ifp))) { /* case 2-b */
1123 /*
1124 * Even if the DONTFRAG option is specified, we cannot send the
1125 * packet when the data length is larger than the MTU of the
1126 * outgoing interface.
1127 * Notify the error by sending IPV6_PATHMTU ancillary data as
1128 * well as returning an error code (the latter is not described
1129 * in the API spec.)
1130 */
1131 u_int32_t mtu32;
1132 struct ip6ctlparam ip6cp;
1133
1134 mtu32 = (u_int32_t)mtu;
1135 bzero(&ip6cp, sizeof(ip6cp));
1136 ip6cp.ip6c_cmdarg = (void *)&mtu32;
1137 pfctlinput2(PRC_MSGSIZE,
1138 rtcache_getdst(ro_pmtu), &ip6cp);
1139
1140 error = EMSGSIZE;
1141 goto bad;
1142 }
1143
1144 /*
1145 * transmit packet without fragmentation
1146 */
1147 if (dontfrag || (!alwaysfrag && (tlen <= mtu || tso))) {
1148 /* case 1-a and 2-a */
1149 struct in6_ifaddr *ia6;
1150 int sw_csum;
1151
1152 ip6 = mtod(m, struct ip6_hdr *);
1153 ia6 = in6_ifawithifp(ifp, &ip6->ip6_src);
1154 if (ia6) {
1155 /* Record statistics for this interface address. */
1156 ia6->ia_ifa.ifa_data.ifad_outbytes += m->m_pkthdr.len;
1157 }
1158 #ifdef IPSEC
1159 /* clean ipsec history once it goes out of the node */
1160 ipsec_delaux(m);
1161 #endif
1162
1163 sw_csum = m->m_pkthdr.csum_flags & ~ifp->if_csum_flags_tx;
1164 if ((sw_csum & (M_CSUM_UDPv6|M_CSUM_TCPv6)) != 0) {
1165 if (IN6_NEED_CHECKSUM(ifp,
1166 sw_csum & (M_CSUM_UDPv6|M_CSUM_TCPv6))) {
1167 in6_delayed_cksum(m);
1168 }
1169 m->m_pkthdr.csum_flags &= ~(M_CSUM_UDPv6|M_CSUM_TCPv6);
1170 }
1171
1172 KASSERT(dst != NULL);
1173 if (__predict_true(!tso ||
1174 (ifp->if_capenable & IFCAP_TSOv6) != 0)) {
1175 error = nd6_output(ifp, origifp, m, dst, rt);
1176 } else {
1177 error = ip6_tso_output(ifp, origifp, m, dst, rt);
1178 }
1179 goto done;
1180 }
1181
1182 if (tso) {
1183 error = EINVAL; /* XXX */
1184 goto bad;
1185 }
1186
1187 /*
1188 * try to fragment the packet. case 1-b and 3
1189 */
1190 if (mtu < IPV6_MMTU) {
1191 /* path MTU cannot be less than IPV6_MMTU */
1192 error = EMSGSIZE;
1193 in6_ifstat_inc(ifp, ifs6_out_fragfail);
1194 goto bad;
1195 } else if (ip6->ip6_plen == 0) {
1196 /* jumbo payload cannot be fragmented */
1197 error = EMSGSIZE;
1198 in6_ifstat_inc(ifp, ifs6_out_fragfail);
1199 goto bad;
1200 } else {
1201 struct mbuf **mnext, *m_frgpart;
1202 struct ip6_frag *ip6f;
1203 u_int32_t id = htonl(ip6_randomid());
1204 u_char nextproto;
1205 #if 0 /* see below */
1206 struct ip6ctlparam ip6cp;
1207 u_int32_t mtu32;
1208 #endif
1209
1210 /*
1211 * Too large for the destination or interface;
1212 * fragment if possible.
1213 * Must be able to put at least 8 bytes per fragment.
1214 */
1215 hlen = unfragpartlen;
1216 if (mtu > IPV6_MAXPACKET)
1217 mtu = IPV6_MAXPACKET;
1218
1219 #if 0
1220 /*
1221 * It is believed this code is a leftover from the
1222 * development of the IPV6_RECVPATHMTU sockopt and
1223 * associated work to implement RFC3542.
1224 * It's not entirely clear what the intent of the API
1225 * is at this point, so disable this code for now.
1226 * The IPV6_RECVPATHMTU sockopt and/or IPV6_DONTFRAG
1227 * will send notifications if the application requests.
1228 */
1229
1230 /* Notify a proper path MTU to applications. */
1231 mtu32 = (u_int32_t)mtu;
1232 bzero(&ip6cp, sizeof(ip6cp));
1233 ip6cp.ip6c_cmdarg = (void *)&mtu32;
1234 pfctlinput2(PRC_MSGSIZE,
1235 rtcache_getdst(ro_pmtu), &ip6cp);
1236 #endif
1237
1238 len = (mtu - hlen - sizeof(struct ip6_frag)) & ~7;
1239 if (len < 8) {
1240 error = EMSGSIZE;
1241 in6_ifstat_inc(ifp, ifs6_out_fragfail);
1242 goto bad;
1243 }
1244
1245 mnext = &m->m_nextpkt;
1246
1247 /*
1248 * Change the next header field of the last header in the
1249 * unfragmentable part.
1250 */
1251 #ifdef MOBILE_IPV6
1252 #if NMIP > 0
1253 if (exthdrs.ip6e_hoa) {
1254 nextproto = *mtod(exthdrs.ip6e_hoa, u_char *);
1255 *mtod(exthdrs.ip6e_hoa, u_char *) = IPPROTO_FRAGMENT;
1256 } else
1257 #endif /* NMIP > 0 */
1258 if (exthdrs.ip6e_rthdr2) {
1259 nextproto = *mtod(exthdrs.ip6e_rthdr2, u_char *);
1260 *mtod(exthdrs.ip6e_rthdr2, u_char *) = IPPROTO_FRAGMENT;
1261 } else
1262 #endif /* MOBILE_IPV6 */
1263 if (exthdrs.ip6e_rthdr) {
1264 nextproto = *mtod(exthdrs.ip6e_rthdr, u_char *);
1265 *mtod(exthdrs.ip6e_rthdr, u_char *) = IPPROTO_FRAGMENT;
1266 } else if (exthdrs.ip6e_dest1) {
1267 nextproto = *mtod(exthdrs.ip6e_dest1, u_char *);
1268 *mtod(exthdrs.ip6e_dest1, u_char *) = IPPROTO_FRAGMENT;
1269 } else if (exthdrs.ip6e_hbh) {
1270 nextproto = *mtod(exthdrs.ip6e_hbh, u_char *);
1271 *mtod(exthdrs.ip6e_hbh, u_char *) = IPPROTO_FRAGMENT;
1272 } else {
1273 nextproto = ip6->ip6_nxt;
1274 ip6->ip6_nxt = IPPROTO_FRAGMENT;
1275 }
1276
1277 if ((m->m_pkthdr.csum_flags & (M_CSUM_UDPv6|M_CSUM_TCPv6))
1278 != 0) {
1279 if (IN6_NEED_CHECKSUM(ifp,
1280 m->m_pkthdr.csum_flags &
1281 (M_CSUM_UDPv6|M_CSUM_TCPv6))) {
1282 in6_delayed_cksum(m);
1283 }
1284 m->m_pkthdr.csum_flags &= ~(M_CSUM_UDPv6|M_CSUM_TCPv6);
1285 }
1286
1287 /*
1288 * Loop through length of segment after first fragment,
1289 * make new header and copy data of each part and link onto
1290 * chain.
1291 */
1292 m0 = m;
1293 for (off = hlen; off < tlen; off += len) {
1294 struct mbuf *mlast;
1295
1296 MGETHDR(m, M_DONTWAIT, MT_HEADER);
1297 if (!m) {
1298 error = ENOBUFS;
1299 ip6stat.ip6s_odropped++;
1300 goto sendorfree;
1301 }
1302 m->m_pkthdr.rcvif = NULL;
1303 m->m_flags = m0->m_flags & M_COPYFLAGS;
1304 *mnext = m;
1305 mnext = &m->m_nextpkt;
1306 m->m_data += max_linkhdr;
1307 mhip6 = mtod(m, struct ip6_hdr *);
1308 *mhip6 = *ip6;
1309 m->m_len = sizeof(*mhip6);
1310 error = ip6_insertfraghdr(m0, m, hlen, &ip6f);
1311 if (error) {
1312 ip6stat.ip6s_odropped++;
1313 goto sendorfree;
1314 }
1315 ip6f->ip6f_offlg = htons((u_int16_t)((off - hlen) & ~7));
1316 if (off + len >= tlen)
1317 len = tlen - off;
1318 else
1319 ip6f->ip6f_offlg |= IP6F_MORE_FRAG;
1320 mhip6->ip6_plen = htons((u_int16_t)(len + hlen +
1321 sizeof(*ip6f) - sizeof(struct ip6_hdr)));
1322 if ((m_frgpart = m_copy(m0, off, len)) == 0) {
1323 error = ENOBUFS;
1324 ip6stat.ip6s_odropped++;
1325 goto sendorfree;
1326 }
1327 for (mlast = m; mlast->m_next; mlast = mlast->m_next)
1328 ;
1329 mlast->m_next = m_frgpart;
1330 m->m_pkthdr.len = len + hlen + sizeof(*ip6f);
1331 m->m_pkthdr.rcvif = (struct ifnet *)0;
1332 ip6f->ip6f_reserved = 0;
1333 ip6f->ip6f_ident = id;
1334 ip6f->ip6f_nxt = nextproto;
1335 ip6stat.ip6s_ofragments++;
1336 in6_ifstat_inc(ifp, ifs6_out_fragcreat);
1337 }
1338
1339 in6_ifstat_inc(ifp, ifs6_out_fragok);
1340 }
1341
1342 /*
1343 * Remove leading garbages.
1344 */
1345 sendorfree:
1346 m = m0->m_nextpkt;
1347 m0->m_nextpkt = 0;
1348 m_freem(m0);
1349 for (m0 = m; m; m = m0) {
1350 m0 = m->m_nextpkt;
1351 m->m_nextpkt = 0;
1352 if (error == 0) {
1353 struct in6_ifaddr *ia6;
1354 ip6 = mtod(m, struct ip6_hdr *);
1355 ia6 = in6_ifawithifp(ifp, &ip6->ip6_src);
1356 if (ia6) {
1357 /*
1358 * Record statistics for this interface
1359 * address.
1360 */
1361 ia6->ia_ifa.ifa_data.ifad_outbytes +=
1362 m->m_pkthdr.len;
1363 }
1364 #ifdef IPSEC
1365 /* clean ipsec history once it goes out of the node */
1366 ipsec_delaux(m);
1367 #endif
1368 KASSERT(dst != NULL);
1369 error = nd6_output(ifp, origifp, m, dst, rt);
1370 } else
1371 m_freem(m);
1372 }
1373
1374 if (error == 0)
1375 ip6stat.ip6s_fragmented++;
1376
1377 done:
1378 rtcache_free(&ip6route);
1379
1380 #ifdef IPSEC
1381 if (sp != NULL)
1382 key_freesp(sp);
1383 #endif /* IPSEC */
1384 #ifdef FAST_IPSEC
1385 if (sp != NULL)
1386 KEY_FREESP(&sp);
1387 #endif /* FAST_IPSEC */
1388
1389
1390 return (error);
1391
1392 freehdrs:
1393 m_freem(exthdrs.ip6e_hbh); /* m_freem will check if mbuf is 0 */
1394 m_freem(exthdrs.ip6e_dest1);
1395 m_freem(exthdrs.ip6e_rthdr);
1396 m_freem(exthdrs.ip6e_dest2);
1397 #ifdef MOBILE_IPV6
1398 m_freem(exthdrs.ip6e_rthdr2);
1399 #if NMIP > 0
1400 m_freem(exthdrs.ip6e_hoa);
1401 #endif /* NMIP > 0 */
1402 #endif /* MOBILE_IPV6 */
1403 /* FALLTHROUGH */
1404 bad:
1405 m_freem(m);
1406 goto done;
1407 badscope:
1408 ip6stat.ip6s_badscope++;
1409 in6_ifstat_inc(origifp, ifs6_out_discard);
1410 if (error == 0)
1411 error = EHOSTUNREACH; /* XXX */
1412 goto bad;
1413 }
1414
1415 static int
1416 ip6_copyexthdr(struct mbuf **mp, void *hdr, int hlen)
1417 {
1418 struct mbuf *m;
1419
1420 if (hlen > MCLBYTES)
1421 return (ENOBUFS); /* XXX */
1422
1423 MGET(m, M_DONTWAIT, MT_DATA);
1424 if (!m)
1425 return (ENOBUFS);
1426
1427 if (hlen > MLEN) {
1428 MCLGET(m, M_DONTWAIT);
1429 if ((m->m_flags & M_EXT) == 0) {
1430 m_free(m);
1431 return (ENOBUFS);
1432 }
1433 }
1434 m->m_len = hlen;
1435 if (hdr)
1436 bcopy(hdr, mtod(m, void *), hlen);
1437
1438 *mp = m;
1439 return (0);
1440 }
1441
1442 /*
1443 * Process a delayed payload checksum calculation.
1444 */
1445 void
1446 in6_delayed_cksum(struct mbuf *m)
1447 {
1448 uint16_t csum, offset;
1449
1450 KASSERT((m->m_pkthdr.csum_flags & (M_CSUM_UDPv6|M_CSUM_TCPv6)) != 0);
1451 KASSERT((~m->m_pkthdr.csum_flags & (M_CSUM_UDPv6|M_CSUM_TCPv6)) != 0);
1452 KASSERT((m->m_pkthdr.csum_flags
1453 & (M_CSUM_UDPv4|M_CSUM_TCPv4|M_CSUM_TSOv4)) == 0);
1454
1455 offset = M_CSUM_DATA_IPv6_HL(m->m_pkthdr.csum_data);
1456 csum = in6_cksum(m, 0, offset, m->m_pkthdr.len - offset);
1457 if (csum == 0 && (m->m_pkthdr.csum_flags & M_CSUM_UDPv6) != 0) {
1458 csum = 0xffff;
1459 }
1460
1461 offset += M_CSUM_DATA_IPv6_OFFSET(m->m_pkthdr.csum_data);
1462 if ((offset + sizeof(csum)) > m->m_len) {
1463 m_copyback(m, offset, sizeof(csum), &csum);
1464 } else {
1465 *(uint16_t *)(mtod(m, char *) + offset) = csum;
1466 }
1467 }
1468
1469 /*
1470 * Insert jumbo payload option.
1471 */
1472 static int
1473 ip6_insert_jumboopt(struct ip6_exthdrs *exthdrs, u_int32_t plen)
1474 {
1475 struct mbuf *mopt;
1476 u_int8_t *optbuf;
1477 u_int32_t v;
1478
1479 #define JUMBOOPTLEN 8 /* length of jumbo payload option and padding */
1480
1481 /*
1482 * If there is no hop-by-hop options header, allocate new one.
1483 * If there is one but it doesn't have enough space to store the
1484 * jumbo payload option, allocate a cluster to store the whole options.
1485 * Otherwise, use it to store the options.
1486 */
1487 if (exthdrs->ip6e_hbh == 0) {
1488 MGET(mopt, M_DONTWAIT, MT_DATA);
1489 if (mopt == 0)
1490 return (ENOBUFS);
1491 mopt->m_len = JUMBOOPTLEN;
1492 optbuf = mtod(mopt, u_int8_t *);
1493 optbuf[1] = 0; /* = ((JUMBOOPTLEN) >> 3) - 1 */
1494 exthdrs->ip6e_hbh = mopt;
1495 } else {
1496 struct ip6_hbh *hbh;
1497
1498 mopt = exthdrs->ip6e_hbh;
1499 if (M_TRAILINGSPACE(mopt) < JUMBOOPTLEN) {
1500 /*
1501 * XXX assumption:
1502 * - exthdrs->ip6e_hbh is not referenced from places
1503 * other than exthdrs.
1504 * - exthdrs->ip6e_hbh is not an mbuf chain.
1505 */
1506 int oldoptlen = mopt->m_len;
1507 struct mbuf *n;
1508
1509 /*
1510 * XXX: give up if the whole (new) hbh header does
1511 * not fit even in an mbuf cluster.
1512 */
1513 if (oldoptlen + JUMBOOPTLEN > MCLBYTES)
1514 return (ENOBUFS);
1515
1516 /*
1517 * As a consequence, we must always prepare a cluster
1518 * at this point.
1519 */
1520 MGET(n, M_DONTWAIT, MT_DATA);
1521 if (n) {
1522 MCLGET(n, M_DONTWAIT);
1523 if ((n->m_flags & M_EXT) == 0) {
1524 m_freem(n);
1525 n = NULL;
1526 }
1527 }
1528 if (!n)
1529 return (ENOBUFS);
1530 n->m_len = oldoptlen + JUMBOOPTLEN;
1531 bcopy(mtod(mopt, void *), mtod(n, void *),
1532 oldoptlen);
1533 optbuf = mtod(n, u_int8_t *) + oldoptlen;
1534 m_freem(mopt);
1535 mopt = exthdrs->ip6e_hbh = n;
1536 } else {
1537 optbuf = mtod(mopt, u_int8_t *) + mopt->m_len;
1538 mopt->m_len += JUMBOOPTLEN;
1539 }
1540 optbuf[0] = IP6OPT_PADN;
1541 optbuf[1] = 0;
1542
1543 /*
1544 * Adjust the header length according to the pad and
1545 * the jumbo payload option.
1546 */
1547 hbh = mtod(mopt, struct ip6_hbh *);
1548 hbh->ip6h_len += (JUMBOOPTLEN >> 3);
1549 }
1550
1551 /* fill in the option. */
1552 optbuf[2] = IP6OPT_JUMBO;
1553 optbuf[3] = 4;
1554 v = (u_int32_t)htonl(plen + JUMBOOPTLEN);
1555 bcopy(&v, &optbuf[4], sizeof(u_int32_t));
1556
1557 /* finally, adjust the packet header length */
1558 exthdrs->ip6e_ip6->m_pkthdr.len += JUMBOOPTLEN;
1559
1560 return (0);
1561 #undef JUMBOOPTLEN
1562 }
1563
1564 /*
1565 * Insert fragment header and copy unfragmentable header portions.
1566 */
1567 static int
1568 ip6_insertfraghdr(struct mbuf *m0, struct mbuf *m, int hlen,
1569 struct ip6_frag **frghdrp)
1570 {
1571 struct mbuf *n, *mlast;
1572
1573 if (hlen > sizeof(struct ip6_hdr)) {
1574 n = m_copym(m0, sizeof(struct ip6_hdr),
1575 hlen - sizeof(struct ip6_hdr), M_DONTWAIT);
1576 if (n == 0)
1577 return (ENOBUFS);
1578 m->m_next = n;
1579 } else
1580 n = m;
1581
1582 /* Search for the last mbuf of unfragmentable part. */
1583 for (mlast = n; mlast->m_next; mlast = mlast->m_next)
1584 ;
1585
1586 if ((mlast->m_flags & M_EXT) == 0 &&
1587 M_TRAILINGSPACE(mlast) >= sizeof(struct ip6_frag)) {
1588 /* use the trailing space of the last mbuf for the fragment hdr */
1589 *frghdrp = (struct ip6_frag *)(mtod(mlast, char *) +
1590 mlast->m_len);
1591 mlast->m_len += sizeof(struct ip6_frag);
1592 m->m_pkthdr.len += sizeof(struct ip6_frag);
1593 } else {
1594 /* allocate a new mbuf for the fragment header */
1595 struct mbuf *mfrg;
1596
1597 MGET(mfrg, M_DONTWAIT, MT_DATA);
1598 if (mfrg == 0)
1599 return (ENOBUFS);
1600 mfrg->m_len = sizeof(struct ip6_frag);
1601 *frghdrp = mtod(mfrg, struct ip6_frag *);
1602 mlast->m_next = mfrg;
1603 }
1604
1605 return (0);
1606 }
1607
1608 static int
1609 ip6_getpmtu(struct route *ro_pmtu, struct route *ro, struct ifnet *ifp,
1610 const struct in6_addr *dst, u_long *mtup, int *alwaysfragp)
1611 {
1612 struct rtentry *rt;
1613 u_int32_t mtu = 0;
1614 int alwaysfrag = 0;
1615 int error = 0;
1616
1617 if (ro_pmtu != ro) {
1618 union {
1619 struct sockaddr dst;
1620 struct sockaddr_in6 dst6;
1621 } u;
1622
1623 /* The first hop and the final destination may differ. */
1624 sockaddr_in6_init(&u.dst6, dst, 0, 0, 0);
1625 rt = rtcache_lookup(ro_pmtu, &u.dst);
1626 } else
1627 rt = rtcache_validate(ro_pmtu);
1628 if (rt != NULL) {
1629 u_int32_t ifmtu;
1630
1631 if (ifp == NULL)
1632 ifp = rt->rt_ifp;
1633 ifmtu = IN6_LINKMTU(ifp);
1634 mtu = rt->rt_rmx.rmx_mtu;
1635 if (mtu == 0)
1636 mtu = ifmtu;
1637 else if (mtu < IPV6_MMTU) {
1638 /*
1639 * RFC2460 section 5, last paragraph:
1640 * if we record ICMPv6 too big message with
1641 * mtu < IPV6_MMTU, transmit packets sized IPV6_MMTU
1642 * or smaller, with fragment header attached.
1643 * (fragment header is needed regardless from the
1644 * packet size, for translators to identify packets)
1645 */
1646 alwaysfrag = 1;
1647 mtu = IPV6_MMTU;
1648 } else if (mtu > ifmtu) {
1649 /*
1650 * The MTU on the route is larger than the MTU on
1651 * the interface! This shouldn't happen, unless the
1652 * MTU of the interface has been changed after the
1653 * interface was brought up. Change the MTU in the
1654 * route to match the interface MTU (as long as the
1655 * field isn't locked).
1656 */
1657 mtu = ifmtu;
1658 if (!(rt->rt_rmx.rmx_locks & RTV_MTU))
1659 rt->rt_rmx.rmx_mtu = mtu;
1660 }
1661 } else if (ifp) {
1662 mtu = IN6_LINKMTU(ifp);
1663 } else
1664 error = EHOSTUNREACH; /* XXX */
1665
1666 *mtup = mtu;
1667 if (alwaysfragp)
1668 *alwaysfragp = alwaysfrag;
1669 return (error);
1670 }
1671
1672 /*
1673 * IP6 socket option processing.
1674 */
1675 int
1676 ip6_ctloutput(int op, struct socket *so, int level, int optname,
1677 struct mbuf **mp)
1678 {
1679 int privileged, optdatalen, uproto;
1680 void *optdata;
1681 struct in6pcb *in6p = sotoin6pcb(so);
1682 struct mbuf *m = *mp;
1683 int error, optval;
1684 int optlen;
1685 struct lwp *l = curlwp; /* XXX */
1686
1687 optlen = m ? m->m_len : 0;
1688 error = optval = 0;
1689 privileged = (l == 0 || kauth_authorize_generic(l->l_cred,
1690 KAUTH_GENERIC_ISSUSER, NULL)) ? 0 : 1;
1691 uproto = (int)so->so_proto->pr_protocol;
1692
1693 if (level != IPPROTO_IPV6) {
1694 if (op == PRCO_SETOPT && *mp)
1695 (void)m_free(*mp);
1696 return ENOPROTOOPT;
1697 }
1698 switch (op) {
1699 case PRCO_SETOPT:
1700 switch (optname) {
1701 #ifdef RFC2292
1702 case IPV6_2292PKTOPTIONS:
1703 /* m is freed in ip6_pcbopts */
1704 error = ip6_pcbopts(&in6p->in6p_outputopts,
1705 m, so);
1706 break;
1707 #endif
1708
1709 /*
1710 * Use of some Hop-by-Hop options or some
1711 * Destination options, might require special
1712 * privilege. That is, normal applications
1713 * (without special privilege) might be forbidden
1714 * from setting certain options in outgoing packets,
1715 * and might never see certain options in received
1716 * packets. [RFC 2292 Section 6]
1717 * KAME specific note:
1718 * KAME prevents non-privileged users from sending or
1719 * receiving ANY hbh/dst options in order to avoid
1720 * overhead of parsing options in the kernel.
1721 */
1722 case IPV6_RECVHOPOPTS:
1723 case IPV6_RECVDSTOPTS:
1724 case IPV6_RECVRTHDRDSTOPTS:
1725 if (!privileged) {
1726 error = EPERM;
1727 break;
1728 }
1729 /* FALLTHROUGH */
1730 case IPV6_UNICAST_HOPS:
1731 case IPV6_HOPLIMIT:
1732 case IPV6_FAITH:
1733
1734 case IPV6_RECVPKTINFO:
1735 case IPV6_RECVHOPLIMIT:
1736 case IPV6_RECVRTHDR:
1737 case IPV6_RECVPATHMTU:
1738 case IPV6_RECVTCLASS:
1739 case IPV6_V6ONLY:
1740 if (optlen != sizeof(int)) {
1741 error = EINVAL;
1742 break;
1743 }
1744 optval = *mtod(m, int *);
1745 switch (optname) {
1746
1747 case IPV6_UNICAST_HOPS:
1748 if (optval < -1 || optval >= 256)
1749 error = EINVAL;
1750 else {
1751 /* -1 = kernel default */
1752 in6p->in6p_hops = optval;
1753 }
1754 break;
1755 #define OPTSET(bit) \
1756 do { \
1757 if (optval) \
1758 in6p->in6p_flags |= (bit); \
1759 else \
1760 in6p->in6p_flags &= ~(bit); \
1761 } while (/*CONSTCOND*/ 0)
1762
1763 #ifdef RFC2292
1764 #define OPTSET2292(bit) \
1765 do { \
1766 in6p->in6p_flags |= IN6P_RFC2292; \
1767 if (optval) \
1768 in6p->in6p_flags |= (bit); \
1769 else \
1770 in6p->in6p_flags &= ~(bit); \
1771 } while (/*CONSTCOND*/ 0)
1772 #endif
1773
1774 #define OPTBIT(bit) (in6p->in6p_flags & (bit) ? 1 : 0)
1775
1776 case IPV6_RECVPKTINFO:
1777 #ifdef RFC2292
1778 /* cannot mix with RFC2292 */
1779 if (OPTBIT(IN6P_RFC2292)) {
1780 error = EINVAL;
1781 break;
1782 }
1783 #endif
1784 OPTSET(IN6P_PKTINFO);
1785 break;
1786
1787 case IPV6_HOPLIMIT:
1788 {
1789 struct ip6_pktopts **optp;
1790
1791 #ifdef RFC2292
1792 /* cannot mix with RFC2292 */
1793 if (OPTBIT(IN6P_RFC2292)) {
1794 error = EINVAL;
1795 break;
1796 }
1797 #endif
1798 optp = &in6p->in6p_outputopts;
1799 error = ip6_pcbopt(IPV6_HOPLIMIT,
1800 (u_char *)&optval,
1801 sizeof(optval),
1802 optp,
1803 privileged, uproto);
1804 break;
1805 }
1806
1807 case IPV6_RECVHOPLIMIT:
1808 #ifdef RFC2292
1809 /* cannot mix with RFC2292 */
1810 if (OPTBIT(IN6P_RFC2292)) {
1811 error = EINVAL;
1812 break;
1813 }
1814 #endif
1815 OPTSET(IN6P_HOPLIMIT);
1816 break;
1817
1818 case IPV6_RECVHOPOPTS:
1819 #ifdef RFC2292
1820 /* cannot mix with RFC2292 */
1821 if (OPTBIT(IN6P_RFC2292)) {
1822 error = EINVAL;
1823 break;
1824 }
1825 #endif
1826 OPTSET(IN6P_HOPOPTS);
1827 break;
1828
1829 case IPV6_RECVDSTOPTS:
1830 #ifdef RFC2292
1831 /* cannot mix with RFC2292 */
1832 if (OPTBIT(IN6P_RFC2292)) {
1833 error = EINVAL;
1834 break;
1835 }
1836 #endif
1837 OPTSET(IN6P_DSTOPTS);
1838 break;
1839
1840 case IPV6_RECVRTHDRDSTOPTS:
1841 #ifdef RFC2292
1842 /* cannot mix with RFC2292 */
1843 if (OPTBIT(IN6P_RFC2292)) {
1844 error = EINVAL;
1845 break;
1846 }
1847 #endif
1848 OPTSET(IN6P_RTHDRDSTOPTS);
1849 break;
1850
1851 case IPV6_RECVRTHDR:
1852 #ifdef RFC2292
1853 /* cannot mix with RFC2292 */
1854 if (OPTBIT(IN6P_RFC2292)) {
1855 error = EINVAL;
1856 break;
1857 }
1858 #endif
1859 OPTSET(IN6P_RTHDR);
1860 break;
1861
1862 case IPV6_FAITH:
1863 OPTSET(IN6P_FAITH);
1864 break;
1865
1866 case IPV6_RECVPATHMTU:
1867 /*
1868 * We ignore this option for TCP
1869 * sockets.
1870 * (RFC3542 leaves this case
1871 * unspecified.)
1872 */
1873 if (uproto != IPPROTO_TCP)
1874 OPTSET(IN6P_MTU);
1875 break;
1876
1877 case IPV6_V6ONLY:
1878 /*
1879 * make setsockopt(IPV6_V6ONLY)
1880 * available only prior to bind(2).
1881 * see ipng mailing list, Jun 22 2001.
1882 */
1883 if (in6p->in6p_lport ||
1884 !IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_laddr)) {
1885 error = EINVAL;
1886 break;
1887 }
1888 #ifdef INET6_BINDV6ONLY
1889 if (!optval)
1890 error = EINVAL;
1891 #else
1892 OPTSET(IN6P_IPV6_V6ONLY);
1893 #endif
1894 break;
1895 case IPV6_RECVTCLASS:
1896 #ifdef RFC2292
1897 /* cannot mix with RFC2292 XXX */
1898 if (OPTBIT(IN6P_RFC2292)) {
1899 error = EINVAL;
1900 break;
1901 }
1902 #endif
1903 OPTSET(IN6P_TCLASS);
1904 break;
1905
1906 }
1907 break;
1908
1909 case IPV6_OTCLASS:
1910 {
1911 struct ip6_pktopts **optp;
1912 u_int8_t tclass;
1913
1914 if (optlen != sizeof(tclass)) {
1915 error = EINVAL;
1916 break;
1917 }
1918 tclass = *mtod(m, u_int8_t *);
1919 optp = &in6p->in6p_outputopts;
1920 error = ip6_pcbopt(optname,
1921 (u_char *)&tclass,
1922 sizeof(tclass),
1923 optp,
1924 privileged, uproto);
1925 break;
1926 }
1927
1928 case IPV6_TCLASS:
1929 case IPV6_DONTFRAG:
1930 case IPV6_USE_MIN_MTU:
1931 if (optlen != sizeof(optval)) {
1932 error = EINVAL;
1933 break;
1934 }
1935 optval = *mtod(m, int *);
1936 {
1937 struct ip6_pktopts **optp;
1938 optp = &in6p->in6p_outputopts;
1939 error = ip6_pcbopt(optname,
1940 (u_char *)&optval,
1941 sizeof(optval),
1942 optp,
1943 privileged, uproto);
1944 break;
1945 }
1946
1947 #ifdef RFC2292
1948 case IPV6_2292PKTINFO:
1949 case IPV6_2292HOPLIMIT:
1950 case IPV6_2292HOPOPTS:
1951 case IPV6_2292DSTOPTS:
1952 case IPV6_2292RTHDR:
1953 /* RFC 2292 */
1954 if (optlen != sizeof(int)) {
1955 error = EINVAL;
1956 break;
1957 }
1958 optval = *mtod(m, int *);
1959 switch (optname) {
1960 case IPV6_2292PKTINFO:
1961 OPTSET2292(IN6P_PKTINFO);
1962 break;
1963 case IPV6_2292HOPLIMIT:
1964 OPTSET2292(IN6P_HOPLIMIT);
1965 break;
1966 case IPV6_2292HOPOPTS:
1967 /*
1968 * Check super-user privilege.
1969 * See comments for IPV6_RECVHOPOPTS.
1970 */
1971 if (!privileged)
1972 return (EPERM);
1973 OPTSET2292(IN6P_HOPOPTS);
1974 break;
1975 case IPV6_2292DSTOPTS:
1976 if (!privileged)
1977 return (EPERM);
1978 OPTSET2292(IN6P_DSTOPTS|IN6P_RTHDRDSTOPTS); /* XXX */
1979 break;
1980 case IPV6_2292RTHDR:
1981 OPTSET2292(IN6P_RTHDR);
1982 break;
1983 }
1984 break;
1985 #endif
1986 case IPV6_PKTINFO:
1987 case IPV6_HOPOPTS:
1988 case IPV6_RTHDR:
1989 case IPV6_DSTOPTS:
1990 case IPV6_RTHDRDSTOPTS:
1991 case IPV6_NEXTHOP:
1992 {
1993 /* new advanced API (RFC3542) */
1994 u_char *optbuf;
1995 int optbuflen;
1996 struct ip6_pktopts **optp;
1997 if (!m) {
1998 error = EINVAL;
1999 break;
2000 }
2001
2002 #ifdef RFC2292
2003 /* cannot mix with RFC2292 */
2004 if (OPTBIT(IN6P_RFC2292)) {
2005 error = EINVAL;
2006 break;
2007 }
2008 #endif
2009
2010 if (m && m->m_next) {
2011 error = EINVAL; /* XXX */
2012 break;
2013 }
2014
2015 optbuf = mtod(m, u_char *);
2016 optbuflen = m->m_len;
2017 optp = &in6p->in6p_outputopts;
2018 error = ip6_pcbopt(optname, optbuf, optbuflen,
2019 optp, privileged, uproto);
2020 break;
2021 }
2022 #undef OPTSET
2023
2024 case IPV6_MULTICAST_IF:
2025 case IPV6_MULTICAST_HOPS:
2026 case IPV6_MULTICAST_LOOP:
2027 case IPV6_JOIN_GROUP:
2028 case IPV6_LEAVE_GROUP:
2029 error = ip6_setmoptions(optname,
2030 &in6p->in6p_moptions, m);
2031 break;
2032
2033 case IPV6_PORTRANGE:
2034 if (!m) {
2035 error = EINVAL;
2036 break;
2037 }
2038 optval = *mtod(m, int *);
2039
2040 switch (optval) {
2041 case IPV6_PORTRANGE_DEFAULT:
2042 in6p->in6p_flags &= ~(IN6P_LOWPORT);
2043 in6p->in6p_flags &= ~(IN6P_HIGHPORT);
2044 break;
2045
2046 case IPV6_PORTRANGE_HIGH:
2047 in6p->in6p_flags &= ~(IN6P_LOWPORT);
2048 in6p->in6p_flags |= IN6P_HIGHPORT;
2049 break;
2050
2051 case IPV6_PORTRANGE_LOW:
2052 in6p->in6p_flags &= ~(IN6P_HIGHPORT);
2053 in6p->in6p_flags |= IN6P_LOWPORT;
2054 break;
2055
2056 default:
2057 error = EINVAL;
2058 break;
2059 }
2060 break;
2061
2062
2063 #if defined(IPSEC) || defined(FAST_IPSEC)
2064 case IPV6_IPSEC_POLICY:
2065 {
2066 void *req = NULL;
2067 size_t len = 0;
2068 if (m) {
2069 req = mtod(m, void *);
2070 len = m->m_len;
2071 }
2072 error = ipsec6_set_policy(in6p, optname, req,
2073 len, privileged);
2074 }
2075 break;
2076 #endif /* IPSEC */
2077
2078 default:
2079 error = ENOPROTOOPT;
2080 break;
2081 }
2082 if (m)
2083 (void)m_free(m);
2084 break;
2085
2086 case PRCO_GETOPT:
2087 switch (optname) {
2088 #ifdef RFC2292
2089 case IPV6_2292PKTOPTIONS:
2090 /*
2091 * RFC3542 (effectively) deprecated the
2092 * semantics of the 2292-style pktoptions.
2093 * Since it was not reliable in nature (i.e.,
2094 * applications had to expect the lack of some
2095 * information after all), it would make sense
2096 * to simplify this part by always returning
2097 * empty data.
2098 */
2099 *mp = m_get(M_WAIT, MT_SOOPTS);
2100 (*mp)->m_len = 0;
2101 break;
2102 #endif
2103
2104 case IPV6_RECVHOPOPTS:
2105 case IPV6_RECVDSTOPTS:
2106 case IPV6_RECVRTHDRDSTOPTS:
2107 case IPV6_UNICAST_HOPS:
2108 case IPV6_RECVPKTINFO:
2109 case IPV6_RECVHOPLIMIT:
2110 case IPV6_RECVRTHDR:
2111 case IPV6_RECVPATHMTU:
2112
2113 case IPV6_FAITH:
2114 case IPV6_V6ONLY:
2115 case IPV6_PORTRANGE:
2116 case IPV6_RECVTCLASS:
2117 switch (optname) {
2118
2119 case IPV6_RECVHOPOPTS:
2120 optval = OPTBIT(IN6P_HOPOPTS);
2121 break;
2122
2123 case IPV6_RECVDSTOPTS:
2124 optval = OPTBIT(IN6P_DSTOPTS);
2125 break;
2126
2127 case IPV6_RECVRTHDRDSTOPTS:
2128 optval = OPTBIT(IN6P_RTHDRDSTOPTS);
2129 break;
2130
2131 case IPV6_UNICAST_HOPS:
2132 optval = in6p->in6p_hops;
2133 break;
2134
2135 case IPV6_RECVPKTINFO:
2136 optval = OPTBIT(IN6P_PKTINFO);
2137 break;
2138
2139 case IPV6_RECVHOPLIMIT:
2140 optval = OPTBIT(IN6P_HOPLIMIT);
2141 break;
2142
2143 case IPV6_RECVRTHDR:
2144 optval = OPTBIT(IN6P_RTHDR);
2145 break;
2146
2147 case IPV6_RECVPATHMTU:
2148 optval = OPTBIT(IN6P_MTU);
2149 break;
2150
2151 case IPV6_FAITH:
2152 optval = OPTBIT(IN6P_FAITH);
2153 break;
2154
2155 case IPV6_V6ONLY:
2156 optval = OPTBIT(IN6P_IPV6_V6ONLY);
2157 break;
2158
2159 case IPV6_PORTRANGE:
2160 {
2161 int flags;
2162 flags = in6p->in6p_flags;
2163 if (flags & IN6P_HIGHPORT)
2164 optval = IPV6_PORTRANGE_HIGH;
2165 else if (flags & IN6P_LOWPORT)
2166 optval = IPV6_PORTRANGE_LOW;
2167 else
2168 optval = 0;
2169 break;
2170 }
2171 case IPV6_RECVTCLASS:
2172 optval = OPTBIT(IN6P_TCLASS);
2173 break;
2174
2175 }
2176 if (error)
2177 break;
2178 *mp = m = m_get(M_WAIT, MT_SOOPTS);
2179 m->m_len = sizeof(int);
2180 *mtod(m, int *) = optval;
2181 break;
2182
2183 case IPV6_PATHMTU:
2184 {
2185 u_long pmtu = 0;
2186 struct ip6_mtuinfo mtuinfo;
2187 struct route *ro = &in6p->in6p_route;
2188
2189 if (!(so->so_state & SS_ISCONNECTED))
2190 return (ENOTCONN);
2191 /*
2192 * XXX: we dot not consider the case of source
2193 * routing, or optional information to specify
2194 * the outgoing interface.
2195 */
2196 error = ip6_getpmtu(ro, NULL, NULL,
2197 &in6p->in6p_faddr, &pmtu, NULL);
2198 if (error)
2199 break;
2200 if (pmtu > IPV6_MAXPACKET)
2201 pmtu = IPV6_MAXPACKET;
2202
2203 memset(&mtuinfo, 0, sizeof(mtuinfo));
2204 mtuinfo.ip6m_mtu = (u_int32_t)pmtu;
2205 optdata = (void *)&mtuinfo;
2206 optdatalen = sizeof(mtuinfo);
2207 if (optdatalen > MCLBYTES)
2208 return (EMSGSIZE); /* XXX */
2209 *mp = m = m_get(M_WAIT, MT_SOOPTS);
2210 if (optdatalen > MLEN)
2211 MCLGET(m, M_WAIT);
2212 m->m_len = optdatalen;
2213 memcpy(mtod(m, void *), optdata, optdatalen);
2214 break;
2215 }
2216
2217 #ifdef RFC2292
2218 case IPV6_2292PKTINFO:
2219 case IPV6_2292HOPLIMIT:
2220 case IPV6_2292HOPOPTS:
2221 case IPV6_2292RTHDR:
2222 case IPV6_2292DSTOPTS:
2223 switch (optname) {
2224 case IPV6_2292PKTINFO:
2225 optval = OPTBIT(IN6P_PKTINFO);
2226 break;
2227 case IPV6_2292HOPLIMIT:
2228 optval = OPTBIT(IN6P_HOPLIMIT);
2229 break;
2230 case IPV6_2292HOPOPTS:
2231 optval = OPTBIT(IN6P_HOPOPTS);
2232 break;
2233 case IPV6_2292RTHDR:
2234 optval = OPTBIT(IN6P_RTHDR);
2235 break;
2236 case IPV6_2292DSTOPTS:
2237 optval = OPTBIT(IN6P_DSTOPTS|IN6P_RTHDRDSTOPTS);
2238 break;
2239 }
2240 *mp = m = m_get(M_WAIT, MT_SOOPTS);
2241 m->m_len = sizeof(int);
2242 *mtod(m, int *) = optval;
2243 break;
2244 #endif
2245 case IPV6_PKTINFO:
2246 case IPV6_HOPOPTS:
2247 case IPV6_RTHDR:
2248 case IPV6_DSTOPTS:
2249 case IPV6_RTHDRDSTOPTS:
2250 case IPV6_NEXTHOP:
2251 case IPV6_OTCLASS:
2252 case IPV6_TCLASS:
2253 case IPV6_DONTFRAG:
2254 case IPV6_USE_MIN_MTU:
2255 error = ip6_getpcbopt(in6p->in6p_outputopts,
2256 optname, mp);
2257 break;
2258
2259 case IPV6_MULTICAST_IF:
2260 case IPV6_MULTICAST_HOPS:
2261 case IPV6_MULTICAST_LOOP:
2262 case IPV6_JOIN_GROUP:
2263 case IPV6_LEAVE_GROUP:
2264 error = ip6_getmoptions(optname,
2265 in6p->in6p_moptions, mp);
2266 break;
2267
2268 #if defined(IPSEC) || defined(FAST_IPSEC)
2269 case IPV6_IPSEC_POLICY:
2270 {
2271 void *req = NULL;
2272 size_t len = 0;
2273 if (m) {
2274 req = mtod(m, void *);
2275 len = m->m_len;
2276 }
2277 error = ipsec6_get_policy(in6p, req, len, mp);
2278 break;
2279 }
2280 #endif /* IPSEC */
2281
2282
2283
2284
2285 default:
2286 error = ENOPROTOOPT;
2287 break;
2288 }
2289 break;
2290 }
2291 return (error);
2292 }
2293
2294 int
2295 ip6_raw_ctloutput(int op, struct socket *so, int level, int optname,
2296 struct mbuf **mp)
2297 {
2298 int error = 0, optval, optlen;
2299 const int icmp6off = offsetof(struct icmp6_hdr, icmp6_cksum);
2300 struct in6pcb *in6p = sotoin6pcb(so);
2301 struct mbuf *m = *mp;
2302
2303 optlen = m ? m->m_len : 0;
2304
2305 if (level != IPPROTO_IPV6) {
2306 if (op == PRCO_SETOPT && *mp)
2307 (void)m_free(*mp);
2308 return ENOPROTOOPT;
2309 }
2310
2311 switch (optname) {
2312 case IPV6_CHECKSUM:
2313 /*
2314 * For ICMPv6 sockets, no modification allowed for checksum
2315 * offset, permit "no change" values to help existing apps.
2316 *
2317 * XXX RFC3542 says: "An attempt to set IPV6_CHECKSUM
2318 * for an ICMPv6 socket will fail." The current
2319 * behavior does not meet RFC3542.
2320 */
2321 switch (op) {
2322 case PRCO_SETOPT:
2323 if (optlen != sizeof(int)) {
2324 error = EINVAL;
2325 break;
2326 }
2327 optval = *mtod(m, int *);
2328 if ((optval % 2) != 0) {
2329 /* the API assumes even offset values */
2330 error = EINVAL;
2331 } else if (so->so_proto->pr_protocol ==
2332 IPPROTO_ICMPV6) {
2333 if (optval != icmp6off)
2334 error = EINVAL;
2335 } else
2336 in6p->in6p_cksum = optval;
2337 break;
2338
2339 case PRCO_GETOPT:
2340 if (so->so_proto->pr_protocol == IPPROTO_ICMPV6)
2341 optval = icmp6off;
2342 else
2343 optval = in6p->in6p_cksum;
2344
2345 *mp = m = m_get(M_WAIT, MT_SOOPTS);
2346 m->m_len = sizeof(int);
2347 *mtod(m, int *) = optval;
2348 break;
2349
2350 default:
2351 error = EINVAL;
2352 break;
2353 }
2354 break;
2355
2356 default:
2357 error = ENOPROTOOPT;
2358 break;
2359 }
2360
2361 if (op == PRCO_SETOPT && m)
2362 (void)m_free(m);
2363
2364 return (error);
2365 }
2366
#ifdef RFC2292
/*
 * Replace the whole set of sticky packet options of a pcb with the ones
 * described by the control-message style buffer in "m".
 *
 * Any options already present in *pktopt are discarded first.  An absent or
 * empty "m" therefore just removes the old options and leaves *pktopt NULL.
 * On success *pktopt points at the new option set; on failure *pktopt is
 * NULL and the error from ip6_setpktopts() is returned.
 */
static int
ip6_pcbopts(struct ip6_pktopts **pktopt, struct mbuf *m, struct socket *so)
{
	struct ip6_pktopts *po = *pktopt;
	struct lwp *l = curlwp;	/* XXX */
	int priv;
	int error;

	if (po == NULL) {
		/* nothing to turn off; just get a buffer to fill in */
		po = malloc(sizeof(*po), M_IP6OPT, M_WAITOK);
	} else {
		/* turn off any old options. */
#ifdef DIAGNOSTIC
		if (po->ip6po_pktinfo || po->ip6po_nexthop ||
		    po->ip6po_hbh || po->ip6po_dest1 || po->ip6po_dest2 ||
#if defined(MOBILE_IPV6) && NMIP > 0
		    po->ip6po_hoa ||
		    po->ip6po_rhinfo2.ip6po_rhi_rthdr ||
#endif /* MOBILE_IPV6 && NMIP > 0 */
		    po->ip6po_rhinfo.ip6po_rhi_rthdr)
			printf("ip6_pcbopts: all specified options are cleared.\n");
#endif
		ip6_clearpktopts(po, -1);
	}
	*pktopt = NULL;

	if (m == NULL || m->m_len == 0) {
		/*
		 * The request only asked to drop the previous options; the
		 * buffer is not needed whether it was given or just created.
		 */
		free(po, M_IP6OPT);
		return (0);
	}

	/* set options specified by user. */
	priv = (l != NULL && kauth_authorize_generic(l->l_cred,
	    KAUTH_GENERIC_ISSUSER, NULL) == 0) ? 1 : 0;

	error = ip6_setpktopts(m, po, NULL, priv, so->so_proto->pr_protocol);
	if (error != 0) {
		ip6_clearpktopts(po, -1); /* XXX: discard all options */
		free(po, M_IP6OPT);
		return (error);
	}

	*pktopt = po;
	return (0);
}
#endif
2420
2421 /*
2422 * initialize ip6_pktopts. beware that there are non-zero default values in
2423 * the struct.
2424 */
2425 void
2426 ip6_initpktopts(struct ip6_pktopts *opt)
2427 {
2428
2429 memset(opt, 0, sizeof(*opt));
2430 opt->ip6po_hlim = -1; /* -1 means default hop limit */
2431 opt->ip6po_tclass = -1; /* -1 means default traffic class */
2432 opt->ip6po_minmtu = IP6PO_MINMTU_MCASTONLY;
2433 }
2434
2435 #define sin6tosa(sin6) ((struct sockaddr *)(sin6)) /* XXX */
2436 static int
2437 ip6_pcbopt(int optname, u_char *buf, int len, struct ip6_pktopts **pktopt,
2438 int priv, int uproto)
2439 {
2440 struct ip6_pktopts *opt;
2441
2442 if (*pktopt == NULL) {
2443 *pktopt = malloc(sizeof(struct ip6_pktopts), M_IP6OPT,
2444 M_WAITOK);
2445 ip6_initpktopts(*pktopt);
2446 }
2447 opt = *pktopt;
2448
2449 return (ip6_setpktopt(optname, buf, len, opt, priv, 1, 0, uproto));
2450 }
2451
2452 static int
2453 ip6_getpcbopt(struct ip6_pktopts *pktopt, int optname, struct mbuf **mp)
2454 {
2455 void *optdata = NULL;
2456 int optdatalen = 0;
2457 struct ip6_ext *ip6e;
2458 int error = 0;
2459 struct in6_pktinfo null_pktinfo;
2460 int deftclass = 0, on;
2461 int defminmtu = IP6PO_MINMTU_MCASTONLY;
2462 struct mbuf *m;
2463
2464 switch (optname) {
2465 case IPV6_PKTINFO:
2466 if (pktopt && pktopt->ip6po_pktinfo)
2467 optdata = (void *)pktopt->ip6po_pktinfo;
2468 else {
2469 /* XXX: we don't have to do this every time... */
2470 memset(&null_pktinfo, 0, sizeof(null_pktinfo));
2471 optdata = (void *)&null_pktinfo;
2472 }
2473 optdatalen = sizeof(struct in6_pktinfo);
2474 break;
2475 case IPV6_OTCLASS:
2476 /* XXX */
2477 return (EINVAL);
2478 case IPV6_TCLASS:
2479 if (pktopt && pktopt->ip6po_tclass >= 0)
2480 optdata = (void *)&pktopt->ip6po_tclass;
2481 else
2482 optdata = (void *)&deftclass;
2483 optdatalen = sizeof(int);
2484 break;
2485 case IPV6_HOPOPTS:
2486 if (pktopt && pktopt->ip6po_hbh) {
2487 optdata = (void *)pktopt->ip6po_hbh;
2488 ip6e = (struct ip6_ext *)pktopt->ip6po_hbh;
2489 optdatalen = (ip6e->ip6e_len + 1) << 3;
2490 }
2491 break;
2492 case IPV6_RTHDR:
2493 if (pktopt && pktopt->ip6po_rthdr) {
2494 optdata = (void *)pktopt->ip6po_rthdr;
2495 ip6e = (struct ip6_ext *)pktopt->ip6po_rthdr;
2496 optdatalen = (ip6e->ip6e_len + 1) << 3;
2497 }
2498 break;
2499 case IPV6_RTHDRDSTOPTS:
2500 if (pktopt && pktopt->ip6po_dest1) {
2501 optdata = (void *)pktopt->ip6po_dest1;
2502 ip6e = (struct ip6_ext *)pktopt->ip6po_dest1;
2503 optdatalen = (ip6e->ip6e_len + 1) << 3;
2504 }
2505 break;
2506 case IPV6_DSTOPTS:
2507 if (pktopt && pktopt->ip6po_dest2) {
2508 optdata = (void *)pktopt->ip6po_dest2;
2509 ip6e = (struct ip6_ext *)pktopt->ip6po_dest2;
2510 optdatalen = (ip6e->ip6e_len + 1) << 3;
2511 }
2512 break;
2513 case IPV6_NEXTHOP:
2514 if (pktopt && pktopt->ip6po_nexthop) {
2515 optdata = (void *)pktopt->ip6po_nexthop;
2516 optdatalen = pktopt->ip6po_nexthop->sa_len;
2517 }
2518 break;
2519 case IPV6_USE_MIN_MTU:
2520 if (pktopt)
2521 optdata = (void *)&pktopt->ip6po_minmtu;
2522 else
2523 optdata = (void *)&defminmtu;
2524 optdatalen = sizeof(int);
2525 break;
2526 case IPV6_DONTFRAG:
2527 if (pktopt && ((pktopt->ip6po_flags) & IP6PO_DONTFRAG))
2528 on = 1;
2529 else
2530 on = 0;
2531 optdata = (void *)&on;
2532 optdatalen = sizeof(on);
2533 break;
2534 default: /* should not happen */
2535 #ifdef DIAGNOSTIC
2536 panic("ip6_getpcbopt: unexpected option\n");
2537 #endif
2538 return (ENOPROTOOPT);
2539 }
2540
2541 if (optdatalen > MCLBYTES)
2542 return (EMSGSIZE); /* XXX */
2543 *mp = m = m_get(M_WAIT, MT_SOOPTS);
2544 if (optdatalen > MLEN)
2545 MCLGET(m, M_WAIT);
2546 m->m_len = optdatalen;
2547 if (optdatalen)
2548 memcpy(mtod(m, void *), optdata, optdatalen);
2549
2550 return (error);
2551 }
2552
2553 void
2554 ip6_clearpktopts(struct ip6_pktopts *pktopt, int optname)
2555 {
2556 if (optname == -1 || optname == IPV6_PKTINFO) {
2557 if (pktopt->ip6po_pktinfo)
2558 free(pktopt->ip6po_pktinfo, M_IP6OPT);
2559 pktopt->ip6po_pktinfo = NULL;
2560 }
2561 if (optname == -1 || optname == IPV6_HOPLIMIT)
2562 pktopt->ip6po_hlim = -1;
2563 if (optname == -1 || optname == IPV6_TCLASS)
2564 pktopt->ip6po_tclass = -1;
2565 if (optname == -1 || optname == IPV6_NEXTHOP) {
2566 rtcache_free(&pktopt->ip6po_nextroute);
2567 if (pktopt->ip6po_nexthop)
2568 free(pktopt->ip6po_nexthop, M_IP6OPT);
2569 pktopt->ip6po_nexthop = NULL;
2570 }
2571 if (optname == -1 || optname == IPV6_HOPOPTS) {
2572 if (pktopt->ip6po_hbh)
2573 free(pktopt->ip6po_hbh, M_IP6OPT);
2574 pktopt->ip6po_hbh = NULL;
2575 }
2576 if (optname == -1 || optname == IPV6_RTHDRDSTOPTS) {
2577 if (pktopt->ip6po_dest1)
2578 free(pktopt->ip6po_dest1, M_IP6OPT);
2579 pktopt->ip6po_dest1 = NULL;
2580 }
2581 if (optname == -1 || optname == IPV6_RTHDR) {
2582 if (pktopt->ip6po_rhinfo.ip6po_rhi_rthdr)
2583 free(pktopt->ip6po_rhinfo.ip6po_rhi_rthdr, M_IP6OPT);
2584 pktopt->ip6po_rhinfo.ip6po_rhi_rthdr = NULL;
2585 rtcache_free(&pktopt->ip6po_route);
2586 }
2587 #ifdef MOBILE_IPV6
2588 if (pktopt->ip6po_rhinfo2.ip6po_rhi_rthdr)
2589 free(pktopt->ip6po_rhinfo2.ip6po_rhi_rthdr, M_IP6OPT);
2590 pktopt->ip6po_rhinfo2.ip6po_rhi_rthdr = NULL;
2591 rtcache_free(&pktopt->ip6po_route2);
2592 #endif /* MOBILE_IPV6 */
2593 if (optname == -1 || optname == IPV6_DSTOPTS) {
2594 if (pktopt->ip6po_dest2)
2595 free(pktopt->ip6po_dest2, M_IP6OPT);
2596 pktopt->ip6po_dest2 = NULL;
2597 #if defined(MOBILE_IPV6) && NMIP > 0
2598 if (pktopt->ip6po_hoa)
2599 free(pktopt->ip6po_hoa, M_IP6OPT);
2600 pktopt->ip6po_hoa = NULL;
2601 #endif /* MOBILE_IPV6 && NMIP > 0 */
2602 }
2603 }
2604
2605 #define PKTOPT_EXTHDRCPY(type) \
2606 do { \
2607 if (src->type) { \
2608 int hlen = (((struct ip6_ext *)src->type)->ip6e_len + 1) << 3;\
2609 dst->type = malloc(hlen, M_IP6OPT, canwait); \
2610 if (dst->type == NULL && canwait == M_NOWAIT) \
2611 goto bad; \
2612 memcpy(dst->type, src->type, hlen); \
2613 } \
2614 } while (/*CONSTCOND*/ 0)
2615
2616 static int
2617 copypktopts(struct ip6_pktopts *dst, struct ip6_pktopts *src, int canwait)
2618 {
2619 dst->ip6po_hlim = src->ip6po_hlim;
2620 dst->ip6po_tclass = src->ip6po_tclass;
2621 dst->ip6po_flags = src->ip6po_flags;
2622 if (src->ip6po_pktinfo) {
2623 dst->ip6po_pktinfo = malloc(sizeof(*dst->ip6po_pktinfo),
2624 M_IP6OPT, canwait);
2625 if (dst->ip6po_pktinfo == NULL && canwait == M_NOWAIT)
2626 goto bad;
2627 *dst->ip6po_pktinfo = *src->ip6po_pktinfo;
2628 }
2629 if (src->ip6po_nexthop) {
2630 dst->ip6po_nexthop = malloc(src->ip6po_nexthop->sa_len,
2631 M_IP6OPT, canwait);
2632 if (dst->ip6po_nexthop == NULL && canwait == M_NOWAIT)
2633 goto bad;
2634 memcpy(dst->ip6po_nexthop, src->ip6po_nexthop,
2635 src->ip6po_nexthop->sa_len);
2636 }
2637 PKTOPT_EXTHDRCPY(ip6po_hbh);
2638 PKTOPT_EXTHDRCPY(ip6po_dest1);
2639 PKTOPT_EXTHDRCPY(ip6po_dest2);
2640 PKTOPT_EXTHDRCPY(ip6po_rthdr); /* not copy the cached route */
2641 return (0);
2642
2643 bad:
2644 if (dst->ip6po_pktinfo) free(dst->ip6po_pktinfo, M_IP6OPT);
2645 if (dst->ip6po_nexthop) free(dst->ip6po_nexthop, M_IP6OPT);
2646 if (dst->ip6po_hbh) free(dst->ip6po_hbh, M_IP6OPT);
2647 if (dst->ip6po_dest1) free(dst->ip6po_dest1, M_IP6OPT);
2648 if (dst->ip6po_dest2) free(dst->ip6po_dest2, M_IP6OPT);
2649 if (dst->ip6po_rthdr) free(dst->ip6po_rthdr, M_IP6OPT);
2650
2651 return (ENOBUFS);
2652 }
2653 #undef PKTOPT_EXTHDRCPY
2654
2655 struct ip6_pktopts *
2656 ip6_copypktopts(struct ip6_pktopts *src, int canwait)
2657 {
2658 int error;
2659 struct ip6_pktopts *dst;
2660
2661 dst = malloc(sizeof(*dst), M_IP6OPT, canwait);
2662 if (dst == NULL && canwait == M_NOWAIT)
2663 return (NULL);
2664 ip6_initpktopts(dst);
2665
2666 if ((error = copypktopts(dst, src, canwait)) != 0) {
2667 free(dst, M_IP6OPT);
2668 return (NULL);
2669 }
2670
2671 return (dst);
2672 }
2673
2674 void
2675 ip6_freepcbopts(struct ip6_pktopts *pktopt)
2676 {
2677 if (pktopt == NULL)
2678 return;
2679
2680 ip6_clearpktopts(pktopt, -1);
2681
2682 free(pktopt, M_IP6OPT);
2683 }
2684
2685 /*
2686 * Set the IP6 multicast options in response to user setsockopt().
2687 */
2688 static int
2689 ip6_setmoptions(int optname, struct ip6_moptions **im6op, struct mbuf *m)
2690 {
2691 int error = 0;
2692 u_int loop, ifindex;
2693 struct ipv6_mreq *mreq;
2694 struct ifnet *ifp;
2695 struct ip6_moptions *im6o = *im6op;
2696 struct route ro;
2697 struct in6_multi_mship *imm;
2698 struct lwp *l = curlwp; /* XXX */
2699
2700 if (im6o == NULL) {
2701 /*
2702 * No multicast option buffer attached to the pcb;
2703 * allocate one and initialize to default values.
2704 */
2705 im6o = (struct ip6_moptions *)
2706 malloc(sizeof(*im6o), M_IPMOPTS, M_WAITOK);
2707
2708 if (im6o == NULL)
2709 return (ENOBUFS);
2710 *im6op = im6o;
2711 im6o->im6o_multicast_ifp = NULL;
2712 im6o->im6o_multicast_hlim = ip6_defmcasthlim;
2713 im6o->im6o_multicast_loop = IPV6_DEFAULT_MULTICAST_LOOP;
2714 LIST_INIT(&im6o->im6o_memberships);
2715 }
2716
2717 switch (optname) {
2718
2719 case IPV6_MULTICAST_IF:
2720 /*
2721 * Select the interface for outgoing multicast packets.
2722 */
2723 if (m == NULL || m->m_len != sizeof(u_int)) {
2724 error = EINVAL;
2725 break;
2726 }
2727 bcopy(mtod(m, u_int *), &ifindex, sizeof(ifindex));
2728 if (ifindex != 0) {
2729 if (if_indexlim <= ifindex || !ifindex2ifnet[ifindex]) {
2730 error = ENXIO; /* XXX EINVAL? */
2731 break;
2732 }
2733 ifp = ifindex2ifnet[ifindex];
2734 if ((ifp->if_flags & IFF_MULTICAST) == 0) {
2735 error = EADDRNOTAVAIL;
2736 break;
2737 }
2738 } else
2739 ifp = NULL;
2740 im6o->im6o_multicast_ifp = ifp;
2741 break;
2742
2743 case IPV6_MULTICAST_HOPS:
2744 {
2745 /*
2746 * Set the IP6 hoplimit for outgoing multicast packets.
2747 */
2748 int optval;
2749 if (m == NULL || m->m_len != sizeof(int)) {
2750 error = EINVAL;
2751 break;
2752 }
2753 bcopy(mtod(m, u_int *), &optval, sizeof(optval));
2754 if (optval < -1 || optval >= 256)
2755 error = EINVAL;
2756 else if (optval == -1)
2757 im6o->im6o_multicast_hlim = ip6_defmcasthlim;
2758 else
2759 im6o->im6o_multicast_hlim = optval;
2760 break;
2761 }
2762
2763 case IPV6_MULTICAST_LOOP:
2764 /*
2765 * Set the loopback flag for outgoing multicast packets.
2766 * Must be zero or one.
2767 */
2768 if (m == NULL || m->m_len != sizeof(u_int)) {
2769 error = EINVAL;
2770 break;
2771 }
2772 bcopy(mtod(m, u_int *), &loop, sizeof(loop));
2773 if (loop > 1) {
2774 error = EINVAL;
2775 break;
2776 }
2777 im6o->im6o_multicast_loop = loop;
2778 break;
2779
2780 case IPV6_JOIN_GROUP:
2781 /*
2782 * Add a multicast group membership.
2783 * Group must be a valid IP6 multicast address.
2784 */
2785 if (m == NULL || m->m_len != sizeof(struct ipv6_mreq)) {
2786 error = EINVAL;
2787 break;
2788 }
2789 mreq = mtod(m, struct ipv6_mreq *);
2790 if (IN6_IS_ADDR_UNSPECIFIED(&mreq->ipv6mr_multiaddr)) {
2791 /*
2792 * We use the unspecified address to specify to accept
2793 * all multicast addresses. Only super user is allowed
2794 * to do this.
2795 */
2796 if (kauth_authorize_generic(l->l_cred,
2797 KAUTH_GENERIC_ISSUSER, NULL))
2798 {
2799 error = EACCES;
2800 break;
2801 }
2802 } else if (!IN6_IS_ADDR_MULTICAST(&mreq->ipv6mr_multiaddr)) {
2803 error = EINVAL;
2804 break;
2805 }
2806
2807 /*
2808 * If no interface was explicitly specified, choose an
2809 * appropriate one according to the given multicast address.
2810 */
2811 if (mreq->ipv6mr_interface == 0) {
2812 struct rtentry *rt;
2813 union {
2814 struct sockaddr dst;
2815 struct sockaddr_in6 dst6;
2816 } u;
2817
2818 /*
2819 * Look up the routing table for the
2820 * address, and choose the outgoing interface.
2821 * XXX: is it a good approach?
2822 */
2823 memset(&ro, 0, sizeof(ro));
2824 sockaddr_in6_init(&u.dst6, &mreq->ipv6mr_multiaddr, 0,
2825 0, 0);
2826 rtcache_setdst(&ro, &u.dst);
2827 ifp = (rt = rtcache_init(&ro)) != NULL ? rt->rt_ifp
2828 : NULL;
2829 rtcache_free(&ro);
2830 } else {
2831 /*
2832 * If the interface is specified, validate it.
2833 */
2834 if (if_indexlim <= mreq->ipv6mr_interface ||
2835 !ifindex2ifnet[mreq->ipv6mr_interface]) {
2836 error = ENXIO; /* XXX EINVAL? */
2837 break;
2838 }
2839 ifp = ifindex2ifnet[mreq->ipv6mr_interface];
2840 }
2841
2842 /*
2843 * See if we found an interface, and confirm that it
2844 * supports multicast
2845 */
2846 if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
2847 error = EADDRNOTAVAIL;
2848 break;
2849 }
2850
2851 if (in6_setscope(&mreq->ipv6mr_multiaddr, ifp, NULL)) {
2852 error = EADDRNOTAVAIL; /* XXX: should not happen */
2853 break;
2854 }
2855
2856 /*
2857 * See if the membership already exists.
2858 */
2859 for (imm = im6o->im6o_memberships.lh_first;
2860 imm != NULL; imm = imm->i6mm_chain.le_next)
2861 if (imm->i6mm_maddr->in6m_ifp == ifp &&
2862 IN6_ARE_ADDR_EQUAL(&imm->i6mm_maddr->in6m_addr,
2863 &mreq->ipv6mr_multiaddr))
2864 break;
2865 if (imm != NULL) {
2866 error = EADDRINUSE;
2867 break;
2868 }
2869 /*
2870 * Everything looks good; add a new record to the multicast
2871 * address list for the given interface.
2872 */
2873 imm = in6_joingroup(ifp, &mreq->ipv6mr_multiaddr, &error, 0);
2874 if (imm == NULL)
2875 break;
2876 LIST_INSERT_HEAD(&im6o->im6o_memberships, imm, i6mm_chain);
2877 break;
2878
2879 case IPV6_LEAVE_GROUP:
2880 /*
2881 * Drop a multicast group membership.
2882 * Group must be a valid IP6 multicast address.
2883 */
2884 if (m == NULL || m->m_len != sizeof(struct ipv6_mreq)) {
2885 error = EINVAL;
2886 break;
2887 }
2888 mreq = mtod(m, struct ipv6_mreq *);
2889
2890 /*
2891 * If an interface address was specified, get a pointer
2892 * to its ifnet structure.
2893 */
2894 if (mreq->ipv6mr_interface != 0) {
2895 if (if_indexlim <= mreq->ipv6mr_interface ||
2896 !ifindex2ifnet[mreq->ipv6mr_interface]) {
2897 error = ENXIO; /* XXX EINVAL? */
2898 break;
2899 }
2900 ifp = ifindex2ifnet[mreq->ipv6mr_interface];
2901 } else
2902 ifp = NULL;
2903
2904 /* Fill in the scope zone ID */
2905 if (ifp) {
2906 if (in6_setscope(&mreq->ipv6mr_multiaddr, ifp, NULL)) {
2907 /* XXX: should not happen */
2908 error = EADDRNOTAVAIL;
2909 break;
2910 }
2911 } else if (mreq->ipv6mr_interface != 0) {
2912 /*
2913 * XXX: This case would happens when the (positive)
2914 * index is in the valid range, but the corresponding
2915 * interface has been detached dynamically. The above
2916 * check probably avoids such case to happen here, but
2917 * we check it explicitly for safety.
2918 */
2919 error = EADDRNOTAVAIL;
2920 break;
2921 } else { /* ipv6mr_interface == 0 */
2922 struct sockaddr_in6 sa6_mc;
2923
2924 /*
2925 * The API spec says as follows:
2926 * If the interface index is specified as 0, the
2927 * system may choose a multicast group membership to
2928 * drop by matching the multicast address only.
2929 * On the other hand, we cannot disambiguate the scope
2930 * zone unless an interface is provided. Thus, we
2931 * check if there's ambiguity with the default scope
2932 * zone as the last resort.
2933 */
2934 sockaddr_in6_init(&sa6_mc, &mreq->ipv6mr_multiaddr,
2935 0, 0, 0);
2936 error = sa6_embedscope(&sa6_mc, ip6_use_defzone);
2937 if (error != 0)
2938 break;
2939 mreq->ipv6mr_multiaddr = sa6_mc.sin6_addr;
2940 }
2941
2942 /*
2943 * Find the membership in the membership list.
2944 */
2945 for (imm = im6o->im6o_memberships.lh_first;
2946 imm != NULL; imm = imm->i6mm_chain.le_next) {
2947 if ((ifp == NULL || imm->i6mm_maddr->in6m_ifp == ifp) &&
2948 IN6_ARE_ADDR_EQUAL(&imm->i6mm_maddr->in6m_addr,
2949 &mreq->ipv6mr_multiaddr))
2950 break;
2951 }
2952 if (imm == NULL) {
2953 /* Unable to resolve interface */
2954 error = EADDRNOTAVAIL;
2955 break;
2956 }
2957 /*
2958 * Give up the multicast address record to which the
2959 * membership points.
2960 */
2961 LIST_REMOVE(imm, i6mm_chain);
2962 in6_leavegroup(imm);
2963 break;
2964
2965 default:
2966 error = EOPNOTSUPP;
2967 break;
2968 }
2969
2970 /*
2971 * If all options have default values, no need to keep the mbuf.
2972 */
2973 if (im6o->im6o_multicast_ifp == NULL &&
2974 im6o->im6o_multicast_hlim == ip6_defmcasthlim &&
2975 im6o->im6o_multicast_loop == IPV6_DEFAULT_MULTICAST_LOOP &&
2976 im6o->im6o_memberships.lh_first == NULL) {
2977 free(*im6op, M_IPMOPTS);
2978 *im6op = NULL;
2979 }
2980
2981 return (error);
2982 }
2983
2984 /*
2985 * Return the IP6 multicast options in response to user getsockopt().
2986 */
2987 static int
2988 ip6_getmoptions(int optname, struct ip6_moptions *im6o, struct mbuf **mp)
2989 {
2990 u_int *hlim, *loop, *ifindex;
2991
2992 *mp = m_get(M_WAIT, MT_SOOPTS);
2993
2994 switch (optname) {
2995
2996 case IPV6_MULTICAST_IF:
2997 ifindex = mtod(*mp, u_int *);
2998 (*mp)->m_len = sizeof(u_int);
2999 if (im6o == NULL || im6o->im6o_multicast_ifp == NULL)
3000 *ifindex = 0;
3001 else
3002 *ifindex = im6o->im6o_multicast_ifp->if_index;
3003 return (0);
3004
3005 case IPV6_MULTICAST_HOPS:
3006 hlim = mtod(*mp, u_int *);
3007 (*mp)->m_len = sizeof(u_int);
3008 if (im6o == NULL)
3009 *hlim = ip6_defmcasthlim;
3010 else
3011 *hlim = im6o->im6o_multicast_hlim;
3012 return (0);
3013
3014 case IPV6_MULTICAST_LOOP:
3015 loop = mtod(*mp, u_int *);
3016 (*mp)->m_len = sizeof(u_int);
3017 if (im6o == NULL)
3018 *loop = ip6_defmcasthlim;
3019 else
3020 *loop = im6o->im6o_multicast_loop;
3021 return (0);
3022
3023 default:
3024 return (EOPNOTSUPP);
3025 }
3026 }
3027
3028 /*
3029 * Discard the IP6 multicast options.
3030 */
3031 void
3032 ip6_freemoptions(struct ip6_moptions *im6o)
3033 {
3034 struct in6_multi_mship *imm;
3035
3036 if (im6o == NULL)
3037 return;
3038
3039 while ((imm = im6o->im6o_memberships.lh_first) != NULL) {
3040 LIST_REMOVE(imm, i6mm_chain);
3041 in6_leavegroup(imm);
3042 }
3043 free(im6o, M_IPMOPTS);
3044 }
3045
3046 /*
3047 * Set IPv6 outgoing packet options based on advanced API.
3048 */
3049 int
3050 ip6_setpktopts(struct mbuf *control, struct ip6_pktopts *opt,
3051 struct ip6_pktopts *stickyopt, int priv, int uproto)
3052 {
3053 struct cmsghdr *cm = 0;
3054
3055 if (control == NULL || opt == NULL)
3056 return (EINVAL);
3057
3058 ip6_initpktopts(opt);
3059 if (stickyopt) {
3060 int error;
3061
3062 /*
3063 * If stickyopt is provided, make a local copy of the options
3064 * for this particular packet, then override them by ancillary
3065 * objects.
3066 * XXX: copypktopts() does not copy the cached route to a next
3067 * hop (if any). This is not very good in terms of efficiency,
3068 * but we can allow this since this option should be rarely
3069 * used.
3070 */
3071 if ((error = copypktopts(opt, stickyopt, M_NOWAIT)) != 0)
3072 return (error);
3073 }
3074
3075 /*
3076 * XXX: Currently, we assume all the optional information is stored
3077 * in a single mbuf.
3078 */
3079 if (control->m_next)
3080 return (EINVAL);
3081
3082 for (; control->m_len; control->m_data += CMSG_ALIGN(cm->cmsg_len),
3083 control->m_len -= CMSG_ALIGN(cm->cmsg_len)) {
3084 int error;
3085
3086 if (control->m_len < CMSG_LEN(0))
3087 return (EINVAL);
3088
3089 cm = mtod(control, struct cmsghdr *);
3090 if (cm->cmsg_len == 0 || cm->cmsg_len > control->m_len)
3091 return (EINVAL);
3092 if (cm->cmsg_level != IPPROTO_IPV6)
3093 continue;
3094
3095 error = ip6_setpktopt(cm->cmsg_type, CMSG_DATA(cm),
3096 cm->cmsg_len - CMSG_LEN(0), opt, priv, 0, 1, uproto);
3097 if (error)
3098 return (error);
3099 }
3100
3101 return (0);
3102 }
3103
3104 /*
3105 * Set a particular packet option, as a sticky option or an ancillary data
3106 * item. "len" can be 0 only when it's a sticky option.
3107 * We have 4 cases of combination of "sticky" and "cmsg":
3108 * "sticky=0, cmsg=0": impossible
3109 * "sticky=0, cmsg=1": RFC2292 or RFC3542 ancillary data
3110 * "sticky=1, cmsg=0": RFC3542 socket option
3111 * "sticky=1, cmsg=1": RFC2292 socket option
3112 */
3113 static int
3114 ip6_setpktopt(int optname, u_char *buf, int len, struct ip6_pktopts *opt,
3115 int priv, int sticky, int cmsg, int uproto)
3116 {
3117 int minmtupolicy;
3118
3119 if (!sticky && !cmsg) {
3120 #ifdef DIAGNOSTIC
3121 printf("ip6_setpktopt: impossible case\n");
3122 #endif
3123 return (EINVAL);
3124 }
3125
3126 /*
3127 * IPV6_2292xxx is for backward compatibility to RFC2292, and should
3128 * not be specified in the context of RFC3542. Conversely,
3129 * RFC3542 types should not be specified in the context of RFC2292.
3130 */
3131 if (!cmsg) {
3132 switch (optname) {
3133 case IPV6_2292PKTINFO:
3134 case IPV6_2292HOPLIMIT:
3135 case IPV6_2292NEXTHOP:
3136 case IPV6_2292HOPOPTS:
3137 case IPV6_2292DSTOPTS:
3138 case IPV6_2292RTHDR:
3139 case IPV6_2292PKTOPTIONS:
3140 return (ENOPROTOOPT);
3141 }
3142 }
3143 if (sticky && cmsg) {
3144 switch (optname) {
3145 case IPV6_PKTINFO:
3146 case IPV6_HOPLIMIT:
3147 case IPV6_NEXTHOP:
3148 case IPV6_HOPOPTS:
3149 case IPV6_DSTOPTS:
3150 case IPV6_RTHDRDSTOPTS:
3151 case IPV6_RTHDR:
3152 case IPV6_USE_MIN_MTU:
3153 case IPV6_DONTFRAG:
3154 case IPV6_OTCLASS:
3155 case IPV6_TCLASS:
3156 return (ENOPROTOOPT);
3157 }
3158 }
3159
3160 switch (optname) {
3161 #ifdef RFC2292
3162 case IPV6_2292PKTINFO:
3163 #endif
3164 case IPV6_PKTINFO:
3165 {
3166 struct ifnet *ifp = NULL;
3167 struct in6_pktinfo *pktinfo;
3168
3169 if (len != sizeof(struct in6_pktinfo))
3170 return (EINVAL);
3171
3172 pktinfo = (struct in6_pktinfo *)buf;
3173
3174 /*
3175 * An application can clear any sticky IPV6_PKTINFO option by
3176 * doing a "regular" setsockopt with ipi6_addr being
3177 * in6addr_any and ipi6_ifindex being zero.
3178 * [RFC 3542, Section 6]
3179 */
3180 if (optname == IPV6_PKTINFO && opt->ip6po_pktinfo &&
3181 pktinfo->ipi6_ifindex == 0 &&
3182 IN6_IS_ADDR_UNSPECIFIED(&pktinfo->ipi6_addr)) {
3183 ip6_clearpktopts(opt, optname);
3184 break;
3185 }
3186
3187 if (uproto == IPPROTO_TCP && optname == IPV6_PKTINFO &&
3188 sticky && !IN6_IS_ADDR_UNSPECIFIED(&pktinfo->ipi6_addr)) {
3189 return (EINVAL);
3190 }
3191
3192 /* validate the interface index if specified. */
3193 if (pktinfo->ipi6_ifindex >= if_indexlim) {
3194 return (ENXIO);
3195 }
3196 if (pktinfo->ipi6_ifindex) {
3197 ifp = ifindex2ifnet[pktinfo->ipi6_ifindex];
3198 if (ifp == NULL)
3199 return (ENXIO);
3200 }
3201
3202 /*
3203 * We store the address anyway, and let in6_selectsrc()
3204 * validate the specified address. This is because ipi6_addr
3205 * may not have enough information about its scope zone, and
3206 * we may need additional information (such as outgoing
3207 * interface or the scope zone of a destination address) to
3208 * disambiguate the scope.
3209 * XXX: the delay of the validation may confuse the
3210 * application when it is used as a sticky option.
3211 */
3212 if (opt->ip6po_pktinfo == NULL) {
3213 opt->ip6po_pktinfo = malloc(sizeof(*pktinfo),
3214 M_IP6OPT, M_NOWAIT);
3215 if (opt->ip6po_pktinfo == NULL)
3216 return (ENOBUFS);
3217 }
3218 memcpy(opt->ip6po_pktinfo, pktinfo, sizeof(*pktinfo));
3219 break;
3220 }
3221
3222 #ifdef RFC2292
3223 case IPV6_2292HOPLIMIT:
3224 #endif
3225 case IPV6_HOPLIMIT:
3226 {
3227 int *hlimp;
3228
3229 /*
3230 * RFC 3542 deprecated the usage of sticky IPV6_HOPLIMIT
3231 * to simplify the ordering among hoplimit options.
3232 */
3233 if (optname == IPV6_HOPLIMIT && sticky)
3234 return (ENOPROTOOPT);
3235
3236 if (len != sizeof(int))
3237 return (EINVAL);
3238 hlimp = (int *)buf;
3239 if (*hlimp < -1 || *hlimp > 255)
3240 return (EINVAL);
3241
3242 opt->ip6po_hlim = *hlimp;
3243 break;
3244 }
3245
3246 case IPV6_OTCLASS:
3247 if (len != sizeof(u_int8_t))
3248 return (EINVAL);
3249
3250 opt->ip6po_tclass = *(u_int8_t *)buf;
3251 break;
3252
3253 case IPV6_TCLASS:
3254 {
3255 int tclass;
3256
3257 if (len != sizeof(int))
3258 return (EINVAL);
3259 tclass = *(int *)buf;
3260 if (tclass < -1 || tclass > 255)
3261 return (EINVAL);
3262
3263 opt->ip6po_tclass = tclass;
3264 break;
3265 }
3266
3267 #ifdef RFC2292
3268 case IPV6_2292NEXTHOP:
3269 #endif
3270 case IPV6_NEXTHOP:
3271 if (!priv)
3272 return (EPERM);
3273
3274 if (len == 0) { /* just remove the option */
3275 ip6_clearpktopts(opt, IPV6_NEXTHOP);
3276 break;
3277 }
3278
3279 /* check if cmsg_len is large enough for sa_len */
3280 if (len < sizeof(struct sockaddr) || len < *buf)
3281 return (EINVAL);
3282
3283 switch (((struct sockaddr *)buf)->sa_family) {
3284 case AF_INET6:
3285 {
3286 struct sockaddr_in6 *sa6 = (struct sockaddr_in6 *)buf;
3287 int error;
3288
3289 if (sa6->sin6_len != sizeof(struct sockaddr_in6))
3290 return (EINVAL);
3291
3292 if (IN6_IS_ADDR_UNSPECIFIED(&sa6->sin6_addr) ||
3293 IN6_IS_ADDR_MULTICAST(&sa6->sin6_addr)) {
3294 return (EINVAL);
3295 }
3296 if ((error = sa6_embedscope(sa6, ip6_use_defzone))
3297 != 0) {
3298 return (error);
3299 }
3300 break;
3301 }
3302 case AF_LINK: /* eventually be supported? */
3303 default:
3304 return (EAFNOSUPPORT);
3305 }
3306
3307 /* turn off the previous option, then set the new option. */
3308 ip6_clearpktopts(opt, IPV6_NEXTHOP);
3309 opt->ip6po_nexthop = malloc(*buf, M_IP6OPT, M_NOWAIT);
3310 if (opt->ip6po_nexthop == NULL)
3311 return (ENOBUFS);
3312 memcpy(opt->ip6po_nexthop, buf, *buf);
3313 break;
3314
3315 #ifdef RFC2292
3316 case IPV6_2292HOPOPTS:
3317 #endif
3318 case IPV6_HOPOPTS:
3319 {
3320 struct ip6_hbh *hbh;
3321 int hbhlen;
3322
3323 /*
3324 * XXX: We don't allow a non-privileged user to set ANY HbH
3325 * options, since per-option restriction has too much
3326 * overhead.
3327 */
3328 if (!priv)
3329 return (EPERM);
3330
3331 if (len == 0) {
3332 ip6_clearpktopts(opt, IPV6_HOPOPTS);
3333 break; /* just remove the option */
3334 }
3335
3336 /* message length validation */
3337 if (len < sizeof(struct ip6_hbh))
3338 return (EINVAL);
3339 hbh = (struct ip6_hbh *)buf;
3340 hbhlen = (hbh->ip6h_len + 1) << 3;
3341 if (len != hbhlen)
3342 return (EINVAL);
3343
3344 /* turn off the previous option, then set the new option. */
3345 ip6_clearpktopts(opt, IPV6_HOPOPTS);
3346 opt->ip6po_hbh = malloc(hbhlen, M_IP6OPT, M_NOWAIT);
3347 if (opt->ip6po_hbh == NULL)
3348 return (ENOBUFS);
3349 memcpy(opt->ip6po_hbh, hbh, hbhlen);
3350
3351 break;
3352 }
3353
3354 #ifdef RFC2292
3355 case IPV6_2292DSTOPTS:
3356 #endif
3357 case IPV6_DSTOPTS:
3358 case IPV6_RTHDRDSTOPTS:
3359 {
3360 struct ip6_dest *dest, **newdest = NULL;
3361 int destlen;
3362
3363 if (!priv) /* XXX: see the comment for IPV6_HOPOPTS */
3364 return (EPERM);
3365
3366 if (len == 0) {
3367 ip6_clearpktopts(opt, optname);
3368 break; /* just remove the option */
3369 }
3370
3371 /* message length validation */
3372 if (len < sizeof(struct ip6_dest))
3373 return (EINVAL);
3374 dest = (struct ip6_dest *)buf;
3375 destlen = (dest->ip6d_len + 1) << 3;
3376 if (len != destlen)
3377 return (EINVAL);
3378 /*
3379 * Determine the position that the destination options header
3380 * should be inserted; before or after the routing header.
3381 */
3382 switch (optname) {
3383 case IPV6_2292DSTOPTS:
3384 /*
3385 * The old advanced API is ambiguous on this point.
3386 * Our approach is to determine the position based
3387 * according to the existence of a routing header.
3388 * Note, however, that this depends on the order of the
3389 * extension headers in the ancillary data; the 1st
3390 * part of the destination options header must appear
3391 * before the routing header in the ancillary data,
3392 * too.
3393 * RFC3542 solved the ambiguity by introducing
3394 * separate ancillary data or option types.
3395 */
3396 if (opt->ip6po_rthdr == NULL)
3397 newdest = &opt->ip6po_dest1;
3398 else
3399 newdest = &opt->ip6po_dest2;
3400 break;
3401 case IPV6_RTHDRDSTOPTS:
3402 newdest = &opt->ip6po_dest1;
3403 break;
3404 case IPV6_DSTOPTS:
3405 #if defined(MOBILE_IPV6) && NMIP > 0
3406 /*
3407 * Check whether this destination option is
3408 * home address option.
3409 * If so, the option must be stored in ip6po_hoa
3410 */
3411 if (mip6_search_hoa_in_destopt((u_int8_t *)dest) != NULL)
3412 newdest = &opt->ip6po_hoa;
3413 else
3414 newdest = &opt->ip6po_dest2;
3415 #else
3416 newdest = &opt->ip6po_dest2;
3417 #endif /* MOBILE_IPV6 && NMIP > 0 */
3418 break;
3419 }
3420
3421 /* turn off the previous option, then set the new option. */
3422 ip6_clearpktopts(opt, optname);
3423 *newdest = malloc(destlen, M_IP6OPT, M_NOWAIT);
3424 if (*newdest == NULL)
3425 return (ENOBUFS);
3426 memcpy(*newdest, dest, destlen);
3427
3428 break;
3429 }
3430
3431 #ifdef RFC2292
3432 case IPV6_2292RTHDR:
3433 #endif
3434 case IPV6_RTHDR:
3435 {
3436 struct ip6_rthdr *rth;
3437 int rthlen;
3438
3439 if (len == 0) {
3440 ip6_clearpktopts(opt, IPV6_RTHDR);
3441 break; /* just remove the option */
3442 }
3443
3444 /* message length validation */
3445 if (len < sizeof(struct ip6_rthdr))
3446 return (EINVAL);
3447 rth = (struct ip6_rthdr *)buf;
3448 rthlen = (rth->ip6r_len + 1) << 3;
3449 if (len != rthlen)
3450 return (EINVAL);
3451 switch (rth->ip6r_type) {
3452 case IPV6_RTHDR_TYPE_0:
3453 if (rth->ip6r_len == 0) /* must contain one addr */
3454 return (EINVAL);
3455 if (rth->ip6r_len % 2) /* length must be even */
3456 return (EINVAL);
3457 if (rth->ip6r_len / 2 != rth->ip6r_segleft)
3458 return (EINVAL);
3459 break;
3460 #ifdef MOBILE_IPV6
3461 case IPV6_RTHDR_TYPE_2:
3462 if (rth->ip6r_len == 0) /* must contain one addr */
3463 return (EINVAL);
3464 if (rth->ip6r_len != 2) /* length must be 2 */
3465 return (EINVAL);
3466 if (rth->ip6r_segleft != 1)
3467 return (EINVAL);
3468 break;
3469 #endif /* MOBILE_IPV6 */
3470 default:
3471 return (EINVAL); /* not supported */
3472 }
3473 /* turn off the previous option */
3474 ip6_clearpktopts(opt, IPV6_RTHDR);
3475 if (rth->ip6r_type == IPV6_RTHDR_TYPE_0) {
3476 opt->ip6po_rthdr = malloc(rthlen, M_IP6OPT, M_NOWAIT);
3477 if (opt->ip6po_rthdr == NULL)
3478 return (ENOBUFS);
3479 bcopy(rth, opt->ip6po_rthdr, rthlen);
3480 }
3481 #ifdef MOBILE_IPV6
3482 else if (rth->ip6r_type == IPV6_RTHDR_TYPE_2) {
3483 opt->ip6po_rthdr2 = malloc(rthlen, M_IP6OPT, M_NOWAIT);
3484 if (opt->ip6po_rthdr2 == NULL)
3485 return (ENOBUFS);
3486 bcopy(rth, opt->ip6po_rthdr2, rthlen);
3487 }
3488 #endif /* MOBILE_IPV6 */
3489 break;
3490 }
3491
3492 case IPV6_USE_MIN_MTU:
3493 if (len != sizeof(int))
3494 return (EINVAL);
3495 minmtupolicy = *(int *)buf;
3496 if (minmtupolicy != IP6PO_MINMTU_MCASTONLY &&
3497 minmtupolicy != IP6PO_MINMTU_DISABLE &&
3498 minmtupolicy != IP6PO_MINMTU_ALL) {
3499 return (EINVAL);
3500 }
3501 opt->ip6po_minmtu = minmtupolicy;
3502 break;
3503
3504 case IPV6_DONTFRAG:
3505 if (len != sizeof(int))
3506 return (EINVAL);
3507
3508 if (uproto == IPPROTO_TCP || *(int *)buf == 0) {
3509 /*
3510 * we ignore this option for TCP sockets.
3511 * (RFC3542 leaves this case unspecified.)
3512 */
3513 opt->ip6po_flags &= ~IP6PO_DONTFRAG;
3514 } else
3515 opt->ip6po_flags |= IP6PO_DONTFRAG;
3516 break;
3517
3518 default:
3519 return (ENOPROTOOPT);
3520 } /* end of switch */
3521
3522 return (0);
3523 }
3524
3525 /*
3526 * Routine called from ip6_output() to loop back a copy of an IP6 multicast
3527 * packet to the input queue of a specified interface. Note that this
3528 * calls the output routine of the loopback "driver", but with an interface
3529 * pointer that might NOT be lo0ifp -- easier than replicating that code here.
3530 */
3531 void
3532 ip6_mloopback(struct ifnet *ifp, struct mbuf *m,
3533 const struct sockaddr_in6 *dst)
3534 {
3535 struct mbuf *copym;
3536 struct ip6_hdr *ip6;
3537
3538 copym = m_copy(m, 0, M_COPYALL);
3539 if (copym == NULL)
3540 return;
3541
3542 /*
3543 * Make sure to deep-copy IPv6 header portion in case the data
3544 * is in an mbuf cluster, so that we can safely override the IPv6
3545 * header portion later.
3546 */
3547 if ((copym->m_flags & M_EXT) != 0 ||
3548 copym->m_len < sizeof(struct ip6_hdr)) {
3549 copym = m_pullup(copym, sizeof(struct ip6_hdr));
3550 if (copym == NULL)
3551 return;
3552 }
3553
3554 #ifdef DIAGNOSTIC
3555 if (copym->m_len < sizeof(*ip6)) {
3556 m_freem(copym);
3557 return;
3558 }
3559 #endif
3560
3561 ip6 = mtod(copym, struct ip6_hdr *);
3562 /*
3563 * clear embedded scope identifiers if necessary.
3564 * in6_clearscope will touch the addresses only when necessary.
3565 */
3566 in6_clearscope(&ip6->ip6_src);
3567 in6_clearscope(&ip6->ip6_dst);
3568
3569 (void)looutput(ifp, copym, (const struct sockaddr *)dst, NULL);
3570 }
3571
3572 /*
3573 * Chop IPv6 header off from the payload.
3574 */
3575 static int
3576 ip6_splithdr(struct mbuf *m, struct ip6_exthdrs *exthdrs)
3577 {
3578 struct mbuf *mh;
3579 struct ip6_hdr *ip6;
3580
3581 ip6 = mtod(m, struct ip6_hdr *);
3582 if (m->m_len > sizeof(*ip6)) {
3583 MGETHDR(mh, M_DONTWAIT, MT_HEADER);
3584 if (mh == 0) {
3585 m_freem(m);
3586 return ENOBUFS;
3587 }
3588 M_MOVE_PKTHDR(mh, m);
3589 MH_ALIGN(mh, sizeof(*ip6));
3590 m->m_len -= sizeof(*ip6);
3591 m->m_data += sizeof(*ip6);
3592 mh->m_next = m;
3593 m = mh;
3594 m->m_len = sizeof(*ip6);
3595 bcopy((void *)ip6, mtod(m, void *), sizeof(*ip6));
3596 }
3597 exthdrs->ip6e_ip6 = m;
3598 return 0;
3599 }
3600
3601 /*
3602 * Compute IPv6 extension header length.
3603 */
3604 int
3605 ip6_optlen(struct in6pcb *in6p)
3606 {
3607 int len;
3608
3609 if (!in6p->in6p_outputopts)
3610 return 0;
3611
3612 len = 0;
3613 #define elen(x) \
3614 (((struct ip6_ext *)(x)) ? (((struct ip6_ext *)(x))->ip6e_len + 1) << 3 : 0)
3615
3616 len += elen(in6p->in6p_outputopts->ip6po_hbh);
3617 len += elen(in6p->in6p_outputopts->ip6po_dest1);
3618 len += elen(in6p->in6p_outputopts->ip6po_rthdr);
3619 len += elen(in6p->in6p_outputopts->ip6po_dest2);
3620 return len;
3621 #undef elen
3622 }
3623