Home | History | Annotate | Line # | Download | only in netipsec
      1 /*	$NetBSD: ipsecif.c,v 1.24 2025/06/11 02:44:13 ozaki-r Exp $  */
      2 
      3 /*
      4  * Copyright (c) 2017 Internet Initiative Japan Inc.
      5  * All rights reserved.
      6  *
      7  * Redistribution and use in source and binary forms, with or without
      8  * modification, are permitted provided that the following conditions
      9  * are met:
     10  * 1. Redistributions of source code must retain the above copyright
     11  *    notice, this list of conditions and the following disclaimer.
     12  * 2. Redistributions in binary form must reproduce the above copyright
     13  *    notice, this list of conditions and the following disclaimer in the
     14  *    documentation and/or other materials provided with the distribution.
     15  *
     16  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     17  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     18  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     19  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     20  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     21  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     22  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     23  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     24  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     25  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     26  * POSSIBILITY OF SUCH DAMAGE.
     27  */
     28 
     29 #include <sys/cdefs.h>
     30 __KERNEL_RCSID(0, "$NetBSD: ipsecif.c,v 1.24 2025/06/11 02:44:13 ozaki-r Exp $");
     31 
     32 #ifdef _KERNEL_OPT
     33 #include "opt_inet.h"
     34 #include "opt_ipsec.h"
     35 #endif
     36 
     37 #include <sys/param.h>
     38 #include <sys/systm.h>
     39 #include <sys/socket.h>
     40 #include <sys/sockio.h>
     41 #include <sys/mbuf.h>
     42 #include <sys/errno.h>
     43 #include <sys/ioctl.h>
     44 #include <sys/syslog.h>
     45 #include <sys/kernel.h>
     46 
     47 #include <net/if.h>
     48 #include <net/route.h>
     49 
     50 #include <netinet/in.h>
     51 #include <netinet/in_systm.h>
     52 #include <netinet/ip.h>
     53 #include <netinet/ip_var.h>
     54 #include <netinet/in_var.h>
     55 #include <netinet/ip_encap.h>
     56 #include <netinet/ip_ecn.h>
     57 #include <netinet/ip_private.h>
     58 #include <netinet/udp.h>
     59 
     60 #ifdef INET6
     61 #include <netinet/ip6.h>
     62 #include <netinet6/ip6_var.h>
     63 #include <netinet6/ip6_private.h>
     64 #include <netinet6/in6_var.h>
     65 #include <netinet6/ip6protosw.h> /* for struct ip6ctlparam */
     66 #include <netinet/ip_ecn.h>
     67 #endif
     68 
     69 #include <netipsec/key.h>
     70 #include <netipsec/ipsecif.h>
     71 
     72 #include <net/if_ipsec.h>
     73 
/*
 * Forward declarations for the file-local helpers and protocol hooks
 * defined further down in this file.
 */
static int ipsecif_set_natt_ports(struct ipsec_variant *, struct mbuf *);
static void ipsecif4_input(struct mbuf *, int, int, void *);
static int ipsecif4_output(struct ipsec_variant *, int, struct mbuf *);
static int ipsecif4_filter4(const struct ip *, struct ipsec_variant *,
	struct ifnet *);

#ifdef INET6
static int ipsecif6_input(struct mbuf **, int *, int, void *);
static int ipsecif6_output(struct ipsec_variant *, int, struct mbuf *);
static int ipsecif6_filter6(const struct ip6_hdr *, struct ipsec_variant *,
	struct ifnet *);
#endif

/* TTL written into the outer IPv4 header by ipsecif4_prepend_hdr(). */
static int ip_ipsec_ttl = IPSEC_TTL;
/* Non-zero: seed the outer IPv4 TOS from the inner packet's TOS. */
static int ip_ipsec_copy_tos = 0;
#ifdef INET6
/*
 * ip6_ipsec_hlim and ip6_ipsec_pmtu have external linkage; presumably they
 * are referenced from other files (e.g. as tunables) -- confirm.
 */
/* Hop limit written into the outer IPv6 header by ipsecif6_output(). */
int ip6_ipsec_hlim = IPSEC_HLIM;
/*
 * System-wide PMTU policy, consulted by ipsecif6_output() when the
 * interface's own ipsec_pmtu is IPSEC_PMTU_SYSDEFAULT.
 */
int ip6_ipsec_pmtu = 0;
/* Non-zero: copy the inner TOS/traffic class to the outer IPv6 header. */
static int ip6_ipsec_copy_tos = 0;
#endif

/* Encapsulation hooks passed to encap_attach_addr() for IPv4 outer headers. */
static const struct encapsw ipsecif4_encapsw = {
	.encapsw4 = {
		.pr_input = ipsecif4_input,
		.pr_ctlinput = NULL,
	}
};

#ifdef INET6
/*
 * Tentative definition; the initialized definition appears near the end of
 * this file, after the ctlinput wrapper has been generated.
 */
static const struct encapsw ipsecif6_encapsw;
#endif
    105 
    106 static int
    107 ipsecif_set_natt_ports(struct ipsec_variant *var, struct mbuf *m)
    108 {
    109 
    110 	KASSERT(if_ipsec_heldref_variant(var));
    111 
    112 	if (var->iv_sport || var->iv_dport) {
    113 		struct m_tag *mtag;
    114 
    115 		mtag = m_tag_get(PACKET_TAG_IPSEC_NAT_T_PORTS,
    116 		    sizeof(uint16_t) + sizeof(uint16_t), M_DONTWAIT);
    117 		if (mtag) {
    118 			uint16_t *natt_port;
    119 
    120 			natt_port = (uint16_t *)(mtag + 1);
    121 			natt_port[0] = var->iv_dport;
    122 			natt_port[1] = var->iv_sport;
    123 			m_tag_prepend(m, mtag);
    124 		} else {
    125 			return ENOBUFS;
    126 		}
    127 	}
    128 
    129 	return 0;
    130 }
    131 
    132 static struct mbuf *
    133 ipsecif4_prepend_hdr(struct ipsec_variant *var, struct mbuf *m,
    134     uint8_t proto, uint8_t tos)
    135 {
    136 	struct ip *ip;
    137 	struct sockaddr_in *src, *dst;
    138 
    139 	src = satosin(var->iv_psrc);
    140 	dst = satosin(var->iv_pdst);
    141 
    142 	if (in_nullhost(src->sin_addr) || in_nullhost(src->sin_addr) ||
    143 	    src->sin_addr.s_addr == INADDR_BROADCAST ||
    144 	    dst->sin_addr.s_addr == INADDR_BROADCAST) {
    145 		m_freem(m);
    146 		return NULL;
    147 	}
    148 	m->m_flags &= ~M_BCAST;
    149 
    150 	if (IN_MULTICAST(src->sin_addr.s_addr) ||
    151 	    IN_MULTICAST(dst->sin_addr.s_addr)) {
    152 		m_freem(m);
    153 		return NULL;
    154 	}
    155 
    156 	M_PREPEND(m, sizeof(struct ip), M_DONTWAIT);
    157 	if (m && M_UNWRITABLE(m, sizeof(struct ip)))
    158 		m = m_pullup(m, sizeof(struct ip));
    159 	if (m == NULL)
    160 		return NULL;
    161 
    162 	ip = mtod(m, struct ip *);
    163 	ip->ip_v = IPVERSION;
    164 	ip->ip_off = htons(0);
    165 	if (m->m_pkthdr.len < IP_MINFRAGSIZE)
    166 		ip->ip_id = 0;
    167 	else
    168 		ip->ip_id = ip_newid();
    169 	ip->ip_hl = sizeof(*ip) >> 2;
    170 	if (ip_ipsec_copy_tos)
    171 		ip->ip_tos = tos;
    172 	else
    173 		ip->ip_tos = 0;
    174 	ip->ip_sum = 0;
    175 	ip->ip_src = src->sin_addr;
    176 	ip->ip_dst = dst->sin_addr;
    177 	ip->ip_p = proto;
    178 	ip->ip_ttl = ip_ipsec_ttl;
    179 	ip->ip_len = htons(m->m_pkthdr.len);
    180 #ifndef IPSEC_TX_TOS_CLEAR
    181 	struct ifnet *ifp = &var->iv_softc->ipsec_if;
    182 	if (ifp->if_flags & IFF_ECN)
    183 		ip_ecn_ingress(ECN_ALLOWED, &ip->ip_tos, &tos);
    184 	else
    185 		ip_ecn_ingress(ECN_NOCARE, &ip->ip_tos, &tos);
    186 #endif
    187 
    188 	return m;
    189 }
    190 
    191 static int
    192 ipsecif4_needfrag(struct mbuf *m, struct ipsecrequest *isr)
    193 {
    194 	struct ip ip0;
    195 	struct ip *ip;
    196 	int mtu;
    197 	struct secasvar *sav;
    198 
    199 	sav = key_lookup_sa_bysaidx(&isr->saidx);
    200 	if (sav == NULL)
    201 		return 0;
    202 
    203 	if (!(sav->natt_type & UDP_ENCAP_ESPINUDP)) {
    204 		mtu = 0;
    205 		goto out;
    206 	}
    207 
    208 	if (m->m_len < sizeof(struct ip)) {
    209 		m_copydata(m, 0, sizeof(ip0), &ip0);
    210 		ip = &ip0;
    211 	} else {
    212 		ip = mtod(m, struct ip *);
    213 	}
    214 	mtu = sav->esp_frag;
    215 	if (ntohs(ip->ip_len) <= mtu)
    216 		mtu = 0;
    217 
    218 out:
    219 	KEY_SA_UNREF(&sav);
    220 	return mtu;
    221 }
    222 
    223 static struct mbuf *
    224 ipsecif4_flowinfo(struct mbuf *m, int family, int *proto0, u_int8_t *tos0)
    225 {
    226 	const struct ip *ip;
    227 	int proto;
    228 	int tos;
    229 
    230 	KASSERT(proto0 != NULL);
    231 	KASSERT(tos0 != NULL);
    232 
    233 	switch (family) {
    234 	case AF_INET:
    235 		proto = IPPROTO_IPV4;
    236 		if (m->m_len < sizeof(*ip)) {
    237 			m = m_pullup(m, sizeof(*ip));
    238 			if (m == NULL) {
    239 				*tos0 = 0;
    240 				*proto0 = 0;
    241 				return NULL;
    242 			}
    243 		}
    244 		ip = mtod(m, const struct ip *);
    245 		tos = ip->ip_tos;
    246 		/* TODO: support ALTQ for inner packet */
    247 		break;
    248 #ifdef INET6
    249 	case AF_INET6: {
    250 		const struct ip6_hdr *ip6;
    251 		proto = IPPROTO_IPV6;
    252 		if (m->m_len < sizeof(*ip6)) {
    253 			m = m_pullup(m, sizeof(*ip6));
    254 			if (m == NULL) {
    255 				*tos0 = 0;
    256 				*proto0 = 0;
    257 				return NULL;
    258 			}
    259 		}
    260 		ip6 = mtod(m, const struct ip6_hdr *);
    261 		tos = (ntohl(ip6->ip6_flow) >> 20) & 0xff;
    262 		/* TODO: support ALTQ for inner packet */
    263 		break;
    264 	}
    265 #endif /* INET6 */
    266 	default:
    267 		*tos0 = 0;
    268 		*proto0 = 0;
    269 		return NULL;
    270 	}
    271 
    272 	*proto0 = proto;
    273 	*tos0 = tos;
    274 	return m;
    275 }
    276 
    277 static int
    278 ipsecif4_fragout(struct ipsec_variant *var, int family, struct mbuf *m, int mtu)
    279 {
    280 	struct ifnet *ifp = &var->iv_softc->ipsec_if;
    281 	struct mbuf *next;
    282 	struct m_tag *mtag;
    283 	int error;
    284 
    285 	KASSERT(if_ipsec_heldref_variant(var));
    286 
    287 	mtag = m_tag_find(m, PACKET_TAG_IPSEC_NAT_T_PORTS);
    288 	if (mtag)
    289 		m_tag_delete(m, mtag);
    290 
    291 	/* consider new IP header prepended in ipsecif4_output() */
    292 	if (mtu <= sizeof(struct ip)) {
    293 		m_freem(m);
    294 		return ENETUNREACH;
    295 	}
    296 	m->m_pkthdr.csum_flags |= M_CSUM_IPv4;
    297 	error = ip_fragment(m, ifp, mtu - sizeof(struct ip));
    298 	if (error)
    299 		return error;
    300 
    301 	for (error = 0; m; m = next) {
    302 		next = m->m_nextpkt;
    303 		m->m_nextpkt = NULL;
    304 		if (error) {
    305 			m_freem(m);
    306 			continue;
    307 		}
    308 
    309 		error = ipsecif4_output(var, family, m);
    310 	}
    311 	if (error == 0)
    312 		IP_STATINC(IP_STAT_FRAGMENTED);
    313 
    314 	return error;
    315 }
    316 
    317 int
    318 ipsecif4_encap_func(struct mbuf *m, struct ip *ip, struct ipsec_variant *var)
    319 {
    320 	struct m_tag *mtag;
    321 	struct sockaddr_in *src, *dst;
    322 	u_int16_t src_port = 0;
    323 	u_int16_t dst_port = 0;
    324 
    325 	KASSERT(var != NULL);
    326 
    327 	src = satosin(var->iv_psrc);
    328 	dst = satosin(var->iv_pdst);
    329 	mtag = m_tag_find(m, PACKET_TAG_IPSEC_NAT_T_PORTS);
    330 	if (mtag) {
    331 		u_int16_t *ports;
    332 
    333 		ports = (u_int16_t *)(mtag + 1);
    334 		src_port = ports[0];
    335 		dst_port = ports[1];
    336 	}
    337 
    338 	/* address match */
    339 	if (src->sin_addr.s_addr != ip->ip_dst.s_addr ||
    340 	    dst->sin_addr.s_addr != ip->ip_src.s_addr)
    341 		return 0;
    342 
    343 	/* UDP encap? */
    344 	if (mtag == NULL && var->iv_sport == 0 && var->iv_dport == 0)
    345 		goto match;
    346 
    347 	/* port match */
    348 	if (src_port != var->iv_dport ||
    349 	    dst_port != var->iv_sport) {
    350 #ifdef DEBUG
    351 		printf("%s: port mismatch: pkt(%u, %u), if(%u, %u)\n",
    352 		    __func__, ntohs(src_port), ntohs(dst_port),
    353 		    ntohs(var->iv_sport), ntohs(var->iv_dport));
    354 #endif
    355 		return 0;
    356 	}
    357 
    358 match:
    359 	/*
    360 	 * hide NAT-T information from encapsulated traffics.
    361 	 * they don't know about IPsec.
    362 	 */
    363 	if (mtag)
    364 		m_tag_delete(m, mtag);
    365 	return sizeof(src->sin_addr) + sizeof(dst->sin_addr);
    366 }
    367 
/*
 * Output routine installed as var->iv_output for tunnels whose outer
 * header is IPv4.
 *
 *   var    - tunnel variant (caller must hold a reference; both endpoints
 *            must be AF_INET)
 *   family - address family of the inner packet (AF_INET or AF_INET6)
 *   m      - inner packet; ownership passes to this function
 *
 * The outbound security policy for the family is selected, the outer IPv4
 * header is prepended, and the packet is either fragmented for NAT-T
 * (ipsecif4_fragout(), which re-enters this function per fragment) or
 * tagged with the NAT-T ports and handed to ipsec4_process_packet().
 *
 * Returns 0 on success or an errno value.  ENOENT from
 * ipsec4_process_packet() is reported as success.
 */
static int
ipsecif4_output(struct ipsec_variant *var, int family, struct mbuf *m)
{
	struct secpolicy *sp = NULL;
	u_int8_t tos;
	int proto;
	int error;
	int mtu;
	u_long sa_mtu = 0;

	KASSERT(if_ipsec_heldref_variant(var));
	KASSERT(if_ipsec_variant_is_configured(var));
	KASSERT(var->iv_psrc->sa_family == AF_INET);
	KASSERT(var->iv_pdst->sa_family == AF_INET);

	/* Pick the outbound policy that matches the inner address family. */
	switch (family) {
	case AF_INET:
		sp = IV_SP_OUT(var);
		break;
	case AF_INET6:
		sp = IV_SP_OUT6(var);
		break;
	default:
		m_freem(m);
		return EAFNOSUPPORT;
	}
	KASSERT(sp != NULL);
	/*
	 * The SPs in ipsec_variant are prevented from being freed by
	 * ipsec_variant->iv_psref. So, KEY_SP_REF() is unnecessary here.
	 *
	 * However, lastused should be updated.
	 */
	key_sp_touch(sp);

	KASSERT(sp->policy != IPSEC_POLICY_NONE);
	KASSERT(sp->policy != IPSEC_POLICY_ENTRUST);
	KASSERT(sp->policy != IPSEC_POLICY_BYPASS);
	/* Only IPSEC_POLICY_IPSEC results in transmission. */
	if (sp->policy != IPSEC_POLICY_IPSEC) {
		m_freem(m);
		error = ENETUNREACH;
		goto done;
	}

	/* get flowinfo */
	m = ipsecif4_flowinfo(m, family, &proto, &tos);
	if (m == NULL) {
		/* NULL means there is nothing left to free at this point */
		error = ENETUNREACH;
		goto done;
	}

	/* prepend new IP header */
	m = ipsecif4_prepend_hdr(var, m, proto, tos);
	if (m == NULL) {
		error = ENETUNREACH;
		goto done;
	}

	/*
	 * Normal netipsec's NAT-T fragmentation is done in ip_output().
	 * See "natt_frag" processing.
	 * However, ipsec(4) interface's one is not done in the same way,
	 * so we must do NAT-T fragmentation by own code.
	 */
	/* NAT-T ESP fragmentation */
	mtu = ipsecif4_needfrag(m, sp->req);
	if (mtu > 0)
		return ipsecif4_fragout(var, family, m, mtu);

	/* set NAT-T ports */
	error = ipsecif_set_natt_ports(var, m);
	if (error) {
		/* the helper does not free the packet on failure */
		m_freem(m);
		goto done;
	}

	/* IPsec output */
	IP_STATINC(IP_STAT_LOCALOUT);
	error = ipsec4_process_packet(m, sp->req, &sa_mtu);
	/* ENOENT is not reported to the caller as a failure. */
	if (error == ENOENT)
		error = 0;
	/*
	 * fragmentation is already done in ipsecif4_fragout(),
	 * so ipsec4_process_packet() must not do fragmentation here.
	 */
	KASSERT(sa_mtu == 0);

done:
	return error;
}
    458 
    459 #ifdef INET6
    460 int
    461 ipsecif6_encap_func(struct mbuf *m, struct ip6_hdr *ip6, struct ipsec_variant *var)
    462 {
    463 	struct m_tag *mtag;
    464 	struct sockaddr_in6 *src, *dst;
    465 	u_int16_t src_port = 0;
    466 	u_int16_t dst_port = 0;
    467 
    468 	KASSERT(var != NULL);
    469 
    470 	src = satosin6(var->iv_psrc);
    471 	dst = satosin6(var->iv_pdst);
    472 	mtag = m_tag_find(m, PACKET_TAG_IPSEC_NAT_T_PORTS);
    473 	if (mtag) {
    474 		u_int16_t *ports;
    475 
    476 		ports = (u_int16_t *)(mtag + 1);
    477 		src_port = ports[0];
    478 		dst_port = ports[1];
    479 	}
    480 
    481 	/* address match */
    482 	if (!IN6_ARE_ADDR_EQUAL(&src->sin6_addr, &ip6->ip6_dst) ||
    483 	    !IN6_ARE_ADDR_EQUAL(&dst->sin6_addr, &ip6->ip6_src))
    484 		return 0;
    485 
    486 	/* UDP encap? */
    487 	if (mtag == NULL && var->iv_sport == 0 && var->iv_dport == 0)
    488 		goto match;
    489 
    490 	/* port match */
    491 	if (src_port != var->iv_dport ||
    492 	    dst_port != var->iv_sport) {
    493 #ifdef DEBUG
    494 		printf("%s: port mismatch: pkt(%u, %u), if(%u, %u)\n",
    495 		    __func__, ntohs(src_port), ntohs(dst_port),
    496 		    ntohs(var->iv_sport), ntohs(var->iv_dport));
    497 #endif
    498 		return 0;
    499 	}
    500 
    501 match:
    502 	/*
    503 	 * hide NAT-T information from encapsulated traffics.
    504 	 * they don't know about IPsec.
    505 	 */
    506 	if (mtag)
    507 		m_tag_delete(m, mtag);
    508 	return sizeof(src->sin6_addr) + sizeof(dst->sin6_addr);
    509 }
    510 
/*
 * Output routine installed as var->iv_output for tunnels whose outer
 * header is IPv6.
 *
 *   var    - tunnel variant (caller must hold a reference; both endpoints
 *            must be AF_INET6)
 *   family - address family of the inner packet
 *   m      - inner packet; ownership passes to this function
 *
 * The inner TOS/traffic class is read, an outer IPv6 header is prepended,
 * the route to the tunnel destination is looked up in the per-CPU route
 * cache, the NAT-T ports tag is attached and the packet is sent with
 * ip6_output().
 *
 * Returns 0 on success or an errno value (ENOBUFS, EAFNOSUPPORT,
 * ENETUNREACH, or whatever the helpers / ip6_output() return).
 */
static int
ipsecif6_output(struct ipsec_variant *var, int family, struct mbuf *m)
{
	struct ifnet *ifp = &var->iv_softc->ipsec_if;
	struct ipsec_softc *sc = ifp->if_softc;
	struct route *ro_pc;
	kmutex_t *lock_pc;
	struct rtentry *rt;
	struct sockaddr_in6 *sin6_src;
	struct sockaddr_in6 *sin6_dst;
	struct ip6_hdr *ip6;
	int proto, error, flags;
	u_int8_t itos, otos;
	union {
		struct sockaddr		dst;
		struct sockaddr_in6	dst6;
	} u;

	KASSERT(if_ipsec_heldref_variant(var));
	KASSERT(if_ipsec_variant_is_configured(var));

	sin6_src = satosin6(var->iv_psrc);
	sin6_dst = satosin6(var->iv_pdst);

	KASSERT(sin6_src->sin6_family == AF_INET6);
	KASSERT(sin6_dst->sin6_family == AF_INET6);

	/* Determine the next-header value and the inner TOS byte. */
	switch (family) {
#ifdef INET
	case AF_INET:
	    {
		struct ip *ip;

		proto = IPPROTO_IPV4;
		if (m->m_len < sizeof(*ip)) {
			m = m_pullup(m, sizeof(*ip));
			if (m == NULL)
				return ENOBUFS;
		}
		ip = mtod(m, struct ip *);
		itos = ip->ip_tos;
		/* TODO: support ALTQ for inner packet */
		break;
	    }
#endif /* INET */
	case AF_INET6:
	    {
		struct ip6_hdr *xip6;
		proto = IPPROTO_IPV6;
		if (m->m_len < sizeof(*xip6)) {
			m = m_pullup(m, sizeof(*xip6));
			if (m == NULL)
				return ENOBUFS;
		}
		xip6 = mtod(m, struct ip6_hdr *);
		/* traffic class is bits 20..27 of the host-order flow word */
		itos = (ntohl(xip6->ip6_flow) >> 20) & 0xff;
		/* TODO: support ALTQ for inner packet */
		break;
	    }
	default:
		m_freem(m);
		return EAFNOSUPPORT;
	}

	/* prepend new IP header */
	M_PREPEND(m, sizeof(struct ip6_hdr), M_DONTWAIT);
	if (m && M_UNWRITABLE(m, sizeof(struct ip6_hdr)))
		m = m_pullup(m, sizeof(struct ip6_hdr));
	if (m == NULL)
		return ENOBUFS;

	ip6 = mtod(m, struct ip6_hdr *);
	ip6->ip6_flow	= 0;
	ip6->ip6_vfc	&= ~IPV6_VERSION_MASK;
	ip6->ip6_vfc	|= IPV6_VERSION;
#if 0	/* ip6->ip6_plen will be filled by ip6_output */
	ip6->ip6_plen	= htons((u_short)m->m_pkthdr.len - sizeof(*ip6));
#endif
	ip6->ip6_nxt	= proto;
	ip6->ip6_hlim	= ip6_ipsec_hlim;
	ip6->ip6_src	= sin6_src->sin6_addr;
	/* bidirectional configured tunnel mode */
	if (!IN6_IS_ADDR_UNSPECIFIED(&sin6_dst->sin6_addr)) {
		ip6->ip6_dst = sin6_dst->sin6_addr;
	} else  {
		/* no usable tunnel destination */
		m_freem(m);
		return ENETUNREACH;
	}
#ifndef IPSEC_TX_TOS_CLEAR
	/*
	 * NOTE(review): otos is assigned here only when ip6_ipsec_copy_tos
	 * is off; otherwise this code relies on ip_ecn_ingress() to store
	 * the outer value -- confirm against ip_ecn_ingress().
	 */
	if (!ip6_ipsec_copy_tos)
		otos = 0;

	if (ifp->if_flags & IFF_ECN)
		ip_ecn_ingress(ECN_ALLOWED, &otos, &itos);
	else
		ip_ecn_ingress(ECN_NOCARE, &otos, &itos);
#else
	if (ip6_ipsec_copy_tos)
		otos = itos;
	else
		otos = 0;
#endif
	/* store otos in the traffic-class bits of the outer flow word */
	ip6->ip6_flow &= ~ntohl(0xff00000);
	ip6->ip6_flow |= htonl((u_int32_t)otos << 20);

	sockaddr_in6_init(&u.dst6, &sin6_dst->sin6_addr, 0, 0, 0);

	/*
	 * Borrow this CPU's cached route; every return path from here on
	 * must hand it back with if_tunnel_put_ro().
	 */
	if_tunnel_get_ro(sc->ipsec_ro_percpu, &ro_pc, &lock_pc);
	if ((rt = rtcache_lookup(ro_pc, &u.dst)) == NULL) {
		if_tunnel_put_ro(sc->ipsec_ro_percpu, lock_pc);
		m_freem(m);
		return ENETUNREACH;
	}

	/*
	 * The route to the tunnel destination goes out through this very
	 * interface: refuse it (presumably to avoid recursive
	 * encapsulation -- confirm) and flush the cached route.
	 */
	if (rt->rt_ifp == ifp) {
		rtcache_unref(rt, ro_pc);
		rtcache_free(ro_pc);
		if_tunnel_put_ro(sc->ipsec_ro_percpu, lock_pc);
		m_freem(m);
		return ENETUNREACH;
	}
	rtcache_unref(rt, ro_pc);

	/* set NAT-T ports */
	error = ipsecif_set_natt_ports(var, m);
	if (error) {
		m_freem(m);
		goto out;
	}

	/*
	 * - IPSEC_PMTU_MINMTU
	 *   Force fragmentation to minimum MTU to avoid path MTU discovery
	 * - IPSEC_PMTU_OUTERMTU
	 *   Trust outer MTU is large enough to send all packets
	 *
	 * It is too painful to ask for resend of inner packet, to achieve
	 * path MTU discovery for encapsulated packets.
	 *
	 * See RFC4459.
	 */
	if (sc->ipsec_pmtu == IPSEC_PMTU_SYSDEFAULT) {
		/* no per-interface choice: follow the system-wide setting */
		switch (ip6_ipsec_pmtu) {
		case IPSEC_PMTU_MINMTU:
			flags = IPV6_MINMTU;
			break;
		case IPSEC_PMTU_OUTERMTU:
			flags = 0;
			break;
		default:
#ifdef DEBUG
			log(LOG_DEBUG, "%s: ignore unexpected ip6_ipsec_pmtu %d\n",
			    __func__, ip6_ipsec_pmtu);
#endif
			/* unknown values fall back to the minimum-MTU policy */
			flags = IPV6_MINMTU;
			break;
		}
	} else {
		switch (sc->ipsec_pmtu) {
		case IPSEC_PMTU_MINMTU:
			flags = IPV6_MINMTU;
			break;
		case IPSEC_PMTU_OUTERMTU:
			flags = 0;
			break;
		default:
#ifdef DEBUG
			log(LOG_DEBUG, "%s: ignore unexpected ipsec_pmtu of %s %d\n",
			    __func__, ifp->if_xname, sc->ipsec_pmtu);
#endif
			/* unknown values fall back to the minimum-MTU policy */
			flags = IPV6_MINMTU;
			break;
		}
	}
	error = ip6_output(m, 0, ro_pc, flags, 0, NULL, NULL);

out:
	/* On any failure discard the cached route before releasing it. */
	if (error)
		rtcache_free(ro_pc);
	if_tunnel_put_ro(sc->ipsec_ro_percpu, lock_pc);

	return error;
}
    694 #endif /* INET6 */
    695 
    696 static void
    697 ipsecif4_input(struct mbuf *m, int off, int proto, void *eparg)
    698 {
    699 	struct ifnet *ipsecp;
    700 	struct ipsec_softc *sc = eparg;
    701 	struct ipsec_variant *var;
    702 	const struct ip *ip;
    703 	int af;
    704 #ifndef IPSEC_TX_TOS_CLEAR
    705 	u_int8_t otos;
    706 #endif
    707 	struct psref psref_rcvif;
    708 	struct psref psref_var;
    709 	struct ifnet *rcvif;
    710 
    711 	KASSERT(sc != NULL);
    712 
    713 	ipsecp = &sc->ipsec_if;
    714 	if ((ipsecp->if_flags & IFF_UP) == 0) {
    715 		m_freem(m);
    716 		ip_statinc(IP_STAT_NOIPSEC);
    717 		return;
    718 	}
    719 
    720 	var = if_ipsec_getref_variant(sc, &psref_var);
    721 	if (if_ipsec_variant_is_unconfigured(var)) {
    722 		if_ipsec_putref_variant(var, &psref_var);
    723 		m_freem(m);
    724 		ip_statinc(IP_STAT_NOIPSEC);
    725 		return;
    726 	}
    727 
    728 	ip = mtod(m, const struct ip *);
    729 
    730 	rcvif = m_get_rcvif_psref(m, &psref_rcvif);
    731 	if (rcvif == NULL || !ipsecif4_filter4(ip, var, rcvif)) {
    732 		m_put_rcvif_psref(rcvif, &psref_rcvif);
    733 		if_ipsec_putref_variant(var, &psref_var);
    734 		m_freem(m);
    735 		ip_statinc(IP_STAT_NOIPSEC);
    736 		return;
    737 	}
    738 	m_put_rcvif_psref(rcvif, &psref_rcvif);
    739 	if_ipsec_putref_variant(var, &psref_var);
    740 #ifndef IPSEC_TX_TOS_CLEAR
    741 	otos = ip->ip_tos;
    742 #endif
    743 	m_adj(m, off);
    744 
    745 	switch (proto) {
    746 	case IPPROTO_IPV4:
    747 	    {
    748 		struct ip *xip;
    749 		af = AF_INET;
    750 		if (M_UNWRITABLE(m, sizeof(*xip))) {
    751 			m = m_pullup(m, sizeof(*xip));
    752 			if (m == NULL)
    753 				return;
    754 		}
    755 		xip = mtod(m, struct ip *);
    756 #ifndef IPSEC_TX_TOS_CLEAR
    757 		if (ipsecp->if_flags & IFF_ECN)
    758 			ip_ecn_egress(ECN_ALLOWED, &otos, &xip->ip_tos);
    759 		else
    760 			ip_ecn_egress(ECN_NOCARE, &otos, &xip->ip_tos);
    761 #endif
    762 		break;
    763 	    }
    764 #ifdef INET6
    765 	case IPPROTO_IPV6:
    766 	    {
    767 		struct ip6_hdr *ip6;
    768 		u_int8_t itos;
    769 		af = AF_INET6;
    770 		if (M_UNWRITABLE(m, sizeof(*ip6))) {
    771 			m = m_pullup(m, sizeof(*ip6));
    772 			if (m == NULL)
    773 				return;
    774 		}
    775 		ip6 = mtod(m, struct ip6_hdr *);
    776 		itos = (ntohl(ip6->ip6_flow) >> 20) & 0xff;
    777 #ifndef IPSEC_TX_TOS_CLEAR
    778 		if (ipsecp->if_flags & IFF_ECN)
    779 			ip_ecn_egress(ECN_ALLOWED, &otos, &itos);
    780 		else
    781 			ip_ecn_egress(ECN_NOCARE, &otos, &itos);
    782 #endif
    783 		ip6->ip6_flow &= ~htonl(0xff << 20);
    784 		ip6->ip6_flow |= htonl((u_int32_t)itos << 20);
    785 		break;
    786 	    }
    787 #endif /* INET6 */
    788 	default:
    789 		ip_statinc(IP_STAT_NOIPSEC);
    790 		m_freem(m);
    791 		return;
    792 	}
    793 	if_ipsec_input(m, af, ipsecp);
    794 
    795 	return;
    796 }
    797 
    798 /*
    799  * validate and filter the packet
    800  */
    801 static int
    802 ipsecif4_filter4(const struct ip *ip, struct ipsec_variant *var,
    803     struct ifnet *ifp)
    804 {
    805 	struct sockaddr_in *src, *dst;
    806 
    807 	src = satosin(var->iv_psrc);
    808 	dst = satosin(var->iv_pdst);
    809 
    810 	return in_tunnel_validate(ip, src->sin_addr, dst->sin_addr);
    811 }
    812 
    813 #ifdef INET6
    814 static int
    815 ipsecif6_input(struct mbuf **mp, int *offp, int proto, void *eparg)
    816 {
    817 	struct mbuf *m = *mp;
    818 	struct ifnet *ipsecp;
    819 	struct ipsec_softc *sc = eparg;
    820 	struct ipsec_variant *var;
    821 	struct ip6_hdr *ip6;
    822 	int af = 0;
    823 #ifndef IPSEC_TX_TOS_CLEAR
    824 	u_int32_t otos;
    825 #endif
    826 	struct psref psref_rcvif;
    827 	struct psref psref_var;
    828 	struct ifnet *rcvif;
    829 
    830 	KASSERT(eparg != NULL);
    831 
    832 	ipsecp = &sc->ipsec_if;
    833 	if ((ipsecp->if_flags & IFF_UP) == 0) {
    834 		m_freem(m);
    835 		IP6_STATINC(IP6_STAT_NOIPSEC);
    836 		return IPPROTO_DONE;
    837 	}
    838 
    839 	var = if_ipsec_getref_variant(sc, &psref_var);
    840 	if (if_ipsec_variant_is_unconfigured(var)) {
    841 		if_ipsec_putref_variant(var, &psref_var);
    842 		m_freem(m);
    843 		IP6_STATINC(IP6_STAT_NOIPSEC);
    844 		return IPPROTO_DONE;
    845 	}
    846 
    847 	ip6 = mtod(m, struct ip6_hdr *);
    848 
    849 	rcvif = m_get_rcvif_psref(m, &psref_rcvif);
    850 	if (rcvif == NULL || !ipsecif6_filter6(ip6, var, rcvif)) {
    851 		m_put_rcvif_psref(rcvif, &psref_rcvif);
    852 		if_ipsec_putref_variant(var, &psref_var);
    853 		m_freem(m);
    854 		IP6_STATINC(IP6_STAT_NOIPSEC);
    855 		return IPPROTO_DONE;
    856 	}
    857 	m_put_rcvif_psref(rcvif, &psref_rcvif);
    858 	if_ipsec_putref_variant(var, &psref_var);
    859 
    860 #ifndef IPSEC_TX_TOS_CLEAR
    861 	otos = ip6->ip6_flow;
    862 #endif
    863 	m_adj(m, *offp);
    864 
    865 	switch (proto) {
    866 #ifdef INET
    867 	case IPPROTO_IPV4:
    868 	    {
    869 		af = AF_INET;
    870 #ifndef IPSEC_TX_TOS_CLEAR
    871 		struct ip *ip;
    872 		u_int8_t otos8;
    873 		otos8 = (ntohl(otos) >> 20) & 0xff;
    874 
    875 		if (M_UNWRITABLE(m, sizeof(*ip))) {
    876 			m = m_pullup(m, sizeof(*ip));
    877 			if (m == NULL)
    878 				return IPPROTO_DONE;
    879 		}
    880 		ip = mtod(m, struct ip *);
    881 		if (ipsecp->if_flags & IFF_ECN)
    882 			ip_ecn_egress(ECN_ALLOWED, &otos8, &ip->ip_tos);
    883 		else
    884 			ip_ecn_egress(ECN_NOCARE, &otos8, &ip->ip_tos);
    885 #endif
    886 		break;
    887 	    }
    888 #endif /* INET */
    889 	case IPPROTO_IPV6:
    890 	    {
    891 		af = AF_INET6;
    892 #ifndef IPSEC_TX_TOS_CLEAR
    893 		struct ip6_hdr *xip6;
    894 
    895 		if (M_UNWRITABLE(m, sizeof(*xip6))) {
    896 			m = m_pullup(m, sizeof(*xip6));
    897 			if (m == NULL)
    898 				return IPPROTO_DONE;
    899 		}
    900 		xip6 = mtod(m, struct ip6_hdr *);
    901 		if (ipsecp->if_flags & IFF_ECN)
    902 			ip6_ecn_egress(ECN_ALLOWED, &otos, &xip6->ip6_flow);
    903 		else
    904 			ip6_ecn_egress(ECN_NOCARE, &otos, &xip6->ip6_flow);
    905 		break;
    906 #endif
    907 	    }
    908 	default:
    909 		IP6_STATINC(IP6_STAT_NOIPSEC);
    910 		m_freem(m);
    911 		return IPPROTO_DONE;
    912 	}
    913 
    914 	if_ipsec_input(m, af, ipsecp);
    915 	return IPPROTO_DONE;
    916 }
    917 
    918 /*
    919  * validate and filter the packet.
    920  */
    921 static int
    922 ipsecif6_filter6(const struct ip6_hdr *ip6, struct ipsec_variant *var,
    923     struct ifnet *ifp)
    924 {
    925 	struct sockaddr_in6 *src, *dst;
    926 
    927 	src = satosin6(var->iv_psrc);
    928 	dst = satosin6(var->iv_pdst);
    929 
    930 	return in6_tunnel_validate(ip6, &src->sin6_addr, &dst->sin6_addr);
    931 }
    932 #endif /* INET6 */
    933 
    934 int
    935 ipsecif4_attach(struct ipsec_variant *var)
    936 {
    937 	struct ipsec_softc *sc = var->iv_softc;
    938 
    939 	KASSERT(if_ipsec_variant_is_configured(var));
    940 
    941 	if (var->iv_encap_cookie4 != NULL)
    942 		return EALREADY;
    943 
    944 	var->iv_encap_cookie4 = encap_attach_addr(AF_INET, -1,
    945 	    var->iv_psrc, var->iv_pdst, if_ipsec_encap_func, &ipsecif4_encapsw,
    946 	    sc);
    947 	if (var->iv_encap_cookie4 == NULL)
    948 		return EEXIST;
    949 
    950 	var->iv_output = ipsecif4_output;
    951 	return 0;
    952 }
    953 
    954 int
    955 ipsecif4_detach(struct ipsec_variant *var)
    956 {
    957 	int error;
    958 
    959 	if (var->iv_encap_cookie4 == NULL)
    960 		return 0;
    961 
    962 	var->iv_output = NULL;
    963 	error = encap_detach(var->iv_encap_cookie4);
    964 	if (error == 0)
    965 		var->iv_encap_cookie4 = NULL;
    966 
    967 	return error;
    968 }
    969 
    970 #ifdef INET6
    971 int
    972 ipsecif6_attach(struct ipsec_variant *var)
    973 {
    974 	struct ipsec_softc *sc = var->iv_softc;
    975 
    976 	KASSERT(if_ipsec_variant_is_configured(var));
    977 	KASSERT(var->iv_encap_cookie6 == NULL);
    978 
    979 	var->iv_encap_cookie6 = encap_attach_addr(AF_INET6, -1,
    980 	    var->iv_psrc, var->iv_pdst, if_ipsec_encap_func, &ipsecif6_encapsw,
    981 	    sc);
    982 	if (var->iv_encap_cookie6 == NULL)
    983 		return EEXIST;
    984 
    985 	var->iv_output = ipsecif6_output;
    986 	return 0;
    987 }
    988 
    989 int
    990 ipsecif6_detach(struct ipsec_variant *var)
    991 {
    992 	struct ipsec_softc *sc = var->iv_softc;
    993 	int error;
    994 
    995 	KASSERT(var->iv_encap_cookie6 != NULL);
    996 
    997 	if_tunnel_ro_percpu_rtcache_free(sc->ipsec_ro_percpu);
    998 
    999 	var->iv_output = NULL;
   1000 	error = encap_detach(var->iv_encap_cookie6);
   1001 	if (error == 0)
   1002 		var->iv_encap_cookie6 = NULL;
   1003 	return error;
   1004 }
   1005 
   1006 void *
   1007 ipsecif6_ctlinput(int cmd, const struct sockaddr *sa, void *d, void *eparg)
   1008 {
   1009 	struct ipsec_softc *sc = eparg;
   1010 	struct ip6ctlparam *ip6cp = NULL;
   1011 	struct ip6_hdr *ip6;
   1012 	const struct sockaddr_in6 *dst6;
   1013 	struct route *ro_pc;
   1014 	kmutex_t *lock_pc;
   1015 
   1016 	if (sa->sa_family != AF_INET6 ||
   1017 	    sa->sa_len != sizeof(struct sockaddr_in6))
   1018 		return NULL;
   1019 
   1020 	if ((unsigned)cmd >= PRC_NCMDS)
   1021 		return NULL;
   1022 	if (cmd == PRC_HOSTDEAD)
   1023 		d = NULL;
   1024 	else if (inet6ctlerrmap[cmd] == 0)
   1025 		return NULL;
   1026 
   1027 	/* if the parameter is from icmp6, decode it. */
   1028 	if (d != NULL) {
   1029 		ip6cp = (struct ip6ctlparam *)d;
   1030 		ip6 = ip6cp->ip6c_ip6;
   1031 	} else {
   1032 		ip6 = NULL;
   1033 	}
   1034 
   1035 	if (!ip6)
   1036 		return NULL;
   1037 
   1038 	if_tunnel_get_ro(sc->ipsec_ro_percpu, &ro_pc, &lock_pc);
   1039 	dst6 = satocsin6(rtcache_getdst(ro_pc));
   1040 	/* XXX scope */
   1041 	if (dst6 == NULL)
   1042 		;
   1043 	else if (IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst, &dst6->sin6_addr))
   1044 		/* flush route cache */
   1045 		rtcache_free(ro_pc);
   1046 
   1047 	if_tunnel_put_ro(sc->ipsec_ro_percpu, lock_pc);
   1048 
   1049 	return NULL;
   1050 }
   1051 
/*
 * ENCAP_PR_WRAP_CTLINPUT() presumably emits ipsecif6_ctlinput_wrapper()
 * around the handler above -- see its definition to confirm what the
 * wrapper adds.  The #define then redirects every later use of the name
 * ipsecif6_ctlinput, so the table below installs the wrapper rather than
 * the raw handler.
 */
ENCAP_PR_WRAP_CTLINPUT(ipsecif6_ctlinput)
#define	ipsecif6_ctlinput	ipsecif6_ctlinput_wrapper

/* Encapsulation hooks passed to encap_attach_addr() for IPv6 outer headers. */
static const struct encapsw ipsecif6_encapsw = {
	.encapsw6 = {
		.pr_input = ipsecif6_input,
		.pr_ctlinput = ipsecif6_ctlinput,
	}
};
   1061 #endif /* INET6 */
   1062