Home | History | Annotate | Line # | Download | only in netipsec
ipsecif.c revision 1.5
      1 /*	$NetBSD: ipsecif.c,v 1.5 2018/03/13 03:05:12 knakahara Exp $  */
      2 
      3 /*
      4  * Copyright (c) 2017 Internet Initiative Japan Inc.
      5  * All rights reserved.
      6  *
      7  * Redistribution and use in source and binary forms, with or without
      8  * modification, are permitted provided that the following conditions
      9  * are met:
     10  * 1. Redistributions of source code must retain the above copyright
     11  *    notice, this list of conditions and the following disclaimer.
     12  * 2. Redistributions in binary form must reproduce the above copyright
     13  *    notice, this list of conditions and the following disclaimer in the
     14  *    documentation and/or other materials provided with the distribution.
     15  *
     16  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     17  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     18  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     19  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     20  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     21  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     22  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     23  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     24  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     25  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     26  * POSSIBILITY OF SUCH DAMAGE.
     27  */
     28 
     29 #include <sys/cdefs.h>
     30 __KERNEL_RCSID(0, "$NetBSD: ipsecif.c,v 1.5 2018/03/13 03:05:12 knakahara Exp $");
     31 
     32 #ifdef _KERNEL_OPT
     33 #include "opt_inet.h"
     34 #include "opt_ipsec.h"
     35 #endif
     36 
     37 #include <sys/param.h>
     38 #include <sys/systm.h>
     39 #include <sys/socket.h>
     40 #include <sys/sockio.h>
     41 #include <sys/mbuf.h>
     42 #include <sys/errno.h>
     43 #include <sys/ioctl.h>
     44 #include <sys/syslog.h>
     45 #include <sys/kernel.h>
     46 
     47 #include <net/if.h>
     48 #include <net/route.h>
     49 
     50 #include <netinet/in.h>
     51 #include <netinet/in_systm.h>
     52 #include <netinet/ip.h>
     53 #include <netinet/ip_var.h>
     54 #include <netinet/in_var.h>
     55 #include <netinet/ip_encap.h>
     56 #include <netinet/ip_ecn.h>
     57 #include <netinet/ip_private.h>
     58 #include <netinet/udp.h>
     59 
     60 #ifdef INET6
     61 #include <netinet/ip6.h>
     62 #include <netinet6/ip6_var.h>
     63 #include <netinet6/ip6_private.h>
     64 #include <netinet6/in6_var.h>
     65 #include <netinet6/ip6protosw.h> /* for struct ip6ctlparam */
     66 #include <netinet/ip_ecn.h>
     67 #endif
     68 
     69 #include <netipsec/key.h>
     70 #include <netipsec/ipsecif.h>
     71 
     72 #include <net/if_ipsec.h>
     73 
/*
 * Forward declarations of the IPv4-outer tunnel handlers defined later in
 * this file.
 */
static void ipsecif4_input(struct mbuf *, int, int, void *);
static int ipsecif4_output(struct ipsec_variant *, int, struct mbuf *);
static int ipsecif4_filter4(const struct ip *, struct ipsec_variant *,
	struct ifnet *);

#ifdef INET6
/* IPv6-outer counterparts; only compiled when INET6 is configured. */
static int ipsecif6_input(struct mbuf **, int *, int, void *);
static int ipsecif6_output(struct ipsec_variant *, int, struct mbuf *);
static int ipsecif6_filter6(const struct ip6_hdr *, struct ipsec_variant *,
	struct ifnet *);
#endif
     85 
/* TTL written into the outer IPv4 header by ipsecif4_prepend_hdr(). */
static int ip_ipsec_ttl = IPSEC_TTL;
/*
 * Selects whether ipsecif4_prepend_hdr() initialises the outer TOS from the
 * inner packet's TOS (non-zero) or with zero.
 */
static int ip_ipsec_copy_tos = 0;
#ifdef INET6
/* Hop limit written into the outer IPv6 header by ipsecif6_output(). */
static int ip6_ipsec_hlim = IPSEC_HLIM;
/* When zero, ipsecif6_output() passes IPV6_MINMTU to ip6_output(). */
static int ip6_ipsec_pmtu = 0; /* XXX: per interface configuration?? */
/* When zero, ipsecif6_output() forces the outer traffic class to zero. */
static int ip6_ipsec_copy_tos = 0;
#endif
     93 
/*
 * Encapsulation switch for IPv4-outer tunnels; ipsecif4_attach() hands it
 * to encap_attach_func().  No ctlinput handler is installed for IPv4.
 */
struct encapsw ipsecif4_encapsw = {
	.encapsw4 = {
		.pr_input = ipsecif4_input,
		.pr_ctlinput = NULL,
	}
};

#ifdef INET6
/* Tentative definition; the initialiser appears at the end of this file. */
static const struct encapsw ipsecif6_encapsw;
#endif
    104 
    105 static struct mbuf *
    106 ipsecif4_prepend_hdr(struct ipsec_variant *var, struct mbuf *m,
    107     uint8_t proto, uint8_t tos)
    108 {
    109 	struct ip *ip;
    110 	struct sockaddr_in *src, *dst;
    111 
    112 	src = satosin(var->iv_psrc);
    113 	dst = satosin(var->iv_pdst);
    114 
    115 	if (in_nullhost(src->sin_addr) || in_nullhost(src->sin_addr) ||
    116 	    src->sin_addr.s_addr == INADDR_BROADCAST ||
    117 	    dst->sin_addr.s_addr == INADDR_BROADCAST) {
    118 		m_freem(m);
    119 		return NULL;
    120 	}
    121 	m->m_flags &= ~M_BCAST;
    122 
    123 	if (IN_MULTICAST(src->sin_addr.s_addr) ||
    124 	    IN_MULTICAST(dst->sin_addr.s_addr)) {
    125 		m_freem(m);
    126 		return NULL;
    127 	}
    128 
    129 	M_PREPEND(m, sizeof(struct ip), M_DONTWAIT);
    130 	if (m && M_UNWRITABLE(m, sizeof(struct ip)))
    131 		m = m_pullup(m, sizeof(struct ip));
    132 	if (m == NULL)
    133 		return NULL;
    134 
    135 	ip = mtod(m, struct ip *);
    136 	ip->ip_v = IPVERSION;
    137 	ip->ip_off = htons(0);
    138 	ip->ip_id = 0;
    139 	ip->ip_hl = sizeof(*ip) >> 2;
    140 	if (ip_ipsec_copy_tos)
    141 		ip->ip_tos = tos;
    142 	else
    143 		ip->ip_tos = 0;
    144 	ip->ip_sum = 0;
    145 	ip->ip_src = src->sin_addr;
    146 	ip->ip_dst = dst->sin_addr;
    147 	ip->ip_p = proto;
    148 	ip->ip_ttl = ip_ipsec_ttl;
    149 	ip->ip_len = htons(m->m_pkthdr.len);
    150 #ifndef IPSEC_TX_TOS_CLEAR
    151 	struct ifnet *ifp = &var->iv_softc->ipsec_if;
    152 	if (ifp->if_flags & IFF_ECN)
    153 		ip_ecn_ingress(ECN_ALLOWED, &ip->ip_tos, &tos);
    154 	else
    155 		ip_ecn_ingress(ECN_NOCARE, &ip->ip_tos, &tos);
    156 #endif
    157 
    158 	return m;
    159 }
    160 
    161 static int
    162 ipsecif4_needfrag(struct mbuf *m, struct ipsecrequest *isr)
    163 {
    164 	struct ip ip0;
    165 	struct ip *ip;
    166 	int mtu;
    167 	struct secasvar *sav;
    168 
    169 	sav = key_lookup_sa_bysaidx(&isr->saidx);
    170 	if (sav == NULL)
    171 		return 0;
    172 
    173 	if (!(sav->natt_type & UDP_ENCAP_ESPINUDP) &&
    174 	    !(sav->natt_type & UDP_ENCAP_ESPINUDP_NON_IKE)) {
    175 		mtu = 0;
    176 		goto out;
    177 	}
    178 
    179 	if (m->m_len < sizeof(struct ip)) {
    180 		m_copydata(m, 0, sizeof(ip0), &ip0);
    181 		ip = &ip0;
    182 	} else {
    183 		ip = mtod(m, struct ip *);
    184 	}
    185 	mtu = sav->esp_frag;
    186 	if (ntohs(ip->ip_len) <= mtu)
    187 		mtu = 0;
    188 
    189 out:
    190 	KEY_SA_UNREF(&sav);
    191 	return mtu;
    192 }
    193 
    194 static struct mbuf *
    195 ipsecif4_flowinfo(struct mbuf *m, int family, int *proto0, u_int8_t *tos0)
    196 {
    197 	const struct ip *ip;
    198 	int proto;
    199 	int tos;
    200 
    201 	KASSERT(proto0 != NULL);
    202 	KASSERT(tos0 != NULL);
    203 
    204 	switch (family) {
    205 	case AF_INET:
    206 		proto = IPPROTO_IPV4;
    207 		if (m->m_len < sizeof(*ip)) {
    208 			m = m_pullup(m, sizeof(*ip));
    209 			if (m == NULL) {
    210 				*tos0 = 0;
    211 				*proto0 = 0;
    212 				return NULL;
    213 			}
    214 		}
    215 		ip = mtod(m, const struct ip *);
    216 		tos = ip->ip_tos;
    217 		/* TODO: support ALTQ for innner packet */
    218 		break;
    219 #ifdef INET6
    220 	case AF_INET6: {
    221 		const struct ip6_hdr *ip6;
    222 		proto = IPPROTO_IPV6;
    223 		if (m->m_len < sizeof(*ip6)) {
    224 			m = m_pullup(m, sizeof(*ip6));
    225 			if (m == NULL) {
    226 				*tos0 = 0;
    227 				*proto0 = 0;
    228 				return NULL;
    229 			}
    230 		}
    231 		ip6 = mtod(m, const struct ip6_hdr *);
    232 		tos = (ntohl(ip6->ip6_flow) >> 20) & 0xff;
    233 		/* TODO: support ALTQ for innner packet */
    234 		break;
    235 	}
    236 #endif /* INET6 */
    237 	default:
    238 		*tos0 = 0;
    239 		*proto0 = 0;
    240 		return NULL;
    241 	}
    242 
    243 	*proto0 = proto;
    244 	*tos0 = tos;
    245 	return m;
    246 }
    247 
    248 static int
    249 ipsecif4_fragout(struct ipsec_variant *var, int family, struct mbuf *m, int mtu)
    250 {
    251 	struct ifnet *ifp = &var->iv_softc->ipsec_if;
    252 	struct mbuf *next;
    253 	struct m_tag *mtag;
    254 	int error;
    255 
    256 	KASSERT(if_ipsec_heldref_variant(var));
    257 
    258 	mtag = m_tag_find(m, PACKET_TAG_IPSEC_NAT_T_PORTS, NULL);
    259 	if (mtag)
    260 		m_tag_delete(m, mtag);
    261 
    262 	/* consider new IP header prepended in ipsecif4_output() */
    263 	if (mtu <= sizeof(struct ip)) {
    264 		m_freem(m);
    265 		return ENETUNREACH;
    266 	}
    267 	m->m_pkthdr.csum_flags |= M_CSUM_IPv4;
    268 	error = ip_fragment(m, ifp, mtu - sizeof(struct ip));
    269 	if (error)
    270 		return error;
    271 
    272 	for (error = 0; m; m = next) {
    273 		next = m->m_nextpkt;
    274 		m->m_nextpkt = NULL;
    275 		if (error) {
    276 			m_freem(m);
    277 			continue;
    278 		}
    279 
    280 		error = ipsecif4_output(var, family, m);
    281 	}
    282 	if (error == 0)
    283 		IP_STATINC(IP_STAT_FRAGMENTED);
    284 
    285 	return error;
    286 }
    287 
    288 int
    289 ipsecif4_encap_func(struct mbuf *m, struct ip *ip, struct ipsec_variant *var)
    290 {
    291 	struct m_tag *mtag;
    292 	struct sockaddr_in *src, *dst;
    293 	u_int16_t src_port = 0;
    294 	u_int16_t dst_port = 0;
    295 
    296 	KASSERT(var != NULL);
    297 
    298 	src = satosin(var->iv_psrc);
    299 	dst = satosin(var->iv_pdst);
    300 	mtag = m_tag_find(m, PACKET_TAG_IPSEC_NAT_T_PORTS, NULL);
    301 	if (mtag) {
    302 		u_int16_t *ports;
    303 
    304 		ports = (u_int16_t *)(mtag + 1);
    305 		src_port = ports[0];
    306 		dst_port = ports[1];
    307 	}
    308 
    309 	/* address match */
    310 	if (src->sin_addr.s_addr != ip->ip_dst.s_addr ||
    311 	    dst->sin_addr.s_addr != ip->ip_src.s_addr)
    312 		return 0;
    313 
    314 	/* UDP encap? */
    315 	if (mtag == NULL && var->iv_sport == 0 && var->iv_dport == 0)
    316 		goto match;
    317 
    318 	/* port match */
    319 	if (src_port != var->iv_dport ||
    320 	    dst_port != var->iv_sport) {
    321 #ifdef DEBUG
    322 		printf("%s: port mismatch: pkt(%u, %u), if(%u, %u)\n",
    323 		    __func__, ntohs(src_port), ntohs(dst_port),
    324 		    ntohs(var->iv_sport), ntohs(var->iv_dport));
    325 #endif
    326 		return 0;
    327 	}
    328 
    329 match:
    330 	/*
    331 	 * hide NAT-T information from encapsulated traffics.
    332 	 * they don't know about IPsec.
    333 	 */
    334 	if (mtag)
    335 		m_tag_delete(m, mtag);
    336 	return sizeof(src->sin_addr) + sizeof(dst->sin_addr);
    337 }
    338 
    339 static int
    340 ipsecif4_output(struct ipsec_variant *var, int family, struct mbuf *m)
    341 {
    342 	struct secpolicy *sp = NULL;
    343 	u_int8_t tos;
    344 	int proto;
    345 	int error;
    346 	int mtu;
    347 	u_long sa_mtu = 0;
    348 
    349 	KASSERT(if_ipsec_heldref_variant(var));
    350 	KASSERT(if_ipsec_variant_is_configured(var));
    351 	KASSERT(var->iv_psrc->sa_family == AF_INET);
    352 	KASSERT(var->iv_pdst->sa_family == AF_INET);
    353 
    354 	sp = IV_SP_OUT(var);
    355 	KASSERT(sp != NULL);
    356 	/*
    357 	 * The SPs in ipsec_variant are prevented from freed by
    358 	 * ipsec_variant->iv_psref. So, KEY_SP_REF() is unnecessary here.
    359 	 */
    360 
    361 	KASSERT(sp->policy != IPSEC_POLICY_NONE);
    362 	KASSERT(sp->policy != IPSEC_POLICY_ENTRUST);
    363 	KASSERT(sp->policy != IPSEC_POLICY_BYPASS);
    364 	if (sp->policy != IPSEC_POLICY_IPSEC) {
    365 		struct ifnet *ifp = &var->iv_softc->ipsec_if;
    366 		m_freem(m);
    367 		IF_DROP(&ifp->if_snd);
    368 		return 0;
    369 	}
    370 
    371 	/* get flowinfo */
    372 	m = ipsecif4_flowinfo(m, family, &proto, &tos);
    373 	if (m == NULL) {
    374 		error = ENETUNREACH;
    375 		goto done;
    376 	}
    377 
    378 	/* prepend new IP header */
    379 	m = ipsecif4_prepend_hdr(var, m, proto, tos);
    380 	if (m == NULL) {
    381 		error = ENETUNREACH;
    382 		goto done;
    383 	}
    384 
    385 	/*
    386 	 * Normal netipsec's NAT-T fragmentation is done in ip_output().
    387 	 * See "natt_frag" processing.
    388 	 * However, ipsec(4) interface's one is not done in the same way,
    389 	 * so we must do NAT-T fragmentation by own code.
    390 	 */
    391 	/* NAT-T ESP fragmentation */
    392 	mtu = ipsecif4_needfrag(m, sp->req);
    393 	if (mtu > 0)
    394 		return ipsecif4_fragout(var, family, m, mtu);
    395 
    396 	/* IPsec output */
    397 	IP_STATINC(IP_STAT_LOCALOUT);
    398 	error = ipsec4_process_packet(m, sp->req, &sa_mtu);
    399 	if (error == ENOENT)
    400 		error = 0;
    401 	/*
    402 	 * frangmentation is already done in ipsecif4_fragout(),
    403 	 * so ipsec4_process_packet() must not do fragmentation here.
    404 	 */
    405 	KASSERT(sa_mtu == 0);
    406 
    407 done:
    408 	return error;
    409 }
    410 
    411 #ifdef INET6
    412 static int
    413 ipsecif6_output(struct ipsec_variant *var, int family, struct mbuf *m)
    414 {
    415 	struct ifnet *ifp = &var->iv_softc->ipsec_if;
    416 	struct ipsec_softc *sc = ifp->if_softc;
    417 	struct ipsec_ro *iro;
    418 	struct rtentry *rt;
    419 	struct sockaddr_in6 *sin6_src;
    420 	struct sockaddr_in6 *sin6_dst;
    421 	struct ip6_hdr *ip6;
    422 	int proto, error;
    423 	u_int8_t itos, otos;
    424 	union {
    425 		struct sockaddr		dst;
    426 		struct sockaddr_in6	dst6;
    427 	} u;
    428 
    429 	KASSERT(if_ipsec_heldref_variant(var));
    430 	KASSERT(if_ipsec_variant_is_configured(var));
    431 
    432 	sin6_src = satosin6(var->iv_psrc);
    433 	sin6_dst = satosin6(var->iv_pdst);
    434 
    435 	KASSERT(sin6_src->sin6_family == AF_INET6);
    436 	KASSERT(sin6_dst->sin6_family == AF_INET6);
    437 
    438 	switch (family) {
    439 #ifdef INET
    440 	case AF_INET:
    441 	    {
    442 		struct ip *ip;
    443 
    444 		proto = IPPROTO_IPV4;
    445 		if (m->m_len < sizeof(*ip)) {
    446 			m = m_pullup(m, sizeof(*ip));
    447 			if (m == NULL)
    448 				return ENOBUFS;
    449 		}
    450 		ip = mtod(m, struct ip *);
    451 		itos = ip->ip_tos;
    452 		/* TODO: support ALTQ for innner packet */
    453 		break;
    454 	    }
    455 #endif /* INET */
    456 	case AF_INET6:
    457 	    {
    458 		struct ip6_hdr *xip6;
    459 		proto = IPPROTO_IPV6;
    460 		if (m->m_len < sizeof(*xip6)) {
    461 			m = m_pullup(m, sizeof(*xip6));
    462 			if (m == NULL)
    463 				return ENOBUFS;
    464 		}
    465 		xip6 = mtod(m, struct ip6_hdr *);
    466 		itos = (ntohl(xip6->ip6_flow) >> 20) & 0xff;
    467 		/* TODO: support ALTQ for innner packet */
    468 		break;
    469 	    }
    470 	default:
    471 		m_freem(m);
    472 		return EAFNOSUPPORT;
    473 	}
    474 
    475 	/* prepend new IP header */
    476 	M_PREPEND(m, sizeof(struct ip6_hdr), M_DONTWAIT);
    477 	if (m && M_UNWRITABLE(m, sizeof(struct ip6_hdr)))
    478 		m = m_pullup(m, sizeof(struct ip6_hdr));
    479 	if (m == NULL)
    480 		return ENOBUFS;
    481 
    482 	ip6 = mtod(m, struct ip6_hdr *);
    483 	ip6->ip6_flow	= 0;
    484 	ip6->ip6_vfc	&= ~IPV6_VERSION_MASK;
    485 	ip6->ip6_vfc	|= IPV6_VERSION;
    486 #if 0	/* ip6->ip6_plen will be filled by ip6_output */
    487 	ip6->ip6_plen	= htons((u_short)m->m_pkthdr.len - sizeof(*ip6));
    488 #endif
    489 	ip6->ip6_nxt	= proto;
    490 	ip6->ip6_hlim	= ip6_ipsec_hlim;
    491 	ip6->ip6_src	= sin6_src->sin6_addr;
    492 	/* bidirectional configured tunnel mode */
    493 	if (!IN6_IS_ADDR_UNSPECIFIED(&sin6_dst->sin6_addr)) {
    494 		ip6->ip6_dst = sin6_dst->sin6_addr;
    495 	} else  {
    496 		m_freem(m);
    497 		return ENETUNREACH;
    498 	}
    499 #ifndef IPSEC_TX_TOS_CLEAR
    500 	if (ifp->if_flags & IFF_ECN)
    501 		ip_ecn_ingress(ECN_ALLOWED, &otos, &itos);
    502 	else
    503 		ip_ecn_ingress(ECN_NOCARE, &otos, &itos);
    504 
    505 	if (!ip6_ipsec_copy_tos)
    506 		otos = 0;
    507 #else
    508 	if (ip6_ipsec_copy_tos)
    509 		otos = itos;
    510 	else
    511 		otos = 0;
    512 #endif
    513 	ip6->ip6_flow &= ~ntohl(0xff00000);
    514 	ip6->ip6_flow |= htonl((u_int32_t)otos << 20);
    515 
    516 	sockaddr_in6_init(&u.dst6, &sin6_dst->sin6_addr, 0, 0, 0);
    517 
    518 	iro = percpu_getref(sc->ipsec_ro_percpu);
    519 	mutex_enter(&iro->ir_lock);
    520 	if ((rt = rtcache_lookup(&iro->ir_ro, &u.dst)) == NULL) {
    521 		mutex_exit(&iro->ir_lock);
    522 		percpu_putref(sc->ipsec_ro_percpu);
    523 		m_freem(m);
    524 		return ENETUNREACH;
    525 	}
    526 
    527 	if (rt->rt_ifp == ifp) {
    528 		rtcache_unref(rt, &iro->ir_ro);
    529 		rtcache_free(&iro->ir_ro);
    530 		mutex_exit(&iro->ir_lock);
    531 		percpu_putref(sc->ipsec_ro_percpu);
    532 		m_freem(m);
    533 		return ENETUNREACH;
    534 	}
    535 	rtcache_unref(rt, &iro->ir_ro);
    536 
    537 	/*
    538 	 * force fragmentation to minimum MTU, to avoid path MTU discovery.
    539 	 * it is too painful to ask for resend of inner packet, to achieve
    540 	 * path MTU discovery for encapsulated packets.
    541 	 */
    542 	error = ip6_output(m, 0, &iro->ir_ro,
    543 	    ip6_ipsec_pmtu ? 0 : IPV6_MINMTU, 0, NULL, NULL);
    544 	if (error)
    545 		rtcache_free(&iro->ir_ro);
    546 
    547 	mutex_exit(&iro->ir_lock);
    548 	percpu_putref(sc->ipsec_ro_percpu);
    549 
    550 	return error;
    551 }
    552 #endif /* INET6 */
    553 
    554 static void
    555 ipsecif4_input(struct mbuf *m, int off, int proto, void *eparg)
    556 {
    557 	struct ifnet *ipsecp;
    558 	struct ipsec_softc *sc = eparg;
    559 	struct ipsec_variant *var;
    560 	const struct ip *ip;
    561 	int af;
    562 #ifndef IPSEC_TX_TOS_CLEAR
    563 	u_int8_t otos;
    564 #endif
    565 	struct psref psref_rcvif;
    566 	struct psref psref_var;
    567 	struct ifnet *rcvif;
    568 
    569 	KASSERT(sc != NULL);
    570 
    571 	ipsecp = &sc->ipsec_if;
    572 	if ((ipsecp->if_flags & IFF_UP) == 0) {
    573 		m_freem(m);
    574 		ip_statinc(IP_STAT_NOIPSEC);
    575 		return;
    576 	}
    577 
    578 	var = if_ipsec_getref_variant(sc, &psref_var);
    579 	if (if_ipsec_variant_is_unconfigured(var)) {
    580 		if_ipsec_putref_variant(var, &psref_var);
    581 		m_freem(m);
    582 		ip_statinc(IP_STAT_NOIPSEC);
    583 		return;
    584 	}
    585 
    586 	ip = mtod(m, const struct ip *);
    587 
    588 	rcvif = m_get_rcvif_psref(m, &psref_rcvif);
    589 	if (rcvif == NULL || !ipsecif4_filter4(ip, var, rcvif)) {
    590 		m_put_rcvif_psref(rcvif, &psref_rcvif);
    591 		if_ipsec_putref_variant(var, &psref_var);
    592 		m_freem(m);
    593 		ip_statinc(IP_STAT_NOIPSEC);
    594 		return;
    595 	}
    596 	m_put_rcvif_psref(rcvif, &psref_rcvif);
    597 	if_ipsec_putref_variant(var, &psref_var);
    598 #ifndef IPSEC_TX_TOS_CLEAR
    599 	otos = ip->ip_tos;
    600 #endif
    601 	m_adj(m, off);
    602 
    603 	switch (proto) {
    604 	case IPPROTO_IPV4:
    605 	    {
    606 		struct ip *xip;
    607 		af = AF_INET;
    608 		if (M_UNWRITABLE(m, sizeof(*xip))) {
    609 			m = m_pullup(m, sizeof(*xip));
    610 			if (m == NULL)
    611 				return;
    612 		}
    613 		xip = mtod(m, struct ip *);
    614 #ifndef IPSEC_TX_TOS_CLEAR
    615 		if (ipsecp->if_flags & IFF_ECN)
    616 			ip_ecn_egress(ECN_ALLOWED, &otos, &xip->ip_tos);
    617 		else
    618 			ip_ecn_egress(ECN_NOCARE, &otos, &xip->ip_tos);
    619 #endif
    620 		break;
    621 	    }
    622 #ifdef INET6
    623 	case IPPROTO_IPV6:
    624 	    {
    625 		struct ip6_hdr *ip6;
    626 		u_int8_t itos;
    627 		af = AF_INET6;
    628 		if (M_UNWRITABLE(m, sizeof(*ip6))) {
    629 			m = m_pullup(m, sizeof(*ip6));
    630 			if (m == NULL)
    631 				return;
    632 		}
    633 		ip6 = mtod(m, struct ip6_hdr *);
    634 		itos = (ntohl(ip6->ip6_flow) >> 20) & 0xff;
    635 #ifndef IPSEC_TX_TOS_CLEAR
    636 		if (ipsecp->if_flags & IFF_ECN)
    637 			ip_ecn_egress(ECN_ALLOWED, &otos, &itos);
    638 		else
    639 			ip_ecn_egress(ECN_NOCARE, &otos, &itos);
    640 #endif
    641 		ip6->ip6_flow &= ~htonl(0xff << 20);
    642 		ip6->ip6_flow |= htonl((u_int32_t)itos << 20);
    643 		break;
    644 	    }
    645 #endif /* INET6 */
    646 	default:
    647 		ip_statinc(IP_STAT_NOIPSEC);
    648 		m_freem(m);
    649 		return;
    650 	}
    651 	if_ipsec_input(m, af, ipsecp);
    652 
    653 	return;
    654 }
    655 
    656 /*
    657  * validate and filter the pakcet
    658  */
    659 static int
    660 ipsecif4_filter4(const struct ip *ip, struct ipsec_variant *var,
    661     struct ifnet *ifp)
    662 {
    663 	struct sockaddr_in *src, *dst;
    664 
    665 	src = satosin(var->iv_psrc);
    666 	dst = satosin(var->iv_pdst);
    667 
    668 	return in_tunnel_validate(ip, src->sin_addr, dst->sin_addr);
    669 }
    670 
    671 #ifdef INET6
    672 static int
    673 ipsecif6_input(struct mbuf **mp, int *offp, int proto, void *eparg)
    674 {
    675 	struct mbuf *m = *mp;
    676 	struct ifnet *ipsecp;
    677 	struct ipsec_softc *sc = eparg;
    678 	struct ipsec_variant *var;
    679 	struct ip6_hdr *ip6;
    680 	int af = 0;
    681 #ifndef IPSEC_TX_TOS_CLEAR
    682 	u_int32_t otos;
    683 #endif
    684 	struct psref psref_rcvif;
    685 	struct psref psref_var;
    686 	struct ifnet *rcvif;
    687 
    688 	KASSERT(eparg != NULL);
    689 
    690 	ipsecp = &sc->ipsec_if;
    691 	if ((ipsecp->if_flags & IFF_UP) == 0) {
    692 		m_freem(m);
    693 		IP6_STATINC(IP6_STAT_NOIPSEC);
    694 		return IPPROTO_DONE;
    695 	}
    696 
    697 	var = if_ipsec_getref_variant(sc, &psref_var);
    698 	if (if_ipsec_variant_is_unconfigured(var)) {
    699 		if_ipsec_putref_variant(var, &psref_var);
    700 		m_freem(m);
    701 		IP6_STATINC(IP6_STAT_NOIPSEC);
    702 		return IPPROTO_DONE;
    703 	}
    704 
    705 	ip6 = mtod(m, struct ip6_hdr *);
    706 
    707 	rcvif = m_get_rcvif_psref(m, &psref_rcvif);
    708 	if (rcvif == NULL || !ipsecif6_filter6(ip6, var, rcvif)) {
    709 		m_put_rcvif_psref(rcvif, &psref_rcvif);
    710 		if_ipsec_putref_variant(var, &psref_var);
    711 		m_freem(m);
    712 		IP6_STATINC(IP6_STAT_NOIPSEC);
    713 		return IPPROTO_DONE;
    714 	}
    715 	m_put_rcvif_psref(rcvif, &psref_rcvif);
    716 	if_ipsec_putref_variant(var, &psref_var);
    717 
    718 #ifndef IPSEC_TX_TOS_CLEAR
    719 	otos = ip6->ip6_flow;
    720 #endif
    721 	m_adj(m, *offp);
    722 
    723 	switch (proto) {
    724 #ifdef INET
    725 	case IPPROTO_IPV4:
    726 	    {
    727 		af = AF_INET;
    728 #ifndef IPSEC_TX_TOS_CLEAR
    729 		struct ip *ip;
    730 		u_int8_t otos8;
    731 		otos8 = (ntohl(otos) >> 20) & 0xff;
    732 
    733 		if (M_UNWRITABLE(m, sizeof(*ip))) {
    734 			m = m_pullup(m, sizeof(*ip));
    735 			if (m == NULL)
    736 				return IPPROTO_DONE;
    737 		}
    738 		ip = mtod(m, struct ip *);
    739 		if (ipsecp->if_flags & IFF_ECN)
    740 			ip_ecn_egress(ECN_ALLOWED, &otos8, &ip->ip_tos);
    741 		else
    742 			ip_ecn_egress(ECN_NOCARE, &otos8, &ip->ip_tos);
    743 #endif
    744 		break;
    745 	    }
    746 #endif /* INET */
    747 	case IPPROTO_IPV6:
    748 	    {
    749 		af = AF_INET6;
    750 #ifndef IPSEC_TX_TOS_CLEAR
    751 		struct ip6_hdr *xip6;
    752 
    753 		if (M_UNWRITABLE(m, sizeof(*xip6))) {
    754 			m = m_pullup(m, sizeof(*xip6));
    755 			if (m == NULL)
    756 				return IPPROTO_DONE;
    757 		}
    758 		xip6 = mtod(m, struct ip6_hdr *);
    759 		if (ipsecp->if_flags & IFF_ECN)
    760 			ip6_ecn_egress(ECN_ALLOWED, &otos, &xip6->ip6_flow);
    761 		else
    762 			ip6_ecn_egress(ECN_NOCARE, &otos, &xip6->ip6_flow);
    763 		break;
    764 #endif
    765 	    }
    766 	default:
    767 		IP6_STATINC(IP6_STAT_NOIPSEC);
    768 		m_freem(m);
    769 		return IPPROTO_DONE;
    770 	}
    771 
    772 	if_ipsec_input(m, af, ipsecp);
    773 	return IPPROTO_DONE;
    774 }
    775 
    776 /*
    777  * validate and filter the packet.
    778  */
    779 static int
    780 ipsecif6_filter6(const struct ip6_hdr *ip6, struct ipsec_variant *var,
    781     struct ifnet *ifp)
    782 {
    783 	struct sockaddr_in6 *src, *dst;
    784 
    785 	src = satosin6(var->iv_psrc);
    786 	dst = satosin6(var->iv_pdst);
    787 
    788 	return in6_tunnel_validate(ip6, &src->sin6_addr, &dst->sin6_addr);
    789 }
    790 #endif /* INET6 */
    791 
    792 int
    793 ipsecif4_attach(struct ipsec_variant *var)
    794 {
    795 	struct ipsec_softc *sc = var->iv_softc;
    796 
    797 	KASSERT(if_ipsec_variant_is_configured(var));
    798 
    799 	if (var->iv_encap_cookie4 != NULL)
    800 		return EALREADY;
    801 	var->iv_encap_cookie4 = encap_attach_func(AF_INET, -1, if_ipsec_encap_func,
    802 	    &ipsecif4_encapsw, sc);
    803 	if (var->iv_encap_cookie4 == NULL)
    804 		return EEXIST;
    805 
    806 	var->iv_output = ipsecif4_output;
    807 	return 0;
    808 }
    809 
    810 int
    811 ipsecif4_detach(struct ipsec_variant *var)
    812 {
    813 	int error;
    814 
    815 	if (var->iv_encap_cookie4 == NULL)
    816 		return 0;
    817 
    818 	var->iv_output = NULL;
    819 	error = encap_detach(var->iv_encap_cookie4);
    820 	if (error == 0)
    821 		var->iv_encap_cookie4 = NULL;
    822 
    823 	return error;
    824 }
    825 
    826 #ifdef INET6
    827 int
    828 ipsecif6_attach(struct ipsec_variant *var)
    829 {
    830 	struct sockaddr_in6 mask6;
    831 	struct ipsec_softc *sc = var->iv_softc;
    832 
    833 	KASSERT(if_ipsec_variant_is_configured(var));
    834 	KASSERT(var->iv_encap_cookie6 == NULL);
    835 
    836 	memset(&mask6, 0, sizeof(mask6));
    837 	mask6.sin6_len = sizeof(struct sockaddr_in6);
    838 	mask6.sin6_addr.s6_addr32[0] = mask6.sin6_addr.s6_addr32[1] =
    839 	mask6.sin6_addr.s6_addr32[2] = mask6.sin6_addr.s6_addr32[3] = ~0;
    840 
    841 	var->iv_encap_cookie6 = encap_attach(AF_INET6, -1,
    842 	    var->iv_psrc, (struct sockaddr *)&mask6,
    843 	    var->iv_pdst, (struct sockaddr *)&mask6,
    844 	    &ipsecif6_encapsw, sc);
    845 	if (var->iv_encap_cookie6 == NULL)
    846 		return EEXIST;
    847 
    848 	var->iv_output = ipsecif6_output;
    849 	return 0;
    850 }
    851 
    852 static void
    853 ipsecif6_rtcache_free_pc(void *p, void *arg __unused, struct cpu_info *ci __unused)
    854 {
    855 	struct ipsec_ro *iro = p;
    856 
    857 	mutex_enter(&iro->ir_lock);
    858 	rtcache_free(&iro->ir_ro);
    859 	mutex_exit(&iro->ir_lock);
    860 }
    861 
    862 int
    863 ipsecif6_detach(struct ipsec_variant *var)
    864 {
    865 	struct ipsec_softc *sc = var->iv_softc;
    866 	int error;
    867 
    868 	KASSERT(var->iv_encap_cookie6 != NULL);
    869 
    870 	percpu_foreach(sc->ipsec_ro_percpu, ipsecif6_rtcache_free_pc, NULL);
    871 
    872 	var->iv_output = NULL;
    873 	error = encap_detach(var->iv_encap_cookie6);
    874 	if (error == 0)
    875 		var->iv_encap_cookie6 = NULL;
    876 	return error;
    877 }
    878 
    879 void *
    880 ipsecif6_ctlinput(int cmd, const struct sockaddr *sa, void *d, void *eparg)
    881 {
    882 	struct ipsec_softc *sc = eparg;
    883 	struct ip6ctlparam *ip6cp = NULL;
    884 	struct ip6_hdr *ip6;
    885 	const struct sockaddr_in6 *dst6;
    886 	struct ipsec_ro *iro;
    887 
    888 	if (sa->sa_family != AF_INET6 ||
    889 	    sa->sa_len != sizeof(struct sockaddr_in6))
    890 		return NULL;
    891 
    892 	if ((unsigned)cmd >= PRC_NCMDS)
    893 		return NULL;
    894 	if (cmd == PRC_HOSTDEAD)
    895 		d = NULL;
    896 	else if (inet6ctlerrmap[cmd] == 0)
    897 		return NULL;
    898 
    899 	/* if the parameter is from icmp6, decode it. */
    900 	if (d != NULL) {
    901 		ip6cp = (struct ip6ctlparam *)d;
    902 		ip6 = ip6cp->ip6c_ip6;
    903 	} else {
    904 		ip6 = NULL;
    905 	}
    906 
    907 	if (!ip6)
    908 		return NULL;
    909 
    910 	iro = percpu_getref(sc->ipsec_ro_percpu);
    911 	mutex_enter(&iro->ir_lock);
    912 	dst6 = satocsin6(rtcache_getdst(&iro->ir_ro));
    913 	/* XXX scope */
    914 	if (dst6 == NULL)
    915 		;
    916 	else if (IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst, &dst6->sin6_addr))
    917 		/* flush route cache */
    918 		rtcache_free(&iro->ir_ro);
    919 
    920 	mutex_exit(&iro->ir_lock);
    921 	percpu_putref(sc->ipsec_ro_percpu);
    922 
    923 	return NULL;
    924 }
    925 
/*
 * NOTE(review): ENCAP_PR_WRAP_CTLINPUT() is defined outside this file; it
 * presumably emits ipsecif6_ctlinput_wrapper() around ipsecif6_ctlinput()
 * -- confirm against its definition.  From here on the name
 * ipsecif6_ctlinput refers to that wrapper.
 */
ENCAP_PR_WRAP_CTLINPUT(ipsecif6_ctlinput)
#define	ipsecif6_ctlinput	ipsecif6_ctlinput_wrapper

/*
 * Encapsulation switch for IPv6-outer tunnels; ipsecif6_attach() hands it
 * to encap_attach().
 */
static const struct encapsw ipsecif6_encapsw = {
	.encapsw6 = {
		.pr_input = ipsecif6_input,
		.pr_ctlinput = ipsecif6_ctlinput,
	}
};
    935 #endif /* INET6 */
    936