Home | History | Annotate | Line # | Download | only in netipsec
ipsecif.c revision 1.3
      1 /*	$NetBSD: ipsecif.c,v 1.3 2018/03/06 10:07:06 knakahara Exp $  */
      2 
      3 /*
      4  * Copyright (c) 2017 Internet Initiative Japan Inc.
      5  * All rights reserved.
      6  *
      7  * Redistribution and use in source and binary forms, with or without
      8  * modification, are permitted provided that the following conditions
      9  * are met:
     10  * 1. Redistributions of source code must retain the above copyright
     11  *    notice, this list of conditions and the following disclaimer.
     12  * 2. Redistributions in binary form must reproduce the above copyright
     13  *    notice, this list of conditions and the following disclaimer in the
     14  *    documentation and/or other materials provided with the distribution.
     15  *
     16  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     17  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     18  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     19  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     20  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     21  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     22  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     23  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     24  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     25  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     26  * POSSIBILITY OF SUCH DAMAGE.
     27  */
     28 
     29 #include <sys/cdefs.h>
     30 __KERNEL_RCSID(0, "$NetBSD: ipsecif.c,v 1.3 2018/03/06 10:07:06 knakahara Exp $");
     31 
     32 #ifdef _KERNEL_OPT
     33 #include "opt_inet.h"
     34 #include "opt_ipsec.h"
     35 #endif
     36 
     37 #include <sys/param.h>
     38 #include <sys/systm.h>
     39 #include <sys/socket.h>
     40 #include <sys/sockio.h>
     41 #include <sys/mbuf.h>
     42 #include <sys/errno.h>
     43 #include <sys/ioctl.h>
     44 #include <sys/syslog.h>
     45 #include <sys/kernel.h>
     46 
     47 #include <net/if.h>
     48 #include <net/route.h>
     49 
     50 #include <netinet/in.h>
     51 #include <netinet/in_systm.h>
     52 #include <netinet/ip.h>
     53 #include <netinet/ip_var.h>
     54 #include <netinet/in_var.h>
     55 #include <netinet/ip_encap.h>
     56 #include <netinet/ip_ecn.h>
     57 #include <netinet/ip_private.h>
     58 #include <netinet/udp.h>
     59 
     60 #ifdef INET6
     61 #include <netinet/ip6.h>
     62 #include <netinet6/ip6_var.h>
     63 #include <netinet6/ip6_private.h>
     64 #include <netinet6/in6_var.h>
     65 #include <netinet6/ip6protosw.h> /* for struct ip6ctlparam */
     66 #include <netinet/ip_ecn.h>
     67 #endif
     68 
     69 #include <netipsec/key.h>
     70 #include <netipsec/ipsecif.h>
     71 
     72 #include <net/if_ipsec.h>
     73 
/*
 * Forward declarations for the IPv4 outer-header handlers defined later
 * in this file: the encapsulation input routine, the output routine and
 * the inbound address filter.
 */
static void ipsecif4_input(struct mbuf *, int, int, void *);
static int ipsecif4_output(struct ipsec_variant *, int, struct mbuf *);
static int ipsecif4_filter4(const struct ip *, struct ipsec_variant *,
	struct ifnet *);

#ifdef INET6
/* IPv6 outer-header counterparts of the handlers above. */
static int ipsecif6_input(struct mbuf **, int *, int, void *);
static int ipsecif6_output(struct ipsec_variant *, int, struct mbuf *);
static int ipsecif6_filter6(const struct ip6_hdr *, struct ipsec_variant *,
	struct ifnet *);
#endif
     85 
/* TTL written into the outer IPv4 header by ipsecif4_prepend_hdr(). */
static int ip_ipsec_ttl = IPSEC_TTL;
/*
 * Selects the initial outer IPv4 TOS in ipsecif4_prepend_hdr(): non-zero
 * starts from the inner packet's TOS, zero starts from 0 (before any ECN
 * processing done there).
 */
static int ip_ipsec_copy_tos = 0;
#ifdef INET6
/* Hop limit written into the outer IPv6 header by ipsecif6_output(). */
static int ip6_ipsec_hlim = IPSEC_HLIM;
/*
 * Zero makes ipsecif6_output() pass IPV6_MINMTU as the ip6_output() flags;
 * non-zero passes no flags.
 */
static int ip6_ipsec_pmtu = 0; /* XXX: per interface configuration?? */
/* Zero makes ipsecif6_output() force the outer IPv6 traffic class to 0. */
static int ip6_ipsec_copy_tos = 0;
#endif
     93 
/*
 * Encapsulation switch handed to encap_attach_func() by ipsecif4_attach():
 * inbound packets go to ipsecif4_input(); no control-input handler is set.
 */
struct encapsw ipsecif4_encapsw = {
	.encapsw4 = {
		.pr_input = ipsecif4_input,
		.pr_ctlinput = NULL,
	}
};

#ifdef INET6
/*
 * Tentative declaration so ipsecif6_attach() can reference the switch;
 * the initialised definition appears after ipsecif6_ctlinput().
 */
static const struct encapsw ipsecif6_encapsw;
#endif
    104 
    105 static struct mbuf *
    106 ipsecif4_prepend_hdr(struct ipsec_variant *var, struct mbuf *m,
    107     uint8_t proto, uint8_t tos)
    108 {
    109 	struct ip *ip;
    110 	struct sockaddr_in *src, *dst;
    111 
    112 	src = satosin(var->iv_psrc);
    113 	dst = satosin(var->iv_pdst);
    114 
    115 	if (in_nullhost(src->sin_addr) || in_nullhost(src->sin_addr) ||
    116 	    src->sin_addr.s_addr == INADDR_BROADCAST ||
    117 	    dst->sin_addr.s_addr == INADDR_BROADCAST) {
    118 		m_freem(m);
    119 		return NULL;
    120 	}
    121 	m->m_flags &= ~M_BCAST;
    122 
    123 	if (IN_MULTICAST(src->sin_addr.s_addr) ||
    124 	    IN_MULTICAST(dst->sin_addr.s_addr)) {
    125 		m_freem(m);
    126 		return NULL;
    127 	}
    128 
    129 	M_PREPEND(m, sizeof(struct ip), M_DONTWAIT);
    130 	if (m && M_UNWRITABLE(m, sizeof(struct ip)))
    131 		m = m_pullup(m, sizeof(struct ip));
    132 	if (m == NULL)
    133 		return NULL;
    134 
    135 	ip = mtod(m, struct ip *);
    136 	ip->ip_v = IPVERSION;
    137 	ip->ip_off = htons(0);
    138 	ip->ip_id = 0;
    139 	ip->ip_hl = sizeof(*ip) >> 2;
    140 	if (ip_ipsec_copy_tos)
    141 		ip->ip_tos = tos;
    142 	else
    143 		ip->ip_tos = 0;
    144 	ip->ip_sum = 0;
    145 	ip->ip_src = src->sin_addr;
    146 	ip->ip_dst = dst->sin_addr;
    147 	ip->ip_p = proto;
    148 	ip->ip_ttl = ip_ipsec_ttl;
    149 	ip->ip_len = htons(m->m_pkthdr.len);
    150 #ifndef IPSEC_TX_TOS_CLEAR
    151 	struct ifnet *ifp = &var->iv_softc->ipsec_if;
    152 	if (ifp->if_flags & IFF_ECN)
    153 		ip_ecn_ingress(ECN_ALLOWED, &ip->ip_tos, &tos);
    154 	else
    155 		ip_ecn_ingress(ECN_NOCARE, &ip->ip_tos, &tos);
    156 #endif
    157 
    158 	return m;
    159 }
    160 
    161 static int
    162 ipsecif4_needfrag(struct mbuf *m, struct ipsecrequest *isr)
    163 {
    164 	struct ip ip0;
    165 	struct ip *ip;
    166 	int mtu;
    167 	struct secasvar *sav;
    168 
    169 	sav = key_lookup_sa_bysaidx(&isr->saidx);
    170 	if (sav == NULL)
    171 		return 0;
    172 
    173 	if (!(sav->natt_type & UDP_ENCAP_ESPINUDP) &&
    174 	    !(sav->natt_type & UDP_ENCAP_ESPINUDP_NON_IKE)) {
    175 		mtu = 0;
    176 		goto out;
    177 	}
    178 
    179 	if (m->m_len < sizeof(struct ip)) {
    180 		m_copydata(m, 0, sizeof(ip0), &ip0);
    181 		ip = &ip0;
    182 	} else {
    183 		ip = mtod(m, struct ip *);
    184 	}
    185 	mtu = sav->esp_frag;
    186 	if (ntohs(ip->ip_len) <= mtu)
    187 		mtu = 0;
    188 
    189 out:
    190 	KEY_SA_UNREF(&sav);
    191 	return mtu;
    192 }
    193 
    194 static struct mbuf *
    195 ipsecif4_flowinfo(struct mbuf *m, int family, int *proto0, u_int8_t *tos0)
    196 {
    197 	const struct ip *ip;
    198 	int proto;
    199 	int tos;
    200 
    201 	KASSERT(proto0 != NULL);
    202 	KASSERT(tos0 != NULL);
    203 
    204 	switch (family) {
    205 	case AF_INET:
    206 		proto = IPPROTO_IPV4;
    207 		if (m->m_len < sizeof(*ip)) {
    208 			m = m_pullup(m, sizeof(*ip));
    209 			if (m == NULL) {
    210 				*tos0 = 0;
    211 				*proto0 = 0;
    212 				return NULL;
    213 			}
    214 		}
    215 		ip = mtod(m, const struct ip *);
    216 		tos = ip->ip_tos;
    217 		/* TODO: support ALTQ for innner packet */
    218 		break;
    219 #ifdef INET6
    220 	case AF_INET6: {
    221 		const struct ip6_hdr *ip6;
    222 		proto = IPPROTO_IPV6;
    223 		if (m->m_len < sizeof(*ip6)) {
    224 			m = m_pullup(m, sizeof(*ip6));
    225 			if (m == NULL) {
    226 				*tos0 = 0;
    227 				*proto0 = 0;
    228 				return NULL;
    229 			}
    230 		}
    231 		ip6 = mtod(m, const struct ip6_hdr *);
    232 		tos = (ntohl(ip6->ip6_flow) >> 20) & 0xff;
    233 		/* TODO: support ALTQ for innner packet */
    234 		break;
    235 	}
    236 #endif /* INET6 */
    237 	default:
    238 		*tos0 = 0;
    239 		*proto0 = 0;
    240 		return NULL;
    241 	}
    242 
    243 	*proto0 = proto;
    244 	*tos0 = tos;
    245 	return m;
    246 }
    247 
    248 static int
    249 ipsecif4_fragout(struct ipsec_variant *var, int family, struct mbuf *m, int mtu)
    250 {
    251 	struct ifnet *ifp = &var->iv_softc->ipsec_if;
    252 	struct mbuf *next;
    253 	struct m_tag *mtag;
    254 	int error;
    255 
    256 	KASSERT(if_ipsec_heldref_variant(var));
    257 
    258 	mtag = m_tag_find(m, PACKET_TAG_IPSEC_NAT_T_PORTS, NULL);
    259 	if (mtag)
    260 		m_tag_delete(m, mtag);
    261 
    262 	error = ip_fragment(m, ifp, mtu);
    263 	if (error)
    264 		return error;
    265 
    266 	for (error = 0; m; m = next) {
    267 		next = m->m_nextpkt;
    268 		m->m_nextpkt = NULL;
    269 		if (error) {
    270 			m_freem(m);
    271 			continue;
    272 		}
    273 
    274 		error = ipsecif4_output(var, family, m);
    275 	}
    276 	if (error == 0)
    277 		IP_STATINC(IP_STAT_FRAGMENTED);
    278 
    279 	return error;
    280 }
    281 
    282 int
    283 ipsecif4_encap_func(struct mbuf *m, struct ip *ip, struct ipsec_variant *var)
    284 {
    285 	struct m_tag *mtag;
    286 	struct sockaddr_in *src, *dst;
    287 	u_int16_t src_port = 0;
    288 	u_int16_t dst_port = 0;
    289 
    290 	KASSERT(var != NULL);
    291 
    292 	src = satosin(var->iv_psrc);
    293 	dst = satosin(var->iv_pdst);
    294 	mtag = m_tag_find(m, PACKET_TAG_IPSEC_NAT_T_PORTS, NULL);
    295 	if (mtag) {
    296 		u_int16_t *ports;
    297 
    298 		ports = (u_int16_t *)(mtag + 1);
    299 		src_port = ports[0];
    300 		dst_port = ports[1];
    301 	}
    302 
    303 	/* address match */
    304 	if (src->sin_addr.s_addr != ip->ip_dst.s_addr ||
    305 	    dst->sin_addr.s_addr != ip->ip_src.s_addr)
    306 		return 0;
    307 
    308 	/* UDP encap? */
    309 	if (mtag == NULL && var->iv_sport == 0 && var->iv_dport == 0)
    310 		goto match;
    311 
    312 	/* port match */
    313 	if (src_port != var->iv_dport ||
    314 	    dst_port != var->iv_sport) {
    315 #ifdef DEBUG
    316 		printf("%s: port mismatch: pkt(%u, %u), if(%u, %u)\n",
    317 		    __func__, ntohs(src_port), ntohs(dst_port),
    318 		    ntohs(var->iv_sport), ntohs(var->iv_dport));
    319 #endif
    320 		return 0;
    321 	}
    322 
    323 match:
    324 	/*
    325 	 * hide NAT-T information from encapsulated traffics.
    326 	 * they don't know about IPsec.
    327 	 */
    328 	if (mtag)
    329 		m_tag_delete(m, mtag);
    330 	return sizeof(src->sin_addr) + sizeof(dst->sin_addr);
    331 }
    332 
    333 static int
    334 ipsecif4_output(struct ipsec_variant *var, int family, struct mbuf *m)
    335 {
    336 	struct secpolicy *sp = NULL;
    337 	u_int8_t tos;
    338 	int proto;
    339 	int error;
    340 	int mtu;
    341 	u_long sa_mtu = 0;
    342 
    343 	KASSERT(if_ipsec_heldref_variant(var));
    344 	KASSERT(if_ipsec_variant_is_configured(var));
    345 	KASSERT(var->iv_psrc->sa_family == AF_INET);
    346 	KASSERT(var->iv_pdst->sa_family == AF_INET);
    347 
    348 	sp = IV_SP_OUT(var);
    349 	KASSERT(sp != NULL);
    350 	/*
    351 	 * The SPs in ipsec_variant are prevented from freed by
    352 	 * ipsec_variant->iv_psref. So, KEY_SP_REF() is unnecessary here.
    353 	 */
    354 
    355 	KASSERT(sp->policy != IPSEC_POLICY_NONE);
    356 	KASSERT(sp->policy != IPSEC_POLICY_ENTRUST);
    357 	KASSERT(sp->policy != IPSEC_POLICY_BYPASS);
    358 	if (sp->policy != IPSEC_POLICY_IPSEC) {
    359 		struct ifnet *ifp = &var->iv_softc->ipsec_if;
    360 		m_freem(m);
    361 		IF_DROP(&ifp->if_snd);
    362 		return 0;
    363 	}
    364 
    365 	/* get flowinfo */
    366 	m = ipsecif4_flowinfo(m, family, &proto, &tos);
    367 	if (m == NULL) {
    368 		error = ENETUNREACH;
    369 		goto done;
    370 	}
    371 
    372 	/* prepend new IP header */
    373 	m = ipsecif4_prepend_hdr(var, m, proto, tos);
    374 	if (m == NULL) {
    375 		error = ENETUNREACH;
    376 		goto done;
    377 	}
    378 
    379 	/*
    380 	 * Normal netipsec's NAT-T fragmentation is done in ip_output().
    381 	 * See "natt_frag" processing.
    382 	 * However, ipsec(4) interface's one is not done in the same way,
    383 	 * so we must do NAT-T fragmentation by own code.
    384 	 */
    385 	/* NAT-T ESP fragmentation */
    386 	mtu = ipsecif4_needfrag(m, sp->req);
    387 	if (mtu > 0)
    388 		return ipsecif4_fragout(var, family, m, mtu);
    389 
    390 	/* IPsec output */
    391 	IP_STATINC(IP_STAT_LOCALOUT);
    392 	error = ipsec4_process_packet(m, sp->req, &sa_mtu);
    393 	if (error == ENOENT)
    394 		error = 0;
    395 	/*
    396 	 * frangmentation is already done in ipsecif4_fragout(),
    397 	 * so ipsec4_process_packet() must not do fragmentation here.
    398 	 */
    399 	KASSERT(error != 0 || sa_mtu == 0);
    400 
    401 done:
    402 	return error;
    403 }
    404 
    405 #ifdef INET6
    406 static int
    407 ipsecif6_output(struct ipsec_variant *var, int family, struct mbuf *m)
    408 {
    409 	struct ifnet *ifp = &var->iv_softc->ipsec_if;
    410 	struct ipsec_softc *sc = ifp->if_softc;
    411 	struct ipsec_ro *iro;
    412 	struct rtentry *rt;
    413 	struct sockaddr_in6 *sin6_src;
    414 	struct sockaddr_in6 *sin6_dst;
    415 	struct ip6_hdr *ip6;
    416 	int proto, error;
    417 	u_int8_t itos, otos;
    418 	union {
    419 		struct sockaddr		dst;
    420 		struct sockaddr_in6	dst6;
    421 	} u;
    422 
    423 	KASSERT(if_ipsec_heldref_variant(var));
    424 	KASSERT(if_ipsec_variant_is_configured(var));
    425 
    426 	sin6_src = satosin6(var->iv_psrc);
    427 	sin6_dst = satosin6(var->iv_pdst);
    428 
    429 	KASSERT(sin6_src->sin6_family == AF_INET6);
    430 	KASSERT(sin6_dst->sin6_family == AF_INET6);
    431 
    432 	switch (family) {
    433 #ifdef INET
    434 	case AF_INET:
    435 	    {
    436 		struct ip *ip;
    437 
    438 		proto = IPPROTO_IPV4;
    439 		if (m->m_len < sizeof(*ip)) {
    440 			m = m_pullup(m, sizeof(*ip));
    441 			if (m == NULL)
    442 				return ENOBUFS;
    443 		}
    444 		ip = mtod(m, struct ip *);
    445 		itos = ip->ip_tos;
    446 		/* TODO: support ALTQ for innner packet */
    447 		break;
    448 	    }
    449 #endif /* INET */
    450 	case AF_INET6:
    451 	    {
    452 		struct ip6_hdr *xip6;
    453 		proto = IPPROTO_IPV6;
    454 		if (m->m_len < sizeof(*xip6)) {
    455 			m = m_pullup(m, sizeof(*xip6));
    456 			if (m == NULL)
    457 				return ENOBUFS;
    458 		}
    459 		xip6 = mtod(m, struct ip6_hdr *);
    460 		itos = (ntohl(xip6->ip6_flow) >> 20) & 0xff;
    461 		/* TODO: support ALTQ for innner packet */
    462 		break;
    463 	    }
    464 	default:
    465 		m_freem(m);
    466 		return EAFNOSUPPORT;
    467 	}
    468 
    469 	/* prepend new IP header */
    470 	M_PREPEND(m, sizeof(struct ip6_hdr), M_DONTWAIT);
    471 	if (m && M_UNWRITABLE(m, sizeof(struct ip6_hdr)))
    472 		m = m_pullup(m, sizeof(struct ip6_hdr));
    473 	if (m == NULL)
    474 		return ENOBUFS;
    475 
    476 	ip6 = mtod(m, struct ip6_hdr *);
    477 	ip6->ip6_flow	= 0;
    478 	ip6->ip6_vfc	&= ~IPV6_VERSION_MASK;
    479 	ip6->ip6_vfc	|= IPV6_VERSION;
    480 	ip6->ip6_plen	= htons((u_short)m->m_pkthdr.len);
    481 	ip6->ip6_nxt	= proto;
    482 	ip6->ip6_hlim	= ip6_ipsec_hlim;
    483 	ip6->ip6_src	= sin6_src->sin6_addr;
    484 	/* bidirectional configured tunnel mode */
    485 	if (!IN6_IS_ADDR_UNSPECIFIED(&sin6_dst->sin6_addr)) {
    486 		ip6->ip6_dst = sin6_dst->sin6_addr;
    487 	} else  {
    488 		m_freem(m);
    489 		return ENETUNREACH;
    490 	}
    491 #ifndef IPSEC_TX_TOS_CLEAR
    492 	if (ifp->if_flags & IFF_ECN)
    493 		ip_ecn_ingress(ECN_ALLOWED, &otos, &itos);
    494 	else
    495 		ip_ecn_ingress(ECN_NOCARE, &otos, &itos);
    496 
    497 	if (!ip6_ipsec_copy_tos)
    498 		otos = 0;
    499 #else
    500 	if (ip6_ipsec_copy_tos)
    501 		otos = itos;
    502 	else
    503 		otos = 0;
    504 #endif
    505 	ip6->ip6_flow &= ~ntohl(0xff00000);
    506 	ip6->ip6_flow |= htonl((u_int32_t)otos << 20);
    507 
    508 	sockaddr_in6_init(&u.dst6, &sin6_dst->sin6_addr, 0, 0, 0);
    509 
    510 	iro = percpu_getref(sc->ipsec_ro_percpu);
    511 	mutex_enter(&iro->ir_lock);
    512 	if ((rt = rtcache_lookup(&iro->ir_ro, &u.dst)) == NULL) {
    513 		mutex_exit(&iro->ir_lock);
    514 		percpu_putref(sc->ipsec_ro_percpu);
    515 		m_freem(m);
    516 		return ENETUNREACH;
    517 	}
    518 
    519 	if (rt->rt_ifp == ifp) {
    520 		rtcache_unref(rt, &iro->ir_ro);
    521 		rtcache_free(&iro->ir_ro);
    522 		mutex_exit(&iro->ir_lock);
    523 		percpu_putref(sc->ipsec_ro_percpu);
    524 		m_freem(m);
    525 		return ENETUNREACH;
    526 	}
    527 	rtcache_unref(rt, &iro->ir_ro);
    528 
    529 	/*
    530 	 * force fragmentation to minimum MTU, to avoid path MTU discovery.
    531 	 * it is too painful to ask for resend of inner packet, to achieve
    532 	 * path MTU discovery for encapsulated packets.
    533 	 */
    534 	error = ip6_output(m, 0, &iro->ir_ro,
    535 	    ip6_ipsec_pmtu ? 0 : IPV6_MINMTU, 0, NULL, NULL);
    536 	if (error)
    537 		rtcache_free(&iro->ir_ro);
    538 
    539 	mutex_exit(&iro->ir_lock);
    540 	percpu_putref(sc->ipsec_ro_percpu);
    541 
    542 	return error;
    543 }
    544 #endif /* INET6 */
    545 
    546 static void
    547 ipsecif4_input(struct mbuf *m, int off, int proto, void *eparg)
    548 {
    549 	struct ifnet *ipsecp;
    550 	struct ipsec_softc *sc = eparg;
    551 	struct ipsec_variant *var;
    552 	const struct ip *ip;
    553 	int af;
    554 #ifndef IPSEC_TX_TOS_CLEAR
    555 	u_int8_t otos;
    556 #endif
    557 	struct psref psref_rcvif;
    558 	struct psref psref_var;
    559 	struct ifnet *rcvif;
    560 
    561 	KASSERT(sc != NULL);
    562 
    563 	ipsecp = &sc->ipsec_if;
    564 	if ((ipsecp->if_flags & IFF_UP) == 0) {
    565 		m_freem(m);
    566 		ip_statinc(IP_STAT_NOIPSEC);
    567 		return;
    568 	}
    569 
    570 	var = if_ipsec_getref_variant(sc, &psref_var);
    571 	if (if_ipsec_variant_is_unconfigured(var)) {
    572 		if_ipsec_putref_variant(var, &psref_var);
    573 		m_freem(m);
    574 		ip_statinc(IP_STAT_NOIPSEC);
    575 		return;
    576 	}
    577 
    578 	ip = mtod(m, const struct ip *);
    579 
    580 	rcvif = m_get_rcvif_psref(m, &psref_rcvif);
    581 	if (rcvif == NULL || !ipsecif4_filter4(ip, var, rcvif)) {
    582 		m_put_rcvif_psref(rcvif, &psref_rcvif);
    583 		if_ipsec_putref_variant(var, &psref_var);
    584 		m_freem(m);
    585 		ip_statinc(IP_STAT_NOIPSEC);
    586 		return;
    587 	}
    588 	m_put_rcvif_psref(rcvif, &psref_rcvif);
    589 	if_ipsec_putref_variant(var, &psref_var);
    590 #ifndef IPSEC_TX_TOS_CLEAR
    591 	otos = ip->ip_tos;
    592 #endif
    593 	m_adj(m, off);
    594 
    595 	switch (proto) {
    596 	case IPPROTO_IPV4:
    597 	    {
    598 		struct ip *xip;
    599 		af = AF_INET;
    600 		if (M_UNWRITABLE(m, sizeof(*xip))) {
    601 			m = m_pullup(m, sizeof(*xip));
    602 			if (m == NULL)
    603 				return;
    604 		}
    605 		xip = mtod(m, struct ip *);
    606 #ifndef IPSEC_TX_TOS_CLEAR
    607 		if (ipsecp->if_flags & IFF_ECN)
    608 			ip_ecn_egress(ECN_ALLOWED, &otos, &xip->ip_tos);
    609 		else
    610 			ip_ecn_egress(ECN_NOCARE, &otos, &xip->ip_tos);
    611 #endif
    612 		break;
    613 	    }
    614 #ifdef INET6
    615 	case IPPROTO_IPV6:
    616 	    {
    617 		struct ip6_hdr *ip6;
    618 		u_int8_t itos;
    619 		af = AF_INET6;
    620 		if (M_UNWRITABLE(m, sizeof(*ip6))) {
    621 			m = m_pullup(m, sizeof(*ip6));
    622 			if (m == NULL)
    623 				return;
    624 		}
    625 		ip6 = mtod(m, struct ip6_hdr *);
    626 		itos = (ntohl(ip6->ip6_flow) >> 20) & 0xff;
    627 #ifndef IPSEC_TX_TOS_CLEAR
    628 		if (ipsecp->if_flags & IFF_ECN)
    629 			ip_ecn_egress(ECN_ALLOWED, &otos, &itos);
    630 		else
    631 			ip_ecn_egress(ECN_NOCARE, &otos, &itos);
    632 #endif
    633 		ip6->ip6_flow &= ~htonl(0xff << 20);
    634 		ip6->ip6_flow |= htonl((u_int32_t)itos << 20);
    635 		break;
    636 	    }
    637 #endif /* INET6 */
    638 	default:
    639 		ip_statinc(IP_STAT_NOIPSEC);
    640 		m_freem(m);
    641 		return;
    642 	}
    643 	if_ipsec_input(m, af, ipsecp);
    644 
    645 	return;
    646 }
    647 
    648 /*
    649  * validate and filter the pakcet
    650  */
    651 static int
    652 ipsecif4_filter4(const struct ip *ip, struct ipsec_variant *var,
    653     struct ifnet *ifp)
    654 {
    655 	struct sockaddr_in *src, *dst;
    656 
    657 	src = satosin(var->iv_psrc);
    658 	dst = satosin(var->iv_pdst);
    659 
    660 	return in_tunnel_validate(ip, src->sin_addr, dst->sin_addr);
    661 }
    662 
    663 #ifdef INET6
    664 static int
    665 ipsecif6_input(struct mbuf **mp, int *offp, int proto, void *eparg)
    666 {
    667 	struct mbuf *m = *mp;
    668 	struct ifnet *ipsecp;
    669 	struct ipsec_softc *sc = eparg;
    670 	struct ipsec_variant *var;
    671 	struct ip6_hdr *ip6;
    672 	int af = 0;
    673 #ifndef IPSEC_TX_TOS_CLEAR
    674 	u_int32_t otos;
    675 #endif
    676 	struct psref psref_rcvif;
    677 	struct psref psref_var;
    678 	struct ifnet *rcvif;
    679 
    680 	KASSERT(eparg != NULL);
    681 
    682 	ipsecp = &sc->ipsec_if;
    683 	if ((ipsecp->if_flags & IFF_UP) == 0) {
    684 		m_freem(m);
    685 		IP6_STATINC(IP6_STAT_NOIPSEC);
    686 		return IPPROTO_DONE;
    687 	}
    688 
    689 	var = if_ipsec_getref_variant(sc, &psref_var);
    690 	if (if_ipsec_variant_is_unconfigured(var)) {
    691 		if_ipsec_putref_variant(var, &psref_var);
    692 		m_freem(m);
    693 		IP6_STATINC(IP6_STAT_NOIPSEC);
    694 		return IPPROTO_DONE;
    695 	}
    696 
    697 	ip6 = mtod(m, struct ip6_hdr *);
    698 
    699 	rcvif = m_get_rcvif_psref(m, &psref_rcvif);
    700 	if (rcvif == NULL || !ipsecif6_filter6(ip6, var, rcvif)) {
    701 		m_put_rcvif_psref(rcvif, &psref_rcvif);
    702 		if_ipsec_putref_variant(var, &psref_var);
    703 		m_freem(m);
    704 		IP6_STATINC(IP6_STAT_NOIPSEC);
    705 		return IPPROTO_DONE;
    706 	}
    707 	m_put_rcvif_psref(rcvif, &psref_rcvif);
    708 	if_ipsec_putref_variant(var, &psref_var);
    709 
    710 #ifndef IPSEC_TX_TOS_CLEAR
    711 	otos = ip6->ip6_flow;
    712 #endif
    713 	m_adj(m, *offp);
    714 
    715 	switch (proto) {
    716 #ifdef INET
    717 	case IPPROTO_IPV4:
    718 	    {
    719 		af = AF_INET;
    720 #ifndef IPSEC_TX_TOS_CLEAR
    721 		struct ip *ip;
    722 		u_int8_t otos8;
    723 		otos8 = (ntohl(otos) >> 20) & 0xff;
    724 
    725 		if (M_UNWRITABLE(m, sizeof(*ip))) {
    726 			m = m_pullup(m, sizeof(*ip));
    727 			if (m == NULL)
    728 				return IPPROTO_DONE;
    729 		}
    730 		ip = mtod(m, struct ip *);
    731 		if (ipsecp->if_flags & IFF_ECN)
    732 			ip_ecn_egress(ECN_ALLOWED, &otos8, &ip->ip_tos);
    733 		else
    734 			ip_ecn_egress(ECN_NOCARE, &otos8, &ip->ip_tos);
    735 #endif
    736 		break;
    737 	    }
    738 #endif /* INET */
    739 	case IPPROTO_IPV6:
    740 	    {
    741 		af = AF_INET6;
    742 #ifndef IPSEC_TX_TOS_CLEAR
    743 		struct ip6_hdr *xip6;
    744 
    745 		if (M_UNWRITABLE(m, sizeof(*xip6))) {
    746 			m = m_pullup(m, sizeof(*xip6));
    747 			if (m == NULL)
    748 				return IPPROTO_DONE;
    749 		}
    750 		xip6 = mtod(m, struct ip6_hdr *);
    751 		if (ipsecp->if_flags & IFF_ECN)
    752 			ip6_ecn_egress(ECN_ALLOWED, &otos, &xip6->ip6_flow);
    753 		else
    754 			ip6_ecn_egress(ECN_NOCARE, &otos, &xip6->ip6_flow);
    755 		break;
    756 #endif
    757 	    }
    758 	default:
    759 		IP6_STATINC(IP6_STAT_NOIPSEC);
    760 		m_freem(m);
    761 		return IPPROTO_DONE;
    762 	}
    763 
    764 	if_ipsec_input(m, af, ipsecp);
    765 	return IPPROTO_DONE;
    766 }
    767 
    768 /*
    769  * validate and filter the packet.
    770  */
    771 static int
    772 ipsecif6_filter6(const struct ip6_hdr *ip6, struct ipsec_variant *var,
    773     struct ifnet *ifp)
    774 {
    775 	struct sockaddr_in6 *src, *dst;
    776 
    777 	src = satosin6(var->iv_psrc);
    778 	dst = satosin6(var->iv_pdst);
    779 
    780 	return in6_tunnel_validate(ip6, &src->sin6_addr, &dst->sin6_addr);
    781 }
    782 #endif /* INET6 */
    783 
    784 int
    785 ipsecif4_attach(struct ipsec_variant *var)
    786 {
    787 	struct ipsec_softc *sc = var->iv_softc;
    788 
    789 	KASSERT(if_ipsec_variant_is_configured(var));
    790 
    791 	if (var->iv_encap_cookie4 != NULL)
    792 		return EALREADY;
    793 	var->iv_encap_cookie4 = encap_attach_func(AF_INET, -1, if_ipsec_encap_func,
    794 	    &ipsecif4_encapsw, sc);
    795 	if (var->iv_encap_cookie4 == NULL)
    796 		return EEXIST;
    797 
    798 	var->iv_output = ipsecif4_output;
    799 	return 0;
    800 }
    801 
    802 int
    803 ipsecif4_detach(struct ipsec_variant *var)
    804 {
    805 	int error;
    806 
    807 	if (var->iv_encap_cookie4 == NULL)
    808 		return 0;
    809 
    810 	var->iv_output = NULL;
    811 	error = encap_detach(var->iv_encap_cookie4);
    812 	if (error == 0)
    813 		var->iv_encap_cookie4 = NULL;
    814 
    815 	return error;
    816 }
    817 
    818 #ifdef INET6
    819 int
    820 ipsecif6_attach(struct ipsec_variant *var)
    821 {
    822 	struct sockaddr_in6 mask6;
    823 	struct ipsec_softc *sc = var->iv_softc;
    824 
    825 	KASSERT(if_ipsec_variant_is_configured(var));
    826 	KASSERT(var->iv_encap_cookie6 == NULL);
    827 
    828 	memset(&mask6, 0, sizeof(mask6));
    829 	mask6.sin6_len = sizeof(struct sockaddr_in6);
    830 	mask6.sin6_addr.s6_addr32[0] = mask6.sin6_addr.s6_addr32[1] =
    831 	mask6.sin6_addr.s6_addr32[2] = mask6.sin6_addr.s6_addr32[3] = ~0;
    832 
    833 	var->iv_encap_cookie6 = encap_attach(AF_INET6, -1,
    834 	    var->iv_psrc, (struct sockaddr *)&mask6,
    835 	    var->iv_pdst, (struct sockaddr *)&mask6,
    836 	    &ipsecif6_encapsw, sc);
    837 	if (var->iv_encap_cookie6 == NULL)
    838 		return EEXIST;
    839 
    840 	var->iv_output = ipsecif6_output;
    841 	return 0;
    842 }
    843 
    844 static void
    845 ipsecif6_rtcache_free_pc(void *p, void *arg __unused, struct cpu_info *ci __unused)
    846 {
    847 	struct ipsec_ro *iro = p;
    848 
    849 	mutex_enter(&iro->ir_lock);
    850 	rtcache_free(&iro->ir_ro);
    851 	mutex_exit(&iro->ir_lock);
    852 }
    853 
    854 int
    855 ipsecif6_detach(struct ipsec_variant *var)
    856 {
    857 	struct ipsec_softc *sc = var->iv_softc;
    858 	int error;
    859 
    860 	KASSERT(var->iv_encap_cookie6 != NULL);
    861 
    862 	percpu_foreach(sc->ipsec_ro_percpu, ipsecif6_rtcache_free_pc, NULL);
    863 
    864 	var->iv_output = NULL;
    865 	error = encap_detach(var->iv_encap_cookie6);
    866 	if (error == 0)
    867 		var->iv_encap_cookie6 = NULL;
    868 	return error;
    869 }
    870 
    871 void *
    872 ipsecif6_ctlinput(int cmd, const struct sockaddr *sa, void *d, void *eparg)
    873 {
    874 	struct ipsec_softc *sc = eparg;
    875 	struct ip6ctlparam *ip6cp = NULL;
    876 	struct ip6_hdr *ip6;
    877 	const struct sockaddr_in6 *dst6;
    878 	struct ipsec_ro *iro;
    879 
    880 	if (sa->sa_family != AF_INET6 ||
    881 	    sa->sa_len != sizeof(struct sockaddr_in6))
    882 		return NULL;
    883 
    884 	if ((unsigned)cmd >= PRC_NCMDS)
    885 		return NULL;
    886 	if (cmd == PRC_HOSTDEAD)
    887 		d = NULL;
    888 	else if (inet6ctlerrmap[cmd] == 0)
    889 		return NULL;
    890 
    891 	/* if the parameter is from icmp6, decode it. */
    892 	if (d != NULL) {
    893 		ip6cp = (struct ip6ctlparam *)d;
    894 		ip6 = ip6cp->ip6c_ip6;
    895 	} else {
    896 		ip6 = NULL;
    897 	}
    898 
    899 	if (!ip6)
    900 		return NULL;
    901 
    902 	iro = percpu_getref(sc->ipsec_ro_percpu);
    903 	mutex_enter(&iro->ir_lock);
    904 	dst6 = satocsin6(rtcache_getdst(&iro->ir_ro));
    905 	/* XXX scope */
    906 	if (dst6 == NULL)
    907 		;
    908 	else if (IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst, &dst6->sin6_addr))
    909 		/* flush route cache */
    910 		rtcache_free(&iro->ir_ro);
    911 
    912 	mutex_exit(&iro->ir_lock);
    913 	percpu_putref(sc->ipsec_ro_percpu);
    914 
    915 	return NULL;
    916 }
    917 
/*
 * Wrap ipsecif6_ctlinput() with ENCAP_PR_WRAP_CTLINPUT and redirect the
 * name to the wrapper, so the switch below installs the wrapper.
 * NOTE(review): presumably the macro emits ipsecif6_ctlinput_wrapper();
 * its definition is not in this file -- confirm.
 */
ENCAP_PR_WRAP_CTLINPUT(ipsecif6_ctlinput)
#define	ipsecif6_ctlinput	ipsecif6_ctlinput_wrapper

/*
 * Encapsulation switch handed to encap_attach() by ipsecif6_attach():
 * inbound packets go to ipsecif6_input(), control input to the wrapped
 * ipsecif6_ctlinput().
 */
static const struct encapsw ipsecif6_encapsw = {
	.encapsw6 = {
		.pr_input = ipsecif6_input,
		.pr_ctlinput = ipsecif6_ctlinput,
	}
};
    927 #endif /* INET6 */
    928