Home | History | Annotate | Line # | Download | only in netipsec
ipsecif.c revision 1.1.2.8
      1 /*	$NetBSD: ipsecif.c,v 1.1.2.8 2019/05/29 15:57:38 martin Exp $  */
      2 
      3 /*
      4  * Copyright (c) 2017 Internet Initiative Japan Inc.
      5  * All rights reserved.
      6  *
      7  * Redistribution and use in source and binary forms, with or without
      8  * modification, are permitted provided that the following conditions
      9  * are met:
     10  * 1. Redistributions of source code must retain the above copyright
     11  *    notice, this list of conditions and the following disclaimer.
     12  * 2. Redistributions in binary form must reproduce the above copyright
     13  *    notice, this list of conditions and the following disclaimer in the
     14  *    documentation and/or other materials provided with the distribution.
     15  *
     16  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     17  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     18  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     19  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     20  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     21  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     22  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     23  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     24  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     25  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     26  * POSSIBILITY OF SUCH DAMAGE.
     27  */
     28 
     29 #include <sys/cdefs.h>
     30 __KERNEL_RCSID(0, "$NetBSD: ipsecif.c,v 1.1.2.8 2019/05/29 15:57:38 martin Exp $");
     31 
     32 #ifdef _KERNEL_OPT
     33 #include "opt_inet.h"
     34 #include "opt_ipsec.h"
     35 #endif
     36 
     37 #include <sys/param.h>
     38 #include <sys/systm.h>
     39 #include <sys/socket.h>
     40 #include <sys/sockio.h>
     41 #include <sys/mbuf.h>
     42 #include <sys/errno.h>
     43 #include <sys/ioctl.h>
     44 #include <sys/syslog.h>
     45 #include <sys/kernel.h>
     46 
     47 #include <net/if.h>
     48 #include <net/route.h>
     49 
     50 #include <netinet/in.h>
     51 #include <netinet/in_systm.h>
     52 #include <netinet/ip.h>
     53 #include <netinet/ip_var.h>
     54 #include <netinet/in_var.h>
     55 #include <netinet/ip_encap.h>
     56 #include <netinet/ip_ecn.h>
     57 #include <netinet/ip_private.h>
     58 #include <netinet/udp.h>
     59 
     60 #ifdef INET6
     61 #include <netinet/ip6.h>
     62 #include <netinet6/ip6_var.h>
     63 #include <netinet6/ip6_private.h>
     64 #include <netinet6/in6_var.h>
     65 #include <netinet6/ip6protosw.h> /* for struct ip6ctlparam */
     66 #include <netinet/ip_ecn.h>
     67 #endif
     68 
     69 #include <netipsec/key.h>
     70 #include <netipsec/ipsecif.h>
     71 
     72 #include <net/if_ipsec.h>
     73 
/*
 * Forward declarations for the handlers defined later in this file.
 */
static void ipsecif4_input(struct mbuf *, int, int, void *);
static int ipsecif4_output(struct ipsec_variant *, int, struct mbuf *);
static int ipsecif4_filter4(const struct ip *, struct ipsec_variant *,
	struct ifnet *);

#ifdef INET6
static int ipsecif6_input(struct mbuf **, int *, int, void *);
static int ipsecif6_output(struct ipsec_variant *, int, struct mbuf *);
static int ipsecif6_filter6(const struct ip6_hdr *, struct ipsec_variant *,
	struct ifnet *);
#endif

/*
 * Tunables consulted while the outer header is built.
 *
 * ip_ipsec_ttl		value stored in the outer IPv4 ip_ttl field
 * ip_ipsec_copy_tos	non-zero: start the outer IPv4 TOS from the inner TOS,
 *			zero: start it from 0
 * ip6_ipsec_hlim	value stored in the outer IPv6 ip6_hlim field
 * ip6_ipsec_pmtu	zero: ip6_output() is called with IPV6_MINMTU,
 *			non-zero: it is called without that flag
 * ip6_ipsec_copy_tos	IPv6 counterpart of ip_ipsec_copy_tos
 */
static int ip_ipsec_ttl = IPSEC_TTL;
static int ip_ipsec_copy_tos = 0;
#ifdef INET6
static int ip6_ipsec_hlim = IPSEC_HLIM;
static int ip6_ipsec_pmtu = 0; /* XXX: per interface configuration?? */
static int ip6_ipsec_copy_tos = 0;
#endif

/*
 * Handler table passed to encap_attach_func() by ipsecif4_attach().
 * No control-input handler is provided for IPv4.
 */
struct encapsw ipsecif4_encapsw = {
	.encapsw4 = {
		.pr_input = ipsecif4_input,
		.pr_ctlinput = NULL,
	}
};

#ifdef INET6
/* Defined at the end of the file; declared here for ipsecif6_attach(). */
static const struct encapsw ipsecif6_encapsw;
#endif
    104 
    105 static struct mbuf *
    106 ipsecif4_prepend_hdr(struct ipsec_variant *var, struct mbuf *m,
    107     uint8_t proto, uint8_t tos)
    108 {
    109 	struct ip *ip;
    110 	struct sockaddr_in *src, *dst;
    111 
    112 	src = satosin(var->iv_psrc);
    113 	dst = satosin(var->iv_pdst);
    114 
    115 	if (in_nullhost(src->sin_addr) || in_nullhost(src->sin_addr) ||
    116 	    src->sin_addr.s_addr == INADDR_BROADCAST ||
    117 	    dst->sin_addr.s_addr == INADDR_BROADCAST) {
    118 		m_freem(m);
    119 		return NULL;
    120 	}
    121 	m->m_flags &= ~M_BCAST;
    122 
    123 	if (IN_MULTICAST(src->sin_addr.s_addr) ||
    124 	   IN_MULTICAST(dst->sin_addr.s_addr)) {
    125 		m_freem(m);
    126 		return NULL;
    127 	}
    128 
    129 	M_PREPEND(m, sizeof(struct ip), M_DONTWAIT);
    130 	if (m && M_UNWRITABLE(m, sizeof(struct ip)))
    131 		m = m_pullup(m, sizeof(struct ip));
    132 	if (m == NULL)
    133 		return NULL;
    134 
    135 	ip = mtod(m, struct ip *);
    136 	ip->ip_v = IPVERSION;
    137 	ip->ip_off = htons(0);
    138 	if (m->m_pkthdr.len < IP_MINFRAGSIZE)
    139 		ip->ip_id = 0;
    140 	else
    141 		ip->ip_id = ip_newid(NULL);
    142 	ip->ip_hl = sizeof(*ip) >> 2;
    143 	if (ip_ipsec_copy_tos)
    144 		ip->ip_tos = tos;
    145 	else
    146 		ip->ip_tos = 0;
    147 	ip->ip_sum = 0;
    148 	ip->ip_src = src->sin_addr;
    149 	ip->ip_dst = dst->sin_addr;
    150 	ip->ip_p = proto;
    151 	ip->ip_ttl = ip_ipsec_ttl;
    152 	ip->ip_len = htons(m->m_pkthdr.len);
    153 #ifndef IPSEC_TX_TOS_CLEAR
    154 	struct ifnet *ifp = &var->iv_softc->ipsec_if;
    155 	if (ifp->if_flags & IFF_ECN)
    156 		ip_ecn_ingress(ECN_ALLOWED, &ip->ip_tos, &tos);
    157 	else
    158 		ip_ecn_ingress(ECN_NOCARE, &ip->ip_tos, &tos);
    159 #endif
    160 
    161 	return m;
    162 }
    163 
    164 static int
    165 ipsecif4_needfrag(struct mbuf *m, struct ipsecrequest *isr)
    166 {
    167 	struct ip ip0;
    168 	struct ip *ip;
    169 	int mtu;
    170 	struct secasvar *sav;
    171 
    172 	sav = key_lookup_sa_bysaidx(&isr->saidx);
    173 	if (sav == NULL)
    174 		return 0;
    175 
    176 	if (!(sav->natt_type & UDP_ENCAP_ESPINUDP) &&
    177 	    !(sav->natt_type & UDP_ENCAP_ESPINUDP_NON_IKE)) {
    178 		mtu = 0;
    179 		goto out;
    180 	}
    181 
    182 	if (m->m_len < sizeof(struct ip)) {
    183 		m_copydata(m, 0, sizeof(ip0), &ip0);
    184 		ip = &ip0;
    185 
    186 	} else {
    187 		ip = mtod(m, struct ip *);
    188 	}
    189 	mtu = sav->esp_frag;
    190 	if (ntohs(ip->ip_len) <= mtu)
    191 		mtu = 0;
    192 
    193 out:
    194 	KEY_SA_UNREF(&sav);
    195 	return mtu;
    196 }
    197 
    198 static struct mbuf *
    199 ipsecif4_flowinfo(struct mbuf *m, int family, int *proto0, u_int8_t *tos0)
    200 {
    201 	const struct ip *ip;
    202 	int proto;
    203 	int tos;
    204 
    205 	KASSERT(proto0 != NULL);
    206 	KASSERT(tos0 != NULL);
    207 
    208 	switch (family) {
    209 	case AF_INET:
    210 		proto = IPPROTO_IPV4;
    211 		if (m->m_len < sizeof(*ip)) {
    212 			m = m_pullup(m, sizeof(*ip));
    213 			if (!m) {
    214 				*tos0 = 0;
    215 				*proto0 = 0;
    216 				return  NULL;
    217 			}
    218 		}
    219 		ip = mtod(m, const struct ip *);
    220 		tos = ip->ip_tos;
    221 		/* TODO: support ALTQ for innner packet */
    222 		break;
    223 #ifdef INET6
    224 	case AF_INET6: {
    225 		const struct ip6_hdr *ip6;
    226 		proto = IPPROTO_IPV6;
    227 		if (m->m_len < sizeof(*ip6)) {
    228 			m = m_pullup(m, sizeof(*ip6));
    229 			if (!m) {
    230 				*tos0 = 0;
    231 				*proto0 = 0;
    232 				return NULL;
    233 			}
    234 		}
    235 		ip6 = mtod(m, const struct ip6_hdr *);
    236 		tos = (ntohl(ip6->ip6_flow) >> 20) & 0xff;
    237 		/* TODO: support ALTQ for innner packet */
    238 		break;
    239 	}
    240 #endif /* INET6 */
    241 	default:
    242 		*tos0 = 0;
    243 		*proto0 = 0;
    244 		return NULL;
    245 	}
    246 
    247 	*proto0 = proto;
    248 	*tos0 = tos;
    249 	return m;
    250 }
    251 
    252 static int
    253 ipsecif4_fragout(struct ipsec_variant *var, int family, struct mbuf *m, int mtu)
    254 {
    255 	struct ifnet *ifp = &var->iv_softc->ipsec_if;
    256 	struct mbuf *next;
    257 	struct m_tag *mtag;
    258 	int error;
    259 
    260 	KASSERT(if_ipsec_heldref_variant(var));
    261 
    262 	mtag = m_tag_find(m, PACKET_TAG_IPSEC_NAT_T_PORTS, NULL);
    263 	if (mtag)
    264 		m_tag_delete(m, mtag);
    265 
    266 	/* consider new IP header prepended in ipsecif4_output() */
    267 	if (mtu <= sizeof(struct ip)) {
    268 		m_freem(m);
    269 		return ENETUNREACH;
    270 	}
    271 	m->m_pkthdr.csum_flags |= M_CSUM_IPv4;
    272 	error = ip_fragment(m, ifp, mtu - sizeof(struct ip));
    273 	if (error)
    274 		return error;
    275 
    276 	for (error = 0; m; m = next) {
    277 		next = m->m_nextpkt;
    278 		m->m_nextpkt = NULL;
    279 		if (error) {
    280 			m_freem(m);
    281 			continue;
    282 		}
    283 
    284 		error = ipsecif4_output(var, family, m);
    285 	}
    286 	if (error == 0)
    287 		IP_STATINC(IP_STAT_FRAGMENTED);
    288 
    289 	return error;
    290 }
    291 
    292 int
    293 ipsecif4_encap_func(struct mbuf *m, struct ip *ip, struct ipsec_variant *var)
    294 {
    295 	struct m_tag *mtag;
    296 	struct sockaddr_in *src, *dst;
    297 	u_int16_t src_port = 0;
    298 	u_int16_t dst_port = 0;
    299 
    300 	KASSERT(var != NULL);
    301 
    302 	src = satosin(var->iv_psrc);
    303 	dst = satosin(var->iv_pdst);
    304 	mtag = m_tag_find(m, PACKET_TAG_IPSEC_NAT_T_PORTS, NULL);
    305 	if (mtag) {
    306 		u_int16_t *ports;
    307 
    308 		ports = (u_int16_t *)(mtag + 1);
    309 		src_port = ports[0];
    310 		dst_port = ports[1];
    311 	}
    312 
    313 	/* address match */
    314 	if (src->sin_addr.s_addr != ip->ip_dst.s_addr ||
    315 	    dst->sin_addr.s_addr != ip->ip_src.s_addr)
    316 		return 0;
    317 
    318 	/* UDP encap? */
    319 	if (mtag == NULL && var->iv_sport == 0 && var->iv_dport == 0)
    320 		goto match;
    321 
    322 	/* port match */
    323 	if (src_port != var->iv_dport ||
    324 	    dst_port != var->iv_sport) {
    325 #ifdef DEBUG
    326 		printf("%s: port mismatch: pkt(%u, %u), if(%u, %u)\n",
    327 		    __func__, ntohs(src_port), ntohs(dst_port),
    328 		    ntohs(var->iv_sport), ntohs(var->iv_dport));
    329 #endif
    330 		return 0;
    331 	}
    332 
    333 match:
    334 	/*
    335 	 * hide NAT-T information from encapsulated traffics.
    336 	 * they don't know about IPsec.
    337 	 */
    338 	if (mtag)
    339 		m_tag_delete(m, mtag);
    340 	return sizeof(src->sin_addr) + sizeof(dst->sin_addr);
    341 }
    342 
    343 static int
    344 ipsecif4_output(struct ipsec_variant *var, int family, struct mbuf *m)
    345 {
    346 	struct secpolicy *sp = NULL;
    347 	u_int8_t tos;
    348 	int proto;
    349 	int error;
    350 	int mtu;
    351 	u_long sa_mtu = 0;
    352 
    353 	KASSERT(if_ipsec_heldref_variant(var));
    354 	KASSERT(if_ipsec_variant_is_configured(var));
    355 	KASSERT(var->iv_psrc->sa_family == AF_INET);
    356 	KASSERT(var->iv_pdst->sa_family == AF_INET);
    357 
    358 	sp = IV_SP_OUT(var);
    359 	KASSERT(sp != NULL);
    360 	/*
    361 	 * The SPs in ipsec_variant are prevented from freed by
    362 	 * ipsec_variant->iv_psref. So, KEY_SP_REF() is unnecessary here.
    363 	 */
    364 
    365 	KASSERT(sp->policy != IPSEC_POLICY_NONE);
    366 	KASSERT(sp->policy != IPSEC_POLICY_ENTRUST);
    367 	KASSERT(sp->policy != IPSEC_POLICY_BYPASS);
    368 	if(sp->policy != IPSEC_POLICY_IPSEC) {
    369 		struct ifnet *ifp = &var->iv_softc->ipsec_if;
    370 		m_freem(m);
    371 		IF_DROP(&ifp->if_snd);
    372 		return 0;
    373 	}
    374 
    375 	/* get flowinfo */
    376 	m = ipsecif4_flowinfo(m, family, &proto, &tos);
    377 	if (m == NULL) {
    378 		error = ENETUNREACH;
    379 		goto done;
    380 	}
    381 
    382 	/* prepend new IP header */
    383 	m = ipsecif4_prepend_hdr(var, m, proto, tos);
    384 	if (m == NULL) {
    385 		error = ENETUNREACH;
    386 		goto done;
    387 	}
    388 
    389 	/*
    390 	 * Normal netipsec's NAT-T fragmentation is done in ip_output().
    391 	 * See "natt_frag" processing.
    392 	 * However, ipsec(4) interface's one is not done in the same way,
    393 	 * so we must do NAT-T fragmentation by own code.
    394 	 */
    395 	/* NAT-T ESP fragmentation */
    396 	mtu = ipsecif4_needfrag(m, sp->req);
    397 	if (mtu > 0)
    398 		return ipsecif4_fragout(var, family, m, mtu);
    399 
    400 	/* IPsec output */
    401 	IP_STATINC(IP_STAT_LOCALOUT);
    402 	error = ipsec4_process_packet(m, sp->req, &sa_mtu);
    403 	if (error == ENOENT)
    404 		error = 0;
    405 	/*
    406 	 * frangmentation is already done in ipsecif4_fragout(),
    407 	 * so ipsec4_process_packet() must not do fragmentation here.
    408 	 */
    409 	KASSERT(sa_mtu == 0);
    410 
    411 done:
    412 	return error;
    413 }
    414 
    415 #ifdef INET6
    416 int
    417 ipsecif6_encap_func(struct mbuf *m, struct ip6_hdr *ip6, struct ipsec_variant *var)
    418 {
    419 	struct m_tag *mtag;
    420 	struct sockaddr_in6 *src, *dst;
    421 	u_int16_t src_port = 0;
    422 	u_int16_t dst_port = 0;
    423 
    424 	KASSERT(var != NULL);
    425 
    426 	src = satosin6(var->iv_psrc);
    427 	dst = satosin6(var->iv_pdst);
    428 	mtag = m_tag_find(m, PACKET_TAG_IPSEC_NAT_T_PORTS, NULL);
    429 	if (mtag) {
    430 		u_int16_t *ports;
    431 
    432 		ports = (u_int16_t *)(mtag + 1);
    433 		src_port = ports[0];
    434 		dst_port = ports[1];
    435 	}
    436 
    437 	/* address match */
    438 	if (!IN6_ARE_ADDR_EQUAL(&src->sin6_addr, &ip6->ip6_dst) ||
    439 	    !IN6_ARE_ADDR_EQUAL(&dst->sin6_addr, &ip6->ip6_src))
    440 		return 0;
    441 
    442 	/* UDP encap? */
    443 	if (mtag == NULL && var->iv_sport == 0 && var->iv_dport == 0)
    444 		goto match;
    445 
    446 	/* port match */
    447 	if (src_port != var->iv_dport ||
    448 	    dst_port != var->iv_sport) {
    449 #ifdef DEBUG
    450 		printf("%s: port mismatch: pkt(%u, %u), if(%u, %u)\n",
    451 		    __func__, ntohs(src_port), ntohs(dst_port),
    452 		    ntohs(var->iv_sport), ntohs(var->iv_dport));
    453 #endif
    454 		return 0;
    455 	}
    456 
    457 match:
    458 	/*
    459 	 * hide NAT-T information from encapsulated traffics.
    460 	 * they don't know about IPsec.
    461 	 */
    462 	if (mtag)
    463 		m_tag_delete(m, mtag);
    464 	return sizeof(src->sin6_addr) + sizeof(dst->sin6_addr);
    465 }
    466 
    467 static int
    468 ipsecif6_output(struct ipsec_variant *var, int family, struct mbuf *m)
    469 {
    470 	struct ifnet *ifp = &var->iv_softc->ipsec_if;
    471 	struct ipsec_softc *sc = ifp->if_softc;
    472 	struct ipsec_ro *iro;
    473 	struct rtentry *rt;
    474 	struct sockaddr_in6 *sin6_src;
    475 	struct sockaddr_in6 *sin6_dst;
    476 	struct ip6_hdr *ip6;
    477 	int proto, error;
    478 	u_int8_t itos, otos;
    479 	union {
    480 		struct sockaddr		dst;
    481 		struct sockaddr_in6	dst6;
    482 	} u;
    483 
    484 	KASSERT(if_ipsec_heldref_variant(var));
    485 	KASSERT(if_ipsec_variant_is_configured(var));
    486 
    487 	sin6_src = satosin6(var->iv_psrc);
    488 	sin6_dst = satosin6(var->iv_pdst);
    489 
    490 	KASSERT(sin6_src->sin6_family == AF_INET6);
    491 	KASSERT(sin6_dst->sin6_family == AF_INET6);
    492 
    493 	switch (family) {
    494 #ifdef INET
    495 	case AF_INET:
    496 	    {
    497 		struct ip *ip;
    498 
    499 		proto = IPPROTO_IPV4;
    500 		if (m->m_len < sizeof(*ip)) {
    501 			m = m_pullup(m, sizeof(*ip));
    502 			if (!m)
    503 				return ENOBUFS;
    504 		}
    505 		ip = mtod(m, struct ip *);
    506 		itos = ip->ip_tos;
    507 		/*
    508 		 * TODO:
    509 		 *support ALTQ for innner packet
    510 		 */
    511 		break;
    512 	    }
    513 #endif /* INET */
    514 	case AF_INET6:
    515 	    {
    516 		struct ip6_hdr *xip6;
    517 		proto = IPPROTO_IPV6;
    518 		if (m->m_len < sizeof(*xip6)) {
    519 			m = m_pullup(m, sizeof(*xip6));
    520 			if (!m)
    521 				return ENOBUFS;
    522 		}
    523 		xip6 = mtod(m, struct ip6_hdr *);
    524 		itos = (ntohl(xip6->ip6_flow) >> 20) & 0xff;
    525 		/* TODO:
    526 		 * support ALTQ for innner packet
    527 		 */
    528 		break;
    529 	    }
    530 	default:
    531 		m_freem(m);
    532 		return EAFNOSUPPORT;
    533 	}
    534 
    535 	/* prepend new IP header */
    536 	M_PREPEND(m, sizeof(struct ip6_hdr), M_DONTWAIT);
    537 	if (m && M_UNWRITABLE(m, sizeof(struct ip6_hdr)))
    538 		m = m_pullup(m, sizeof(struct ip6_hdr));
    539 	if (m == NULL)
    540 		return ENOBUFS;
    541 
    542 	ip6 = mtod(m, struct ip6_hdr *);
    543 	ip6->ip6_flow	= 0;
    544 	ip6->ip6_vfc	&= ~IPV6_VERSION_MASK;
    545 	ip6->ip6_vfc	|= IPV6_VERSION;
    546 #if 0	/* ip6->ip6_plen will be filled by ip6_output */
    547 	ip6->ip6_plen	= htons((u_short)m->m_pkthdr.len - sizeof(*ip6));
    548 #endif
    549 	ip6->ip6_nxt	= proto;
    550 	ip6->ip6_hlim	= ip6_ipsec_hlim;
    551 	ip6->ip6_src	= sin6_src->sin6_addr;
    552 	/* bidirectional configured tunnel mode */
    553 	if (!IN6_IS_ADDR_UNSPECIFIED(&sin6_dst->sin6_addr)) {
    554 		ip6->ip6_dst = sin6_dst->sin6_addr;
    555 	} else  {
    556 		m_freem(m);
    557 		return ENETUNREACH;
    558 	}
    559 #ifndef IPSEC_TX_TOS_CLEAR
    560 	if (!ip6_ipsec_copy_tos)
    561 		otos = 0;
    562 
    563 	if (ifp->if_flags & IFF_ECN)
    564 		ip_ecn_ingress(ECN_ALLOWED, &otos, &itos);
    565 	else
    566 		ip_ecn_ingress(ECN_NOCARE, &otos, &itos);
    567 #else
    568 	if (ip6_ipsec_copy_tos)
    569 		otos = itos;
    570 	else
    571 		otos = 0;
    572 #endif
    573 	ip6->ip6_flow &= ~ntohl(0xff00000);
    574 	ip6->ip6_flow |= htonl((u_int32_t)otos << 20);
    575 
    576 	sockaddr_in6_init(&u.dst6, &sin6_dst->sin6_addr, 0, 0, 0);
    577 
    578 	iro = percpu_getref(sc->ipsec_ro_percpu);
    579 	mutex_enter(iro->ir_lock);
    580 	if ((rt = rtcache_lookup(&iro->ir_ro, &u.dst)) == NULL) {
    581 		mutex_exit(iro->ir_lock);
    582 		percpu_putref(sc->ipsec_ro_percpu);
    583 		m_freem(m);
    584 		return ENETUNREACH;
    585 	}
    586 
    587 	if (rt->rt_ifp == ifp) {
    588 		rtcache_unref(rt, &iro->ir_ro);
    589 		rtcache_free(&iro->ir_ro);
    590 		mutex_exit(iro->ir_lock);
    591 		percpu_putref(sc->ipsec_ro_percpu);
    592 		m_freem(m);
    593 		return ENETUNREACH;
    594 	}
    595 	rtcache_unref(rt, &iro->ir_ro);
    596 
    597 	/*
    598 	 * force fragmentation to minimum MTU, to avoid path MTU discovery.
    599 	 * it is too painful to ask for resend of inner packet, to achieve
    600 	 * path MTU discovery for encapsulated packets.
    601 	 */
    602 	error = ip6_output(m, 0, &iro->ir_ro,
    603 	    ip6_ipsec_pmtu ? 0 : IPV6_MINMTU, 0, NULL, NULL);
    604 	if (error)
    605 		rtcache_free(&iro->ir_ro);
    606 
    607 	mutex_exit(iro->ir_lock);
    608 	percpu_putref(sc->ipsec_ro_percpu);
    609 
    610 	return error;
    611 }
    612 #endif /* INET6 */
    613 
    614 static void
    615 ipsecif4_input(struct mbuf *m, int off, int proto, void *eparg)
    616 {
    617 	struct ifnet *ipsecp;
    618 	struct ipsec_softc *sc = eparg;
    619 	struct ipsec_variant *var;
    620 	const struct ip *ip;
    621 	int af;
    622 #ifndef IPSEC_TX_TOS_CLEAR
    623 	u_int8_t otos;
    624 #endif
    625 	struct psref psref_rcvif;
    626 	struct psref psref_var;
    627 	struct ifnet *rcvif;
    628 
    629 	KASSERT(sc != NULL);
    630 
    631 	ipsecp = &sc->ipsec_if;
    632 	if ((ipsecp->if_flags & IFF_UP) == 0) {
    633 		m_freem(m);
    634 		ip_statinc(IP_STAT_NOIPSEC);
    635 		return;
    636 	}
    637 
    638 	var = if_ipsec_getref_variant(sc, &psref_var);
    639 	if (if_ipsec_variant_is_unconfigured(var)) {
    640 		if_ipsec_putref_variant(var, &psref_var);
    641 		m_freem(m);
    642 		ip_statinc(IP_STAT_NOIPSEC);
    643 		return;
    644 	}
    645 
    646 	ip = mtod(m, const struct ip *);
    647 
    648 	rcvif = m_get_rcvif_psref(m, &psref_rcvif);
    649 	if (rcvif == NULL || !ipsecif4_filter4(ip, var, rcvif)) {
    650 		m_put_rcvif_psref(rcvif, &psref_rcvif);
    651 		if_ipsec_putref_variant(var, &psref_var);
    652 		m_freem(m);
    653 		ip_statinc(IP_STAT_NOIPSEC);
    654 		return;
    655 	}
    656 	m_put_rcvif_psref(rcvif, &psref_rcvif);
    657 	if_ipsec_putref_variant(var, &psref_var);
    658 #ifndef IPSEC_TX_TOS_CLEAR
    659 	otos = ip->ip_tos;
    660 #endif
    661 	m_adj(m, off);
    662 
    663 	switch (proto) {
    664 	case IPPROTO_IPV4:
    665 	    {
    666 		struct ip *xip;
    667 		af = AF_INET;
    668 		if (M_UNWRITABLE(m, sizeof(*xip))) {
    669 			m = m_pullup(m, sizeof(*xip));
    670 			if (!m)
    671 				return;
    672 		}
    673 		xip = mtod(m, struct ip *);
    674 #ifndef IPSEC_TX_TOS_CLEAR
    675 		if (ipsecp->if_flags & IFF_ECN)
    676 			ip_ecn_egress(ECN_ALLOWED, &otos, &xip->ip_tos);
    677 		else
    678 			ip_ecn_egress(ECN_NOCARE, &otos, &xip->ip_tos);
    679 #endif
    680 		break;
    681 	    }
    682 #ifdef INET6
    683 	case IPPROTO_IPV6:
    684 	    {
    685 		struct ip6_hdr *ip6;
    686 		u_int8_t itos;
    687 		af = AF_INET6;
    688 		if (M_UNWRITABLE(m, sizeof(*ip6))) {
    689 			m = m_pullup(m, sizeof(*ip6));
    690 			if (!m)
    691 				return;
    692 		}
    693 		ip6 = mtod(m, struct ip6_hdr *);
    694 		itos = (ntohl(ip6->ip6_flow) >> 20) & 0xff;
    695 #ifndef IPSEC_TX_TOS_CLEAR
    696 		if (ipsecp->if_flags & IFF_ECN)
    697 			ip_ecn_egress(ECN_ALLOWED, &otos, &itos);
    698 		else
    699 			ip_ecn_egress(ECN_NOCARE, &otos, &itos);
    700 #endif
    701 		ip6->ip6_flow &= ~htonl(0xff << 20);
    702 		ip6->ip6_flow |= htonl((u_int32_t)itos << 20);
    703 		break;
    704 	    }
    705 #endif /* INET6 */
    706 	default:
    707 		ip_statinc(IP_STAT_NOIPSEC);
    708 		m_freem(m);
    709 		return;
    710 	}
    711 	if_ipsec_input(m, af, ipsecp);
    712 
    713 	return;
    714 }
    715 
    716 /*
    717  * validate and filter the pakcet
    718  */
    719 static int
    720 ipsecif4_filter4(const struct ip *ip, struct ipsec_variant *var,
    721     struct ifnet *ifp)
    722 {
    723 	struct sockaddr_in *src, *dst;
    724 
    725 	src = satosin(var->iv_psrc);
    726 	dst = satosin(var->iv_pdst);
    727 
    728 	return in_tunnel_validate(ip, src->sin_addr, dst->sin_addr);
    729 }
    730 
    731 #ifdef INET6
    732 static int
    733 ipsecif6_input(struct mbuf **mp, int *offp, int proto, void *eparg)
    734 {
    735 	struct mbuf *m = *mp;
    736 	struct ifnet *ipsecp;
    737 	struct ipsec_softc *sc = eparg;
    738 	struct ipsec_variant *var;
    739 	struct ip6_hdr *ip6;
    740 	int af = 0;
    741 #ifndef IPSEC_TX_TOS_CLEAR
    742 	u_int32_t otos;
    743 #endif
    744 	struct psref psref_rcvif;
    745 	struct psref psref_var;
    746 	struct ifnet *rcvif;
    747 
    748 	KASSERT(eparg != NULL);
    749 
    750 	ipsecp = &sc->ipsec_if;
    751 	if ((ipsecp->if_flags & IFF_UP) == 0) {
    752 		m_freem(m);
    753 		IP6_STATINC(IP6_STAT_NOIPSEC);
    754 		return IPPROTO_DONE;
    755 	}
    756 
    757 	var = if_ipsec_getref_variant(sc, &psref_var);
    758 	if (if_ipsec_variant_is_unconfigured(var)) {
    759 		if_ipsec_putref_variant(var, &psref_var);
    760 		m_freem(m);
    761 		IP6_STATINC(IP6_STAT_NOIPSEC);
    762 		return IPPROTO_DONE;
    763 	}
    764 
    765 	ip6 = mtod(m, struct ip6_hdr *);
    766 
    767 	rcvif = m_get_rcvif_psref(m, &psref_rcvif);
    768 	if (rcvif == NULL || !ipsecif6_filter6(ip6, var, rcvif)) {
    769 		m_put_rcvif_psref(rcvif, &psref_rcvif);
    770 		if_ipsec_putref_variant(var, &psref_var);
    771 		m_freem(m);
    772 		IP6_STATINC(IP6_STAT_NOIPSEC);
    773 		return IPPROTO_DONE;
    774 	}
    775 	m_put_rcvif_psref(rcvif, &psref_rcvif);
    776 	if_ipsec_putref_variant(var, &psref_var);
    777 
    778 #ifndef IPSEC_TX_TOS_CLEAR
    779 	otos = ip6->ip6_flow;
    780 #endif
    781 	m_adj(m, *offp);
    782 
    783 	switch (proto) {
    784 #ifdef INET
    785 	case IPPROTO_IPV4:
    786 	    {
    787 		af = AF_INET;
    788 #ifndef IPSEC_TX_TOS_CLEAR
    789 		struct ip *ip;
    790 		u_int8_t otos8;
    791 		otos8 = (ntohl(otos) >> 20) & 0xff;
    792 
    793 		if (M_UNWRITABLE(m, sizeof(*ip))) {
    794 			m = m_pullup(m, sizeof(*ip));
    795 			if (!m)
    796 				return IPPROTO_DONE;
    797 		}
    798 		ip = mtod(m, struct ip *);
    799 		if (ipsecp->if_flags & IFF_ECN)
    800 			ip_ecn_egress(ECN_ALLOWED, &otos8, &ip->ip_tos);
    801 		else
    802 			ip_ecn_egress(ECN_NOCARE, &otos8, &ip->ip_tos);
    803 #endif
    804 		break;
    805 	    }
    806 #endif /* INET */
    807 	case IPPROTO_IPV6:
    808 	    {
    809 		af = AF_INET6;
    810 #ifndef IPSEC_TX_TOS_CLEAR
    811 		struct ip6_hdr *xip6;
    812 
    813 		if (M_UNWRITABLE(m, sizeof(*xip6))) {
    814 			m = m_pullup(m, sizeof(*xip6));
    815 			if (!m)
    816 				return IPPROTO_DONE;
    817 		}
    818 		xip6 = mtod(m, struct ip6_hdr *);
    819 		if (ipsecp->if_flags & IFF_ECN)
    820 			ip6_ecn_egress(ECN_ALLOWED, &otos, &xip6->ip6_flow);
    821 		else
    822 			ip6_ecn_egress(ECN_NOCARE, &otos, &xip6->ip6_flow);
    823 		break;
    824 #endif
    825 	    }
    826 	default:
    827 		IP6_STATINC(IP6_STAT_NOIPSEC);
    828 		m_freem(m);
    829 		return IPPROTO_DONE;
    830 	}
    831 
    832 	if_ipsec_input(m, af, ipsecp);
    833 	return IPPROTO_DONE;
    834 }
    835 
    836 /*
    837  * validate and filter the packet.
    838  */
    839 static int
    840 ipsecif6_filter6(const struct ip6_hdr *ip6, struct ipsec_variant *var,
    841     struct ifnet *ifp)
    842 {
    843 	struct sockaddr_in6 *src, *dst;
    844 
    845 	src = satosin6(var->iv_psrc);
    846 	dst = satosin6(var->iv_pdst);
    847 
    848 	return in6_tunnel_validate(ip6, &src->sin6_addr, &dst->sin6_addr);
    849 }
    850 #endif /* INET6 */
    851 
    852 int
    853 ipsecif4_attach(struct ipsec_variant *var)
    854 {
    855 	struct ipsec_softc *sc = var->iv_softc;
    856 
    857 	KASSERT(if_ipsec_variant_is_configured(var));
    858 
    859 	if (var->iv_encap_cookie4 != NULL)
    860 		return EALREADY;
    861 	var->iv_encap_cookie4 = encap_attach_func(AF_INET, -1, if_ipsec_encap_func,
    862 	    &ipsecif4_encapsw, sc);
    863 	if (var->iv_encap_cookie4 == NULL)
    864 		return EEXIST;
    865 
    866 	var->iv_output = ipsecif4_output;
    867 	return 0;
    868 }
    869 
    870 int
    871 ipsecif4_detach(struct ipsec_variant *var)
    872 {
    873 	int error;
    874 
    875 	if (var->iv_encap_cookie4 == NULL)
    876 		return 0;
    877 
    878 	var->iv_output = NULL;
    879 	error = encap_detach(var->iv_encap_cookie4);
    880 	if (error == 0)
    881 		var->iv_encap_cookie4 = NULL;
    882 
    883 	return error;
    884 }
    885 
    886 #ifdef INET6
    887 int
    888 ipsecif6_attach(struct ipsec_variant *var)
    889 {
    890 	struct sockaddr_in6 mask6;
    891 	struct ipsec_softc *sc = var->iv_softc;
    892 
    893 	KASSERT(if_ipsec_variant_is_configured(var));
    894 	KASSERT(var->iv_encap_cookie6 == NULL);
    895 
    896 	memset(&mask6, 0, sizeof(mask6));
    897 	mask6.sin6_len = sizeof(struct sockaddr_in6);
    898 	mask6.sin6_addr.s6_addr32[0] = mask6.sin6_addr.s6_addr32[1] =
    899 	mask6.sin6_addr.s6_addr32[2] = mask6.sin6_addr.s6_addr32[3] = ~0;
    900 
    901 	var->iv_encap_cookie6 = encap_attach_func(AF_INET6, -1, if_ipsec_encap_func,
    902 	    &ipsecif6_encapsw, sc);
    903 	if (var->iv_encap_cookie6 == NULL)
    904 		return EEXIST;
    905 
    906 	var->iv_output = ipsecif6_output;
    907 	return 0;
    908 }
    909 
    910 static void
    911 ipsecif6_rtcache_free_pc(void *p, void *arg __unused, struct cpu_info *ci __unused)
    912 {
    913 	struct ipsec_ro *iro = p;
    914 
    915 	mutex_enter(iro->ir_lock);
    916 	rtcache_free(&iro->ir_ro);
    917 	mutex_exit(iro->ir_lock);
    918 }
    919 
    920 int
    921 ipsecif6_detach(struct ipsec_variant *var)
    922 {
    923 	struct ipsec_softc *sc = var->iv_softc;
    924 	int error;
    925 
    926 	KASSERT(var->iv_encap_cookie6 != NULL);
    927 
    928 	percpu_foreach(sc->ipsec_ro_percpu, ipsecif6_rtcache_free_pc, NULL);
    929 
    930 	var->iv_output = NULL;
    931 	error = encap_detach(var->iv_encap_cookie6);
    932 	if (error == 0)
    933 		var->iv_encap_cookie6 = NULL;
    934 	return error;
    935 }
    936 
    937 void *
    938 ipsecif6_ctlinput(int cmd, const struct sockaddr *sa, void *d, void *eparg)
    939 {
    940 	struct ipsec_softc *sc = eparg;
    941 	struct ip6ctlparam *ip6cp = NULL;
    942 	struct ip6_hdr *ip6;
    943 	const struct sockaddr_in6 *dst6;
    944 	struct ipsec_ro *iro;
    945 
    946 	if (sa->sa_family != AF_INET6 ||
    947 	    sa->sa_len != sizeof(struct sockaddr_in6))
    948 		return NULL;
    949 
    950 	if ((unsigned)cmd >= PRC_NCMDS)
    951 		return NULL;
    952 	if (cmd == PRC_HOSTDEAD)
    953 		d = NULL;
    954 	else if (inet6ctlerrmap[cmd] == 0)
    955 		return NULL;
    956 
    957 	/* if the parameter is from icmp6, decode it. */
    958 	if (d != NULL) {
    959 		ip6cp = (struct ip6ctlparam *)d;
    960 		ip6 = ip6cp->ip6c_ip6;
    961 	} else {
    962 		ip6 = NULL;
    963 	}
    964 
    965 	if (!ip6)
    966 		return NULL;
    967 
    968 	iro = percpu_getref(sc->ipsec_ro_percpu);
    969 	mutex_enter(iro->ir_lock);
    970 	dst6 = satocsin6(rtcache_getdst(&iro->ir_ro));
    971 	/* XXX scope */
    972 	if (dst6 == NULL)
    973 		;
    974 	else if (IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst, &dst6->sin6_addr))
    975 		/* flush route cache */
    976 		rtcache_free(&iro->ir_ro);
    977 
    978 	mutex_exit(iro->ir_lock);
    979 	percpu_putref(sc->ipsec_ro_percpu);
    980 
    981 	return NULL;
    982 }
    983 
/*
 * NOTE(review): ENCAP_PR_WRAP_CTLINPUT() presumably emits
 * ipsecif6_ctlinput_wrapper() around the function above -- confirm in
 * ip_encap.h.  From the #define onwards the name ipsecif6_ctlinput
 * resolves to that wrapper, so the table below registers the wrapper.
 */
ENCAP_PR_WRAP_CTLINPUT(ipsecif6_ctlinput)
#define	ipsecif6_ctlinput	ipsecif6_ctlinput_wrapper

/* Handler table passed to encap_attach_func() by ipsecif6_attach(). */
static const struct encapsw ipsecif6_encapsw = {
	.encapsw6 = {
		.pr_input = ipsecif6_input,
		.pr_ctlinput = ipsecif6_ctlinput,
	}
};
    993 #endif /* INET6 */
    994