Home | History | Annotate | Line # | Download | only in netipsec
ipsecif.c revision 1.1.2.10
      1 /*	$NetBSD: ipsecif.c,v 1.1.2.10 2020/01/31 11:01:38 martin Exp $  */
      2 
      3 /*
      4  * Copyright (c) 2017 Internet Initiative Japan Inc.
      5  * All rights reserved.
      6  *
      7  * Redistribution and use in source and binary forms, with or without
      8  * modification, are permitted provided that the following conditions
      9  * are met:
     10  * 1. Redistributions of source code must retain the above copyright
     11  *    notice, this list of conditions and the following disclaimer.
     12  * 2. Redistributions in binary form must reproduce the above copyright
     13  *    notice, this list of conditions and the following disclaimer in the
     14  *    documentation and/or other materials provided with the distribution.
     15  *
     16  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     17  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     18  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     19  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     20  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     21  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     22  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     23  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     24  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     25  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     26  * POSSIBILITY OF SUCH DAMAGE.
     27  */
     28 
     29 #include <sys/cdefs.h>
     30 __KERNEL_RCSID(0, "$NetBSD: ipsecif.c,v 1.1.2.10 2020/01/31 11:01:38 martin Exp $");
     31 
     32 #ifdef _KERNEL_OPT
     33 #include "opt_inet.h"
     34 #include "opt_ipsec.h"
     35 #endif
     36 
     37 #include <sys/param.h>
     38 #include <sys/systm.h>
     39 #include <sys/socket.h>
     40 #include <sys/sockio.h>
     41 #include <sys/mbuf.h>
     42 #include <sys/errno.h>
     43 #include <sys/ioctl.h>
     44 #include <sys/syslog.h>
     45 #include <sys/kernel.h>
     46 
     47 #include <net/if.h>
     48 #include <net/route.h>
     49 
     50 #include <netinet/in.h>
     51 #include <netinet/in_systm.h>
     52 #include <netinet/ip.h>
     53 #include <netinet/ip_var.h>
     54 #include <netinet/in_var.h>
     55 #include <netinet/ip_encap.h>
     56 #include <netinet/ip_ecn.h>
     57 #include <netinet/ip_private.h>
     58 #include <netinet/udp.h>
     59 
     60 #ifdef INET6
     61 #include <netinet/ip6.h>
     62 #include <netinet6/ip6_var.h>
     63 #include <netinet6/ip6_private.h>
     64 #include <netinet6/in6_var.h>
     65 #include <netinet6/ip6protosw.h> /* for struct ip6ctlparam */
     66 #include <netinet/ip_ecn.h>
     67 #endif
     68 
     69 #include <netipsec/key.h>
     70 #include <netipsec/ipsecif.h>
     71 
     72 #include <net/if_ipsec.h>
     73 
     74 static int ipsecif_set_natt_ports(struct ipsec_variant *, struct mbuf *);
     75 static void ipsecif4_input(struct mbuf *, int, int, void *);
     76 static int ipsecif4_output(struct ipsec_variant *, int, struct mbuf *);
     77 static int ipsecif4_filter4(const struct ip *, struct ipsec_variant *,
     78 	struct ifnet *);
     79 
     80 #ifdef INET6
     81 static int ipsecif6_input(struct mbuf **, int *, int, void *);
     82 static int ipsecif6_output(struct ipsec_variant *, int, struct mbuf *);
     83 static int ipsecif6_filter6(const struct ip6_hdr *, struct ipsec_variant *,
     84 	struct ifnet *);
     85 #endif
     86 
     87 static int ip_ipsec_ttl = IPSEC_TTL;
     88 static int ip_ipsec_copy_tos = 0;
     89 #ifdef INET6
     90 static int ip6_ipsec_hlim = IPSEC_HLIM;
     91 static int ip6_ipsec_pmtu = 0; /* XXX: per interface configuration?? */
     92 static int ip6_ipsec_copy_tos = 0;
     93 #endif
     94 
     95 struct encapsw ipsecif4_encapsw = {
     96 	.encapsw4 = {
     97 		.pr_input = ipsecif4_input,
     98 		.pr_ctlinput = NULL,
     99 	}
    100 };
    101 
    102 #ifdef INET6
    103 static const struct encapsw ipsecif6_encapsw;
    104 #endif
    105 
    106 static int
    107 ipsecif_set_natt_ports(struct ipsec_variant *var, struct mbuf *m)
    108 {
    109 
    110 	KASSERT(if_ipsec_heldref_variant(var));
    111 
    112 	if (var->iv_sport || var->iv_dport) {
    113 		struct m_tag *mtag;
    114 
    115 		mtag = m_tag_get(PACKET_TAG_IPSEC_NAT_T_PORTS,
    116 		    sizeof(uint16_t) + sizeof(uint16_t), M_DONTWAIT);
    117 		if (mtag) {
    118 			uint16_t *natt_port;
    119 
    120 			natt_port = (uint16_t *)(mtag + 1);
    121 			natt_port[0] = var->iv_dport;
    122 			natt_port[1] = var->iv_sport;
    123 			m_tag_prepend(m, mtag);
    124 		} else {
    125 			return ENOBUFS;
    126 		}
    127 	}
    128 
    129 	return 0;
    130 }
    131 
    132 static struct mbuf *
    133 ipsecif4_prepend_hdr(struct ipsec_variant *var, struct mbuf *m,
    134     uint8_t proto, uint8_t tos)
    135 {
    136 	struct ip *ip;
    137 	struct sockaddr_in *src, *dst;
    138 
    139 	src = satosin(var->iv_psrc);
    140 	dst = satosin(var->iv_pdst);
    141 
    142 	if (in_nullhost(src->sin_addr) || in_nullhost(src->sin_addr) ||
    143 	    src->sin_addr.s_addr == INADDR_BROADCAST ||
    144 	    dst->sin_addr.s_addr == INADDR_BROADCAST) {
    145 		m_freem(m);
    146 		return NULL;
    147 	}
    148 	m->m_flags &= ~M_BCAST;
    149 
    150 	if (IN_MULTICAST(src->sin_addr.s_addr) ||
    151 	   IN_MULTICAST(dst->sin_addr.s_addr)) {
    152 		m_freem(m);
    153 		return NULL;
    154 	}
    155 
    156 	M_PREPEND(m, sizeof(struct ip), M_DONTWAIT);
    157 	if (m && M_UNWRITABLE(m, sizeof(struct ip)))
    158 		m = m_pullup(m, sizeof(struct ip));
    159 	if (m == NULL)
    160 		return NULL;
    161 
    162 	ip = mtod(m, struct ip *);
    163 	ip->ip_v = IPVERSION;
    164 	ip->ip_off = htons(0);
    165 	if (m->m_pkthdr.len < IP_MINFRAGSIZE)
    166 		ip->ip_id = 0;
    167 	else
    168 		ip->ip_id = ip_newid(NULL);
    169 	ip->ip_hl = sizeof(*ip) >> 2;
    170 	if (ip_ipsec_copy_tos)
    171 		ip->ip_tos = tos;
    172 	else
    173 		ip->ip_tos = 0;
    174 	ip->ip_sum = 0;
    175 	ip->ip_src = src->sin_addr;
    176 	ip->ip_dst = dst->sin_addr;
    177 	ip->ip_p = proto;
    178 	ip->ip_ttl = ip_ipsec_ttl;
    179 	ip->ip_len = htons(m->m_pkthdr.len);
    180 #ifndef IPSEC_TX_TOS_CLEAR
    181 	struct ifnet *ifp = &var->iv_softc->ipsec_if;
    182 	if (ifp->if_flags & IFF_ECN)
    183 		ip_ecn_ingress(ECN_ALLOWED, &ip->ip_tos, &tos);
    184 	else
    185 		ip_ecn_ingress(ECN_NOCARE, &ip->ip_tos, &tos);
    186 #endif
    187 
    188 	return m;
    189 }
    190 
    191 static int
    192 ipsecif4_needfrag(struct mbuf *m, struct ipsecrequest *isr)
    193 {
    194 	struct ip ip0;
    195 	struct ip *ip;
    196 	int mtu;
    197 	struct secasvar *sav;
    198 
    199 	sav = key_lookup_sa_bysaidx(&isr->saidx);
    200 	if (sav == NULL)
    201 		return 0;
    202 
    203 	if (!(sav->natt_type & UDP_ENCAP_ESPINUDP) &&
    204 	    !(sav->natt_type & UDP_ENCAP_ESPINUDP_NON_IKE)) {
    205 		mtu = 0;
    206 		goto out;
    207 	}
    208 
    209 	if (m->m_len < sizeof(struct ip)) {
    210 		m_copydata(m, 0, sizeof(ip0), &ip0);
    211 		ip = &ip0;
    212 
    213 	} else {
    214 		ip = mtod(m, struct ip *);
    215 	}
    216 	mtu = sav->esp_frag;
    217 	if (ntohs(ip->ip_len) <= mtu)
    218 		mtu = 0;
    219 
    220 out:
    221 	KEY_SA_UNREF(&sav);
    222 	return mtu;
    223 }
    224 
    225 static struct mbuf *
    226 ipsecif4_flowinfo(struct mbuf *m, int family, int *proto0, u_int8_t *tos0)
    227 {
    228 	const struct ip *ip;
    229 	int proto;
    230 	int tos;
    231 
    232 	KASSERT(proto0 != NULL);
    233 	KASSERT(tos0 != NULL);
    234 
    235 	switch (family) {
    236 	case AF_INET:
    237 		proto = IPPROTO_IPV4;
    238 		if (m->m_len < sizeof(*ip)) {
    239 			m = m_pullup(m, sizeof(*ip));
    240 			if (!m) {
    241 				*tos0 = 0;
    242 				*proto0 = 0;
    243 				return  NULL;
    244 			}
    245 		}
    246 		ip = mtod(m, const struct ip *);
    247 		tos = ip->ip_tos;
    248 		/* TODO: support ALTQ for innner packet */
    249 		break;
    250 #ifdef INET6
    251 	case AF_INET6: {
    252 		const struct ip6_hdr *ip6;
    253 		proto = IPPROTO_IPV6;
    254 		if (m->m_len < sizeof(*ip6)) {
    255 			m = m_pullup(m, sizeof(*ip6));
    256 			if (!m) {
    257 				*tos0 = 0;
    258 				*proto0 = 0;
    259 				return NULL;
    260 			}
    261 		}
    262 		ip6 = mtod(m, const struct ip6_hdr *);
    263 		tos = (ntohl(ip6->ip6_flow) >> 20) & 0xff;
    264 		/* TODO: support ALTQ for innner packet */
    265 		break;
    266 	}
    267 #endif /* INET6 */
    268 	default:
    269 		*tos0 = 0;
    270 		*proto0 = 0;
    271 		return NULL;
    272 	}
    273 
    274 	*proto0 = proto;
    275 	*tos0 = tos;
    276 	return m;
    277 }
    278 
    279 static int
    280 ipsecif4_fragout(struct ipsec_variant *var, int family, struct mbuf *m, int mtu)
    281 {
    282 	struct ifnet *ifp = &var->iv_softc->ipsec_if;
    283 	struct mbuf *next;
    284 	struct m_tag *mtag;
    285 	int error;
    286 
    287 	KASSERT(if_ipsec_heldref_variant(var));
    288 
    289 	mtag = m_tag_find(m, PACKET_TAG_IPSEC_NAT_T_PORTS, NULL);
    290 	if (mtag)
    291 		m_tag_delete(m, mtag);
    292 
    293 	/* consider new IP header prepended in ipsecif4_output() */
    294 	if (mtu <= sizeof(struct ip)) {
    295 		m_freem(m);
    296 		return ENETUNREACH;
    297 	}
    298 	m->m_pkthdr.csum_flags |= M_CSUM_IPv4;
    299 	error = ip_fragment(m, ifp, mtu - sizeof(struct ip));
    300 	if (error)
    301 		return error;
    302 
    303 	for (error = 0; m; m = next) {
    304 		next = m->m_nextpkt;
    305 		m->m_nextpkt = NULL;
    306 		if (error) {
    307 			m_freem(m);
    308 			continue;
    309 		}
    310 
    311 		error = ipsecif4_output(var, family, m);
    312 	}
    313 	if (error == 0)
    314 		IP_STATINC(IP_STAT_FRAGMENTED);
    315 
    316 	return error;
    317 }
    318 
    319 int
    320 ipsecif4_encap_func(struct mbuf *m, struct ip *ip, struct ipsec_variant *var)
    321 {
    322 	struct m_tag *mtag;
    323 	struct sockaddr_in *src, *dst;
    324 	u_int16_t src_port = 0;
    325 	u_int16_t dst_port = 0;
    326 
    327 	KASSERT(var != NULL);
    328 
    329 	src = satosin(var->iv_psrc);
    330 	dst = satosin(var->iv_pdst);
    331 	mtag = m_tag_find(m, PACKET_TAG_IPSEC_NAT_T_PORTS, NULL);
    332 	if (mtag) {
    333 		u_int16_t *ports;
    334 
    335 		ports = (u_int16_t *)(mtag + 1);
    336 		src_port = ports[0];
    337 		dst_port = ports[1];
    338 	}
    339 
    340 	/* address match */
    341 	if (src->sin_addr.s_addr != ip->ip_dst.s_addr ||
    342 	    dst->sin_addr.s_addr != ip->ip_src.s_addr)
    343 		return 0;
    344 
    345 	/* UDP encap? */
    346 	if (mtag == NULL && var->iv_sport == 0 && var->iv_dport == 0)
    347 		goto match;
    348 
    349 	/* port match */
    350 	if (src_port != var->iv_dport ||
    351 	    dst_port != var->iv_sport) {
    352 #ifdef DEBUG
    353 		printf("%s: port mismatch: pkt(%u, %u), if(%u, %u)\n",
    354 		    __func__, ntohs(src_port), ntohs(dst_port),
    355 		    ntohs(var->iv_sport), ntohs(var->iv_dport));
    356 #endif
    357 		return 0;
    358 	}
    359 
    360 match:
    361 	/*
    362 	 * hide NAT-T information from encapsulated traffics.
    363 	 * they don't know about IPsec.
    364 	 */
    365 	if (mtag)
    366 		m_tag_delete(m, mtag);
    367 	return sizeof(src->sin_addr) + sizeof(dst->sin_addr);
    368 }
    369 
    370 static int
    371 ipsecif4_output(struct ipsec_variant *var, int family, struct mbuf *m)
    372 {
    373 	struct secpolicy *sp = NULL;
    374 	u_int8_t tos;
    375 	int proto;
    376 	int error;
    377 	int mtu;
    378 	u_long sa_mtu = 0;
    379 
    380 	KASSERT(if_ipsec_heldref_variant(var));
    381 	KASSERT(if_ipsec_variant_is_configured(var));
    382 	KASSERT(var->iv_psrc->sa_family == AF_INET);
    383 	KASSERT(var->iv_pdst->sa_family == AF_INET);
    384 
    385 	switch (family) {
    386 	case AF_INET:
    387 		sp = IV_SP_OUT(var);
    388 		break;
    389 	case AF_INET6:
    390 		sp = IV_SP_OUT6(var);
    391 		break;
    392 	default:
    393 		m_freem(m);
    394 		return EAFNOSUPPORT;
    395 	}
    396 	KASSERT(sp != NULL);
    397 	/*
    398 	 * The SPs in ipsec_variant are prevented from freed by
    399 	 * ipsec_variant->iv_psref. So, KEY_SP_REF() is unnecessary here.
    400 	 */
    401 
    402 	KASSERT(sp->policy != IPSEC_POLICY_NONE);
    403 	KASSERT(sp->policy != IPSEC_POLICY_ENTRUST);
    404 	KASSERT(sp->policy != IPSEC_POLICY_BYPASS);
    405 	if(sp->policy != IPSEC_POLICY_IPSEC) {
    406 		m_freem(m);
    407 		error = ENETUNREACH;
    408 		goto done;
    409 	}
    410 
    411 	/* get flowinfo */
    412 	m = ipsecif4_flowinfo(m, family, &proto, &tos);
    413 	if (m == NULL) {
    414 		error = ENETUNREACH;
    415 		goto done;
    416 	}
    417 
    418 	/* prepend new IP header */
    419 	m = ipsecif4_prepend_hdr(var, m, proto, tos);
    420 	if (m == NULL) {
    421 		error = ENETUNREACH;
    422 		goto done;
    423 	}
    424 
    425 	/*
    426 	 * Normal netipsec's NAT-T fragmentation is done in ip_output().
    427 	 * See "natt_frag" processing.
    428 	 * However, ipsec(4) interface's one is not done in the same way,
    429 	 * so we must do NAT-T fragmentation by own code.
    430 	 */
    431 	/* NAT-T ESP fragmentation */
    432 	mtu = ipsecif4_needfrag(m, sp->req);
    433 	if (mtu > 0)
    434 		return ipsecif4_fragout(var, family, m, mtu);
    435 
    436 	/* set NAT-T ports */
    437 	error = ipsecif_set_natt_ports(var, m);
    438 	if (error) {
    439 		m_freem(m);
    440 		goto done;
    441 	}
    442 
    443 	/* IPsec output */
    444 	IP_STATINC(IP_STAT_LOCALOUT);
    445 	error = ipsec4_process_packet(m, sp->req, &sa_mtu);
    446 	if (error == ENOENT)
    447 		error = 0;
    448 	/*
    449 	 * frangmentation is already done in ipsecif4_fragout(),
    450 	 * so ipsec4_process_packet() must not do fragmentation here.
    451 	 */
    452 	KASSERT(sa_mtu == 0);
    453 
    454 done:
    455 	return error;
    456 }
    457 
    458 #ifdef INET6
    459 int
    460 ipsecif6_encap_func(struct mbuf *m, struct ip6_hdr *ip6, struct ipsec_variant *var)
    461 {
    462 	struct m_tag *mtag;
    463 	struct sockaddr_in6 *src, *dst;
    464 	u_int16_t src_port = 0;
    465 	u_int16_t dst_port = 0;
    466 
    467 	KASSERT(var != NULL);
    468 
    469 	src = satosin6(var->iv_psrc);
    470 	dst = satosin6(var->iv_pdst);
    471 	mtag = m_tag_find(m, PACKET_TAG_IPSEC_NAT_T_PORTS, NULL);
    472 	if (mtag) {
    473 		u_int16_t *ports;
    474 
    475 		ports = (u_int16_t *)(mtag + 1);
    476 		src_port = ports[0];
    477 		dst_port = ports[1];
    478 	}
    479 
    480 	/* address match */
    481 	if (!IN6_ARE_ADDR_EQUAL(&src->sin6_addr, &ip6->ip6_dst) ||
    482 	    !IN6_ARE_ADDR_EQUAL(&dst->sin6_addr, &ip6->ip6_src))
    483 		return 0;
    484 
    485 	/* UDP encap? */
    486 	if (mtag == NULL && var->iv_sport == 0 && var->iv_dport == 0)
    487 		goto match;
    488 
    489 	/* port match */
    490 	if (src_port != var->iv_dport ||
    491 	    dst_port != var->iv_sport) {
    492 #ifdef DEBUG
    493 		printf("%s: port mismatch: pkt(%u, %u), if(%u, %u)\n",
    494 		    __func__, ntohs(src_port), ntohs(dst_port),
    495 		    ntohs(var->iv_sport), ntohs(var->iv_dport));
    496 #endif
    497 		return 0;
    498 	}
    499 
    500 match:
    501 	/*
    502 	 * hide NAT-T information from encapsulated traffics.
    503 	 * they don't know about IPsec.
    504 	 */
    505 	if (mtag)
    506 		m_tag_delete(m, mtag);
    507 	return sizeof(src->sin6_addr) + sizeof(dst->sin6_addr);
    508 }
    509 
    510 static int
    511 ipsecif6_output(struct ipsec_variant *var, int family, struct mbuf *m)
    512 {
    513 	struct ifnet *ifp = &var->iv_softc->ipsec_if;
    514 	struct ipsec_softc *sc = ifp->if_softc;
    515 	struct route *ro_pc;
    516 	kmutex_t *lock_pc;
    517 	struct rtentry *rt;
    518 	struct sockaddr_in6 *sin6_src;
    519 	struct sockaddr_in6 *sin6_dst;
    520 	struct ip6_hdr *ip6;
    521 	int proto, error;
    522 	u_int8_t itos, otos;
    523 	union {
    524 		struct sockaddr		dst;
    525 		struct sockaddr_in6	dst6;
    526 	} u;
    527 
    528 	KASSERT(if_ipsec_heldref_variant(var));
    529 	KASSERT(if_ipsec_variant_is_configured(var));
    530 
    531 	sin6_src = satosin6(var->iv_psrc);
    532 	sin6_dst = satosin6(var->iv_pdst);
    533 
    534 	KASSERT(sin6_src->sin6_family == AF_INET6);
    535 	KASSERT(sin6_dst->sin6_family == AF_INET6);
    536 
    537 	switch (family) {
    538 #ifdef INET
    539 	case AF_INET:
    540 	    {
    541 		struct ip *ip;
    542 
    543 		proto = IPPROTO_IPV4;
    544 		if (m->m_len < sizeof(*ip)) {
    545 			m = m_pullup(m, sizeof(*ip));
    546 			if (!m)
    547 				return ENOBUFS;
    548 		}
    549 		ip = mtod(m, struct ip *);
    550 		itos = ip->ip_tos;
    551 		/*
    552 		 * TODO:
    553 		 *support ALTQ for innner packet
    554 		 */
    555 		break;
    556 	    }
    557 #endif /* INET */
    558 	case AF_INET6:
    559 	    {
    560 		struct ip6_hdr *xip6;
    561 		proto = IPPROTO_IPV6;
    562 		if (m->m_len < sizeof(*xip6)) {
    563 			m = m_pullup(m, sizeof(*xip6));
    564 			if (!m)
    565 				return ENOBUFS;
    566 		}
    567 		xip6 = mtod(m, struct ip6_hdr *);
    568 		itos = (ntohl(xip6->ip6_flow) >> 20) & 0xff;
    569 		/* TODO:
    570 		 * support ALTQ for innner packet
    571 		 */
    572 		break;
    573 	    }
    574 	default:
    575 		m_freem(m);
    576 		return EAFNOSUPPORT;
    577 	}
    578 
    579 	/* prepend new IP header */
    580 	M_PREPEND(m, sizeof(struct ip6_hdr), M_DONTWAIT);
    581 	if (m && M_UNWRITABLE(m, sizeof(struct ip6_hdr)))
    582 		m = m_pullup(m, sizeof(struct ip6_hdr));
    583 	if (m == NULL)
    584 		return ENOBUFS;
    585 
    586 	ip6 = mtod(m, struct ip6_hdr *);
    587 	ip6->ip6_flow	= 0;
    588 	ip6->ip6_vfc	&= ~IPV6_VERSION_MASK;
    589 	ip6->ip6_vfc	|= IPV6_VERSION;
    590 #if 0	/* ip6->ip6_plen will be filled by ip6_output */
    591 	ip6->ip6_plen	= htons((u_short)m->m_pkthdr.len - sizeof(*ip6));
    592 #endif
    593 	ip6->ip6_nxt	= proto;
    594 	ip6->ip6_hlim	= ip6_ipsec_hlim;
    595 	ip6->ip6_src	= sin6_src->sin6_addr;
    596 	/* bidirectional configured tunnel mode */
    597 	if (!IN6_IS_ADDR_UNSPECIFIED(&sin6_dst->sin6_addr)) {
    598 		ip6->ip6_dst = sin6_dst->sin6_addr;
    599 	} else  {
    600 		m_freem(m);
    601 		return ENETUNREACH;
    602 	}
    603 #ifndef IPSEC_TX_TOS_CLEAR
    604 	if (!ip6_ipsec_copy_tos)
    605 		otos = 0;
    606 
    607 	if (ifp->if_flags & IFF_ECN)
    608 		ip_ecn_ingress(ECN_ALLOWED, &otos, &itos);
    609 	else
    610 		ip_ecn_ingress(ECN_NOCARE, &otos, &itos);
    611 #else
    612 	if (ip6_ipsec_copy_tos)
    613 		otos = itos;
    614 	else
    615 		otos = 0;
    616 #endif
    617 	ip6->ip6_flow &= ~ntohl(0xff00000);
    618 	ip6->ip6_flow |= htonl((u_int32_t)otos << 20);
    619 
    620 	sockaddr_in6_init(&u.dst6, &sin6_dst->sin6_addr, 0, 0, 0);
    621 
    622 	if_tunnel_get_ro(sc->ipsec_ro_percpu, &ro_pc, &lock_pc);
    623 	if ((rt = rtcache_lookup(ro_pc, &u.dst)) == NULL) {
    624 		if_tunnel_put_ro(sc->ipsec_ro_percpu, lock_pc);
    625 		m_freem(m);
    626 		return ENETUNREACH;
    627 	}
    628 
    629 	if (rt->rt_ifp == ifp) {
    630 		rtcache_unref(rt, ro_pc);
    631 		rtcache_free(ro_pc);
    632 		if_tunnel_put_ro(sc->ipsec_ro_percpu, lock_pc);
    633 		m_freem(m);
    634 		return ENETUNREACH;
    635 	}
    636 	rtcache_unref(rt, ro_pc);
    637 
    638 	/* set NAT-T ports */
    639 	error = ipsecif_set_natt_ports(var, m);
    640 	if (error) {
    641 		m_freem(m);
    642 		goto out;
    643 	}
    644 
    645 	/*
    646 	 * force fragmentation to minimum MTU, to avoid path MTU discovery.
    647 	 * it is too painful to ask for resend of inner packet, to achieve
    648 	 * path MTU discovery for encapsulated packets.
    649 	 */
    650 	error = ip6_output(m, 0, ro_pc,
    651 	    ip6_ipsec_pmtu ? 0 : IPV6_MINMTU, 0, NULL, NULL);
    652 
    653 out:
    654 	if (error)
    655 		rtcache_free(ro_pc);
    656 	if_tunnel_put_ro(sc->ipsec_ro_percpu, lock_pc);
    657 
    658 	return error;
    659 }
    660 #endif /* INET6 */
    661 
    662 static void
    663 ipsecif4_input(struct mbuf *m, int off, int proto, void *eparg)
    664 {
    665 	struct ifnet *ipsecp;
    666 	struct ipsec_softc *sc = eparg;
    667 	struct ipsec_variant *var;
    668 	const struct ip *ip;
    669 	int af;
    670 #ifndef IPSEC_TX_TOS_CLEAR
    671 	u_int8_t otos;
    672 #endif
    673 	struct psref psref_rcvif;
    674 	struct psref psref_var;
    675 	struct ifnet *rcvif;
    676 
    677 	KASSERT(sc != NULL);
    678 
    679 	ipsecp = &sc->ipsec_if;
    680 	if ((ipsecp->if_flags & IFF_UP) == 0) {
    681 		m_freem(m);
    682 		ip_statinc(IP_STAT_NOIPSEC);
    683 		return;
    684 	}
    685 
    686 	var = if_ipsec_getref_variant(sc, &psref_var);
    687 	if (if_ipsec_variant_is_unconfigured(var)) {
    688 		if_ipsec_putref_variant(var, &psref_var);
    689 		m_freem(m);
    690 		ip_statinc(IP_STAT_NOIPSEC);
    691 		return;
    692 	}
    693 
    694 	ip = mtod(m, const struct ip *);
    695 
    696 	rcvif = m_get_rcvif_psref(m, &psref_rcvif);
    697 	if (rcvif == NULL || !ipsecif4_filter4(ip, var, rcvif)) {
    698 		m_put_rcvif_psref(rcvif, &psref_rcvif);
    699 		if_ipsec_putref_variant(var, &psref_var);
    700 		m_freem(m);
    701 		ip_statinc(IP_STAT_NOIPSEC);
    702 		return;
    703 	}
    704 	m_put_rcvif_psref(rcvif, &psref_rcvif);
    705 	if_ipsec_putref_variant(var, &psref_var);
    706 #ifndef IPSEC_TX_TOS_CLEAR
    707 	otos = ip->ip_tos;
    708 #endif
    709 	m_adj(m, off);
    710 
    711 	switch (proto) {
    712 	case IPPROTO_IPV4:
    713 	    {
    714 		struct ip *xip;
    715 		af = AF_INET;
    716 		if (M_UNWRITABLE(m, sizeof(*xip))) {
    717 			m = m_pullup(m, sizeof(*xip));
    718 			if (!m)
    719 				return;
    720 		}
    721 		xip = mtod(m, struct ip *);
    722 #ifndef IPSEC_TX_TOS_CLEAR
    723 		if (ipsecp->if_flags & IFF_ECN)
    724 			ip_ecn_egress(ECN_ALLOWED, &otos, &xip->ip_tos);
    725 		else
    726 			ip_ecn_egress(ECN_NOCARE, &otos, &xip->ip_tos);
    727 #endif
    728 		break;
    729 	    }
    730 #ifdef INET6
    731 	case IPPROTO_IPV6:
    732 	    {
    733 		struct ip6_hdr *ip6;
    734 		u_int8_t itos;
    735 		af = AF_INET6;
    736 		if (M_UNWRITABLE(m, sizeof(*ip6))) {
    737 			m = m_pullup(m, sizeof(*ip6));
    738 			if (!m)
    739 				return;
    740 		}
    741 		ip6 = mtod(m, struct ip6_hdr *);
    742 		itos = (ntohl(ip6->ip6_flow) >> 20) & 0xff;
    743 #ifndef IPSEC_TX_TOS_CLEAR
    744 		if (ipsecp->if_flags & IFF_ECN)
    745 			ip_ecn_egress(ECN_ALLOWED, &otos, &itos);
    746 		else
    747 			ip_ecn_egress(ECN_NOCARE, &otos, &itos);
    748 #endif
    749 		ip6->ip6_flow &= ~htonl(0xff << 20);
    750 		ip6->ip6_flow |= htonl((u_int32_t)itos << 20);
    751 		break;
    752 	    }
    753 #endif /* INET6 */
    754 	default:
    755 		ip_statinc(IP_STAT_NOIPSEC);
    756 		m_freem(m);
    757 		return;
    758 	}
    759 	if_ipsec_input(m, af, ipsecp);
    760 
    761 	return;
    762 }
    763 
    764 /*
    765  * validate and filter the pakcet
    766  */
    767 static int
    768 ipsecif4_filter4(const struct ip *ip, struct ipsec_variant *var,
    769     struct ifnet *ifp)
    770 {
    771 	struct sockaddr_in *src, *dst;
    772 
    773 	src = satosin(var->iv_psrc);
    774 	dst = satosin(var->iv_pdst);
    775 
    776 	return in_tunnel_validate(ip, src->sin_addr, dst->sin_addr);
    777 }
    778 
    779 #ifdef INET6
    780 static int
    781 ipsecif6_input(struct mbuf **mp, int *offp, int proto, void *eparg)
    782 {
    783 	struct mbuf *m = *mp;
    784 	struct ifnet *ipsecp;
    785 	struct ipsec_softc *sc = eparg;
    786 	struct ipsec_variant *var;
    787 	struct ip6_hdr *ip6;
    788 	int af = 0;
    789 #ifndef IPSEC_TX_TOS_CLEAR
    790 	u_int32_t otos;
    791 #endif
    792 	struct psref psref_rcvif;
    793 	struct psref psref_var;
    794 	struct ifnet *rcvif;
    795 
    796 	KASSERT(eparg != NULL);
    797 
    798 	ipsecp = &sc->ipsec_if;
    799 	if ((ipsecp->if_flags & IFF_UP) == 0) {
    800 		m_freem(m);
    801 		IP6_STATINC(IP6_STAT_NOIPSEC);
    802 		return IPPROTO_DONE;
    803 	}
    804 
    805 	var = if_ipsec_getref_variant(sc, &psref_var);
    806 	if (if_ipsec_variant_is_unconfigured(var)) {
    807 		if_ipsec_putref_variant(var, &psref_var);
    808 		m_freem(m);
    809 		IP6_STATINC(IP6_STAT_NOIPSEC);
    810 		return IPPROTO_DONE;
    811 	}
    812 
    813 	ip6 = mtod(m, struct ip6_hdr *);
    814 
    815 	rcvif = m_get_rcvif_psref(m, &psref_rcvif);
    816 	if (rcvif == NULL || !ipsecif6_filter6(ip6, var, rcvif)) {
    817 		m_put_rcvif_psref(rcvif, &psref_rcvif);
    818 		if_ipsec_putref_variant(var, &psref_var);
    819 		m_freem(m);
    820 		IP6_STATINC(IP6_STAT_NOIPSEC);
    821 		return IPPROTO_DONE;
    822 	}
    823 	m_put_rcvif_psref(rcvif, &psref_rcvif);
    824 	if_ipsec_putref_variant(var, &psref_var);
    825 
    826 #ifndef IPSEC_TX_TOS_CLEAR
    827 	otos = ip6->ip6_flow;
    828 #endif
    829 	m_adj(m, *offp);
    830 
    831 	switch (proto) {
    832 #ifdef INET
    833 	case IPPROTO_IPV4:
    834 	    {
    835 		af = AF_INET;
    836 #ifndef IPSEC_TX_TOS_CLEAR
    837 		struct ip *ip;
    838 		u_int8_t otos8;
    839 		otos8 = (ntohl(otos) >> 20) & 0xff;
    840 
    841 		if (M_UNWRITABLE(m, sizeof(*ip))) {
    842 			m = m_pullup(m, sizeof(*ip));
    843 			if (!m)
    844 				return IPPROTO_DONE;
    845 		}
    846 		ip = mtod(m, struct ip *);
    847 		if (ipsecp->if_flags & IFF_ECN)
    848 			ip_ecn_egress(ECN_ALLOWED, &otos8, &ip->ip_tos);
    849 		else
    850 			ip_ecn_egress(ECN_NOCARE, &otos8, &ip->ip_tos);
    851 #endif
    852 		break;
    853 	    }
    854 #endif /* INET */
    855 	case IPPROTO_IPV6:
    856 	    {
    857 		af = AF_INET6;
    858 #ifndef IPSEC_TX_TOS_CLEAR
    859 		struct ip6_hdr *xip6;
    860 
    861 		if (M_UNWRITABLE(m, sizeof(*xip6))) {
    862 			m = m_pullup(m, sizeof(*xip6));
    863 			if (!m)
    864 				return IPPROTO_DONE;
    865 		}
    866 		xip6 = mtod(m, struct ip6_hdr *);
    867 		if (ipsecp->if_flags & IFF_ECN)
    868 			ip6_ecn_egress(ECN_ALLOWED, &otos, &xip6->ip6_flow);
    869 		else
    870 			ip6_ecn_egress(ECN_NOCARE, &otos, &xip6->ip6_flow);
    871 		break;
    872 #endif
    873 	    }
    874 	default:
    875 		IP6_STATINC(IP6_STAT_NOIPSEC);
    876 		m_freem(m);
    877 		return IPPROTO_DONE;
    878 	}
    879 
    880 	if_ipsec_input(m, af, ipsecp);
    881 	return IPPROTO_DONE;
    882 }
    883 
    884 /*
    885  * validate and filter the packet.
    886  */
    887 static int
    888 ipsecif6_filter6(const struct ip6_hdr *ip6, struct ipsec_variant *var,
    889     struct ifnet *ifp)
    890 {
    891 	struct sockaddr_in6 *src, *dst;
    892 
    893 	src = satosin6(var->iv_psrc);
    894 	dst = satosin6(var->iv_pdst);
    895 
    896 	return in6_tunnel_validate(ip6, &src->sin6_addr, &dst->sin6_addr);
    897 }
    898 #endif /* INET6 */
    899 
    900 int
    901 ipsecif4_attach(struct ipsec_variant *var)
    902 {
    903 	struct ipsec_softc *sc = var->iv_softc;
    904 
    905 	KASSERT(if_ipsec_variant_is_configured(var));
    906 
    907 	if (var->iv_encap_cookie4 != NULL)
    908 		return EALREADY;
    909 	var->iv_encap_cookie4 = encap_attach_func(AF_INET, -1, if_ipsec_encap_func,
    910 	    &ipsecif4_encapsw, sc);
    911 	if (var->iv_encap_cookie4 == NULL)
    912 		return EEXIST;
    913 
    914 	var->iv_output = ipsecif4_output;
    915 	return 0;
    916 }
    917 
    918 int
    919 ipsecif4_detach(struct ipsec_variant *var)
    920 {
    921 	int error;
    922 
    923 	if (var->iv_encap_cookie4 == NULL)
    924 		return 0;
    925 
    926 	var->iv_output = NULL;
    927 	error = encap_detach(var->iv_encap_cookie4);
    928 	if (error == 0)
    929 		var->iv_encap_cookie4 = NULL;
    930 
    931 	return error;
    932 }
    933 
    934 #ifdef INET6
    935 int
    936 ipsecif6_attach(struct ipsec_variant *var)
    937 {
    938 	struct ipsec_softc *sc = var->iv_softc;
    939 
    940 	KASSERT(if_ipsec_variant_is_configured(var));
    941 	KASSERT(var->iv_encap_cookie6 == NULL);
    942 
    943 	var->iv_encap_cookie6 = encap_attach_func(AF_INET6, -1, if_ipsec_encap_func,
    944 	    &ipsecif6_encapsw, sc);
    945 	if (var->iv_encap_cookie6 == NULL)
    946 		return EEXIST;
    947 
    948 	var->iv_output = ipsecif6_output;
    949 	return 0;
    950 }
    951 
    952 int
    953 ipsecif6_detach(struct ipsec_variant *var)
    954 {
    955 	struct ipsec_softc *sc = var->iv_softc;
    956 	int error;
    957 
    958 	KASSERT(var->iv_encap_cookie6 != NULL);
    959 
    960 	if_tunnel_ro_percpu_rtcache_free(sc->ipsec_ro_percpu);
    961 
    962 	var->iv_output = NULL;
    963 	error = encap_detach(var->iv_encap_cookie6);
    964 	if (error == 0)
    965 		var->iv_encap_cookie6 = NULL;
    966 	return error;
    967 }
    968 
    969 void *
    970 ipsecif6_ctlinput(int cmd, const struct sockaddr *sa, void *d, void *eparg)
    971 {
    972 	struct ipsec_softc *sc = eparg;
    973 	struct ip6ctlparam *ip6cp = NULL;
    974 	struct ip6_hdr *ip6;
    975 	const struct sockaddr_in6 *dst6;
    976 	struct route *ro_pc;
    977 	kmutex_t *lock_pc;
    978 
    979 	if (sa->sa_family != AF_INET6 ||
    980 	    sa->sa_len != sizeof(struct sockaddr_in6))
    981 		return NULL;
    982 
    983 	if ((unsigned)cmd >= PRC_NCMDS)
    984 		return NULL;
    985 	if (cmd == PRC_HOSTDEAD)
    986 		d = NULL;
    987 	else if (inet6ctlerrmap[cmd] == 0)
    988 		return NULL;
    989 
    990 	/* if the parameter is from icmp6, decode it. */
    991 	if (d != NULL) {
    992 		ip6cp = (struct ip6ctlparam *)d;
    993 		ip6 = ip6cp->ip6c_ip6;
    994 	} else {
    995 		ip6 = NULL;
    996 	}
    997 
    998 	if (!ip6)
    999 		return NULL;
   1000 
   1001 	if_tunnel_get_ro(sc->ipsec_ro_percpu, &ro_pc, &lock_pc);
   1002 	dst6 = satocsin6(rtcache_getdst(ro_pc));
   1003 	/* XXX scope */
   1004 	if (dst6 == NULL)
   1005 		;
   1006 	else if (IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst, &dst6->sin6_addr))
   1007 		/* flush route cache */
   1008 		rtcache_free(ro_pc);
   1009 
   1010 	if_tunnel_put_ro(sc->ipsec_ro_percpu, lock_pc);
   1011 
   1012 	return NULL;
   1013 }
   1014 
   1015 ENCAP_PR_WRAP_CTLINPUT(ipsecif6_ctlinput)
   1016 #define	ipsecif6_ctlinput	ipsecif6_ctlinput_wrapper
   1017 
   1018 static const struct encapsw ipsecif6_encapsw = {
   1019 	.encapsw6 = {
   1020 		.pr_input = ipsecif6_input,
   1021 		.pr_ctlinput = ipsecif6_ctlinput,
   1022 	}
   1023 };
   1024 #endif /* INET6 */
   1025