npf_inet.c revision 1.37.12.2
11.37.12.2Spgoyette/*	$NetBSD: npf_inet.c,v 1.37.12.2 2018/03/22 01:44:51 pgoyette Exp $	*/
21.1Srmind
31.1Srmind/*-
41.29Srmind * Copyright (c) 2009-2014 The NetBSD Foundation, Inc.
51.1Srmind * All rights reserved.
61.1Srmind *
71.1Srmind * This material is based upon work partially supported by The
81.1Srmind * NetBSD Foundation under a contract with Mindaugas Rasiukevicius.
91.1Srmind *
101.1Srmind * Redistribution and use in source and binary forms, with or without
111.1Srmind * modification, are permitted provided that the following conditions
121.1Srmind * are met:
131.1Srmind * 1. Redistributions of source code must retain the above copyright
141.1Srmind *    notice, this list of conditions and the following disclaimer.
151.1Srmind * 2. Redistributions in binary form must reproduce the above copyright
161.1Srmind *    notice, this list of conditions and the following disclaimer in the
171.1Srmind *    documentation and/or other materials provided with the distribution.
181.1Srmind *
191.1Srmind * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
201.1Srmind * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
211.1Srmind * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
221.1Srmind * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
231.1Srmind * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
241.1Srmind * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
251.1Srmind * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
261.1Srmind * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
271.1Srmind * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
281.1Srmind * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
291.1Srmind * POSSIBILITY OF SUCH DAMAGE.
301.1Srmind */
311.1Srmind
/*
 * Various protocol related helper routines.
 *
 * This layer manipulates the npf_cache_t structure, i.e. it caches the
 * requested headers and records which information was cached in the
 * information bit field.  It is also the responsibility of this layer
 * to update or invalidate the cache on rewrites (e.g. by translation
 * routines).
 */
401.1Srmind
411.36Schristos#ifdef _KERNEL
421.1Srmind#include <sys/cdefs.h>
431.37.12.2Spgoyette__KERNEL_RCSID(0, "$NetBSD: npf_inet.c,v 1.37.12.2 2018/03/22 01:44:51 pgoyette Exp $");
441.1Srmind
451.1Srmind#include <sys/param.h>
461.11Srmind#include <sys/types.h>
471.1Srmind
481.4Srmind#include <net/pfil.h>
491.4Srmind#include <net/if.h>
501.4Srmind#include <net/ethertypes.h>
511.4Srmind#include <net/if_ether.h>
521.4Srmind
531.1Srmind#include <netinet/in_systm.h>
541.1Srmind#include <netinet/in.h>
551.33Smlelstv#include <netinet6/in6_var.h>
561.1Srmind#include <netinet/ip.h>
571.4Srmind#include <netinet/ip6.h>
581.1Srmind#include <netinet/tcp.h>
591.1Srmind#include <netinet/udp.h>
601.1Srmind#include <netinet/ip_icmp.h>
611.36Schristos#endif
621.1Srmind
631.1Srmind#include "npf_impl.h"
641.1Srmind
/*
 * npf_fixup{16,32}_cksum: incremental update of the Internet checksum.
 */

/*
 * npf_fixup16_cksum: adjust a checksum for one 16-bit datum which changed
 * from 'odatum' to 'ndatum'.
 *
 * => Returns the updated checksum.
 */
uint16_t
npf_fixup16_cksum(uint16_t cksum, uint16_t odatum, uint16_t ndatum)
{
	/*
	 * RFC 1624:
	 *	HC' = ~(~HC + ~m + m')
	 *
	 * Note: 1's complement sum is endian-independent (RFC 1071, page 2),
	 * therefore no byte-order conversion is performed here.
	 */
	uint32_t acc = (uint16_t)~cksum;

	acc += (uint16_t)~odatum;
	acc += ndatum;

	/* Fold the carries back into the low 16 bits (twice is enough). */
	acc = (acc & 0xffff) + (acc >> 16);
	acc += acc >> 16;

	return (uint16_t)~acc;
}
871.1Srmind
/*
 * npf_fixup32_cksum: adjust a checksum for one 32-bit datum which changed
 * from 'odatum' to 'ndatum'.
 *
 * => Returns the updated checksum.
 */
uint16_t
npf_fixup32_cksum(uint16_t cksum, uint32_t odatum, uint32_t ndatum)
{
	const uint32_t ocompl = ~odatum;
	uint32_t acc;

	/*
	 * Checksum the 32-bit datum as two 16-bit halves.  Note: no
	 * 32->16 bit reduction is needed between the two halves, since
	 * only five 16-bit quantities are accumulated in a 32-bit sum.
	 */
	acc = (uint16_t)~cksum;
	acc += (ocompl & 0xffff) + (ndatum & 0xffff);	/* low halves */
	acc += (ocompl >> 16) + (ndatum >> 16);		/* high halves */

	/* Fold the carries and complement the result. */
	acc = (acc & 0xffff) + (acc >> 16);
	acc += acc >> 16;

	return (uint16_t)~acc;
}
1051.1Srmind
1061.1Srmind/*
1071.4Srmind * npf_addr_cksum: calculate checksum of the address, either IPv4 or IPv6.
1081.4Srmind */
1091.4Srminduint16_t
1101.19Srmindnpf_addr_cksum(uint16_t cksum, int sz, const npf_addr_t *oaddr,
1111.19Srmind    const npf_addr_t *naddr)
1121.4Srmind{
1131.19Srmind	const uint32_t *oip32 = (const uint32_t *)oaddr;
1141.19Srmind	const uint32_t *nip32 = (const uint32_t *)naddr;
1151.4Srmind
1161.4Srmind	KASSERT(sz % sizeof(uint32_t) == 0);
1171.4Srmind	do {
1181.4Srmind		cksum = npf_fixup32_cksum(cksum, *oip32++, *nip32++);
1191.4Srmind		sz -= sizeof(uint32_t);
1201.4Srmind	} while (sz);
1211.4Srmind
1221.4Srmind	return cksum;
1231.4Srmind}
1241.4Srmind
1251.4Srmind/*
1261.26Srmind * npf_addr_sum: provide IP addresses as a XORed 32-bit integer.
1271.4Srmind * Note: used for hash function.
1281.1Srmind */
1291.4Srminduint32_t
1301.26Srmindnpf_addr_mix(const int sz, const npf_addr_t *a1, const npf_addr_t *a2)
1311.1Srmind{
1321.4Srmind	uint32_t mix = 0;
1331.1Srmind
1341.5Srmind	KASSERT(sz > 0 && a1 != NULL && a2 != NULL);
1351.5Srmind
1361.26Srmind	for (int i = 0; i < (sz >> 2); i++) {
1371.36Schristos		mix ^= a1->word32[i];
1381.36Schristos		mix ^= a2->word32[i];
1391.4Srmind	}
1401.4Srmind	return mix;
1411.4Srmind}
1421.1Srmind
1431.13Srmind/*
1441.13Srmind * npf_addr_mask: apply the mask to a given address and store the result.
1451.13Srmind */
1461.13Srmindvoid
1471.13Srmindnpf_addr_mask(const npf_addr_t *addr, const npf_netmask_t mask,
1481.13Srmind    const int alen, npf_addr_t *out)
1491.12Srmind{
1501.13Srmind	const int nwords = alen >> 2;
1511.12Srmind	uint_fast8_t length = mask;
1521.12Srmind
1531.12Srmind	/* Note: maximum length is 32 for IPv4 and 128 for IPv6. */
1541.12Srmind	KASSERT(length <= NPF_MAX_NETMASK);
1551.12Srmind
1561.13Srmind	for (int i = 0; i < nwords; i++) {
1571.13Srmind		uint32_t wordmask;
1581.13Srmind
1591.12Srmind		if (length >= 32) {
1601.13Srmind			wordmask = htonl(0xffffffff);
1611.12Srmind			length -= 32;
1621.13Srmind		} else if (length) {
1631.13Srmind			wordmask = htonl(0xffffffff << (32 - length));
1641.13Srmind			length = 0;
1651.12Srmind		} else {
1661.13Srmind			wordmask = 0;
1671.12Srmind		}
1681.36Schristos		out->word32[i] = addr->word32[i] & wordmask;
1691.12Srmind	}
1701.12Srmind}
1711.12Srmind
1721.12Srmind/*
1731.12Srmind * npf_addr_cmp: compare two addresses, either IPv4 or IPv6.
1741.12Srmind *
1751.13Srmind * => Return 0 if equal and negative/positive if less/greater accordingly.
1761.12Srmind * => Ignore the mask, if NPF_NO_NETMASK is specified.
1771.12Srmind */
1781.12Srmindint
1791.12Srmindnpf_addr_cmp(const npf_addr_t *addr1, const npf_netmask_t mask1,
1801.13Srmind    const npf_addr_t *addr2, const npf_netmask_t mask2, const int alen)
1811.12Srmind{
1821.13Srmind	npf_addr_t realaddr1, realaddr2;
1831.12Srmind
1841.12Srmind	if (mask1 != NPF_NO_NETMASK) {
1851.13Srmind		npf_addr_mask(addr1, mask1, alen, &realaddr1);
1861.13Srmind		addr1 = &realaddr1;
1871.12Srmind	}
1881.12Srmind	if (mask2 != NPF_NO_NETMASK) {
1891.13Srmind		npf_addr_mask(addr2, mask2, alen, &realaddr2);
1901.13Srmind		addr2 = &realaddr2;
1911.12Srmind	}
1921.13Srmind	return memcmp(addr1, addr2, alen);
1931.12Srmind}
1941.12Srmind
1951.4Srmind/*
1961.4Srmind * npf_tcpsaw: helper to fetch SEQ, ACK, WIN and return TCP data length.
1971.12Srmind *
1981.12Srmind * => Returns all values in host byte-order.
1991.4Srmind */
2001.4Srmindint
2011.12Srmindnpf_tcpsaw(const npf_cache_t *npc, tcp_seq *seq, tcp_seq *ack, uint32_t *win)
2021.4Srmind{
2031.19Srmind	const struct tcphdr *th = npc->npc_l4.tcp;
2041.8Srmind	u_int thlen;
2051.1Srmind
2061.7Szoltan	KASSERT(npf_iscached(npc, NPC_TCP));
2071.1Srmind
2081.4Srmind	*seq = ntohl(th->th_seq);
2091.4Srmind	*ack = ntohl(th->th_ack);
2101.4Srmind	*win = (uint32_t)ntohs(th->th_win);
2111.8Srmind	thlen = th->th_off << 2;
2121.1Srmind
2131.7Szoltan	if (npf_iscached(npc, NPC_IP4)) {
2141.19Srmind		const struct ip *ip = npc->npc_ip.v4;
2151.21Srmind		return ntohs(ip->ip_len) - npc->npc_hlen - thlen;
2161.12Srmind	} else if (npf_iscached(npc, NPC_IP6)) {
2171.19Srmind		const struct ip6_hdr *ip6 = npc->npc_ip.v6;
2181.37.12.2Spgoyette		return ntohs(ip6->ip6_plen) -
2191.37.12.2Spgoyette		    (npc->npc_hlen - sizeof(*ip6)) - thlen;
2201.7Szoltan	}
2211.7Szoltan	return 0;
2221.1Srmind}
2231.1Srmind
2241.1Srmind/*
2251.4Srmind * npf_fetch_tcpopts: parse and return TCP options.
2261.1Srmind */
2271.1Srmindbool
2281.32Srmindnpf_fetch_tcpopts(npf_cache_t *npc, uint16_t *mss, int *wscale)
2291.1Srmind{
2301.32Srmind	nbuf_t *nbuf = npc->npc_nbuf;
2311.19Srmind	const struct tcphdr *th = npc->npc_l4.tcp;
2321.4Srmind	int topts_len, step;
2331.37.12.1Spgoyette	uint8_t *nptr;
2341.4Srmind	uint8_t val;
2351.19Srmind	bool ok;
2361.4Srmind
2371.7Szoltan	KASSERT(npf_iscached(npc, NPC_IP46));
2381.7Szoltan	KASSERT(npf_iscached(npc, NPC_TCP));
2391.10Srmind
2401.4Srmind	/* Determine if there are any TCP options, get their length. */
2411.4Srmind	topts_len = (th->th_off << 2) - sizeof(struct tcphdr);
2421.4Srmind	if (topts_len <= 0) {
2431.4Srmind		/* No options. */
2441.1Srmind		return false;
2451.4Srmind	}
2461.4Srmind	KASSERT(topts_len <= MAX_TCPOPTLEN);
2471.1Srmind
2481.4Srmind	/* First step: IP and TCP header up to options. */
2491.21Srmind	step = npc->npc_hlen + sizeof(struct tcphdr);
2501.19Srmind	nbuf_reset(nbuf);
2511.4Srmindnext:
2521.19Srmind	if ((nptr = nbuf_advance(nbuf, step, 1)) == NULL) {
2531.19Srmind		ok = false;
2541.19Srmind		goto done;
2551.4Srmind	}
2561.37.12.1Spgoyette	val = *nptr;
2571.12Srmind
2581.4Srmind	switch (val) {
2591.4Srmind	case TCPOPT_EOL:
2601.4Srmind		/* Done. */
2611.19Srmind		ok = true;
2621.19Srmind		goto done;
2631.4Srmind	case TCPOPT_NOP:
2641.4Srmind		topts_len--;
2651.4Srmind		step = 1;
2661.4Srmind		break;
2671.4Srmind	case TCPOPT_MAXSEG:
2681.37.12.1Spgoyette		if ((nptr = nbuf_ensure_contig(nbuf, TCPOLEN_MAXSEG)) == NULL) {
2691.19Srmind			ok = false;
2701.19Srmind			goto done;
2711.4Srmind		}
2721.4Srmind		if (mss) {
2731.19Srmind			if (*mss) {
2741.37.12.1Spgoyette				memcpy(nptr + 2, mss, sizeof(uint16_t));
2751.19Srmind			} else {
2761.37.12.1Spgoyette				memcpy(mss, nptr + 2, sizeof(uint16_t));
2771.19Srmind			}
2781.4Srmind		}
2791.4Srmind		topts_len -= TCPOLEN_MAXSEG;
2801.37.12.1Spgoyette		step = TCPOLEN_MAXSEG;
2811.4Srmind		break;
2821.4Srmind	case TCPOPT_WINDOW:
2831.10Srmind		/* TCP Window Scaling (RFC 1323). */
2841.37.12.1Spgoyette		if ((nptr = nbuf_ensure_contig(nbuf, TCPOLEN_WINDOW)) == NULL) {
2851.19Srmind			ok = false;
2861.19Srmind			goto done;
2871.4Srmind		}
2881.37.12.1Spgoyette		val = *(nptr + 2);
2891.4Srmind		*wscale = (val > TCP_MAX_WINSHIFT) ? TCP_MAX_WINSHIFT : val;
2901.4Srmind		topts_len -= TCPOLEN_WINDOW;
2911.37.12.1Spgoyette		step = TCPOLEN_WINDOW;
2921.4Srmind		break;
2931.4Srmind	default:
2941.37.12.1Spgoyette		if ((nptr = nbuf_ensure_contig(nbuf, 2)) == NULL) {
2951.19Srmind			ok = false;
2961.19Srmind			goto done;
2971.4Srmind		}
2981.37.12.1Spgoyette		val = *(nptr + 1);
2991.16Srmind		if (val < 2 || val > topts_len) {
3001.19Srmind			ok = false;
3011.19Srmind			goto done;
3021.4Srmind		}
3031.4Srmind		topts_len -= val;
3041.37.12.1Spgoyette		step = val;
3051.4Srmind	}
3061.12Srmind
3071.6Srmind	/* Any options left? */
3081.4Srmind	if (__predict_true(topts_len > 0)) {
3091.4Srmind		goto next;
3101.4Srmind	}
3111.19Srmind	ok = true;
3121.19Srminddone:
3131.19Srmind	if (nbuf_flag_p(nbuf, NBUF_DATAREF_RESET)) {
3141.32Srmind		npf_recache(npc);
3151.19Srmind	}
3161.19Srmind	return ok;
3171.1Srmind}
3181.1Srmind
3191.19Srmindstatic int
3201.19Srmindnpf_cache_ip(npf_cache_t *npc, nbuf_t *nbuf)
3211.1Srmind{
3221.19Srmind	const void *nptr = nbuf_dataptr(nbuf);
3231.19Srmind	const uint8_t ver = *(const uint8_t *)nptr;
3241.19Srmind	int flags = 0;
3251.12Srmind
3261.37.12.2Spgoyette	/*
3271.37.12.2Spgoyette	 * We intentionally don't read the L4 payload after IPPROTO_AH.
3281.37.12.2Spgoyette	 */
3291.37.12.2Spgoyette
3301.4Srmind	switch (ver >> 4) {
3311.12Srmind	case IPVERSION: {
3321.19Srmind		struct ip *ip;
3331.12Srmind
3341.19Srmind		ip = nbuf_ensure_contig(nbuf, sizeof(struct ip));
3351.19Srmind		if (ip == NULL) {
3361.37.12.1Spgoyette			return NPC_FMTERR;
3371.4Srmind		}
3381.12Srmind
3391.4Srmind		/* Check header length and fragment offset. */
3401.10Srmind		if ((u_int)(ip->ip_hl << 2) < sizeof(struct ip)) {
3411.37.12.1Spgoyette			return NPC_FMTERR;
3421.4Srmind		}
3431.4Srmind		if (ip->ip_off & ~htons(IP_DF | IP_RF)) {
3441.4Srmind			/* Note fragmentation. */
3451.19Srmind			flags |= NPC_IPFRAG;
3461.4Srmind		}
3471.12Srmind
3481.4Srmind		/* Cache: layer 3 - IPv4. */
3491.14Srmind		npc->npc_alen = sizeof(struct in_addr);
3501.28Srmind		npc->npc_ips[NPF_SRC] = (npf_addr_t *)&ip->ip_src;
3511.28Srmind		npc->npc_ips[NPF_DST] = (npf_addr_t *)&ip->ip_dst;
3521.7Szoltan		npc->npc_hlen = ip->ip_hl << 2;
3531.19Srmind		npc->npc_proto = ip->ip_p;
3541.19Srmind
3551.19Srmind		npc->npc_ip.v4 = ip;
3561.19Srmind		flags |= NPC_IP4;
3571.4Srmind		break;
3581.12Srmind	}
3591.4Srmind
3601.12Srmind	case (IPV6_VERSION >> 4): {
3611.19Srmind		struct ip6_hdr *ip6;
3621.19Srmind		struct ip6_ext *ip6e;
3631.37Schristos		struct ip6_frag *ip6f;
3641.19Srmind		size_t off, hlen;
3651.37.12.1Spgoyette		int frag_present;
3661.19Srmind
3671.19Srmind		ip6 = nbuf_ensure_contig(nbuf, sizeof(struct ip6_hdr));
3681.19Srmind		if (ip6 == NULL) {
3691.37.12.1Spgoyette			return NPC_FMTERR;
3701.7Szoltan		}
3711.19Srmind
3721.37.12.2Spgoyette		/*
3731.37.12.2Spgoyette		 * XXX: We don't handle IPv6 Jumbograms.
3741.37.12.2Spgoyette		 */
3751.37.12.2Spgoyette
3761.19Srmind		/* Set initial next-protocol value. */
3771.19Srmind		hlen = sizeof(struct ip6_hdr);
3781.19Srmind		npc->npc_proto = ip6->ip6_nxt;
3791.13Srmind		npc->npc_hlen = hlen;
3801.7Szoltan
3811.37.12.1Spgoyette		frag_present = 0;
3821.37.12.1Spgoyette
3831.12Srmind		/*
3841.19Srmind		 * Advance by the length of the current header.
3851.12Srmind		 */
3861.19Srmind		off = nbuf_offset(nbuf);
3871.37.12.1Spgoyette		while ((ip6e = nbuf_advance(nbuf, hlen, sizeof(*ip6e))) != NULL) {
3881.13Srmind			/*
3891.13Srmind			 * Determine whether we are going to continue.
3901.13Srmind			 */
3911.19Srmind			switch (npc->npc_proto) {
3921.13Srmind			case IPPROTO_HOPOPTS:
3931.7Szoltan			case IPPROTO_DSTOPTS:
3941.7Szoltan			case IPPROTO_ROUTING:
3951.19Srmind				hlen = (ip6e->ip6e_len + 1) << 3;
3961.7Szoltan				break;
3971.7Szoltan			case IPPROTO_FRAGMENT:
3981.37.12.1Spgoyette				if (frag_present++)
3991.37.12.1Spgoyette					return NPC_FMTERR;
4001.37Schristos				ip6f = nbuf_ensure_contig(nbuf, sizeof(*ip6f));
4011.37Schristos				if (ip6f == NULL)
4021.37.12.1Spgoyette					return NPC_FMTERR;
4031.37.12.1Spgoyette
4041.37.12.1Spgoyette				/* RFC6946: Skip dummy fragments. */
4051.37.12.1Spgoyette				if (!ntohs(ip6f->ip6f_offlg & IP6F_OFF_MASK) &&
4061.37.12.1Spgoyette				    !(ip6f->ip6f_offlg & IP6F_MORE_FRAG)) {
4071.37.12.1Spgoyette					hlen = sizeof(struct ip6_frag);
4081.37.12.1Spgoyette					break;
4091.37.12.1Spgoyette				}
4101.37.12.1Spgoyette
4111.37.12.1Spgoyette				hlen = 0;
4121.37.12.1Spgoyette				flags |= NPC_IPFRAG;
4131.37Schristos
4141.7Szoltan				break;
4151.7Szoltan			default:
4161.13Srmind				hlen = 0;
4171.13Srmind				break;
4181.13Srmind			}
4191.13Srmind
4201.13Srmind			if (!hlen) {
4211.7Szoltan				break;
4221.7Szoltan			}
4231.19Srmind			npc->npc_proto = ip6e->ip6e_nxt;
4241.13Srmind			npc->npc_hlen += hlen;
4251.13Srmind		}
4261.7Szoltan
4271.23Srmind		/*
4281.23Srmind		 * Re-fetch the header pointers (nbufs might have been
4291.23Srmind		 * reallocated).  Restore the original offset (if any).
4301.23Srmind		 */
4311.19Srmind		nbuf_reset(nbuf);
4321.23Srmind		ip6 = nbuf_dataptr(nbuf);
4331.19Srmind		if (off) {
4341.19Srmind			nbuf_advance(nbuf, off, 0);
4351.19Srmind		}
4361.19Srmind
4371.12Srmind		/* Cache: layer 3 - IPv6. */
4381.14Srmind		npc->npc_alen = sizeof(struct in6_addr);
4391.28Srmind		npc->npc_ips[NPF_SRC] = (npf_addr_t *)&ip6->ip6_src;
4401.37.12.2Spgoyette		npc->npc_ips[NPF_DST] = (npf_addr_t *)&ip6->ip6_dst;
4411.19Srmind
4421.19Srmind		npc->npc_ip.v6 = ip6;
4431.19Srmind		flags |= NPC_IP6;
4441.7Szoltan		break;
4451.12Srmind	}
4461.4Srmind	default:
4471.19Srmind		break;
4481.4Srmind	}
4491.19Srmind	return flags;
4501.1Srmind}
4511.1Srmind
4521.1Srmind/*
4531.4Srmind * npf_cache_all: general routine to cache all relevant IP (v4 or v6)
4541.12Srmind * and TCP, UDP or ICMP headers.
4551.19Srmind *
4561.19Srmind * => nbuf offset shall be set accordingly.
4571.1Srmind */
4581.10Srmindint
4591.32Srmindnpf_cache_all(npf_cache_t *npc)
4601.1Srmind{
4611.32Srmind	nbuf_t *nbuf = npc->npc_nbuf;
4621.19Srmind	int flags, l4flags;
4631.19Srmind	u_int hlen;
4641.19Srmind
4651.19Srmind	/*
4661.19Srmind	 * This routine is a main point where the references are cached,
4671.19Srmind	 * therefore clear the flag as we reset.
4681.19Srmind	 */
4691.19Srmindagain:
4701.19Srmind	nbuf_unset_flag(nbuf, NBUF_DATAREF_RESET);
4711.1Srmind
4721.19Srmind	/*
4731.19Srmind	 * First, cache the L3 header (IPv4 or IPv6).  If IP packet is
4741.19Srmind	 * fragmented, then we cannot look into L4.
4751.19Srmind	 */
4761.19Srmind	flags = npf_cache_ip(npc, nbuf);
4771.37.12.1Spgoyette	if ((flags & NPC_IP46) == 0 || (flags & NPC_IPFRAG) != 0 ||
4781.37.12.1Spgoyette	    (flags & NPC_FMTERR) != 0) {
4791.23Srmind		nbuf_unset_flag(nbuf, NBUF_DATAREF_RESET);
4801.19Srmind		npc->npc_info |= flags;
4811.19Srmind		return flags;
4821.1Srmind	}
4831.19Srmind	hlen = npc->npc_hlen;
4841.19Srmind
4851.19Srmind	switch (npc->npc_proto) {
4861.1Srmind	case IPPROTO_TCP:
4871.19Srmind		/* Cache: layer 4 - TCP. */
4881.19Srmind		npc->npc_l4.tcp = nbuf_advance(nbuf, hlen,
4891.19Srmind		    sizeof(struct tcphdr));
4901.19Srmind		l4flags = NPC_LAYER4 | NPC_TCP;
4911.10Srmind		break;
4921.1Srmind	case IPPROTO_UDP:
4931.19Srmind		/* Cache: layer 4 - UDP. */
4941.19Srmind		npc->npc_l4.udp = nbuf_advance(nbuf, hlen,
4951.19Srmind		    sizeof(struct udphdr));
4961.19Srmind		l4flags = NPC_LAYER4 | NPC_UDP;
4971.10Srmind		break;
4981.1Srmind	case IPPROTO_ICMP:
4991.19Srmind		/* Cache: layer 4 - ICMPv4. */
5001.19Srmind		npc->npc_l4.icmp = nbuf_advance(nbuf, hlen,
5011.19Srmind		    offsetof(struct icmp, icmp_void));
5021.19Srmind		l4flags = NPC_LAYER4 | NPC_ICMP;
5031.19Srmind		break;
5041.15Sspz	case IPPROTO_ICMPV6:
5051.19Srmind		/* Cache: layer 4 - ICMPv6. */
5061.19Srmind		npc->npc_l4.icmp6 = nbuf_advance(nbuf, hlen,
5071.19Srmind		    offsetof(struct icmp6_hdr, icmp6_data32));
5081.19Srmind		l4flags = NPC_LAYER4 | NPC_ICMP;
5091.19Srmind		break;
5101.19Srmind	default:
5111.19Srmind		l4flags = 0;
5121.10Srmind		break;
5131.1Srmind	}
5141.19Srmind
5151.19Srmind	if (nbuf_flag_p(nbuf, NBUF_DATAREF_RESET)) {
5161.19Srmind		goto again;
5171.19Srmind	}
5181.19Srmind
5191.19Srmind	/* Add the L4 flags if nbuf_advance() succeeded. */
5201.19Srmind	if (l4flags && npc->npc_l4.hdr) {
5211.19Srmind		flags |= l4flags;
5221.19Srmind	}
5231.19Srmind	npc->npc_info |= flags;
5241.19Srmind	return flags;
5251.19Srmind}
5261.19Srmind
5271.19Srmindvoid
5281.32Srmindnpf_recache(npf_cache_t *npc)
5291.19Srmind{
5301.32Srmind	nbuf_t *nbuf = npc->npc_nbuf;
5311.24Smartin	const int mflags __diagused = npc->npc_info & (NPC_IP46 | NPC_LAYER4);
5321.25Smrg	int flags __diagused;
5331.19Srmind
5341.19Srmind	nbuf_reset(nbuf);
5351.19Srmind	npc->npc_info = 0;
5361.32Srmind	flags = npf_cache_all(npc);
5371.32Srmind
5381.19Srmind	KASSERT((flags & mflags) == mflags);
5391.19Srmind	KASSERT(nbuf_flag_p(nbuf, NBUF_DATAREF_RESET) == 0);
5401.1Srmind}
5411.1Srmind
5421.1Srmind/*
5431.19Srmind * npf_rwrip: rewrite required IP address.
5441.4Srmind */
5451.4Srmindbool
5461.28Srmindnpf_rwrip(const npf_cache_t *npc, u_int which, const npf_addr_t *addr)
5471.4Srmind{
5481.4Srmind	KASSERT(npf_iscached(npc, NPC_IP46));
5491.28Srmind	KASSERT(which == NPF_SRC || which == NPF_DST);
5501.4Srmind
5511.28Srmind	memcpy(npc->npc_ips[which], addr, npc->npc_alen);
5521.4Srmind	return true;
5531.4Srmind}
5541.4Srmind
5551.4Srmind/*
5561.19Srmind * npf_rwrport: rewrite required TCP/UDP port.
5571.1Srmind */
5581.1Srmindbool
5591.28Srmindnpf_rwrport(const npf_cache_t *npc, u_int which, const in_port_t port)
5601.1Srmind{
5611.21Srmind	const int proto = npc->npc_proto;
5621.4Srmind	in_port_t *oport;
5631.1Srmind
5641.4Srmind	KASSERT(npf_iscached(npc, NPC_TCP) || npf_iscached(npc, NPC_UDP));
5651.1Srmind	KASSERT(proto == IPPROTO_TCP || proto == IPPROTO_UDP);
5661.28Srmind	KASSERT(which == NPF_SRC || which == NPF_DST);
5671.1Srmind
5681.19Srmind	/* Get the offset and store the port in it. */
5691.4Srmind	if (proto == IPPROTO_TCP) {
5701.19Srmind		struct tcphdr *th = npc->npc_l4.tcp;
5711.28Srmind		oport = (which == NPF_SRC) ? &th->th_sport : &th->th_dport;
5721.1Srmind	} else {
5731.19Srmind		struct udphdr *uh = npc->npc_l4.udp;
5741.28Srmind		oport = (which == NPF_SRC) ? &uh->uh_sport : &uh->uh_dport;
5751.1Srmind	}
5761.19Srmind	memcpy(oport, &port, sizeof(in_port_t));
5771.1Srmind	return true;
5781.1Srmind}
5791.1Srmind
5801.1Srmind/*
5811.19Srmind * npf_rwrcksum: rewrite IPv4 and/or TCP/UDP checksum.
5821.1Srmind */
5831.1Srmindbool
5841.28Srmindnpf_rwrcksum(const npf_cache_t *npc, u_int which,
5851.19Srmind    const npf_addr_t *addr, const in_port_t port)
5861.1Srmind{
5871.28Srmind	const npf_addr_t *oaddr = npc->npc_ips[which];
5881.21Srmind	const int proto = npc->npc_proto;
5891.19Srmind	const int alen = npc->npc_alen;
5901.18Srmind	uint16_t *ocksum;
5911.18Srmind	in_port_t oport;
5921.18Srmind
5931.19Srmind	KASSERT(npf_iscached(npc, NPC_LAYER4));
5941.28Srmind	KASSERT(which == NPF_SRC || which == NPF_DST);
5951.18Srmind
5961.4Srmind	if (npf_iscached(npc, NPC_IP4)) {
5971.19Srmind		struct ip *ip = npc->npc_ip.v4;
5981.19Srmind		uint16_t ipsum = ip->ip_sum;
5991.4Srmind
6001.19Srmind		/* Recalculate IPv4 checksum and rewrite. */
6011.19Srmind		ip->ip_sum = npf_addr_cksum(ipsum, alen, oaddr, addr);
6021.4Srmind	} else {
6031.4Srmind		/* No checksum for IPv6. */
6041.4Srmind		KASSERT(npf_iscached(npc, NPC_IP6));
6051.4Srmind	}
6061.4Srmind
6071.18Srmind	/* Nothing else to do for ICMP. */
6081.30Srmind	if (proto == IPPROTO_ICMP || proto == IPPROTO_ICMPV6) {
6091.4Srmind		return true;
6101.4Srmind	}
6111.7Szoltan	KASSERT(npf_iscached(npc, NPC_TCP) || npf_iscached(npc, NPC_UDP));
6121.4Srmind
6131.18Srmind	/*
6141.18Srmind	 * Calculate TCP/UDP checksum:
6151.18Srmind	 * - Skip if UDP and the current checksum is zero.
6161.18Srmind	 * - Fixup the IP address change.
6171.18Srmind	 * - Fixup the port change, if required (non-zero).
6181.18Srmind	 */
6191.4Srmind	if (proto == IPPROTO_TCP) {
6201.19Srmind		struct tcphdr *th = npc->npc_l4.tcp;
6211.4Srmind
6221.18Srmind		ocksum = &th->th_sum;
6231.28Srmind		oport = (which == NPF_SRC) ? th->th_sport : th->th_dport;
6241.4Srmind	} else {
6251.19Srmind		struct udphdr *uh = npc->npc_l4.udp;
6261.4Srmind
6271.4Srmind		KASSERT(proto == IPPROTO_UDP);
6281.18Srmind		ocksum = &uh->uh_sum;
6291.18Srmind		if (*ocksum == 0) {
6301.4Srmind			/* No need to update. */
6311.4Srmind			return true;
6321.4Srmind		}
6331.28Srmind		oport = (which == NPF_SRC) ? uh->uh_sport : uh->uh_dport;
6341.18Srmind	}
6351.18Srmind
6361.19Srmind	uint16_t cksum = npf_addr_cksum(*ocksum, alen, oaddr, addr);
6371.18Srmind	if (port) {
6381.18Srmind		cksum = npf_fixup16_cksum(cksum, oport, port);
6391.4Srmind	}
6401.1Srmind
6411.19Srmind	/* Rewrite TCP/UDP checksum. */
6421.19Srmind	memcpy(ocksum, &cksum, sizeof(uint16_t));
6431.4Srmind	return true;
6441.4Srmind}
6451.4Srmind
6461.29Srmind/*
6471.30Srmind * npf_napt_rwr: perform address and/or port translation.
6481.30Srmind */
6491.30Srmindint
6501.30Srmindnpf_napt_rwr(const npf_cache_t *npc, u_int which,
6511.30Srmind    const npf_addr_t *addr, const in_addr_t port)
6521.30Srmind{
6531.30Srmind	const unsigned proto = npc->npc_proto;
6541.30Srmind
6551.30Srmind	/*
6561.30Srmind	 * Rewrite IP and/or TCP/UDP checksums first, since we need the
6571.30Srmind	 * current (old) address/port for the calculations.  Then perform
6581.30Srmind	 * the address translation i.e. rewrite source or destination.
6591.30Srmind	 */
6601.30Srmind	if (!npf_rwrcksum(npc, which, addr, port)) {
6611.30Srmind		return EINVAL;
6621.30Srmind	}
6631.30Srmind	if (!npf_rwrip(npc, which, addr)) {
6641.30Srmind		return EINVAL;
6651.30Srmind	}
6661.30Srmind	if (port == 0) {
6671.30Srmind		/* Done. */
6681.30Srmind		return 0;
6691.30Srmind	}
6701.30Srmind
6711.30Srmind	switch (proto) {
6721.30Srmind	case IPPROTO_TCP:
6731.30Srmind	case IPPROTO_UDP:
6741.30Srmind		/* Rewrite source/destination port. */
6751.30Srmind		if (!npf_rwrport(npc, which, port)) {
6761.30Srmind			return EINVAL;
6771.30Srmind		}
6781.30Srmind		break;
6791.30Srmind	case IPPROTO_ICMP:
6801.30Srmind	case IPPROTO_ICMPV6:
6811.30Srmind		KASSERT(npf_iscached(npc, NPC_ICMP));
6821.30Srmind		/* Nothing. */
6831.30Srmind		break;
6841.30Srmind	default:
6851.30Srmind		return ENOTSUP;
6861.30Srmind	}
6871.30Srmind	return 0;
6881.30Srmind}
6891.30Srmind
6901.30Srmind/*
6911.29Srmind * IPv6-to-IPv6 Network Prefix Translation (NPTv6), as per RFC 6296.
6921.29Srmind */
6931.29Srmind
6941.29Srmindint
6951.29Srmindnpf_npt66_rwr(const npf_cache_t *npc, u_int which, const npf_addr_t *pref,
6961.29Srmind    npf_netmask_t len, uint16_t adj)
6971.29Srmind{
6981.29Srmind	npf_addr_t *addr = npc->npc_ips[which];
6991.29Srmind	unsigned remnant, word, preflen = len >> 4;
7001.29Srmind	uint32_t sum;
7011.29Srmind
7021.29Srmind	KASSERT(which == NPF_SRC || which == NPF_DST);
7031.29Srmind
7041.29Srmind	if (!npf_iscached(npc, NPC_IP6)) {
7051.29Srmind		return EINVAL;
7061.29Srmind	}
7071.29Srmind	if (len <= 48) {
7081.29Srmind		/*
7091.29Srmind		 * The word to adjust.  Cannot translate the 0xffff
7101.29Srmind		 * subnet if /48 or shorter.
7111.29Srmind		 */
7121.29Srmind		word = 3;
7131.36Schristos		if (addr->word16[word] == 0xffff) {
7141.29Srmind			return EINVAL;
7151.29Srmind		}
7161.29Srmind	} else {
7171.29Srmind		/*
7181.29Srmind		 * Also, all 0s or 1s in the host part are disallowed for
7191.29Srmind		 * longer than /48 prefixes.
7201.29Srmind		 */
7211.36Schristos		if ((addr->word32[2] == 0 && addr->word32[3] == 0) ||
7221.36Schristos		    (addr->word32[2] == ~0U && addr->word32[3] == ~0U))
7231.29Srmind			return EINVAL;
7241.29Srmind
7251.29Srmind		/* Determine the 16-bit word to adjust. */
7261.29Srmind		for (word = 4; word < 8; word++)
7271.36Schristos			if (addr->word16[word] != 0xffff)
7281.29Srmind				break;
7291.29Srmind	}
7301.29Srmind
7311.29Srmind	/* Rewrite the prefix. */
7321.29Srmind	for (unsigned i = 0; i < preflen; i++) {
7331.36Schristos		addr->word16[i] = pref->word16[i];
7341.29Srmind	}
7351.29Srmind
7361.29Srmind	/*
7371.29Srmind	 * If prefix length is within a 16-bit word (not dividable by 16),
7381.29Srmind	 * then prepare a mask, determine the word and adjust it.
7391.29Srmind	 */
7401.29Srmind	if ((remnant = len - (preflen << 4)) != 0) {
7411.29Srmind		const uint16_t wordmask = (1U << remnant) - 1;
7421.29Srmind		const unsigned i = preflen;
7431.29Srmind
7441.36Schristos		addr->word16[i] = (pref->word16[i] & wordmask) |
7451.36Schristos		    (addr->word16[i] & ~wordmask);
7461.29Srmind	}
7471.29Srmind
7481.29Srmind	/*
7491.29Srmind	 * Performing 1's complement sum/difference.
7501.29Srmind	 */
7511.36Schristos	sum = addr->word16[word] + adj;
7521.29Srmind	while (sum >> 16) {
7531.29Srmind		sum = (sum >> 16) + (sum & 0xffff);
7541.29Srmind	}
7551.29Srmind	if (sum == 0xffff) {
7561.29Srmind		/* RFC 1071. */
7571.29Srmind		sum = 0x0000;
7581.29Srmind	}
7591.36Schristos	addr->word16[word] = sum;
7601.29Srmind	return 0;
7611.29Srmind}
7621.29Srmind
#if defined(DDB) || defined(_NPF_TESTING)

/*
 * npf_addr_dump: return a printable form of the address.
 *
 * => An address of IPv4 length is formatted by inet_ntoa().
 * => Any other length yields the constant string "[IPv6]".
 */
const char *
npf_addr_dump(const npf_addr_t *addr, int alen)
{
	struct in_addr ip;

	if (alen != sizeof(struct in_addr)) {
		return "[IPv6]";
	}
	memcpy(&ip, addr, alen);
	return inet_ntoa(ip);
}

#endif
777