npf_inet.c revision 1.37.12.2
/* $NetBSD: npf_inet.c,v 1.37.12.2 2018/03/22 01:44:51 pgoyette Exp $ */

/*-
 * Copyright (c) 2009-2014 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This material is based upon work partially supported by The
 * NetBSD Foundation under a contract with Mindaugas Rasiukevicius.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
301.1Srmind */ 311.1Srmind 321.1Srmind/* 331.22Srmind * Various protocol related helper routines. 341.12Srmind * 351.12Srmind * This layer manipulates npf_cache_t structure i.e. caches requested headers 361.12Srmind * and stores which information was cached in the information bit field. 371.12Srmind * It is also responsibility of this layer to update or invalidate the cache 381.12Srmind * on rewrites (e.g. by translation routines). 391.1Srmind */ 401.1Srmind 411.36Schristos#ifdef _KERNEL 421.1Srmind#include <sys/cdefs.h> 431.37.12.2Spgoyette__KERNEL_RCSID(0, "$NetBSD: npf_inet.c,v 1.37.12.2 2018/03/22 01:44:51 pgoyette Exp $"); 441.1Srmind 451.1Srmind#include <sys/param.h> 461.11Srmind#include <sys/types.h> 471.1Srmind 481.4Srmind#include <net/pfil.h> 491.4Srmind#include <net/if.h> 501.4Srmind#include <net/ethertypes.h> 511.4Srmind#include <net/if_ether.h> 521.4Srmind 531.1Srmind#include <netinet/in_systm.h> 541.1Srmind#include <netinet/in.h> 551.33Smlelstv#include <netinet6/in6_var.h> 561.1Srmind#include <netinet/ip.h> 571.4Srmind#include <netinet/ip6.h> 581.1Srmind#include <netinet/tcp.h> 591.1Srmind#include <netinet/udp.h> 601.1Srmind#include <netinet/ip_icmp.h> 611.36Schristos#endif 621.1Srmind 631.1Srmind#include "npf_impl.h" 641.1Srmind 651.1Srmind/* 661.27Srmind * npf_fixup{16,32}_cksum: incremental update of the Internet checksum. 671.1Srmind */ 681.1Srmind 691.1Srminduint16_t 701.1Srmindnpf_fixup16_cksum(uint16_t cksum, uint16_t odatum, uint16_t ndatum) 711.1Srmind{ 721.1Srmind uint32_t sum; 731.1Srmind 741.1Srmind /* 751.1Srmind * RFC 1624: 761.1Srmind * HC' = ~(~HC + ~m + m') 771.27Srmind * 781.27Srmind * Note: 1's complement sum is endian-independent (RFC 1071, page 2). 
791.1Srmind */ 801.27Srmind sum = ~cksum & 0xffff; 811.27Srmind sum += (~odatum & 0xffff) + ndatum; 821.1Srmind sum = (sum >> 16) + (sum & 0xffff); 831.1Srmind sum += (sum >> 16); 841.1Srmind 851.27Srmind return ~sum & 0xffff; 861.1Srmind} 871.1Srmind 881.1Srminduint16_t 891.1Srmindnpf_fixup32_cksum(uint16_t cksum, uint32_t odatum, uint32_t ndatum) 901.1Srmind{ 911.27Srmind uint32_t sum; 921.27Srmind 931.27Srmind /* 941.27Srmind * Checksum 32-bit datum as as two 16-bit. Note, the first 951.27Srmind * 32->16 bit reduction is not necessary. 961.27Srmind */ 971.27Srmind sum = ~cksum & 0xffff; 981.27Srmind sum += (~odatum & 0xffff) + (ndatum & 0xffff); 991.1Srmind 1001.27Srmind sum += (~odatum >> 16) + (ndatum >> 16); 1011.27Srmind sum = (sum >> 16) + (sum & 0xffff); 1021.27Srmind sum += (sum >> 16); 1031.27Srmind return ~sum & 0xffff; 1041.1Srmind} 1051.1Srmind 1061.1Srmind/* 1071.4Srmind * npf_addr_cksum: calculate checksum of the address, either IPv4 or IPv6. 1081.4Srmind */ 1091.4Srminduint16_t 1101.19Srmindnpf_addr_cksum(uint16_t cksum, int sz, const npf_addr_t *oaddr, 1111.19Srmind const npf_addr_t *naddr) 1121.4Srmind{ 1131.19Srmind const uint32_t *oip32 = (const uint32_t *)oaddr; 1141.19Srmind const uint32_t *nip32 = (const uint32_t *)naddr; 1151.4Srmind 1161.4Srmind KASSERT(sz % sizeof(uint32_t) == 0); 1171.4Srmind do { 1181.4Srmind cksum = npf_fixup32_cksum(cksum, *oip32++, *nip32++); 1191.4Srmind sz -= sizeof(uint32_t); 1201.4Srmind } while (sz); 1211.4Srmind 1221.4Srmind return cksum; 1231.4Srmind} 1241.4Srmind 1251.4Srmind/* 1261.26Srmind * npf_addr_sum: provide IP addresses as a XORed 32-bit integer. 1271.4Srmind * Note: used for hash function. 
1281.1Srmind */ 1291.4Srminduint32_t 1301.26Srmindnpf_addr_mix(const int sz, const npf_addr_t *a1, const npf_addr_t *a2) 1311.1Srmind{ 1321.4Srmind uint32_t mix = 0; 1331.1Srmind 1341.5Srmind KASSERT(sz > 0 && a1 != NULL && a2 != NULL); 1351.5Srmind 1361.26Srmind for (int i = 0; i < (sz >> 2); i++) { 1371.36Schristos mix ^= a1->word32[i]; 1381.36Schristos mix ^= a2->word32[i]; 1391.4Srmind } 1401.4Srmind return mix; 1411.4Srmind} 1421.1Srmind 1431.13Srmind/* 1441.13Srmind * npf_addr_mask: apply the mask to a given address and store the result. 1451.13Srmind */ 1461.13Srmindvoid 1471.13Srmindnpf_addr_mask(const npf_addr_t *addr, const npf_netmask_t mask, 1481.13Srmind const int alen, npf_addr_t *out) 1491.12Srmind{ 1501.13Srmind const int nwords = alen >> 2; 1511.12Srmind uint_fast8_t length = mask; 1521.12Srmind 1531.12Srmind /* Note: maximum length is 32 for IPv4 and 128 for IPv6. */ 1541.12Srmind KASSERT(length <= NPF_MAX_NETMASK); 1551.12Srmind 1561.13Srmind for (int i = 0; i < nwords; i++) { 1571.13Srmind uint32_t wordmask; 1581.13Srmind 1591.12Srmind if (length >= 32) { 1601.13Srmind wordmask = htonl(0xffffffff); 1611.12Srmind length -= 32; 1621.13Srmind } else if (length) { 1631.13Srmind wordmask = htonl(0xffffffff << (32 - length)); 1641.13Srmind length = 0; 1651.12Srmind } else { 1661.13Srmind wordmask = 0; 1671.12Srmind } 1681.36Schristos out->word32[i] = addr->word32[i] & wordmask; 1691.12Srmind } 1701.12Srmind} 1711.12Srmind 1721.12Srmind/* 1731.12Srmind * npf_addr_cmp: compare two addresses, either IPv4 or IPv6. 1741.12Srmind * 1751.13Srmind * => Return 0 if equal and negative/positive if less/greater accordingly. 1761.12Srmind * => Ignore the mask, if NPF_NO_NETMASK is specified. 
1771.12Srmind */ 1781.12Srmindint 1791.12Srmindnpf_addr_cmp(const npf_addr_t *addr1, const npf_netmask_t mask1, 1801.13Srmind const npf_addr_t *addr2, const npf_netmask_t mask2, const int alen) 1811.12Srmind{ 1821.13Srmind npf_addr_t realaddr1, realaddr2; 1831.12Srmind 1841.12Srmind if (mask1 != NPF_NO_NETMASK) { 1851.13Srmind npf_addr_mask(addr1, mask1, alen, &realaddr1); 1861.13Srmind addr1 = &realaddr1; 1871.12Srmind } 1881.12Srmind if (mask2 != NPF_NO_NETMASK) { 1891.13Srmind npf_addr_mask(addr2, mask2, alen, &realaddr2); 1901.13Srmind addr2 = &realaddr2; 1911.12Srmind } 1921.13Srmind return memcmp(addr1, addr2, alen); 1931.12Srmind} 1941.12Srmind 1951.4Srmind/* 1961.4Srmind * npf_tcpsaw: helper to fetch SEQ, ACK, WIN and return TCP data length. 1971.12Srmind * 1981.12Srmind * => Returns all values in host byte-order. 1991.4Srmind */ 2001.4Srmindint 2011.12Srmindnpf_tcpsaw(const npf_cache_t *npc, tcp_seq *seq, tcp_seq *ack, uint32_t *win) 2021.4Srmind{ 2031.19Srmind const struct tcphdr *th = npc->npc_l4.tcp; 2041.8Srmind u_int thlen; 2051.1Srmind 2061.7Szoltan KASSERT(npf_iscached(npc, NPC_TCP)); 2071.1Srmind 2081.4Srmind *seq = ntohl(th->th_seq); 2091.4Srmind *ack = ntohl(th->th_ack); 2101.4Srmind *win = (uint32_t)ntohs(th->th_win); 2111.8Srmind thlen = th->th_off << 2; 2121.1Srmind 2131.7Szoltan if (npf_iscached(npc, NPC_IP4)) { 2141.19Srmind const struct ip *ip = npc->npc_ip.v4; 2151.21Srmind return ntohs(ip->ip_len) - npc->npc_hlen - thlen; 2161.12Srmind } else if (npf_iscached(npc, NPC_IP6)) { 2171.19Srmind const struct ip6_hdr *ip6 = npc->npc_ip.v6; 2181.37.12.2Spgoyette return ntohs(ip6->ip6_plen) - 2191.37.12.2Spgoyette (npc->npc_hlen - sizeof(*ip6)) - thlen; 2201.7Szoltan } 2211.7Szoltan return 0; 2221.1Srmind} 2231.1Srmind 2241.1Srmind/* 2251.4Srmind * npf_fetch_tcpopts: parse and return TCP options. 
2261.1Srmind */ 2271.1Srmindbool 2281.32Srmindnpf_fetch_tcpopts(npf_cache_t *npc, uint16_t *mss, int *wscale) 2291.1Srmind{ 2301.32Srmind nbuf_t *nbuf = npc->npc_nbuf; 2311.19Srmind const struct tcphdr *th = npc->npc_l4.tcp; 2321.4Srmind int topts_len, step; 2331.37.12.1Spgoyette uint8_t *nptr; 2341.4Srmind uint8_t val; 2351.19Srmind bool ok; 2361.4Srmind 2371.7Szoltan KASSERT(npf_iscached(npc, NPC_IP46)); 2381.7Szoltan KASSERT(npf_iscached(npc, NPC_TCP)); 2391.10Srmind 2401.4Srmind /* Determine if there are any TCP options, get their length. */ 2411.4Srmind topts_len = (th->th_off << 2) - sizeof(struct tcphdr); 2421.4Srmind if (topts_len <= 0) { 2431.4Srmind /* No options. */ 2441.1Srmind return false; 2451.4Srmind } 2461.4Srmind KASSERT(topts_len <= MAX_TCPOPTLEN); 2471.1Srmind 2481.4Srmind /* First step: IP and TCP header up to options. */ 2491.21Srmind step = npc->npc_hlen + sizeof(struct tcphdr); 2501.19Srmind nbuf_reset(nbuf); 2511.4Srmindnext: 2521.19Srmind if ((nptr = nbuf_advance(nbuf, step, 1)) == NULL) { 2531.19Srmind ok = false; 2541.19Srmind goto done; 2551.4Srmind } 2561.37.12.1Spgoyette val = *nptr; 2571.12Srmind 2581.4Srmind switch (val) { 2591.4Srmind case TCPOPT_EOL: 2601.4Srmind /* Done. */ 2611.19Srmind ok = true; 2621.19Srmind goto done; 2631.4Srmind case TCPOPT_NOP: 2641.4Srmind topts_len--; 2651.4Srmind step = 1; 2661.4Srmind break; 2671.4Srmind case TCPOPT_MAXSEG: 2681.37.12.1Spgoyette if ((nptr = nbuf_ensure_contig(nbuf, TCPOLEN_MAXSEG)) == NULL) { 2691.19Srmind ok = false; 2701.19Srmind goto done; 2711.4Srmind } 2721.4Srmind if (mss) { 2731.19Srmind if (*mss) { 2741.37.12.1Spgoyette memcpy(nptr + 2, mss, sizeof(uint16_t)); 2751.19Srmind } else { 2761.37.12.1Spgoyette memcpy(mss, nptr + 2, sizeof(uint16_t)); 2771.19Srmind } 2781.4Srmind } 2791.4Srmind topts_len -= TCPOLEN_MAXSEG; 2801.37.12.1Spgoyette step = TCPOLEN_MAXSEG; 2811.4Srmind break; 2821.4Srmind case TCPOPT_WINDOW: 2831.10Srmind /* TCP Window Scaling (RFC 1323). 
*/ 2841.37.12.1Spgoyette if ((nptr = nbuf_ensure_contig(nbuf, TCPOLEN_WINDOW)) == NULL) { 2851.19Srmind ok = false; 2861.19Srmind goto done; 2871.4Srmind } 2881.37.12.1Spgoyette val = *(nptr + 2); 2891.4Srmind *wscale = (val > TCP_MAX_WINSHIFT) ? TCP_MAX_WINSHIFT : val; 2901.4Srmind topts_len -= TCPOLEN_WINDOW; 2911.37.12.1Spgoyette step = TCPOLEN_WINDOW; 2921.4Srmind break; 2931.4Srmind default: 2941.37.12.1Spgoyette if ((nptr = nbuf_ensure_contig(nbuf, 2)) == NULL) { 2951.19Srmind ok = false; 2961.19Srmind goto done; 2971.4Srmind } 2981.37.12.1Spgoyette val = *(nptr + 1); 2991.16Srmind if (val < 2 || val > topts_len) { 3001.19Srmind ok = false; 3011.19Srmind goto done; 3021.4Srmind } 3031.4Srmind topts_len -= val; 3041.37.12.1Spgoyette step = val; 3051.4Srmind } 3061.12Srmind 3071.6Srmind /* Any options left? */ 3081.4Srmind if (__predict_true(topts_len > 0)) { 3091.4Srmind goto next; 3101.4Srmind } 3111.19Srmind ok = true; 3121.19Srminddone: 3131.19Srmind if (nbuf_flag_p(nbuf, NBUF_DATAREF_RESET)) { 3141.32Srmind npf_recache(npc); 3151.19Srmind } 3161.19Srmind return ok; 3171.1Srmind} 3181.1Srmind 3191.19Srmindstatic int 3201.19Srmindnpf_cache_ip(npf_cache_t *npc, nbuf_t *nbuf) 3211.1Srmind{ 3221.19Srmind const void *nptr = nbuf_dataptr(nbuf); 3231.19Srmind const uint8_t ver = *(const uint8_t *)nptr; 3241.19Srmind int flags = 0; 3251.12Srmind 3261.37.12.2Spgoyette /* 3271.37.12.2Spgoyette * We intentionally don't read the L4 payload after IPPROTO_AH. 3281.37.12.2Spgoyette */ 3291.37.12.2Spgoyette 3301.4Srmind switch (ver >> 4) { 3311.12Srmind case IPVERSION: { 3321.19Srmind struct ip *ip; 3331.12Srmind 3341.19Srmind ip = nbuf_ensure_contig(nbuf, sizeof(struct ip)); 3351.19Srmind if (ip == NULL) { 3361.37.12.1Spgoyette return NPC_FMTERR; 3371.4Srmind } 3381.12Srmind 3391.4Srmind /* Check header length and fragment offset. 
*/ 3401.10Srmind if ((u_int)(ip->ip_hl << 2) < sizeof(struct ip)) { 3411.37.12.1Spgoyette return NPC_FMTERR; 3421.4Srmind } 3431.4Srmind if (ip->ip_off & ~htons(IP_DF | IP_RF)) { 3441.4Srmind /* Note fragmentation. */ 3451.19Srmind flags |= NPC_IPFRAG; 3461.4Srmind } 3471.12Srmind 3481.4Srmind /* Cache: layer 3 - IPv4. */ 3491.14Srmind npc->npc_alen = sizeof(struct in_addr); 3501.28Srmind npc->npc_ips[NPF_SRC] = (npf_addr_t *)&ip->ip_src; 3511.28Srmind npc->npc_ips[NPF_DST] = (npf_addr_t *)&ip->ip_dst; 3521.7Szoltan npc->npc_hlen = ip->ip_hl << 2; 3531.19Srmind npc->npc_proto = ip->ip_p; 3541.19Srmind 3551.19Srmind npc->npc_ip.v4 = ip; 3561.19Srmind flags |= NPC_IP4; 3571.4Srmind break; 3581.12Srmind } 3591.4Srmind 3601.12Srmind case (IPV6_VERSION >> 4): { 3611.19Srmind struct ip6_hdr *ip6; 3621.19Srmind struct ip6_ext *ip6e; 3631.37Schristos struct ip6_frag *ip6f; 3641.19Srmind size_t off, hlen; 3651.37.12.1Spgoyette int frag_present; 3661.19Srmind 3671.19Srmind ip6 = nbuf_ensure_contig(nbuf, sizeof(struct ip6_hdr)); 3681.19Srmind if (ip6 == NULL) { 3691.37.12.1Spgoyette return NPC_FMTERR; 3701.7Szoltan } 3711.19Srmind 3721.37.12.2Spgoyette /* 3731.37.12.2Spgoyette * XXX: We don't handle IPv6 Jumbograms. 3741.37.12.2Spgoyette */ 3751.37.12.2Spgoyette 3761.19Srmind /* Set initial next-protocol value. */ 3771.19Srmind hlen = sizeof(struct ip6_hdr); 3781.19Srmind npc->npc_proto = ip6->ip6_nxt; 3791.13Srmind npc->npc_hlen = hlen; 3801.7Szoltan 3811.37.12.1Spgoyette frag_present = 0; 3821.37.12.1Spgoyette 3831.12Srmind /* 3841.19Srmind * Advance by the length of the current header. 3851.12Srmind */ 3861.19Srmind off = nbuf_offset(nbuf); 3871.37.12.1Spgoyette while ((ip6e = nbuf_advance(nbuf, hlen, sizeof(*ip6e))) != NULL) { 3881.13Srmind /* 3891.13Srmind * Determine whether we are going to continue. 
3901.13Srmind */ 3911.19Srmind switch (npc->npc_proto) { 3921.13Srmind case IPPROTO_HOPOPTS: 3931.7Szoltan case IPPROTO_DSTOPTS: 3941.7Szoltan case IPPROTO_ROUTING: 3951.19Srmind hlen = (ip6e->ip6e_len + 1) << 3; 3961.7Szoltan break; 3971.7Szoltan case IPPROTO_FRAGMENT: 3981.37.12.1Spgoyette if (frag_present++) 3991.37.12.1Spgoyette return NPC_FMTERR; 4001.37Schristos ip6f = nbuf_ensure_contig(nbuf, sizeof(*ip6f)); 4011.37Schristos if (ip6f == NULL) 4021.37.12.1Spgoyette return NPC_FMTERR; 4031.37.12.1Spgoyette 4041.37.12.1Spgoyette /* RFC6946: Skip dummy fragments. */ 4051.37.12.1Spgoyette if (!ntohs(ip6f->ip6f_offlg & IP6F_OFF_MASK) && 4061.37.12.1Spgoyette !(ip6f->ip6f_offlg & IP6F_MORE_FRAG)) { 4071.37.12.1Spgoyette hlen = sizeof(struct ip6_frag); 4081.37.12.1Spgoyette break; 4091.37.12.1Spgoyette } 4101.37.12.1Spgoyette 4111.37.12.1Spgoyette hlen = 0; 4121.37.12.1Spgoyette flags |= NPC_IPFRAG; 4131.37Schristos 4141.7Szoltan break; 4151.7Szoltan default: 4161.13Srmind hlen = 0; 4171.13Srmind break; 4181.13Srmind } 4191.13Srmind 4201.13Srmind if (!hlen) { 4211.7Szoltan break; 4221.7Szoltan } 4231.19Srmind npc->npc_proto = ip6e->ip6e_nxt; 4241.13Srmind npc->npc_hlen += hlen; 4251.13Srmind } 4261.7Szoltan 4271.23Srmind /* 4281.23Srmind * Re-fetch the header pointers (nbufs might have been 4291.23Srmind * reallocated). Restore the original offset (if any). 4301.23Srmind */ 4311.19Srmind nbuf_reset(nbuf); 4321.23Srmind ip6 = nbuf_dataptr(nbuf); 4331.19Srmind if (off) { 4341.19Srmind nbuf_advance(nbuf, off, 0); 4351.19Srmind } 4361.19Srmind 4371.12Srmind /* Cache: layer 3 - IPv6. 
*/ 4381.14Srmind npc->npc_alen = sizeof(struct in6_addr); 4391.28Srmind npc->npc_ips[NPF_SRC] = (npf_addr_t *)&ip6->ip6_src; 4401.37.12.2Spgoyette npc->npc_ips[NPF_DST] = (npf_addr_t *)&ip6->ip6_dst; 4411.19Srmind 4421.19Srmind npc->npc_ip.v6 = ip6; 4431.19Srmind flags |= NPC_IP6; 4441.7Szoltan break; 4451.12Srmind } 4461.4Srmind default: 4471.19Srmind break; 4481.4Srmind } 4491.19Srmind return flags; 4501.1Srmind} 4511.1Srmind 4521.1Srmind/* 4531.4Srmind * npf_cache_all: general routine to cache all relevant IP (v4 or v6) 4541.12Srmind * and TCP, UDP or ICMP headers. 4551.19Srmind * 4561.19Srmind * => nbuf offset shall be set accordingly. 4571.1Srmind */ 4581.10Srmindint 4591.32Srmindnpf_cache_all(npf_cache_t *npc) 4601.1Srmind{ 4611.32Srmind nbuf_t *nbuf = npc->npc_nbuf; 4621.19Srmind int flags, l4flags; 4631.19Srmind u_int hlen; 4641.19Srmind 4651.19Srmind /* 4661.19Srmind * This routine is a main point where the references are cached, 4671.19Srmind * therefore clear the flag as we reset. 4681.19Srmind */ 4691.19Srmindagain: 4701.19Srmind nbuf_unset_flag(nbuf, NBUF_DATAREF_RESET); 4711.1Srmind 4721.19Srmind /* 4731.19Srmind * First, cache the L3 header (IPv4 or IPv6). If IP packet is 4741.19Srmind * fragmented, then we cannot look into L4. 4751.19Srmind */ 4761.19Srmind flags = npf_cache_ip(npc, nbuf); 4771.37.12.1Spgoyette if ((flags & NPC_IP46) == 0 || (flags & NPC_IPFRAG) != 0 || 4781.37.12.1Spgoyette (flags & NPC_FMTERR) != 0) { 4791.23Srmind nbuf_unset_flag(nbuf, NBUF_DATAREF_RESET); 4801.19Srmind npc->npc_info |= flags; 4811.19Srmind return flags; 4821.1Srmind } 4831.19Srmind hlen = npc->npc_hlen; 4841.19Srmind 4851.19Srmind switch (npc->npc_proto) { 4861.1Srmind case IPPROTO_TCP: 4871.19Srmind /* Cache: layer 4 - TCP. */ 4881.19Srmind npc->npc_l4.tcp = nbuf_advance(nbuf, hlen, 4891.19Srmind sizeof(struct tcphdr)); 4901.19Srmind l4flags = NPC_LAYER4 | NPC_TCP; 4911.10Srmind break; 4921.1Srmind case IPPROTO_UDP: 4931.19Srmind /* Cache: layer 4 - UDP. 
*/ 4941.19Srmind npc->npc_l4.udp = nbuf_advance(nbuf, hlen, 4951.19Srmind sizeof(struct udphdr)); 4961.19Srmind l4flags = NPC_LAYER4 | NPC_UDP; 4971.10Srmind break; 4981.1Srmind case IPPROTO_ICMP: 4991.19Srmind /* Cache: layer 4 - ICMPv4. */ 5001.19Srmind npc->npc_l4.icmp = nbuf_advance(nbuf, hlen, 5011.19Srmind offsetof(struct icmp, icmp_void)); 5021.19Srmind l4flags = NPC_LAYER4 | NPC_ICMP; 5031.19Srmind break; 5041.15Sspz case IPPROTO_ICMPV6: 5051.19Srmind /* Cache: layer 4 - ICMPv6. */ 5061.19Srmind npc->npc_l4.icmp6 = nbuf_advance(nbuf, hlen, 5071.19Srmind offsetof(struct icmp6_hdr, icmp6_data32)); 5081.19Srmind l4flags = NPC_LAYER4 | NPC_ICMP; 5091.19Srmind break; 5101.19Srmind default: 5111.19Srmind l4flags = 0; 5121.10Srmind break; 5131.1Srmind } 5141.19Srmind 5151.19Srmind if (nbuf_flag_p(nbuf, NBUF_DATAREF_RESET)) { 5161.19Srmind goto again; 5171.19Srmind } 5181.19Srmind 5191.19Srmind /* Add the L4 flags if nbuf_advance() succeeded. */ 5201.19Srmind if (l4flags && npc->npc_l4.hdr) { 5211.19Srmind flags |= l4flags; 5221.19Srmind } 5231.19Srmind npc->npc_info |= flags; 5241.19Srmind return flags; 5251.19Srmind} 5261.19Srmind 5271.19Srmindvoid 5281.32Srmindnpf_recache(npf_cache_t *npc) 5291.19Srmind{ 5301.32Srmind nbuf_t *nbuf = npc->npc_nbuf; 5311.24Smartin const int mflags __diagused = npc->npc_info & (NPC_IP46 | NPC_LAYER4); 5321.25Smrg int flags __diagused; 5331.19Srmind 5341.19Srmind nbuf_reset(nbuf); 5351.19Srmind npc->npc_info = 0; 5361.32Srmind flags = npf_cache_all(npc); 5371.32Srmind 5381.19Srmind KASSERT((flags & mflags) == mflags); 5391.19Srmind KASSERT(nbuf_flag_p(nbuf, NBUF_DATAREF_RESET) == 0); 5401.1Srmind} 5411.1Srmind 5421.1Srmind/* 5431.19Srmind * npf_rwrip: rewrite required IP address. 
5441.4Srmind */ 5451.4Srmindbool 5461.28Srmindnpf_rwrip(const npf_cache_t *npc, u_int which, const npf_addr_t *addr) 5471.4Srmind{ 5481.4Srmind KASSERT(npf_iscached(npc, NPC_IP46)); 5491.28Srmind KASSERT(which == NPF_SRC || which == NPF_DST); 5501.4Srmind 5511.28Srmind memcpy(npc->npc_ips[which], addr, npc->npc_alen); 5521.4Srmind return true; 5531.4Srmind} 5541.4Srmind 5551.4Srmind/* 5561.19Srmind * npf_rwrport: rewrite required TCP/UDP port. 5571.1Srmind */ 5581.1Srmindbool 5591.28Srmindnpf_rwrport(const npf_cache_t *npc, u_int which, const in_port_t port) 5601.1Srmind{ 5611.21Srmind const int proto = npc->npc_proto; 5621.4Srmind in_port_t *oport; 5631.1Srmind 5641.4Srmind KASSERT(npf_iscached(npc, NPC_TCP) || npf_iscached(npc, NPC_UDP)); 5651.1Srmind KASSERT(proto == IPPROTO_TCP || proto == IPPROTO_UDP); 5661.28Srmind KASSERT(which == NPF_SRC || which == NPF_DST); 5671.1Srmind 5681.19Srmind /* Get the offset and store the port in it. */ 5691.4Srmind if (proto == IPPROTO_TCP) { 5701.19Srmind struct tcphdr *th = npc->npc_l4.tcp; 5711.28Srmind oport = (which == NPF_SRC) ? &th->th_sport : &th->th_dport; 5721.1Srmind } else { 5731.19Srmind struct udphdr *uh = npc->npc_l4.udp; 5741.28Srmind oport = (which == NPF_SRC) ? &uh->uh_sport : &uh->uh_dport; 5751.1Srmind } 5761.19Srmind memcpy(oport, &port, sizeof(in_port_t)); 5771.1Srmind return true; 5781.1Srmind} 5791.1Srmind 5801.1Srmind/* 5811.19Srmind * npf_rwrcksum: rewrite IPv4 and/or TCP/UDP checksum. 
5821.1Srmind */ 5831.1Srmindbool 5841.28Srmindnpf_rwrcksum(const npf_cache_t *npc, u_int which, 5851.19Srmind const npf_addr_t *addr, const in_port_t port) 5861.1Srmind{ 5871.28Srmind const npf_addr_t *oaddr = npc->npc_ips[which]; 5881.21Srmind const int proto = npc->npc_proto; 5891.19Srmind const int alen = npc->npc_alen; 5901.18Srmind uint16_t *ocksum; 5911.18Srmind in_port_t oport; 5921.18Srmind 5931.19Srmind KASSERT(npf_iscached(npc, NPC_LAYER4)); 5941.28Srmind KASSERT(which == NPF_SRC || which == NPF_DST); 5951.18Srmind 5961.4Srmind if (npf_iscached(npc, NPC_IP4)) { 5971.19Srmind struct ip *ip = npc->npc_ip.v4; 5981.19Srmind uint16_t ipsum = ip->ip_sum; 5991.4Srmind 6001.19Srmind /* Recalculate IPv4 checksum and rewrite. */ 6011.19Srmind ip->ip_sum = npf_addr_cksum(ipsum, alen, oaddr, addr); 6021.4Srmind } else { 6031.4Srmind /* No checksum for IPv6. */ 6041.4Srmind KASSERT(npf_iscached(npc, NPC_IP6)); 6051.4Srmind } 6061.4Srmind 6071.18Srmind /* Nothing else to do for ICMP. */ 6081.30Srmind if (proto == IPPROTO_ICMP || proto == IPPROTO_ICMPV6) { 6091.4Srmind return true; 6101.4Srmind } 6111.7Szoltan KASSERT(npf_iscached(npc, NPC_TCP) || npf_iscached(npc, NPC_UDP)); 6121.4Srmind 6131.18Srmind /* 6141.18Srmind * Calculate TCP/UDP checksum: 6151.18Srmind * - Skip if UDP and the current checksum is zero. 6161.18Srmind * - Fixup the IP address change. 6171.18Srmind * - Fixup the port change, if required (non-zero). 6181.18Srmind */ 6191.4Srmind if (proto == IPPROTO_TCP) { 6201.19Srmind struct tcphdr *th = npc->npc_l4.tcp; 6211.4Srmind 6221.18Srmind ocksum = &th->th_sum; 6231.28Srmind oport = (which == NPF_SRC) ? th->th_sport : th->th_dport; 6241.4Srmind } else { 6251.19Srmind struct udphdr *uh = npc->npc_l4.udp; 6261.4Srmind 6271.4Srmind KASSERT(proto == IPPROTO_UDP); 6281.18Srmind ocksum = &uh->uh_sum; 6291.18Srmind if (*ocksum == 0) { 6301.4Srmind /* No need to update. */ 6311.4Srmind return true; 6321.4Srmind } 6331.28Srmind oport = (which == NPF_SRC) ? 
uh->uh_sport : uh->uh_dport; 6341.18Srmind } 6351.18Srmind 6361.19Srmind uint16_t cksum = npf_addr_cksum(*ocksum, alen, oaddr, addr); 6371.18Srmind if (port) { 6381.18Srmind cksum = npf_fixup16_cksum(cksum, oport, port); 6391.4Srmind } 6401.1Srmind 6411.19Srmind /* Rewrite TCP/UDP checksum. */ 6421.19Srmind memcpy(ocksum, &cksum, sizeof(uint16_t)); 6431.4Srmind return true; 6441.4Srmind} 6451.4Srmind 6461.29Srmind/* 6471.30Srmind * npf_napt_rwr: perform address and/or port translation. 6481.30Srmind */ 6491.30Srmindint 6501.30Srmindnpf_napt_rwr(const npf_cache_t *npc, u_int which, 6511.30Srmind const npf_addr_t *addr, const in_addr_t port) 6521.30Srmind{ 6531.30Srmind const unsigned proto = npc->npc_proto; 6541.30Srmind 6551.30Srmind /* 6561.30Srmind * Rewrite IP and/or TCP/UDP checksums first, since we need the 6571.30Srmind * current (old) address/port for the calculations. Then perform 6581.30Srmind * the address translation i.e. rewrite source or destination. 6591.30Srmind */ 6601.30Srmind if (!npf_rwrcksum(npc, which, addr, port)) { 6611.30Srmind return EINVAL; 6621.30Srmind } 6631.30Srmind if (!npf_rwrip(npc, which, addr)) { 6641.30Srmind return EINVAL; 6651.30Srmind } 6661.30Srmind if (port == 0) { 6671.30Srmind /* Done. */ 6681.30Srmind return 0; 6691.30Srmind } 6701.30Srmind 6711.30Srmind switch (proto) { 6721.30Srmind case IPPROTO_TCP: 6731.30Srmind case IPPROTO_UDP: 6741.30Srmind /* Rewrite source/destination port. */ 6751.30Srmind if (!npf_rwrport(npc, which, port)) { 6761.30Srmind return EINVAL; 6771.30Srmind } 6781.30Srmind break; 6791.30Srmind case IPPROTO_ICMP: 6801.30Srmind case IPPROTO_ICMPV6: 6811.30Srmind KASSERT(npf_iscached(npc, NPC_ICMP)); 6821.30Srmind /* Nothing. */ 6831.30Srmind break; 6841.30Srmind default: 6851.30Srmind return ENOTSUP; 6861.30Srmind } 6871.30Srmind return 0; 6881.30Srmind} 6891.30Srmind 6901.30Srmind/* 6911.29Srmind * IPv6-to-IPv6 Network Prefix Translation (NPTv6), as per RFC 6296. 
6921.29Srmind */ 6931.29Srmind 6941.29Srmindint 6951.29Srmindnpf_npt66_rwr(const npf_cache_t *npc, u_int which, const npf_addr_t *pref, 6961.29Srmind npf_netmask_t len, uint16_t adj) 6971.29Srmind{ 6981.29Srmind npf_addr_t *addr = npc->npc_ips[which]; 6991.29Srmind unsigned remnant, word, preflen = len >> 4; 7001.29Srmind uint32_t sum; 7011.29Srmind 7021.29Srmind KASSERT(which == NPF_SRC || which == NPF_DST); 7031.29Srmind 7041.29Srmind if (!npf_iscached(npc, NPC_IP6)) { 7051.29Srmind return EINVAL; 7061.29Srmind } 7071.29Srmind if (len <= 48) { 7081.29Srmind /* 7091.29Srmind * The word to adjust. Cannot translate the 0xffff 7101.29Srmind * subnet if /48 or shorter. 7111.29Srmind */ 7121.29Srmind word = 3; 7131.36Schristos if (addr->word16[word] == 0xffff) { 7141.29Srmind return EINVAL; 7151.29Srmind } 7161.29Srmind } else { 7171.29Srmind /* 7181.29Srmind * Also, all 0s or 1s in the host part are disallowed for 7191.29Srmind * longer than /48 prefixes. 7201.29Srmind */ 7211.36Schristos if ((addr->word32[2] == 0 && addr->word32[3] == 0) || 7221.36Schristos (addr->word32[2] == ~0U && addr->word32[3] == ~0U)) 7231.29Srmind return EINVAL; 7241.29Srmind 7251.29Srmind /* Determine the 16-bit word to adjust. */ 7261.29Srmind for (word = 4; word < 8; word++) 7271.36Schristos if (addr->word16[word] != 0xffff) 7281.29Srmind break; 7291.29Srmind } 7301.29Srmind 7311.29Srmind /* Rewrite the prefix. */ 7321.29Srmind for (unsigned i = 0; i < preflen; i++) { 7331.36Schristos addr->word16[i] = pref->word16[i]; 7341.29Srmind } 7351.29Srmind 7361.29Srmind /* 7371.29Srmind * If prefix length is within a 16-bit word (not dividable by 16), 7381.29Srmind * then prepare a mask, determine the word and adjust it. 
7391.29Srmind */ 7401.29Srmind if ((remnant = len - (preflen << 4)) != 0) { 7411.29Srmind const uint16_t wordmask = (1U << remnant) - 1; 7421.29Srmind const unsigned i = preflen; 7431.29Srmind 7441.36Schristos addr->word16[i] = (pref->word16[i] & wordmask) | 7451.36Schristos (addr->word16[i] & ~wordmask); 7461.29Srmind } 7471.29Srmind 7481.29Srmind /* 7491.29Srmind * Performing 1's complement sum/difference. 7501.29Srmind */ 7511.36Schristos sum = addr->word16[word] + adj; 7521.29Srmind while (sum >> 16) { 7531.29Srmind sum = (sum >> 16) + (sum & 0xffff); 7541.29Srmind } 7551.29Srmind if (sum == 0xffff) { 7561.29Srmind /* RFC 1071. */ 7571.29Srmind sum = 0x0000; 7581.29Srmind } 7591.36Schristos addr->word16[word] = sum; 7601.29Srmind return 0; 7611.29Srmind} 7621.29Srmind 7631.13Srmind#if defined(DDB) || defined(_NPF_TESTING) 7641.13Srmind 7651.31Srmindconst char * 7661.31Srmindnpf_addr_dump(const npf_addr_t *addr, int alen) 7671.13Srmind{ 7681.31Srmind if (alen == sizeof(struct in_addr)) { 7691.31Srmind struct in_addr ip; 7701.31Srmind memcpy(&ip, addr, alen); 7711.31Srmind return inet_ntoa(ip); 7721.31Srmind } 7731.36Schristos return "[IPv6]"; 7741.13Srmind} 7751.13Srmind 7761.13Srmind#endif 777