1 1.1 rmind /*- 2 1.29 rmind * Copyright (c) 2009-2014 The NetBSD Foundation, Inc. 3 1.1 rmind * All rights reserved. 4 1.1 rmind * 5 1.1 rmind * This material is based upon work partially supported by The 6 1.1 rmind * NetBSD Foundation under a contract with Mindaugas Rasiukevicius. 7 1.1 rmind * 8 1.1 rmind * Redistribution and use in source and binary forms, with or without 9 1.1 rmind * modification, are permitted provided that the following conditions 10 1.1 rmind * are met: 11 1.1 rmind * 1. Redistributions of source code must retain the above copyright 12 1.1 rmind * notice, this list of conditions and the following disclaimer. 13 1.1 rmind * 2. Redistributions in binary form must reproduce the above copyright 14 1.1 rmind * notice, this list of conditions and the following disclaimer in the 15 1.1 rmind * documentation and/or other materials provided with the distribution. 16 1.1 rmind * 17 1.1 rmind * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 18 1.1 rmind * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 19 1.1 rmind * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 20 1.1 rmind * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 21 1.1 rmind * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 22 1.1 rmind * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 23 1.1 rmind * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 24 1.1 rmind * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 25 1.1 rmind * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 26 1.1 rmind * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 27 1.1 rmind * POSSIBILITY OF SUCH DAMAGE. 28 1.1 rmind */ 29 1.1 rmind 30 1.1 rmind /* 31 1.22 rmind * Various protocol related helper routines. 32 1.12 rmind * 33 1.12 rmind * This layer manipulates npf_cache_t structure i.e. caches requested headers 34 1.12 rmind * and stores which information was cached in the information bit field. 35 1.12 rmind * It is also responsibility of this layer to update or invalidate the cache 36 1.12 rmind * on rewrites (e.g. by translation routines). 37 1.1 rmind */ 38 1.1 rmind 39 1.36 christos #ifdef _KERNEL 40 1.1 rmind #include <sys/cdefs.h> 41 1.58 joe __KERNEL_RCSID(0, "$NetBSD: npf_inet.c,v 1.58 2025/07/01 18:42:37 joe Exp $"); 42 1.1 rmind 43 1.1 rmind #include <sys/param.h> 44 1.11 rmind #include <sys/types.h> 45 1.1 rmind 46 1.4 rmind #include <net/pfil.h> 47 1.4 rmind #include <net/if.h> 48 1.4 rmind #include <net/ethertypes.h> 49 1.4 rmind #include <net/if_ether.h> 50 1.4 rmind 51 1.1 rmind #include <netinet/in_systm.h> 52 1.1 rmind #include <netinet/in.h> 53 1.33 mlelstv #include <netinet6/in6_var.h> 54 1.1 rmind #include <netinet/ip.h> 55 1.4 rmind #include <netinet/ip6.h> 56 1.1 rmind #include <netinet/tcp.h> 57 1.1 rmind #include <netinet/udp.h> 58 1.1 rmind #include <netinet/ip_icmp.h> 59 1.36 christos #endif 60 1.1 rmind 61 1.1 rmind #include "npf_impl.h" 62 1.1 rmind 63 1.1 rmind /* 64 1.27 rmind * npf_fixup{16,32}_cksum: incremental update of the Internet checksum. 65 1.1 rmind */ 66 1.1 rmind 67 1.1 rmind uint16_t 68 1.1 rmind npf_fixup16_cksum(uint16_t cksum, uint16_t odatum, uint16_t ndatum) 69 1.1 rmind { 70 1.1 rmind uint32_t sum; 71 1.1 rmind 72 1.1 rmind /* 73 1.1 rmind * RFC 1624: 74 1.1 rmind * HC' = ~(~HC + ~m + m') 75 1.27 rmind * 76 1.27 rmind * Note: 1's complement sum is endian-independent (RFC 1071, page 2). 77 1.1 rmind */ 78 1.27 rmind sum = ~cksum & 0xffff; 79 1.27 rmind sum += (~odatum & 0xffff) + ndatum; 80 1.1 rmind sum = (sum >> 16) + (sum & 0xffff); 81 1.1 rmind sum += (sum >> 16); 82 1.1 rmind 83 1.27 rmind return ~sum & 0xffff; 84 1.1 rmind } 85 1.1 rmind 86 1.1 rmind uint16_t 87 1.1 rmind npf_fixup32_cksum(uint16_t cksum, uint32_t odatum, uint32_t ndatum) 88 1.1 rmind { 89 1.27 rmind uint32_t sum; 90 1.27 rmind 91 1.27 rmind /* 92 1.27 rmind * Checksum 32-bit datum as as two 16-bit. Note, the first 93 1.27 rmind * 32->16 bit reduction is not necessary. 94 1.27 rmind */ 95 1.27 rmind sum = ~cksum & 0xffff; 96 1.27 rmind sum += (~odatum & 0xffff) + (ndatum & 0xffff); 97 1.1 rmind 98 1.27 rmind sum += (~odatum >> 16) + (ndatum >> 16); 99 1.27 rmind sum = (sum >> 16) + (sum & 0xffff); 100 1.27 rmind sum += (sum >> 16); 101 1.27 rmind return ~sum & 0xffff; 102 1.1 rmind } 103 1.1 rmind 104 1.1 rmind /* 105 1.4 rmind * npf_addr_cksum: calculate checksum of the address, either IPv4 or IPv6. 106 1.4 rmind */ 107 1.4 rmind uint16_t 108 1.19 rmind npf_addr_cksum(uint16_t cksum, int sz, const npf_addr_t *oaddr, 109 1.19 rmind const npf_addr_t *naddr) 110 1.4 rmind { 111 1.19 rmind const uint32_t *oip32 = (const uint32_t *)oaddr; 112 1.19 rmind const uint32_t *nip32 = (const uint32_t *)naddr; 113 1.4 rmind 114 1.4 rmind KASSERT(sz % sizeof(uint32_t) == 0); 115 1.4 rmind do { 116 1.4 rmind cksum = npf_fixup32_cksum(cksum, *oip32++, *nip32++); 117 1.4 rmind sz -= sizeof(uint32_t); 118 1.4 rmind } while (sz); 119 1.4 rmind 120 1.4 rmind return cksum; 121 1.4 rmind } 122 1.4 rmind 123 1.4 rmind /* 124 1.26 rmind * npf_addr_sum: provide IP addresses as a XORed 32-bit integer. 125 1.4 rmind * Note: used for hash function. 126 1.1 rmind */ 127 1.4 rmind uint32_t 128 1.53 rmind npf_addr_mix(const int alen, const npf_addr_t *a1, const npf_addr_t *a2) 129 1.1 rmind { 130 1.53 rmind const int nwords = alen >> 2; 131 1.4 rmind uint32_t mix = 0; 132 1.1 rmind 133 1.53 rmind KASSERT(alen > 0 && a1 != NULL && a2 != NULL); 134 1.5 rmind 135 1.53 rmind for (int i = 0; i < nwords; i++) { 136 1.36 christos mix ^= a1->word32[i]; 137 1.36 christos mix ^= a2->word32[i]; 138 1.4 rmind } 139 1.4 rmind return mix; 140 1.4 rmind } 141 1.1 rmind 142 1.13 rmind /* 143 1.13 rmind * npf_addr_mask: apply the mask to a given address and store the result. 144 1.13 rmind */ 145 1.13 rmind void 146 1.13 rmind npf_addr_mask(const npf_addr_t *addr, const npf_netmask_t mask, 147 1.13 rmind const int alen, npf_addr_t *out) 148 1.12 rmind { 149 1.13 rmind const int nwords = alen >> 2; 150 1.12 rmind uint_fast8_t length = mask; 151 1.12 rmind 152 1.12 rmind /* Note: maximum length is 32 for IPv4 and 128 for IPv6. */ 153 1.12 rmind KASSERT(length <= NPF_MAX_NETMASK); 154 1.12 rmind 155 1.13 rmind for (int i = 0; i < nwords; i++) { 156 1.13 rmind uint32_t wordmask; 157 1.13 rmind 158 1.12 rmind if (length >= 32) { 159 1.13 rmind wordmask = htonl(0xffffffff); 160 1.12 rmind length -= 32; 161 1.13 rmind } else if (length) { 162 1.13 rmind wordmask = htonl(0xffffffff << (32 - length)); 163 1.13 rmind length = 0; 164 1.12 rmind } else { 165 1.13 rmind wordmask = 0; 166 1.12 rmind } 167 1.36 christos out->word32[i] = addr->word32[i] & wordmask; 168 1.12 rmind } 169 1.12 rmind } 170 1.12 rmind 171 1.12 rmind /* 172 1.53 rmind * npf_addr_bitor: bitwise OR the host part (given the netmask). 173 1.53 rmind * Zero mask can be used to OR the entire address. 174 1.53 rmind */ 175 1.53 rmind void 176 1.53 rmind npf_addr_bitor(const npf_addr_t *addr, const npf_netmask_t mask, 177 1.53 rmind const int alen, npf_addr_t *out) 178 1.53 rmind { 179 1.53 rmind const int nwords = alen >> 2; 180 1.53 rmind uint_fast8_t length = mask; 181 1.53 rmind 182 1.53 rmind /* Note: maximum length is 32 for IPv4 and 128 for IPv6. */ 183 1.53 rmind KASSERT(length <= NPF_MAX_NETMASK); 184 1.53 rmind 185 1.53 rmind for (int i = 0; i < nwords; i++) { 186 1.53 rmind uint32_t wordmask; 187 1.53 rmind 188 1.53 rmind if (length >= 32) { 189 1.53 rmind wordmask = htonl(0xffffffff); 190 1.53 rmind length -= 32; 191 1.53 rmind } else if (length) { 192 1.53 rmind wordmask = htonl(0xffffffff << (32 - length)); 193 1.53 rmind length = 0; 194 1.53 rmind } else { 195 1.53 rmind wordmask = 0; 196 1.53 rmind } 197 1.53 rmind out->word32[i] |= addr->word32[i] & ~wordmask; 198 1.53 rmind } 199 1.53 rmind } 200 1.53 rmind 201 1.53 rmind /* 202 1.12 rmind * npf_addr_cmp: compare two addresses, either IPv4 or IPv6. 203 1.12 rmind * 204 1.13 rmind * => Return 0 if equal and negative/positive if less/greater accordingly. 205 1.12 rmind * => Ignore the mask, if NPF_NO_NETMASK is specified. 206 1.12 rmind */ 207 1.12 rmind int 208 1.12 rmind npf_addr_cmp(const npf_addr_t *addr1, const npf_netmask_t mask1, 209 1.13 rmind const npf_addr_t *addr2, const npf_netmask_t mask2, const int alen) 210 1.12 rmind { 211 1.13 rmind npf_addr_t realaddr1, realaddr2; 212 1.12 rmind 213 1.12 rmind if (mask1 != NPF_NO_NETMASK) { 214 1.13 rmind npf_addr_mask(addr1, mask1, alen, &realaddr1); 215 1.13 rmind addr1 = &realaddr1; 216 1.12 rmind } 217 1.12 rmind if (mask2 != NPF_NO_NETMASK) { 218 1.13 rmind npf_addr_mask(addr2, mask2, alen, &realaddr2); 219 1.13 rmind addr2 = &realaddr2; 220 1.12 rmind } 221 1.13 rmind return memcmp(addr1, addr2, alen); 222 1.12 rmind } 223 1.12 rmind 224 1.54 rmind int 225 1.54 rmind npf_netmask_check(const int alen, npf_netmask_t mask) 226 1.54 rmind { 227 1.54 rmind switch (alen) { 228 1.54 rmind case sizeof(struct in_addr): 229 1.54 rmind if (__predict_false(mask > 32 && mask != NPF_NO_NETMASK)) { 230 1.54 rmind return EINVAL; 231 1.54 rmind } 232 1.54 rmind break; 233 1.54 rmind case sizeof(struct in6_addr): 234 1.54 rmind if (__predict_false(mask > 128 && mask != NPF_NO_NETMASK)) { 235 1.54 rmind return EINVAL; 236 1.54 rmind } 237 1.54 rmind break; 238 1.54 rmind default: 239 1.54 rmind return EINVAL; 240 1.54 rmind } 241 1.54 rmind return 0; 242 1.54 rmind } 243 1.54 rmind 244 1.4 rmind /* 245 1.4 rmind * npf_tcpsaw: helper to fetch SEQ, ACK, WIN and return TCP data length. 246 1.12 rmind * 247 1.12 rmind * => Returns all values in host byte-order. 248 1.4 rmind */ 249 1.4 rmind int 250 1.12 rmind npf_tcpsaw(const npf_cache_t *npc, tcp_seq *seq, tcp_seq *ack, uint32_t *win) 251 1.4 rmind { 252 1.19 rmind const struct tcphdr *th = npc->npc_l4.tcp; 253 1.8 rmind u_int thlen; 254 1.1 rmind 255 1.7 zoltan KASSERT(npf_iscached(npc, NPC_TCP)); 256 1.1 rmind 257 1.4 rmind *seq = ntohl(th->th_seq); 258 1.4 rmind *ack = ntohl(th->th_ack); 259 1.4 rmind *win = (uint32_t)ntohs(th->th_win); 260 1.8 rmind thlen = th->th_off << 2; 261 1.1 rmind 262 1.7 zoltan if (npf_iscached(npc, NPC_IP4)) { 263 1.19 rmind const struct ip *ip = npc->npc_ip.v4; 264 1.21 rmind return ntohs(ip->ip_len) - npc->npc_hlen - thlen; 265 1.12 rmind } else if (npf_iscached(npc, NPC_IP6)) { 266 1.19 rmind const struct ip6_hdr *ip6 = npc->npc_ip.v6; 267 1.42 maxv return ntohs(ip6->ip6_plen) - 268 1.42 maxv (npc->npc_hlen - sizeof(*ip6)) - thlen; 269 1.7 zoltan } 270 1.7 zoltan return 0; 271 1.1 rmind } 272 1.1 rmind 273 1.1 rmind /* 274 1.4 rmind * npf_fetch_tcpopts: parse and return TCP options. 275 1.1 rmind */ 276 1.1 rmind bool 277 1.32 rmind npf_fetch_tcpopts(npf_cache_t *npc, uint16_t *mss, int *wscale) 278 1.1 rmind { 279 1.32 rmind nbuf_t *nbuf = npc->npc_nbuf; 280 1.19 rmind const struct tcphdr *th = npc->npc_l4.tcp; 281 1.49 maxv int cnt, optlen = 0; 282 1.49 maxv uint8_t *cp, opt; 283 1.4 rmind uint8_t val; 284 1.19 rmind bool ok; 285 1.4 rmind 286 1.7 zoltan KASSERT(npf_iscached(npc, NPC_IP46)); 287 1.7 zoltan KASSERT(npf_iscached(npc, NPC_TCP)); 288 1.10 rmind 289 1.4 rmind /* Determine if there are any TCP options, get their length. */ 290 1.49 maxv cnt = (th->th_off << 2) - sizeof(struct tcphdr); 291 1.49 maxv if (cnt <= 0) { 292 1.4 rmind /* No options. */ 293 1.1 rmind return false; 294 1.4 rmind } 295 1.49 maxv KASSERT(cnt <= MAX_TCPOPTLEN); 296 1.1 rmind 297 1.49 maxv /* Fetch all the options at once. */ 298 1.19 rmind nbuf_reset(nbuf); 299 1.49 maxv const int step = npc->npc_hlen + sizeof(struct tcphdr); 300 1.49 maxv if ((cp = nbuf_advance(nbuf, step, cnt)) == NULL) { 301 1.19 rmind ok = false; 302 1.19 rmind goto done; 303 1.4 rmind } 304 1.12 rmind 305 1.49 maxv /* Scan the options. */ 306 1.49 maxv for (; cnt > 0; cnt -= optlen, cp += optlen) { 307 1.49 maxv opt = cp[0]; 308 1.49 maxv if (opt == TCPOPT_EOL) 309 1.49 maxv break; 310 1.49 maxv if (opt == TCPOPT_NOP) 311 1.49 maxv optlen = 1; 312 1.49 maxv else { 313 1.49 maxv if (cnt < 2) 314 1.49 maxv break; 315 1.49 maxv optlen = cp[1]; 316 1.49 maxv if (optlen < 2 || optlen > cnt) 317 1.49 maxv break; 318 1.49 maxv } 319 1.49 maxv 320 1.49 maxv switch (opt) { 321 1.49 maxv case TCPOPT_MAXSEG: 322 1.49 maxv if (optlen != TCPOLEN_MAXSEG) 323 1.49 maxv continue; 324 1.49 maxv if (mss) { 325 1.51 maxv memcpy(mss, cp + 2, sizeof(uint16_t)); 326 1.19 rmind } 327 1.49 maxv break; 328 1.49 maxv case TCPOPT_WINDOW: 329 1.50 maxv if (optlen != TCPOLEN_WINDOW) 330 1.49 maxv continue; 331 1.49 maxv val = *(cp + 2); 332 1.49 maxv *wscale = (val > TCP_MAX_WINSHIFT) ? TCP_MAX_WINSHIFT : val; 333 1.49 maxv break; 334 1.49 maxv default: 335 1.49 maxv break; 336 1.4 rmind } 337 1.4 rmind } 338 1.49 maxv 339 1.19 rmind ok = true; 340 1.19 rmind done: 341 1.19 rmind if (nbuf_flag_p(nbuf, NBUF_DATAREF_RESET)) { 342 1.32 rmind npf_recache(npc); 343 1.19 rmind } 344 1.19 rmind return ok; 345 1.1 rmind } 346 1.1 rmind 347 1.51 maxv /* 348 1.51 maxv * npf_set_mss: set the MSS. 349 1.51 maxv */ 350 1.51 maxv bool 351 1.51 maxv npf_set_mss(npf_cache_t *npc, uint16_t mss, uint16_t *old, uint16_t *new, 352 1.51 maxv bool *mid) 353 1.51 maxv { 354 1.51 maxv nbuf_t *nbuf = npc->npc_nbuf; 355 1.51 maxv const struct tcphdr *th = npc->npc_l4.tcp; 356 1.51 maxv int cnt, optlen = 0; 357 1.51 maxv uint8_t *cp, *base, opt; 358 1.51 maxv bool ok; 359 1.51 maxv 360 1.51 maxv KASSERT(npf_iscached(npc, NPC_IP46)); 361 1.51 maxv KASSERT(npf_iscached(npc, NPC_TCP)); 362 1.51 maxv 363 1.51 maxv /* Determine if there are any TCP options, get their length. */ 364 1.51 maxv cnt = (th->th_off << 2) - sizeof(struct tcphdr); 365 1.51 maxv if (cnt <= 0) { 366 1.51 maxv /* No options. */ 367 1.51 maxv return false; 368 1.51 maxv } 369 1.51 maxv KASSERT(cnt <= MAX_TCPOPTLEN); 370 1.51 maxv 371 1.51 maxv /* Fetch all the options at once. */ 372 1.51 maxv nbuf_reset(nbuf); 373 1.51 maxv const int step = npc->npc_hlen + sizeof(struct tcphdr); 374 1.51 maxv if ((base = nbuf_advance(nbuf, step, cnt)) == NULL) { 375 1.51 maxv ok = false; 376 1.51 maxv goto done; 377 1.51 maxv } 378 1.51 maxv 379 1.51 maxv /* Scan the options. */ 380 1.51 maxv for (cp = base; cnt > 0; cnt -= optlen, cp += optlen) { 381 1.51 maxv opt = cp[0]; 382 1.51 maxv if (opt == TCPOPT_EOL) 383 1.51 maxv break; 384 1.51 maxv if (opt == TCPOPT_NOP) 385 1.51 maxv optlen = 1; 386 1.51 maxv else { 387 1.51 maxv if (cnt < 2) 388 1.51 maxv break; 389 1.51 maxv optlen = cp[1]; 390 1.51 maxv if (optlen < 2 || optlen > cnt) 391 1.51 maxv break; 392 1.51 maxv } 393 1.51 maxv 394 1.51 maxv switch (opt) { 395 1.51 maxv case TCPOPT_MAXSEG: 396 1.51 maxv if (optlen != TCPOLEN_MAXSEG) 397 1.51 maxv continue; 398 1.51 maxv if (((cp + 2) - base) % sizeof(uint16_t) != 0) { 399 1.51 maxv *mid = true; 400 1.51 maxv memcpy(&old[0], cp + 1, sizeof(uint16_t)); 401 1.51 maxv memcpy(&old[1], cp + 3, sizeof(uint16_t)); 402 1.51 maxv memcpy(cp + 2, &mss, sizeof(uint16_t)); 403 1.51 maxv memcpy(&new[0], cp + 1, sizeof(uint16_t)); 404 1.51 maxv memcpy(&new[1], cp + 3, sizeof(uint16_t)); 405 1.51 maxv } else { 406 1.51 maxv *mid = false; 407 1.51 maxv memcpy(cp + 2, &mss, sizeof(uint16_t)); 408 1.51 maxv } 409 1.51 maxv break; 410 1.51 maxv default: 411 1.51 maxv break; 412 1.51 maxv } 413 1.51 maxv } 414 1.51 maxv 415 1.51 maxv ok = true; 416 1.51 maxv done: 417 1.51 maxv if (nbuf_flag_p(nbuf, NBUF_DATAREF_RESET)) { 418 1.51 maxv npf_recache(npc); 419 1.51 maxv } 420 1.51 maxv return ok; 421 1.51 maxv } 422 1.51 maxv 423 1.19 rmind static int 424 1.19 rmind npf_cache_ip(npf_cache_t *npc, nbuf_t *nbuf) 425 1.1 rmind { 426 1.19 rmind const void *nptr = nbuf_dataptr(nbuf); 427 1.19 rmind const uint8_t ver = *(const uint8_t *)nptr; 428 1.19 rmind int flags = 0; 429 1.12 rmind 430 1.43 maxv /* 431 1.43 maxv * We intentionally don't read the L4 payload after IPPROTO_AH. 432 1.43 maxv */ 433 1.43 maxv 434 1.4 rmind switch (ver >> 4) { 435 1.12 rmind case IPVERSION: { 436 1.19 rmind struct ip *ip; 437 1.12 rmind 438 1.19 rmind ip = nbuf_ensure_contig(nbuf, sizeof(struct ip)); 439 1.19 rmind if (ip == NULL) { 440 1.38 maxv return NPC_FMTERR; 441 1.4 rmind } 442 1.12 rmind 443 1.46 maxv /* Retrieve the complete header. */ 444 1.10 rmind if ((u_int)(ip->ip_hl << 2) < sizeof(struct ip)) { 445 1.38 maxv return NPC_FMTERR; 446 1.4 rmind } 447 1.46 maxv ip = nbuf_ensure_contig(nbuf, (u_int)(ip->ip_hl << 2)); 448 1.46 maxv if (ip == NULL) { 449 1.46 maxv return NPC_FMTERR; 450 1.46 maxv } 451 1.46 maxv 452 1.4 rmind if (ip->ip_off & ~htons(IP_DF | IP_RF)) { 453 1.4 rmind /* Note fragmentation. */ 454 1.19 rmind flags |= NPC_IPFRAG; 455 1.4 rmind } 456 1.12 rmind 457 1.4 rmind /* Cache: layer 3 - IPv4. */ 458 1.14 rmind npc->npc_alen = sizeof(struct in_addr); 459 1.28 rmind npc->npc_ips[NPF_SRC] = (npf_addr_t *)&ip->ip_src; 460 1.28 rmind npc->npc_ips[NPF_DST] = (npf_addr_t *)&ip->ip_dst; 461 1.7 zoltan npc->npc_hlen = ip->ip_hl << 2; 462 1.19 rmind npc->npc_proto = ip->ip_p; 463 1.19 rmind 464 1.19 rmind npc->npc_ip.v4 = ip; 465 1.19 rmind flags |= NPC_IP4; 466 1.4 rmind break; 467 1.12 rmind } 468 1.4 rmind 469 1.12 rmind case (IPV6_VERSION >> 4): { 470 1.19 rmind struct ip6_hdr *ip6; 471 1.19 rmind struct ip6_ext *ip6e; 472 1.37 christos struct ip6_frag *ip6f; 473 1.19 rmind size_t off, hlen; 474 1.38 maxv int frag_present; 475 1.19 rmind 476 1.19 rmind ip6 = nbuf_ensure_contig(nbuf, sizeof(struct ip6_hdr)); 477 1.19 rmind if (ip6 == NULL) { 478 1.38 maxv return NPC_FMTERR; 479 1.7 zoltan } 480 1.19 rmind 481 1.44 maxv /* 482 1.44 maxv * XXX: We don't handle IPv6 Jumbograms. 483 1.44 maxv */ 484 1.44 maxv 485 1.19 rmind /* Set initial next-protocol value. */ 486 1.19 rmind hlen = sizeof(struct ip6_hdr); 487 1.19 rmind npc->npc_proto = ip6->ip6_nxt; 488 1.13 rmind npc->npc_hlen = hlen; 489 1.7 zoltan 490 1.38 maxv frag_present = 0; 491 1.38 maxv 492 1.12 rmind /* 493 1.19 rmind * Advance by the length of the current header. 494 1.12 rmind */ 495 1.19 rmind off = nbuf_offset(nbuf); 496 1.38 maxv while ((ip6e = nbuf_advance(nbuf, hlen, sizeof(*ip6e))) != NULL) { 497 1.13 rmind /* 498 1.13 rmind * Determine whether we are going to continue. 499 1.13 rmind */ 500 1.19 rmind switch (npc->npc_proto) { 501 1.13 rmind case IPPROTO_HOPOPTS: 502 1.7 zoltan case IPPROTO_DSTOPTS: 503 1.7 zoltan case IPPROTO_ROUTING: 504 1.19 rmind hlen = (ip6e->ip6e_len + 1) << 3; 505 1.7 zoltan break; 506 1.7 zoltan case IPPROTO_FRAGMENT: 507 1.38 maxv if (frag_present++) 508 1.38 maxv return NPC_FMTERR; 509 1.37 christos ip6f = nbuf_ensure_contig(nbuf, sizeof(*ip6f)); 510 1.37 christos if (ip6f == NULL) 511 1.38 maxv return NPC_FMTERR; 512 1.38 maxv 513 1.41 maxv /* RFC6946: Skip dummy fragments. */ 514 1.41 maxv if (!ntohs(ip6f->ip6f_offlg & IP6F_OFF_MASK) && 515 1.41 maxv !(ip6f->ip6f_offlg & IP6F_MORE_FRAG)) { 516 1.41 maxv hlen = sizeof(struct ip6_frag); 517 1.41 maxv break; 518 1.41 maxv } 519 1.41 maxv 520 1.40 maxv hlen = 0; 521 1.40 maxv flags |= NPC_IPFRAG; 522 1.37 christos 523 1.7 zoltan break; 524 1.7 zoltan default: 525 1.13 rmind hlen = 0; 526 1.13 rmind break; 527 1.13 rmind } 528 1.13 rmind 529 1.13 rmind if (!hlen) { 530 1.7 zoltan break; 531 1.7 zoltan } 532 1.19 rmind npc->npc_proto = ip6e->ip6e_nxt; 533 1.13 rmind npc->npc_hlen += hlen; 534 1.13 rmind } 535 1.7 zoltan 536 1.46 maxv if (ip6e == NULL) { 537 1.46 maxv return NPC_FMTERR; 538 1.46 maxv } 539 1.46 maxv 540 1.23 rmind /* 541 1.23 rmind * Re-fetch the header pointers (nbufs might have been 542 1.23 rmind * reallocated). Restore the original offset (if any). 543 1.23 rmind */ 544 1.19 rmind nbuf_reset(nbuf); 545 1.23 rmind ip6 = nbuf_dataptr(nbuf); 546 1.19 rmind if (off) { 547 1.19 rmind nbuf_advance(nbuf, off, 0); 548 1.19 rmind } 549 1.19 rmind 550 1.12 rmind /* Cache: layer 3 - IPv6. */ 551 1.14 rmind npc->npc_alen = sizeof(struct in6_addr); 552 1.28 rmind npc->npc_ips[NPF_SRC] = (npf_addr_t *)&ip6->ip6_src; 553 1.44 maxv npc->npc_ips[NPF_DST] = (npf_addr_t *)&ip6->ip6_dst; 554 1.19 rmind 555 1.19 rmind npc->npc_ip.v6 = ip6; 556 1.19 rmind flags |= NPC_IP6; 557 1.7 zoltan break; 558 1.12 rmind } 559 1.4 rmind default: 560 1.19 rmind break; 561 1.4 rmind } 562 1.19 rmind return flags; 563 1.1 rmind } 564 1.1 rmind 565 1.56 rmind static inline int 566 1.56 rmind npf_cache_tcp(npf_cache_t *npc, nbuf_t *nbuf, unsigned hlen) 567 1.56 rmind { 568 1.56 rmind struct tcphdr *th; 569 1.56 rmind 570 1.56 rmind th = nbuf_advance(nbuf, hlen, sizeof(struct tcphdr)); 571 1.56 rmind if (__predict_false(th == NULL)) { 572 1.56 rmind return NPC_FMTERR; 573 1.56 rmind } 574 1.56 rmind if (__predict_false(th->th_off < 5)) { 575 1.56 rmind return NPC_FMTERR; 576 1.56 rmind } 577 1.56 rmind npc->npc_l4.tcp = th; 578 1.56 rmind return NPC_LAYER4 | NPC_TCP; 579 1.56 rmind } 580 1.56 rmind 581 1.58 joe int 582 1.58 joe npf_cache_ether(npf_cache_t *npc) 583 1.58 joe { 584 1.58 joe struct mbuf *m = npc->npc_nbuf->nb_mbuf0; 585 1.58 joe struct ether_header *ether; 586 1.58 joe 587 1.58 joe nbuf_unset_flag(npc->npc_nbuf, NBUF_DATAREF_RESET); 588 1.58 joe /* 589 1.58 joe * we are so sure ether header will be in the first mbuf 590 1.58 joe * and we are also sure 14 bytes ether_header will be fully accessible 591 1.58 joe */ 592 1.58 joe ether = mtod(m, struct ether_header *); 593 1.58 joe if (__predict_false(ether == NULL)) 594 1.58 joe return NPC_FMTERR; 595 1.58 joe memcpy(&npc->ether, ether, sizeof(npc->ether)); 596 1.58 joe 597 1.58 joe KASSERT(nbuf_flag_p(npc->npc_nbuf, NBUF_DATAREF_RESET) == 0); 598 1.58 joe return NPC_LAYER2; 599 1.58 joe } 600 1.58 joe 601 1.1 rmind /* 602 1.4 rmind * npf_cache_all: general routine to cache all relevant IP (v4 or v6) 603 1.12 rmind * and TCP, UDP or ICMP headers. 604 1.19 rmind * 605 1.19 rmind * => nbuf offset shall be set accordingly. 606 1.1 rmind */ 607 1.10 rmind int 608 1.32 rmind npf_cache_all(npf_cache_t *npc) 609 1.1 rmind { 610 1.32 rmind nbuf_t *nbuf = npc->npc_nbuf; 611 1.19 rmind int flags, l4flags; 612 1.19 rmind u_int hlen; 613 1.19 rmind 614 1.19 rmind /* 615 1.19 rmind * This routine is a main point where the references are cached, 616 1.19 rmind * therefore clear the flag as we reset. 617 1.19 rmind */ 618 1.19 rmind again: 619 1.19 rmind nbuf_unset_flag(nbuf, NBUF_DATAREF_RESET); 620 1.1 rmind 621 1.19 rmind /* 622 1.19 rmind * First, cache the L3 header (IPv4 or IPv6). If IP packet is 623 1.19 rmind * fragmented, then we cannot look into L4. 624 1.19 rmind */ 625 1.19 rmind flags = npf_cache_ip(npc, nbuf); 626 1.38 maxv if ((flags & NPC_IP46) == 0 || (flags & NPC_IPFRAG) != 0 || 627 1.38 maxv (flags & NPC_FMTERR) != 0) { 628 1.47 maxv goto out; 629 1.1 rmind } 630 1.19 rmind hlen = npc->npc_hlen; 631 1.19 rmind 632 1.45 maxv /* 633 1.45 maxv * Note: we guarantee that the potential "Query Id" field of the 634 1.45 maxv * ICMPv4/ICMPv6 packets is in the nbuf. This field is used in the 635 1.45 maxv * ICMP ALG. 636 1.45 maxv */ 637 1.19 rmind switch (npc->npc_proto) { 638 1.1 rmind case IPPROTO_TCP: 639 1.19 rmind /* Cache: layer 4 - TCP. */ 640 1.56 rmind l4flags = npf_cache_tcp(npc, nbuf, hlen); 641 1.10 rmind break; 642 1.1 rmind case IPPROTO_UDP: 643 1.19 rmind /* Cache: layer 4 - UDP. */ 644 1.19 rmind npc->npc_l4.udp = nbuf_advance(nbuf, hlen, 645 1.19 rmind sizeof(struct udphdr)); 646 1.19 rmind l4flags = NPC_LAYER4 | NPC_UDP; 647 1.10 rmind break; 648 1.1 rmind case IPPROTO_ICMP: 649 1.19 rmind /* Cache: layer 4 - ICMPv4. */ 650 1.19 rmind npc->npc_l4.icmp = nbuf_advance(nbuf, hlen, 651 1.45 maxv ICMP_MINLEN); 652 1.19 rmind l4flags = NPC_LAYER4 | NPC_ICMP; 653 1.19 rmind break; 654 1.15 spz case IPPROTO_ICMPV6: 655 1.19 rmind /* Cache: layer 4 - ICMPv6. */ 656 1.19 rmind npc->npc_l4.icmp6 = nbuf_advance(nbuf, hlen, 657 1.45 maxv sizeof(struct icmp6_hdr)); 658 1.19 rmind l4flags = NPC_LAYER4 | NPC_ICMP; 659 1.19 rmind break; 660 1.19 rmind default: 661 1.19 rmind l4flags = 0; 662 1.10 rmind break; 663 1.1 rmind } 664 1.19 rmind 665 1.57 rmind /* 666 1.57 rmind * Error out if nbuf_advance() failed. 667 1.57 rmind */ 668 1.57 rmind if (__predict_false(l4flags && !npc->npc_l4.hdr)) { 669 1.47 maxv goto err; 670 1.47 maxv } 671 1.47 maxv 672 1.19 rmind if (nbuf_flag_p(nbuf, NBUF_DATAREF_RESET)) { 673 1.19 rmind goto again; 674 1.19 rmind } 675 1.19 rmind 676 1.47 maxv flags |= l4flags; 677 1.47 maxv npc->npc_info |= flags; 678 1.47 maxv return flags; 679 1.47 maxv 680 1.47 maxv err: 681 1.47 maxv flags = NPC_FMTERR; 682 1.47 maxv out: 683 1.47 maxv nbuf_unset_flag(nbuf, NBUF_DATAREF_RESET); 684 1.19 rmind npc->npc_info |= flags; 685 1.19 rmind return flags; 686 1.19 rmind } 687 1.19 rmind 688 1.19 rmind void 689 1.32 rmind npf_recache(npf_cache_t *npc) 690 1.19 rmind { 691 1.32 rmind nbuf_t *nbuf = npc->npc_nbuf; 692 1.24 martin const int mflags __diagused = npc->npc_info & (NPC_IP46 | NPC_LAYER4); 693 1.25 mrg int flags __diagused; 694 1.19 rmind 695 1.19 rmind nbuf_reset(nbuf); 696 1.19 rmind npc->npc_info = 0; 697 1.32 rmind flags = npf_cache_all(npc); 698 1.32 rmind 699 1.19 rmind KASSERT((flags & mflags) == mflags); 700 1.19 rmind KASSERT(nbuf_flag_p(nbuf, NBUF_DATAREF_RESET) == 0); 701 1.1 rmind } 702 1.1 rmind 703 1.1 rmind /* 704 1.19 rmind * npf_rwrip: rewrite required IP address. 705 1.4 rmind */ 706 1.4 rmind bool 707 1.28 rmind npf_rwrip(const npf_cache_t *npc, u_int which, const npf_addr_t *addr) 708 1.4 rmind { 709 1.4 rmind KASSERT(npf_iscached(npc, NPC_IP46)); 710 1.28 rmind KASSERT(which == NPF_SRC || which == NPF_DST); 711 1.4 rmind 712 1.28 rmind memcpy(npc->npc_ips[which], addr, npc->npc_alen); 713 1.4 rmind return true; 714 1.4 rmind } 715 1.4 rmind 716 1.4 rmind /* 717 1.19 rmind * npf_rwrport: rewrite required TCP/UDP port. 718 1.1 rmind */ 719 1.1 rmind bool 720 1.28 rmind npf_rwrport(const npf_cache_t *npc, u_int which, const in_port_t port) 721 1.1 rmind { 722 1.21 rmind const int proto = npc->npc_proto; 723 1.4 rmind in_port_t *oport; 724 1.1 rmind 725 1.4 rmind KASSERT(npf_iscached(npc, NPC_TCP) || npf_iscached(npc, NPC_UDP)); 726 1.1 rmind KASSERT(proto == IPPROTO_TCP || proto == IPPROTO_UDP); 727 1.28 rmind KASSERT(which == NPF_SRC || which == NPF_DST); 728 1.1 rmind 729 1.19 rmind /* Get the offset and store the port in it. */ 730 1.4 rmind if (proto == IPPROTO_TCP) { 731 1.19 rmind struct tcphdr *th = npc->npc_l4.tcp; 732 1.28 rmind oport = (which == NPF_SRC) ? &th->th_sport : &th->th_dport; 733 1.1 rmind } else { 734 1.19 rmind struct udphdr *uh = npc->npc_l4.udp; 735 1.28 rmind oport = (which == NPF_SRC) ? &uh->uh_sport : &uh->uh_dport; 736 1.1 rmind } 737 1.19 rmind memcpy(oport, &port, sizeof(in_port_t)); 738 1.1 rmind return true; 739 1.1 rmind } 740 1.1 rmind 741 1.1 rmind /* 742 1.19 rmind * npf_rwrcksum: rewrite IPv4 and/or TCP/UDP checksum. 743 1.1 rmind */ 744 1.1 rmind bool 745 1.28 rmind npf_rwrcksum(const npf_cache_t *npc, u_int which, 746 1.19 rmind const npf_addr_t *addr, const in_port_t port) 747 1.1 rmind { 748 1.28 rmind const npf_addr_t *oaddr = npc->npc_ips[which]; 749 1.21 rmind const int proto = npc->npc_proto; 750 1.19 rmind const int alen = npc->npc_alen; 751 1.55 rmind uint16_t cksum, *ocksum; 752 1.55 rmind struct tcphdr *th; 753 1.55 rmind struct udphdr *uh; 754 1.18 rmind in_port_t oport; 755 1.18 rmind 756 1.19 rmind KASSERT(npf_iscached(npc, NPC_LAYER4)); 757 1.28 rmind KASSERT(which == NPF_SRC || which == NPF_DST); 758 1.18 rmind 759 1.4 rmind if (npf_iscached(npc, NPC_IP4)) { 760 1.19 rmind struct ip *ip = npc->npc_ip.v4; 761 1.19 rmind uint16_t ipsum = ip->ip_sum; 762 1.4 rmind 763 1.19 rmind /* Recalculate IPv4 checksum and rewrite. */ 764 1.19 rmind ip->ip_sum = npf_addr_cksum(ipsum, alen, oaddr, addr); 765 1.4 rmind } else { 766 1.4 rmind /* No checksum for IPv6. */ 767 1.4 rmind KASSERT(npf_iscached(npc, NPC_IP6)); 768 1.4 rmind } 769 1.4 rmind 770 1.18 rmind /* 771 1.18 rmind * Calculate TCP/UDP checksum: 772 1.18 rmind * - Skip if UDP and the current checksum is zero. 773 1.18 rmind * - Fixup the IP address change. 774 1.18 rmind * - Fixup the port change, if required (non-zero). 775 1.18 rmind */ 776 1.55 rmind switch (proto) { 777 1.55 rmind case IPPROTO_TCP: 778 1.55 rmind KASSERT(npf_iscached(npc, NPC_TCP)); 779 1.55 rmind th = npc->npc_l4.tcp; 780 1.18 rmind ocksum = &th->th_sum; 781 1.28 rmind oport = (which == NPF_SRC) ? th->th_sport : th->th_dport; 782 1.55 rmind break; 783 1.55 rmind case IPPROTO_UDP: 784 1.55 rmind KASSERT(npf_iscached(npc, NPC_UDP)); 785 1.55 rmind uh = npc->npc_l4.udp; 786 1.18 rmind ocksum = &uh->uh_sum; 787 1.18 rmind if (*ocksum == 0) { 788 1.4 rmind /* No need to update. */ 789 1.4 rmind return true; 790 1.4 rmind } 791 1.28 rmind oport = (which == NPF_SRC) ? uh->uh_sport : uh->uh_dport; 792 1.55 rmind break; 793 1.55 rmind case IPPROTO_ICMP: 794 1.55 rmind case IPPROTO_ICMPV6: 795 1.55 rmind default: 796 1.55 rmind /* Nothing else to do for ICMP. */ 797 1.55 rmind return true; 798 1.18 rmind } 799 1.18 rmind 800 1.55 rmind /* 801 1.55 rmind * Update and rewrite the TCP/UDP checksum. 802 1.55 rmind */ 803 1.55 rmind cksum = npf_addr_cksum(*ocksum, alen, oaddr, addr); 804 1.18 rmind if (port) { 805 1.18 rmind cksum = npf_fixup16_cksum(cksum, oport, port); 806 1.4 rmind } 807 1.19 rmind memcpy(ocksum, &cksum, sizeof(uint16_t)); 808 1.4 rmind return true; 809 1.4 rmind } 810 1.4 rmind 811 1.29 rmind /* 812 1.30 rmind * npf_napt_rwr: perform address and/or port translation. 813 1.30 rmind */ 814 1.30 rmind int 815 1.30 rmind npf_napt_rwr(const npf_cache_t *npc, u_int which, 816 1.30 rmind const npf_addr_t *addr, const in_addr_t port) 817 1.30 rmind { 818 1.30 rmind const unsigned proto = npc->npc_proto; 819 1.30 rmind 820 1.30 rmind /* 821 1.30 rmind * Rewrite IP and/or TCP/UDP checksums first, since we need the 822 1.30 rmind * current (old) address/port for the calculations. Then perform 823 1.30 rmind * the address translation i.e. rewrite source or destination. 824 1.30 rmind */ 825 1.30 rmind if (!npf_rwrcksum(npc, which, addr, port)) { 826 1.30 rmind return EINVAL; 827 1.30 rmind } 828 1.30 rmind if (!npf_rwrip(npc, which, addr)) { 829 1.30 rmind return EINVAL; 830 1.30 rmind } 831 1.30 rmind if (port == 0) { 832 1.30 rmind /* Done. */ 833 1.30 rmind return 0; 834 1.30 rmind } 835 1.30 rmind 836 1.30 rmind switch (proto) { 837 1.30 rmind case IPPROTO_TCP: 838 1.30 rmind case IPPROTO_UDP: 839 1.30 rmind /* Rewrite source/destination port. */ 840 1.30 rmind if (!npf_rwrport(npc, which, port)) { 841 1.30 rmind return EINVAL; 842 1.30 rmind } 843 1.30 rmind break; 844 1.30 rmind case IPPROTO_ICMP: 845 1.30 rmind case IPPROTO_ICMPV6: 846 1.30 rmind KASSERT(npf_iscached(npc, NPC_ICMP)); 847 1.30 rmind /* Nothing. */ 848 1.30 rmind break; 849 1.30 rmind default: 850 1.30 rmind return ENOTSUP; 851 1.30 rmind } 852 1.30 rmind return 0; 853 1.30 rmind } 854 1.30 rmind 855 1.30 rmind /* 856 1.29 rmind * IPv6-to-IPv6 Network Prefix Translation (NPTv6), as per RFC 6296. 857 1.29 rmind */ 858 1.29 rmind int 859 1.29 rmind npf_npt66_rwr(const npf_cache_t *npc, u_int which, const npf_addr_t *pref, 860 1.29 rmind npf_netmask_t len, uint16_t adj) 861 1.29 rmind { 862 1.29 rmind npf_addr_t *addr = npc->npc_ips[which]; 863 1.29 rmind unsigned remnant, word, preflen = len >> 4; 864 1.29 rmind uint32_t sum; 865 1.29 rmind 866 1.29 rmind KASSERT(which == NPF_SRC || which == NPF_DST); 867 1.29 rmind 868 1.29 rmind if (!npf_iscached(npc, NPC_IP6)) { 869 1.29 rmind return EINVAL; 870 1.29 rmind } 871 1.29 rmind if (len <= 48) { 872 1.29 rmind /* 873 1.29 rmind * The word to adjust. Cannot translate the 0xffff 874 1.29 rmind * subnet if /48 or shorter. 875 1.29 rmind */ 876 1.29 rmind word = 3; 877 1.36 christos if (addr->word16[word] == 0xffff) { 878 1.29 rmind return EINVAL; 879 1.29 rmind } 880 1.29 rmind } else { 881 1.29 rmind /* 882 1.29 rmind * Also, all 0s or 1s in the host part are disallowed for 883 1.29 rmind * longer than /48 prefixes. 884 1.29 rmind */ 885 1.36 christos if ((addr->word32[2] == 0 && addr->word32[3] == 0) || 886 1.36 christos (addr->word32[2] == ~0U && addr->word32[3] == ~0U)) 887 1.29 rmind return EINVAL; 888 1.29 rmind 889 1.29 rmind /* Determine the 16-bit word to adjust. */ 890 1.29 rmind for (word = 4; word < 8; word++) 891 1.36 christos if (addr->word16[word] != 0xffff) 892 1.29 rmind break; 893 1.29 rmind } 894 1.29 rmind 895 1.29 rmind /* Rewrite the prefix. */ 896 1.29 rmind for (unsigned i = 0; i < preflen; i++) { 897 1.36 christos addr->word16[i] = pref->word16[i]; 898 1.29 rmind } 899 1.29 rmind 900 1.29 rmind /* 901 1.29 rmind * If prefix length is within a 16-bit word (not dividable by 16), 902 1.29 rmind * then prepare a mask, determine the word and adjust it. 903 1.29 rmind */ 904 1.29 rmind if ((remnant = len - (preflen << 4)) != 0) { 905 1.29 rmind const uint16_t wordmask = (1U << remnant) - 1; 906 1.29 rmind const unsigned i = preflen; 907 1.29 rmind 908 1.36 christos addr->word16[i] = (pref->word16[i] & wordmask) | 909 1.36 christos (addr->word16[i] & ~wordmask); 910 1.29 rmind } 911 1.29 rmind 912 1.29 rmind /* 913 1.29 rmind * Performing 1's complement sum/difference. 914 1.29 rmind */ 915 1.36 christos sum = addr->word16[word] + adj; 916 1.29 rmind while (sum >> 16) { 917 1.29 rmind sum = (sum >> 16) + (sum & 0xffff); 918 1.29 rmind } 919 1.29 rmind if (sum == 0xffff) { 920 1.29 rmind /* RFC 1071. */ 921 1.29 rmind sum = 0x0000; 922 1.29 rmind } 923 1.36 christos addr->word16[word] = sum; 924 1.29 rmind return 0; 925 1.29 rmind } 926 1.29 rmind 927 1.13 rmind #if defined(DDB) || defined(_NPF_TESTING) 928 1.13 rmind 929 1.31 rmind const char * 930 1.31 rmind npf_addr_dump(const npf_addr_t *addr, int alen) 931 1.13 rmind { 932 1.31 rmind if (alen == sizeof(struct in_addr)) { 933 1.31 rmind struct in_addr ip; 934 1.31 rmind memcpy(&ip, addr, alen); 935 1.31 rmind return inet_ntoa(ip); 936 1.31 rmind } 937 1.36 christos return "[IPv6]"; 938 1.13 rmind } 939 1.13 rmind 940 1.13 rmind #endif 941