/*-
 * Copyright (c) 2009-2014 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This material is based upon work partially supported by The
 * NetBSD Foundation under a contract with Mindaugas Rasiukevicius.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Various protocol related helper routines.
 *
 * This layer manipulates the npf_cache_t structure, i.e. caches requested
 * headers and records which information was cached in the information bit
 * field.  It is also the responsibility of this layer to update or
 * invalidate the cache on rewrites (e.g. by translation routines).
37 */ 38 39 #ifdef _KERNEL 40 #include <sys/cdefs.h> 41 __KERNEL_RCSID(0, "$NetBSD: npf_inet.c,v 1.58 2025/07/01 18:42:37 joe Exp $"); 42 43 #include <sys/param.h> 44 #include <sys/types.h> 45 46 #include <net/pfil.h> 47 #include <net/if.h> 48 #include <net/ethertypes.h> 49 #include <net/if_ether.h> 50 51 #include <netinet/in_systm.h> 52 #include <netinet/in.h> 53 #include <netinet6/in6_var.h> 54 #include <netinet/ip.h> 55 #include <netinet/ip6.h> 56 #include <netinet/tcp.h> 57 #include <netinet/udp.h> 58 #include <netinet/ip_icmp.h> 59 #endif 60 61 #include "npf_impl.h" 62 63 /* 64 * npf_fixup{16,32}_cksum: incremental update of the Internet checksum. 65 */ 66 67 uint16_t 68 npf_fixup16_cksum(uint16_t cksum, uint16_t odatum, uint16_t ndatum) 69 { 70 uint32_t sum; 71 72 /* 73 * RFC 1624: 74 * HC' = ~(~HC + ~m + m') 75 * 76 * Note: 1's complement sum is endian-independent (RFC 1071, page 2). 77 */ 78 sum = ~cksum & 0xffff; 79 sum += (~odatum & 0xffff) + ndatum; 80 sum = (sum >> 16) + (sum & 0xffff); 81 sum += (sum >> 16); 82 83 return ~sum & 0xffff; 84 } 85 86 uint16_t 87 npf_fixup32_cksum(uint16_t cksum, uint32_t odatum, uint32_t ndatum) 88 { 89 uint32_t sum; 90 91 /* 92 * Checksum 32-bit datum as as two 16-bit. Note, the first 93 * 32->16 bit reduction is not necessary. 94 */ 95 sum = ~cksum & 0xffff; 96 sum += (~odatum & 0xffff) + (ndatum & 0xffff); 97 98 sum += (~odatum >> 16) + (ndatum >> 16); 99 sum = (sum >> 16) + (sum & 0xffff); 100 sum += (sum >> 16); 101 return ~sum & 0xffff; 102 } 103 104 /* 105 * npf_addr_cksum: calculate checksum of the address, either IPv4 or IPv6. 
 */
uint16_t
npf_addr_cksum(uint16_t cksum, int sz, const npf_addr_t *oaddr,
    const npf_addr_t *naddr)
{
	const uint32_t *oip32 = (const uint32_t *)oaddr;
	const uint32_t *nip32 = (const uint32_t *)naddr;

	/* The address size must be a whole number of 32-bit words. */
	KASSERT(sz % sizeof(uint32_t) == 0);

	/* Fold each old/new 32-bit word pair into the checksum. */
	do {
		cksum = npf_fixup32_cksum(cksum, *oip32++, *nip32++);
		sz -= sizeof(uint32_t);
	} while (sz);

	return cksum;
}

/*
 * npf_addr_mix: provide IP addresses as a XORed 32-bit integer.
 * Note: used for hash function.
 */
uint32_t
npf_addr_mix(const int alen, const npf_addr_t *a1, const npf_addr_t *a2)
{
	const int nwords = alen >> 2;
	uint32_t mix = 0;

	KASSERT(alen > 0 && a1 != NULL && a2 != NULL);

	for (int i = 0; i < nwords; i++) {
		mix ^= a1->word32[i];
		mix ^= a2->word32[i];
	}
	return mix;
}

/*
 * npf_addr_mask: apply the mask to a given address and store the result.
 */
void
npf_addr_mask(const npf_addr_t *addr, const npf_netmask_t mask,
    const int alen, npf_addr_t *out)
{
	const int nwords = alen >> 2;
	uint_fast8_t length = mask;

	/* Note: maximum length is 32 for IPv4 and 128 for IPv6. */
	KASSERT(length <= NPF_MAX_NETMASK);

	for (int i = 0; i < nwords; i++) {
		uint32_t wordmask;

		if (length >= 32) {
			/* Whole word lies within the prefix. */
			wordmask = htonl(0xffffffff);
			length -= 32;
		} else if (length) {
			/* Partial word: keep only the leading 'length' bits. */
			wordmask = htonl(0xffffffff << (32 - length));
			length = 0;
		} else {
			/* Entirely in the host part: clear the word. */
			wordmask = 0;
		}
		out->word32[i] = addr->word32[i] & wordmask;
	}
}

/*
 * npf_addr_bitor: bitwise OR the host part (given the netmask).
 * Zero mask can be used to OR the entire address.
 */
void
npf_addr_bitor(const npf_addr_t *addr, const npf_netmask_t mask,
    const int alen, npf_addr_t *out)
{
	const int nwords = alen >> 2;
	uint_fast8_t length = mask;

	/* Note: maximum length is 32 for IPv4 and 128 for IPv6.
	 */
	KASSERT(length <= NPF_MAX_NETMASK);

	for (int i = 0; i < nwords; i++) {
		uint32_t wordmask;

		if (length >= 32) {
			wordmask = htonl(0xffffffff);
			length -= 32;
		} else if (length) {
			wordmask = htonl(0xffffffff << (32 - length));
			length = 0;
		} else {
			wordmask = 0;
		}
		/* OR in only the host (non-prefix) bits of the address. */
		out->word32[i] |= addr->word32[i] & ~wordmask;
	}
}

/*
 * npf_addr_cmp: compare two addresses, either IPv4 or IPv6.
 *
 * => Return 0 if equal and negative/positive if less/greater accordingly.
 * => Ignore the mask, if NPF_NO_NETMASK is specified.
 */
int
npf_addr_cmp(const npf_addr_t *addr1, const npf_netmask_t mask1,
    const npf_addr_t *addr2, const npf_netmask_t mask2, const int alen)
{
	npf_addr_t realaddr1, realaddr2;

	/* Apply the netmasks before comparing, unless "no netmask". */
	if (mask1 != NPF_NO_NETMASK) {
		npf_addr_mask(addr1, mask1, alen, &realaddr1);
		addr1 = &realaddr1;
	}
	if (mask2 != NPF_NO_NETMASK) {
		npf_addr_mask(addr2, mask2, alen, &realaddr2);
		addr2 = &realaddr2;
	}
	/* Addresses are in network byte-order: bytewise compare suffices. */
	return memcmp(addr1, addr2, alen);
}

/*
 * npf_netmask_check: validate that the netmask length is within the
 * limit of the given address family (32 for IPv4, 128 for IPv6) or is
 * the special NPF_NO_NETMASK value.
 *
 * => Returns 0 if valid and EINVAL otherwise.
 */
int
npf_netmask_check(const int alen, npf_netmask_t mask)
{
	switch (alen) {
	case sizeof(struct in_addr):
		if (__predict_false(mask > 32 && mask != NPF_NO_NETMASK)) {
			return EINVAL;
		}
		break;
	case sizeof(struct in6_addr):
		if (__predict_false(mask > 128 && mask != NPF_NO_NETMASK)) {
			return EINVAL;
		}
		break;
	default:
		/* Unknown address length. */
		return EINVAL;
	}
	return 0;
}

/*
 * npf_tcpsaw: helper to fetch SEQ, ACK, WIN and return TCP data length.
 *
 * => Returns all values in host byte-order.
 */
int
npf_tcpsaw(const npf_cache_t *npc, tcp_seq *seq, tcp_seq *ack, uint32_t *win)
{
	const struct tcphdr *th = npc->npc_l4.tcp;
	u_int thlen;

	KASSERT(npf_iscached(npc, NPC_TCP));

	*seq = ntohl(th->th_seq);
	*ack = ntohl(th->th_ack);
	*win = (uint32_t)ntohs(th->th_win);
	/* TCP data offset is in 32-bit words. */
	thlen = th->th_off << 2;

	if (npf_iscached(npc, NPC_IP4)) {
		const struct ip *ip = npc->npc_ip.v4;
		/* IPv4 total length includes the IP header. */
		return ntohs(ip->ip_len) - npc->npc_hlen - thlen;
	} else if (npf_iscached(npc, NPC_IP6)) {
		const struct ip6_hdr *ip6 = npc->npc_ip.v6;
		/*
		 * IPv6 payload length excludes the fixed header, so only
		 * subtract the extension header portion of npc_hlen and
		 * the TCP header.
		 */
		return ntohs(ip6->ip6_plen) -
		    (npc->npc_hlen - sizeof(*ip6)) - thlen;
	}
	return 0;
}

/*
 * npf_fetch_tcpopts: parse and return TCP options (the MSS value and
 * the window scale shift, when those options are present).
 *
 * => Returns false if there are no options or they are inaccessible.
 */
bool
npf_fetch_tcpopts(npf_cache_t *npc, uint16_t *mss, int *wscale)
{
	nbuf_t *nbuf = npc->npc_nbuf;
	const struct tcphdr *th = npc->npc_l4.tcp;
	int cnt, optlen = 0;
	uint8_t *cp, opt;
	uint8_t val;
	bool ok;

	KASSERT(npf_iscached(npc, NPC_IP46));
	KASSERT(npf_iscached(npc, NPC_TCP));

	/* Determine if there are any TCP options, get their length. */
	cnt = (th->th_off << 2) - sizeof(struct tcphdr);
	if (cnt <= 0) {
		/* No options. */
		return false;
	}
	KASSERT(cnt <= MAX_TCPOPTLEN);

	/* Fetch all the options at once. */
	nbuf_reset(nbuf);
	const int step = npc->npc_hlen + sizeof(struct tcphdr);
	if ((cp = nbuf_advance(nbuf, step, cnt)) == NULL) {
		ok = false;
		goto done;
	}

	/* Scan the options.
	 */
	for (; cnt > 0; cnt -= optlen, cp += optlen) {
		opt = cp[0];
		if (opt == TCPOPT_EOL)
			break;
		if (opt == TCPOPT_NOP)
			optlen = 1;
		else {
			if (cnt < 2)
				break;
			optlen = cp[1];
			/* Malformed option length: stop scanning. */
			if (optlen < 2 || optlen > cnt)
				break;
		}

		switch (opt) {
		case TCPOPT_MAXSEG:
			if (optlen != TCPOLEN_MAXSEG)
				continue;
			if (mss) {
				/* memcpy: the value may be unaligned. */
				memcpy(mss, cp + 2, sizeof(uint16_t));
			}
			break;
		case TCPOPT_WINDOW:
			if (optlen != TCPOLEN_WINDOW)
				continue;
			/* Clamp the shift count to TCP_MAX_WINSHIFT. */
			val = *(cp + 2);
			*wscale = (val > TCP_MAX_WINSHIFT) ? TCP_MAX_WINSHIFT : val;
			break;
		default:
			break;
		}
	}

	ok = true;
done:
	/* The nbuf data references may have been reset: re-cache. */
	if (nbuf_flag_p(nbuf, NBUF_DATAREF_RESET)) {
		npf_recache(npc);
	}
	return ok;
}

/*
 * npf_set_mss: set the MSS.
 *
 * => Sets *mid when the MSS value is not 16-bit aligned within the
 *    options; in that case, the two 16-bit words overlapping the MSS
 *    (before and after the rewrite) are stored via 'old' and 'new'
 *    for the checksum fixup by the caller.
 */
bool
npf_set_mss(npf_cache_t *npc, uint16_t mss, uint16_t *old, uint16_t *new,
    bool *mid)
{
	nbuf_t *nbuf = npc->npc_nbuf;
	const struct tcphdr *th = npc->npc_l4.tcp;
	int cnt, optlen = 0;
	uint8_t *cp, *base, opt;
	bool ok;

	KASSERT(npf_iscached(npc, NPC_IP46));
	KASSERT(npf_iscached(npc, NPC_TCP));

	/* Determine if there are any TCP options, get their length. */
	cnt = (th->th_off << 2) - sizeof(struct tcphdr);
	if (cnt <= 0) {
		/* No options. */
		return false;
	}
	KASSERT(cnt <= MAX_TCPOPTLEN);

	/* Fetch all the options at once. */
	nbuf_reset(nbuf);
	const int step = npc->npc_hlen + sizeof(struct tcphdr);
	if ((base = nbuf_advance(nbuf, step, cnt)) == NULL) {
		ok = false;
		goto done;
	}

	/* Scan the options.
	 */
	for (cp = base; cnt > 0; cnt -= optlen, cp += optlen) {
		opt = cp[0];
		if (opt == TCPOPT_EOL)
			break;
		if (opt == TCPOPT_NOP)
			optlen = 1;
		else {
			if (cnt < 2)
				break;
			optlen = cp[1];
			/* Malformed option length: stop scanning. */
			if (optlen < 2 || optlen > cnt)
				break;
		}

		switch (opt) {
		case TCPOPT_MAXSEG:
			if (optlen != TCPOLEN_MAXSEG)
				continue;
			if (((cp + 2) - base) % sizeof(uint16_t) != 0) {
				/*
				 * The MSS is not 16-bit aligned within the
				 * options: capture the two words overlapping
				 * it before and after the rewrite, so the
				 * caller can fix up the checksum.
				 */
				*mid = true;
				memcpy(&old[0], cp + 1, sizeof(uint16_t));
				memcpy(&old[1], cp + 3, sizeof(uint16_t));
				memcpy(cp + 2, &mss, sizeof(uint16_t));
				memcpy(&new[0], cp + 1, sizeof(uint16_t));
				memcpy(&new[1], cp + 3, sizeof(uint16_t));
			} else {
				/* Aligned case: rewrite the MSS in-place. */
				*mid = false;
				memcpy(cp + 2, &mss, sizeof(uint16_t));
			}
			break;
		default:
			break;
		}
	}

	ok = true;
done:
	/* The nbuf data references may have been reset: re-cache. */
	if (nbuf_flag_p(nbuf, NBUF_DATAREF_RESET)) {
		npf_recache(npc);
	}
	return ok;
}

/*
 * npf_cache_ip: cache the IPv4 or IPv6 header, setting the address
 * pointers, the header length and the next-protocol value in npc.
 *
 * => Returns the NPC_* flags describing what was cached, or NPC_FMTERR
 *    on a malformed packet.
 */
static int
npf_cache_ip(npf_cache_t *npc, nbuf_t *nbuf)
{
	const void *nptr = nbuf_dataptr(nbuf);
	const uint8_t ver = *(const uint8_t *)nptr;
	int flags = 0;

	/*
	 * We intentionally don't read the L4 payload after IPPROTO_AH.
	 */

	switch (ver >> 4) {
	case IPVERSION: {
		struct ip *ip;

		ip = nbuf_ensure_contig(nbuf, sizeof(struct ip));
		if (ip == NULL) {
			return NPC_FMTERR;
		}

		/* Retrieve the complete header. */
		if ((u_int)(ip->ip_hl << 2) < sizeof(struct ip)) {
			return NPC_FMTERR;
		}
		ip = nbuf_ensure_contig(nbuf, (u_int)(ip->ip_hl << 2));
		if (ip == NULL) {
			return NPC_FMTERR;
		}

		if (ip->ip_off & ~htons(IP_DF | IP_RF)) {
			/* Note fragmentation. */
			flags |= NPC_IPFRAG;
		}

		/* Cache: layer 3 - IPv4.
		 */
		npc->npc_alen = sizeof(struct in_addr);
		npc->npc_ips[NPF_SRC] = (npf_addr_t *)&ip->ip_src;
		npc->npc_ips[NPF_DST] = (npf_addr_t *)&ip->ip_dst;
		npc->npc_hlen = ip->ip_hl << 2;
		npc->npc_proto = ip->ip_p;

		npc->npc_ip.v4 = ip;
		flags |= NPC_IP4;
		break;
	}

	case (IPV6_VERSION >> 4): {
		struct ip6_hdr *ip6;
		struct ip6_ext *ip6e;
		struct ip6_frag *ip6f;
		size_t off, hlen;
		int frag_present;

		ip6 = nbuf_ensure_contig(nbuf, sizeof(struct ip6_hdr));
		if (ip6 == NULL) {
			return NPC_FMTERR;
		}

		/*
		 * XXX: We don't handle IPv6 Jumbograms.
		 */

		/* Set initial next-protocol value. */
		hlen = sizeof(struct ip6_hdr);
		npc->npc_proto = ip6->ip6_nxt;
		npc->npc_hlen = hlen;

		frag_present = 0;

		/*
		 * Walk the extension headers, advancing by the length
		 * of the current header each iteration.
		 */
		off = nbuf_offset(nbuf);
		while ((ip6e = nbuf_advance(nbuf, hlen, sizeof(*ip6e))) != NULL) {
			/*
			 * Determine whether we are going to continue.
			 */
			switch (npc->npc_proto) {
			case IPPROTO_HOPOPTS:
			case IPPROTO_DSTOPTS:
			case IPPROTO_ROUTING:
				/* Length in 8-byte units, excluding the first. */
				hlen = (ip6e->ip6e_len + 1) << 3;
				break;
			case IPPROTO_FRAGMENT:
				/* Only one fragment header is permitted. */
				if (frag_present++)
					return NPC_FMTERR;
				ip6f = nbuf_ensure_contig(nbuf, sizeof(*ip6f));
				if (ip6f == NULL)
					return NPC_FMTERR;

				/* RFC 6946: Skip dummy (atomic) fragments. */
				if (!ntohs(ip6f->ip6f_offlg & IP6F_OFF_MASK) &&
				    !(ip6f->ip6f_offlg & IP6F_MORE_FRAG)) {
					hlen = sizeof(struct ip6_frag);
					break;
				}

				/* A real fragment: stop here, note it. */
				hlen = 0;
				flags |= NPC_IPFRAG;

				break;
			default:
				/* An upper-layer header: stop the walk. */
				hlen = 0;
				break;
			}

			if (!hlen) {
				break;
			}
			npc->npc_proto = ip6e->ip6e_nxt;
			npc->npc_hlen += hlen;
		}

		/* The advance failed mid-walk: malformed packet. */
		if (ip6e == NULL) {
			return NPC_FMTERR;
		}

		/*
		 * Re-fetch the header pointers (nbufs might have been
		 * reallocated).  Restore the original offset (if any).
		 */
		nbuf_reset(nbuf);
		ip6 = nbuf_dataptr(nbuf);
		if (off) {
			nbuf_advance(nbuf, off, 0);
		}

		/* Cache: layer 3 - IPv6. */
		npc->npc_alen = sizeof(struct in6_addr);
		npc->npc_ips[NPF_SRC] = (npf_addr_t *)&ip6->ip6_src;
		npc->npc_ips[NPF_DST] = (npf_addr_t *)&ip6->ip6_dst;

		npc->npc_ip.v6 = ip6;
		flags |= NPC_IP6;
		break;
	}
	default:
		/* Unknown IP version: nothing is cached. */
		break;
	}
	return flags;
}

/*
 * npf_cache_tcp: cache the TCP header, validating the data offset.
 *
 * => Returns the NPC_* flags, or NPC_FMTERR on a malformed header.
 */
static inline int
npf_cache_tcp(npf_cache_t *npc, nbuf_t *nbuf, unsigned hlen)
{
	struct tcphdr *th;

	th = nbuf_advance(nbuf, hlen, sizeof(struct tcphdr));
	if (__predict_false(th == NULL)) {
		return NPC_FMTERR;
	}
	/* The data offset must cover at least the TCP header itself. */
	if (__predict_false(th->th_off < 5)) {
		return NPC_FMTERR;
	}
	npc->npc_l4.tcp = th;
	return NPC_LAYER4 | NPC_TCP;
}

/*
 * npf_cache_ether: cache a copy of the Ethernet header into npc.
 */
int
npf_cache_ether(npf_cache_t *npc)
{
	struct mbuf *m = npc->npc_nbuf->nb_mbuf0;
	struct ether_header *ether;

	nbuf_unset_flag(npc->npc_nbuf, NBUF_DATAREF_RESET);
	/*
	 * NOTE(review): assumes the 14-byte Ethernet header resides
	 * fully within the first mbuf -- not verified here; confirm
	 * against the caller's contract.
	 */
	ether = mtod(m, struct ether_header *);
	if (__predict_false(ether == NULL))
		return NPC_FMTERR;
	memcpy(&npc->ether, ether, sizeof(npc->ether));

	KASSERT(nbuf_flag_p(npc->npc_nbuf, NBUF_DATAREF_RESET) == 0);
	return NPC_LAYER2;
}

/*
 * npf_cache_all: general routine to cache all relevant IP (v4 or v6)
 * and TCP, UDP or ICMP headers.
 *
 * => nbuf offset shall be set accordingly.
 */
int
npf_cache_all(npf_cache_t *npc)
{
	nbuf_t *nbuf = npc->npc_nbuf;
	int flags, l4flags;
	u_int hlen;

	/*
	 * This routine is a main point where the references are cached,
	 * therefore clear the flag as we reset.
	 */
again:
	nbuf_unset_flag(nbuf, NBUF_DATAREF_RESET);

	/*
	 * First, cache the L3 header (IPv4 or IPv6).
	 * If the IP packet is fragmented, then we cannot look into L4.
	 */
	flags = npf_cache_ip(npc, nbuf);
	if ((flags & NPC_IP46) == 0 || (flags & NPC_IPFRAG) != 0 ||
	    (flags & NPC_FMTERR) != 0) {
		goto out;
	}
	hlen = npc->npc_hlen;

	/*
	 * Note: we guarantee that the potential "Query Id" field of the
	 * ICMPv4/ICMPv6 packets is in the nbuf.  This field is used in
	 * the ICMP ALG.
	 */
	switch (npc->npc_proto) {
	case IPPROTO_TCP:
		/* Cache: layer 4 - TCP. */
		l4flags = npf_cache_tcp(npc, nbuf, hlen);
		break;
	case IPPROTO_UDP:
		/* Cache: layer 4 - UDP. */
		npc->npc_l4.udp = nbuf_advance(nbuf, hlen,
		    sizeof(struct udphdr));
		l4flags = NPC_LAYER4 | NPC_UDP;
		break;
	case IPPROTO_ICMP:
		/* Cache: layer 4 - ICMPv4. */
		npc->npc_l4.icmp = nbuf_advance(nbuf, hlen,
		    ICMP_MINLEN);
		l4flags = NPC_LAYER4 | NPC_ICMP;
		break;
	case IPPROTO_ICMPV6:
		/* Cache: layer 4 - ICMPv6. */
		npc->npc_l4.icmp6 = nbuf_advance(nbuf, hlen,
		    sizeof(struct icmp6_hdr));
		l4flags = NPC_LAYER4 | NPC_ICMP;
		break;
	default:
		/* Unrecognised L4 protocol: only L3 gets cached. */
		l4flags = 0;
		break;
	}

	/*
	 * Error out if nbuf_advance() failed.
	 */
	if (__predict_false(l4flags && !npc->npc_l4.hdr)) {
		goto err;
	}

	/* The data references were reset mid-way: start over. */
	if (nbuf_flag_p(nbuf, NBUF_DATAREF_RESET)) {
		goto again;
	}

	flags |= l4flags;
	npc->npc_info |= flags;
	return flags;

err:
	flags = NPC_FMTERR;
out:
	nbuf_unset_flag(nbuf, NBUF_DATAREF_RESET);
	npc->npc_info |= flags;
	return flags;
}

/*
 * npf_recache: re-cache the headers (e.g. after an nbuf reallocation).
 *
 * => The previously cached L3/L4 information must be re-cachable.
 */
void
npf_recache(npf_cache_t *npc)
{
	nbuf_t *nbuf = npc->npc_nbuf;
	const int mflags __diagused = npc->npc_info & (NPC_IP46 | NPC_LAYER4);
	int flags __diagused;

	nbuf_reset(nbuf);
	npc->npc_info = 0;
	flags = npf_cache_all(npc);

	/* Everything that was cached before must be cached again. */
	KASSERT((flags & mflags) == mflags);
	KASSERT(nbuf_flag_p(nbuf, NBUF_DATAREF_RESET) == 0);
}

/*
 * npf_rwrip: rewrite required IP address.
 */
bool
npf_rwrip(const npf_cache_t *npc, u_int which, const npf_addr_t *addr)
{
	KASSERT(npf_iscached(npc, NPC_IP46));
	KASSERT(which == NPF_SRC || which == NPF_DST);

	/* Overwrite the cached source or destination address in-place. */
	memcpy(npc->npc_ips[which], addr, npc->npc_alen);
	return true;
}

/*
 * npf_rwrport: rewrite required TCP/UDP port.
 */
bool
npf_rwrport(const npf_cache_t *npc, u_int which, const in_port_t port)
{
	const int proto = npc->npc_proto;
	in_port_t *oport;

	KASSERT(npf_iscached(npc, NPC_TCP) || npf_iscached(npc, NPC_UDP));
	KASSERT(proto == IPPROTO_TCP || proto == IPPROTO_UDP);
	KASSERT(which == NPF_SRC || which == NPF_DST);

	/* Get the offset and store the port in it. */
	if (proto == IPPROTO_TCP) {
		struct tcphdr *th = npc->npc_l4.tcp;
		oport = (which == NPF_SRC) ? &th->th_sport : &th->th_dport;
	} else {
		struct udphdr *uh = npc->npc_l4.udp;
		oport = (which == NPF_SRC) ? &uh->uh_sport : &uh->uh_dport;
	}
	/* NOTE(review): 'port' appears to be in network byte-order (it is
	 * stored without conversion) -- confirm against the callers. */
	memcpy(oport, &port, sizeof(in_port_t));
	return true;
}

/*
 * npf_rwrcksum: rewrite IPv4 and/or TCP/UDP checksum.
743 */ 744 bool 745 npf_rwrcksum(const npf_cache_t *npc, u_int which, 746 const npf_addr_t *addr, const in_port_t port) 747 { 748 const npf_addr_t *oaddr = npc->npc_ips[which]; 749 const int proto = npc->npc_proto; 750 const int alen = npc->npc_alen; 751 uint16_t cksum, *ocksum; 752 struct tcphdr *th; 753 struct udphdr *uh; 754 in_port_t oport; 755 756 KASSERT(npf_iscached(npc, NPC_LAYER4)); 757 KASSERT(which == NPF_SRC || which == NPF_DST); 758 759 if (npf_iscached(npc, NPC_IP4)) { 760 struct ip *ip = npc->npc_ip.v4; 761 uint16_t ipsum = ip->ip_sum; 762 763 /* Recalculate IPv4 checksum and rewrite. */ 764 ip->ip_sum = npf_addr_cksum(ipsum, alen, oaddr, addr); 765 } else { 766 /* No checksum for IPv6. */ 767 KASSERT(npf_iscached(npc, NPC_IP6)); 768 } 769 770 /* 771 * Calculate TCP/UDP checksum: 772 * - Skip if UDP and the current checksum is zero. 773 * - Fixup the IP address change. 774 * - Fixup the port change, if required (non-zero). 775 */ 776 switch (proto) { 777 case IPPROTO_TCP: 778 KASSERT(npf_iscached(npc, NPC_TCP)); 779 th = npc->npc_l4.tcp; 780 ocksum = &th->th_sum; 781 oport = (which == NPF_SRC) ? th->th_sport : th->th_dport; 782 break; 783 case IPPROTO_UDP: 784 KASSERT(npf_iscached(npc, NPC_UDP)); 785 uh = npc->npc_l4.udp; 786 ocksum = &uh->uh_sum; 787 if (*ocksum == 0) { 788 /* No need to update. */ 789 return true; 790 } 791 oport = (which == NPF_SRC) ? uh->uh_sport : uh->uh_dport; 792 break; 793 case IPPROTO_ICMP: 794 case IPPROTO_ICMPV6: 795 default: 796 /* Nothing else to do for ICMP. */ 797 return true; 798 } 799 800 /* 801 * Update and rewrite the TCP/UDP checksum. 802 */ 803 cksum = npf_addr_cksum(*ocksum, alen, oaddr, addr); 804 if (port) { 805 cksum = npf_fixup16_cksum(cksum, oport, port); 806 } 807 memcpy(ocksum, &cksum, sizeof(uint16_t)); 808 return true; 809 } 810 811 /* 812 * npf_napt_rwr: perform address and/or port translation. 
 */
int
npf_napt_rwr(const npf_cache_t *npc, u_int which,
    const npf_addr_t *addr, const in_addr_t port)
{
	const unsigned proto = npc->npc_proto;

	/*
	 * Rewrite IP and/or TCP/UDP checksums first, since we need the
	 * current (old) address/port for the calculations.  Then perform
	 * the address translation i.e. rewrite source or destination.
	 */
	if (!npf_rwrcksum(npc, which, addr, port)) {
		return EINVAL;
	}
	if (!npf_rwrip(npc, which, addr)) {
		return EINVAL;
	}
	if (port == 0) {
		/* Done. */
		return 0;
	}

	switch (proto) {
	case IPPROTO_TCP:
	case IPPROTO_UDP:
		/* Rewrite source/destination port. */
		if (!npf_rwrport(npc, which, port)) {
			return EINVAL;
		}
		break;
	case IPPROTO_ICMP:
	case IPPROTO_ICMPV6:
		KASSERT(npf_iscached(npc, NPC_ICMP));
		/* Nothing. */
		break;
	default:
		return ENOTSUP;
	}
	return 0;
}

/*
 * IPv6-to-IPv6 Network Prefix Translation (NPTv6), as per RFC 6296.
 *
 * => Rewrites the prefix of the cached source/destination address and
 *    applies the checksum-neutral adjustment 'adj' to one 16-bit word.
 */
int
npf_npt66_rwr(const npf_cache_t *npc, u_int which, const npf_addr_t *pref,
    npf_netmask_t len, uint16_t adj)
{
	npf_addr_t *addr = npc->npc_ips[which];
	unsigned remnant, word, preflen = len >> 4;
	uint32_t sum;

	KASSERT(which == NPF_SRC || which == NPF_DST);

	if (!npf_iscached(npc, NPC_IP6)) {
		return EINVAL;
	}
	if (len <= 48) {
		/*
		 * The word to adjust.  Cannot translate the 0xffff
		 * subnet if /48 or shorter.
		 */
		word = 3;
		if (addr->word16[word] == 0xffff) {
			return EINVAL;
		}
	} else {
		/*
		 * Also, all 0s or 1s in the host part are disallowed for
		 * longer than /48 prefixes.
		 */
		if ((addr->word32[2] == 0 && addr->word32[3] == 0) ||
		    (addr->word32[2] == ~0U && addr->word32[3] == ~0U))
			return EINVAL;

		/* Determine the 16-bit word to adjust.
		 */
		for (word = 4; word < 8; word++)
			if (addr->word16[word] != 0xffff)
				break;
	}

	/* Rewrite the prefix. */
	for (unsigned i = 0; i < preflen; i++) {
		addr->word16[i] = pref->word16[i];
	}

	/*
	 * If prefix length is within a 16-bit word (not dividable by 16),
	 * then prepare a mask, determine the word and adjust it.
	 */
	if ((remnant = len - (preflen << 4)) != 0) {
		const uint16_t wordmask = (1U << remnant) - 1;
		const unsigned i = preflen;

		addr->word16[i] = (pref->word16[i] & wordmask) |
		    (addr->word16[i] & ~wordmask);
	}

	/*
	 * Performing 1's complement sum/difference to keep the transport
	 * checksum neutral.
	 */
	sum = addr->word16[word] + adj;
	while (sum >> 16) {
		/* Fold the carries. */
		sum = (sum >> 16) + (sum & 0xffff);
	}
	if (sum == 0xffff) {
		/* RFC 1071: normalise negative zero. */
		sum = 0x0000;
	}
	addr->word16[word] = sum;
	return 0;
}

#if defined(DDB) || defined(_NPF_TESTING)

/*
 * npf_addr_dump: return a printable representation of the address.
 * Only IPv4 addresses are formatted; IPv6 yields a placeholder.
 */
const char *
npf_addr_dump(const npf_addr_t *addr, int alen)
{
	if (alen == sizeof(struct in_addr)) {
		struct in_addr ip;
		/* memcpy: the address may be unaligned. */
		memcpy(&ip, addr, alen);
		return inet_ntoa(ip);
	}
	return "[IPv6]";
}

#endif