1 /*- 2 * Copyright (c) 2011-2025 The NetBSD Foundation, Inc. 3 * All rights reserved. 4 * 5 * This material is based upon work partially supported by The 6 * NetBSD Foundation under a contract with Mindaugas Rasiukevicius. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 18 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 19 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 20 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 21 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 22 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 23 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 24 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 25 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 26 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 27 * POSSIBILITY OF SUCH DAMAGE. 28 */ 29 30 /* 31 * npfctl(8) building of the configuration. 32 */ 33 34 #include <sys/cdefs.h> 35 __RCSID("$NetBSD: npf_build.c,v 1.63 2025/08/20 16:25:19 joe Exp $"); 36 37 #include <sys/types.h> 38 #define __FAVOR_BSD 39 #include <netinet/tcp.h> 40 41 #include <stdlib.h> 42 #include <inttypes.h> 43 #include <string.h> 44 #include <ctype.h> 45 #include <unistd.h> 46 #include <fcntl.h> 47 #include <errno.h> 48 #include <err.h> 49 50 #include <pcap/pcap.h> 51 52 #include "npfctl.h" 53 54 #define MAX_RULE_NESTING 16 55 56 static nl_config_t * npf_conf = NULL; 57 static bool npf_debug = false; 58 static nl_rule_t * the_rule = NULL; 59 static bool npf_conf_built = false; 60 61 static bool l2_group = false; 62 static nl_rule_t * defgroup_l3 = NULL; 63 static nl_rule_t * defgroup_l2 = NULL; 64 static nl_rule_t * current_group[MAX_RULE_NESTING]; 65 static unsigned rule_nesting_level = 0; 66 static unsigned npfctl_tid_counter = 0; 67 68 static void npfctl_dump_bpf(struct bpf_program *); 69 70 void 71 npfctl_config_init(bool debug) 72 { 73 npf_conf = npf_config_create(); 74 if (npf_conf == NULL) { 75 errx(EXIT_FAILURE, "npf_config_create() failed"); 76 } 77 memset(current_group, 0, sizeof(current_group)); 78 npf_debug = debug; 79 npf_conf_built = false; 80 } 81 82 nl_config_t * 83 npfctl_config_ref(void) 84 { 85 return npf_conf; 86 } 87 88 nl_rule_t * 89 npfctl_rule_ref(void) 90 { 91 return the_rule; 92 } 93 94 void 95 npfctl_config_build(void) 96 { 97 /* Run-once. */ 98 if (npf_conf_built) { 99 return; 100 } 101 102 /* 103 * The layer 3 default group is mandatory. Note: npfctl_build_group_end() 104 * skipped the default rule, since it must be the last one. 105 * if you set a layer 2 rule, layer 2 default also becomes mandatory. 106 * if you don't set layer 2 rules, only layer 3 default is mandatory 107 */ 108 if (!defgroup_l3) { 109 errx(EXIT_FAILURE, "layer 3 default group was not defined"); 110 } 111 112 if (l2_group & !defgroup_l2) { 113 errx(EXIT_FAILURE, "layer 2 default group not defined"); 114 } 115 assert(rule_nesting_level == 0); 116 npf_rule_insert(npf_conf, NULL, defgroup_l3); 117 118 if (defgroup_l2) 119 npf_rule_insert(npf_conf, NULL, defgroup_l2); 120 121 npf_config_build(npf_conf); 122 npf_conf_built = true; 123 } 124 125 int 126 npfctl_config_send(int fd) 127 { 128 npf_error_t errinfo; 129 int error = 0; 130 131 npfctl_config_build(); 132 error = npf_config_submit(npf_conf, fd, &errinfo); 133 if (error) { 134 npfctl_print_error(&errinfo); 135 } 136 npf_config_destroy(npf_conf); 137 return error; 138 } 139 140 void 141 npfctl_config_save(nl_config_t *ncf, const char *outfile) 142 { 143 void *blob; 144 size_t len; 145 int fd; 146 147 blob = npf_config_export(ncf, &len); 148 if (!blob) { 149 err(EXIT_FAILURE, "npf_config_export"); 150 } 151 if ((fd = open(outfile, O_CREAT | O_TRUNC | O_WRONLY, 0644)) == -1) { 152 err(EXIT_FAILURE, "could not open %s", outfile); 153 } 154 if (write(fd, blob, len) != (ssize_t)len) { 155 err(EXIT_FAILURE, "write to %s failed", outfile); 156 } 157 free(blob); 158 close(fd); 159 } 160 161 bool 162 npfctl_debug_addif(const char *ifname) 163 { 164 const char tname[] = "npftest"; 165 const size_t tnamelen = sizeof(tname) - 1; 166 167 if (npf_debug) { 168 _npf_debug_addif(npf_conf, ifname); 169 return strncmp(ifname, tname, tnamelen) == 0; 170 } 171 return 0; 172 } 173 174 nl_table_t * 175 npfctl_table_getbyname(nl_config_t *ncf, const char *name) 176 { 177 nl_iter_t i = NPF_ITER_BEGIN; 178 nl_table_t *tl; 179 180 /* XXX dynamic ruleset */ 181 if (!ncf) { 182 return NULL; 183 } 184 while ((tl = npf_table_iterate(ncf, &i)) != NULL) { 185 const char *tname = npf_table_getname(tl); 186 if (strcmp(tname, name) == 0) { 187 break; 188 } 189 } 190 return tl; 191 } 192 193 unsigned 194 npfctl_table_getid(const char *name) 195 { 196 nl_table_t *tl; 197 198 tl = npfctl_table_getbyname(npf_conf, name); 199 return tl ? npf_table_getid(tl) : (unsigned)-1; 200 } 201 202 const char * 203 npfctl_table_getname(nl_config_t *ncf, unsigned tid, bool *ifaddr) 204 { 205 const char *name = NULL; 206 nl_iter_t i = NPF_ITER_BEGIN; 207 nl_table_t *tl; 208 209 while ((tl = npf_table_iterate(ncf, &i)) != NULL) { 210 if (npf_table_getid(tl) == tid) { 211 name = npf_table_getname(tl); 212 break; 213 } 214 } 215 if (!name) { 216 return NULL; 217 } 218 if (!strncmp(name, NPF_IFNET_TABLE_PREF, NPF_IFNET_TABLE_PREFLEN)) { 219 name += NPF_IFNET_TABLE_PREFLEN; 220 *ifaddr = true; 221 } else { 222 *ifaddr = false; 223 } 224 return name; 225 } 226 227 static in_port_t 228 npfctl_get_singleport(const npfvar_t *vp) 229 { 230 port_range_t *pr; 231 in_port_t *port; 232 233 if (npfvar_get_count(vp) > 1) { 234 yyerror("multiple ports are not valid"); 235 } 236 pr = npfvar_get_data(vp, NPFVAR_PORT_RANGE, 0); 237 if (pr->pr_start != pr->pr_end) { 238 yyerror("port range is not valid"); 239 } 240 port = &pr->pr_start; 241 return *port; 242 } 243 244 static fam_addr_mask_t * 245 npfctl_get_singlefam(const npfvar_t *vp) 246 { 247 fam_addr_mask_t *am; 248 249 if (npfvar_get_type(vp, 0) != NPFVAR_FAM) { 250 yyerror("map segment must be an address or network"); 251 } 252 if (npfvar_get_count(vp) > 1) { 253 yyerror("map segment cannot have multiple static addresses"); 254 } 255 am = npfvar_get_data(vp, NPFVAR_FAM, 0); 256 if (am == NULL) { 257 yyerror("invalid map segment"); 258 } 259 return am; 260 } 261 262 static unsigned 263 npfctl_get_singletable(const npfvar_t *vp) 264 { 265 unsigned *tid; 266 267 if (npfvar_get_count(vp) > 1) { 268 yyerror("invalid use of multiple tables"); 269 } 270 tid = npfvar_get_data(vp, NPFVAR_TABLE, 0); 271 assert(tid != NULL); 272 return *tid; 273 } 274 275 static bool 276 npfctl_build_fam(npf_bpf_t *ctx, sa_family_t family, 277 fam_addr_mask_t *fam, unsigned opts) 278 { 279 /* 280 * If family is specified, address does not match it and the 281 * address is extracted from the interface, then simply ignore. 282 * Otherwise, address of invalid family was passed manually. 283 */ 284 if (family != AF_UNSPEC && family != fam->fam_family) { 285 if (!fam->fam_ifindex) { 286 yyerror("specified address is not of the required " 287 "family %d", family); 288 } 289 return false; 290 } 291 292 family = fam->fam_family; 293 if (family != AF_INET && family != AF_INET6) { 294 yyerror("family %d is not supported", family); 295 } 296 297 /* 298 * Optimise 0.0.0.0/0 case to be NOP. Otherwise, address with 299 * zero mask would never match and therefore is not valid. 300 */ 301 if (fam->fam_mask == 0) { 302 if (!npfctl_addr_iszero(&fam->fam_addr)) { 303 yyerror("filter criterion would never match"); 304 } 305 return false; 306 } 307 308 npfctl_bpf_cidr(ctx, opts, family, &fam->fam_addr, fam->fam_mask); 309 return true; 310 } 311 312 static void 313 build_vars(npf_bpf_t *ctx, sa_family_t family, npfvar_t *vars, int opts) 314 { 315 size_t var_cnt = npfvar_get_count(vars); 316 for (unsigned i = 0; i < var_cnt; i++) { 317 const unsigned type = npfvar_getfilt_type(vars, i); 318 void *data = npfvar_getfilt_data(vars, type, i); 319 320 assert(data != NULL); 321 322 switch (type) { 323 case NPFVAR_VAR_ID: { 324 /* allow us to go through nested variables ourselves */ 325 npfvar_t *rvp = npfvar_lookup(data); 326 if (rvp == NULL) 327 yyerror("variable not found"); 328 329 build_vars(ctx, family, rvp, opts); 330 break; 331 } 332 case NPFVAR_FAM: { 333 fam_addr_mask_t *fam = data; 334 npfctl_build_fam(ctx, family, fam, opts); 335 break; 336 } 337 case NPFVAR_PORT_RANGE: { 338 port_range_t *pr = data; 339 npfctl_bpf_ports(ctx, opts, pr->pr_start, pr->pr_end); 340 break; 341 } 342 case NPFVAR_TABLE: { 343 unsigned tid; 344 memcpy(&tid, data, sizeof(unsigned)); 345 npfctl_bpf_table(ctx, opts, tid); 346 break; 347 } 348 case NPFVAR_MAC: { 349 struct ether_addr *eth = data; 350 npfctl_bpf_ether(ctx, opts, eth); 351 break; 352 } 353 default: 354 yyerror("unexpected %s", npfvar_type(type)); 355 } 356 } 357 } 358 359 static void 360 npfctl_build_vars(npf_bpf_t *ctx, sa_family_t family, npfvar_t *vars, int opts) 361 { 362 npfctl_bpf_group_enter(ctx, (opts & MATCH_INVERT) != 0); 363 build_vars(ctx, family, vars, opts); 364 npfctl_bpf_group_exit(ctx); 365 } 366 367 static void 368 npfctl_build_proto_block(npf_bpf_t *ctx, const opt_proto_t *op, bool multiple) 369 { 370 const unsigned proto = op->op_proto; 371 npfvar_t *popts = op->op_opts; 372 373 if (multiple && popts) { 374 yyerror("multiple protocol options with protocol filters " 375 "are not yet supported"); 376 } 377 378 /* Build the protocol filter. */ 379 npfctl_bpf_proto(ctx, proto); 380 381 switch (proto) { 382 case IPPROTO_TCP: 383 /* Build TCP flags matching (optional). */ 384 if (popts) { 385 uint8_t *tf, *tf_mask; 386 387 assert(npfvar_get_count(popts) == 2); 388 tf = npfvar_get_data(popts, NPFVAR_TCPFLAG, 0); 389 tf_mask = npfvar_get_data(popts, NPFVAR_TCPFLAG, 1); 390 npfctl_bpf_tcpfl(ctx, *tf, *tf_mask); 391 } 392 break; 393 case IPPROTO_ICMP: 394 case IPPROTO_ICMPV6: 395 /* Build ICMP/ICMPv6 type and/or code matching. */ 396 if (popts) { 397 int *icmp_type, *icmp_code; 398 399 assert(npfvar_get_count(popts) == 2); 400 icmp_type = npfvar_get_data(popts, NPFVAR_ICMP, 0); 401 icmp_code = npfvar_get_data(popts, NPFVAR_ICMP, 1); 402 npfctl_bpf_icmp(ctx, *icmp_type, *icmp_code); 403 } 404 break; 405 default: 406 /* No options for other protocols. */ 407 break; 408 } 409 } 410 411 static void 412 npfctl_build_proto(npf_bpf_t *ctx, const npfvar_t *vars) 413 { 414 const unsigned count = npfvar_get_count(vars); 415 416 /* 417 * XXX: For now, just do not support multiple protocol 418 * blocks with options; this is because npfctl_bpf_tcpfl() 419 * and npfctl_bpf_icmp() will not work correctly in a group. 420 */ 421 if (count == 1) { 422 const opt_proto_t *op = npfvar_get_data(vars, NPFVAR_PROTO, 0); 423 npfctl_build_proto_block(ctx, op, false); 424 return; 425 } 426 427 npfctl_bpf_group_enter(ctx, false); 428 for (unsigned i = 0; i < count; i++) { 429 const opt_proto_t *op = npfvar_get_data(vars, NPFVAR_PROTO, i); 430 npfctl_build_proto_block(ctx, op, true); 431 } 432 npfctl_bpf_group_exit(ctx); 433 } 434 435 static bool 436 npfctl_check_proto(const npfvar_t *vars, bool *non_tcpudp, bool *tcp_with_nofl) 437 { 438 unsigned count; 439 440 *non_tcpudp = false; 441 *tcp_with_nofl = false; 442 443 if (vars == NULL) { 444 return false; 445 } 446 447 count = npfvar_get_count(vars); 448 for (unsigned i = 0; i < count; i++) { 449 const opt_proto_t *op = npfvar_get_data(vars, NPFVAR_PROTO, i); 450 451 switch (op->op_proto) { 452 case IPPROTO_TCP: 453 *tcp_with_nofl = op->op_opts == NULL; 454 break; 455 case IPPROTO_UDP: 456 case -1: 457 break; 458 default: 459 *non_tcpudp = true; 460 break; 461 } 462 } 463 return count != 0; 464 } 465 466 static bool 467 build_l3_code(npf_bpf_t *bc, nl_rule_t *rl, sa_family_t family, const npfvar_t *popts, 468 const filt_opts_t *fopts) 469 { 470 unsigned opts; 471 const addr_port_t *apfrom = &fopts->filt.opt3.fo_from; 472 const addr_port_t *apto = &fopts->filt.opt3.fo_to; 473 bool any_proto, any_addrs, any_ports, stateful; 474 bool any_l4proto, non_tcpudp, tcp_with_nofl; 475 476 /* 477 * Gather some information about the protocol options, if any. 478 * Check the filter criteria in general -- if none specified, 479 * then no byte-code. 480 */ 481 any_l4proto = npfctl_check_proto(popts, &non_tcpudp, &tcp_with_nofl); 482 any_proto = (family != AF_UNSPEC) || any_l4proto; 483 any_addrs = apfrom->ap_netaddr || apto->ap_netaddr; 484 any_ports = apfrom->ap_portrange || apto->ap_portrange; 485 stateful = (npf_rule_getattr(rl) & NPF_RULE_STATEFUL) != 0; 486 if (!any_proto && !any_addrs && !any_ports && !stateful) { 487 return false; 488 } 489 490 /* 491 * Sanity check: ports can only be used with TCP or UDP protocol. 492 */ 493 if (any_ports && non_tcpudp) { 494 yyerror("invalid filter options for given the protocol(s)"); 495 } 496 497 /* Build layer 3 and 4 protocol blocks. */ 498 if (family != AF_UNSPEC) { 499 npfctl_bpf_ipver(bc, family); 500 } 501 if (any_l4proto) { 502 npfctl_build_proto(bc, popts); 503 } 504 505 /* 506 * If this is a stateful rule and TCP flags are not specified, 507 * then add "flags S/SAFR" filter for TCP protocol case. 508 */ 509 if (stateful && (!any_l4proto || tcp_with_nofl)) { 510 npfctl_bpf_tcpfl(bc, TH_SYN, TH_SYN | TH_ACK | TH_FIN | TH_RST); 511 } 512 513 /* Build IP address blocks. */ 514 opts = MATCH_SRC | (fopts->fo_finvert ? MATCH_INVERT : 0); 515 npfctl_build_vars(bc, family, apfrom->ap_netaddr, opts); 516 opts = MATCH_DST | (fopts->fo_tinvert ? MATCH_INVERT : 0); 517 npfctl_build_vars(bc, family, apto->ap_netaddr, opts); 518 519 /* 520 * Build the port-range blocks. If no protocol is specified, 521 * then we implicitly filter for the TCP / UDP protocols. 522 */ 523 if (any_ports && !any_l4proto) { 524 npfctl_bpf_group_enter(bc, false); 525 npfctl_bpf_proto(bc, IPPROTO_TCP); 526 npfctl_bpf_proto(bc, IPPROTO_UDP); 527 npfctl_bpf_group_exit(bc); 528 } 529 530 npfctl_build_vars(bc, family, apfrom->ap_portrange, MATCH_SRC); 531 npfctl_build_vars(bc, family, apto->ap_portrange, MATCH_DST); 532 533 return true; 534 } 535 536 static bool 537 build_l2_code(npf_bpf_t *bc, const filt_opts_t *fopts) 538 { 539 unsigned opts; 540 npfvar_t *ap_from = fopts->filt.opt2.from_mac; 541 npfvar_t *ap_to = fopts->filt.opt2.to_mac; 542 const uint16_t ether_type = fopts->filt.opt2.ether_type; 543 bool addr_or_ether; 544 545 addr_or_ether = ap_from || ap_to || ether_type; 546 if(!addr_or_ether) 547 return false; 548 549 if (ether_type != 0) { 550 fetch_ether_type(bc, ether_type); 551 } 552 553 /* Build ether address blocks. */ 554 opts = MATCH_DST | (fopts->fo_tinvert ? MATCH_INVERT : 0); 555 npfctl_build_vars(bc, 0, ap_to, opts); 556 opts = MATCH_SRC | (fopts->fo_finvert ? MATCH_INVERT : 0); 557 npfctl_build_vars(bc, 0, ap_from, opts); 558 559 return true; 560 } 561 562 static bool 563 npfctl_build_code(nl_rule_t *rl, sa_family_t family, const npfvar_t *popts, 564 const filt_opts_t *fopts) 565 { 566 npf_bpf_t *bc; 567 size_t len; 568 uint32_t layer = fopts->layer; 569 570 bc = npfctl_bpf_create(); 571 if (layer == NPF_RULE_LAYER_3) { 572 if (!build_l3_code(bc, rl, family, popts, fopts)) 573 return false; 574 } else if (layer == NPF_RULE_LAYER_2) { 575 if (!build_l2_code(bc, fopts)) 576 return false; 577 } else { 578 yyerror("%s: layer not supported", __func__); 579 } 580 581 /* Set the byte-code marks, if any. */ 582 const void *bmarks = npfctl_bpf_bmarks(bc, &len); 583 if (bmarks && npf_rule_setinfo(rl, bmarks, len) != 0) { 584 errx(EXIT_FAILURE, "npf_rule_setinfo"); 585 } 586 587 /* Complete BPF byte-code and pass to the rule. */ 588 struct bpf_program *bf = npfctl_bpf_complete(bc); 589 if (bf == NULL) { 590 npfctl_bpf_destroy(bc); 591 return true; 592 } 593 len = bf->bf_len * sizeof(struct bpf_insn); 594 595 if (npf_rule_setcode(rl, NPF_CODE_BPF, bf->bf_insns, len) != 0) { 596 errx(EXIT_FAILURE, "npf_rule_setcode"); 597 } 598 npfctl_dump_bpf(bf); 599 npfctl_bpf_destroy(bc); 600 601 return true; 602 } 603 604 static void 605 npfctl_build_pcap(nl_rule_t *rl, const char *filter) 606 { 607 const size_t maxsnaplen = 64 * 1024; 608 struct bpf_program bf; 609 size_t len; 610 pcap_t *pd; 611 612 pd = pcap_open_dead(DLT_RAW, maxsnaplen); 613 if (pd == NULL) { 614 err(EXIT_FAILURE, "pcap_open_dead"); 615 } 616 617 if (pcap_compile(pd, &bf, 618 filter, 1, PCAP_NETMASK_UNKNOWN) == -1) { 619 yyerror("invalid pcap-filter(7) syntax"); 620 } 621 len = bf.bf_len * sizeof(struct bpf_insn); 622 623 if (npf_rule_setcode(rl, NPF_CODE_BPF, bf.bf_insns, len) != 0) { 624 errx(EXIT_FAILURE, "npf_rule_setcode failed"); 625 } 626 npfctl_dump_bpf(&bf); 627 pcap_freecode(&bf); 628 pcap_close(pd); 629 } 630 631 static void 632 npfctl_build_rpcall(nl_rproc_t *rp, const char *name, npfvar_t *args) 633 { 634 npf_extmod_t *extmod; 635 nl_ext_t *extcall; 636 int error; 637 638 extmod = npf_extmod_get(name, &extcall); 639 if (extmod == NULL) { 640 yyerror("unknown rule procedure '%s'", name); 641 } 642 643 for (size_t i = 0; i < npfvar_get_count(args); i++) { 644 const char *param, *value; 645 proc_param_t *p; 646 647 p = npfvar_get_data(args, NPFVAR_PROC_PARAM, i); 648 param = p->pp_param; 649 value = p->pp_value; 650 651 error = npf_extmod_param(extmod, extcall, param, value); 652 switch (error) { 653 case EINVAL: 654 yyerror("invalid parameter '%s'", param); 655 default: 656 break; 657 } 658 } 659 error = npf_rproc_extcall(rp, extcall); 660 if (error) { 661 yyerror(error == EEXIST ? 662 "duplicate procedure call" : "unexpected error"); 663 } 664 } 665 666 /* 667 * npfctl_build_rproc: create and insert a rule procedure. 668 */ 669 void 670 npfctl_build_rproc(const char *name, npfvar_t *procs) 671 { 672 nl_rproc_t *rp; 673 size_t i; 674 675 rp = npf_rproc_create(name); 676 if (rp == NULL) { 677 errx(EXIT_FAILURE, "%s failed", __func__); 678 } 679 680 for (i = 0; i < npfvar_get_count(procs); i++) { 681 proc_call_t *pc = npfvar_get_data(procs, NPFVAR_PROC, i); 682 npfctl_build_rpcall(rp, pc->pc_name, pc->pc_opts); 683 } 684 npf_rproc_insert(npf_conf, rp); 685 } 686 687 /* 688 * npfctl_build_maprset: create and insert a NAT ruleset. 689 */ 690 void 691 npfctl_build_maprset(const char *name, int attr, const char *ifname) 692 { 693 const int attr_di = (NPF_RULE_IN | NPF_RULE_OUT); 694 nl_rule_t *rl; 695 bool natset; 696 int err; 697 698 /* Validate the prefix. */ 699 err = npfctl_nat_ruleset_p(name, &natset); 700 if (!natset) { 701 yyerror("NAT ruleset names must be prefixed with `" 702 NPF_RULESET_MAP_PREF "`"); 703 } 704 if (err) { 705 yyerror("NAT ruleset is missing a name (only prefix found)"); 706 } 707 708 /* If no direction is not specified, then both. */ 709 if ((attr & attr_di) == 0) { 710 attr |= attr_di; 711 } 712 713 /* Allow only "in/out" attributes. */ 714 attr = NPF_RULE_GROUP | NPF_RULE_DYNAMIC | (attr & attr_di); 715 rl = npf_rule_create(name, attr, ifname); 716 npf_rule_setprio(rl, NPF_PRI_LAST); 717 npf_nat_insert(npf_conf, rl); 718 } 719 720 static void 721 npf_check_layer(const char **lstr, uint32_t lattr, const char *func) 722 { 723 if (lattr & NPF_RULE_LAYER_2) 724 *lstr = "layer 2"; 725 else if (lattr & NPF_RULE_LAYER_3) 726 *lstr = "layer 3"; 727 else 728 yyerror("%s: layer not yet supported", func); 729 } 730 731 static nl_rule_t * 732 set_defgroup(nl_rule_t *rl, nl_rule_t *def_group, int attr) 733 { 734 if (def_group) { 735 const char *str; 736 npf_check_layer(&str, attr, __func__); 737 yyerror("multiple %s default groups are not valid", str); 738 } 739 if (rule_nesting_level) { 740 yyerror("default group can only be at the top level"); 741 } 742 743 return rl; 744 } 745 746 /* 747 * npfctl_build_group: create a group, update the current group pointer 748 * and increase the nesting level. 749 */ 750 void 751 npfctl_build_group(const char *name, int attr, const char *ifname, bool def) 752 { 753 const int attr_di = (NPF_RULE_IN | NPF_RULE_OUT); 754 nl_rule_t *rl; 755 756 if (def || (attr & attr_di) == 0) { 757 attr |= attr_di; 758 } 759 760 rl = npf_rule_create(name, attr | NPF_RULE_GROUP, ifname); 761 npf_rule_setprio(rl, NPF_PRI_LAST); 762 if (def) { 763 if (attr & NPF_RULE_LAYER_3) { 764 defgroup_l3 = set_defgroup(rl, defgroup_l3, attr); 765 } 766 else if (attr & NPF_RULE_LAYER_2) { 767 defgroup_l2 = set_defgroup(rl, defgroup_l2, attr); 768 } 769 else { 770 yyerror("%s: layer not supported", __func__); 771 } 772 } else { 773 if (attr & NPF_RULE_LAYER_2) 774 l2_group = true; 775 } 776 777 /* Set the current group and increase the nesting level. */ 778 if (rule_nesting_level >= MAX_RULE_NESTING) { 779 yyerror("rule nesting limit reached"); 780 } 781 current_group[++rule_nesting_level] = rl; 782 } 783 784 void 785 npfctl_build_group_end(void) 786 { 787 nl_rule_t *parent, *group; 788 789 assert(rule_nesting_level > 0); 790 parent = current_group[rule_nesting_level - 1]; 791 group = current_group[rule_nesting_level]; 792 current_group[rule_nesting_level--] = NULL; 793 794 /* 795 * Note: 796 * - If the parent is NULL, then it is a global rule. 797 * - The default rule must be the last, so it is inserted later. 798 */ 799 if (group == defgroup_l3 || group == defgroup_l2) { 800 assert(parent == NULL); 801 return; 802 } 803 npf_rule_insert(npf_conf, parent, group); 804 } 805 806 /* 807 * this function is here to ensure that layer 2 rules are 808 * rightfully embedded in layer2 groups 809 * and vice versa. layer3 group => layer 3 rules 810 * does not allow setting layer 2 rules in layer 3 groups 811 */ 812 static uint32_t 813 npf_rule_layer_compat(nl_rule_t *cg, uint32_t layer) 814 { 815 uint32_t attr = attr = npf_rule_getattr(cg); 816 817 if ((attr & layer) == 0) { 818 /* only set the layer strings when you need them */ 819 const char *str; 820 npf_check_layer(&str, layer, __func__); 821 822 yyerror("cannot insert %s rules in this group" 823 " make sure to insert same layer rules in the same group ", str); 824 } 825 return layer; 826 } 827 828 /* 829 * npfctl_build_rule: create a rule, build byte-code from filter options, 830 * if any, and insert into the ruleset of current group, or set the rule. 831 */ 832 void 833 npfctl_build_rule(uint32_t attr, const char *ifname, sa_family_t family, 834 const npfvar_t *popts, const filt_opts_t *fopts, 835 const char *pcap_filter, const char *rproc) 836 { 837 nl_rule_t *rl, *cg; 838 const filt_opts_t empty_fopts = { 839 .uid = { .id = { 0, 0 }, .op = NPF_OP_NONE }, 840 .gid = { .id = { 0, 0 }, .op = NPF_OP_NONE }, 841 .layer = NPF_RULE_LAYER_3, 842 .fo_finvert = true, 843 .fo_tinvert = true 844 }; 845 846 if (fopts == NULL) 847 fopts = &empty_fopts; 848 849 attr |= (npf_conf ? 0 : NPF_RULE_DYNAMIC); 850 851 /* 852 * quickly check for group-rule layer compat 853 * if the filter layer matches group layer, 854 * set the layer bit in rule attribute for kernel 855 */ 856 if (npf_conf) { 857 cg = current_group[rule_nesting_level]; 858 attr |= npf_rule_layer_compat(cg, fopts->layer); 859 } else { 860 /* set the layer bit directly for dynamic rules */ 861 attr |= fopts->layer; 862 } 863 864 if (attr & NPF_RULE_LAYER_2 && attr & (NPF_RULE_RETRST | NPF_RULE_RETICMP)) 865 yyerror("return blocks not yet supported in layer 2"); 866 867 rl = npf_rule_create(NULL, attr, ifname); 868 if (pcap_filter) { 869 npfctl_build_pcap(rl, pcap_filter); 870 } else { 871 npfctl_build_code(rl, family, popts, fopts); 872 } 873 874 if (fopts->uid.op != NPF_OP_NONE) { 875 npf_rule_setrid(rl, fopts->uid, "r_user"); 876 } 877 878 if (fopts->gid.op != NPF_OP_NONE) { 879 npf_rule_setrid(rl, fopts->gid, "r_group"); 880 } 881 882 if (rproc) { 883 npf_rule_setproc(rl, rproc); 884 } 885 886 if (npf_conf) { 887 cg = current_group[rule_nesting_level]; 888 889 if (rproc && !npf_rproc_exists_p(npf_conf, rproc)) { 890 yyerror("rule procedure '%s' is not defined", rproc); 891 } 892 assert(cg != NULL); 893 npf_rule_setprio(rl, NPF_PRI_LAST); 894 npf_rule_insert(npf_conf, cg, rl); 895 } else { 896 /* We have parsed a single rule - set it. */ 897 the_rule = rl; 898 } 899 } 900 901 /* 902 * npfctl_build_nat: create a single NAT policy of a specified 903 * type with a given filter options. 904 */ 905 static nl_nat_t * 906 npfctl_build_nat(int type, const char *ifname, const addr_port_t *ap, 907 const npfvar_t *popts, const filt_opts_t *fopts, unsigned flags) 908 { 909 fam_addr_mask_t *am; 910 sa_family_t family; 911 in_port_t port; 912 nl_nat_t *nat; 913 unsigned tid; 914 915 if (ap->ap_portrange) { 916 /* 917 * The port forwarding case. In such case, there has to 918 * be a single port used for translation; we keep the port 919 * translation on, but disable the port map. 920 */ 921 port = npfctl_get_singleport(ap->ap_portrange); 922 flags = (flags & ~NPF_NAT_PORTMAP) | NPF_NAT_PORTS; 923 } else { 924 port = 0; 925 } 926 927 nat = npf_nat_create(type, flags, ifname); 928 929 switch (npfvar_get_type(ap->ap_netaddr, 0)) { 930 case NPFVAR_FAM: 931 /* Translation address. */ 932 am = npfctl_get_singlefam(ap->ap_netaddr); 933 family = am->fam_family; 934 npf_nat_setaddr(nat, family, &am->fam_addr, am->fam_mask); 935 break; 936 case NPFVAR_TABLE: 937 /* Translation table. */ 938 family = AF_UNSPEC; 939 tid = npfctl_get_singletable(ap->ap_netaddr); 940 npf_nat_settable(nat, tid); 941 break; 942 default: 943 yyerror("map must have a valid translation address"); 944 abort(); 945 } 946 npf_nat_setport(nat, port); 947 npfctl_build_code(nat, family, popts, fopts); 948 return nat; 949 } 950 951 static void 952 npfctl_dnat_check(const addr_port_t *ap, const unsigned algo) 953 { 954 const unsigned type = npfvar_get_type(ap->ap_netaddr, 0); 955 fam_addr_mask_t *am; 956 957 switch (algo) { 958 case NPF_ALGO_NETMAP: 959 if (type == NPFVAR_FAM) { 960 break; 961 } 962 yyerror("translation address using NETMAP must be " 963 "a network and not a dynamic pool"); 964 break; 965 case NPF_ALGO_IPHASH: 966 case NPF_ALGO_RR: 967 case NPF_ALGO_NONE: 968 if (type != NPFVAR_FAM) { 969 break; 970 } 971 am = npfctl_get_singlefam(ap->ap_netaddr); 972 if (am->fam_mask == NPF_NO_NETMASK) { 973 break; 974 } 975 yyerror("translation address, given the specified algorithm, " 976 "must be a pool or a single address"); 977 break; 978 default: 979 yyerror("invalid algorithm specified for dynamic NAT"); 980 } 981 } 982 983 /* 984 * npfctl_build_natseg: validate and create NAT policies. 985 */ 986 void 987 npfctl_build_natseg(int sd, int type, unsigned mflags, const char *ifname, 988 const addr_port_t *ap1, const addr_port_t *ap2, const npfvar_t *popts, 989 const filt_opts_t *fopts, unsigned algo) 990 { 991 fam_addr_mask_t *am1 = NULL, *am2 = NULL; 992 nl_nat_t *nt1 = NULL, *nt2 = NULL; 993 filt_opts_t imfopts; 994 uint16_t adj = 0; 995 unsigned flags; 996 bool binat; 997 998 assert(ifname != NULL); 999 1000 /* 1001 * Validate that mapping has the translation address(es) set. 1002 */ 1003 if ((type & NPF_NATIN) != 0 && ap1->ap_netaddr == NULL) { 1004 yyerror("inbound network segment is not specified"); 1005 } 1006 if ((type & NPF_NATOUT) != 0 && ap2->ap_netaddr == NULL) { 1007 yyerror("outbound network segment is not specified"); 1008 } 1009 1010 /* 1011 * Bi-directional NAT is a combination of inbound NAT and outbound 1012 * NAT policies with the translation segments inverted respectively. 1013 */ 1014 binat = (NPF_NATIN | NPF_NATOUT) == type; 1015 1016 switch (sd) { 1017 case NPFCTL_NAT_DYNAMIC: 1018 /* 1019 * Dynamic NAT: stateful translation -- traditional NAPT 1020 * is expected. Unless it is bi-directional NAT, perform 1021 * the port mapping. 1022 */ 1023 flags = !binat ? (NPF_NAT_PORTS | NPF_NAT_PORTMAP) : 0; 1024 if (type & NPF_NATIN) { 1025 npfctl_dnat_check(ap1, algo); 1026 } 1027 if (type & NPF_NATOUT) { 1028 npfctl_dnat_check(ap2, algo); 1029 } 1030 break; 1031 case NPFCTL_NAT_STATIC: 1032 /* 1033 * Static NAT: stateless translation. 1034 */ 1035 flags = NPF_NAT_STATIC; 1036 1037 /* Note: translation address/network cannot be a table. */ 1038 if (type & NPF_NATIN) { 1039 am1 = npfctl_get_singlefam(ap1->ap_netaddr); 1040 } 1041 if (type & NPF_NATOUT) { 1042 am2 = npfctl_get_singlefam(ap2->ap_netaddr); 1043 } 1044 1045 /* Validate the algorithm. */ 1046 switch (algo) { 1047 case NPF_ALGO_NPT66: 1048 if (!binat || am1->fam_mask != am2->fam_mask) { 1049 yyerror("asymmetric NPTv6 is not supported"); 1050 } 1051 adj = npfctl_npt66_calcadj(am1->fam_mask, 1052 &am1->fam_addr, &am2->fam_addr); 1053 break; 1054 case NPF_ALGO_NETMAP: 1055 if (binat && am1->fam_mask != am2->fam_mask) { 1056 yyerror("net-to-net mapping using the " 1057 "NETMAP algorithm must be 1:1"); 1058 } 1059 break; 1060 case NPF_ALGO_NONE: 1061 if ((am1 && am1->fam_mask != NPF_NO_NETMASK) || 1062 (am2 && am2->fam_mask != NPF_NO_NETMASK)) { 1063 yyerror("static net-to-net translation " 1064 "must have an algorithm specified"); 1065 } 1066 break; 1067 default: 1068 yyerror("invalid algorithm specified for static NAT"); 1069 } 1070 break; 1071 default: 1072 abort(); 1073 } 1074 1075 /* 1076 * Apply the flag modifications. 1077 */ 1078 if (mflags & NPF_NAT_PORTS) { 1079 flags &= ~(NPF_NAT_PORTS | NPF_NAT_PORTMAP); 1080 } 1081 1082 /* 1083 * If the filter criteria is not specified explicitly, apply implicit 1084 * filtering according to the given network segments. 1085 * 1086 * Note: filled below, depending on the type. 1087 */ 1088 if (__predict_true(!fopts)) { 1089 fopts = &imfopts; 1090 } 1091 1092 if (type & NPF_NATIN) { 1093 memset(&imfopts, 0, sizeof(imfopts)); 1094 imfopts.layer = NPF_RULE_LAYER_3; 1095 memcpy(&imfopts.filt.opt3.fo_to, ap2, sizeof(imfopts.filt.opt3.fo_to)); 1096 nt1 = npfctl_build_nat(NPF_NATIN, ifname, 1097 ap1, popts, fopts, flags); 1098 } 1099 if (type & NPF_NATOUT) { 1100 memset(&imfopts, 0, sizeof(imfopts)); 1101 imfopts.layer = NPF_RULE_LAYER_3; 1102 memcpy(&imfopts.filt.opt3.fo_from, ap1, sizeof(imfopts.filt.opt3.fo_from)); 1103 nt2 = npfctl_build_nat(NPF_NATOUT, ifname, 1104 ap2, popts, fopts, flags); 1105 } 1106 1107 switch (algo) { 1108 case NPF_ALGO_NONE: 1109 break; 1110 case NPF_ALGO_NPT66: 1111 /* 1112 * NPTv6 is a special case using special adjustment value. 1113 * It is always bidirectional NAT. 1114 */ 1115 assert(nt1 && nt2); 1116 npf_nat_setnpt66(nt1, ~adj); 1117 npf_nat_setnpt66(nt2, adj); 1118 break; 1119 default: 1120 /* 1121 * Set the algorithm. 1122 */ 1123 if (nt1) { 1124 npf_nat_setalgo(nt1, algo); 1125 } 1126 if (nt2) { 1127 npf_nat_setalgo(nt2, algo); 1128 } 1129 } 1130 1131 if (npf_conf) { 1132 if (nt1) { 1133 npf_rule_setprio(nt1, NPF_PRI_LAST); 1134 npf_nat_insert(npf_conf, nt1); 1135 } 1136 if (nt2) { 1137 npf_rule_setprio(nt2, NPF_PRI_LAST); 1138 npf_nat_insert(npf_conf, nt2); 1139 } 1140 } else { 1141 // XXX/TODO: need to refactor a bit to enable this.. 1142 if (nt1 && nt2) { 1143 errx(EXIT_FAILURE, "bidirectional NAT is currently " 1144 "not yet supported in the dynamic rules"); 1145 } 1146 the_rule = nt1 ? nt1 : nt2; 1147 } 1148 } 1149 1150 /* 1151 * npfctl_fill_table: fill NPF table with entries from a specified file. 1152 */ 1153 static void 1154 npfctl_fill_table(nl_table_t *tl, unsigned type, const char *fname, FILE *fp) 1155 { 1156 char *buf = NULL; 1157 int l = 0; 1158 size_t n; 1159 1160 if (fp == NULL && (fp = fopen(fname, "r")) == NULL) { 1161 err(EXIT_FAILURE, "open '%s'", fname); 1162 } 1163 while (l++, getline(&buf, &n, fp) != -1) { 1164 fam_addr_mask_t fam; 1165 int alen; 1166 1167 if (*buf == '\n' || *buf == '#') { 1168 continue; 1169 } 1170 1171 if (!npfctl_parse_cidr(buf, &fam, &alen)) { 1172 errx(EXIT_FAILURE, 1173 "%s:%d: invalid table entry", fname, l); 1174 } 1175 if (type != NPF_TABLE_LPM && fam.fam_mask != NPF_NO_NETMASK) { 1176 errx(EXIT_FAILURE, "%s:%d: mask used with the " 1177 "table type other than \"lpm\"", fname, l); 1178 } 1179 1180 npf_table_add_entry(tl, fam.fam_family, 1181 &fam.fam_addr, fam.fam_mask); 1182 } 1183 free(buf); 1184 } 1185 1186 /* 1187 * npfctl_load_table: create an NPF table and fill with contents from a file. 1188 */ 1189 nl_table_t * 1190 npfctl_load_table(const char *tname, int tid, unsigned type, 1191 const char *fname, FILE *fp) 1192 { 1193 nl_table_t *tl; 1194 1195 tl = npf_table_create(tname, tid, type); 1196 if (tl && fname) { 1197 npfctl_fill_table(tl, type, fname, fp); 1198 } 1199 1200 return tl; 1201 } 1202 1203 /* 1204 * npfctl_build_table: create an NPF table, add to the configuration and, 1205 * if required, fill with contents from a file. 1206 */ 1207 void 1208 npfctl_build_table(const char *tname, unsigned type, const char *fname) 1209 { 1210 nl_table_t *tl; 1211 1212 if (type == NPF_TABLE_CONST && !fname) { 1213 yyerror("table type 'const' must be loaded from a file"); 1214 } 1215 1216 tl = npfctl_load_table(tname, npfctl_tid_counter++, type, fname, NULL); 1217 assert(tl != NULL); 1218 1219 if (npf_table_insert(npf_conf, tl)) { 1220 yyerror("table '%s' is already defined", tname); 1221 } 1222 } 1223 1224 /* 1225 * npfctl_ifnet_table: get a variable with ifaddr-table; auto-create 1226 * the table on first reference. 1227 */ 1228 npfvar_t * 1229 npfctl_ifnet_table(const char *ifname) 1230 { 1231 char tname[NPF_TABLE_MAXNAMELEN]; 1232 nl_table_t *tl; 1233 unsigned tid; 1234 1235 snprintf(tname, sizeof(tname), NPF_IFNET_TABLE_PREF "%s", ifname); 1236 if (!npf_conf) { 1237 errx(EXIT_FAILURE, "expression `ifaddrs(%s)` is currently " 1238 "not yet supported in dynamic rules", ifname); 1239 } 1240 1241 tid = npfctl_table_getid(tname); 1242 if (tid == (unsigned)-1) { 1243 tid = npfctl_tid_counter++; 1244 tl = npf_table_create(tname, tid, NPF_TABLE_IFADDR); 1245 (void)npf_table_insert(npf_conf, tl); 1246 } 1247 return npfvar_create_element(NPFVAR_TABLE, &tid, sizeof(unsigned)); 1248 } 1249 1250 /* 1251 * npfctl_build_alg: create an NPF application level gateway and add it 1252 * to the configuration. 1253 */ 1254 void 1255 npfctl_build_alg(const char *al_name) 1256 { 1257 if (npf_alg_load(npf_conf, al_name) != 0) { 1258 yyerror("ALG '%s' is already loaded", al_name); 1259 } 1260 } 1261 1262 void 1263 npfctl_setparam(const char *name, int val) 1264 { 1265 if (strcmp(name, "bpf.jit") == 0) { 1266 npfctl_bpfjit(val != 0); 1267 return; 1268 } 1269 if (npf_param_set(npf_conf, name, val) != 0) { 1270 yyerror("invalid parameter `%s` or its value", name); 1271 } 1272 } 1273 1274 static void 1275 npfctl_dump_bpf(struct bpf_program *bf) 1276 { 1277 if (npf_debug) { 1278 extern char *yytext; 1279 extern int yylineno; 1280 1281 int rule_line = yylineno - (int)(*yytext == '\n'); 1282 printf("\nRULE AT LINE %d\n", rule_line); 1283 bpf_dump(bf, 0); 1284 } 1285 } 1286