1 1.1 rmind /*- 2 1.49 rmind * Copyright (c) 2020 Mindaugas Rasiukevicius <rmind at noxt eu> 3 1.52 joe * Copyright (c) 2009-2025 The NetBSD Foundation, Inc. 4 1.1 rmind * All rights reserved. 5 1.1 rmind * 6 1.1 rmind * This material is based upon work partially supported by The 7 1.1 rmind * NetBSD Foundation under a contract with Mindaugas Rasiukevicius. 8 1.1 rmind * 9 1.1 rmind * Redistribution and use in source and binary forms, with or without 10 1.1 rmind * modification, are permitted provided that the following conditions 11 1.1 rmind * are met: 12 1.1 rmind * 1. Redistributions of source code must retain the above copyright 13 1.1 rmind * notice, this list of conditions and the following disclaimer. 14 1.1 rmind * 2. Redistributions in binary form must reproduce the above copyright 15 1.1 rmind * notice, this list of conditions and the following disclaimer in the 16 1.1 rmind * documentation and/or other materials provided with the distribution. 17 1.1 rmind * 18 1.1 rmind * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 19 1.1 rmind * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 20 1.1 rmind * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 21 1.1 rmind * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 22 1.1 rmind * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 23 1.1 rmind * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 24 1.1 rmind * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 25 1.1 rmind * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 26 1.1 rmind * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 27 1.1 rmind * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 28 1.1 rmind * POSSIBILITY OF SUCH DAMAGE. 29 1.1 rmind */ 30 1.1 rmind 31 1.1 rmind /* 32 1.1 rmind * NPF packet handler. 33 1.28 rmind * 34 1.49 rmind * This is the main entry point to the NPF where packet processing happens. 35 1.49 rmind * There are some important synchronization rules: 36 1.49 rmind * 37 1.49 rmind * 1) Lookups into the connection database and configuration (ruleset, 38 1.49 rmind * tables, etc) are protected by Epoch-Based Reclamation (EBR); 39 1.49 rmind * 40 1.49 rmind * 2) The code in the critical path (protected by EBR) should generally 41 1.49 rmind * not block (that includes adaptive mutex acquisitions); 42 1.49 rmind * 43 1.49 rmind * 3) Where it will blocks, references should be acquired atomically, 44 1.49 rmind * while in the critical path, on the relevant objects. 45 1.1 rmind */ 46 1.1 rmind 47 1.35 christos #ifdef _KERNEL 48 1.1 rmind #include <sys/cdefs.h> 49 1.54 joe __KERNEL_RCSID(0, "$NetBSD: npf_handler.c,v 1.54 2025/07/08 15:56:23 joe Exp $"); 50 1.1 rmind 51 1.14 rmind #include <sys/types.h> 52 1.1 rmind #include <sys/param.h> 53 1.1 rmind 54 1.1 rmind #include <sys/mbuf.h> 55 1.1 rmind #include <sys/mutex.h> 56 1.1 rmind #include <net/if.h> 57 1.1 rmind #include <net/pfil.h> 58 1.1 rmind #include <sys/socketvar.h> 59 1.1 rmind 60 1.4 rmind #include <netinet/in_systm.h> 61 1.4 rmind #include <netinet/in.h> 62 1.4 rmind #include <netinet/ip_var.h> 63 1.8 zoltan #include <netinet/ip6.h> 64 1.8 zoltan #include <netinet6/ip6_var.h> 65 1.35 christos #endif 66 1.4 rmind 67 1.1 rmind #include "npf_impl.h" 68 1.31 rmind #include "npf_conn.h" 69 1.1 rmind 70 1.35 christos #if defined(_NPF_STANDALONE) 71 1.35 christos #define m_freem(m) npf->mbufops->free(m) 72 1.35 christos #define m_clear_flag(m,f) 73 1.35 christos #else 74 1.35 christos #define m_clear_flag(m,f) (m)->m_flags &= ~(f) 75 1.35 christos #endif 76 1.1 rmind 77 1.26 rmind #ifndef INET6 78 1.26 rmind #define ip6_reass_packet(x, y) ENOTSUP 79 1.26 rmind #endif 80 1.26 rmind 81 1.24 rmind static int 82 1.42 maxv npf_reassembly(npf_t *npf, npf_cache_t *npc, bool *mff) 83 1.24 rmind { 84 1.32 rmind nbuf_t *nbuf = npc->npc_nbuf; 85 1.24 rmind int error = EINVAL; 86 1.42 maxv struct mbuf *m; 87 1.42 maxv 88 1.42 maxv *mff = false; 89 1.42 maxv m = nbuf_head_mbuf(nbuf); 90 1.24 rmind 91 1.49 rmind if (npf_iscached(npc, NPC_IP4) && npf->ip4_reassembly) { 92 1.43 maxv error = ip_reass_packet(&m); 93 1.49 rmind } else if (npf_iscached(npc, NPC_IP6) && npf->ip6_reassembly) { 94 1.42 maxv error = ip6_reass_packet(&m, npc->npc_hlen); 95 1.49 rmind } else { 96 1.49 rmind /* 97 1.49 rmind * Reassembly is disabled: just pass the packet through 98 1.49 rmind * the ruleset for inspection. 99 1.49 rmind */ 100 1.49 rmind return 0; 101 1.24 rmind } 102 1.44 maxv 103 1.24 rmind if (error) { 104 1.49 rmind /* Reassembly failed; free the mbuf, clear the nbuf. */ 105 1.35 christos npf_stats_inc(npf, NPF_STAT_REASSFAIL); 106 1.44 maxv m_freem(m); 107 1.44 maxv memset(nbuf, 0, sizeof(nbuf_t)); 108 1.24 rmind return error; 109 1.24 rmind } 110 1.42 maxv if (m == NULL) { 111 1.24 rmind /* More fragments should come. */ 112 1.35 christos npf_stats_inc(npf, NPF_STAT_FRAGMENTS); 113 1.42 maxv *mff = true; 114 1.24 rmind return 0; 115 1.24 rmind } 116 1.24 rmind 117 1.24 rmind /* 118 1.24 rmind * Reassembly is complete, we have the final packet. 119 1.24 rmind * Cache again, since layer 4 data is accessible now. 120 1.24 rmind */ 121 1.42 maxv nbuf_init(npf, nbuf, m, nbuf->nb_ifp); 122 1.24 rmind npc->npc_info = 0; 123 1.24 rmind 124 1.38 maxv if (npf_cache_all(npc) & (NPC_IPFRAG|NPC_FMTERR)) { 125 1.24 rmind return EINVAL; 126 1.24 rmind } 127 1.35 christos npf_stats_inc(npf, NPF_STAT_REASSEMBLY); 128 1.24 rmind return 0; 129 1.24 rmind } 130 1.24 rmind 131 1.49 rmind static inline bool 132 1.49 rmind npf_packet_bypass_tag_p(nbuf_t *nbuf) 133 1.49 rmind { 134 1.49 rmind uint32_t ntag; 135 1.49 rmind return nbuf_find_tag(nbuf, &ntag) == 0 && (ntag & NPF_NTAG_PASS) != 0; 136 1.49 rmind } 137 1.49 rmind 138 1.1 rmind /* 139 1.47 rmind * npfk_packet_handler: main packet handling routine for layer 3. 140 1.1 rmind * 141 1.1 rmind * Note: packet flow and inspection logic is in strict order. 142 1.1 rmind */ 143 1.35 christos __dso_public int 144 1.47 rmind npfk_packet_handler(npf_t *npf, struct mbuf **mp, ifnet_t *ifp, int di) 145 1.1 rmind { 146 1.24 rmind nbuf_t nbuf; 147 1.1 rmind npf_cache_t npc; 148 1.31 rmind npf_conn_t *con; 149 1.1 rmind npf_rule_t *rl; 150 1.5 rmind npf_rproc_t *rp; 151 1.51 joe int error, decision, flags, id_match; 152 1.36 christos npf_match_info_t mi; 153 1.42 maxv bool mff; 154 1.1 rmind 155 1.35 christos KASSERT(ifp != NULL); 156 1.35 christos 157 1.1 rmind /* 158 1.49 rmind * Initialize packet information cache. 159 1.1 rmind * Note: it is enough to clear the info bits. 160 1.1 rmind */ 161 1.49 rmind nbuf_init(npf, &nbuf, *mp, ifp); 162 1.49 rmind memset(&npc, 0, sizeof(npf_cache_t)); 163 1.35 christos npc.npc_ctx = npf; 164 1.32 rmind npc.npc_nbuf = &nbuf; 165 1.32 rmind 166 1.36 christos mi.mi_di = di; 167 1.36 christos mi.mi_rid = 0; 168 1.36 christos mi.mi_retfl = 0; 169 1.36 christos 170 1.42 maxv *mp = NULL; 171 1.14 rmind decision = NPF_DECISION_BLOCK; 172 1.2 rmind error = 0; 173 1.5 rmind rp = NULL; 174 1.40 maxv con = NULL; 175 1.1 rmind 176 1.38 maxv /* Cache everything. */ 177 1.37 christos flags = npf_cache_all(&npc); 178 1.38 maxv 179 1.49 rmind /* Malformed packet, leave quickly. */ 180 1.38 maxv if (flags & NPC_FMTERR) { 181 1.38 maxv error = EINVAL; 182 1.42 maxv goto out; 183 1.38 maxv } 184 1.38 maxv 185 1.38 maxv /* Determine whether it is an IP fragment. */ 186 1.37 christos if (__predict_false(flags & NPC_IPFRAG)) { 187 1.42 maxv /* Pass to IPv4/IPv6 reassembly mechanism. */ 188 1.42 maxv error = npf_reassembly(npf, &npc, &mff); 189 1.18 rmind if (error) { 190 1.4 rmind goto out; 191 1.4 rmind } 192 1.42 maxv if (mff) { 193 1.42 maxv /* More fragments should come. */ 194 1.4 rmind return 0; 195 1.4 rmind } 196 1.4 rmind } 197 1.4 rmind 198 1.34 rmind /* Just pass-through if specially tagged. */ 199 1.49 rmind if (npf_packet_bypass_tag_p(&nbuf)) { 200 1.34 rmind goto pass; 201 1.34 rmind } 202 1.34 rmind 203 1.31 rmind /* Inspect the list of connections (if found, acquires a reference). */ 204 1.32 rmind con = npf_conn_inspect(&npc, di, &error); 205 1.2 rmind 206 1.31 rmind /* If "passing" connection found - skip the ruleset inspection. */ 207 1.36 christos if (con && npf_conn_pass(con, &mi, &rp)) { 208 1.35 christos npf_stats_inc(npf, NPF_STAT_PASS_CONN); 209 1.14 rmind KASSERT(error == 0); 210 1.2 rmind goto pass; 211 1.14 rmind } 212 1.32 rmind if (__predict_false(error)) { 213 1.24 rmind if (error == ENETUNREACH) 214 1.24 rmind goto block; 215 1.24 rmind goto out; 216 1.2 rmind } 217 1.1 rmind 218 1.7 rmind /* Acquire the lock, inspect the ruleset using this packet. */ 219 1.48 rmind int slock = npf_config_read_enter(npf); 220 1.35 christos npf_ruleset_t *rlset = npf_config_ruleset(npf); 221 1.26 rmind 222 1.53 joe rl = npf_ruleset_inspect(&npc, rlset, di, NPF_RULE_LAYER_3); 223 1.32 rmind if (__predict_false(rl == NULL)) { 224 1.35 christos const bool pass = npf_default_pass(npf); 225 1.48 rmind npf_config_read_exit(npf, slock); 226 1.14 rmind 227 1.26 rmind if (pass) { 228 1.35 christos npf_stats_inc(npf, NPF_STAT_PASS_DEFAULT); 229 1.2 rmind goto pass; 230 1.2 rmind } 231 1.35 christos npf_stats_inc(npf, NPF_STAT_BLOCK_DEFAULT); 232 1.6 rmind goto block; 233 1.1 rmind } 234 1.1 rmind 235 1.13 rmind /* 236 1.24 rmind * Get the rule procedure (acquires a reference) for association 237 1.31 rmind * with a connection (if any) and execution. 238 1.13 rmind */ 239 1.6 rmind KASSERT(rp == NULL); 240 1.13 rmind rp = npf_rule_getrproc(rl); 241 1.6 rmind 242 1.51 joe /* check for matching process uid/gid before concluding */ 243 1.51 joe id_match = npf_rule_match_rid(rl, &npc, di); 244 1.51 joe 245 1.26 rmind /* Conclude with the rule and release the lock. */ 246 1.36 christos error = npf_rule_conclude(rl, &mi); 247 1.48 rmind npf_config_read_exit(npf, slock); 248 1.26 rmind 249 1.51 joe /* reverse between pass and block conditions */ 250 1.51 joe if (id_match != -1 && !id_match) { 251 1.51 joe error = npf_rule_reverse(&npc, &mi, error); 252 1.51 joe } 253 1.51 joe 254 1.51 joe /* reject packets whose addr-port pair matches no sockets */ 255 1.51 joe if (id_match == ENOTCONN || error) { 256 1.35 christos npf_stats_inc(npf, NPF_STAT_BLOCK_RULESET); 257 1.6 rmind goto block; 258 1.1 rmind } 259 1.35 christos npf_stats_inc(npf, NPF_STAT_PASS_RULESET); 260 1.1 rmind 261 1.14 rmind /* 262 1.31 rmind * Establish a "pass" connection, if required. Just proceed if 263 1.31 rmind * connection creation fails (e.g. due to unsupported protocol). 264 1.14 rmind */ 265 1.36 christos if ((mi.mi_retfl & NPF_RULE_STATEFUL) != 0 && !con) { 266 1.32 rmind con = npf_conn_establish(&npc, di, 267 1.46 rmind (mi.mi_retfl & NPF_RULE_GSTATEFUL) == 0); 268 1.31 rmind if (con) { 269 1.26 rmind /* 270 1.26 rmind * Note: the reference on the rule procedure is 271 1.49 rmind * transferred to the connection. It will be 272 1.31 rmind * released on connection destruction. 273 1.26 rmind */ 274 1.36 christos npf_conn_setpass(con, &mi, rp); 275 1.2 rmind } 276 1.1 rmind } 277 1.42 maxv 278 1.2 rmind pass: 279 1.14 rmind decision = NPF_DECISION_PASS; 280 1.2 rmind KASSERT(error == 0); 281 1.48 rmind 282 1.5 rmind /* 283 1.6 rmind * Perform NAT. 284 1.6 rmind */ 285 1.32 rmind error = npf_do_nat(&npc, con, di); 286 1.42 maxv 287 1.6 rmind block: 288 1.6 rmind /* 289 1.22 rmind * Execute the rule procedure, if any is associated. 290 1.22 rmind * It may reverse the decision from pass to block. 291 1.5 rmind */ 292 1.36 christos if (rp && !npf_rproc_run(&npc, rp, &mi, &decision)) { 293 1.31 rmind if (con) { 294 1.31 rmind npf_conn_release(con); 295 1.30 jakllsch } 296 1.30 jakllsch npf_rproc_release(rp); 297 1.42 maxv /* mbuf already freed */ 298 1.30 jakllsch return 0; 299 1.5 rmind } 300 1.42 maxv 301 1.1 rmind out: 302 1.13 rmind /* 303 1.31 rmind * Release the reference on a connection. Release the reference 304 1.31 rmind * on a rule procedure only if there was no association. 305 1.13 rmind */ 306 1.31 rmind if (con) { 307 1.31 rmind npf_conn_release(con); 308 1.6 rmind } else if (rp) { 309 1.13 rmind npf_rproc_release(rp); 310 1.1 rmind } 311 1.1 rmind 312 1.42 maxv /* Get the new mbuf pointer. */ 313 1.24 rmind if ((*mp = nbuf_head_mbuf(&nbuf)) == NULL) { 314 1.25 rmind return error ? error : ENOMEM; 315 1.24 rmind } 316 1.24 rmind 317 1.14 rmind /* Pass the packet if decided and there is no error. */ 318 1.14 rmind if (decision == NPF_DECISION_PASS && !error) { 319 1.3 rmind /* 320 1.3 rmind * XXX: Disable for now, it will be set accordingly later, 321 1.3 rmind * for optimisations (to reduce inspection). 322 1.3 rmind */ 323 1.35 christos m_clear_flag(*mp, M_CANFASTFWD); 324 1.13 rmind return 0; 325 1.1 rmind } 326 1.13 rmind 327 1.13 rmind /* 328 1.13 rmind * Block the packet. ENETUNREACH is used to indicate blocking. 329 1.13 rmind * Depending on the flags and protocol, return TCP reset (RST) or 330 1.13 rmind * ICMP destination unreachable. 331 1.13 rmind */ 332 1.36 christos if (mi.mi_retfl && npf_return_block(&npc, mi.mi_retfl)) { 333 1.16 rmind *mp = NULL; 334 1.13 rmind } 335 1.16 rmind 336 1.20 rmind if (!error) { 337 1.14 rmind error = ENETUNREACH; 338 1.13 rmind } 339 1.13 rmind 340 1.50 rin /* Free the mbuf chain. */ 341 1.50 rin m_freem(*mp); 342 1.50 rin *mp = NULL; 343 1.1 rmind return error; 344 1.1 rmind } 345 1.53 joe 346 1.53 joe __dso_public int 347 1.53 joe npfk_layer2_handler(npf_t *npf, struct mbuf **mp, ifnet_t *ifp, int di) 348 1.53 joe { 349 1.53 joe nbuf_t nbuf; 350 1.53 joe npf_cache_t npc; 351 1.53 joe npf_rule_t *rl; 352 1.53 joe int error, decision, flags; 353 1.53 joe npf_match_info_t mi; 354 1.53 joe 355 1.53 joe KASSERT(ifp != NULL); 356 1.53 joe 357 1.53 joe /* 358 1.53 joe * as usual, get packet info 359 1.53 joe * including the interface the frame is traveling on 360 1.53 joe */ 361 1.53 joe nbuf_init(npf, &nbuf, *mp, ifp); 362 1.53 joe memset(&npc, 0, sizeof(npc)); 363 1.53 joe npc.npc_ctx = npf; 364 1.53 joe npc.npc_nbuf = &nbuf; 365 1.53 joe 366 1.53 joe mi.mi_di = di; 367 1.53 joe mi.mi_rid = 0; 368 1.53 joe mi.mi_retfl = 0; 369 1.53 joe 370 1.53 joe *mp = NULL; 371 1.53 joe decision = NPF_DECISION_BLOCK; 372 1.53 joe error = 0; 373 1.53 joe 374 1.53 joe /* Cache only ether header. */ 375 1.53 joe flags = npf_cache_ether(&npc); 376 1.53 joe 377 1.53 joe /* Malformed packet, leave quickly. */ 378 1.53 joe if (flags & NPC_FMTERR) { 379 1.53 joe error = EINVAL; 380 1.53 joe goto out; 381 1.53 joe } 382 1.53 joe 383 1.53 joe /* Just pass-through if specially tagged. */ 384 1.53 joe if (npf_packet_bypass_tag_p(&nbuf)) { 385 1.53 joe goto pass; 386 1.53 joe } 387 1.53 joe 388 1.53 joe /* Acquire the lock, inspect the ruleset using this packet. */ 389 1.53 joe int slock = npf_config_read_enter(npf); 390 1.53 joe npf_ruleset_t *rlset = npf_config_ruleset(npf); 391 1.53 joe 392 1.53 joe rl = npf_ruleset_inspect(&npc, rlset, di, NPF_RULE_LAYER_2); 393 1.53 joe if (__predict_false(rl == NULL)) { 394 1.53 joe npf_config_read_exit(npf, slock); 395 1.53 joe 396 1.54 joe npf_stats_inc(npf, NPF_STAT_PASS_DEFAULT); 397 1.54 joe goto pass; 398 1.53 joe } 399 1.53 joe 400 1.53 joe /* Conclude with the rule and release the lock. */ 401 1.53 joe error = npf_rule_conclude(rl, &mi); 402 1.53 joe npf_config_read_exit(npf, slock); 403 1.53 joe 404 1.53 joe if (error) { 405 1.53 joe npf_stats_inc(npf, NPF_ETHER_STAT_BLOCK); 406 1.53 joe goto out; 407 1.53 joe } 408 1.53 joe npf_stats_inc(npf, NPF_ETHER_STAT_PASS); 409 1.53 joe 410 1.53 joe pass: 411 1.53 joe decision = NPF_DECISION_PASS; 412 1.53 joe KASSERT(error == 0); 413 1.53 joe 414 1.53 joe out: 415 1.53 joe 416 1.53 joe /* Get the new mbuf pointer. */ 417 1.53 joe if ((*mp = nbuf_head_mbuf(&nbuf)) == NULL) { 418 1.53 joe return error ? error : ENOMEM; 419 1.53 joe } 420 1.53 joe 421 1.53 joe /* Pass the packet if decided and there is no error. */ 422 1.53 joe if (decision == NPF_DECISION_PASS && !error) { 423 1.53 joe return 0; 424 1.53 joe } 425 1.53 joe 426 1.53 joe if (!error) { 427 1.53 joe error = ENETUNREACH; 428 1.53 joe } 429 1.53 joe 430 1.53 joe if (*mp) { 431 1.53 joe /* Free the mbuf chain. */ 432 1.53 joe m_freem(*mp); 433 1.53 joe *mp = NULL; 434 1.53 joe } 435 1.53 joe return error; 436 1.53 joe } 437