1 /*- 2 * Copyright (c) 2020 Mindaugas Rasiukevicius <rmind at noxt eu> 3 * Copyright (c) 2009-2025 The NetBSD Foundation, Inc. 4 * All rights reserved. 5 * 6 * This material is based upon work partially supported by The 7 * NetBSD Foundation under a contract with Mindaugas Rasiukevicius. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 19 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 20 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 21 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 22 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 23 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 24 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 25 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 26 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 27 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 28 * POSSIBILITY OF SUCH DAMAGE. 29 */ 30 31 /* 32 * NPF packet handler. 33 * 34 * This is the main entry point to the NPF where packet processing happens. 
 * There are some important synchronization rules:
 *
 *	1) Lookups into the connection database and configuration (ruleset,
 *	tables, etc) are protected by Epoch-Based Reclamation (EBR);
 *
 *	2) The code in the critical path (protected by EBR) should generally
 *	not block (that includes adaptive mutex acquisitions);
 *
 *	3) Where it will block, references should be acquired atomically,
 *	while in the critical path, on the relevant objects.
 */

#ifdef _KERNEL
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: npf_handler.c,v 1.54 2025/07/08 15:56:23 joe Exp $");

#include <sys/types.h>
#include <sys/param.h>

#include <sys/mbuf.h>
#include <sys/mutex.h>
#include <net/if.h>
#include <net/pfil.h>
#include <sys/socketvar.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/ip_var.h>
#include <netinet/ip6.h>
#include <netinet6/ip6_var.h>
#endif

#include "npf_impl.h"
#include "npf_conn.h"

/*
 * In the standalone (libnpf) build there is no kernel mbuf API: route
 * mbuf frees through the user-supplied ops and make flag-clearing a no-op.
 */
#if defined(_NPF_STANDALONE)
#define	m_freem(m)		npf->mbufops->free(m)
#define	m_clear_flag(m,f)
#else
#define	m_clear_flag(m,f)	(m)->m_flags &= ~(f)
#endif

/* Without INET6 support, IPv6 reassembly is simply unsupported. */
#ifndef INET6
#define	ip6_reass_packet(x, y)	ENOTSUP
#endif

/*
 * npf_reassembly: handle an IP fragment by passing it to the IPv4/IPv6
 * reassembly mechanism (if enabled in the NPF config).
 *
 * => Sets *mff to true if more fragments are expected (packet consumed
 *    by the reassembly queue); the caller must then stop processing.
 * => On successful completion of reassembly, re-initializes the nbuf
 *    and re-caches the packet so layer 4 data becomes accessible.
 * => On failure, frees the mbuf and zeroes the nbuf; returns the error.
 * => Returns 0 with *mff == false if reassembly is disabled (packet is
 *    passed through to the ruleset as-is).
 */
static int
npf_reassembly(npf_t *npf, npf_cache_t *npc, bool *mff)
{
	nbuf_t *nbuf = npc->npc_nbuf;
	int error = EINVAL;
	struct mbuf *m;

	*mff = false;
	m = nbuf_head_mbuf(nbuf);

	if (npf_iscached(npc, NPC_IP4) && npf->ip4_reassembly) {
		error = ip_reass_packet(&m);
	} else if (npf_iscached(npc, NPC_IP6) && npf->ip6_reassembly) {
		error = ip6_reass_packet(&m, npc->npc_hlen);
	} else {
		/*
		 * Reassembly is disabled: just pass the packet through
		 * the ruleset for inspection.
		 */
		return 0;
	}

	if (error) {
		/* Reassembly failed; free the mbuf, clear the nbuf. */
		npf_stats_inc(npf, NPF_STAT_REASSFAIL);
		m_freem(m);
		/* Zeroed nbuf signals the caller that no mbuf remains. */
		memset(nbuf, 0, sizeof(nbuf_t));
		return error;
	}
	if (m == NULL) {
		/* More fragments should come. */
		npf_stats_inc(npf, NPF_STAT_FRAGMENTS);
		*mff = true;
		return 0;
	}

	/*
	 * Reassembly is complete, we have the final packet.
	 * Cache again, since layer 4 data is accessible now.
	 */
	nbuf_init(npf, nbuf, m, nbuf->nb_ifp);
	npc->npc_info = 0;

	/* A reassembled packet must not still look fragmented or malformed. */
	if (npf_cache_all(npc) & (NPC_IPFRAG|NPC_FMTERR)) {
		return EINVAL;
	}
	npf_stats_inc(npf, NPF_STAT_REASSEMBLY);
	return 0;
}

/*
 * npf_packet_bypass_tag_p: true if the packet carries an NPF tag with
 * the "pass" bit set, i.e. it was marked to bypass further inspection.
 */
static inline bool
npf_packet_bypass_tag_p(nbuf_t *nbuf)
{
	uint32_t ntag;
	return nbuf_find_tag(nbuf, &ntag) == 0 && (ntag & NPF_NTAG_PASS) != 0;
}

/*
 * npfk_packet_handler: main packet handling routine for layer 3.
 *
 * Note: packet flow and inspection logic is in strict order.
 *
 * => On return 0 with *mp != NULL, the packet passed and may proceed.
 * => On return 0 with *mp == NULL, the packet was consumed (e.g. queued
 *    for reassembly, or freed by a rule procedure).
 * => On non-zero return, the packet was blocked and freed; ENETUNREACH
 *    indicates an explicit block decision (see comment near the end).
 */
__dso_public int
npfk_packet_handler(npf_t *npf, struct mbuf **mp, ifnet_t *ifp, int di)
{
	nbuf_t nbuf;
	npf_cache_t npc;
	npf_conn_t *con;
	npf_rule_t *rl;
	npf_rproc_t *rp;
	int error, decision, flags, id_match;
	npf_match_info_t mi;
	bool mff;

	KASSERT(ifp != NULL);

	/*
	 * Initialize packet information cache.
	 * Note: it is enough to clear the info bits.
	 */
	nbuf_init(npf, &nbuf, *mp, ifp);
	memset(&npc, 0, sizeof(npf_cache_t));
	npc.npc_ctx = npf;
	npc.npc_nbuf = &nbuf;

	mi.mi_di = di;
	mi.mi_rid = 0;
	mi.mi_retfl = 0;

	/* Caller's pointer is re-fetched from the nbuf before returning. */
	*mp = NULL;
	decision = NPF_DECISION_BLOCK;
	error = 0;
	rp = NULL;
	con = NULL;

	/* Cache everything. */
	flags = npf_cache_all(&npc);

	/* Malformed packet, leave quickly. */
	if (flags & NPC_FMTERR) {
		error = EINVAL;
		goto out;
	}

	/* Determine whether it is an IP fragment. */
	if (__predict_false(flags & NPC_IPFRAG)) {
		/* Pass to IPv4/IPv6 reassembly mechanism. */
		error = npf_reassembly(npf, &npc, &mff);
		if (error) {
			goto out;
		}
		if (mff) {
			/* More fragments should come. */
			return 0;
		}
	}

	/* Just pass-through if specially tagged. */
	if (npf_packet_bypass_tag_p(&nbuf)) {
		goto pass;
	}

	/* Inspect the list of connections (if found, acquires a reference). */
	con = npf_conn_inspect(&npc, di, &error);

	/* If "passing" connection found - skip the ruleset inspection. */
	if (con && npf_conn_pass(con, &mi, &rp)) {
		npf_stats_inc(npf, NPF_STAT_PASS_CONN);
		KASSERT(error == 0);
		goto pass;
	}
	if (__predict_false(error)) {
		/* ENETUNREACH from connection inspection means "block". */
		if (error == ENETUNREACH)
			goto block;
		goto out;
	}

	/* Acquire the lock, inspect the ruleset using this packet. */
	int slock = npf_config_read_enter(npf);
	npf_ruleset_t *rlset = npf_config_ruleset(npf);

	rl = npf_ruleset_inspect(&npc, rlset, di, NPF_RULE_LAYER_3);
	if (__predict_false(rl == NULL)) {
		/* No rule matched: apply the configured default policy. */
		const bool pass = npf_default_pass(npf);
		npf_config_read_exit(npf, slock);

		if (pass) {
			npf_stats_inc(npf, NPF_STAT_PASS_DEFAULT);
			goto pass;
		}
		npf_stats_inc(npf, NPF_STAT_BLOCK_DEFAULT);
		goto block;
	}

	/*
	 * Get the rule procedure (acquires a reference) for association
	 * with a connection (if any) and execution.
	 */
	KASSERT(rp == NULL);
	rp = npf_rule_getrproc(rl);

	/* check for matching process uid/gid before concluding */
	id_match = npf_rule_match_rid(rl, &npc, di);

	/* Conclude with the rule and release the lock. */
	error = npf_rule_conclude(rl, &mi);
	npf_config_read_exit(npf, slock);

	/* reverse between pass and block conditions */
	if (id_match != -1 && !id_match) {
		error = npf_rule_reverse(&npc, &mi, error);
	}

	/* reject packets whose addr-port pair matches no sockets */
	if (id_match == ENOTCONN || error) {
		npf_stats_inc(npf, NPF_STAT_BLOCK_RULESET);
		goto block;
	}
	npf_stats_inc(npf, NPF_STAT_PASS_RULESET);

	/*
	 * Establish a "pass" connection, if required. Just proceed if
	 * connection creation fails (e.g. due to unsupported protocol).
	 */
	if ((mi.mi_retfl & NPF_RULE_STATEFUL) != 0 && !con) {
		con = npf_conn_establish(&npc, di,
		    (mi.mi_retfl & NPF_RULE_GSTATEFUL) == 0);
		if (con) {
			/*
			 * Note: the reference on the rule procedure is
			 * transferred to the connection.  It will be
			 * released on connection destruction.
			 */
			npf_conn_setpass(con, &mi, rp);
		}
	}

pass:
	decision = NPF_DECISION_PASS;
	KASSERT(error == 0);

	/*
	 * Perform NAT.
	 */
	error = npf_do_nat(&npc, con, di);

block:
	/*
	 * Execute the rule procedure, if any is associated.
	 * It may reverse the decision from pass to block.
	 */
	if (rp && !npf_rproc_run(&npc, rp, &mi, &decision)) {
		if (con) {
			npf_conn_release(con);
		}
		npf_rproc_release(rp);
		/* mbuf already freed */
		return 0;
	}

out:
	/*
	 * Release the reference on a connection.  Release the reference
	 * on a rule procedure only if there was no association.
	 */
	if (con) {
		npf_conn_release(con);
	} else if (rp) {
		npf_rproc_release(rp);
	}

	/* Get the new mbuf pointer. */
	if ((*mp = nbuf_head_mbuf(&nbuf)) == NULL) {
		/* The nbuf was cleared (e.g. failed reassembly): no mbuf. */
		return error ? error : ENOMEM;
	}

	/* Pass the packet if decided and there is no error. */
	if (decision == NPF_DECISION_PASS && !error) {
		/*
		 * XXX: Disable for now, it will be set accordingly later,
		 * for optimisations (to reduce inspection).
		 */
		m_clear_flag(*mp, M_CANFASTFWD);
		return 0;
	}

	/*
	 * Block the packet.  ENETUNREACH is used to indicate blocking.
	 * Depending on the flags and protocol, return TCP reset (RST) or
	 * ICMP destination unreachable.
	 */
	if (mi.mi_retfl && npf_return_block(&npc, mi.mi_retfl)) {
		/* npf_return_block() took ownership of the mbuf. */
		*mp = NULL;
	}

	if (!error) {
		error = ENETUNREACH;
	}

	/* Free the mbuf chain.  (m_freem(NULL) is a no-op.) */
	m_freem(*mp);
	*mp = NULL;
	return error;
}

/*
 * npfk_layer2_handler: packet handling routine for layer 2 (Ethernet).
 *
 * A simplified variant of npfk_packet_handler(): caches only the
 * Ethernet header, inspects the layer-2 ruleset and defaults to pass
 * when no rule matches.  No connection tracking, NAT or rule
 * procedures at this layer.
 *
 * => Same return convention as npfk_packet_handler(): 0 to pass,
 *    ENETUNREACH (or another error) when the frame was blocked/freed.
 */
__dso_public int
npfk_layer2_handler(npf_t *npf, struct mbuf **mp, ifnet_t *ifp, int di)
{
	nbuf_t nbuf;
	npf_cache_t npc;
	npf_rule_t *rl;
	int error, decision, flags;
	npf_match_info_t mi;

	KASSERT(ifp != NULL);

	/*
	 * as usual, get packet info
	 * including the interface the frame is traveling on
	 */
	nbuf_init(npf, &nbuf, *mp, ifp);
	memset(&npc, 0, sizeof(npc));
	npc.npc_ctx = npf;
	npc.npc_nbuf = &nbuf;

	mi.mi_di = di;
	mi.mi_rid = 0;
	mi.mi_retfl = 0;

	*mp = NULL;
	decision = NPF_DECISION_BLOCK;
	error = 0;

	/* Cache only ether header. */
	flags = npf_cache_ether(&npc);

	/* Malformed packet, leave quickly. */
	if (flags & NPC_FMTERR) {
		error = EINVAL;
		goto out;
	}

	/* Just pass-through if specially tagged. */
	if (npf_packet_bypass_tag_p(&nbuf)) {
		goto pass;
	}

	/* Acquire the lock, inspect the ruleset using this packet. */
	int slock = npf_config_read_enter(npf);
	npf_ruleset_t *rlset = npf_config_ruleset(npf);

	rl = npf_ruleset_inspect(&npc, rlset, di, NPF_RULE_LAYER_2);
	if (__predict_false(rl == NULL)) {
		/* No layer-2 rule matched: the default here is to pass. */
		npf_config_read_exit(npf, slock);

		npf_stats_inc(npf, NPF_STAT_PASS_DEFAULT);
		goto pass;
	}

	/* Conclude with the rule and release the lock. */
	error = npf_rule_conclude(rl, &mi);
	npf_config_read_exit(npf, slock);

	if (error) {
		npf_stats_inc(npf, NPF_ETHER_STAT_BLOCK);
		goto out;
	}
	npf_stats_inc(npf, NPF_ETHER_STAT_PASS);

pass:
	decision = NPF_DECISION_PASS;
	KASSERT(error == 0);

out:

	/* Get the new mbuf pointer. */
	if ((*mp = nbuf_head_mbuf(&nbuf)) == NULL) {
		return error ? error : ENOMEM;
	}

	/* Pass the packet if decided and there is no error. */
	if (decision == NPF_DECISION_PASS && !error) {
		return 0;
	}

	if (!error) {
		/* ENETUNREACH indicates an explicit block decision. */
		error = ENETUNREACH;
	}

	if (*mp) {
		/* Free the mbuf chain. */
		m_freem(*mp);
		*mp = NULL;
	}
	return error;
}