      1 /*-
      2  * Copyright (c) 2020 Mindaugas Rasiukevicius <rmind at noxt eu>
      3  * Copyright (c) 2009-2025 The NetBSD Foundation, Inc.
      4  * All rights reserved.
      5  *
      6  * This material is based upon work partially supported by The
      7  * NetBSD Foundation under a contract with Mindaugas Rasiukevicius.
      8  *
      9  * Redistribution and use in source and binary forms, with or without
     10  * modification, are permitted provided that the following conditions
     11  * are met:
     12  * 1. Redistributions of source code must retain the above copyright
     13  *    notice, this list of conditions and the following disclaimer.
     14  * 2. Redistributions in binary form must reproduce the above copyright
     15  *    notice, this list of conditions and the following disclaimer in the
     16  *    documentation and/or other materials provided with the distribution.
     17  *
     18  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     19  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     20  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     21  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     22  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     23  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     24  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     25  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     26  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     27  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     28  * POSSIBILITY OF SUCH DAMAGE.
     29  */
     30 
     31 /*
     32  * NPF packet handler.
     33  *
     34  * This is the main entry point to the NPF where packet processing happens.
     35  * There are some important synchronization rules:
     36  *
     37  *	1) Lookups into the connection database and configuration (ruleset,
     38  *	tables, etc) are protected by Epoch-Based Reclamation (EBR);
     39  *
     40  *	2) The code in the critical path (protected by EBR) should generally
     41  *	not block (that includes adaptive mutex acquisitions);
     42  *
 *	3) Where it may block, references should be acquired atomically,
     44  *	while in the critical path, on the relevant objects.
     45  */
     46 
     47 #ifdef _KERNEL
     48 #include <sys/cdefs.h>
     49 __KERNEL_RCSID(0, "$NetBSD: npf_handler.c,v 1.54 2025/07/08 15:56:23 joe Exp $");
     50 
     51 #include <sys/types.h>
     52 #include <sys/param.h>
     53 
     54 #include <sys/mbuf.h>
     55 #include <sys/mutex.h>
     56 #include <net/if.h>
     57 #include <net/pfil.h>
     58 #include <sys/socketvar.h>
     59 
     60 #include <netinet/in_systm.h>
     61 #include <netinet/in.h>
     62 #include <netinet/ip_var.h>
     63 #include <netinet/ip6.h>
     64 #include <netinet6/ip6_var.h>
     65 #endif
     66 
     67 #include "npf_impl.h"
     68 #include "npf_conn.h"
     69 
     70 #if defined(_NPF_STANDALONE)
     71 #define	m_freem(m)		npf->mbufops->free(m)
     72 #define	m_clear_flag(m,f)
     73 #else
     74 #define	m_clear_flag(m,f)	(m)->m_flags &= ~(f)
     75 #endif
     76 
     77 #ifndef INET6
     78 #define ip6_reass_packet(x, y)	ENOTSUP
     79 #endif
     80 
/*
 * npf_reassembly: feed an IP fragment to the IPv4/IPv6 reassembly
 * machinery and, on completion, re-cache the packet so that layer 4
 * headers become visible.
 *
 * => Sets *mff to true if more fragments are expected (the fragment has
 *    been consumed by the reassembly queue and must not be processed
 *    further by the caller).
 * => On failure, the mbuf is freed and the nbuf is cleared; the caller
 *    must not touch the packet again.
 * => Returns 0 on success (or when reassembly is disabled), else errno.
 */
static int
npf_reassembly(npf_t *npf, npf_cache_t *npc, bool *mff)
{
	nbuf_t *nbuf = npc->npc_nbuf;
	int error = EINVAL;
	struct mbuf *m;

	*mff = false;
	m = nbuf_head_mbuf(nbuf);

	if (npf_iscached(npc, NPC_IP4) && npf->ip4_reassembly) {
		error = ip_reass_packet(&m);
	} else if (npf_iscached(npc, NPC_IP6) && npf->ip6_reassembly) {
		/* Note: npc_hlen is the offset to the fragment header. */
		error = ip6_reass_packet(&m, npc->npc_hlen);
	} else {
		/*
		 * Reassembly is disabled: just pass the packet through
		 * the ruleset for inspection.
		 */
		return 0;
	}

	if (error) {
		/* Reassembly failed; free the mbuf, clear the nbuf. */
		npf_stats_inc(npf, NPF_STAT_REASSFAIL);
		m_freem(m);
		memset(nbuf, 0, sizeof(nbuf_t));
		return error;
	}
	if (m == NULL) {
		/* More fragments should come. */
		npf_stats_inc(npf, NPF_STAT_FRAGMENTS);
		*mff = true;
		return 0;
	}

	/*
	 * Reassembly is complete, we have the final packet.
	 * Cache again, since layer 4 data is accessible now.
	 */
	nbuf_init(npf, nbuf, m, nbuf->nb_ifp);
	npc->npc_info = 0;

	/* A reassembled packet must not still be a fragment or malformed. */
	if (npf_cache_all(npc) & (NPC_IPFRAG|NPC_FMTERR)) {
		return EINVAL;
	}
	npf_stats_inc(npf, NPF_STAT_REASSEMBLY);
	return 0;
}
    130 
    131 static inline bool
    132 npf_packet_bypass_tag_p(nbuf_t *nbuf)
    133 {
    134 	uint32_t ntag;
    135 	return nbuf_find_tag(nbuf, &ntag) == 0 && (ntag & NPF_NTAG_PASS) != 0;
    136 }
    137 
/*
 * npfk_packet_handler: main packet handling routine for layer 3.
 *
 * => On entry, *mp is the packet; on return, *mp is either the (possibly
 *    reassembled / NAT-translated) mbuf chain to continue with, or NULL
 *    if the packet was consumed (queued fragment, blocked-and-freed, or
 *    freed by a rule procedure).
 * => Returns 0 to pass the packet, ENETUNREACH (or another errno) when
 *    it is blocked.
 *
 * Note: packet flow and inspection logic is in strict order.
 */
__dso_public int
npfk_packet_handler(npf_t *npf, struct mbuf **mp, ifnet_t *ifp, int di)
{
	nbuf_t nbuf;
	npf_cache_t npc;
	npf_conn_t *con;
	npf_rule_t *rl;
	npf_rproc_t *rp;
	int error, decision, flags, id_match;
	npf_match_info_t mi;
	bool mff;

	KASSERT(ifp != NULL);

	/*
	 * Initialize packet information cache.
	 * Note: it is enough to clear the info bits.
	 */
	nbuf_init(npf, &nbuf, *mp, ifp);
	memset(&npc, 0, sizeof(npf_cache_t));
	npc.npc_ctx = npf;
	npc.npc_nbuf = &nbuf;

	mi.mi_di = di;
	mi.mi_rid = 0;
	mi.mi_retfl = 0;

	/*
	 * Clear the caller's pointer: it is re-fetched from the nbuf at
	 * "out", since reassembly/processing may replace the mbuf chain.
	 */
	*mp = NULL;
	decision = NPF_DECISION_BLOCK;
	error = 0;
	rp = NULL;
	con = NULL;

	/* Cache everything. */
	flags = npf_cache_all(&npc);

	/* Malformed packet, leave quickly. */
	if (flags & NPC_FMTERR) {
		error = EINVAL;
		goto out;
	}

	/* Determine whether it is an IP fragment. */
	if (__predict_false(flags & NPC_IPFRAG)) {
		/* Pass to IPv4/IPv6 reassembly mechanism. */
		error = npf_reassembly(npf, &npc, &mff);
		if (error) {
			goto out;
		}
		if (mff) {
			/*
			 * More fragments should come.  The fragment was
			 * consumed by the reassembly queue; *mp stays NULL.
			 */
			return 0;
		}
	}

	/* Just pass-through if specially tagged. */
	if (npf_packet_bypass_tag_p(&nbuf)) {
		goto pass;
	}

	/* Inspect the list of connections (if found, acquires a reference). */
	con = npf_conn_inspect(&npc, di, &error);

	/* If "passing" connection found - skip the ruleset inspection. */
	if (con && npf_conn_pass(con, &mi, &rp)) {
		npf_stats_inc(npf, NPF_STAT_PASS_CONN);
		KASSERT(error == 0);
		goto pass;
	}
	if (__predict_false(error)) {
		/* ENETUNREACH indicates an explicit block of the packet. */
		if (error == ENETUNREACH)
			goto block;
		goto out;
	}

	/* Acquire the lock, inspect the ruleset using this packet. */
	int slock = npf_config_read_enter(npf);
	npf_ruleset_t *rlset = npf_config_ruleset(npf);

	rl = npf_ruleset_inspect(&npc, rlset, di, NPF_RULE_LAYER_3);
	if (__predict_false(rl == NULL)) {
		/* No rule matched: apply the configured default policy. */
		const bool pass = npf_default_pass(npf);
		npf_config_read_exit(npf, slock);

		if (pass) {
			npf_stats_inc(npf, NPF_STAT_PASS_DEFAULT);
			goto pass;
		}
		npf_stats_inc(npf, NPF_STAT_BLOCK_DEFAULT);
		goto block;
	}

	/*
	 * Get the rule procedure (acquires a reference) for association
	 * with a connection (if any) and execution.
	 */
	KASSERT(rp == NULL);
	rp = npf_rule_getrproc(rl);

	/* check for matching process uid/gid before concluding */
	/*
	 * NOTE(review): id_match appears to be -1 when the rule has no
	 * uid/gid criteria, 0/1 for mismatch/match, or ENOTCONN when no
	 * socket owns the addr-port pair — confirm with npf_rule_match_rid().
	 */
	id_match = npf_rule_match_rid(rl, &npc, di);

	/* Conclude with the rule and release the lock. */
	error = npf_rule_conclude(rl, &mi);
	npf_config_read_exit(npf, slock);

	/* reverse between pass and block conditions */
	if (id_match != -1 && !id_match) {
		error = npf_rule_reverse(&npc, &mi, error);
	}

	/* reject packets whose addr-port pair matches no sockets  */
	if (id_match == ENOTCONN || error) {
		npf_stats_inc(npf, NPF_STAT_BLOCK_RULESET);
		goto block;
	}
	npf_stats_inc(npf, NPF_STAT_PASS_RULESET);

	/*
	 * Establish a "pass" connection, if required.  Just proceed if
	 * connection creation fails (e.g. due to unsupported protocol).
	 */
	if ((mi.mi_retfl & NPF_RULE_STATEFUL) != 0 && !con) {
		con = npf_conn_establish(&npc, di,
		    (mi.mi_retfl & NPF_RULE_GSTATEFUL) == 0);
		if (con) {
			/*
			 * Note: the reference on the rule procedure is
			 * transferred to the connection.  It will be
			 * released on connection destruction.
			 */
			npf_conn_setpass(con, &mi, rp);
		}
	}

pass:
	decision = NPF_DECISION_PASS;
	KASSERT(error == 0);

	/*
	 * Perform NAT.
	 */
	error = npf_do_nat(&npc, con, di);

block:
	/*
	 * Execute the rule procedure, if any is associated.
	 * It may reverse the decision from pass to block.
	 */
	if (rp && !npf_rproc_run(&npc, rp, &mi, &decision)) {
		if (con) {
			npf_conn_release(con);
		}
		npf_rproc_release(rp);
		/* mbuf already freed */
		return 0;
	}

out:
	/*
	 * Release the reference on a connection.  Release the reference
	 * on a rule procedure only if there was no association.
	 */
	if (con) {
		npf_conn_release(con);
	} else if (rp) {
		npf_rproc_release(rp);
	}

	/* Get the new mbuf pointer. */
	if ((*mp = nbuf_head_mbuf(&nbuf)) == NULL) {
		return error ? error : ENOMEM;
	}

	/* Pass the packet if decided and there is no error. */
	if (decision == NPF_DECISION_PASS && !error) {
		/*
		 * XXX: Disable for now, it will be set accordingly later,
		 * for optimisations (to reduce inspection).
		 */
		m_clear_flag(*mp, M_CANFASTFWD);
		return 0;
	}

	/*
	 * Block the packet.  ENETUNREACH is used to indicate blocking.
	 * Depending on the flags and protocol, return TCP reset (RST) or
	 * ICMP destination unreachable.
	 */
	if (mi.mi_retfl && npf_return_block(&npc, mi.mi_retfl)) {
		/* npf_return_block() consumed the mbuf. */
		*mp = NULL;
	}

	if (!error) {
		error = ENETUNREACH;
	}

	/* Free the mbuf chain.  Note: m_freem(NULL) is a no-op. */
	m_freem(*mp);
	*mp = NULL;
	return error;
}
    345 
/*
 * npfk_layer2_handler: packet handling routine for layer 2 (Ethernet).
 *
 * Unlike the layer 3 handler, this path is stateless: only the Ethernet
 * header is cached and only the layer 2 ruleset is inspected — there is
 * no connection tracking, NAT or rule-procedure execution here.
 *
 * => Returns 0 to pass the frame (with *mp updated), or an errno when
 *    it is blocked, in which case the mbuf chain is freed and *mp is
 *    set to NULL.
 */
__dso_public int
npfk_layer2_handler(npf_t *npf, struct mbuf **mp, ifnet_t *ifp, int di)
{
	nbuf_t nbuf;
	npf_cache_t npc;
	npf_rule_t *rl;
	int error, decision, flags;
	npf_match_info_t mi;

	KASSERT(ifp != NULL);

	/*
	 * as usual, get packet info
	 * including the interface the frame is traveling on
	 */
	nbuf_init(npf, &nbuf, *mp, ifp);
	memset(&npc, 0, sizeof(npc));
	npc.npc_ctx = npf;
	npc.npc_nbuf = &nbuf;

	mi.mi_di = di;
	mi.mi_rid = 0;
	mi.mi_retfl = 0;

	/* Cleared here; re-fetched from the nbuf at "out". */
	*mp = NULL;
	decision = NPF_DECISION_BLOCK;
	error = 0;

	/* Cache only ether header. */
	flags = npf_cache_ether(&npc);

	/* Malformed packet, leave quickly. */
	if (flags & NPC_FMTERR) {
		error = EINVAL;
		goto out;
	}

	/* Just pass-through if specially tagged. */
	if (npf_packet_bypass_tag_p(&nbuf)) {
		goto pass;
	}

	/* Acquire the lock, inspect the ruleset using this packet. */
	int slock = npf_config_read_enter(npf);
	npf_ruleset_t *rlset = npf_config_ruleset(npf);

	rl = npf_ruleset_inspect(&npc, rlset, di, NPF_RULE_LAYER_2);
	if (__predict_false(rl == NULL)) {
		/* No layer 2 rule matched: the default at this layer is pass. */
		npf_config_read_exit(npf, slock);

		npf_stats_inc(npf, NPF_STAT_PASS_DEFAULT);
		goto pass;
	}

	/* Conclude with the rule and release the lock. */
	error = npf_rule_conclude(rl, &mi);
	npf_config_read_exit(npf, slock);

	if (error) {
		npf_stats_inc(npf, NPF_ETHER_STAT_BLOCK);
		goto out;
	}
	npf_stats_inc(npf, NPF_ETHER_STAT_PASS);

pass:
	decision = NPF_DECISION_PASS;
	KASSERT(error == 0);

out:

	/* Get the new mbuf pointer. */
	if ((*mp = nbuf_head_mbuf(&nbuf)) == NULL) {
		return error ? error : ENOMEM;
	}

	/* Pass the packet if decided and there is no error. */
	if (decision == NPF_DECISION_PASS && !error) {
		return 0;
	}

	/* Block: ENETUNREACH is the conventional "blocked" error. */
	if (!error) {
		error = ENETUNREACH;
	}

	if (*mp) {
		/* Free the mbuf chain. */
		m_freem(*mp);
		*mp = NULL;
	}
	return error;
}
    437