npf_handler.c revision 1.54 1 1.1 rmind /*-
2 1.49 rmind * Copyright (c) 2020 Mindaugas Rasiukevicius <rmind at noxt eu>
3 1.52 joe * Copyright (c) 2009-2025 The NetBSD Foundation, Inc.
4 1.1 rmind * All rights reserved.
5 1.1 rmind *
6 1.1 rmind * This material is based upon work partially supported by The
7 1.1 rmind * NetBSD Foundation under a contract with Mindaugas Rasiukevicius.
8 1.1 rmind *
9 1.1 rmind * Redistribution and use in source and binary forms, with or without
10 1.1 rmind * modification, are permitted provided that the following conditions
11 1.1 rmind * are met:
12 1.1 rmind * 1. Redistributions of source code must retain the above copyright
13 1.1 rmind * notice, this list of conditions and the following disclaimer.
14 1.1 rmind * 2. Redistributions in binary form must reproduce the above copyright
15 1.1 rmind * notice, this list of conditions and the following disclaimer in the
16 1.1 rmind * documentation and/or other materials provided with the distribution.
17 1.1 rmind *
18 1.1 rmind * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
19 1.1 rmind * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
20 1.1 rmind * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
21 1.1 rmind * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
22 1.1 rmind * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23 1.1 rmind * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24 1.1 rmind * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25 1.1 rmind * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
26 1.1 rmind * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
27 1.1 rmind * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28 1.1 rmind * POSSIBILITY OF SUCH DAMAGE.
29 1.1 rmind */
30 1.1 rmind
31 1.1 rmind /*
32 1.1 rmind * NPF packet handler.
33 1.28 rmind *
34 1.49 rmind * This is the main entry point to the NPF where packet processing happens.
35 1.49 rmind * There are some important synchronization rules:
36 1.49 rmind *
37 1.49 rmind * 1) Lookups into the connection database and configuration (ruleset,
38 1.49 rmind * tables, etc) are protected by Epoch-Based Reclamation (EBR);
39 1.49 rmind *
40 1.49 rmind * 2) The code in the critical path (protected by EBR) should generally
41 1.49 rmind * not block (that includes adaptive mutex acquisitions);
42 1.49 rmind *
43 1.49 rmind * 3) Where it will block, references should be acquired atomically,
44 1.49 rmind * while in the critical path, on the relevant objects.
45 1.1 rmind */
46 1.1 rmind
47 1.35 christos #ifdef _KERNEL
48 1.1 rmind #include <sys/cdefs.h>
49 1.54 joe __KERNEL_RCSID(0, "$NetBSD: npf_handler.c,v 1.54 2025/07/08 15:56:23 joe Exp $");
50 1.1 rmind
51 1.14 rmind #include <sys/types.h>
52 1.1 rmind #include <sys/param.h>
53 1.1 rmind
54 1.1 rmind #include <sys/mbuf.h>
55 1.1 rmind #include <sys/mutex.h>
56 1.1 rmind #include <net/if.h>
57 1.1 rmind #include <net/pfil.h>
58 1.1 rmind #include <sys/socketvar.h>
59 1.1 rmind
60 1.4 rmind #include <netinet/in_systm.h>
61 1.4 rmind #include <netinet/in.h>
62 1.4 rmind #include <netinet/ip_var.h>
63 1.8 zoltan #include <netinet/ip6.h>
64 1.8 zoltan #include <netinet6/ip6_var.h>
65 1.35 christos #endif
66 1.4 rmind
67 1.1 rmind #include "npf_impl.h"
68 1.31 rmind #include "npf_conn.h"
69 1.1 rmind
/*
 * Portability shims.
 *
 * Standalone builds (_NPF_STANDALONE) route m_freem() through the
 * instance's mbufops->free hook and turn m_clear_flag() into a no-op.
 * Note that the standalone m_freem() expands to a reference to a variable
 * named `npf', which therefore must be in scope at every call site.
 *
 * All replacement lists are fully parenthesized, so each macro expands to
 * a single well-formed expression in every configuration and can safely
 * be used inside a larger expression or as the sole body of a statement.
 */
#if defined(_NPF_STANDALONE)
#define	m_freem(m)		(npf->mbufops->free(m))
#define	m_clear_flag(m, f)	((void)0)
#else
#define	m_clear_flag(m, f)	((m)->m_flags &= ~(f))
#endif

/*
 * Without INET6 there is no IPv6 reassembly routine: report ENOTSUP.
 * The arguments are intentionally not evaluated.
 */
#ifndef INET6
#define	ip6_reass_packet(x, y)	(ENOTSUP)
#endif
80 1.26 rmind
81 1.24 rmind static int
82 1.42 maxv npf_reassembly(npf_t *npf, npf_cache_t *npc, bool *mff)
83 1.24 rmind {
84 1.32 rmind nbuf_t *nbuf = npc->npc_nbuf;
85 1.24 rmind int error = EINVAL;
86 1.42 maxv struct mbuf *m;
87 1.42 maxv
88 1.42 maxv *mff = false;
89 1.42 maxv m = nbuf_head_mbuf(nbuf);
90 1.24 rmind
91 1.49 rmind if (npf_iscached(npc, NPC_IP4) && npf->ip4_reassembly) {
92 1.43 maxv error = ip_reass_packet(&m);
93 1.49 rmind } else if (npf_iscached(npc, NPC_IP6) && npf->ip6_reassembly) {
94 1.42 maxv error = ip6_reass_packet(&m, npc->npc_hlen);
95 1.49 rmind } else {
96 1.49 rmind /*
97 1.49 rmind * Reassembly is disabled: just pass the packet through
98 1.49 rmind * the ruleset for inspection.
99 1.49 rmind */
100 1.49 rmind return 0;
101 1.24 rmind }
102 1.44 maxv
103 1.24 rmind if (error) {
104 1.49 rmind /* Reassembly failed; free the mbuf, clear the nbuf. */
105 1.35 christos npf_stats_inc(npf, NPF_STAT_REASSFAIL);
106 1.44 maxv m_freem(m);
107 1.44 maxv memset(nbuf, 0, sizeof(nbuf_t));
108 1.24 rmind return error;
109 1.24 rmind }
110 1.42 maxv if (m == NULL) {
111 1.24 rmind /* More fragments should come. */
112 1.35 christos npf_stats_inc(npf, NPF_STAT_FRAGMENTS);
113 1.42 maxv *mff = true;
114 1.24 rmind return 0;
115 1.24 rmind }
116 1.24 rmind
117 1.24 rmind /*
118 1.24 rmind * Reassembly is complete, we have the final packet.
119 1.24 rmind * Cache again, since layer 4 data is accessible now.
120 1.24 rmind */
121 1.42 maxv nbuf_init(npf, nbuf, m, nbuf->nb_ifp);
122 1.24 rmind npc->npc_info = 0;
123 1.24 rmind
124 1.38 maxv if (npf_cache_all(npc) & (NPC_IPFRAG|NPC_FMTERR)) {
125 1.24 rmind return EINVAL;
126 1.24 rmind }
127 1.35 christos npf_stats_inc(npf, NPF_STAT_REASSEMBLY);
128 1.24 rmind return 0;
129 1.24 rmind }
130 1.24 rmind
131 1.49 rmind static inline bool
132 1.49 rmind npf_packet_bypass_tag_p(nbuf_t *nbuf)
133 1.49 rmind {
134 1.49 rmind uint32_t ntag;
135 1.49 rmind return nbuf_find_tag(nbuf, &ntag) == 0 && (ntag & NPF_NTAG_PASS) != 0;
136 1.49 rmind }
137 1.49 rmind
138 1.1 rmind /*
139 1.47 rmind * npfk_packet_handler: main packet handling routine for layer 3.
140 1.1 rmind *
141 1.1 rmind * Note: packet flow and inspection logic is in strict order.
142 1.1 rmind */
143 1.35 christos __dso_public int
144 1.47 rmind npfk_packet_handler(npf_t *npf, struct mbuf **mp, ifnet_t *ifp, int di)
145 1.1 rmind {
146 1.24 rmind nbuf_t nbuf;
147 1.1 rmind npf_cache_t npc;
148 1.31 rmind npf_conn_t *con;
149 1.1 rmind npf_rule_t *rl;
150 1.5 rmind npf_rproc_t *rp;
151 1.51 joe int error, decision, flags, id_match;
152 1.36 christos npf_match_info_t mi;
153 1.42 maxv bool mff;
154 1.1 rmind
155 1.35 christos KASSERT(ifp != NULL);
156 1.35 christos
157 1.1 rmind /*
158 1.49 rmind * Initialize packet information cache.
159 1.1 rmind * Note: it is enough to clear the info bits.
160 1.1 rmind */
161 1.49 rmind nbuf_init(npf, &nbuf, *mp, ifp);
162 1.49 rmind memset(&npc, 0, sizeof(npf_cache_t));
163 1.35 christos npc.npc_ctx = npf;
164 1.32 rmind npc.npc_nbuf = &nbuf;
165 1.32 rmind
166 1.36 christos mi.mi_di = di;
167 1.36 christos mi.mi_rid = 0;
168 1.36 christos mi.mi_retfl = 0;
169 1.36 christos
170 1.42 maxv *mp = NULL;
171 1.14 rmind decision = NPF_DECISION_BLOCK;
172 1.2 rmind error = 0;
173 1.5 rmind rp = NULL;
174 1.40 maxv con = NULL;
175 1.1 rmind
176 1.38 maxv /* Cache everything. */
177 1.37 christos flags = npf_cache_all(&npc);
178 1.38 maxv
179 1.49 rmind /* Malformed packet, leave quickly. */
180 1.38 maxv if (flags & NPC_FMTERR) {
181 1.38 maxv error = EINVAL;
182 1.42 maxv goto out;
183 1.38 maxv }
184 1.38 maxv
185 1.38 maxv /* Determine whether it is an IP fragment. */
186 1.37 christos if (__predict_false(flags & NPC_IPFRAG)) {
187 1.42 maxv /* Pass to IPv4/IPv6 reassembly mechanism. */
188 1.42 maxv error = npf_reassembly(npf, &npc, &mff);
189 1.18 rmind if (error) {
190 1.4 rmind goto out;
191 1.4 rmind }
192 1.42 maxv if (mff) {
193 1.42 maxv /* More fragments should come. */
194 1.4 rmind return 0;
195 1.4 rmind }
196 1.4 rmind }
197 1.4 rmind
198 1.34 rmind /* Just pass-through if specially tagged. */
199 1.49 rmind if (npf_packet_bypass_tag_p(&nbuf)) {
200 1.34 rmind goto pass;
201 1.34 rmind }
202 1.34 rmind
203 1.31 rmind /* Inspect the list of connections (if found, acquires a reference). */
204 1.32 rmind con = npf_conn_inspect(&npc, di, &error);
205 1.2 rmind
206 1.31 rmind /* If "passing" connection found - skip the ruleset inspection. */
207 1.36 christos if (con && npf_conn_pass(con, &mi, &rp)) {
208 1.35 christos npf_stats_inc(npf, NPF_STAT_PASS_CONN);
209 1.14 rmind KASSERT(error == 0);
210 1.2 rmind goto pass;
211 1.14 rmind }
212 1.32 rmind if (__predict_false(error)) {
213 1.24 rmind if (error == ENETUNREACH)
214 1.24 rmind goto block;
215 1.24 rmind goto out;
216 1.2 rmind }
217 1.1 rmind
218 1.7 rmind /* Acquire the lock, inspect the ruleset using this packet. */
219 1.48 rmind int slock = npf_config_read_enter(npf);
220 1.35 christos npf_ruleset_t *rlset = npf_config_ruleset(npf);
221 1.26 rmind
222 1.53 joe rl = npf_ruleset_inspect(&npc, rlset, di, NPF_RULE_LAYER_3);
223 1.32 rmind if (__predict_false(rl == NULL)) {
224 1.35 christos const bool pass = npf_default_pass(npf);
225 1.48 rmind npf_config_read_exit(npf, slock);
226 1.14 rmind
227 1.26 rmind if (pass) {
228 1.35 christos npf_stats_inc(npf, NPF_STAT_PASS_DEFAULT);
229 1.2 rmind goto pass;
230 1.2 rmind }
231 1.35 christos npf_stats_inc(npf, NPF_STAT_BLOCK_DEFAULT);
232 1.6 rmind goto block;
233 1.1 rmind }
234 1.1 rmind
235 1.13 rmind /*
236 1.24 rmind * Get the rule procedure (acquires a reference) for association
237 1.31 rmind * with a connection (if any) and execution.
238 1.13 rmind */
239 1.6 rmind KASSERT(rp == NULL);
240 1.13 rmind rp = npf_rule_getrproc(rl);
241 1.6 rmind
242 1.51 joe /* check for matching process uid/gid before concluding */
243 1.51 joe id_match = npf_rule_match_rid(rl, &npc, di);
244 1.51 joe
245 1.26 rmind /* Conclude with the rule and release the lock. */
246 1.36 christos error = npf_rule_conclude(rl, &mi);
247 1.48 rmind npf_config_read_exit(npf, slock);
248 1.26 rmind
249 1.51 joe /* reverse between pass and block conditions */
250 1.51 joe if (id_match != -1 && !id_match) {
251 1.51 joe error = npf_rule_reverse(&npc, &mi, error);
252 1.51 joe }
253 1.51 joe
254 1.51 joe /* reject packets whose addr-port pair matches no sockets */
255 1.51 joe if (id_match == ENOTCONN || error) {
256 1.35 christos npf_stats_inc(npf, NPF_STAT_BLOCK_RULESET);
257 1.6 rmind goto block;
258 1.1 rmind }
259 1.35 christos npf_stats_inc(npf, NPF_STAT_PASS_RULESET);
260 1.1 rmind
261 1.14 rmind /*
262 1.31 rmind * Establish a "pass" connection, if required. Just proceed if
263 1.31 rmind * connection creation fails (e.g. due to unsupported protocol).
264 1.14 rmind */
265 1.36 christos if ((mi.mi_retfl & NPF_RULE_STATEFUL) != 0 && !con) {
266 1.32 rmind con = npf_conn_establish(&npc, di,
267 1.46 rmind (mi.mi_retfl & NPF_RULE_GSTATEFUL) == 0);
268 1.31 rmind if (con) {
269 1.26 rmind /*
270 1.26 rmind * Note: the reference on the rule procedure is
271 1.49 rmind * transferred to the connection. It will be
272 1.31 rmind * released on connection destruction.
273 1.26 rmind */
274 1.36 christos npf_conn_setpass(con, &mi, rp);
275 1.2 rmind }
276 1.1 rmind }
277 1.42 maxv
278 1.2 rmind pass:
279 1.14 rmind decision = NPF_DECISION_PASS;
280 1.2 rmind KASSERT(error == 0);
281 1.48 rmind
282 1.5 rmind /*
283 1.6 rmind * Perform NAT.
284 1.6 rmind */
285 1.32 rmind error = npf_do_nat(&npc, con, di);
286 1.42 maxv
287 1.6 rmind block:
288 1.6 rmind /*
289 1.22 rmind * Execute the rule procedure, if any is associated.
290 1.22 rmind * It may reverse the decision from pass to block.
291 1.5 rmind */
292 1.36 christos if (rp && !npf_rproc_run(&npc, rp, &mi, &decision)) {
293 1.31 rmind if (con) {
294 1.31 rmind npf_conn_release(con);
295 1.30 jakllsch }
296 1.30 jakllsch npf_rproc_release(rp);
297 1.42 maxv /* mbuf already freed */
298 1.30 jakllsch return 0;
299 1.5 rmind }
300 1.42 maxv
301 1.1 rmind out:
302 1.13 rmind /*
303 1.31 rmind * Release the reference on a connection. Release the reference
304 1.31 rmind * on a rule procedure only if there was no association.
305 1.13 rmind */
306 1.31 rmind if (con) {
307 1.31 rmind npf_conn_release(con);
308 1.6 rmind } else if (rp) {
309 1.13 rmind npf_rproc_release(rp);
310 1.1 rmind }
311 1.1 rmind
312 1.42 maxv /* Get the new mbuf pointer. */
313 1.24 rmind if ((*mp = nbuf_head_mbuf(&nbuf)) == NULL) {
314 1.25 rmind return error ? error : ENOMEM;
315 1.24 rmind }
316 1.24 rmind
317 1.14 rmind /* Pass the packet if decided and there is no error. */
318 1.14 rmind if (decision == NPF_DECISION_PASS && !error) {
319 1.3 rmind /*
320 1.3 rmind * XXX: Disable for now, it will be set accordingly later,
321 1.3 rmind * for optimisations (to reduce inspection).
322 1.3 rmind */
323 1.35 christos m_clear_flag(*mp, M_CANFASTFWD);
324 1.13 rmind return 0;
325 1.1 rmind }
326 1.13 rmind
327 1.13 rmind /*
328 1.13 rmind * Block the packet. ENETUNREACH is used to indicate blocking.
329 1.13 rmind * Depending on the flags and protocol, return TCP reset (RST) or
330 1.13 rmind * ICMP destination unreachable.
331 1.13 rmind */
332 1.36 christos if (mi.mi_retfl && npf_return_block(&npc, mi.mi_retfl)) {
333 1.16 rmind *mp = NULL;
334 1.13 rmind }
335 1.16 rmind
336 1.20 rmind if (!error) {
337 1.14 rmind error = ENETUNREACH;
338 1.13 rmind }
339 1.13 rmind
340 1.50 rin /* Free the mbuf chain. */
341 1.50 rin m_freem(*mp);
342 1.50 rin *mp = NULL;
343 1.1 rmind return error;
344 1.1 rmind }
345 1.53 joe
346 1.53 joe __dso_public int
347 1.53 joe npfk_layer2_handler(npf_t *npf, struct mbuf **mp, ifnet_t *ifp, int di)
348 1.53 joe {
349 1.53 joe nbuf_t nbuf;
350 1.53 joe npf_cache_t npc;
351 1.53 joe npf_rule_t *rl;
352 1.53 joe int error, decision, flags;
353 1.53 joe npf_match_info_t mi;
354 1.53 joe
355 1.53 joe KASSERT(ifp != NULL);
356 1.53 joe
357 1.53 joe /*
358 1.53 joe * as usual, get packet info
359 1.53 joe * including the interface the frame is traveling on
360 1.53 joe */
361 1.53 joe nbuf_init(npf, &nbuf, *mp, ifp);
362 1.53 joe memset(&npc, 0, sizeof(npc));
363 1.53 joe npc.npc_ctx = npf;
364 1.53 joe npc.npc_nbuf = &nbuf;
365 1.53 joe
366 1.53 joe mi.mi_di = di;
367 1.53 joe mi.mi_rid = 0;
368 1.53 joe mi.mi_retfl = 0;
369 1.53 joe
370 1.53 joe *mp = NULL;
371 1.53 joe decision = NPF_DECISION_BLOCK;
372 1.53 joe error = 0;
373 1.53 joe
374 1.53 joe /* Cache only ether header. */
375 1.53 joe flags = npf_cache_ether(&npc);
376 1.53 joe
377 1.53 joe /* Malformed packet, leave quickly. */
378 1.53 joe if (flags & NPC_FMTERR) {
379 1.53 joe error = EINVAL;
380 1.53 joe goto out;
381 1.53 joe }
382 1.53 joe
383 1.53 joe /* Just pass-through if specially tagged. */
384 1.53 joe if (npf_packet_bypass_tag_p(&nbuf)) {
385 1.53 joe goto pass;
386 1.53 joe }
387 1.53 joe
388 1.53 joe /* Acquire the lock, inspect the ruleset using this packet. */
389 1.53 joe int slock = npf_config_read_enter(npf);
390 1.53 joe npf_ruleset_t *rlset = npf_config_ruleset(npf);
391 1.53 joe
392 1.53 joe rl = npf_ruleset_inspect(&npc, rlset, di, NPF_RULE_LAYER_2);
393 1.53 joe if (__predict_false(rl == NULL)) {
394 1.53 joe npf_config_read_exit(npf, slock);
395 1.53 joe
396 1.54 joe npf_stats_inc(npf, NPF_STAT_PASS_DEFAULT);
397 1.54 joe goto pass;
398 1.53 joe }
399 1.53 joe
400 1.53 joe /* Conclude with the rule and release the lock. */
401 1.53 joe error = npf_rule_conclude(rl, &mi);
402 1.53 joe npf_config_read_exit(npf, slock);
403 1.53 joe
404 1.53 joe if (error) {
405 1.53 joe npf_stats_inc(npf, NPF_ETHER_STAT_BLOCK);
406 1.53 joe goto out;
407 1.53 joe }
408 1.53 joe npf_stats_inc(npf, NPF_ETHER_STAT_PASS);
409 1.53 joe
410 1.53 joe pass:
411 1.53 joe decision = NPF_DECISION_PASS;
412 1.53 joe KASSERT(error == 0);
413 1.53 joe
414 1.53 joe out:
415 1.53 joe
416 1.53 joe /* Get the new mbuf pointer. */
417 1.53 joe if ((*mp = nbuf_head_mbuf(&nbuf)) == NULL) {
418 1.53 joe return error ? error : ENOMEM;
419 1.53 joe }
420 1.53 joe
421 1.53 joe /* Pass the packet if decided and there is no error. */
422 1.53 joe if (decision == NPF_DECISION_PASS && !error) {
423 1.53 joe return 0;
424 1.53 joe }
425 1.53 joe
426 1.53 joe if (!error) {
427 1.53 joe error = ENETUNREACH;
428 1.53 joe }
429 1.53 joe
430 1.53 joe if (*mp) {
431 1.53 joe /* Free the mbuf chain. */
432 1.53 joe m_freem(*mp);
433 1.53 joe *mp = NULL;
434 1.53 joe }
435 1.53 joe return error;
436 1.53 joe }
437