/*-
 * Copyright (c) 2020 Mindaugas Rasiukevicius <rmind at noxt eu>
 * Copyright (c) 2009-2025 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This material is based upon work partially supported by The
 * NetBSD Foundation under a contract with Mindaugas Rasiukevicius.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * NPF packet handler.
 *
 * This is the main entry point to the NPF where packet processing happens.
 * There are some important synchronization rules:
 *
 *	1) Lookups into the connection database and configuration (ruleset,
 *	tables, etc) are protected by Epoch-Based Reclamation (EBR);
 *
 *	2) The code in the critical path (protected by EBR) should generally
 *	not block (that includes adaptive mutex acquisitions);
 *
 *	3) Where the code may block, references on the relevant objects
 *	should be acquired atomically while still in the critical path.
 */

#ifdef _KERNEL
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: npf_handler.c,v 1.53 2025/07/01 18:42:37 joe Exp $");

#include <sys/types.h>
#include <sys/param.h>

#include <sys/mbuf.h>
#include <sys/mutex.h>
#include <net/if.h>
#include <net/pfil.h>
#include <sys/socketvar.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/ip_var.h>
#include <netinet/ip6.h>
#include <netinet6/ip6_var.h>
#endif

#include "npf_impl.h"
#include "npf_conn.h"

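/*
 * In the standalone build, mbuf operations are not provided by the
 * kernel; they are supplied by the library user through npf->mbufops.
 */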
#if defined(_NPF_STANDALONE)
#define	m_freem(m)		npf->mbufops->free(m)
#define	m_clear_flag(m,f)
#else
#define	m_clear_flag(m,f)	(m)->m_flags &= ~(f)
#endif

#ifndef INET6
#define	ip6_reass_packet(x, y)	ENOTSUP
#endif

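/*
 * npf_reassembly: pass an IPv4/IPv6 fragment to the relevant reassembly
 * mechanism, if enabled.  Sets *mff to true if more fragments are
 * expected.  Once the final fragment arrives, re-caches the packet, as
 * the layer 4 data becomes accessible.  On reassembly failure, frees
 * the mbuf, clears the nbuf and returns an error.
 */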
static int
npf_reassembly(npf_t *npf, npf_cache_t *npc, bool *mff)
{
	nbuf_t *nbuf = npc->npc_nbuf;
	int error = EINVAL;
	struct mbuf *m;

	*mff = false;
	m = nbuf_head_mbuf(nbuf);

	if (npf_iscached(npc, NPC_IP4) && npf->ip4_reassembly) {
		error = ip_reass_packet(&m);
	} else if (npf_iscached(npc, NPC_IP6) && npf->ip6_reassembly) {
		error = ip6_reass_packet(&m, npc->npc_hlen);
	} else {
		/*
		 * Reassembly is disabled: just pass the packet through
		 * the ruleset for inspection.
		 */
		return 0;
	}

	if (error) {
		/* Reassembly failed; free the mbuf, clear the nbuf. */
		npf_stats_inc(npf, NPF_STAT_REASSFAIL);
		m_freem(m);
		memset(nbuf, 0, sizeof(nbuf_t));
		return error;
	}
	if (m == NULL) {
		/* More fragments should come. */
		npf_stats_inc(npf, NPF_STAT_FRAGMENTS);
		*mff = true;
		return 0;
	}

	/*
	 * Reassembly is complete; we have the final packet.
	 * Cache again, since the layer 4 data is now accessible.
	 */
	nbuf_init(npf, nbuf, m, nbuf->nb_ifp);
	npc->npc_info = 0;

	if (npf_cache_all(npc) & (NPC_IPFRAG|NPC_FMTERR)) {
		return EINVAL;
	}
	npf_stats_inc(npf, NPF_STAT_REASSEMBLY);
	return 0;
}

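/*
 * npf_packet_bypass_tag_p: true if the packet carries a tag with the
 * NPF_NTAG_PASS flag set, indicating that it should bypass inspection.
 */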
static inline bool
npf_packet_bypass_tag_p(nbuf_t *nbuf)
{
	uint32_t ntag;
	return nbuf_find_tag(nbuf, &ntag) == 0 && (ntag & NPF_NTAG_PASS) != 0;
}

/*
 * npfk_packet_handler: main packet handling routine for layer 3.
 *
 * Note: packet flow and inspection logic is in strict order.
 */
__dso_public int
npfk_packet_handler(npf_t *npf, struct mbuf **mp, ifnet_t *ifp, int di)
{
	nbuf_t nbuf;
	npf_cache_t npc;
	npf_conn_t *con;
	npf_rule_t *rl;
	npf_rproc_t *rp;
	int error, decision, flags, id_match;
	npf_match_info_t mi;
	bool mff;

	KASSERT(ifp != NULL);

	/*
	 * Initialize the packet information cache.
	 * Note: it is enough to clear the info bits.
	 */
	nbuf_init(npf, &nbuf, *mp, ifp);
	memset(&npc, 0, sizeof(npf_cache_t));
	npc.npc_ctx = npf;
	npc.npc_nbuf = &nbuf;

	mi.mi_di = di;
	mi.mi_rid = 0;
	mi.mi_retfl = 0;

	*mp = NULL;
	decision = NPF_DECISION_BLOCK;
	error = 0;
	rp = NULL;
	con = NULL;

	/* Cache everything. */
	flags = npf_cache_all(&npc);

	/* Malformed packet, leave quickly. */
	if (flags & NPC_FMTERR) {
		error = EINVAL;
		goto out;
	}

	/* Determine whether it is an IP fragment. */
	if (__predict_false(flags & NPC_IPFRAG)) {
		/* Pass to the IPv4/IPv6 reassembly mechanism. */
		error = npf_reassembly(npf, &npc, &mff);
		if (error) {
			goto out;
		}
		if (mff) {
			/* More fragments should come. */
			return 0;
		}
	}

	/* Just pass-through if specially tagged. */
	if (npf_packet_bypass_tag_p(&nbuf)) {
		goto pass;
	}

	/* Inspect the list of connections (if found, acquires a reference). */
	con = npf_conn_inspect(&npc, di, &error);
	/* If a "passing" connection is found, skip the ruleset inspection. */
	if (con && npf_conn_pass(con, &mi, &rp)) {
		npf_stats_inc(npf, NPF_STAT_PASS_CONN);
		KASSERT(error == 0);
		goto pass;
	}
	if (__predict_false(error)) {
		if (error == ENETUNREACH)
			goto block;
		goto out;
	}

	/* Acquire the lock, inspect the ruleset using this packet. */
	int slock = npf_config_read_enter(npf);
	npf_ruleset_t *rlset = npf_config_ruleset(npf);

	rl = npf_ruleset_inspect(&npc, rlset, di, NPF_RULE_LAYER_3);
	if (__predict_false(rl == NULL)) {
		const bool pass = npf_default_pass(npf);
		npf_config_read_exit(npf, slock);

		if (pass) {
			npf_stats_inc(npf, NPF_STAT_PASS_DEFAULT);
			goto pass;
		}
		npf_stats_inc(npf, NPF_STAT_BLOCK_DEFAULT);
		goto block;
	}

	/*
	 * Get the rule procedure (acquires a reference) for association
	 * with a connection (if any) and execution.
	 */
	KASSERT(rp == NULL);
	rp = npf_rule_getrproc(rl);

	/* Check for a matching process uid/gid before concluding. */
	id_match = npf_rule_match_rid(rl, &npc, di);

	/* Conclude with the rule and release the lock. */
	error = npf_rule_conclude(rl, &mi);
	npf_config_read_exit(npf, slock);

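	/*
	 * npf_rule_match_rid() returns -1 if the rule has no uid/gid
	 * criteria; ENOTCONN if no socket matches the address-port pair
	 * of the packet; otherwise a boolean indicating whether the
	 * packet's credentials matched the rule.
	 */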
	/* A uid/gid mismatch reverses the decision between pass and block. */
	if (id_match != -1 && !id_match) {
		error = npf_rule_reverse(&npc, &mi, error);
	}

	/* Reject packets whose address-port pair matches no socket. */
	if (id_match == ENOTCONN || error) {
		npf_stats_inc(npf, NPF_STAT_BLOCK_RULESET);
		goto block;
	}
	npf_stats_inc(npf, NPF_STAT_PASS_RULESET);

	/*
	 * Establish a "pass" connection, if required.  Just proceed if
	 * connection creation fails (e.g. due to unsupported protocol).
	 */
	if ((mi.mi_retfl & NPF_RULE_STATEFUL) != 0 && !con) {
		con = npf_conn_establish(&npc, di,
		    (mi.mi_retfl & NPF_RULE_GSTATEFUL) == 0);
		if (con) {
			/*
			 * Note: the reference on the rule procedure is
			 * transferred to the connection.  It will be
			 * released on connection destruction.
			 */
			npf_conn_setpass(con, &mi, rp);
		}
	}

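	/*
	 * The "pass" path: reached via a bypass tag, a cached "pass"
	 * connection, the default-pass policy or a matching rule.
	 */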
pass:
	decision = NPF_DECISION_PASS;
	KASSERT(error == 0);

	/*
	 * Perform NAT.
	 */
	error = npf_do_nat(&npc, con, di);

block:
	/*
	 * Execute the rule procedure, if any is associated.
	 * It may reverse the decision from pass to block.
	 */
	if (rp && !npf_rproc_run(&npc, rp, &mi, &decision)) {
		if (con) {
			npf_conn_release(con);
		}
		npf_rproc_release(rp);
		/* mbuf already freed */
		return 0;
	}

out:
	/*
	 * Release the reference on a connection.  Release the reference
	 * on a rule procedure only if there was no association.
	 */
	if (con) {
		npf_conn_release(con);
	} else if (rp) {
		npf_rproc_release(rp);
	}

	/* Get the new mbuf pointer. */
	if ((*mp = nbuf_head_mbuf(&nbuf)) == NULL) {
		return error ? error : ENOMEM;
	}

	/* Pass the packet if decided and there is no error. */
	if (decision == NPF_DECISION_PASS && !error) {
		/*
		 * XXX: Disable for now, it will be set accordingly later,
		 * for optimisations (to reduce inspection).
		 */
		m_clear_flag(*mp, M_CANFASTFWD);
		return 0;
	}

	/*
	 * Block the packet.  ENETUNREACH is used to indicate blocking.
	 * Depending on the flags and protocol, return TCP reset (RST) or
	 * ICMP destination unreachable.
	 */
	if (mi.mi_retfl && npf_return_block(&npc, mi.mi_retfl)) {
		*mp = NULL;
	}

	if (!error) {
		error = ENETUNREACH;
	}

	/* Free the mbuf chain. */
	m_freem(*mp);
	*mp = NULL;
	return error;
}

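/*
 * npfk_layer2_handler: packet handling routine for layer 2 (Ethernet).
 *
 * Inspects only the Ethernet header against the layer 2 ruleset.
 * Unlike the layer 3 handler, it is stateless: there is no connection
 * tracking and no NAT.
 */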
__dso_public int
npfk_layer2_handler(npf_t *npf, struct mbuf **mp, ifnet_t *ifp, int di)
{
	nbuf_t nbuf;
	npf_cache_t npc;
	npf_rule_t *rl;
	int error, decision, flags;
	npf_match_info_t mi;

	KASSERT(ifp != NULL);

	/*
	 * As usual, initialize the packet information cache, including
	 * the interface the frame is traveling on.
	 */
	nbuf_init(npf, &nbuf, *mp, ifp);
	memset(&npc, 0, sizeof(npc));
	npc.npc_ctx = npf;
	npc.npc_nbuf = &nbuf;

	mi.mi_di = di;
	mi.mi_rid = 0;
	mi.mi_retfl = 0;

	*mp = NULL;
	decision = NPF_DECISION_BLOCK;
	error = 0;

	/* Cache only the ether header. */
	flags = npf_cache_ether(&npc);

	/* Malformed packet, leave quickly. */
	if (flags & NPC_FMTERR) {
		error = EINVAL;
		goto out;
	}

	/* Just pass-through if specially tagged. */
	if (npf_packet_bypass_tag_p(&nbuf)) {
		goto pass;
	}

	/* Acquire the lock, inspect the ruleset using this packet. */
	int slock = npf_config_read_enter(npf);
	npf_ruleset_t *rlset = npf_config_ruleset(npf);

	rl = npf_ruleset_inspect(&npc, rlset, di, NPF_RULE_LAYER_2);
	if (__predict_false(rl == NULL)) {
		const bool pass = npf_default_pass(npf);
		npf_config_read_exit(npf, slock);

		if (pass) {
			npf_stats_inc(npf, NPF_STAT_PASS_DEFAULT);
			goto pass;
		}
		npf_stats_inc(npf, NPF_STAT_BLOCK_DEFAULT);
		goto out;
	}

	/* Conclude with the rule and release the lock. */
	error = npf_rule_conclude(rl, &mi);
	npf_config_read_exit(npf, slock);

	if (error) {
		npf_stats_inc(npf, NPF_ETHER_STAT_BLOCK);
		goto out;
	}
	npf_stats_inc(npf, NPF_ETHER_STAT_PASS);

pass:
	decision = NPF_DECISION_PASS;
	KASSERT(error == 0);

out:
	/* Get the new mbuf pointer. */
	if ((*mp = nbuf_head_mbuf(&nbuf)) == NULL) {
		return error ? error : ENOMEM;
	}

	/* Pass the packet if decided and there is no error. */
	if (decision == NPF_DECISION_PASS && !error) {
		return 0;
	}

	if (!error) {
		error = ENETUNREACH;
	}

	if (*mp) {
		/* Free the mbuf chain. */
		m_freem(*mp);
		*mp = NULL;
	}
	return error;
}