/*-
 * Copyright (c) 2014-2020 Mindaugas Rasiukevicius <rmind at noxt eu>
 * Copyright (c) 2010-2014 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This material is based upon work partially supported by The
 * NetBSD Foundation under a contract with Mindaugas Rasiukevicius.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * NPF connection tracking for stateful filtering and translation.
 *
 * Overview
 *
 *	Packets can be incoming or outgoing with respect to an interface.
 *	Connection direction is identified by the direction of its first
 *	packet.  The meaning of incoming/outgoing packet in the context of
 *	connection direction can be confusing.  Therefore, we will use the
 *	terms "forwards stream" and "backwards stream", where packets in
 *	the forwards stream mean the packets travelling in the direction
 *	as the connection direction.
 *
 *	All connections have two keys and thus two entries:
 *
 *	- npf_conn_getforwkey(con) -- for the forwards stream;
 *	- npf_conn_getbackkey(con, alen) -- for the backwards stream.
 *
 *	Note: the keys are stored in npf_conn_t::c_keys[], which is used
 *	to allocate variable-length npf_conn_t structures based on whether
 *	the IPv4 or IPv6 addresses are used.
 *
 *	The key is an n-tuple used to identify the connection flow: see the
 *	npf_connkey.c source file for the description of the key layouts.
 *	The key may be formed using translated values in a case of NAT.
 *
 *	Connections can serve two purposes: for the implicit passing and/or
 *	to accommodate the dynamic NAT.  Connections for the former purpose
 *	are created by the rules with "stateful" attribute and are used for
 *	stateful filtering.  Such connections indicate that the packet of
 *	the backwards stream should be passed without inspection of the
 *	ruleset.  The other purpose is to associate a dynamic NAT mechanism
 *	with a connection.  Such connections are created by the NAT policies
 *	and they have a relationship with NAT translation structure via
 *	npf_conn_t::c_nat.  A single connection can serve both purposes,
 *	which is a common case.
 *
 * Connection life-cycle
 *
 *	Connections are established when a packet matches said rule or
 *	NAT policy.  Both keys of the established connection are inserted
 *	into the connection database.  A garbage collection thread
 *	periodically scans all connections and depending on connection
 *	properties (e.g. last activity time, protocol) removes connection
 *	entries and expires the actual connections.
 *
 *	Each connection has a reference count.  The reference is acquired
 *	on lookup and should be released by the caller.  It guarantees that
 *	the connection will not be destroyed, although it may be expired.
 *
 * Synchronization
 *
 *	Connection database is accessed in a lock-free manner by the main
 *	routines: npf_conn_inspect() and npf_conn_establish().  Since they
 *	are always called from a software interrupt, the database is
 *	protected using EBR.  The main place which can destroy a connection
 *	is npf_conn_worker().  The database itself can be replaced and
 *	destroyed in npf_conn_reload().
 *
 * ALG support
 *
 *	Application-level gateways (ALGs) can override generic connection
 *	inspection (npf_alg_conn() call in npf_conn_inspect() function) by
 *	performing their own lookup using different key.  Recursive call
 *	to npf_conn_inspect() is not allowed.  The ALGs ought to use the
 *	npf_conn_lookup() function for this purpose.
 *
 * Lock order
 *
 *	npf_config_lock ->
 *		conn_lock ->
 *			npf_conn_t::c_lock
 */
104
105 #ifdef _KERNEL
106 #include <sys/cdefs.h>
107 __KERNEL_RCSID(0, "$NetBSD: npf_conn.c,v 1.32 2020/05/30 14:16:56 rmind Exp $");
108
109 #include <sys/param.h>
110 #include <sys/types.h>
111
112 #include <netinet/in.h>
113 #include <netinet/tcp.h>
114
115 #include <sys/atomic.h>
116 #include <sys/kmem.h>
117 #include <sys/mutex.h>
118 #include <net/pfil.h>
119 #include <sys/pool.h>
120 #include <sys/queue.h>
121 #include <sys/systm.h>
122 #endif
123
124 #define __NPF_CONN_PRIVATE
125 #include "npf_conn.h"
126 #include "npf_impl.h"
127
/*
 * A helper to select the IPv4 or IPv6 connection cache.
 * The address length is 4 (IPv4) or 16 (IPv6), so bit 4 of 'alen'
 * yields the cache index: 0 for IPv4, 1 for IPv6.
 */
#define	NPF_CONNCACHE(alen)	(((alen) >> 4) & 0x1)

/*
 * Connection flags: PFIL_IN and PFIL_OUT values are reserved for direction
 * (the lower two bits of npf_conn_t::c_flags record the direction of the
 * connection's first packet).
 */
CTASSERT(PFIL_ALL == (0x001 | 0x002));
#define	CONN_ACTIVE	0x004	/* visible on inspection */
#define	CONN_PASS	0x008	/* perform implicit passing */
#define	CONN_EXPIRE	0x010	/* explicitly expire */
#define	CONN_REMOVED	0x020	/* "forw/back" entries removed */

/* Values of npf_t::conn_tracking (global on/off switch). */
enum { CONN_TRACKING_OFF, CONN_TRACKING_ON };

/* Serialize a single connection into an nvlist; defined below. */
static int	npf_conn_export(npf_t *, npf_conn_t *, nvlist_t *);
143
/*
 * npf_conn_init: initialize the connection tracking for the given NPF
 * instance: parameters, pool caches, the lock and the database.
 */
void
npf_conn_init(npf_t *npf)
{
	npf_conn_params_t *params = npf_param_allocgroup(npf,
	    NPF_PARAMS_CONN, sizeof(npf_conn_params_t));
	npf_param_t param_map[] = {
		{
			"state.key.interface",
			&params->connkey_interface,
			.default_val = 1, // true
			.min = 0, .max = 1
		},
		{
			"state.key.direction",
			&params->connkey_direction,
			.default_val = 1, // true
			.min = 0, .max = 1
		},
	};
	npf_param_register(npf, param_map, __arraycount(param_map));

	/*
	 * Connections are variable-length structures: c_keys[] holds two
	 * keys whose size depends on the address length.  Hence separate
	 * pool caches for the IPv4 and IPv6 sizes.
	 */
	npf->conn_cache[0] = pool_cache_init(
	    offsetof(npf_conn_t, c_keys[NPF_CONNKEY_V4WORDS * 2]),
	    0, 0, 0, "npfcn4pl", NULL, IPL_NET, NULL, NULL, NULL);
	npf->conn_cache[1] = pool_cache_init(
	    offsetof(npf_conn_t, c_keys[NPF_CONNKEY_V6WORDS * 2]),
	    0, 0, 0, "npfcn6pl", NULL, IPL_NET, NULL, NULL, NULL);

	/* Tracking stays off until npf_conn_load()/npf_conn_tracking(). */
	mutex_init(&npf->conn_lock, MUTEX_DEFAULT, IPL_NONE);
	atomic_store_relaxed(&npf->conn_tracking, CONN_TRACKING_OFF);
	npf->conn_db = npf_conndb_create();
	npf_conndb_sysinit(npf);

	/* Register the G/C routine to be invoked by the worker thread. */
	npf_worker_addfunc(npf, npf_conn_worker);
}
183
/*
 * npf_conn_fini: destroy the connection tracking state of the NPF
 * instance (the reverse of npf_conn_init()).
 */
void
npf_conn_fini(npf_t *npf)
{
	const size_t len = sizeof(npf_conn_params_t);

	/* Note: the caller should have flushed the connections. */
	KASSERT(atomic_load_relaxed(&npf->conn_tracking) == CONN_TRACKING_OFF);

	npf_conndb_destroy(npf->conn_db);
	pool_cache_destroy(npf->conn_cache[0]);
	pool_cache_destroy(npf->conn_cache[1]);
	mutex_destroy(&npf->conn_lock);

	npf_param_freegroup(npf, NPF_PARAMS_CONN, len);
	npf_conndb_sysfini(npf);
}
200
/*
 * npf_conn_load: perform the load by flushing the current connection
 * database and replacing it with the new one or just destroying.
 *
 * => The caller must disable the connection tracking and ensure that
 *    there are no connection database lookups or references in-flight.
 */
void
npf_conn_load(npf_t *npf, npf_conndb_t *ndb, bool track)
{
	npf_conndb_t *odb = NULL;

	KASSERT(npf_config_locked_p(npf));

	/*
	 * The connection database is in the quiescent state.
	 * Prevent G/C thread from running and install a new database.
	 */
	mutex_enter(&npf->conn_lock);
	if (ndb) {
		KASSERT(atomic_load_relaxed(&npf->conn_tracking)
		    == CONN_TRACKING_OFF);
		odb = atomic_load_relaxed(&npf->conn_db);
		/* Full barrier before publishing the new database pointer. */
		membar_sync();
		atomic_store_relaxed(&npf->conn_db, ndb);
	}
	if (track) {
		/* After this point lookups start flying in. */
		membar_producer();
		atomic_store_relaxed(&npf->conn_tracking, CONN_TRACKING_ON);
	}
	mutex_exit(&npf->conn_lock);

	if (odb) {
		/*
		 * Flush all, no sync since the caller did it for us.
		 * Also, release the pool cache memory.
		 */
		npf_conndb_gc(npf, odb, true, false);
		npf_conndb_destroy(odb);
		pool_cache_invalidate(npf->conn_cache[0]);
		pool_cache_invalidate(npf->conn_cache[1]);
	}
}
245
246 /*
247 * npf_conn_tracking: enable/disable connection tracking.
248 */
249 void
250 npf_conn_tracking(npf_t *npf, bool track)
251 {
252 KASSERT(npf_config_locked_p(npf));
253 atomic_store_relaxed(&npf->conn_tracking,
254 track ? CONN_TRACKING_ON : CONN_TRACKING_OFF);
255 }
256
257 static inline bool
258 npf_conn_trackable_p(const npf_cache_t *npc)
259 {
260 const npf_t *npf = npc->npc_ctx;
261
262 /*
263 * Check if connection tracking is on. Also, if layer 3 and 4 are
264 * not cached - protocol is not supported or packet is invalid.
265 */
266 if (atomic_load_relaxed(&npf->conn_tracking) != CONN_TRACKING_ON) {
267 return false;
268 }
269 if (!npf_iscached(npc, NPC_IP46) || !npf_iscached(npc, NPC_LAYER4)) {
270 return false;
271 }
272 return true;
273 }
274
275 static inline void
276 conn_update_atime(npf_conn_t *con)
277 {
278 struct timespec tsnow;
279
280 getnanouptime(&tsnow);
281 atomic_store_relaxed(&con->c_atime, tsnow.tv_sec);
282 }
283
/*
 * npf_conn_check: check that:
 *
 *	- the connection is active and not marked for expiration;
 *
 *	- the packet is travelling in the right direction with the respect
 *	  to the connection direction (if interface-id is not zero);
 *
 *	- the packet is travelling on the same interface as the
 *	  connection interface (if interface-id is not zero).
 *
 * => 'nbuf' may be NULL, in which case only the first check is done.
 */
static bool
npf_conn_check(const npf_conn_t *con, const nbuf_t *nbuf,
    const unsigned di, const npf_flow_t flow)
{
	const uint32_t flags = atomic_load_relaxed(&con->c_flags);
	const unsigned ifid = atomic_load_relaxed(&con->c_ifid);
	bool active;

	active = (flags & (CONN_ACTIVE | CONN_EXPIRE)) == CONN_ACTIVE;
	if (__predict_false(!active)) {
		return false;
	}
	if (ifid && nbuf) {
		/*
		 * The lower bits of c_flags hold the direction (PFIL_*)
		 * of the connection's first packet.  A packet travelling
		 * the same way must belong to the forwards stream.
		 */
		const bool match = (flags & PFIL_ALL) == di;
		npf_flow_t pflow = match ? NPF_FLOW_FORW : NPF_FLOW_BACK;

		if (__predict_false(flow != pflow)) {
			return false;
		}
		if (__predict_false(ifid != nbuf->nb_ifid)) {
			return false;
		}
	}
	return true;
}
320
321 /*
322 * npf_conn_lookup: lookup if there is an established connection.
323 *
324 * => If found, we will hold a reference for the caller.
325 */
326 npf_conn_t *
327 npf_conn_lookup(const npf_cache_t *npc, const unsigned di, npf_flow_t *flow)
328 {
329 npf_t *npf = npc->npc_ctx;
330 const nbuf_t *nbuf = npc->npc_nbuf;
331 npf_conn_t *con;
332 npf_connkey_t key;
333
334 /* Construct a key and lookup for a connection in the store. */
335 if (!npf_conn_conkey(npc, &key, di, NPF_FLOW_FORW)) {
336 return NULL;
337 }
338 con = npf_conndb_lookup(npf, &key, flow);
339 if (con == NULL) {
340 return NULL;
341 }
342 KASSERT(npc->npc_proto == atomic_load_relaxed(&con->c_proto));
343
344 /* Extra checks for the connection and packet. */
345 if (!npf_conn_check(con, nbuf, di, *flow)) {
346 atomic_dec_uint(&con->c_refcnt);
347 return NULL;
348 }
349
350 /* Update the last activity time. */
351 conn_update_atime(con);
352 return con;
353 }
354
/*
 * npf_conn_inspect: lookup a connection and inspect the protocol data.
 *
 * => If found, we will hold a reference for the caller.
 * => On a memory error, sets *error to ENOMEM and returns NULL.
 */
npf_conn_t *
npf_conn_inspect(npf_cache_t *npc, const unsigned di, int *error)
{
	nbuf_t *nbuf = npc->npc_nbuf;
	npf_flow_t flow;
	npf_conn_t *con;
	bool ok;

	KASSERT(!nbuf_flag_p(nbuf, NBUF_DATAREF_RESET));
	if (!npf_conn_trackable_p(npc)) {
		return NULL;
	}

	/* Query ALG which may lookup connection for us. */
	if ((con = npf_alg_conn(npc, di)) != NULL) {
		/* Note: reference is held. */
		return con;
	}
	if (nbuf_head_mbuf(nbuf) == NULL) {
		*error = ENOMEM;
		return NULL;
	}
	KASSERT(!nbuf_flag_p(nbuf, NBUF_DATAREF_RESET));

	/* The main lookup of the connection (acquires a reference). */
	if ((con = npf_conn_lookup(npc, di, &flow)) == NULL) {
		return NULL;
	}

	/* Inspect the protocol data and handle state changes. */
	mutex_enter(&con->c_lock);
	ok = npf_state_inspect(npc, &con->c_state, flow);
	mutex_exit(&con->c_lock);

	/* If invalid state: drop our reference and let the rules deal with it. */
	if (__predict_false(!ok)) {
		npf_conn_release(con);
		npf_stats_inc(npc->npc_ctx, NPF_STAT_INVALID_STATE);
		return NULL;
	}
#if 0
	/*
	 * TODO -- determine when this might be wanted/used.
	 *
	 * Note: skipping the connection lookup and ruleset inspection
	 * on other interfaces will also bypass dynamic NAT.
	 */
	if (atomic_load_relaxed(&con->c_flags) & CONN_GPASS) {
		/*
		 * Note: if tagging fails, then give this packet a chance
		 * to go through a regular ruleset.
		 */
		(void)nbuf_add_tag(nbuf, NPF_NTAG_PASS);
	}
#endif
	return con;
}
417
/*
 * npf_conn_establish: create a new connection, insert into the global list.
 *
 * => Connection is created with the reference held for the caller.
 * => Connection will be activated on the first reference release.
 */
npf_conn_t *
npf_conn_establish(npf_cache_t *npc, const unsigned di, bool global)
{
	npf_t *npf = npc->npc_ctx;
	const unsigned alen = npc->npc_alen;
	const unsigned idx = NPF_CONNCACHE(alen);
	const nbuf_t *nbuf = npc->npc_nbuf;
	npf_connkey_t *fw, *bk;
	npf_conndb_t *conn_db;
	npf_conn_t *con;
	int error = 0;

	KASSERT(!nbuf_flag_p(nbuf, NBUF_DATAREF_RESET));

	if (!npf_conn_trackable_p(npc)) {
		return NULL;
	}

	/* Allocate and initialize the new connection. */
	con = pool_cache_get(npf->conn_cache[idx], PR_NOWAIT);
	if (__predict_false(!con)) {
		/* Kick the G/C worker to reclaim memory and bail out. */
		npf_worker_signal(npf);
		return NULL;
	}
	NPF_PRINTF(("NPF: create conn %p\n", con));
	npf_stats_inc(npf, NPF_STAT_CONN_CREATE);

	/* The lower PFIL_* bits of c_flags record the connection direction. */
	mutex_init(&con->c_lock, MUTEX_DEFAULT, IPL_SOFTNET);
	atomic_store_relaxed(&con->c_flags, di & PFIL_ALL);
	atomic_store_relaxed(&con->c_refcnt, 0);
	con->c_rproc = NULL;
	con->c_nat = NULL;

	con->c_proto = npc->npc_proto;
	CTASSERT(sizeof(con->c_proto) >= sizeof(npc->npc_proto));
	con->c_alen = alen;

	/* Initialize the protocol state. */
	if (!npf_state_init(npc, &con->c_state)) {
		npf_conn_destroy(npf, con);
		return NULL;
	}
	KASSERT(npf_iscached(npc, NPC_IP46));

	fw = npf_conn_getforwkey(con);
	bk = npf_conn_getbackkey(con, alen);

	/*
	 * Construct "forwards" and "backwards" keys.  Also, set the
	 * interface ID for this connection (unless it is global).
	 * The backwards key is built with the inverted direction.
	 */
	if (!npf_conn_conkey(npc, fw, di, NPF_FLOW_FORW) ||
	    !npf_conn_conkey(npc, bk, di ^ PFIL_ALL, NPF_FLOW_BACK)) {
		npf_conn_destroy(npf, con);
		return NULL;
	}
	/*
	 * NOTE(review): the interface ID is set when 'global' is true,
	 * which reads as the opposite of the comment above -- the flag's
	 * polarity should be confirmed against the callers.
	 */
	con->c_ifid = global ? nbuf->nb_ifid : 0;

	/*
	 * Set last activity time for a new connection and acquire
	 * a reference for the caller before we make it visible.
	 */
	conn_update_atime(con);
	atomic_store_relaxed(&con->c_refcnt, 1);

	/*
	 * Insert both keys (entries representing directions) of the
	 * connection.  At this point it becomes visible, but we activate
	 * the connection later.
	 */
	mutex_enter(&con->c_lock);
	conn_db = atomic_load_relaxed(&npf->conn_db);
	if (!npf_conndb_insert(conn_db, fw, con, NPF_FLOW_FORW)) {
		error = EISCONN;
		goto err;
	}
	if (!npf_conndb_insert(conn_db, bk, con, NPF_FLOW_BACK)) {
		/* Roll back the "forwards" entry. */
		npf_conn_t *ret __diagused;
		ret = npf_conndb_remove(conn_db, fw);
		KASSERT(ret == con);
		error = EISCONN;
		goto err;
	}
err:
	/*
	 * If we have hit the duplicate: mark the connection as expired
	 * and let the G/C thread to take care of it.  We cannot do it
	 * here since there might be references acquired already.
	 */
	if (error) {
		atomic_or_uint(&con->c_flags, CONN_REMOVED | CONN_EXPIRE);
		atomic_dec_uint(&con->c_refcnt);
		npf_stats_inc(npf, NPF_STAT_RACE_CONN);
	} else {
		NPF_PRINTF(("NPF: establish conn %p\n", con));
	}

	/*
	 * Finally, insert into the connection list.  This is done even
	 * on error, so the G/C thread can find and destroy the connection.
	 */
	npf_conndb_enqueue(conn_db, con);
	mutex_exit(&con->c_lock);

	return error ? NULL : con;
}
527
/*
 * npf_conn_destroy: release the resources associated with the connection
 * and return it to the pool cache.
 *
 * => The reference count must have dropped to zero.
 */
void
npf_conn_destroy(npf_t *npf, npf_conn_t *con)
{
	/*
	 * NOTE(review): 'idx' is consumed by pool_cache_put() below; the
	 * __unused annotation is presumably for builds where that call is
	 * a macro ignoring its argument -- confirm.
	 */
	const unsigned idx __unused = NPF_CONNCACHE(con->c_alen);

	KASSERT(atomic_load_relaxed(&con->c_refcnt) == 0);

	if (con->c_nat) {
		/* Release any NAT structures. */
		npf_nat_destroy(con, con->c_nat);
	}
	if (con->c_rproc) {
		/* Release the rule procedure. */
		npf_rproc_release(con->c_rproc);
	}

	/* Destroy the state. */
	npf_state_destroy(&con->c_state);
	mutex_destroy(&con->c_lock);

	/* Free the structure, increase the counter. */
	pool_cache_put(npf->conn_cache[idx], con);
	npf_stats_inc(npf, NPF_STAT_CONN_DESTROY);
	NPF_PRINTF(("NPF: conn %p destroyed\n", con));
}
553
/*
 * npf_conn_setnat: associate NAT entry with the connection, update and
 * re-insert connection entry using the translation values.
 *
 * => The caller must be holding a reference.
 * => Returns EINVAL if the connection has already expired, or EISCONN
 *    on a race with a duplicate packet/entry.
 */
int
npf_conn_setnat(const npf_cache_t *npc, npf_conn_t *con,
    npf_nat_t *nt, unsigned ntype)
{
	static const unsigned nat_type_which[] = {
		/* See the description in npf_nat_which(). */
		[NPF_NATOUT] = NPF_DST,
		[NPF_NATIN] = NPF_SRC,
	};
	npf_t *npf = npc->npc_ctx;
	npf_conn_t *ret __diagused;
	npf_conndb_t *conn_db;
	npf_connkey_t *bk;
	npf_addr_t *taddr;
	in_port_t tport;
	uint32_t flags;

	KASSERT(atomic_load_relaxed(&con->c_refcnt) > 0);

	npf_nat_gettrans(nt, &taddr, &tport);
	KASSERT(ntype == NPF_NATOUT || ntype == NPF_NATIN);

	/* Acquire the lock and check for the races. */
	mutex_enter(&con->c_lock);
	flags = atomic_load_relaxed(&con->c_flags);
	if (__predict_false(flags & CONN_EXPIRE)) {
		/* The connection got expired. */
		mutex_exit(&con->c_lock);
		return EINVAL;
	}
	KASSERT((flags & CONN_REMOVED) == 0);

	if (__predict_false(con->c_nat != NULL)) {
		/* Race with a duplicate packet. */
		mutex_exit(&con->c_lock);
		npf_stats_inc(npc->npc_ctx, NPF_STAT_RACE_NAT);
		return EISCONN;
	}

	/* Remove the "backwards" key. */
	conn_db = atomic_load_relaxed(&npf->conn_db);
	bk = npf_conn_getbackkey(con, con->c_alen);
	ret = npf_conndb_remove(conn_db, bk);
	KASSERT(ret == con);

	/* Set the source/destination IDs to the translation values. */
	npf_conn_adjkey(bk, taddr, tport, nat_type_which[ntype]);

	/* Finally, re-insert the "backwards" key. */
	if (!npf_conndb_insert(conn_db, bk, con, NPF_FLOW_BACK)) {
		/*
		 * Race: we have hit the duplicate, remove the "forwards"
		 * key and expire our connection; it is no longer valid.
		 */
		npf_connkey_t *fw = npf_conn_getforwkey(con);
		ret = npf_conndb_remove(conn_db, fw);
		KASSERT(ret == con);

		atomic_or_uint(&con->c_flags, CONN_REMOVED | CONN_EXPIRE);
		mutex_exit(&con->c_lock);

		npf_stats_inc(npc->npc_ctx, NPF_STAT_RACE_NAT);
		return EISCONN;
	}

	/* Associate the NAT entry and release the lock. */
	con->c_nat = nt;
	mutex_exit(&con->c_lock);
	return 0;
}
630
/*
 * npf_conn_expire: explicitly mark connection as expired.
 *
 * => Must be called with: a) reference held  b) the relevant lock held.
 *    The relevant lock should prevent from connection destruction, e.g.
 *    npf_t::conn_lock or npf_natpolicy_t::n_lock.
 * => The actual removal and destruction is left to the G/C thread.
 */
void
npf_conn_expire(npf_conn_t *con)
{
	atomic_or_uint(&con->c_flags, CONN_EXPIRE);
}
643
644 /*
645 * npf_conn_pass: return true if connection is "pass" one, otherwise false.
646 */
647 bool
648 npf_conn_pass(const npf_conn_t *con, npf_match_info_t *mi, npf_rproc_t **rp)
649 {
650 KASSERT(atomic_load_relaxed(&con->c_refcnt) > 0);
651 if (__predict_true(atomic_load_relaxed(&con->c_flags) & CONN_PASS)) {
652 mi->mi_retfl = atomic_load_relaxed(&con->c_retfl);
653 mi->mi_rid = con->c_rid;
654 *rp = con->c_rproc;
655 return true;
656 }
657 return false;
658 }
659
660 /*
661 * npf_conn_setpass: mark connection as a "pass" one and associate the
662 * rule procedure with it.
663 */
664 void
665 npf_conn_setpass(npf_conn_t *con, const npf_match_info_t *mi, npf_rproc_t *rp)
666 {
667 KASSERT((atomic_load_relaxed(&con->c_flags) & CONN_ACTIVE) == 0);
668 KASSERT(atomic_load_relaxed(&con->c_refcnt) > 0);
669 KASSERT(con->c_rproc == NULL);
670
671 /*
672 * No need for atomic since the connection is not yet active.
673 * If rproc is set, the caller transfers its reference to us,
674 * which will be released on npf_conn_destroy().
675 */
676 atomic_or_uint(&con->c_flags, CONN_PASS);
677 con->c_rproc = rp;
678 if (rp) {
679 con->c_rid = mi->mi_rid;
680 con->c_retfl = mi->mi_retfl;
681 }
682 }
683
/*
 * npf_conn_release: release a reference, which might allow G/C thread
 * to destroy this connection.
 *
 * => The first release of a new connection also activates it.
 */
void
npf_conn_release(npf_conn_t *con)
{
	const unsigned flags = atomic_load_relaxed(&con->c_flags);

	if ((flags & (CONN_ACTIVE | CONN_EXPIRE)) == 0) {
		/* Activate: after this, connection is globally visible. */
		atomic_or_uint(&con->c_flags, CONN_ACTIVE);
	}
	KASSERT(atomic_load_relaxed(&con->c_refcnt) > 0);
	atomic_dec_uint(&con->c_refcnt);
}
700
/*
 * npf_conn_getnat: return the associated NAT entry, if any.
 *
 * => Returns NULL when no NAT is associated with the connection.
 */
npf_nat_t *
npf_conn_getnat(const npf_conn_t *con)
{
	return con->c_nat;
}
709
710 /*
711 * npf_conn_expired: criterion to check if connection is expired.
712 */
713 bool
714 npf_conn_expired(npf_t *npf, const npf_conn_t *con, uint64_t tsnow)
715 {
716 const unsigned flags = atomic_load_relaxed(&con->c_flags);
717 const int etime = npf_state_etime(npf, &con->c_state, con->c_proto);
718 int elapsed;
719
720 if (__predict_false(flags & CONN_EXPIRE)) {
721 /* Explicitly marked to be expired. */
722 return true;
723 }
724
725 /*
726 * Note: another thread may update 'atime' and it might
727 * become greater than 'now'.
728 */
729 elapsed = (int64_t)tsnow - atomic_load_relaxed(&con->c_atime);
730 return elapsed > etime;
731 }
732
/*
 * npf_conn_remove: unlink the connection and mark as expired.
 *
 * => Both database entries are removed at most once (CONN_REMOVED
 *    guards against a double removal).
 */
void
npf_conn_remove(npf_conndb_t *cd, npf_conn_t *con)
{
	/* Remove both entries of the connection. */
	mutex_enter(&con->c_lock);
	if ((atomic_load_relaxed(&con->c_flags) & CONN_REMOVED) == 0) {
		npf_connkey_t *fw, *bk;
		npf_conn_t *ret __diagused;

		fw = npf_conn_getforwkey(con);
		ret = npf_conndb_remove(cd, fw);
		KASSERT(ret == con);

		bk = npf_conn_getbackkey(con, NPF_CONNKEY_ALEN(fw));
		ret = npf_conndb_remove(cd, bk);
		KASSERT(ret == con);
	}

	/* Flag the removal and expiration. */
	atomic_or_uint(&con->c_flags, CONN_REMOVED | CONN_EXPIRE);
	mutex_exit(&con->c_lock);
}
758
759 /*
760 * npf_conn_worker: G/C to run from a worker thread or via npfk_gc().
761 */
762 void
763 npf_conn_worker(npf_t *npf)
764 {
765 npf_conndb_t *conn_db = atomic_load_relaxed(&npf->conn_db);
766 npf_conndb_gc(npf, conn_db, false, true);
767 }
768
769 /*
770 * npf_conndb_export: construct a list of connections prepared for saving.
771 * Note: this is expected to be an expensive operation.
772 */
773 int
774 npf_conndb_export(npf_t *npf, nvlist_t *nvl)
775 {
776 npf_conn_t *head, *con;
777 npf_conndb_t *conn_db;
778
779 /*
780 * Note: acquire conn_lock to prevent from the database
781 * destruction and G/C thread.
782 */
783 mutex_enter(&npf->conn_lock);
784 if (atomic_load_relaxed(&npf->conn_tracking) != CONN_TRACKING_ON) {
785 mutex_exit(&npf->conn_lock);
786 return 0;
787 }
788 conn_db = atomic_load_relaxed(&npf->conn_db);
789 head = npf_conndb_getlist(conn_db);
790 con = head;
791 while (con) {
792 nvlist_t *con_nvl;
793
794 con_nvl = nvlist_create(0);
795 if (npf_conn_export(npf, con, con_nvl) == 0) {
796 nvlist_append_nvlist_array(nvl, "conn-list", con_nvl);
797 }
798 nvlist_destroy(con_nvl);
799
800 if ((con = npf_conndb_getnext(conn_db, con)) == head) {
801 break;
802 }
803 }
804 mutex_exit(&npf->conn_lock);
805 return 0;
806 }
807
/*
 * npf_conn_export: serialize a single connection.
 *
 * => Returns ESRCH for connections that are not active (or expired),
 *    so they are skipped by the caller.
 */
static int
npf_conn_export(npf_t *npf, npf_conn_t *con, nvlist_t *nvl)
{
	nvlist_t *knvl;
	npf_connkey_t *fw, *bk;
	unsigned flags, alen;

	flags = atomic_load_relaxed(&con->c_flags);
	if ((flags & (CONN_ACTIVE|CONN_EXPIRE)) != CONN_ACTIVE) {
		return ESRCH;
	}
	nvlist_add_number(nvl, "flags", flags);
	nvlist_add_number(nvl, "proto", con->c_proto);
	if (con->c_ifid) {
		char ifname[IFNAMSIZ];
		npf_ifmap_copyname(npf, con->c_ifid, ifname, sizeof(ifname));
		nvlist_add_string(nvl, "ifname", ifname);
	}
	nvlist_add_binary(nvl, "state", &con->c_state, sizeof(npf_state_t));

	fw = npf_conn_getforwkey(con);
	alen = NPF_CONNKEY_ALEN(fw);
	KASSERT(alen == con->c_alen);
	bk = npf_conn_getbackkey(con, alen);

	knvl = npf_connkey_export(npf, fw);
	nvlist_move_nvlist(nvl, "forw-key", knvl);

	knvl = npf_connkey_export(npf, bk);
	nvlist_move_nvlist(nvl, "back-key", knvl);

	/* Let the address length be based on the first key. */
	nvlist_add_number(nvl, "alen", alen);

	if (con->c_nat) {
		npf_nat_export(npf, con->c_nat, nvl);
	}
	return 0;
}
850
/*
 * npf_conn_import: fully reconstruct a single connection from a
 * nvlist and insert into the given database.
 *
 * => Returns EINVAL and destroys the partially built connection on
 *    any malformed or conflicting input.
 */
int
npf_conn_import(npf_t *npf, npf_conndb_t *cd, const nvlist_t *cdict,
    npf_ruleset_t *natlist)
{
	npf_conn_t *con;
	npf_connkey_t *fw, *bk;
	const nvlist_t *nat, *conkey;
	unsigned flags, alen, idx;
	const char *ifname;
	const void *state;
	size_t len;

	/*
	 * To determine the length of the connection, which depends
	 * on the address length in the connection keys.
	 */
	alen = dnvlist_get_number(cdict, "alen", 0);
	idx = NPF_CONNCACHE(alen);

	/*
	 * Allocate a connection and initialize it (clear first).
	 * Note: the memset covers only the fixed-size portion; the
	 * variable-length keys are filled in below.
	 */
	con = pool_cache_get(npf->conn_cache[idx], PR_WAITOK);
	memset(con, 0, sizeof(npf_conn_t));
	mutex_init(&con->c_lock, MUTEX_DEFAULT, IPL_SOFTNET);
	npf_stats_inc(npf, NPF_STAT_CONN_CREATE);

	/* Only the direction and pass/active bits are trusted. */
	con->c_proto = dnvlist_get_number(cdict, "proto", 0);
	flags = dnvlist_get_number(cdict, "flags", 0);
	flags &= PFIL_ALL | CONN_ACTIVE | CONN_PASS;
	atomic_store_relaxed(&con->c_flags, flags);
	conn_update_atime(con);

	ifname = dnvlist_get_string(cdict, "ifname", NULL);
	if (ifname && (con->c_ifid = npf_ifmap_register(npf, ifname)) == 0) {
		goto err;
	}

	state = dnvlist_get_binary(cdict, "state", &len, NULL, 0);
	if (!state || len != sizeof(npf_state_t)) {
		goto err;
	}
	memcpy(&con->c_state, state, sizeof(npf_state_t));

	/* Reconstruct NAT association, if any. */
	if ((nat = dnvlist_get_nvlist(cdict, "nat", NULL)) != NULL &&
	    (con->c_nat = npf_nat_import(npf, nat, natlist, con)) == NULL) {
		goto err;
	}

	/*
	 * Fetch and copy the keys for each direction.
	 */
	fw = npf_conn_getforwkey(con);
	conkey = dnvlist_get_nvlist(cdict, "forw-key", NULL);
	if (conkey == NULL || !npf_connkey_import(npf, conkey, fw)) {
		goto err;
	}
	bk = npf_conn_getbackkey(con, NPF_CONNKEY_ALEN(fw));
	conkey = dnvlist_get_nvlist(cdict, "back-key", NULL);
	if (conkey == NULL || !npf_connkey_import(npf, conkey, bk)) {
		goto err;
	}

	/* Guard against the contradicting address lengths. */
	if (NPF_CONNKEY_ALEN(fw) != alen || NPF_CONNKEY_ALEN(bk) != alen) {
		goto err;
	}

	/* Insert the entries and the connection itself. */
	if (!npf_conndb_insert(cd, fw, con, NPF_FLOW_FORW)) {
		goto err;
	}
	if (!npf_conndb_insert(cd, bk, con, NPF_FLOW_BACK)) {
		npf_conndb_remove(cd, fw);
		goto err;
	}

	NPF_PRINTF(("NPF: imported conn %p\n", con));
	npf_conndb_enqueue(cd, con);
	return 0;
err:
	npf_conn_destroy(npf, con);
	return EINVAL;
}
938
939 /*
940 * npf_conn_find: lookup a connection in the list of connections
941 */
942 int
943 npf_conn_find(npf_t *npf, const nvlist_t *req, nvlist_t *resp)
944 {
945 const nvlist_t *key_nv;
946 npf_conn_t *con;
947 npf_connkey_t key;
948 npf_flow_t flow;
949 int error;
950
951 key_nv = dnvlist_get_nvlist(req, "key", NULL);
952 if (!key_nv || !npf_connkey_import(npf, key_nv, &key)) {
953 return EINVAL;
954 }
955 con = npf_conndb_lookup(npf, &key, &flow);
956 if (con == NULL) {
957 return ESRCH;
958 }
959 if (!npf_conn_check(con, NULL, 0, NPF_FLOW_FORW)) {
960 atomic_dec_uint(&con->c_refcnt);
961 return ESRCH;
962 }
963 error = npf_conn_export(npf, con, resp);
964 nvlist_add_number(resp, "flow", flow);
965 atomic_dec_uint(&con->c_refcnt);
966 return error;
967 }
968
#if defined(DDB) || defined(_NPF_TESTING)

/*
 * npf_conn_print: debugger/testing helper to dump a single connection.
 */
void
npf_conn_print(npf_conn_t *con)
{
	const npf_connkey_t *fw = npf_conn_getforwkey(con);
	const npf_connkey_t *bk = npf_conn_getbackkey(con, NPF_CONNKEY_ALEN(fw));
	const unsigned flags = atomic_load_relaxed(&con->c_flags);
	const unsigned proto = con->c_proto;
	struct timespec now;

	getnanouptime(&now);
	printf("%p:\n\tproto %d flags 0x%x tsdiff %ld etime %d\n", con,
	    proto, flags, (long)(now.tv_sec - con->c_atime),
	    npf_state_etime(npf_getkernctx(), &con->c_state, proto));
	npf_connkey_print(fw);
	npf_connkey_print(bk);
	npf_state_dump(&con->c_state);
	if (con->c_nat) {
		npf_nat_dump(con->c_nat);
	}
}

#endif
993