npf_conn.c revision 1.28 1 /*-
2 * Copyright (c) 2014-2018 Mindaugas Rasiukevicius <rmind at netbsd org>
3 * Copyright (c) 2010-2014 The NetBSD Foundation, Inc.
4 * All rights reserved.
5 *
6 * This material is based upon work partially supported by The
7 * NetBSD Foundation under a contract with Mindaugas Rasiukevicius.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
19 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
20 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
21 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
22 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
26 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
27 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28 * POSSIBILITY OF SUCH DAMAGE.
29 */
30
31 /*
32 * NPF connection tracking for stateful filtering and translation.
33 *
34 * Overview
35 *
36 * Packets can be incoming or outgoing with respect to an interface.
37 * Connection direction is identified by the direction of its first
38 * packet. The meaning of incoming/outgoing packet in the context of
39 * connection direction can be confusing. Therefore, we will use the
 *	terms "forwards stream" and "backwards stream", where packets in
 *	the forwards stream are the packets travelling in the same
 *	direction as the connection direction.
43 *
44 * All connections have two keys and thus two entries:
45 *
46 * - npf_conn_getforwkey(con) -- for the forwards stream;
47 * - npf_conn_getbackkey(con, alen) -- for the backwards stream.
48 *
49 * Note: the keys are stored in npf_conn_t::c_keys[], which is used
50 * to allocate variable-length npf_conn_t structures based on whether
51 * the IPv4 or IPv6 addresses are used. See the npf_connkey.c source
52 * file for the description of the key layouts.
53 *
54 * The keys are formed from the 5-tuple (source/destination address,
55 * source/destination port and the protocol). Additional matching
56 * is performed for the interface (a common behaviour is equivalent
57 * to the 6-tuple lookup including the interface ID). Note that the
58 * key may be formed using translated values in a case of NAT.
59 *
60 * Connections can serve two purposes: for the implicit passing or
61 * to accommodate the dynamic NAT. Connections for the former purpose
62 * are created by the rules with "stateful" attribute and are used for
63 * stateful filtering. Such connections indicate that the packet of
64 * the backwards stream should be passed without inspection of the
65 * ruleset. The other purpose is to associate a dynamic NAT mechanism
66 * with a connection. Such connections are created by the NAT policies
67 * and they have a relationship with NAT translation structure via
68 * npf_conn_t::c_nat. A single connection can serve both purposes,
69 * which is a common case.
70 *
71 * Connection life-cycle
72 *
73 * Connections are established when a packet matches said rule or
74 * NAT policy. Both keys of the established connection are inserted
75 * into the connection database. A garbage collection thread
76 * periodically scans all connections and depending on connection
77 * properties (e.g. last activity time, protocol) removes connection
78 * entries and expires the actual connections.
79 *
80 * Each connection has a reference count. The reference is acquired
81 * on lookup and should be released by the caller. It guarantees that
82 * the connection will not be destroyed, although it may be expired.
83 *
84 * Synchronisation
85 *
86 * Connection database is accessed in a lock-less manner by the main
87 * routines: npf_conn_inspect() and npf_conn_establish(). Since they
88 * are always called from a software interrupt, the database is
89 * protected using passive serialisation. The main place which can
 *	destroy a connection is npf_conn_worker().  The database itself
 *	can be replaced and destroyed in npf_conn_load().
92 *
93 * ALG support
94 *
95 * Application-level gateways (ALGs) can override generic connection
96 * inspection (npf_alg_conn() call in npf_conn_inspect() function) by
97 * performing their own lookup using different key. Recursive call
98 * to npf_conn_inspect() is not allowed. The ALGs ought to use the
99 * npf_conn_lookup() function for this purpose.
100 *
101 * Lock order
102 *
103 * npf_config_lock ->
104 * conn_lock ->
105 * npf_conn_t::c_lock
106 */
107
108 #ifdef _KERNEL
109 #include <sys/cdefs.h>
110 __KERNEL_RCSID(0, "$NetBSD: npf_conn.c,v 1.28 2019/08/06 10:25:13 christos Exp $");
111
112 #include <sys/param.h>
113 #include <sys/types.h>
114
115 #include <netinet/in.h>
116 #include <netinet/tcp.h>
117
118 #include <sys/atomic.h>
119 #include <sys/kmem.h>
120 #include <sys/mutex.h>
121 #include <net/pfil.h>
122 #include <sys/pool.h>
123 #include <sys/queue.h>
124 #include <sys/systm.h>
125 #endif
126
127 #define __NPF_CONN_PRIVATE
128 #include "npf_conn.h"
129 #include "npf_impl.h"
130
/*
 * NPF_CONNCACHE: map an address length to a connection cache index.
 * Only bit 4 of the length is examined: a length of 4 yields index 0
 * and a length of 16 yields index 1.
 */
#define	NPF_CONNCACHE(alen)	(((alen) & 0x10) >> 4)
133
134 /*
135 * Connection flags: PFIL_IN and PFIL_OUT values are reserved for direction.
136 */
137 CTASSERT(PFIL_ALL == (0x001 | 0x002));
138 #define CONN_ACTIVE 0x004 /* visible on inspection */
139 #define CONN_PASS 0x008 /* perform implicit passing */
140 #define CONN_EXPIRE 0x010 /* explicitly expire */
141 #define CONN_REMOVED 0x020 /* "forw/back" entries removed */
142
143 enum { CONN_TRACKING_OFF, CONN_TRACKING_ON };
144
145 static nvlist_t *npf_conn_export(npf_t *, npf_conn_t *);
146 static void npf_conn_destroy_idx(npf_t *, npf_conn_t *, unsigned);
147
/*
 * npf_conn_{init,fini}: initialise/destroy connection tracking.
 */
151
152 void
153 npf_conn_init(npf_t *npf, int flags)
154 {
155 npf->conn_cache[0] = pool_cache_init(
156 offsetof(npf_conn_t, c_keys[NPF_CONNKEY_V4WORDS * 2]),
157 0, 0, 0, "npfcn4pl", NULL, IPL_NET, NULL, NULL, NULL);
158 npf->conn_cache[1] = pool_cache_init(
159 offsetof(npf_conn_t, c_keys[NPF_CONNKEY_V6WORDS * 2]),
160 0, 0, 0, "npfcn6pl", NULL, IPL_NET, NULL, NULL, NULL);
161
162 mutex_init(&npf->conn_lock, MUTEX_DEFAULT, IPL_NONE);
163 npf->conn_tracking = CONN_TRACKING_OFF;
164 npf->conn_db = npf_conndb_create();
165
166 if ((flags & NPF_NO_GC) == 0) {
167 npf_worker_register(npf, npf_conn_worker);
168 }
169 npf_conndb_sysinit(npf);
170 }
171
172 void
173 npf_conn_fini(npf_t *npf)
174 {
175 npf_conndb_sysfini(npf);
176
177 /* Note: the caller should have flushed the connections. */
178 KASSERT(npf->conn_tracking == CONN_TRACKING_OFF);
179 npf_worker_unregister(npf, npf_conn_worker);
180
181 npf_conndb_destroy(npf->conn_db);
182 pool_cache_destroy(npf->conn_cache[0]);
183 pool_cache_destroy(npf->conn_cache[1]);
184 mutex_destroy(&npf->conn_lock);
185 }
186
187 /*
188 * npf_conn_load: perform the load by flushing the current connection
189 * database and replacing it with the new one or just destroying.
190 *
191 * => The caller must disable the connection tracking and ensure that
192 * there are no connection database lookups or references in-flight.
193 */
194 void
195 npf_conn_load(npf_t *npf, npf_conndb_t *ndb, bool track)
196 {
197 npf_conndb_t *odb = NULL;
198
199 KASSERT(npf_config_locked_p(npf));
200
201 /*
202 * The connection database is in the quiescent state.
203 * Prevent G/C thread from running and install a new database.
204 */
205 mutex_enter(&npf->conn_lock);
206 if (ndb) {
207 KASSERT(npf->conn_tracking == CONN_TRACKING_OFF);
208 odb = npf->conn_db;
209 npf->conn_db = ndb;
210 membar_sync();
211 }
212 if (track) {
213 /* After this point lookups start flying in. */
214 npf->conn_tracking = CONN_TRACKING_ON;
215 }
216 mutex_exit(&npf->conn_lock);
217
218 if (odb) {
219 /*
220 * Flush all, no sync since the caller did it for us.
221 * Also, release the pool cache memory.
222 */
223 npf_conndb_gc(npf, odb, true, false);
224 npf_conndb_destroy(odb);
225 pool_cache_invalidate(npf->conn_cache[0]);
226 pool_cache_invalidate(npf->conn_cache[1]);
227 }
228 }
229
230 /*
231 * npf_conn_tracking: enable/disable connection tracking.
232 */
233 void
234 npf_conn_tracking(npf_t *npf, bool track)
235 {
236 KASSERT(npf_config_locked_p(npf));
237 npf->conn_tracking = track ? CONN_TRACKING_ON : CONN_TRACKING_OFF;
238 }
239
240 static inline bool
241 npf_conn_trackable_p(const npf_cache_t *npc)
242 {
243 const npf_t *npf = npc->npc_ctx;
244
245 /*
246 * Check if connection tracking is on. Also, if layer 3 and 4 are
247 * not cached - protocol is not supported or packet is invalid.
248 */
249 if (npf->conn_tracking != CONN_TRACKING_ON) {
250 return false;
251 }
252 if (!npf_iscached(npc, NPC_IP46) || !npf_iscached(npc, NPC_LAYER4)) {
253 return false;
254 }
255 return true;
256 }
257
258 static inline void
259 conn_update_atime(npf_conn_t *con)
260 {
261 struct timespec tsnow;
262
263 getnanouptime(&tsnow);
264 con->c_atime = tsnow.tv_sec;
265 }
266
267 /*
268 * npf_conn_check: check that:
269 *
270 * - the connection is active;
271 *
272 * - the packet is travelling in the right direction with the respect
273 * to the connection direction (if interface-id is not zero);
274 *
275 * - the packet is travelling on the same interface as the
276 * connection interface (if interface-id is not zero).
277 */
278 static bool
279 npf_conn_check(const npf_conn_t *con, const nbuf_t *nbuf,
280 const unsigned di, const bool forw)
281 {
282 const uint32_t flags = con->c_flags;
283 const unsigned ifid = con->c_ifid;
284 bool active, pforw;
285
286 active = (flags & (CONN_ACTIVE | CONN_EXPIRE)) == CONN_ACTIVE;
287 if (__predict_false(!active)) {
288 return false;
289 }
290 if (ifid && nbuf) {
291 pforw = (flags & PFIL_ALL) == (unsigned)di;
292 if (__predict_false(forw != pforw)) {
293 return false;
294 }
295 if (__predict_false(ifid != nbuf->nb_ifid)) {
296 return false;
297 }
298 }
299 return true;
300 }
301
302 /*
303 * npf_conn_lookup: lookup if there is an established connection.
304 *
305 * => If found, we will hold a reference for the caller.
306 */
307 npf_conn_t *
308 npf_conn_lookup(const npf_cache_t *npc, const int di, bool *forw)
309 {
310 npf_t *npf = npc->npc_ctx;
311 const nbuf_t *nbuf = npc->npc_nbuf;
312 npf_conn_t *con;
313 npf_connkey_t key;
314
315 /* Construct a key and lookup for a connection in the store. */
316 if (!npf_conn_conkey(npc, &key, true)) {
317 return NULL;
318 }
319 con = npf_conndb_lookup(npf->conn_db, &key, forw);
320 if (con == NULL) {
321 return NULL;
322 }
323 KASSERT(npc->npc_proto == con->c_proto);
324
325 /* Extra checks for the connection and packet. */
326 if (!npf_conn_check(con, nbuf, di, *forw)) {
327 atomic_dec_uint(&con->c_refcnt);
328 return NULL;
329 }
330
331 /* Update the last activity time. */
332 conn_update_atime(con);
333 return con;
334 }
335
336 /*
337 * npf_conn_inspect: lookup a connection and inspecting the protocol data.
338 *
339 * => If found, we will hold a reference for the caller.
340 */
341 npf_conn_t *
342 npf_conn_inspect(npf_cache_t *npc, const int di, int *error)
343 {
344 nbuf_t *nbuf = npc->npc_nbuf;
345 npf_conn_t *con;
346 bool forw, ok;
347
348 KASSERT(!nbuf_flag_p(nbuf, NBUF_DATAREF_RESET));
349 if (!npf_conn_trackable_p(npc)) {
350 return NULL;
351 }
352
353 /* Query ALG which may lookup connection for us. */
354 if ((con = npf_alg_conn(npc, di)) != NULL) {
355 /* Note: reference is held. */
356 return con;
357 }
358 if (nbuf_head_mbuf(nbuf) == NULL) {
359 *error = ENOMEM;
360 return NULL;
361 }
362 KASSERT(!nbuf_flag_p(nbuf, NBUF_DATAREF_RESET));
363
364 /* Main lookup of the connection. */
365 if ((con = npf_conn_lookup(npc, di, &forw)) == NULL) {
366 return NULL;
367 }
368
369 /* Inspect the protocol data and handle state changes. */
370 mutex_enter(&con->c_lock);
371 ok = npf_state_inspect(npc, &con->c_state, forw);
372 mutex_exit(&con->c_lock);
373
374 /* If invalid state: let the rules deal with it. */
375 if (__predict_false(!ok)) {
376 npf_conn_release(con);
377 npf_stats_inc(npc->npc_ctx, NPF_STAT_INVALID_STATE);
378 return NULL;
379 }
380
381 /*
382 * If this is multi-end state, then specially tag the packet
383 * so it will be just passed-through on other interfaces.
384 */
385 if (con->c_ifid == 0 && nbuf_add_tag(nbuf, NPF_NTAG_PASS) != 0) {
386 npf_conn_release(con);
387 *error = ENOMEM;
388 return NULL;
389 }
390 return con;
391 }
392
393 /*
394 * npf_conn_establish: create a new connection, insert into the global list.
395 *
396 * => Connection is created with the reference held for the caller.
397 * => Connection will be activated on the first reference release.
398 */
399 npf_conn_t *
400 npf_conn_establish(npf_cache_t *npc, int di, bool global)
401 {
402 npf_t *npf = npc->npc_ctx;
403 const unsigned alen = npc->npc_alen;
404 const unsigned idx = NPF_CONNCACHE(alen);
405 const nbuf_t *nbuf = npc->npc_nbuf;
406 npf_connkey_t *fw, *bk;
407 npf_conn_t *con;
408 int error = 0;
409
410 KASSERT(!nbuf_flag_p(nbuf, NBUF_DATAREF_RESET));
411
412 if (!npf_conn_trackable_p(npc)) {
413 return NULL;
414 }
415
416 /* Allocate and initialise the new connection. */
417 con = pool_cache_get(npf->conn_cache[idx], PR_NOWAIT);
418 if (__predict_false(!con)) {
419 npf_worker_signal(npf);
420 return NULL;
421 }
422 NPF_PRINTF(("NPF: create conn %p\n", con));
423 npf_stats_inc(npf, NPF_STAT_CONN_CREATE);
424
425 mutex_init(&con->c_lock, MUTEX_DEFAULT, IPL_SOFTNET);
426 con->c_flags = (di & PFIL_ALL);
427 con->c_refcnt = 0;
428 con->c_rproc = NULL;
429 con->c_nat = NULL;
430
431 con->c_proto = npc->npc_proto;
432 CTASSERT(sizeof(con->c_proto) >= sizeof(npc->npc_proto));
433
434 /* Initialize the protocol state. */
435 if (!npf_state_init(npc, &con->c_state)) {
436 npf_conn_destroy_idx(npf, con, idx);
437 return NULL;
438 }
439 KASSERT(npf_iscached(npc, NPC_IP46));
440
441 fw = npf_conn_getforwkey(con);
442 bk = npf_conn_getbackkey(con, alen);
443
444 /*
445 * Construct "forwards" and "backwards" keys. Also, set the
446 * interface ID for this connection (unless it is global).
447 */
448 if (!npf_conn_conkey(npc, fw, true) ||
449 !npf_conn_conkey(npc, bk, false)) {
450 npf_conn_destroy_idx(npf, con, idx);
451 return NULL;
452 }
453 con->c_ifid = global ? nbuf->nb_ifid : 0;
454
455 /*
456 * Set last activity time for a new connection and acquire
457 * a reference for the caller before we make it visible.
458 */
459 conn_update_atime(con);
460 con->c_refcnt = 1;
461
462 /*
463 * Insert both keys (entries representing directions) of the
464 * connection. At this point it becomes visible, but we activate
465 * the connection later.
466 */
467 mutex_enter(&con->c_lock);
468 if (!npf_conndb_insert(npf->conn_db, fw, con, true)) {
469 error = EISCONN;
470 goto err;
471 }
472 if (!npf_conndb_insert(npf->conn_db, bk, con, false)) {
473 npf_conn_t *ret __diagused;
474 ret = npf_conndb_remove(npf->conn_db, fw);
475 KASSERT(ret == con);
476 error = EISCONN;
477 goto err;
478 }
479 err:
480 /*
481 * If we have hit the duplicate: mark the connection as expired
482 * and let the G/C thread to take care of it. We cannot do it
483 * here since there might be references acquired already.
484 */
485 if (error) {
486 atomic_or_uint(&con->c_flags, CONN_REMOVED | CONN_EXPIRE);
487 atomic_dec_uint(&con->c_refcnt);
488 npf_stats_inc(npf, NPF_STAT_RACE_CONN);
489 } else {
490 NPF_PRINTF(("NPF: establish conn %p\n", con));
491 }
492
493 /* Finally, insert into the connection list. */
494 npf_conndb_enqueue(npf->conn_db, con);
495 mutex_exit(&con->c_lock);
496
497 return error ? NULL : con;
498 }
499
500 void
501 npf_conn_destroy(npf_t *npf, npf_conn_t *con)
502 {
503 const npf_connkey_t *key = npf_conn_getforwkey(con);
504 const unsigned alen = NPF_CONNKEY_ALEN(key);
505 npf_conn_destroy_idx(npf, con, NPF_CONNCACHE(alen));
506 }
507
508 static void
509 npf_conn_destroy_idx(npf_t *npf, npf_conn_t *con, unsigned idx)
510 {
511 KASSERT(con->c_refcnt == 0);
512
513 if (con->c_nat) {
514 /* Release any NAT structures. */
515 npf_nat_destroy(con->c_nat);
516 }
517 if (con->c_rproc) {
518 /* Release the rule procedure. */
519 npf_rproc_release(con->c_rproc);
520 }
521
522 /* Destroy the state. */
523 npf_state_destroy(&con->c_state);
524 mutex_destroy(&con->c_lock);
525
526 /* Free the structure, increase the counter. */
527 pool_cache_put(npf->conn_cache[idx], con);
528 npf_stats_inc(npf, NPF_STAT_CONN_DESTROY);
529 NPF_PRINTF(("NPF: conn %p destroyed\n", con));
530 }
531
532 /*
533 * npf_conn_setnat: associate NAT entry with the connection, update and
534 * re-insert connection entry using the translation values.
535 *
536 * => The caller must be holding a reference.
537 */
538 int
539 npf_conn_setnat(const npf_cache_t *npc, npf_conn_t *con,
540 npf_nat_t *nt, unsigned ntype)
541 {
542 static const u_int nat_type_dimap[] = {
543 [NPF_NATOUT] = NPF_DST,
544 [NPF_NATIN] = NPF_SRC,
545 };
546 npf_t *npf = npc->npc_ctx;
547 npf_connkey_t key, *fw, *bk;
548 npf_conn_t *ret __diagused;
549 npf_addr_t *taddr;
550 in_port_t tport;
551
552 KASSERT(con->c_refcnt > 0);
553
554 npf_nat_gettrans(nt, &taddr, &tport);
555 KASSERT(ntype == NPF_NATOUT || ntype == NPF_NATIN);
556
557 /* Construct a "backwards" key. */
558 if (!npf_conn_conkey(npc, &key, false)) {
559 return EINVAL;
560 }
561
562 /* Acquire the lock and check for the races. */
563 mutex_enter(&con->c_lock);
564 if (__predict_false(con->c_flags & CONN_EXPIRE)) {
565 /* The connection got expired. */
566 mutex_exit(&con->c_lock);
567 return EINVAL;
568 }
569 KASSERT((con->c_flags & CONN_REMOVED) == 0);
570
571 if (__predict_false(con->c_nat != NULL)) {
572 /* Race with a duplicate packet. */
573 mutex_exit(&con->c_lock);
574 npf_stats_inc(npc->npc_ctx, NPF_STAT_RACE_NAT);
575 return EISCONN;
576 }
577
578 /* Remove the "backwards" key. */
579 fw = npf_conn_getforwkey(con);
580 bk = npf_conn_getbackkey(con, NPF_CONNKEY_ALEN(fw));
581 ret = npf_conndb_remove(npf->conn_db, bk);
582 KASSERT(ret == con);
583
584 /* Set the source/destination IDs to the translation values. */
585 npf_conn_adjkey(bk, taddr, tport, nat_type_dimap[ntype]);
586
587 /* Finally, re-insert the "backwards" key. */
588 if (!npf_conndb_insert(npf->conn_db, bk, con, false)) {
589 /*
590 * Race: we have hit the duplicate, remove the "forwards"
591 * key and expire our connection; it is no longer valid.
592 */
593 ret = npf_conndb_remove(npf->conn_db, fw);
594 KASSERT(ret == con);
595
596 atomic_or_uint(&con->c_flags, CONN_REMOVED | CONN_EXPIRE);
597 mutex_exit(&con->c_lock);
598
599 npf_stats_inc(npc->npc_ctx, NPF_STAT_RACE_NAT);
600 return EISCONN;
601 }
602
603 /* Associate the NAT entry and release the lock. */
604 con->c_nat = nt;
605 mutex_exit(&con->c_lock);
606 return 0;
607 }
608
609 /*
610 * npf_conn_expire: explicitly mark connection as expired.
611 */
612 void
613 npf_conn_expire(npf_conn_t *con)
614 {
615 /* KASSERT(con->c_refcnt > 0); XXX: npf_nat_freepolicy() */
616 atomic_or_uint(&con->c_flags, CONN_EXPIRE);
617 }
618
619 /*
620 * npf_conn_pass: return true if connection is "pass" one, otherwise false.
621 */
622 bool
623 npf_conn_pass(const npf_conn_t *con, npf_match_info_t *mi, npf_rproc_t **rp)
624 {
625 KASSERT(con->c_refcnt > 0);
626 if (__predict_true(con->c_flags & CONN_PASS)) {
627 mi->mi_rid = con->c_rid;
628 mi->mi_retfl = con->c_retfl;
629 *rp = con->c_rproc;
630 return true;
631 }
632 return false;
633 }
634
635 /*
636 * npf_conn_setpass: mark connection as a "pass" one and associate the
637 * rule procedure with it.
638 */
639 void
640 npf_conn_setpass(npf_conn_t *con, const npf_match_info_t *mi, npf_rproc_t *rp)
641 {
642 KASSERT((con->c_flags & CONN_ACTIVE) == 0);
643 KASSERT(con->c_refcnt > 0);
644 KASSERT(con->c_rproc == NULL);
645
646 /*
647 * No need for atomic since the connection is not yet active.
648 * If rproc is set, the caller transfers its reference to us,
649 * which will be released on npf_conn_destroy().
650 */
651 atomic_or_uint(&con->c_flags, CONN_PASS);
652 con->c_rproc = rp;
653 if (rp) {
654 con->c_rid = mi->mi_rid;
655 con->c_retfl = mi->mi_retfl;
656 }
657 }
658
659 /*
660 * npf_conn_release: release a reference, which might allow G/C thread
661 * to destroy this connection.
662 */
663 void
664 npf_conn_release(npf_conn_t *con)
665 {
666 if ((con->c_flags & (CONN_ACTIVE | CONN_EXPIRE)) == 0) {
667 /* Activate: after this, connection is globally visible. */
668 atomic_or_uint(&con->c_flags, CONN_ACTIVE);
669 }
670 KASSERT(con->c_refcnt > 0);
671 atomic_dec_uint(&con->c_refcnt);
672 }
673
674 /*
675 * npf_conn_getnat: return associated NAT data entry and indicate
676 * whether it is a "forwards" or "backwards" stream.
677 */
678 npf_nat_t *
679 npf_conn_getnat(npf_conn_t *con, const int di, bool *forw)
680 {
681 KASSERT(con->c_refcnt > 0);
682 *forw = (con->c_flags & PFIL_ALL) == (u_int)di;
683 return con->c_nat;
684 }
685
686 /*
687 * npf_conn_expired: criterion to check if connection is expired.
688 */
689 bool
690 npf_conn_expired(npf_t *npf, const npf_conn_t *con, uint64_t tsnow)
691 {
692 const int etime = npf_state_etime(npf, &con->c_state, con->c_proto);
693 int elapsed;
694
695 if (__predict_false(con->c_flags & CONN_EXPIRE)) {
696 /* Explicitly marked to be expired. */
697 return true;
698 }
699
700 /*
701 * Note: another thread may update 'atime' and it might
702 * become greater than 'now'.
703 */
704 elapsed = (int64_t)tsnow - con->c_atime;
705 return elapsed > etime;
706 }
707
708 /*
709 * npf_conn_remove: unlink the connection and mark as expired.
710 */
711 void
712 npf_conn_remove(npf_conndb_t *cd, npf_conn_t *con)
713 {
714 /* Remove both entries of the connection. */
715 mutex_enter(&con->c_lock);
716 if ((con->c_flags & CONN_REMOVED) == 0) {
717 npf_connkey_t *fw, *bk;
718 npf_conn_t *ret __diagused;
719
720 fw = npf_conn_getforwkey(con);
721 ret = npf_conndb_remove(cd, fw);
722 KASSERT(ret == con);
723
724 bk = npf_conn_getbackkey(con, NPF_CONNKEY_ALEN(fw));
725 ret = npf_conndb_remove(cd, bk);
726 KASSERT(ret == con);
727 }
728
729 /* Flag the removal and expiration. */
730 atomic_or_uint(&con->c_flags, CONN_REMOVED | CONN_EXPIRE);
731 mutex_exit(&con->c_lock);
732 }
733
734 /*
735 * npf_conn_worker: G/C to run from a worker thread.
736 */
737 void
738 npf_conn_worker(npf_t *npf)
739 {
740 npf_conndb_gc(npf, npf->conn_db, false, true);
741 }
742
743 /*
744 * npf_conndb_export: construct a list of connections prepared for saving.
745 * Note: this is expected to be an expensive operation.
746 */
747 int
748 npf_conndb_export(npf_t *npf, nvlist_t *npf_dict)
749 {
750 npf_conn_t *head, *con;
751
752 /*
753 * Note: acquire conn_lock to prevent from the database
754 * destruction and G/C thread.
755 */
756 mutex_enter(&npf->conn_lock);
757 if (npf->conn_tracking != CONN_TRACKING_ON) {
758 mutex_exit(&npf->conn_lock);
759 return 0;
760 }
761 head = npf_conndb_getlist(npf->conn_db);
762 con = head;
763 while (con) {
764 nvlist_t *cdict;
765
766 if ((cdict = npf_conn_export(npf, con)) != NULL) {
767 nvlist_append_nvlist_array(npf_dict, "conn-list", cdict);
768 nvlist_destroy(cdict);
769 }
770 if ((con = npf_conndb_getnext(npf->conn_db, con)) == head) {
771 break;
772 }
773 }
774 mutex_exit(&npf->conn_lock);
775 return 0;
776 }
777
778 /*
779 * npf_conn_export: serialise a single connection.
780 */
781 static nvlist_t *
782 npf_conn_export(npf_t *npf, npf_conn_t *con)
783 {
784 nvlist_t *cdict, *kdict;
785 npf_connkey_t *fw, *bk;
786 unsigned alen;
787
788 if ((con->c_flags & (CONN_ACTIVE|CONN_EXPIRE)) != CONN_ACTIVE) {
789 return NULL;
790 }
791 cdict = nvlist_create(0);
792 nvlist_add_number(cdict, "flags", con->c_flags);
793 nvlist_add_number(cdict, "proto", con->c_proto);
794 if (con->c_ifid) {
795 const char *ifname = npf_ifmap_getname(npf, con->c_ifid);
796 nvlist_add_string(cdict, "ifname", ifname);
797 }
798 nvlist_add_binary(cdict, "state", &con->c_state, sizeof(npf_state_t));
799
800 fw = npf_conn_getforwkey(con);
801 alen = NPF_CONNKEY_ALEN(fw);
802 bk = npf_conn_getbackkey(con, alen);
803
804 kdict = npf_connkey_export(fw);
805 nvlist_move_nvlist(cdict, "forw-key", kdict);
806
807 kdict = npf_connkey_export(bk);
808 nvlist_move_nvlist(cdict, "back-key", kdict);
809
810 /* Let the address length be based on on first key. */
811 nvlist_add_number(cdict, "alen", alen);
812
813 if (con->c_nat) {
814 npf_nat_export(cdict, con->c_nat);
815 }
816 return cdict;
817 }
818
819 /*
820 * npf_conn_import: fully reconstruct a single connection from a
821 * nvlist and insert into the given database.
822 */
823 int
824 npf_conn_import(npf_t *npf, npf_conndb_t *cd, const nvlist_t *cdict,
825 npf_ruleset_t *natlist)
826 {
827 npf_conn_t *con;
828 npf_connkey_t *fw, *bk;
829 const nvlist_t *nat, *conkey;
830 const char *ifname;
831 const void *state;
832 unsigned alen, idx;
833 size_t len;
834
835 /*
836 * To determine the length of the connection, which depends
837 * on the address length in the connection keys.
838 */
839 alen = dnvlist_get_number(cdict, "alen", 0);
840 idx = NPF_CONNCACHE(alen);
841
842 /* Allocate a connection and initialise it (clear first). */
843 con = pool_cache_get(npf->conn_cache[idx], PR_WAITOK);
844 memset(con, 0, sizeof(npf_conn_t));
845 mutex_init(&con->c_lock, MUTEX_DEFAULT, IPL_SOFTNET);
846 npf_stats_inc(npf, NPF_STAT_CONN_CREATE);
847
848 con->c_proto = dnvlist_get_number(cdict, "proto", 0);
849 con->c_flags = dnvlist_get_number(cdict, "flags", 0);
850 con->c_flags &= PFIL_ALL | CONN_ACTIVE | CONN_PASS;
851 conn_update_atime(con);
852
853 ifname = dnvlist_get_string(cdict, "ifname", NULL);
854 if (ifname && (con->c_ifid = npf_ifmap_register(npf, ifname)) == 0) {
855 goto err;
856 }
857
858 state = dnvlist_get_binary(cdict, "state", &len, NULL, 0);
859 if (!state || len != sizeof(npf_state_t)) {
860 goto err;
861 }
862 memcpy(&con->c_state, state, sizeof(npf_state_t));
863
864 /* Reconstruct NAT association, if any. */
865 if ((nat = dnvlist_get_nvlist(cdict, "nat", NULL)) != NULL &&
866 (con->c_nat = npf_nat_import(npf, nat, natlist, con)) == NULL) {
867 goto err;
868 }
869
870 /*
871 * Fetch and copy the keys for each direction.
872 */
873 fw = npf_conn_getforwkey(con);
874 conkey = dnvlist_get_nvlist(cdict, "forw-key", NULL);
875 if (conkey == NULL || !npf_connkey_import(conkey, fw)) {
876 goto err;
877 }
878 bk = npf_conn_getbackkey(con, NPF_CONNKEY_ALEN(fw));
879 conkey = dnvlist_get_nvlist(cdict, "back-key", NULL);
880 if (conkey == NULL || !npf_connkey_import(conkey, bk)) {
881 goto err;
882 }
883
884 /* Guard against the contradicting address lengths. */
885 if (NPF_CONNKEY_ALEN(fw) != alen || NPF_CONNKEY_ALEN(bk) != alen) {
886 goto err;
887 }
888
889 /* Insert the entries and the connection itself. */
890 if (!npf_conndb_insert(cd, fw, con, true)) {
891 goto err;
892 }
893 if (!npf_conndb_insert(cd, bk, con, false)) {
894 npf_conndb_remove(cd, fw);
895 goto err;
896 }
897
898 NPF_PRINTF(("NPF: imported conn %p\n", con));
899 npf_conndb_enqueue(cd, con);
900 return 0;
901 err:
902 npf_conn_destroy_idx(npf, con, idx);
903 return EINVAL;
904 }
905
906 int
907 npf_conn_find(npf_t *npf, const nvlist_t *idict, nvlist_t **odict)
908 {
909 const nvlist_t *kdict;
910 npf_connkey_t key;
911 npf_conn_t *con;
912 uint16_t dir;
913 bool forw;
914
915 kdict = dnvlist_get_nvlist(idict, "key", NULL);
916 if (!kdict || !npf_connkey_import(kdict, &key)) {
917 return EINVAL;
918 }
919 con = npf_conndb_lookup(npf->conn_db, &key, &forw);
920 if (con == NULL) {
921 return ESRCH;
922 }
923 dir = dnvlist_get_number(idict, "direction", 0);
924 if (!npf_conn_check(con, NULL, dir, true)) {
925 atomic_dec_uint(&con->c_refcnt);
926 return ESRCH;
927 }
928 *odict = npf_conn_export(npf, con);
929 atomic_dec_uint(&con->c_refcnt);
930 return *odict ? 0 : ENOSPC;
931 }
932
#if defined(DDB) || defined(_NPF_TESTING)

/*
 * npf_conn_print: print the connection for debugging: its address,
 * protocol, flags, the time since the last activity, the expiration
 * time, both keys, the protocol state and the NAT data (if any).
 */
void
npf_conn_print(npf_conn_t *con)
{
	const npf_connkey_t *fw = npf_conn_getforwkey(con);
	const npf_connkey_t *bk = npf_conn_getbackkey(con, NPF_CONNKEY_ALEN(fw));
	const unsigned proto = con->c_proto;
	struct timespec tspnow;

	getnanouptime(&tspnow);

	/* Note: 'proto' is unsigned, hence the %u conversion. */
	printf("%p:\n\tproto %u flags 0x%x tsdiff %ld etime %d\n", con,
	    proto, con->c_flags, (long)(tspnow.tv_sec - con->c_atime),
	    npf_state_etime(npf_getkernctx(), &con->c_state, proto));
	npf_connkey_print(fw);
	npf_connkey_print(bk);
	npf_state_dump(&con->c_state);
	if (con->c_nat) {
		npf_nat_dump(con->c_nat);
	}
}

#endif
956