Home | History | Annotate | Line # | Download | only in npf
npf_nat.c revision 1.50
      1 /*-
      2  * Copyright (c) 2014-2020 Mindaugas Rasiukevicius <rmind at noxt eu>
      3  * Copyright (c) 2010-2013 The NetBSD Foundation, Inc.
      4  * All rights reserved.
      5  *
      6  * This material is based upon work partially supported by The
      7  * NetBSD Foundation under a contract with Mindaugas Rasiukevicius.
      8  *
      9  * Redistribution and use in source and binary forms, with or without
     10  * modification, are permitted provided that the following conditions
     11  * are met:
     12  * 1. Redistributions of source code must retain the above copyright
     13  *    notice, this list of conditions and the following disclaimer.
     14  * 2. Redistributions in binary form must reproduce the above copyright
     15  *    notice, this list of conditions and the following disclaimer in the
     16  *    documentation and/or other materials provided with the distribution.
     17  *
     18  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     19  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     20  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     21  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     22  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     23  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     24  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     25  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     26  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     27  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     28  * POSSIBILITY OF SUCH DAMAGE.
     29  */
     30 
     31 /*
     32  * NPF network address port translation (NAPT) and other forms of NAT.
     33  * Described in RFC 2663, RFC 3022, etc.
     34  *
     35  * Overview
     36  *
     37  *	There are a few mechanisms: NAT policy, port map and translation.
     38  *	The NAT module has a separate ruleset where rules always have an
     39  *	associated NAT policy.
     40  *
     41  * Translation types
     42  *
     43  *	There are two types of translation: outbound (NPF_NATOUT) and
     44  *	inbound (NPF_NATIN).  It should not be confused with connection
     45  *	direction.  See npf_nat_which() for the description of how the
     46  *	addresses are rewritten.  The bi-directional NAT is a combined
     47  *	outbound and inbound translation, therefore is constructed as
     48  *	two policies.
     49  *
     50  * NAT policies and port maps
     51  *
     52  *	The NAT (translation) policy is applied when packet matches the
     53  *	rule.  Apart from the filter criteria, the NAT policy always has
     54  *	a translation IP address or a table.  If port translation is set,
     55  *	then NAT mechanism relies on port map mechanism.
     56  *
     57  * Connections, translation entries and their life-cycle
     58  *
     59  *	NAT relies on the connection tracking module.  Each translated
     60  *	connection has an associated translation entry (npf_nat_t) which
     61  *	contains information used for backwards stream translation, i.e.
     62  *	the original IP address with port and translation port, allocated
     63  *	from the port map.  Each NAT entry is associated with the policy,
     64  *	which contains translation IP address.  Allocated port is returned
     65  *	to the port map and NAT entry is destroyed when connection expires.
     66  */
     67 
     68 #ifdef _KERNEL
     69 #include <sys/cdefs.h>
     70 __KERNEL_RCSID(0, "$NetBSD: npf_nat.c,v 1.50 2020/05/30 14:16:56 rmind Exp $");
     71 
     72 #include <sys/param.h>
     73 #include <sys/types.h>
     74 
     75 #include <sys/atomic.h>
     76 #include <sys/condvar.h>
     77 #include <sys/kmem.h>
     78 #include <sys/mutex.h>
     79 #include <sys/pool.h>
     80 #include <sys/proc.h>
     81 #endif
     82 
     83 #include "npf_impl.h"
     84 #include "npf_conn.h"
     85 
/*
 * NAT policy structure.
 *
 * Allocated by npf_natpolicy_create() with an initial reference count
 * of one.  The part of the structure starting at n_type is compared as
 * raw bytes by npf_natpolicy_cmp(), therefore the order of the members
 * below is significant.
 */
struct npf_natpolicy {
	/* NPF context this policy was created for. */
	npf_t *			n_npfctx;
	/* Lock held while walking or modifying n_nat_list. */
	kmutex_t		n_lock;
	/* NAT entries (npf_nat_t) linked to this policy via nt_entry. */
	LIST_HEAD(, npf_nat)	n_nat_list;
	/* Reference count; the policy is freed when it drops to zero. */
	unsigned		n_refcnt;
	/* Policy ID ("nat-policy" in the nvlist form). */
	uint64_t		n_id;

	/*
	 * Translation type, flags, address or table and the port.
	 * Additionally, there may be translation algorithm and any
	 * auxiliary data, e.g. NPTv6 adjustment value.
	 *
	 * NPF_NP_CMP_START mark starts here.
	 */
	/* NPF_NATIN or NPF_NATOUT. */
	unsigned		n_type;
	/* NPF_NAT_* flags, including the private NPF_NAT_USETABLE. */
	unsigned		n_flags;
	/* Length of n_taddr in bytes (not set when a table is used). */
	unsigned		n_alen;

	/* Translation address, its mask and the translation port. */
	npf_addr_t		n_taddr;
	npf_netmask_t		n_tmask;
	in_port_t		n_tport;
	/* Table ID; meaningful when NPF_NAT_USETABLE is set. */
	unsigned		n_tid;

	/* Translation algorithm (NPF_ALGO_*) and its auxiliary data. */
	unsigned		n_algo;
	union {
		/* NPF_ALGO_RR: counter used to pick the table entry. */
		unsigned	n_rr_idx;
		/* NPF_ALGO_NPT66: adjustment value. */
		uint16_t	n_npt66_adj;
	};
};
    118 
/*
 * Private flags - must be in the NPF_NAT_PRIVMASK range.
 *
 * NPF_NAT_USETABLE: the translation address is selected from the table
 * identified by n_tid.  It is set by npf_natpolicy_create() when the
 * "nat-table-id" key is present; bits in the private range supplied by
 * the caller are masked off there.
 */
#define	NPF_NAT_USETABLE	(0x01000000 & NPF_NAT_PRIVMASK)

/*
 * Offset and size of the npf_natpolicy_t region (from n_type up to the
 * end of the structure) which npf_natpolicy_cmp() compares as raw bytes.
 */
#define	NPF_NP_CMP_START	offsetof(npf_natpolicy_t, n_type)
#define	NPF_NP_CMP_SIZE		(sizeof(npf_natpolicy_t) - NPF_NP_CMP_START)
    126 
/*
 * NAT entry for a connection.
 *
 * Entries are allocated from nat_cache, linked on the n_nat_list of
 * their policy and released by npf_nat_destroy().
 */
struct npf_nat {
	/* Associated NAT policy. */
	npf_natpolicy_t *	nt_natpolicy;

	/* ID of the interface on which the entry was created. */
	uint16_t		nt_ifid;
	/* Length, in bytes, of the addresses stored below. */
	uint16_t		nt_alen;

	/*
	 * Translation address as well as the original address which is
	 * used for backwards translation.  The same for ports.
	 */
	npf_addr_t		nt_taddr;
	npf_addr_t		nt_oaddr;

	in_port_t		nt_oport;
	in_port_t		nt_tport;

	/* ALG (if any) associated with this NAT entry. */
	npf_alg_t *		nt_alg;
	/* Opaque argument stored together with the ALG. */
	uintptr_t		nt_alg_arg;

	/* Linkage on the policy list and the owning connection. */
	LIST_ENTRY(npf_nat)	nt_entry;
	npf_conn_t *		nt_conn;
};
    154 
/* Pool cache from which the NAT entries (npf_nat_t) are allocated. */
static pool_cache_t		nat_cache	__read_mostly;
    156 
    157 /*
    158  * npf_nat_sys{init,fini}: initialize/destroy NAT subsystem structures.
    159  */
    160 
    161 void
    162 npf_nat_sysinit(void)
    163 {
    164 	nat_cache = pool_cache_init(sizeof(npf_nat_t), 0,
    165 	    0, 0, "npfnatpl", NULL, IPL_NET, NULL, NULL, NULL);
    166 	KASSERT(nat_cache != NULL);
    167 }
    168 
    169 void
    170 npf_nat_sysfini(void)
    171 {
    172 	/* All NAT policies should already be destroyed. */
    173 	pool_cache_destroy(nat_cache);
    174 }
    175 
    176 /*
    177  * npf_natpolicy_create: create a new NAT policy.
    178  */
    179 npf_natpolicy_t *
    180 npf_natpolicy_create(npf_t *npf, const nvlist_t *nat, npf_ruleset_t *rset)
    181 {
    182 	npf_natpolicy_t *np;
    183 	const void *addr;
    184 	size_t len;
    185 
    186 	np = kmem_zalloc(sizeof(npf_natpolicy_t), KM_SLEEP);
    187 	atomic_store_relaxed(&np->n_refcnt, 1);
    188 	np->n_npfctx = npf;
    189 
    190 	/* The translation type, flags and policy ID. */
    191 	np->n_type = dnvlist_get_number(nat, "type", 0);
    192 	np->n_flags = dnvlist_get_number(nat, "flags", 0) & ~NPF_NAT_PRIVMASK;
    193 	np->n_id = dnvlist_get_number(nat, "nat-policy", 0);
    194 
    195 	/* Should be exclusively either inbound or outbound NAT. */
    196 	if (((np->n_type == NPF_NATIN) ^ (np->n_type == NPF_NATOUT)) == 0) {
    197 		goto err;
    198 	}
    199 	mutex_init(&np->n_lock, MUTEX_DEFAULT, IPL_SOFTNET);
    200 	LIST_INIT(&np->n_nat_list);
    201 
    202 	/*
    203 	 * Translation IP, mask and port (if applicable).  If using the
    204 	 * the table, specified by the ID, then the nat-addr/nat-mask will
    205 	 * be used as a filter for the addresses selected from table.
    206 	 */
    207 	if (nvlist_exists_number(nat, "nat-table-id")) {
    208 		if (np->n_flags & NPF_NAT_STATIC) {
    209 			goto err;
    210 		}
    211 		np->n_tid = nvlist_get_number(nat, "nat-table-id");
    212 		np->n_tmask = NPF_NO_NETMASK;
    213 		np->n_flags |= NPF_NAT_USETABLE;
    214 	} else {
    215 		addr = dnvlist_get_binary(nat, "nat-addr", &len, NULL, 0);
    216 		if (!addr || len == 0 || len > sizeof(npf_addr_t)) {
    217 			goto err;
    218 		}
    219 		memcpy(&np->n_taddr, addr, len);
    220 		np->n_alen = len;
    221 		np->n_tmask = dnvlist_get_number(nat, "nat-mask", NPF_NO_NETMASK);
    222 		if (npf_netmask_check(np->n_alen, np->n_tmask)) {
    223 			goto err;
    224 		}
    225 	}
    226 	np->n_tport = dnvlist_get_number(nat, "nat-port", 0);
    227 
    228 	/*
    229 	 * NAT algorithm.
    230 	 */
    231 	np->n_algo = dnvlist_get_number(nat, "nat-algo", 0);
    232 	switch (np->n_algo) {
    233 	case NPF_ALGO_NPT66:
    234 		np->n_npt66_adj = dnvlist_get_number(nat, "npt66-adj", 0);
    235 		break;
    236 	case NPF_ALGO_NETMAP:
    237 		break;
    238 	case NPF_ALGO_IPHASH:
    239 	case NPF_ALGO_RR:
    240 	default:
    241 		if (np->n_tmask != NPF_NO_NETMASK) {
    242 			goto err;
    243 		}
    244 		break;
    245 	}
    246 	return np;
    247 err:
    248 	mutex_destroy(&np->n_lock);
    249 	kmem_free(np, sizeof(npf_natpolicy_t));
    250 	return NULL;
    251 }
    252 
    253 int
    254 npf_natpolicy_export(const npf_natpolicy_t *np, nvlist_t *nat)
    255 {
    256 	nvlist_add_number(nat, "nat-policy", np->n_id);
    257 	nvlist_add_number(nat, "type", np->n_type);
    258 	nvlist_add_number(nat, "flags", np->n_flags);
    259 
    260 	if (np->n_flags & NPF_NAT_USETABLE) {
    261 		nvlist_add_number(nat, "nat-table-id", np->n_tid);
    262 	} else {
    263 		nvlist_add_binary(nat, "nat-addr", &np->n_taddr, np->n_alen);
    264 		nvlist_add_number(nat, "nat-mask", np->n_tmask);
    265 	}
    266 	nvlist_add_number(nat, "nat-port", np->n_tport);
    267 	nvlist_add_number(nat, "nat-algo", np->n_algo);
    268 
    269 	switch (np->n_algo) {
    270 	case NPF_ALGO_NPT66:
    271 		nvlist_add_number(nat, "npt66-adj", np->n_npt66_adj);
    272 		break;
    273 	}
    274 	return 0;
    275 }
    276 
    277 static void
    278 npf_natpolicy_release(npf_natpolicy_t *np)
    279 {
    280 	KASSERT(atomic_load_relaxed(&np->n_refcnt) > 0);
    281 
    282 	if (atomic_dec_uint_nv(&np->n_refcnt) != 0) {
    283 		return;
    284 	}
    285 	KASSERT(LIST_EMPTY(&np->n_nat_list));
    286 	mutex_destroy(&np->n_lock);
    287 	kmem_free(np, sizeof(npf_natpolicy_t));
    288 }
    289 
    290 /*
    291  * npf_natpolicy_destroy: free the NAT policy.
    292  *
    293  * => Called from npf_rule_free() during the reload via npf_ruleset_destroy().
    294  * => At this point, NAT policy cannot acquire new references.
    295  */
    296 void
    297 npf_natpolicy_destroy(npf_natpolicy_t *np)
    298 {
    299 	/*
    300 	 * Drain the references.  If there are active NAT connections,
    301 	 * then expire them and kick the worker.
    302 	 */
    303 	if (atomic_load_relaxed(&np->n_refcnt) > 1) {
    304 		npf_nat_t *nt;
    305 
    306 		mutex_enter(&np->n_lock);
    307 		LIST_FOREACH(nt, &np->n_nat_list, nt_entry) {
    308 			npf_conn_t *con = nt->nt_conn;
    309 			KASSERT(con != NULL);
    310 			npf_conn_expire(con);
    311 		}
    312 		mutex_exit(&np->n_lock);
    313 		npf_worker_signal(np->n_npfctx);
    314 	}
    315 	KASSERT(atomic_load_relaxed(&np->n_refcnt) >= 1);
    316 
    317 	/*
    318 	 * Drop the initial reference, but it might not be the last one.
    319 	 * If so, the last reference will be triggered via:
    320 	 *
    321 	 * npf_conn_destroy() -> npf_nat_destroy() -> npf_natpolicy_release()
    322 	 */
    323 	npf_natpolicy_release(np);
    324 }
    325 
    326 void
    327 npf_nat_freealg(npf_natpolicy_t *np, npf_alg_t *alg)
    328 {
    329 	npf_nat_t *nt;
    330 
    331 	mutex_enter(&np->n_lock);
    332 	LIST_FOREACH(nt, &np->n_nat_list, nt_entry) {
    333 		if (nt->nt_alg == alg) {
    334 			npf_alg_destroy(np->n_npfctx, alg, nt, nt->nt_conn);
    335 			nt->nt_alg = NULL;
    336 		}
    337 	}
    338 	mutex_exit(&np->n_lock);
    339 }
    340 
    341 /*
    342  * npf_natpolicy_cmp: compare two NAT policies.
    343  *
    344  * => Return 0 on match, and non-zero otherwise.
    345  */
    346 bool
    347 npf_natpolicy_cmp(npf_natpolicy_t *np, npf_natpolicy_t *mnp)
    348 {
    349 	const void *np_raw, *mnp_raw;
    350 
    351 	/*
    352 	 * Compare the relevant NAT policy information (in its raw form)
    353 	 * that is enough as a matching criteria.
    354 	 */
    355 	KASSERT(np && mnp && np != mnp);
    356 	np_raw = (const uint8_t *)np + NPF_NP_CMP_START;
    357 	mnp_raw = (const uint8_t *)mnp + NPF_NP_CMP_START;
    358 	return memcmp(np_raw, mnp_raw, NPF_NP_CMP_SIZE) == 0;
    359 }
    360 
    361 void
    362 npf_nat_setid(npf_natpolicy_t *np, uint64_t id)
    363 {
    364 	np->n_id = id;
    365 }
    366 
    367 uint64_t
    368 npf_nat_getid(const npf_natpolicy_t *np)
    369 {
    370 	return np->n_id;
    371 }
    372 
    373 /*
    374  * npf_nat_which: tell which address (source or destination) should be
    375  * rewritten given the combination of the NAT type and flow direction.
    376  *
    377  * => Returns NPF_SRC or NPF_DST constant.
    378  */
    379 static inline unsigned
    380 npf_nat_which(const unsigned type, const npf_flow_t flow)
    381 {
    382 	unsigned which;
    383 
    384 	/* The logic below relies on these values being 0 or 1. */
    385 	CTASSERT(NPF_SRC == 0 && NPF_DST == 1);
    386 	CTASSERT(NPF_FLOW_FORW == NPF_SRC && NPF_FLOW_BACK == NPF_DST);
    387 
    388 	KASSERT(type == NPF_NATIN || type == NPF_NATOUT);
    389 	KASSERT(flow == NPF_FLOW_FORW || flow == NPF_FLOW_BACK);
    390 
    391 	/*
    392 	 * Outbound NAT rewrites:
    393 	 *
    394 	 * - Source (NPF_SRC) on "forwards" stream.
    395 	 * - Destination (NPF_DST) on "backwards" stream.
    396 	 *
    397 	 * Inbound NAT is other way round.
    398 	 */
    399 	which = (type == NPF_NATOUT) ? flow : !flow;
    400 	KASSERT(which == NPF_SRC || which == NPF_DST);
    401 	return which;
    402 }
    403 
    404 /*
    405  * npf_nat_inspect: inspect packet against NAT ruleset and return a policy.
    406  *
    407  * => Acquire a reference on the policy, if found.
    408  * => NAT lookup is protected by EBR.
    409  */
    410 static npf_natpolicy_t *
    411 npf_nat_inspect(npf_cache_t *npc, const unsigned di)
    412 {
    413 	npf_t *npf = npc->npc_ctx;
    414 	int slock = npf_config_read_enter(npf);
    415 	npf_ruleset_t *rlset = npf_config_natset(npf);
    416 	npf_natpolicy_t *np;
    417 	npf_rule_t *rl;
    418 
    419 	rl = npf_ruleset_inspect(npc, rlset, di, NPF_LAYER_3);
    420 	if (rl == NULL) {
    421 		npf_config_read_exit(npf, slock);
    422 		return NULL;
    423 	}
    424 	np = npf_rule_getnat(rl);
    425 	atomic_inc_uint(&np->n_refcnt);
    426 	npf_config_read_exit(npf, slock);
    427 	return np;
    428 }
    429 
    430 static void
    431 npf_nat_algo_netmap(const npf_cache_t *npc, const npf_natpolicy_t *np,
    432     const unsigned which, npf_addr_t *addr)
    433 {
    434 	const npf_addr_t *orig_addr = npc->npc_ips[which];
    435 
    436 	/*
    437 	 * NETMAP:
    438 	 *
    439 	 *	addr = net-addr | (orig-addr & ~mask)
    440 	 */
    441 	npf_addr_mask(&np->n_taddr, np->n_tmask, npc->npc_alen, addr);
    442 	npf_addr_bitor(orig_addr, np->n_tmask, npc->npc_alen, addr);
    443 }
    444 
    445 static inline npf_addr_t *
    446 npf_nat_getaddr(npf_cache_t *npc, npf_natpolicy_t *np, const unsigned alen)
    447 {
    448 	npf_tableset_t *ts = npf_config_tableset(np->n_npfctx);
    449 	npf_table_t *t = npf_tableset_getbyid(ts, np->n_tid);
    450 	unsigned idx;
    451 
    452 	/*
    453 	 * Dynamically select the translation IP address.
    454 	 */
    455 	switch (np->n_algo) {
    456 	case NPF_ALGO_RR:
    457 		idx = atomic_inc_uint_nv(&np->n_rr_idx);
    458 		break;
    459 	case NPF_ALGO_IPHASH:
    460 	default:
    461 		idx = npf_addr_mix(alen,
    462 		    npc->npc_ips[NPF_SRC],
    463 		    npc->npc_ips[NPF_DST]);
    464 		break;
    465 	}
    466 	return npf_table_getsome(t, alen, idx);
    467 }
    468 
/*
 * npf_nat_create: create a new NAT translation entry.
 *
 * => The caller must pass the NAT policy with a reference acquired for us.
 * => On success, the entry is linked on the policy list and returned.
 * => Returns NULL if the entry cannot be allocated or no translation
 *    address can be selected from the table; the policy reference is
 *    not released here in that case.
 */
static npf_nat_t *
npf_nat_create(npf_cache_t *npc, npf_natpolicy_t *np, npf_conn_t *con)
{
	const unsigned proto = npc->npc_proto;
	const unsigned alen = npc->npc_alen;
	const nbuf_t *nbuf = npc->npc_nbuf;
	npf_t *npf = npc->npc_ctx;
	npf_addr_t *taddr;
	npf_nat_t *nt;

	KASSERT(npf_iscached(npc, NPC_IP46));
	KASSERT(npf_iscached(npc, NPC_LAYER4));

	/* Construct a new NAT entry and associate it with the connection. */
	nt = pool_cache_get(nat_cache, PR_NOWAIT);
	if (__predict_false(!nt)) {
		return NULL;
	}
	/*
	 * Note: the statistic is incremented before the translation
	 * address is selected, therefore it is also counted when the
	 * table lookup below fails.
	 */
	npf_stats_inc(npf, NPF_STAT_NAT_CREATE);
	nt->nt_natpolicy = np;
	nt->nt_conn = con;
	nt->nt_alg = NULL;

	/*
	 * Save the interface ID.
	 *
	 * Note: this can be different from the given connection if it
	 * was established on a different interface, using the global state
	 * mode (state.key.interface = 0).
	 */
	KASSERT(nbuf->nb_ifid != 0);
	nt->nt_ifid = nbuf->nb_ifid;

	/*
	 * Select the translation address.
	 */
	if (np->n_flags & NPF_NAT_USETABLE) {
		/*
		 * The address returned from the table is copied while
		 * still inside the configuration read-side section.
		 */
		int slock = npf_config_read_enter(npf);
		taddr = npf_nat_getaddr(npc, np, alen);
		if (__predict_false(!taddr)) {
			npf_config_read_exit(npf, slock);
			pool_cache_put(nat_cache, nt);
			return NULL;
		}
		memcpy(&nt->nt_taddr, taddr, alen);
		npf_config_read_exit(npf, slock);

	} else if (np->n_algo == NPF_ALGO_NETMAP) {
		/* Computed from the address rewritten on "forwards" stream. */
		const unsigned which = npf_nat_which(np->n_type, NPF_FLOW_FORW);
		npf_nat_algo_netmap(npc, np, which, &nt->nt_taddr);
		taddr = &nt->nt_taddr;
	} else {
		/* Static IP address. */
		taddr = &np->n_taddr;
		memcpy(&nt->nt_taddr, taddr, alen);
	}
	nt->nt_alen = alen;

	/* Save the original address which may be rewritten. */
	if (np->n_type == NPF_NATOUT) {
		/* Outbound NAT: source (think internal) address. */
		memcpy(&nt->nt_oaddr, npc->npc_ips[NPF_SRC], alen);
	} else {
		/* Inbound NAT: destination (think external) address. */
		KASSERT(np->n_type == NPF_NATIN);
		memcpy(&nt->nt_oaddr, npc->npc_ips[NPF_DST], alen);
	}

	/*
	 * Port translation, if required, and if it is TCP/UDP.
	 */
	if ((np->n_flags & NPF_NAT_PORTS) == 0 ||
	    (proto != IPPROTO_TCP && proto != IPPROTO_UDP)) {
		nt->nt_oport = 0;
		nt->nt_tport = 0;
		goto out;
	}

	/* Save the relevant TCP/UDP port. */
	if (proto == IPPROTO_TCP) {
		const struct tcphdr *th = npc->npc_l4.tcp;
		nt->nt_oport = (np->n_type == NPF_NATOUT) ?
		    th->th_sport : th->th_dport;
	} else {
		const struct udphdr *uh = npc->npc_l4.udp;
		nt->nt_oport = (np->n_type == NPF_NATOUT) ?
		    uh->uh_sport : uh->uh_dport;
	}

	/* Get a new port for translation. */
	if ((np->n_flags & NPF_NAT_PORTMAP) != 0) {
		/*
		 * NOTE(review): the value returned by npf_portmap_get() is
		 * stored without a check -- confirm how an exhausted port
		 * map is reported and whether it needs handling here.
		 */
		npf_portmap_t *pm = np->n_npfctx->portmap;
		nt->nt_tport = npf_portmap_get(pm, alen, taddr);
	} else {
		nt->nt_tport = np->n_tport;
	}
out:
	mutex_enter(&np->n_lock);
	LIST_INSERT_HEAD(&np->n_nat_list, nt, nt_entry);
	/* Note: we also consume the reference on policy. */
	mutex_exit(&np->n_lock);
	return nt;
}
    577 
    578 /*
    579  * npf_dnat_translate: perform translation given the state data.
    580  */
    581 static inline int
    582 npf_dnat_translate(npf_cache_t *npc, npf_nat_t *nt, npf_flow_t flow)
    583 {
    584 	const npf_natpolicy_t *np = nt->nt_natpolicy;
    585 	const unsigned which = npf_nat_which(np->n_type, flow);
    586 	const npf_addr_t *addr;
    587 	in_port_t port;
    588 
    589 	KASSERT(npf_iscached(npc, NPC_IP46));
    590 	KASSERT(npf_iscached(npc, NPC_LAYER4));
    591 
    592 	if (flow == NPF_FLOW_FORW) {
    593 		/* "Forwards" stream: use translation address/port. */
    594 		addr = &nt->nt_taddr;
    595 		port = nt->nt_tport;
    596 	} else {
    597 		/* "Backwards" stream: use original address/port. */
    598 		addr = &nt->nt_oaddr;
    599 		port = nt->nt_oport;
    600 	}
    601 	KASSERT((np->n_flags & NPF_NAT_PORTS) != 0 || port == 0);
    602 
    603 	/* Execute ALG translation first. */
    604 	if ((npc->npc_info & NPC_ALG_EXEC) == 0) {
    605 		npc->npc_info |= NPC_ALG_EXEC;
    606 		npf_alg_exec(npc, nt, flow);
    607 		npf_recache(npc);
    608 	}
    609 	KASSERT(!nbuf_flag_p(npc->npc_nbuf, NBUF_DATAREF_RESET));
    610 
    611 	/* Finally, perform the translation. */
    612 	return npf_napt_rwr(npc, which, addr, port);
    613 }
    614 
    615 /*
    616  * npf_snat_translate: perform translation given the algorithm.
    617  */
    618 static inline int
    619 npf_snat_translate(npf_cache_t *npc, const npf_natpolicy_t *np, npf_flow_t flow)
    620 {
    621 	const unsigned which = npf_nat_which(np->n_type, flow);
    622 	const npf_addr_t *taddr;
    623 	npf_addr_t addr;
    624 
    625 	KASSERT(np->n_flags & NPF_NAT_STATIC);
    626 
    627 	switch (np->n_algo) {
    628 	case NPF_ALGO_NETMAP:
    629 		npf_nat_algo_netmap(npc, np, which, &addr);
    630 		taddr = &addr;
    631 		break;
    632 	case NPF_ALGO_NPT66:
    633 		return npf_npt66_rwr(npc, which, &np->n_taddr,
    634 		    np->n_tmask, np->n_npt66_adj);
    635 	default:
    636 		taddr = &np->n_taddr;
    637 		break;
    638 	}
    639 	return npf_napt_rwr(npc, which, taddr, np->n_tport);
    640 }
    641 
    642 /*
    643  * Associate NAT policy with an existing connection state.
    644  */
    645 npf_nat_t *
    646 npf_nat_share_policy(npf_cache_t *npc, npf_conn_t *con, npf_nat_t *src_nt)
    647 {
    648 	npf_natpolicy_t *np = src_nt->nt_natpolicy;
    649 	npf_nat_t *nt;
    650 	int ret;
    651 
    652 	/* Create a new NAT entry. */
    653 	nt = npf_nat_create(npc, np, con);
    654 	if (__predict_false(nt == NULL)) {
    655 		return NULL;
    656 	}
    657 	atomic_inc_uint(&np->n_refcnt);
    658 
    659 	/* Associate the NAT translation entry with the connection. */
    660 	ret = npf_conn_setnat(npc, con, nt, np->n_type);
    661 	if (__predict_false(ret)) {
    662 		/* Will release the reference. */
    663 		npf_nat_destroy(con, nt);
    664 		return NULL;
    665 	}
    666 	return nt;
    667 }
    668 
    669 /*
    670  * npf_nat_lookup: lookup the (dynamic) NAT state and return its entry,
    671  *
    672  * => Checks that the packet is on the interface where NAT policy is applied.
    673  * => Determines the flow direction in the context of the NAT policy.
    674  */
    675 static npf_nat_t *
    676 npf_nat_lookup(const npf_cache_t *npc, npf_conn_t *con,
    677     const unsigned di, npf_flow_t *flow)
    678 {
    679 	const nbuf_t *nbuf = npc->npc_nbuf;
    680 	const npf_natpolicy_t *np;
    681 	npf_nat_t *nt;
    682 
    683 	if ((nt = npf_conn_getnat(con)) == NULL) {
    684 		return NULL;
    685 	}
    686 	if (nt->nt_ifid != nbuf->nb_ifid) {
    687 		return NULL;
    688 	}
    689 
    690 	np = nt->nt_natpolicy;
    691 	KASSERT(atomic_load_relaxed(&np->n_refcnt) > 0);
    692 
    693 	/*
    694 	 * We rely on NPF_NAT{IN,OUT} being equal to PFIL_{IN,OUT}.
    695 	 */
    696 	CTASSERT(NPF_NATIN == PFIL_IN && NPF_NATOUT == PFIL_OUT);
    697 	*flow = (np->n_type == di) ? NPF_FLOW_FORW : NPF_FLOW_BACK;
    698 	return nt;
    699 }
    700 
/*
 * npf_do_nat:
 *
 *	- Inspect packet for a NAT policy, unless a connection with a NAT
 *	  association already exists.  In such case, determine whether it
 *	  is a "forwards" or "backwards" stream.
 *
 *	- Perform translation: rewrite source or destination fields,
 *	  depending on translation type and direction.
 *
 *	- Associate a NAT policy with a connection (may establish a new).
 *
 * => 'con' is the connection of the packet, or NULL if there is none.
 * => 'di' is the packet direction.
 * => Returns 0 if the packet was translated or no translation applies,
 *    ENOMEM if a connection or NAT entry could not be created, or the
 *    error from npf_conn_setnat() or the translation routines.
 */
int
npf_do_nat(npf_cache_t *npc, npf_conn_t *con, const unsigned di)
{
	nbuf_t *nbuf = npc->npc_nbuf;
	/* Connection established by this function, if any. */
	npf_conn_t *ncon = NULL;
	npf_natpolicy_t *np;
	npf_flow_t flow;
	npf_nat_t *nt;
	int error;

	/* All relevant data should be already cached. */
	if (!npf_iscached(npc, NPC_IP46) || !npf_iscached(npc, NPC_LAYER4)) {
		return 0;
	}
	KASSERT(!nbuf_flag_p(nbuf, NBUF_DATAREF_RESET));

	/*
	 * Return the NAT entry associated with the connection, if any.
	 * Determines whether the stream is "forwards" or "backwards".
	 * Note: no need to lock, since reference on connection is held.
	 */
	if (con && (nt = npf_nat_lookup(npc, con, di, &flow)) != NULL) {
		np = nt->nt_natpolicy;
		goto translate;
	}

	/*
	 * Inspect the packet for a NAT policy, if there is no connection.
	 * Note: acquires a reference if found.
	 */
	np = npf_nat_inspect(npc, di);
	if (np == NULL) {
		/* If packet does not match - done. */
		return 0;
	}
	flow = NPF_FLOW_FORW;

	/* Static NAT - just perform the translation. */
	if (np->n_flags & NPF_NAT_STATIC) {
		if (nbuf_cksum_barrier(nbuf, di)) {
			npf_recache(npc);
		}
		error = npf_snat_translate(npc, np, flow);
		/* No state is kept: drop the reference from the inspection. */
		npf_natpolicy_release(np);
		return error;
	}

	/*
	 * If there is no local connection (no "stateful" rule - unusual,
	 * but possible configuration), establish one before translation.
	 * Note that it is not a "pass" connection, therefore passing of
	 * "backwards" stream depends on other, stateless filtering rules.
	 */
	if (con == NULL) {
		ncon = npf_conn_establish(npc, di, true);
		if (ncon == NULL) {
			npf_natpolicy_release(np);
			return ENOMEM;
		}
		con = ncon;
	}

	/*
	 * Create a new NAT entry and associate with the connection.
	 * We will consume the reference on success (release on error).
	 */
	nt = npf_nat_create(npc, np, con);
	if (nt == NULL) {
		npf_natpolicy_release(np);
		error = ENOMEM;
		goto out;
	}

	/*
	 * Determine whether any ALG matches.  The return value is only
	 * used for the assertion below.
	 */
	if (npf_alg_match(npc, nt, di)) {
		KASSERT(nt->nt_alg != NULL);
	}

	/* Associate the NAT translation entry with the connection. */
	error = npf_conn_setnat(npc, con, nt, np->n_type);
	if (error) {
		/* Will release the reference. */
		npf_nat_destroy(con, nt);
		goto out;
	}

translate:
	/* May need to process the delayed checksums first (XXX: NetBSD). */
	if (nbuf_cksum_barrier(nbuf, di)) {
		npf_recache(npc);
	}

	/* Perform the translation. */
	error = npf_dnat_translate(npc, nt, flow);
out:
	/* Only a connection established above is expired/released here. */
	if (__predict_false(ncon)) {
		if (error) {
			/* It was created for NAT - just expire. */
			npf_conn_expire(ncon);
		}
		npf_conn_release(ncon);
	}
	return error;
}
    817 
    818 /*
    819  * npf_nat_gettrans: return translation IP address and port.
    820  */
    821 void
    822 npf_nat_gettrans(npf_nat_t *nt, npf_addr_t **addr, in_port_t *port)
    823 {
    824 	*addr = &nt->nt_taddr;
    825 	*port = nt->nt_tport;
    826 }
    827 
    828 /*
    829  * npf_nat_getorig: return original IP address and port from translation entry.
    830  */
    831 void
    832 npf_nat_getorig(npf_nat_t *nt, npf_addr_t **addr, in_port_t *port)
    833 {
    834 	*addr = &nt->nt_oaddr;
    835 	*port = nt->nt_oport;
    836 }
    837 
    838 /*
    839  * npf_nat_setalg: associate an ALG with the NAT entry.
    840  */
    841 void
    842 npf_nat_setalg(npf_nat_t *nt, npf_alg_t *alg, uintptr_t arg)
    843 {
    844 	nt->nt_alg = alg;
    845 	nt->nt_alg_arg = arg;
    846 }
    847 
    848 npf_alg_t *
    849 npf_nat_getalg(const npf_nat_t *nt)
    850 {
    851 	return nt->nt_alg;
    852 }
    853 
    854 uintptr_t
    855 npf_nat_getalgarg(const npf_nat_t *nt)
    856 {
    857 	return nt->nt_alg_arg;
    858 }
    859 
    860 /*
    861  * npf_nat_destroy: destroy NAT structure (performed on connection expiration).
    862  */
    863 void
    864 npf_nat_destroy(npf_conn_t *con, npf_nat_t *nt)
    865 {
    866 	npf_natpolicy_t *np = nt->nt_natpolicy;
    867 	npf_t *npf = np->n_npfctx;
    868 	npf_alg_t *alg;
    869 
    870 	/* Execute the ALG destroy callback, if any. */
    871 	if ((alg = npf_nat_getalg(nt)) != NULL) {
    872 		npf_alg_destroy(npf, alg, nt, con);
    873 		nt->nt_alg = NULL;
    874 	}
    875 
    876 	/* Return taken port to the portmap. */
    877 	if ((np->n_flags & NPF_NAT_PORTMAP) != 0 && nt->nt_tport) {
    878 		npf_portmap_t *pm = npf->portmap;
    879 		npf_portmap_put(pm, nt->nt_alen, &nt->nt_taddr, nt->nt_tport);
    880 	}
    881 	npf_stats_inc(np->n_npfctx, NPF_STAT_NAT_DESTROY);
    882 
    883 	/*
    884 	 * Remove the connection from the list and drop the reference on
    885 	 * the NAT policy.  Note: this might trigger its destruction.
    886 	 */
    887 	mutex_enter(&np->n_lock);
    888 	LIST_REMOVE(nt, nt_entry);
    889 	mutex_exit(&np->n_lock);
    890 	npf_natpolicy_release(np);
    891 
    892 	pool_cache_put(nat_cache, nt);
    893 }
    894 
    895 /*
    896  * npf_nat_export: serialize the NAT entry with a NAT policy ID.
    897  */
    898 void
    899 npf_nat_export(npf_t *npf, const npf_nat_t *nt, nvlist_t *con_nv)
    900 {
    901 	npf_natpolicy_t *np = nt->nt_natpolicy;
    902 	unsigned alen = nt->nt_alen;
    903 	nvlist_t *nat_nv;
    904 
    905 	nat_nv = nvlist_create(0);
    906 	if (nt->nt_ifid) {
    907 		char ifname[IFNAMSIZ];
    908 		npf_ifmap_copyname(npf, nt->nt_ifid, ifname, sizeof(ifname));
    909 		nvlist_add_string(nat_nv, "ifname", ifname);
    910 	}
    911 	nvlist_add_number(nat_nv, "alen", alen);
    912 
    913 	nvlist_add_binary(nat_nv, "oaddr", &nt->nt_oaddr, alen);
    914 	nvlist_add_number(nat_nv, "oport", nt->nt_oport);
    915 
    916 	nvlist_add_binary(nat_nv, "taddr", &nt->nt_taddr, alen);
    917 	nvlist_add_number(nat_nv, "tport", nt->nt_tport);
    918 
    919 	nvlist_add_number(nat_nv, "nat-policy", np->n_id);
    920 	nvlist_move_nvlist(con_nv, "nat", nat_nv);
    921 }
    922 
    923 /*
    924  * npf_nat_import: find the NAT policy and unserialize the NAT entry.
    925  */
    926 npf_nat_t *
    927 npf_nat_import(npf_t *npf, const nvlist_t *nat,
    928     npf_ruleset_t *natlist, npf_conn_t *con)
    929 {
    930 	npf_natpolicy_t *np;
    931 	npf_nat_t *nt;
    932 	const char *ifname;
    933 	const void *taddr, *oaddr;
    934 	size_t alen, len;
    935 	uint64_t np_id;
    936 
    937 	np_id = dnvlist_get_number(nat, "nat-policy", UINT64_MAX);
    938 	if ((np = npf_ruleset_findnat(natlist, np_id)) == NULL) {
    939 		return NULL;
    940 	}
    941 	nt = pool_cache_get(nat_cache, PR_WAITOK);
    942 	memset(nt, 0, sizeof(npf_nat_t));
    943 
    944 	ifname = dnvlist_get_string(nat, "ifname", NULL);
    945 	if (ifname && (nt->nt_ifid = npf_ifmap_register(npf, ifname)) == 0) {
    946 		goto err;
    947 	}
    948 
    949 	alen = dnvlist_get_number(nat, "alen", 0);
    950 	if (alen == 0 || alen > sizeof(npf_addr_t)) {
    951 		goto err;
    952 	}
    953 
    954 	taddr = dnvlist_get_binary(nat, "taddr", &len, NULL, 0);
    955 	if (!taddr || len != alen) {
    956 		goto err;
    957 	}
    958 	memcpy(&nt->nt_taddr, taddr, sizeof(npf_addr_t));
    959 
    960 	oaddr = dnvlist_get_binary(nat, "oaddr", &len, NULL, 0);
    961 	if (!oaddr || len != alen) {
    962 		goto err;
    963 	}
    964 	memcpy(&nt->nt_oaddr, oaddr, sizeof(npf_addr_t));
    965 
    966 	nt->nt_oport = dnvlist_get_number(nat, "oport", 0);
    967 	nt->nt_tport = dnvlist_get_number(nat, "tport", 0);
    968 
    969 	/* Take a specific port from port-map. */
    970 	if ((np->n_flags & NPF_NAT_PORTMAP) != 0 && nt->nt_tport) {
    971 		npf_portmap_t *pm = npf->portmap;
    972 
    973 		if (!npf_portmap_take(pm, nt->nt_alen,
    974 		    &nt->nt_taddr, nt->nt_tport)) {
    975 			goto err;
    976 		}
    977 	}
    978 	npf_stats_inc(npf, NPF_STAT_NAT_CREATE);
    979 
    980 	/*
    981 	 * Associate, take a reference and insert.  Unlocked/non-atomic
    982 	 * since the policy is not yet globally visible.
    983 	 */
    984 	nt->nt_natpolicy = np;
    985 	nt->nt_conn = con;
    986 	atomic_store_relaxed(&np->n_refcnt,
    987 	    atomic_load_relaxed(&np->n_refcnt) + 1);
    988 	LIST_INSERT_HEAD(&np->n_nat_list, nt, nt_entry);
    989 	return nt;
    990 err:
    991 	pool_cache_put(nat_cache, nt);
    992 	return NULL;
    993 }
    994 
#if defined(DDB) || defined(_NPF_TESTING)

/*
 * npf_nat_dump: print the NAT entry and its policy (debugging aid).
 *
 * => The addresses are formatted as IPv4: only the leading
 *    sizeof(struct in_addr) bytes of each address are printed.
 */
void
npf_nat_dump(const npf_nat_t *nt)
{
	const npf_natpolicy_t *np = nt->nt_natpolicy;
	struct in_addr taddr, oaddr;

	/* The policy together with the translation address. */
	memcpy(&taddr, &nt->nt_taddr, sizeof(taddr));
	printf("\tNATP(%p): type %u flags 0x%x taddr %s tport %d\n", np,
	    np->n_type, np->n_flags, inet_ntoa(taddr), ntohs(np->n_tport));

	/* The original address and the ports of the entry. */
	memcpy(&oaddr, &nt->nt_oaddr, sizeof(oaddr));
	printf("\tNAT: original address %s oport %d tport %d\n",
	    inet_ntoa(oaddr), ntohs(nt->nt_oport), ntohs(nt->nt_tport));

	if (nt->nt_alg != NULL) {
		printf("\tNAT ALG = %p, ARG = %p\n",
		    nt->nt_alg, (void *)nt->nt_alg_arg);
	}
}

#endif
   1017