Home | History | Annotate | Line # | Download | only in npf
npf_nat.c revision 1.1
      1 /*	$NetBSD: npf_nat.c,v 1.1 2010/08/22 18:56:22 rmind Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 2010 The NetBSD Foundation, Inc.
      5  * All rights reserved.
      6  *
      7  * This material is based upon work partially supported by The
      8  * NetBSD Foundation under a contract with Mindaugas Rasiukevicius.
      9  *
     10  * Redistribution and use in source and binary forms, with or without
     11  * modification, are permitted provided that the following conditions
     12  * are met:
     13  * 1. Redistributions of source code must retain the above copyright
     14  *    notice, this list of conditions and the following disclaimer.
     15  * 2. Redistributions in binary form must reproduce the above copyright
     16  *    notice, this list of conditions and the following disclaimer in the
     17  *    documentation and/or other materials provided with the distribution.
     18  *
     19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     29  * POSSIBILITY OF SUCH DAMAGE.
     30  */
     31 
     32 /*
     33  * NPF network address port translation (NAPT).
     34  * Described in RFC 2663, RFC 3022.  Commonly just "NAT".
     35  *
     36  * Overview
     37  *
     38  *	There are a few mechanisms: NAT policy, port map and translation.
     39  *	NAT module has a separate ruleset, where rules contain associated
     40  *	NAT policy, thus flexible filter criteria can be used.
     41  *
     42  * NAT policies and port maps
     43  *
     44  *	NAT policy is applied when a packet matches the rule.  Apart from
     45  *	filter criteria, NAT policy has a translation (gateway) IP address
     46  *	and associated port map.  Port map is a bitmap used to reserve and
     47  *	use unique TCP/UDP ports for translation.  Port maps are unique to
     48  *	the IP addresses, therefore multiple NAT policies with the same IP
     49  *	will share the same port map.
     50  *
     51  * NAT sessions and translation entries
     52  *
     53  *	NAT module relies on session management module.  Each "NAT" session
     54  *	has an associated translation entry (npf_nat_t).  It contains local
     55  *	i.e. original IP address with port and translation port, allocated
     56  *	from the port map.  Each NAT translation entry is associated with
     57  *	the policy, which contains translation IP address.  Allocated port
     58  *	is returned to the port map and translation entry destroyed when
     59  *	"NAT" session expires.
     60  */
     61 
     62 #ifdef _KERNEL
     63 #include <sys/cdefs.h>
     64 __KERNEL_RCSID(0, "$NetBSD: npf_nat.c,v 1.1 2010/08/22 18:56:22 rmind Exp $");
     65 
     66 #include <sys/param.h>
     67 #include <sys/kernel.h>
     68 #endif
     69 
     70 #include <sys/atomic.h>
     71 #include <sys/bitops.h>
     72 #include <sys/kmem.h>
     73 #include <sys/pool.h>
     74 #include <net/pfil.h>
     75 #include <netinet/in.h>
     76 
     77 #include "npf_impl.h"
     78 
     79 /*
     80  * NPF portmap structure.
     81  */
     82 typedef struct {
     83 	u_int				p_refcnt;
     84 	uint32_t			p_bitmap[0];
     85 } npf_portmap_t;
     86 
     87 /* Portmap range: [ 1024 .. 65535 ]; one bit per port, 32 ports per word. */
     88 #define	PORTMAP_FIRST			(1024)	/* lowest port covered by the map */
     89 #define	PORTMAP_SIZE			((65536 - PORTMAP_FIRST) / 32)	/* bitmap length in 32-bit words */
     90 #define	PORTMAP_FILLED			((uint32_t)~0)	/* word value when all of its ports are taken */
     91 #define	PORTMAP_MASK			(31)	/* port offset -> bit index within a word */
     92 #define	PORTMAP_SHIFT			(5)	/* port offset -> word index (and back) */
     93 
     94 /* NAT policy structure; instances are linked on nat_policy_list. */
     95 struct npf_natpolicy {
     96 	LIST_ENTRY(npf_natpolicy)	n_entry;	/* linkage on nat_policy_list */
     97 	in_addr_t			n_gw_ip;	/* translation (gateway) IP address */
     98 	npf_portmap_t *			n_portmap;	/* port map; shared by policies with the same n_gw_ip */
     99 };
    100 
    101 /* NAT translation entry for a session; allocated from nat_cache. */
    102 struct npf_nat {
    103 	npf_natpolicy_t *		nt_natpolicy;	/* policy providing the gateway IP and port map */
    104 	/* Local (original source) address and port, for backwards translation. */
    105 	in_addr_t			nt_laddr;
    106 	in_port_t			nt_lport;	/* network byte-order; 0 unless TCP/UDP */
    107 	/* Translation port (for forwards); network byte-order, 0 unless TCP/UDP. */
    108 	in_port_t			nt_tport;
    109 	/* ALG (if any) associated with this NAT entry; see npf_nat_setalg(). */
    110 	npf_alg_t *			nt_alg;
    111 	uintptr_t			nt_alg_arg;	/* argument stored together with nt_alg */
    112 };
    113 
    114 static npf_ruleset_t *			nat_ruleset;	/* active NAT ruleset; swapped by npf_nat_reload() */
    115 static LIST_HEAD(, npf_natpolicy)	nat_policy_list;	/* every allocated NAT policy */
    116 static pool_cache_t			nat_cache;	/* pool cache for npf_nat_t entries */
    117 
    118 /*
    119  * npf_nat_sys{init,fini}: initialise/destroy NAT subsystem structures.
    120  */
    121 
    122 void
    123 npf_nat_sysinit(void)
    124 {
    125 
    126 	nat_cache = pool_cache_init(sizeof(npf_nat_t), coherency_unit,
    127 	    0, 0, "npfnatpl", NULL, IPL_NET, NULL, NULL, NULL);
    128 	KASSERT(nat_cache != NULL);
    129 	nat_ruleset = npf_ruleset_create();
    130 	LIST_INIT(&nat_policy_list);
    131 }
    132 
    133 void
    134 npf_nat_sysfini(void)
    135 {
    136 
    137 	/* Flush NAT policies. */
    138 	npf_nat_reload(NULL);
    139 	KASSERT(LIST_EMPTY(&nat_policy_list));
    140 	pool_cache_destroy(nat_cache);
    141 }
    142 
    143 /*
    144  * npf_nat_newpolicy: allocate a new NAT policy.
    145  *
    146  * => Shares portmap if policy is on existing translation address.
    147  * => XXX: serialise at upper layer.
    148  */
    149 npf_natpolicy_t *
    150 npf_nat_newpolicy(in_addr_t gip)
    151 {
    152 	npf_natpolicy_t *np, *it;
    153 	npf_portmap_t *pm;
    154 
    155 	np = kmem_zalloc(sizeof(npf_natpolicy_t), KM_SLEEP);
    156 	if (np == NULL) {
    157 		return NULL;
    158 	}
    159 	np->n_gw_ip = gip;
    160 
    161 	/* Search for a NAT policy using the same translation address. */
    162 	pm = NULL;
    163 	LIST_FOREACH(it, &nat_policy_list, n_entry) {
    164 		if (it->n_gw_ip != np->n_gw_ip)
    165 			continue;
    166 		pm = it->n_portmap;
    167 		break;
    168 	}
    169 	if (pm == NULL) {
    170 		/* Allocate a new port map for the NAT policy. */
    171 		pm = kmem_zalloc(sizeof(npf_portmap_t) +
    172 		    (PORTMAP_SIZE * sizeof(uint32_t)), KM_SLEEP);
    173 		if (pm == NULL) {
    174 			kmem_free(np, sizeof(npf_natpolicy_t));
    175 			return NULL;
    176 		}
    177 		pm->p_refcnt = 1;
    178 		KASSERT((uintptr_t)pm->p_bitmap == (uintptr_t)pm + sizeof(*pm));
    179 	} else {
    180 		/* Share the port map. */
    181 		pm->p_refcnt++;
    182 	}
    183 	np->n_portmap = pm;
    184 	/*
    185 	 * Note: old policies with new might co-exist in the list,
    186 	 * while reload is in progress, but that is not an issue.
    187 	 */
    188 	LIST_INSERT_HEAD(&nat_policy_list, np, n_entry);
    189 	return np;
    190 }
    191 
    192 /*
    193  * npf_nat_freepolicy: free NAT policy and, on last reference, free portmap.
    194  *
    195  * => Called from npf_rule_free() during the reload via npf_nat_reload().
    196  */
    197 void
    198 npf_nat_freepolicy(npf_natpolicy_t *np)
    199 {
    200 	npf_portmap_t *pm = np->n_portmap;
    201 
    202 	LIST_REMOVE(np, n_entry);
    203 	if (--pm->p_refcnt == 0) {
    204 		kmem_free(pm, sizeof(npf_portmap_t) +
    205 		    (PORTMAP_SIZE * sizeof(uint32_t)));
    206 	}
    207 	kmem_free(np, sizeof(npf_natpolicy_t));
    208 }
    209 
    210 /*
    211  * npf_nat_reload: activate new ruleset of NAT policies and destroy old.
    212  *
    213  * => Destruction of ruleset will perform npf_nat_freepolicy() for each policy.
    214  */
    215 void
    216 npf_nat_reload(npf_ruleset_t *nset)
    217 {
    218 	npf_ruleset_t *oldnset;
    219 
    220 	oldnset = atomic_swap_ptr(&nat_ruleset, nset);
    221 	if (oldnset) {
    222 		npf_ruleset_destroy(oldnset);
    223 	}
    224 }
    225 
    226 /*
    227  * npf_nat_getport: allocate and return a port in the NAT policy portmap.
    228  *
    229  * => Returns in network byte-order.
    230  * => Zero indicates failure.
    231  */
    232 static in_port_t
    233 npf_nat_getport(npf_natpolicy_t *np)
    234 {
    235 	npf_portmap_t *pm = np->n_portmap;
    236 	u_int n = PORTMAP_SIZE, idx, bit;
    237 	uint32_t map, nmap;
    238 
    239 	idx = arc4random() % PORTMAP_SIZE;
    240 	for (;;) {
    241 		KASSERT(idx < PORTMAP_SIZE);
    242 		map = pm->p_bitmap[idx];
    243 		if (__predict_false(map == PORTMAP_FILLED)) {
    244 			if (n-- == 0) {
    245 				/* No space. */
    246 				return 0;
    247 			}
    248 			/* This bitmap is sfilled, next. */
    249 			idx = (idx ? idx : PORTMAP_SIZE) - 1;
    250 			continue;
    251 		}
    252 		bit = ffs32(~map) - 1;
    253 		nmap = map | (1 << bit);
    254 		if (atomic_cas_32(&pm->p_bitmap[idx], map, nmap) == map) {
    255 			/* Success. */
    256 			break;
    257 		}
    258 	}
    259 	return htons(PORTMAP_FIRST + (idx << PORTMAP_SHIFT) + bit);
    260 }
    261 
    262 /*
    263  * npf_nat_putport: return port as available in the NAT policy portmap.
    264  *
    265  * => Port should be in network byte-order.
    266  */
    267 static void
    268 npf_nat_putport(npf_natpolicy_t *np, in_port_t port)
    269 {
    270 	npf_portmap_t *pm = np->n_portmap;
    271 	uint32_t map, nmap;
    272 	u_int idx, bit;
    273 
    274 	port = ntohs(port) - PORTMAP_FIRST;
    275 	idx = port >> PORTMAP_SHIFT;
    276 	bit = port & PORTMAP_MASK;
    277 	do {
    278 		map = pm->p_bitmap[idx];
    279 		KASSERT(map | (1 << bit));
    280 		nmap = map & ~(1 << bit);
    281 	} while (atomic_cas_32(&pm->p_bitmap[idx], map, nmap) != map);
    282 }
    283 
    284 /*
    285  * npf_natout:
    286  *	- Inspect packet for a NAT policy, unless session with NAT
    287  *	  association already exists.
    288  *	- Perform "forwards" translation: rewrite source address, etc.
    289  *	- Establish sessions or if already exists, associate NAT policy.
    290  */
    291 int
    292 npf_natout(npf_cache_t *npc, npf_session_t *se, nbuf_t *nbuf,
    293     struct ifnet *ifp, const int layer)
    294 {
    295 	const int proto = npc->npc_proto;
    296 	void *n_ptr = nbuf_dataptr(nbuf);
    297 	npf_session_t *nse = NULL; /* XXXgcc */
    298 	npf_natpolicy_t *np;
    299 	npf_nat_t *nt;
    300 	npf_rule_t *rl;
    301 	in_addr_t gwip;
    302 	in_port_t tport;
    303 	int error;
    304 	bool new;
    305 
    306 	/* All relevant IPv4 data should be already cached. */
    307 	if (!npf_iscached(npc, NPC_IP46 | NPC_ADDRS)) {
    308 		return 0;
    309 	}
    310 
    311 	/* Detect if there is a linked session pointing to the NAT entry. */
    312 	nt = se ? npf_session_retlinknat(se) : NULL;
    313 	if (nt) {
    314 		np = nt->nt_natpolicy;
    315 		new = false;
    316 		goto skip;
    317 	}
    318 
    319 	/* Inspect packet against NAT ruleset, return a policy. */
    320 	rl = npf_ruleset_match(nat_ruleset, npc, nbuf, ifp, PFIL_OUT, layer);
    321 	np = rl ? npf_rule_getnat(rl) : NULL;
    322 	if (np == NULL) {
    323 		/* If packet does not match - done. */
    324 		return 0;
    325 	}
    326 
    327 	/* New NAT association. */
    328 	nt = pool_cache_get(nat_cache, PR_NOWAIT);
    329 	if (nt == NULL){
    330 		return ENOMEM;
    331 	}
    332 	nt->nt_natpolicy = np;
    333 	nt->nt_alg = NULL;
    334 	new = true;
    335 
    336 	/* Save local (source) address. */
    337 	nt->nt_laddr = npc->npc_srcip;
    338 
    339 	if (proto == IPPROTO_TCP || proto == IPPROTO_UDP) {
    340 		/* Also, save local TCP/UDP port. */
    341 		KASSERT(npf_iscached(npc, NPC_PORTS));
    342 		nt->nt_lport = npc->npc_sport;
    343 		/* Get a new port for translation. */
    344 		nt->nt_tport = npf_nat_getport(np);
    345 	} else {
    346 		nt->nt_lport = 0;
    347 		nt->nt_tport = 0;
    348 	}
    349 
    350 	/* Match any ALGs. */
    351 	npf_alg_exec(npc, nbuf, nt, PFIL_OUT);
    352 
    353 	/* If there is no local session, establish one before translation. */
    354 	if (se == NULL) {
    355 		nse = npf_session_establish(npc, NULL, PFIL_OUT);
    356 		if (nse == NULL) {
    357 			error = ENOMEM;
    358 			goto out;
    359 		}
    360 		se = nse;
    361 	} else {
    362 		nse = NULL;
    363 	}
    364 skip:
    365 	if (layer == NPF_LAYER_2 && /* XXX */
    366 	    (n_ptr = nbuf_advance(&nbuf, n_ptr, npc->npc_elen)) == NULL)
    367 		return EINVAL;
    368 
    369 	/* Execute ALG hooks first. */
    370 	npf_alg_exec(npc, nbuf, nt, PFIL_OUT);
    371 
    372 	gwip = np->n_gw_ip;
    373 	tport = nt->nt_tport;
    374 
    375 	/*
    376 	 * Perform translation: rewrite source address et al.
    377 	 * Note: cache may be used in npf_rwrport(), update only in the end.
    378 	 */
    379 	if (!npf_rwrip(npc, nbuf, n_ptr, PFIL_OUT, gwip)) {
    380 		error = EINVAL;
    381 		goto out;
    382 	}
    383 	if (proto == IPPROTO_TCP || proto == IPPROTO_UDP) {
    384 		KASSERT(tport != 0);
    385 		if (!npf_rwrport(npc, nbuf, n_ptr, PFIL_OUT, tport, gwip)) {
    386 			error = EINVAL;
    387 			goto out;
    388 		}
    389 	}
    390 	/* Success: cache new address and port (if any). */
    391 	npc->npc_srcip = gwip;
    392 	npc->npc_sport = tport;
    393 	error = 0;
    394 
    395 	if (__predict_false(new)) {
    396 		npf_session_t *natse;
    397 		/*
    398 		 * Establish a new NAT session using translated address and
    399 		 * associate NAT translation data with this session.
    400 		 *
    401 		 * Note: packet now has a translated address in the cache.
    402 		 */
    403 		natse = npf_session_establish(npc, nt, PFIL_OUT);
    404 		if (natse == NULL) {
    405 			error = ENOMEM;
    406 			goto out;
    407 		}
    408 		/*
    409 		 * Link local session with NAT session, if no link already.
    410 		 */
    411 		npf_session_link(se, natse);
    412 		npf_session_release(natse);
    413 out:
    414 		if (error) {
    415 			if (nse != NULL) {
    416 				/* XXX: expire local session if new? */
    417 			}
    418 			/* Will free the structure and return the port. */
    419 			npf_nat_expire(nt);
    420 		}
    421 		if (nse != NULL) {
    422 			/* Drop the reference local session was new. */
    423 			npf_session_release(nse);
    424 		}
    425 	}
    426 	return error;
    427 }
    428 
    429 /*
    430  * npf_natin:
    431  *	- Inspect packet for a session with associated NAT policy.
    432  *	- Perform "backwards" translation: rewrite destination address, etc.
    433  */
    434 int
    435 npf_natin(npf_cache_t *npc, npf_session_t *se, nbuf_t *nbuf, const int layer)
    436 {
    437 	npf_nat_t *nt = se ? npf_session_retnat(se) : NULL;
    438 
    439 	if (nt == NULL) {
    440 		/* No association - no translation. */
    441 		return 0;
    442 	}
    443 	KASSERT(npf_iscached(npc, NPC_IP46 | NPC_ADDRS));
    444 
    445 	void *n_ptr = nbuf_dataptr(nbuf);
    446 	in_addr_t laddr = nt->nt_laddr;
    447 	in_port_t lport = nt->nt_lport;
    448 
    449 	if (layer == NPF_LAYER_2) {
    450 		n_ptr = nbuf_advance(&nbuf, n_ptr, npc->npc_elen);
    451 		if (n_ptr == NULL) {
    452 			return EINVAL;
    453 		}
    454 	}
    455 
    456 	/* Execute ALG hooks first. */
    457 	npf_alg_exec(npc, nbuf, nt, PFIL_IN);
    458 
    459 	/*
    460 	 * Address translation: rewrite destination address.
    461 	 * Note: cache will be used in npf_rwrport(), update only in the end.
    462 	 */
    463 	if (!npf_rwrip(npc, nbuf, n_ptr, PFIL_IN, laddr)) {
    464 		return EINVAL;
    465 	}
    466 	switch (npc->npc_proto) {
    467 	case IPPROTO_TCP:
    468 	case IPPROTO_UDP:
    469 		KASSERT(npf_iscached(npc, NPC_PORTS));
    470 		/* Rewrite destination port. */
    471 		if (!npf_rwrport(npc, nbuf, n_ptr, PFIL_IN, lport, laddr)) {
    472 			return EINVAL;
    473 		}
    474 		break;
    475 	case IPPROTO_ICMP:
    476 		/* None. */
    477 		break;
    478 	default:
    479 		return ENOTSUP;
    480 	}
    481 	/* Cache new address and port. */
    482 	npc->npc_dstip = laddr;
    483 	npc->npc_dport = lport;
    484 	return 0;
    485 }
    486 
    487 /*
    488  * npf_nat_getlocal: return local IP address and port from translation entry.
    489  */
    490 void
    491 npf_nat_getlocal(npf_nat_t *nt, in_addr_t *addr, in_port_t *port)
    492 {
    493 
    494 	*addr = nt->nt_laddr;
    495 	*port = nt->nt_lport;
    496 }
    497 
    498 void
    499 npf_nat_setalg(npf_nat_t *nt, npf_alg_t *alg, uintptr_t arg)
    500 {
    501 
    502 	nt->nt_alg = alg;
    503 	nt->nt_alg_arg = arg;
    504 }
    505 
    506 /*
    507  * npf_nat_expire: free NAT-related data structures on session expiration.
    508  */
    509 void
    510 npf_nat_expire(npf_nat_t *nt)
    511 {
    512 
    513 	if (nt->nt_tport) {
    514 		npf_natpolicy_t *np = nt->nt_natpolicy;
    515 		npf_nat_putport(np, nt->nt_tport);
    516 	}
    517 	pool_cache_put(nat_cache, nt);
    518 }
    519 
    520 #if defined(DDB) || defined(_NPF_TESTING)
    521 
    522 void
    523 npf_nat_dump(npf_nat_t *nt)
    524 {
    525 	npf_natpolicy_t *np;
    526 	struct in_addr ip;
    527 
    528 	if (nt) {
    529 		np = nt->nt_natpolicy;
    530 		goto skip;
    531 	}
    532 	LIST_FOREACH(np, &nat_policy_list, n_entry) {
    533 skip:
    534 		ip.s_addr = np->n_gw_ip;
    535 		printf("\tNAT policy: gw_ip = %s\n", inet_ntoa(ip));
    536 		if (nt == NULL) {
    537 			continue;
    538 		}
    539 		ip.s_addr = nt->nt_laddr;
    540 		printf("\tNAT: original address %s, lport %d, tport = %d\n",
    541 		    inet_ntoa(ip), ntohs(nt->nt_lport), ntohs(nt->nt_tport));
    542 		if (nt->nt_alg) {
    543 			printf("\tNAT ALG = %p, ARG = %p\n",
    544 			    nt->nt_alg, (void *)nt->nt_alg_arg);
    545 		}
    546 		return;
    547 	}
    548 }
    549 
    550 #endif
    551