/*	$NetBSD: npf_nat.c,v 1.1 2010/08/22 18:56:22 rmind Exp $	*/
2
3 /*-
4 * Copyright (c) 2010 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This material is based upon work partially supported by The
8 * NetBSD Foundation under a contract with Mindaugas Rasiukevicius.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 /*
33 * NPF network address port translation (NAPT).
34 * Described in RFC 2663, RFC 3022. Commonly just "NAT".
35 *
36 * Overview
37 *
 * There are a few mechanisms: NAT policy, port map and translation.
39 * NAT module has a separate ruleset, where rules contain associated
40 * NAT policy, thus flexible filter criteria can be used.
41 *
42 * NAT policies and port maps
43 *
44 * NAT policy is applied when a packet matches the rule. Apart from
45 * filter criteria, NAT policy has a translation (gateway) IP address
46 * and associated port map. Port map is a bitmap used to reserve and
47 * use unique TCP/UDP ports for translation. Port maps are unique to
48 * the IP addresses, therefore multiple NAT policies with the same IP
49 * will share the same port map.
50 *
51 * NAT sessions and translation entries
52 *
53 * NAT module relies on session management module. Each "NAT" session
54 * has an associated translation entry (npf_nat_t). It contains local
55 * i.e. original IP address with port and translation port, allocated
56 * from the port map. Each NAT translation entry is associated with
57 * the policy, which contains translation IP address. Allocated port
58 * is returned to the port map and translation entry destroyed when
59 * "NAT" session expires.
60 */
61
62 #ifdef _KERNEL
63 #include <sys/cdefs.h>
64 __KERNEL_RCSID(0, "$NetBSD: npf_nat.c,v 1.1 2010/08/22 18:56:22 rmind Exp $");
65
66 #include <sys/param.h>
67 #include <sys/kernel.h>
68 #endif
69
70 #include <sys/atomic.h>
71 #include <sys/bitops.h>
72 #include <sys/kmem.h>
73 #include <sys/pool.h>
74 #include <net/pfil.h>
75 #include <netinet/in.h>
76
77 #include "npf_impl.h"
78
79 /*
80 * NPF portmap structure.
81 */
82 typedef struct {
83 u_int p_refcnt;
84 uint32_t p_bitmap[0];
85 } npf_portmap_t;
86
87 /* Portmap range: [ 1024 .. 65535 ] */
88 #define PORTMAP_FIRST (1024)
89 #define PORTMAP_SIZE ((65536 - PORTMAP_FIRST) / 32)
90 #define PORTMAP_FILLED ((uint32_t)~0)
91 #define PORTMAP_MASK (31)
92 #define PORTMAP_SHIFT (5)
93
/*
 * NAT policy structure.
 *
 * One policy exists per NAT rule; policies with the same translation
 * address share a single port map (see npf_nat_newpolicy()).
 */
struct npf_natpolicy {
	/* Linkage on the global nat_policy_list. */
	LIST_ENTRY(npf_natpolicy)	n_entry;
	/* Translation (gateway) IP address. */
	in_addr_t			n_gw_ip;
	/* Port map used to reserve translation ports; may be shared. */
	npf_portmap_t *			n_portmap;
};
100
/*
 * NAT translation entry for a session.
 *
 * Allocated from nat_cache in npf_natout() and released by
 * npf_nat_expire().
 */
struct npf_nat {
	/* Policy this entry was created from (provides the gateway IP). */
	npf_natpolicy_t *	nt_natpolicy;
	/* Local address and port (for backwards translation). */
	in_addr_t		nt_laddr;
	in_port_t		nt_lport;
	/* Translation port (for forwards); zero if no port was allocated. */
	in_port_t		nt_tport;
	/* ALG (if any) associated with this NAT entry. */
	npf_alg_t *		nt_alg;
	/* Argument stored together with the ALG by npf_nat_setalg(). */
	uintptr_t		nt_alg_arg;
};
113
/* Active NAT ruleset; replaced atomically by npf_nat_reload(). */
static npf_ruleset_t *			nat_ruleset;
/* List of all NAT policies, maintained by npf_nat_{new,free}policy(). */
static LIST_HEAD(, npf_natpolicy)	nat_policy_list;
/* Pool cache from which npf_nat_t translation entries are allocated. */
static pool_cache_t			nat_cache;
117
118 /*
119 * npf_nat_sys{init,fini}: initialise/destroy NAT subsystem structures.
120 */
121
122 void
123 npf_nat_sysinit(void)
124 {
125
126 nat_cache = pool_cache_init(sizeof(npf_nat_t), coherency_unit,
127 0, 0, "npfnatpl", NULL, IPL_NET, NULL, NULL, NULL);
128 KASSERT(nat_cache != NULL);
129 nat_ruleset = npf_ruleset_create();
130 LIST_INIT(&nat_policy_list);
131 }
132
133 void
134 npf_nat_sysfini(void)
135 {
136
137 /* Flush NAT policies. */
138 npf_nat_reload(NULL);
139 KASSERT(LIST_EMPTY(&nat_policy_list));
140 pool_cache_destroy(nat_cache);
141 }
142
143 /*
144 * npf_nat_newpolicy: allocate a new NAT policy.
145 *
146 * => Shares portmap if policy is on existing translation address.
147 * => XXX: serialise at upper layer.
148 */
149 npf_natpolicy_t *
150 npf_nat_newpolicy(in_addr_t gip)
151 {
152 npf_natpolicy_t *np, *it;
153 npf_portmap_t *pm;
154
155 np = kmem_zalloc(sizeof(npf_natpolicy_t), KM_SLEEP);
156 if (np == NULL) {
157 return NULL;
158 }
159 np->n_gw_ip = gip;
160
161 /* Search for a NAT policy using the same translation address. */
162 pm = NULL;
163 LIST_FOREACH(it, &nat_policy_list, n_entry) {
164 if (it->n_gw_ip != np->n_gw_ip)
165 continue;
166 pm = it->n_portmap;
167 break;
168 }
169 if (pm == NULL) {
170 /* Allocate a new port map for the NAT policy. */
171 pm = kmem_zalloc(sizeof(npf_portmap_t) +
172 (PORTMAP_SIZE * sizeof(uint32_t)), KM_SLEEP);
173 if (pm == NULL) {
174 kmem_free(np, sizeof(npf_natpolicy_t));
175 return NULL;
176 }
177 pm->p_refcnt = 1;
178 KASSERT((uintptr_t)pm->p_bitmap == (uintptr_t)pm + sizeof(*pm));
179 } else {
180 /* Share the port map. */
181 pm->p_refcnt++;
182 }
183 np->n_portmap = pm;
184 /*
185 * Note: old policies with new might co-exist in the list,
186 * while reload is in progress, but that is not an issue.
187 */
188 LIST_INSERT_HEAD(&nat_policy_list, np, n_entry);
189 return np;
190 }
191
192 /*
193 * npf_nat_freepolicy: free NAT policy and, on last reference, free portmap.
194 *
195 * => Called from npf_rule_free() during the reload via npf_nat_reload().
196 */
197 void
198 npf_nat_freepolicy(npf_natpolicy_t *np)
199 {
200 npf_portmap_t *pm = np->n_portmap;
201
202 LIST_REMOVE(np, n_entry);
203 if (--pm->p_refcnt == 0) {
204 kmem_free(pm, sizeof(npf_portmap_t) +
205 (PORTMAP_SIZE * sizeof(uint32_t)));
206 }
207 kmem_free(np, sizeof(npf_natpolicy_t));
208 }
209
210 /*
211 * npf_nat_reload: activate new ruleset of NAT policies and destroy old.
212 *
213 * => Destruction of ruleset will perform npf_nat_freepolicy() for each policy.
214 */
215 void
216 npf_nat_reload(npf_ruleset_t *nset)
217 {
218 npf_ruleset_t *oldnset;
219
220 oldnset = atomic_swap_ptr(&nat_ruleset, nset);
221 if (oldnset) {
222 npf_ruleset_destroy(oldnset);
223 }
224 }
225
226 /*
227 * npf_nat_getport: allocate and return a port in the NAT policy portmap.
228 *
229 * => Returns in network byte-order.
230 * => Zero indicates failure.
231 */
232 static in_port_t
233 npf_nat_getport(npf_natpolicy_t *np)
234 {
235 npf_portmap_t *pm = np->n_portmap;
236 u_int n = PORTMAP_SIZE, idx, bit;
237 uint32_t map, nmap;
238
239 idx = arc4random() % PORTMAP_SIZE;
240 for (;;) {
241 KASSERT(idx < PORTMAP_SIZE);
242 map = pm->p_bitmap[idx];
243 if (__predict_false(map == PORTMAP_FILLED)) {
244 if (n-- == 0) {
245 /* No space. */
246 return 0;
247 }
248 /* This bitmap is sfilled, next. */
249 idx = (idx ? idx : PORTMAP_SIZE) - 1;
250 continue;
251 }
252 bit = ffs32(~map) - 1;
253 nmap = map | (1 << bit);
254 if (atomic_cas_32(&pm->p_bitmap[idx], map, nmap) == map) {
255 /* Success. */
256 break;
257 }
258 }
259 return htons(PORTMAP_FIRST + (idx << PORTMAP_SHIFT) + bit);
260 }
261
262 /*
263 * npf_nat_putport: return port as available in the NAT policy portmap.
264 *
265 * => Port should be in network byte-order.
266 */
267 static void
268 npf_nat_putport(npf_natpolicy_t *np, in_port_t port)
269 {
270 npf_portmap_t *pm = np->n_portmap;
271 uint32_t map, nmap;
272 u_int idx, bit;
273
274 port = ntohs(port) - PORTMAP_FIRST;
275 idx = port >> PORTMAP_SHIFT;
276 bit = port & PORTMAP_MASK;
277 do {
278 map = pm->p_bitmap[idx];
279 KASSERT(map | (1 << bit));
280 nmap = map & ~(1 << bit);
281 } while (atomic_cas_32(&pm->p_bitmap[idx], map, nmap) != map);
282 }
283
284 /*
285 * npf_natout:
286 * - Inspect packet for a NAT policy, unless session with NAT
287 * association already exists.
288 * - Perform "forwards" translation: rewrite source address, etc.
289 * - Establish sessions or if already exists, associate NAT policy.
290 */
291 int
292 npf_natout(npf_cache_t *npc, npf_session_t *se, nbuf_t *nbuf,
293 struct ifnet *ifp, const int layer)
294 {
295 const int proto = npc->npc_proto;
296 void *n_ptr = nbuf_dataptr(nbuf);
297 npf_session_t *nse = NULL; /* XXXgcc */
298 npf_natpolicy_t *np;
299 npf_nat_t *nt;
300 npf_rule_t *rl;
301 in_addr_t gwip;
302 in_port_t tport;
303 int error;
304 bool new;
305
306 /* All relevant IPv4 data should be already cached. */
307 if (!npf_iscached(npc, NPC_IP46 | NPC_ADDRS)) {
308 return 0;
309 }
310
311 /* Detect if there is a linked session pointing to the NAT entry. */
312 nt = se ? npf_session_retlinknat(se) : NULL;
313 if (nt) {
314 np = nt->nt_natpolicy;
315 new = false;
316 goto skip;
317 }
318
319 /* Inspect packet against NAT ruleset, return a policy. */
320 rl = npf_ruleset_match(nat_ruleset, npc, nbuf, ifp, PFIL_OUT, layer);
321 np = rl ? npf_rule_getnat(rl) : NULL;
322 if (np == NULL) {
323 /* If packet does not match - done. */
324 return 0;
325 }
326
327 /* New NAT association. */
328 nt = pool_cache_get(nat_cache, PR_NOWAIT);
329 if (nt == NULL){
330 return ENOMEM;
331 }
332 nt->nt_natpolicy = np;
333 nt->nt_alg = NULL;
334 new = true;
335
336 /* Save local (source) address. */
337 nt->nt_laddr = npc->npc_srcip;
338
339 if (proto == IPPROTO_TCP || proto == IPPROTO_UDP) {
340 /* Also, save local TCP/UDP port. */
341 KASSERT(npf_iscached(npc, NPC_PORTS));
342 nt->nt_lport = npc->npc_sport;
343 /* Get a new port for translation. */
344 nt->nt_tport = npf_nat_getport(np);
345 } else {
346 nt->nt_lport = 0;
347 nt->nt_tport = 0;
348 }
349
350 /* Match any ALGs. */
351 npf_alg_exec(npc, nbuf, nt, PFIL_OUT);
352
353 /* If there is no local session, establish one before translation. */
354 if (se == NULL) {
355 nse = npf_session_establish(npc, NULL, PFIL_OUT);
356 if (nse == NULL) {
357 error = ENOMEM;
358 goto out;
359 }
360 se = nse;
361 } else {
362 nse = NULL;
363 }
364 skip:
365 if (layer == NPF_LAYER_2 && /* XXX */
366 (n_ptr = nbuf_advance(&nbuf, n_ptr, npc->npc_elen)) == NULL)
367 return EINVAL;
368
369 /* Execute ALG hooks first. */
370 npf_alg_exec(npc, nbuf, nt, PFIL_OUT);
371
372 gwip = np->n_gw_ip;
373 tport = nt->nt_tport;
374
375 /*
376 * Perform translation: rewrite source address et al.
377 * Note: cache may be used in npf_rwrport(), update only in the end.
378 */
379 if (!npf_rwrip(npc, nbuf, n_ptr, PFIL_OUT, gwip)) {
380 error = EINVAL;
381 goto out;
382 }
383 if (proto == IPPROTO_TCP || proto == IPPROTO_UDP) {
384 KASSERT(tport != 0);
385 if (!npf_rwrport(npc, nbuf, n_ptr, PFIL_OUT, tport, gwip)) {
386 error = EINVAL;
387 goto out;
388 }
389 }
390 /* Success: cache new address and port (if any). */
391 npc->npc_srcip = gwip;
392 npc->npc_sport = tport;
393 error = 0;
394
395 if (__predict_false(new)) {
396 npf_session_t *natse;
397 /*
398 * Establish a new NAT session using translated address and
399 * associate NAT translation data with this session.
400 *
401 * Note: packet now has a translated address in the cache.
402 */
403 natse = npf_session_establish(npc, nt, PFIL_OUT);
404 if (natse == NULL) {
405 error = ENOMEM;
406 goto out;
407 }
408 /*
409 * Link local session with NAT session, if no link already.
410 */
411 npf_session_link(se, natse);
412 npf_session_release(natse);
413 out:
414 if (error) {
415 if (nse != NULL) {
416 /* XXX: expire local session if new? */
417 }
418 /* Will free the structure and return the port. */
419 npf_nat_expire(nt);
420 }
421 if (nse != NULL) {
422 /* Drop the reference local session was new. */
423 npf_session_release(nse);
424 }
425 }
426 return error;
427 }
428
429 /*
430 * npf_natin:
431 * - Inspect packet for a session with associated NAT policy.
432 * - Perform "backwards" translation: rewrite destination address, etc.
433 */
434 int
435 npf_natin(npf_cache_t *npc, npf_session_t *se, nbuf_t *nbuf, const int layer)
436 {
437 npf_nat_t *nt = se ? npf_session_retnat(se) : NULL;
438
439 if (nt == NULL) {
440 /* No association - no translation. */
441 return 0;
442 }
443 KASSERT(npf_iscached(npc, NPC_IP46 | NPC_ADDRS));
444
445 void *n_ptr = nbuf_dataptr(nbuf);
446 in_addr_t laddr = nt->nt_laddr;
447 in_port_t lport = nt->nt_lport;
448
449 if (layer == NPF_LAYER_2) {
450 n_ptr = nbuf_advance(&nbuf, n_ptr, npc->npc_elen);
451 if (n_ptr == NULL) {
452 return EINVAL;
453 }
454 }
455
456 /* Execute ALG hooks first. */
457 npf_alg_exec(npc, nbuf, nt, PFIL_IN);
458
459 /*
460 * Address translation: rewrite destination address.
461 * Note: cache will be used in npf_rwrport(), update only in the end.
462 */
463 if (!npf_rwrip(npc, nbuf, n_ptr, PFIL_IN, laddr)) {
464 return EINVAL;
465 }
466 switch (npc->npc_proto) {
467 case IPPROTO_TCP:
468 case IPPROTO_UDP:
469 KASSERT(npf_iscached(npc, NPC_PORTS));
470 /* Rewrite destination port. */
471 if (!npf_rwrport(npc, nbuf, n_ptr, PFIL_IN, lport, laddr)) {
472 return EINVAL;
473 }
474 break;
475 case IPPROTO_ICMP:
476 /* None. */
477 break;
478 default:
479 return ENOTSUP;
480 }
481 /* Cache new address and port. */
482 npc->npc_dstip = laddr;
483 npc->npc_dport = lport;
484 return 0;
485 }
486
487 /*
488 * npf_nat_getlocal: return local IP address and port from translation entry.
489 */
490 void
491 npf_nat_getlocal(npf_nat_t *nt, in_addr_t *addr, in_port_t *port)
492 {
493
494 *addr = nt->nt_laddr;
495 *port = nt->nt_lport;
496 }
497
498 void
499 npf_nat_setalg(npf_nat_t *nt, npf_alg_t *alg, uintptr_t arg)
500 {
501
502 nt->nt_alg = alg;
503 nt->nt_alg_arg = arg;
504 }
505
506 /*
507 * npf_nat_expire: free NAT-related data structures on session expiration.
508 */
509 void
510 npf_nat_expire(npf_nat_t *nt)
511 {
512
513 if (nt->nt_tport) {
514 npf_natpolicy_t *np = nt->nt_natpolicy;
515 npf_nat_putport(np, nt->nt_tport);
516 }
517 pool_cache_put(nat_cache, nt);
518 }
519
520 #if defined(DDB) || defined(_NPF_TESTING)
521
522 void
523 npf_nat_dump(npf_nat_t *nt)
524 {
525 npf_natpolicy_t *np;
526 struct in_addr ip;
527
528 if (nt) {
529 np = nt->nt_natpolicy;
530 goto skip;
531 }
532 LIST_FOREACH(np, &nat_policy_list, n_entry) {
533 skip:
534 ip.s_addr = np->n_gw_ip;
535 printf("\tNAT policy: gw_ip = %s\n", inet_ntoa(ip));
536 if (nt == NULL) {
537 continue;
538 }
539 ip.s_addr = nt->nt_laddr;
540 printf("\tNAT: original address %s, lport %d, tport = %d\n",
541 inet_ntoa(ip), ntohs(nt->nt_lport), ntohs(nt->nt_tport));
542 if (nt->nt_alg) {
543 printf("\tNAT ALG = %p, ARG = %p\n",
544 nt->nt_alg, (void *)nt->nt_alg_arg);
545 }
546 return;
547 }
548 }
549
550 #endif
551