/*	$NetBSD: npf_nat.c,v 1.2 2010/09/16 04:53:27 rmind Exp $	*/
2
3 /*-
4 * Copyright (c) 2010 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This material is based upon work partially supported by The
8 * NetBSD Foundation under a contract with Mindaugas Rasiukevicius.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 /*
33 * NPF network address port translation (NAPT).
34 * Described in RFC 2663, RFC 3022. Commonly just "NAT".
35 *
36 * Overview
37 *
 * There are a few mechanisms: NAT policy, port map and translation.
 * The NAT module has a separate ruleset, where rules contain an
 * associated NAT policy, thus flexible filter criteria can be used.
41 *
42 * Translation types
43 *
44 * There are two types of translation: outbound (NPF_NATOUT) and
45 * inbound (NPF_NATIN). It should not be confused with connection
46 * direction.
47 *
48 * Outbound NAT rewrites:
49 * - Source on "forwards" stream.
50 * - Destination on "backwards" stream.
51 * Inbound NAT rewrites:
52 * - Destination on "forwards" stream.
53 * - Source on "backwards" stream.
54 *
55 * It should be noted that bi-directional NAT is a combined outbound
56 * and inbound translation, therefore constructed as two policies.
57 *
58 * NAT policies and port maps
59 *
60 * NAT (translation) policy is applied when a packet matches the rule.
61 * Apart from filter criteria, NAT policy has a translation IP address
62 * and associated port map. Port map is a bitmap used to reserve and
63 * use unique TCP/UDP ports for translation. Port maps are unique to
64 * the IP addresses, therefore multiple NAT policies with the same IP
65 * will share the same port map.
66 *
67 * NAT sessions and translation entries
68 *
 * The NAT module relies on the session management module.  Each "NAT"
 * session has an associated translation entry (npf_nat_t).  It contains
 * the saved (i.e. original) IP address and port, and the translation
 * port allocated from the port map.  Each NAT translation entry is
 * associated with the policy, which contains the translation IP address.
 * The allocated port is returned to the port map and the translation
 * entry is destroyed when the "NAT" session expires.
76 */
77
78 #ifdef _KERNEL
79 #include <sys/cdefs.h>
80 __KERNEL_RCSID(0, "$NetBSD: npf_nat.c,v 1.2 2010/09/16 04:53:27 rmind Exp $");
81
82 #include <sys/param.h>
83 #include <sys/kernel.h>
84 #endif
85
86 #include <sys/atomic.h>
87 #include <sys/bitops.h>
88 #include <sys/kmem.h>
89 #include <sys/pool.h>
90 #include <net/pfil.h>
91 #include <netinet/in.h>
92
93 #include "npf_impl.h"
94
95 /*
96 * NPF portmap structure.
97 */
98 typedef struct {
99 u_int p_refcnt;
100 uint32_t p_bitmap[0];
101 } npf_portmap_t;
102
/*
 * Portmap range: [ 1024 .. 65535 ].
 *
 * One bit per port, packed into 32-bit words: the word index of a port
 * offset is (offset >> PORTMAP_SHIFT), the bit is (offset & PORTMAP_MASK).
 */
#define	PORTMAP_FIRST		(1024)
#define	PORTMAP_SHIFT		(5)
#define	PORTMAP_MASK		((1 << PORTMAP_SHIFT) - 1)
#define	PORTMAP_SIZE		((65536 - PORTMAP_FIRST) / (1 << PORTMAP_SHIFT))
#define	PORTMAP_FILLED		((uint32_t)~0)
109
110 /* NAT policy structure. */
111 struct npf_natpolicy {
112 LIST_ENTRY(npf_natpolicy) n_entry;
113 int n_type;
114 int n_flags;
115 in_addr_t n_taddr;
116 in_port_t n_tport;
117 npf_portmap_t * n_portmap;
118 };
119
120 /* NAT translation entry for a session. */
121 struct npf_nat {
122 npf_natpolicy_t * nt_natpolicy;
123 /* Original address and port (for backwards translation). */
124 in_addr_t nt_oaddr;
125 in_port_t nt_oport;
126 /* Translation port (for redirects). */
127 in_port_t nt_tport;
128 /* ALG (if any) associated with this NAT entry. */
129 npf_alg_t * nt_alg;
130 uintptr_t nt_alg_arg;
131 };
132
133 static npf_ruleset_t * nat_ruleset __read_mostly;
134 static LIST_HEAD(, npf_natpolicy) nat_policy_list __read_mostly;
135 static pool_cache_t nat_cache __read_mostly;
136
137 /*
138 * npf_nat_sys{init,fini}: initialise/destroy NAT subsystem structures.
139 */
140
141 void
142 npf_nat_sysinit(void)
143 {
144
145 nat_cache = pool_cache_init(sizeof(npf_nat_t), coherency_unit,
146 0, 0, "npfnatpl", NULL, IPL_NET, NULL, NULL, NULL);
147 KASSERT(nat_cache != NULL);
148 nat_ruleset = npf_ruleset_create();
149 LIST_INIT(&nat_policy_list);
150 }
151
152 void
153 npf_nat_sysfini(void)
154 {
155
156 /* Flush NAT policies. */
157 npf_nat_reload(NULL);
158 KASSERT(LIST_EMPTY(&nat_policy_list));
159 pool_cache_destroy(nat_cache);
160 }
161
162 /*
163 * npf_nat_newpolicy: create a new NAT policy.
164 *
165 * => Shares portmap if policy is on existing translation address.
166 * => XXX: serialise at upper layer.
167 */
168 npf_natpolicy_t *
169 npf_nat_newpolicy(int type, int flags, in_addr_t taddr, in_port_t tport)
170 {
171 npf_natpolicy_t *np, *it;
172 npf_portmap_t *pm;
173
174 np = kmem_zalloc(sizeof(npf_natpolicy_t), KM_SLEEP);
175 if (np == NULL) {
176 return NULL;
177 }
178 KASSERT(type == NPF_NATIN || type == NPF_NATOUT);
179 np->n_type = type;
180 np->n_flags = flags;
181 np->n_taddr = taddr;
182 np->n_tport = tport;
183
184 pm = NULL;
185 if ((flags & NPF_NAT_PORTMAP) == 0) {
186 goto nopm;
187 }
188
189 /* Search for a NAT policy using the same translation address. */
190 LIST_FOREACH(it, &nat_policy_list, n_entry) {
191 if (it->n_taddr != np->n_taddr)
192 continue;
193 pm = it->n_portmap;
194 break;
195 }
196 if (pm == NULL) {
197 /* Allocate a new port map for the NAT policy. */
198 pm = kmem_zalloc(sizeof(npf_portmap_t) +
199 (PORTMAP_SIZE * sizeof(uint32_t)), KM_SLEEP);
200 if (pm == NULL) {
201 kmem_free(np, sizeof(npf_natpolicy_t));
202 return NULL;
203 }
204 pm->p_refcnt = 1;
205 KASSERT((uintptr_t)pm->p_bitmap == (uintptr_t)pm + sizeof(*pm));
206 } else {
207 /* Share the port map. */
208 pm->p_refcnt++;
209 }
210 nopm:
211 np->n_portmap = pm;
212 /*
213 * Note: old policies with new might co-exist in the list,
214 * while reload is in progress, but that is not an issue.
215 */
216 LIST_INSERT_HEAD(&nat_policy_list, np, n_entry);
217 return np;
218 }
219
220 /*
221 * npf_nat_freepolicy: free NAT policy and, on last reference, free portmap.
222 *
223 * => Called from npf_rule_free() during the reload via npf_nat_reload().
224 */
225 void
226 npf_nat_freepolicy(npf_natpolicy_t *np)
227 {
228 npf_portmap_t *pm = np->n_portmap;
229
230 LIST_REMOVE(np, n_entry);
231 if (pm && --pm->p_refcnt == 0) {
232 KASSERT((np->n_flags & NPF_NAT_PORTMAP) != 0);
233 kmem_free(pm, sizeof(npf_portmap_t) +
234 (PORTMAP_SIZE * sizeof(uint32_t)));
235 }
236 kmem_free(np, sizeof(npf_natpolicy_t));
237 }
238
239 /*
240 * npf_nat_reload: activate new ruleset of NAT policies and destroy old.
241 *
242 * => Destruction of ruleset will perform npf_nat_freepolicy() for each policy.
243 */
244 void
245 npf_nat_reload(npf_ruleset_t *nset)
246 {
247 npf_ruleset_t *oldnset;
248
249 oldnset = atomic_swap_ptr(&nat_ruleset, nset);
250 if (oldnset) {
251 npf_ruleset_destroy(oldnset);
252 }
253 }
254
255 /*
256 * npf_nat_getport: allocate and return a port in the NAT policy portmap.
257 *
258 * => Returns in network byte-order.
259 * => Zero indicates failure.
260 */
261 static in_port_t
262 npf_nat_getport(npf_natpolicy_t *np)
263 {
264 npf_portmap_t *pm = np->n_portmap;
265 u_int n = PORTMAP_SIZE, idx, bit;
266 uint32_t map, nmap;
267
268 idx = arc4random() % PORTMAP_SIZE;
269 for (;;) {
270 KASSERT(idx < PORTMAP_SIZE);
271 map = pm->p_bitmap[idx];
272 if (__predict_false(map == PORTMAP_FILLED)) {
273 if (n-- == 0) {
274 /* No space. */
275 return 0;
276 }
277 /* This bitmap is filled, next. */
278 idx = (idx ? idx : PORTMAP_SIZE) - 1;
279 continue;
280 }
281 bit = ffs32(~map) - 1;
282 nmap = map | (1 << bit);
283 if (atomic_cas_32(&pm->p_bitmap[idx], map, nmap) == map) {
284 /* Success. */
285 break;
286 }
287 }
288 return htons(PORTMAP_FIRST + (idx << PORTMAP_SHIFT) + bit);
289 }
290
291 /*
292 * npf_nat_putport: return port as available in the NAT policy portmap.
293 *
294 * => Port should be in network byte-order.
295 */
296 static void
297 npf_nat_putport(npf_natpolicy_t *np, in_port_t port)
298 {
299 npf_portmap_t *pm = np->n_portmap;
300 uint32_t map, nmap;
301 u_int idx, bit;
302
303 port = ntohs(port) - PORTMAP_FIRST;
304 idx = port >> PORTMAP_SHIFT;
305 bit = port & PORTMAP_MASK;
306 do {
307 map = pm->p_bitmap[idx];
308 KASSERT(map | (1 << bit));
309 nmap = map & ~(1 << bit);
310 } while (atomic_cas_32(&pm->p_bitmap[idx], map, nmap) != map);
311 }
312
313 /*
314 * npf_nat_inspect: inspect packet against NAT ruleset and return a policy.
315 */
316 static npf_natpolicy_t *
317 npf_nat_inspect(npf_cache_t *npc, nbuf_t *nbuf, struct ifnet *ifp, const int di)
318 {
319 npf_rule_t *rl;
320
321 rl = npf_ruleset_match(nat_ruleset, npc, nbuf, ifp, di, NPF_LAYER_3);
322
323 return rl ? npf_rule_getnat(rl) : NULL;
324 }
325
326 /*
327 * npf_nat_create: create a new NAT translation entry.
328 */
329 static npf_nat_t *
330 npf_nat_create(npf_cache_t *npc, npf_natpolicy_t *np)
331 {
332 const int proto = npc->npc_proto;
333 npf_nat_t *nt;
334
335 /* New NAT association. */
336 nt = pool_cache_get(nat_cache, PR_NOWAIT);
337 if (nt == NULL){
338 return NULL;
339 }
340 nt->nt_natpolicy = np;
341 nt->nt_alg = NULL;
342
343 /* Save the original address which may be rewritten. */
344 if (np->n_type == NPF_NATOUT) {
345 /* Source (local) for Outbound NAT. */
346 nt->nt_oaddr = npc->npc_srcip;
347 } else {
348 /* Destination (external) for Inbound NAT. */
349 KASSERT(np->n_type == NPF_NATIN);
350 nt->nt_oaddr = npc->npc_dstip;
351 }
352
353 /*
354 * Port translation, if required, and if it is TCP/UDP.
355 */
356 if ((np->n_flags & NPF_NAT_PORTS) == 0 ||
357 (proto != IPPROTO_TCP && proto != IPPROTO_UDP)) {
358 nt->nt_oport = 0;
359 nt->nt_tport = 0;
360 return nt;
361 }
362 /* Save a relevant TCP/UDP port. */
363 KASSERT(npf_iscached(npc, NPC_PORTS));
364 if (np->n_type == NPF_NATOUT) {
365 nt->nt_oport = npc->npc_sport;
366 } else {
367 nt->nt_oport = npc->npc_dport;
368 }
369 /* Get a new port for translation. */
370 if ((np->n_flags & NPF_NAT_PORTMAP) != 0) {
371 nt->nt_tport = npf_nat_getport(np);
372 } else {
373 nt->nt_tport = np->n_tport;
374 }
375 return nt;
376 }
377
378 /*
379 * npf_nat_translate: perform address and/or port translation.
380 */
381 static int
382 npf_nat_translate(npf_cache_t *npc, nbuf_t *nbuf, npf_nat_t *nt,
383 const bool forw, const int di)
384 {
385 const npf_natpolicy_t *np = nt->nt_natpolicy;
386 void *n_ptr = nbuf_dataptr(nbuf);
387 in_addr_t addr;
388 in_port_t port;
389
390 KASSERT(npf_iscached(npc, NPC_IP46 | NPC_ADDRS));
391
392 if (forw) {
393 /* "Forwards" stream: use translation address/port. */
394 KASSERT(
395 (np->n_type == NPF_NATIN && di == PFIL_IN) ^
396 (np->n_type == NPF_NATOUT && di == PFIL_OUT)
397 );
398 addr = np->n_taddr;
399 port = nt->nt_tport;
400 } else {
401 /* "Backwards" stream: use original address/port. */
402 KASSERT(
403 (np->n_type == NPF_NATIN && di == PFIL_OUT) ^
404 (np->n_type == NPF_NATOUT && di == PFIL_IN)
405 );
406 addr = nt->nt_oaddr;
407 port = nt->nt_oport;
408 }
409
410 /* Execute ALG hooks first. */
411 npf_alg_exec(npc, nbuf, nt, di);
412
413 /*
414 * Address translation: rewrite source/destination address, depending
415 * on direction (PFIL_OUT - for source, PFIL_IN - for destination).
416 * Note: cache will be used in npf_rwrport(), update only in the end.
417 */
418 if (!npf_rwrip(npc, nbuf, n_ptr, di, addr)) {
419 return EINVAL;
420 }
421 if ((np->n_flags & NPF_NAT_PORTS) == 0) {
422 /* Cache new address. */
423 if (di == PFIL_OUT) {
424 npc->npc_srcip = addr;
425 } else {
426 npc->npc_dstip = addr;
427 }
428 return 0;
429 }
430 switch (npc->npc_proto) {
431 case IPPROTO_TCP:
432 case IPPROTO_UDP:
433 KASSERT(npf_iscached(npc, NPC_PORTS));
434 /* Rewrite source/destination port. */
435 if (!npf_rwrport(npc, nbuf, n_ptr, di, port, addr)) {
436 return EINVAL;
437 }
438 break;
439 case IPPROTO_ICMP:
440 /* None. */
441 break;
442 default:
443 return ENOTSUP;
444 }
445 /* Cache new address and port. */
446 if (di == PFIL_OUT) {
447 npc->npc_srcip = addr;
448 npc->npc_sport = port;
449 } else {
450 npc->npc_dstip = addr;
451 npc->npc_dport = port;
452 }
453 return 0;
454 }
455
456 /*
457 * npf_do_nat:
458 * - Inspect packet for a NAT policy, unless a session with a NAT
459 * association already exists. In such case, determine whether is
460 * is a "forwards" or "backwards" stream.
461 * - Perform translation: rewrite source address if "forwards" stream
462 * and destination address if "backwards".
463 * - Establish sessions or, if already exists, associate a NAT policy.
464 */
465 int
466 npf_do_nat(npf_cache_t *npc, npf_session_t *se, nbuf_t *nbuf,
467 struct ifnet *ifp, const int di)
468 {
469 npf_session_t *nse = NULL;
470 npf_natpolicy_t *np;
471 npf_nat_t *nt;
472 int error;
473 bool forw, new;
474
475 /* All relevant IPv4 data should be already cached. */
476 if (!npf_iscached(npc, NPC_IP46 | NPC_ADDRS)) {
477 return 0;
478 }
479
480 /*
481 * Return the NAT entry associated with the session, if any.
482 * Assumptions:
483 * - If associated via linked session, then "forwards" stream.
484 * - If associated directly, then "backwards" stream.
485 */
486 if (se && (nt = npf_session_retnat(se, di, &forw)) != NULL) {
487 np = nt->nt_natpolicy;
488 new = false;
489 goto translate;
490 }
491
492 /* Inspect the packet for a NAT policy, if there is no session. */
493 np = npf_nat_inspect(npc, nbuf, ifp, di);
494 if (np == NULL) {
495 /* If packet does not match - done. */
496 return 0;
497 }
498 forw = true;
499
500 /* Create a new NAT translation entry. */
501 nt = npf_nat_create(npc, np);
502 if (nt == NULL) {
503 return ENOMEM;
504 }
505 new = true;
506
507 /*
508 * If there is no local session (no "keep state" rule - unusual, but
509 * possible configuration), establish one before translation. Note
510 * that it is not a "pass" session, therefore passing of "backwards"
511 * stream depends on other, stateless filtering rules.
512 */
513 if (se == NULL) {
514 nse = npf_session_establish(npc, NULL, di);
515 if (nse == NULL) {
516 error = ENOMEM;
517 goto out;
518 }
519 se = nse;
520 }
521 translate:
522 /* Perform the translation. */
523 error = npf_nat_translate(npc, nbuf, nt, forw, di);
524 if (error) {
525 goto out;
526 }
527
528 if (__predict_false(new)) {
529 npf_session_t *natse;
530 /*
531 * Establish a new NAT session using translated address and
532 * associate NAT translation data with this session.
533 *
534 * Note: packet now has a translated address in the cache.
535 */
536 natse = npf_session_establish(npc, nt, di);
537 if (natse == NULL) {
538 error = ENOMEM;
539 goto out;
540 }
541 /*
542 * Link local session with NAT session, if no link already.
543 */
544 npf_session_link(se, natse);
545 npf_session_release(natse);
546 out:
547 if (error) {
548 if (nse != NULL) {
549 /* XXX: Expire it?? */
550 }
551 /* Will free the structure and return the port. */
552 npf_nat_expire(nt);
553 }
554 if (nse != NULL) {
555 npf_session_release(nse);
556 }
557 }
558 return error;
559 }
560
561 /*
562 * npf_nat_getorig: return original IP address and port from translation entry.
563 */
564 void
565 npf_nat_getorig(npf_nat_t *nt, in_addr_t *addr, in_port_t *port)
566 {
567
568 *addr = nt->nt_oaddr;
569 *port = nt->nt_oport;
570 }
571
572 void
573 npf_nat_setalg(npf_nat_t *nt, npf_alg_t *alg, uintptr_t arg)
574 {
575
576 nt->nt_alg = alg;
577 nt->nt_alg_arg = arg;
578 }
579
580 /*
581 * npf_nat_expire: free NAT-related data structures on session expiration.
582 */
583 void
584 npf_nat_expire(npf_nat_t *nt)
585 {
586 npf_natpolicy_t *np = nt->nt_natpolicy;
587
588 if ((np->n_flags & NPF_NAT_PORTMAP) != 0) {
589 KASSERT(nt->nt_tport != 0);
590 npf_nat_putport(np, nt->nt_tport);
591 }
592 pool_cache_put(nat_cache, nt);
593 }
594
595 #if defined(DDB) || defined(_NPF_TESTING)
596
597 void
598 npf_nat_dump(npf_nat_t *nt)
599 {
600 npf_natpolicy_t *np;
601 struct in_addr ip;
602
603 if (nt) {
604 np = nt->nt_natpolicy;
605 goto skip;
606 }
607 LIST_FOREACH(np, &nat_policy_list, n_entry) {
608 skip:
609 ip.s_addr = np->n_taddr;
610 printf("\tNAT policy: type = %d, flags = %d, taddr = %s\n",
611 np->n_type, np->n_flags, inet_ntoa(ip));
612 if (nt == NULL) {
613 continue;
614 }
615 ip.s_addr = nt->nt_oaddr;
616 printf("\tNAT: original address %s, oport %d, tport = %d\n",
617 inet_ntoa(ip), ntohs(nt->nt_oport), ntohs(nt->nt_tport));
618 if (nt->nt_alg) {
619 printf("\tNAT ALG = %p, ARG = %p\n",
620 nt->nt_alg, (void *)nt->nt_alg_arg);
621 }
622 return;
623 }
624 }
625
626 #endif
627