npf_bpf_comp.c revision 1.7.2.1 1 /* $NetBSD: npf_bpf_comp.c,v 1.7.2.1 2015/06/10 16:57:58 snj Exp $ */
2
3 /*-
4 * Copyright (c) 2010-2014 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This material is based upon work partially supported by The
8 * NetBSD Foundation under a contract with Mindaugas Rasiukevicius.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 /*
33 * BPF byte-code generation for NPF rules.
34 */
35
36 #include <sys/cdefs.h>
37 __RCSID("$NetBSD: npf_bpf_comp.c,v 1.7.2.1 2015/06/10 16:57:58 snj Exp $");
38
39 #include <stdlib.h>
40 #include <stdbool.h>
41 #include <stddef.h>
42 #include <string.h>
43 #include <inttypes.h>
44 #include <err.h>
45 #include <assert.h>
46
47 #include <netinet/in.h>
48 #include <netinet/in_systm.h>
49 #include <netinet/ip.h>
50 #include <netinet/ip6.h>
51 #include <netinet/udp.h>
52 #include <netinet/tcp.h>
53 #include <netinet/ip_icmp.h>
54 #include <netinet/icmp6.h>
55
56 #include <net/bpf.h>
57
58 #include "npfctl.h"
59
/*
 * Code generation state flags (kept in the 'flags' member of npf_bpf):
 *
 * - FETCHED_L3: the IP version check has already been emitted.
 * - CHECKED_L4: the L4 protocol check has already been emitted.
 * - X_EQ_L4OFF: register X has been loaded with the L4 header offset.
 *
 * Note: X_EQ_L4OFF must be cleared whenever register X is invalidated,
 * i.e. when it stores something other than the L4 header offset.
 * Generally, that is the case when BPF_LDX is used.
 */
#define	FETCHED_L3		0x01
#define	CHECKED_L4		0x02
#define	X_EQ_L4OFF		0x04
67
68 struct npf_bpf {
69 /*
70 * BPF program code, the allocated length (in bytes), the number
71 * of logical blocks and the flags.
72 */
73 struct bpf_program prog;
74 size_t alen;
75 u_int nblocks;
76 sa_family_t af;
77 uint32_t flags;
78
79 /* The current group offset and block number. */
80 bool ingroup;
81 u_int goff;
82 u_int gblock;
83
84 /* BPF marks, allocated length and the real length. */
85 uint32_t * marks;
86 size_t malen;
87 size_t mlen;
88 };
89
/*
 * The values which the generated BPF program returns to NPF on
 * success and on failure.
 */
#define	NPF_BPF_SUCCESS		((u_int)-1)
#define	NPF_BPF_FAILURE		0

/*
 * Placeholder jump offset which denotes a jump to the failure path;
 * such jumps are fixed up once the code is complete.
 * Note: this is the longest jump offset in BPF, since the offset is
 * one byte.
 */
#define	JUMP_MAGIC		0xff

/* Grow the buffers in multiples of 64 bytes to reduce re-allocations. */
#define	ALLOC_MASK		(64 - 1)
#define	ALLOC_ROUND(x)		(((x) + ALLOC_MASK) & ~ALLOC_MASK)
105
106 npf_bpf_t *
107 npfctl_bpf_create(void)
108 {
109 return ecalloc(1, sizeof(npf_bpf_t));
110 }
111
112 static void
113 fixup_jumps(npf_bpf_t *ctx, u_int start, u_int end, bool swap)
114 {
115 struct bpf_program *bp = &ctx->prog;
116
117 for (u_int i = start; i < end; i++) {
118 struct bpf_insn *insn = &bp->bf_insns[i];
119 const u_int fail_off = end - i;
120
121 if (fail_off >= JUMP_MAGIC) {
122 errx(EXIT_FAILURE, "BPF generation error: "
123 "the number of instructions is over the limit");
124 }
125 if (BPF_CLASS(insn->code) != BPF_JMP) {
126 continue;
127 }
128 if (swap) {
129 uint8_t jt = insn->jt;
130 insn->jt = insn->jf;
131 insn->jf = jt;
132 }
133 if (insn->jt == JUMP_MAGIC)
134 insn->jt = fail_off;
135 if (insn->jf == JUMP_MAGIC)
136 insn->jf = fail_off;
137 }
138 }
139
140 static void
141 add_insns(npf_bpf_t *ctx, struct bpf_insn *insns, size_t count)
142 {
143 struct bpf_program *bp = &ctx->prog;
144 size_t offset, len, reqlen;
145
146 /* Note: bf_len is the count of instructions. */
147 offset = bp->bf_len * sizeof(struct bpf_insn);
148 len = count * sizeof(struct bpf_insn);
149
150 /* Ensure the memory buffer for the program. */
151 reqlen = ALLOC_ROUND(offset + len);
152 if (reqlen > ctx->alen) {
153 bp->bf_insns = erealloc(bp->bf_insns, reqlen);
154 ctx->alen = reqlen;
155 }
156
157 /* Add the code block. */
158 memcpy((uint8_t *)bp->bf_insns + offset, insns, len);
159 bp->bf_len += count;
160 }
161
162 static void
163 done_raw_block(npf_bpf_t *ctx, const uint32_t *m, size_t len)
164 {
165 size_t reqlen, nargs = m[1];
166
167 if ((len / sizeof(uint32_t) - 2) != nargs) {
168 errx(EXIT_FAILURE, "invalid BPF block description");
169 }
170 reqlen = ALLOC_ROUND(ctx->mlen + len);
171 if (reqlen > ctx->malen) {
172 ctx->marks = erealloc(ctx->marks, reqlen);
173 ctx->malen = reqlen;
174 }
175 memcpy((uint8_t *)ctx->marks + ctx->mlen, m, len);
176 ctx->mlen += len;
177 }
178
179 static void
180 done_block(npf_bpf_t *ctx, const uint32_t *m, size_t len)
181 {
182 done_raw_block(ctx, m, len);
183 ctx->nblocks++;
184 }
185
186 struct bpf_program *
187 npfctl_bpf_complete(npf_bpf_t *ctx)
188 {
189 struct bpf_program *bp = &ctx->prog;
190 const u_int retoff = bp->bf_len;
191
192 /* No instructions (optimised out). */
193 if (!bp->bf_len)
194 return NULL;
195
196 /* Add the return fragment (success and failure paths). */
197 struct bpf_insn insns_ret[] = {
198 BPF_STMT(BPF_RET+BPF_K, NPF_BPF_SUCCESS),
199 BPF_STMT(BPF_RET+BPF_K, NPF_BPF_FAILURE),
200 };
201 add_insns(ctx, insns_ret, __arraycount(insns_ret));
202
203 /* Fixup all jumps to the main failure path. */
204 fixup_jumps(ctx, 0, retoff, false);
205
206 return &ctx->prog;
207 }
208
209 const void *
210 npfctl_bpf_bmarks(npf_bpf_t *ctx, size_t *len)
211 {
212 *len = ctx->mlen;
213 return ctx->marks;
214 }
215
216 void
217 npfctl_bpf_destroy(npf_bpf_t *ctx)
218 {
219 free(ctx->prog.bf_insns);
220 free(ctx->marks);
221 free(ctx);
222 }
223
224 /*
225 * npfctl_bpf_group: begin a logical group. It merely uses logical
226 * disjunction (OR) for compares within the group.
227 */
228 void
229 npfctl_bpf_group(npf_bpf_t *ctx)
230 {
231 struct bpf_program *bp = &ctx->prog;
232
233 assert(ctx->goff == 0);
234 assert(ctx->gblock == 0);
235
236 ctx->goff = bp->bf_len;
237 ctx->gblock = ctx->nblocks;
238 ctx->ingroup = true;
239 }
240
241 void
242 npfctl_bpf_endgroup(npf_bpf_t *ctx)
243 {
244 struct bpf_program *bp = &ctx->prog;
245 const size_t curoff = bp->bf_len;
246
247 /* If there are no blocks or only one - nothing to do. */
248 if ((ctx->nblocks - ctx->gblock) <= 1) {
249 ctx->goff = ctx->gblock = 0;
250 return;
251 }
252
253 /*
254 * Append a failure return as a fall-through i.e. if there is
255 * no match within the group.
256 */
257 struct bpf_insn insns_ret[] = {
258 BPF_STMT(BPF_RET+BPF_K, NPF_BPF_FAILURE),
259 };
260 add_insns(ctx, insns_ret, __arraycount(insns_ret));
261
262 /*
263 * Adjust jump offsets: on match - jump outside the group i.e.
264 * to the current offset. Otherwise, jump to the next instruction
265 * which would lead to the fall-through code above if none matches.
266 */
267 fixup_jumps(ctx, ctx->goff, curoff, true);
268 ctx->goff = ctx->gblock = 0;
269 }
270
271 static void
272 fetch_l3(npf_bpf_t *ctx, sa_family_t af, u_int flags)
273 {
274 u_int ver;
275
276 switch (af) {
277 case AF_INET:
278 ver = IPVERSION;
279 break;
280 case AF_INET6:
281 ver = IPV6_VERSION >> 4;
282 break;
283 case AF_UNSPEC:
284 ver = 0;
285 break;
286 default:
287 abort();
288 }
289
290 /*
291 * The memory store is populated with:
292 * - BPF_MW_IPVER: IP version (4 or 6).
293 * - BPF_MW_L4OFF: L4 header offset.
294 * - BPF_MW_L4PROTO: L4 protocol.
295 */
296 if ((ctx->flags & FETCHED_L3) == 0 || (af && ctx->af == 0)) {
297 const uint8_t jt = ver ? 0 : JUMP_MAGIC;
298 const uint8_t jf = ver ? JUMP_MAGIC : 0;
299 bool ingroup = ctx->ingroup;
300
301 /*
302 * L3 block cannot be inserted in the middle of a group.
303 * In fact, it never is. Check and start the group after.
304 */
305 if (ingroup) {
306 assert(ctx->nblocks == ctx->gblock);
307 npfctl_bpf_endgroup(ctx);
308 }
309
310 /*
311 * A <- IP version; A == expected-version?
312 * If no particular version specified, check for non-zero.
313 */
314 struct bpf_insn insns_af[] = {
315 BPF_STMT(BPF_LD+BPF_W+BPF_MEM, BPF_MW_IPVER),
316 BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, ver, jt, jf),
317 };
318 add_insns(ctx, insns_af, __arraycount(insns_af));
319 ctx->flags |= FETCHED_L3;
320 ctx->af = af;
321
322 if (af) {
323 uint32_t mwords[] = { BM_IPVER, 1, af };
324 done_raw_block(ctx, mwords, sizeof(mwords));
325 }
326 if (ingroup) {
327 npfctl_bpf_group(ctx);
328 }
329
330 } else if (af && af != ctx->af) {
331 errx(EXIT_FAILURE, "address family mismatch");
332 }
333
334 if ((flags & X_EQ_L4OFF) != 0 && (ctx->flags & X_EQ_L4OFF) == 0) {
335 /* X <- IP header length */
336 struct bpf_insn insns_hlen[] = {
337 BPF_STMT(BPF_LDX+BPF_MEM, BPF_MW_L4OFF),
338 };
339 add_insns(ctx, insns_hlen, __arraycount(insns_hlen));
340 ctx->flags |= X_EQ_L4OFF;
341 }
342 }
343
344 /*
345 * npfctl_bpf_proto: code block to match IP version and L4 protocol.
346 */
347 void
348 npfctl_bpf_proto(npf_bpf_t *ctx, sa_family_t af, int proto)
349 {
350 assert(af != AF_UNSPEC || proto != -1);
351
352 /* Note: fails if IP version does not match. */
353 fetch_l3(ctx, af, 0);
354 if (proto == -1) {
355 return;
356 }
357
358 struct bpf_insn insns_proto[] = {
359 /* A <- L4 protocol; A == expected-protocol? */
360 BPF_STMT(BPF_LD+BPF_W+BPF_MEM, BPF_MW_L4PROTO),
361 BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, proto, 0, JUMP_MAGIC),
362 };
363 add_insns(ctx, insns_proto, __arraycount(insns_proto));
364
365 uint32_t mwords[] = { BM_PROTO, 1, proto };
366 done_block(ctx, mwords, sizeof(mwords));
367 ctx->flags |= CHECKED_L4;
368 }
369
370 /*
371 * npfctl_bpf_cidr: code block to match IPv4 or IPv6 CIDR.
372 *
373 * => IP address shall be in the network byte order.
374 */
375 void
376 npfctl_bpf_cidr(npf_bpf_t *ctx, u_int opts, sa_family_t af,
377 const npf_addr_t *addr, const npf_netmask_t mask)
378 {
379 const uint32_t *awords = (const uint32_t *)addr;
380 u_int nwords, length, maxmask, off;
381
382 assert(((opts & MATCH_SRC) != 0) ^ ((opts & MATCH_DST) != 0));
383 assert((mask && mask <= NPF_MAX_NETMASK) || mask == NPF_NO_NETMASK);
384
385 switch (af) {
386 case AF_INET:
387 maxmask = 32;
388 off = (opts & MATCH_SRC) ?
389 offsetof(struct ip, ip_src) :
390 offsetof(struct ip, ip_dst);
391 nwords = sizeof(struct in_addr) / sizeof(uint32_t);
392 break;
393 case AF_INET6:
394 maxmask = 128;
395 off = (opts & MATCH_SRC) ?
396 offsetof(struct ip6_hdr, ip6_src) :
397 offsetof(struct ip6_hdr, ip6_dst);
398 nwords = sizeof(struct in6_addr) / sizeof(uint32_t);
399 break;
400 default:
401 abort();
402 }
403
404 /* Ensure address family. */
405 fetch_l3(ctx, af, 0);
406
407 length = (mask == NPF_NO_NETMASK) ? maxmask : mask;
408
409 /* CAUTION: BPF operates in host byte-order. */
410 for (u_int i = 0; i < nwords; i++) {
411 const u_int woff = i * sizeof(uint32_t);
412 uint32_t word = ntohl(awords[i]);
413 uint32_t wordmask;
414
415 if (length >= 32) {
416 /* The mask is a full word - do not apply it. */
417 wordmask = 0;
418 length -= 32;
419 } else if (length) {
420 wordmask = 0xffffffff << (32 - length);
421 length = 0;
422 } else {
423 /* The mask became zero - skip the rest. */
424 break;
425 }
426
427 /* A <- IP address (or one word of it) */
428 struct bpf_insn insns_ip[] = {
429 BPF_STMT(BPF_LD+BPF_W+BPF_ABS, off + woff),
430 };
431 add_insns(ctx, insns_ip, __arraycount(insns_ip));
432
433 /* A <- (A & MASK) */
434 if (wordmask) {
435 struct bpf_insn insns_mask[] = {
436 BPF_STMT(BPF_ALU+BPF_AND+BPF_K, wordmask),
437 };
438 add_insns(ctx, insns_mask, __arraycount(insns_mask));
439 }
440
441 /* A == expected-IP-word ? */
442 struct bpf_insn insns_cmp[] = {
443 BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, word, 0, JUMP_MAGIC),
444 };
445 add_insns(ctx, insns_cmp, __arraycount(insns_cmp));
446 }
447
448 uint32_t mwords[] = {
449 (opts & MATCH_SRC) ? BM_SRC_CIDR: BM_DST_CIDR, 6,
450 af, mask, awords[0], awords[1], awords[2], awords[3],
451 };
452 done_block(ctx, mwords, sizeof(mwords));
453 }
454
455 /*
456 * npfctl_bpf_ports: code block to match TCP/UDP port range.
457 *
458 * => Port numbers shall be in the network byte order.
459 */
460 void
461 npfctl_bpf_ports(npf_bpf_t *ctx, u_int opts, in_port_t from, in_port_t to)
462 {
463 const u_int sport_off = offsetof(struct udphdr, uh_sport);
464 const u_int dport_off = offsetof(struct udphdr, uh_dport);
465 u_int off;
466
467 /* TCP and UDP port offsets are the same. */
468 assert(sport_off == offsetof(struct tcphdr, th_sport));
469 assert(dport_off == offsetof(struct tcphdr, th_dport));
470 assert(ctx->flags & CHECKED_L4);
471
472 assert(((opts & MATCH_SRC) != 0) ^ ((opts & MATCH_DST) != 0));
473 off = (opts & MATCH_SRC) ? sport_off : dport_off;
474
475 /* X <- IP header length */
476 fetch_l3(ctx, AF_UNSPEC, X_EQ_L4OFF);
477
478 struct bpf_insn insns_fetch[] = {
479 /* A <- port */
480 BPF_STMT(BPF_LD+BPF_H+BPF_IND, off),
481 };
482 add_insns(ctx, insns_fetch, __arraycount(insns_fetch));
483
484 /* CAUTION: BPF operates in host byte-order. */
485 from = ntohs(from);
486 to = ntohs(to);
487
488 if (from == to) {
489 /* Single port case. */
490 struct bpf_insn insns_port[] = {
491 BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, from, 0, JUMP_MAGIC),
492 };
493 add_insns(ctx, insns_port, __arraycount(insns_port));
494 } else {
495 /* Port range case. */
496 struct bpf_insn insns_range[] = {
497 BPF_JUMP(BPF_JMP+BPF_JGE+BPF_K, from, 0, JUMP_MAGIC),
498 BPF_JUMP(BPF_JMP+BPF_JGT+BPF_K, to, JUMP_MAGIC, 0),
499 };
500 add_insns(ctx, insns_range, __arraycount(insns_range));
501 }
502
503 uint32_t mwords[] = {
504 opts & MATCH_SRC ? BM_SRC_PORTS : BM_DST_PORTS, 2, from, to
505 };
506 done_block(ctx, mwords, sizeof(mwords));
507 }
508
509 /*
510 * npfctl_bpf_tcpfl: code block to match TCP flags.
511 */
512 void
513 npfctl_bpf_tcpfl(npf_bpf_t *ctx, uint8_t tf, uint8_t tf_mask, bool checktcp)
514 {
515 const u_int tcpfl_off = offsetof(struct tcphdr, th_flags);
516 const bool usingmask = tf_mask != tf;
517
518 /* X <- IP header length */
519 fetch_l3(ctx, AF_UNSPEC, X_EQ_L4OFF);
520 if (checktcp) {
521 const u_int jf = usingmask ? 3 : 2;
522 assert(ctx->ingroup == false);
523
524 /* A <- L4 protocol; A == TCP? If not, jump out. */
525 struct bpf_insn insns_tcp[] = {
526 BPF_STMT(BPF_LD+BPF_W+BPF_MEM, BPF_MW_L4PROTO),
527 BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, IPPROTO_TCP, 0, jf),
528 };
529 add_insns(ctx, insns_tcp, __arraycount(insns_tcp));
530 } else {
531 assert(ctx->flags & CHECKED_L4);
532 }
533
534 struct bpf_insn insns_tf[] = {
535 /* A <- TCP flags */
536 BPF_STMT(BPF_LD+BPF_B+BPF_IND, tcpfl_off),
537 };
538 add_insns(ctx, insns_tf, __arraycount(insns_tf));
539
540 if (usingmask) {
541 /* A <- (A & mask) */
542 struct bpf_insn insns_mask[] = {
543 BPF_STMT(BPF_ALU+BPF_AND+BPF_K, tf_mask),
544 };
545 add_insns(ctx, insns_mask, __arraycount(insns_mask));
546 }
547
548 struct bpf_insn insns_cmp[] = {
549 /* A == expected-TCP-flags? */
550 BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, tf, 0, JUMP_MAGIC),
551 };
552 add_insns(ctx, insns_cmp, __arraycount(insns_cmp));
553
554 if (!checktcp) {
555 uint32_t mwords[] = { BM_TCPFL, 2, tf, tf_mask};
556 done_block(ctx, mwords, sizeof(mwords));
557 }
558 }
559
560 /*
561 * npfctl_bpf_icmp: code block to match ICMP type and/or code.
562 * Note: suitable both for the ICMPv4 and ICMPv6.
563 */
564 void
565 npfctl_bpf_icmp(npf_bpf_t *ctx, int type, int code)
566 {
567 const u_int type_off = offsetof(struct icmp, icmp_type);
568 const u_int code_off = offsetof(struct icmp, icmp_code);
569
570 assert(ctx->flags & CHECKED_L4);
571 assert(offsetof(struct icmp6_hdr, icmp6_type) == type_off);
572 assert(offsetof(struct icmp6_hdr, icmp6_code) == code_off);
573 assert(type != -1 || code != -1);
574
575 /* X <- IP header length */
576 fetch_l3(ctx, AF_UNSPEC, X_EQ_L4OFF);
577
578 if (type != -1) {
579 struct bpf_insn insns_type[] = {
580 BPF_STMT(BPF_LD+BPF_B+BPF_IND, type_off),
581 BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, type, 0, JUMP_MAGIC),
582 };
583 add_insns(ctx, insns_type, __arraycount(insns_type));
584
585 uint32_t mwords[] = { BM_ICMP_TYPE, 1, type };
586 done_block(ctx, mwords, sizeof(mwords));
587 }
588
589 if (code != -1) {
590 struct bpf_insn insns_code[] = {
591 BPF_STMT(BPF_LD+BPF_B+BPF_IND, code_off),
592 BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, code, 0, JUMP_MAGIC),
593 };
594 add_insns(ctx, insns_code, __arraycount(insns_code));
595
596 uint32_t mwords[] = { BM_ICMP_CODE, 1, code };
597 done_block(ctx, mwords, sizeof(mwords));
598 }
599 }
600
601 #define SRC_FLAG_BIT (1U << 31)
602
603 /*
604 * npfctl_bpf_table: code block to match source/destination IP address
605 * against NPF table specified by ID.
606 */
607 void
608 npfctl_bpf_table(npf_bpf_t *ctx, u_int opts, u_int tid)
609 {
610 const bool src = (opts & MATCH_SRC) != 0;
611
612 struct bpf_insn insns_table[] = {
613 BPF_STMT(BPF_LD+BPF_IMM, (src ? SRC_FLAG_BIT : 0) | tid),
614 BPF_STMT(BPF_MISC+BPF_COP, NPF_COP_TABLE),
615 BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, 0, JUMP_MAGIC, 0),
616 };
617 add_insns(ctx, insns_table, __arraycount(insns_table));
618
619 uint32_t mwords[] = { src ? BM_SRC_TABLE: BM_DST_TABLE, 1, tid };
620 done_block(ctx, mwords, sizeof(mwords));
621 }
622