Home | History | Annotate | Line # | Download | only in net
bpfjit.c revision 1.1.2.3
      1 /*-
      2  * Copyright (c) 2011-2012 Alexander Nasonov.
      3  * All rights reserved.
      4  *
      5  * Redistribution and use in source and binary forms, with or without
      6  * modification, are permitted provided that the following conditions
      7  * are met:
      8  *
      9  * 1. Redistributions of source code must retain the above copyright
     10  *    notice, this list of conditions and the following disclaimer.
     11  * 2. Redistributions in binary form must reproduce the above copyright
     12  *    notice, this list of conditions and the following disclaimer in
     13  *    the documentation and/or other materials provided with the
     14  *    distribution.
     15  *
     16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     17  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     18  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
     19  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
     20  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
     21  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
     22  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
     23  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
     24  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
     25  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
     26  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     27  * SUCH DAMAGE.
     28  */
     29 
     30 #include <sys/cdefs.h>
     31 #ifdef _KERNEL
     32 __KERNEL_RCSID(0, "$NetBSD: bpfjit.c,v 1.1.2.3 2013/01/16 05:33:48 yamt Exp $");
     33 #else
     34 __RCSID("$NetBSD: bpfjit.c,v 1.1.2.3 2013/01/16 05:33:48 yamt Exp $");
     35 #endif
     36 
     37 #include <net/bpfjit.h>
     38 
     39 #ifndef _KERNEL
     40 #include <assert.h>
     41 #define BPFJIT_ASSERT(c) assert(c)
     42 #else
     43 #define BPFJIT_ASSERT(c) KASSERT(c)
     44 #endif
     45 
     46 #ifndef _KERNEL
     47 #include <stdlib.h>
     48 #define BPFJIT_MALLOC(sz) malloc(sz)
     49 #define BPFJIT_FREE(p) free(p)
     50 #else
     51 #include <sys/malloc.h>
     52 #define BPFJIT_MALLOC(sz) kern_malloc(sz, M_WAITOK)
     53 #define BPFJIT_FREE(p) kern_free(p)
     54 #endif
     55 
     56 #ifndef _KERNEL
     57 #include <limits.h>
     58 #include <stdbool.h>
     59 #include <stddef.h>
     60 #include <stdint.h>
     61 #else
     62 #include <machine/limits.h>
     63 #include <sys/null.h>
     64 #include <sys/types.h>
     65 #include <sys/atomic.h>
     66 #include <sys/module.h>
     67 #endif
     68 
     69 #include <sys/queue.h>
     70 #include <sys/types.h>
     71 
     72 #include <sljitLir.h>
     73 
     74 #if !defined(_KERNEL) && defined(SLJIT_VERBOSE) && SLJIT_VERBOSE
     75 #include <stdio.h> /* for stderr */
     76 #endif
     77 
     78 
     79 #define BPFJIT_A	SLJIT_TEMPORARY_REG1
     80 #define BPFJIT_X	SLJIT_TEMPORARY_EREG1
     81 #define BPFJIT_TMP1	SLJIT_TEMPORARY_REG2
     82 #define BPFJIT_TMP2	SLJIT_TEMPORARY_REG3
     83 #define BPFJIT_BUF	SLJIT_SAVED_REG1
     84 #define BPFJIT_WIRELEN	SLJIT_SAVED_REG2
     85 #define BPFJIT_BUFLEN	SLJIT_SAVED_REG3
     86 #define BPFJIT_KERN_TMP SLJIT_TEMPORARY_EREG2
     87 
     88 /*
     89  * Flags for bpfjit_optimization_hints().
     90  */
     91 #define BPFJIT_INIT_X 0x10000
     92 #define BPFJIT_INIT_A 0x20000
     93 
     94 
     95 /*
     96  * Node of bj_jumps list.
     97  */
     98 struct bpfjit_jump
     99 {
    100 	struct sljit_jump *bj_jump;
    101 	SLIST_ENTRY(bpfjit_jump) bj_entries;
    102 	uint32_t bj_safe_length;
    103 };
    104 
    105 /*
    106  * Data for BPF_JMP instruction.
    107  */
    108 struct bpfjit_jump_data
    109 {
    110 	/*
    111 	 * These entries make up bj_jumps list:
    112 	 * bj_jtf[0] - when coming from jt path,
    113 	 * bj_jtf[1] - when coming from jf path.
    114 	 */
    115 	struct bpfjit_jump bj_jtf[2];
    116 };
    117 
    118 /*
    119  * Data for "read from packet" instructions.
    120  * See also read_pkt_insn() function below.
    121  */
    122 struct bpfjit_read_pkt_data
    123 {
    124 	/*
    125 	 * If positive, emit "if (buflen < bj_check_length) return 0".
    126 	 * We assume that buflen is never equal to UINT32_MAX (otherwise,
    127 	 * we need a special bool variable to emit unconditional "return 0").
    128 	 */
    129 	uint32_t bj_check_length;
    130 };
    131 
    132 /*
    133  * Additional (optimization-related) data for bpf_insn.
    134  */
    135 struct bpfjit_insn_data
    136 {
    137 	/* List of jumps to this insn. */
    138 	SLIST_HEAD(, bpfjit_jump) bj_jumps;
    139 
    140 	union {
    141 		struct bpfjit_jump_data     bj_jdata;
    142 		struct bpfjit_read_pkt_data bj_rdata;
    143 	} bj_aux;
    144 
    145 	bool bj_unreachable;
    146 };
    147 
    148 #ifdef _KERNEL
    149 
    150 uint32_t m_xword(const struct mbuf *, uint32_t, int *);
    151 uint32_t m_xhalf(const struct mbuf *, uint32_t, int *);
    152 uint32_t m_xbyte(const struct mbuf *, uint32_t, int *);
    153 
    154 MODULE(MODULE_CLASS_MISC, bpfjit, "sljit")
    155 
    156 static int
    157 bpfjit_modcmd(modcmd_t cmd, void *arg)
    158 {
    159 
    160 	switch (cmd) {
    161 	case MODULE_CMD_INIT:
    162 		bpfjit_module_ops.bj_free_code = &bpfjit_free_code;
    163 		membar_producer();
    164 		bpfjit_module_ops.bj_generate_code = &bpfjit_generate_code;
    165 		membar_producer();
    166 		return 0;
    167 
    168 	case MODULE_CMD_FINI:
    169 		return EOPNOTSUPP;
    170 
    171 	default:
    172 		return ENOTTY;
    173 	}
    174 }
    175 #endif
    176 
    177 static uint32_t
    178 read_width(struct bpf_insn *pc)
    179 {
    180 
    181 	switch (BPF_SIZE(pc->code)) {
    182 	case BPF_W:
    183 		return 4;
    184 	case BPF_H:
    185 		return 2;
    186 	case BPF_B:
    187 		return 1;
    188 	default:
    189 		BPFJIT_ASSERT(false);
    190 		return 0;
    191 	}
    192 }
    193 
    194 /*
    195  * Get offset of M[k] on the stack.
    196  */
    197 static size_t
    198 mem_local_offset(uint32_t k, unsigned int minm)
    199 {
    200 	size_t moff = (k - minm) * sizeof(uint32_t);
    201 
    202 #ifdef _KERNEL
    203 	/*
    204 	 * 4 bytes for the third argument of m_xword/m_xhalf/m_xbyte.
    205 	 */
    206 	return sizeof(uint32_t) + moff;
    207 #else
    208 	return moff;
    209 #endif
    210 }
    211 
    212 /*
    213  * Generate code for BPF_LD+BPF_B+BPF_ABS    A <- P[k:1].
    214  */
    215 static int
    216 emit_read8(struct sljit_compiler* compiler, uint32_t k)
    217 {
    218 
    219 	return sljit_emit_op1(compiler,
    220 	    SLJIT_MOV_UB,
    221 	    BPFJIT_A, 0,
    222 	    SLJIT_MEM1(BPFJIT_BUF), k);
    223 }
    224 
    225 /*
    226  * Generate code for BPF_LD+BPF_H+BPF_ABS    A <- P[k:2].
    227  */
    228 static int
    229 emit_read16(struct sljit_compiler* compiler, uint32_t k)
    230 {
    231 	int status;
    232 
    233 	/* tmp1 = buf[k]; */
    234 	status = sljit_emit_op1(compiler,
    235 	    SLJIT_MOV_UB,
    236 	    BPFJIT_TMP1, 0,
    237 	    SLJIT_MEM1(BPFJIT_BUF), k);
    238 	if (status != SLJIT_SUCCESS)
    239 		return status;
    240 
    241 	/* A = buf[k+1]; */
    242 	status = sljit_emit_op1(compiler,
    243 	    SLJIT_MOV_UB,
    244 	    BPFJIT_A, 0,
    245 	    SLJIT_MEM1(BPFJIT_BUF), k+1);
    246 	if (status != SLJIT_SUCCESS)
    247 		return status;
    248 
    249 	/* tmp1 = tmp1 << 8; */
    250 	status = sljit_emit_op2(compiler,
    251 	    SLJIT_SHL,
    252 	    BPFJIT_TMP1, 0,
    253 	    BPFJIT_TMP1, 0,
    254 	    SLJIT_IMM, 8);
    255 	if (status != SLJIT_SUCCESS)
    256 		return status;
    257 
    258 	/* A = A + tmp1; */
    259 	status = sljit_emit_op2(compiler,
    260 	    SLJIT_ADD,
    261 	    BPFJIT_A, 0,
    262 	    BPFJIT_A, 0,
    263 	    BPFJIT_TMP1, 0);
    264 	return status;
    265 }
    266 
    267 /*
    268  * Generate code for BPF_LD+BPF_W+BPF_ABS    A <- P[k:4].
    269  */
    270 static int
    271 emit_read32(struct sljit_compiler* compiler, uint32_t k)
    272 {
    273 	int status;
    274 
    275 	/* tmp1 = buf[k]; */
    276 	status = sljit_emit_op1(compiler,
    277 	    SLJIT_MOV_UB,
    278 	    BPFJIT_TMP1, 0,
    279 	    SLJIT_MEM1(BPFJIT_BUF), k);
    280 	if (status != SLJIT_SUCCESS)
    281 		return status;
    282 
    283 	/* tmp2 = buf[k+1]; */
    284 	status = sljit_emit_op1(compiler,
    285 	    SLJIT_MOV_UB,
    286 	    BPFJIT_TMP2, 0,
    287 	    SLJIT_MEM1(BPFJIT_BUF), k+1);
    288 	if (status != SLJIT_SUCCESS)
    289 		return status;
    290 
    291 	/* A = buf[k+3]; */
    292 	status = sljit_emit_op1(compiler,
    293 	    SLJIT_MOV_UB,
    294 	    BPFJIT_A, 0,
    295 	    SLJIT_MEM1(BPFJIT_BUF), k+3);
    296 	if (status != SLJIT_SUCCESS)
    297 		return status;
    298 
    299 	/* tmp1 = tmp1 << 24; */
    300 	status = sljit_emit_op2(compiler,
    301 	    SLJIT_SHL,
    302 	    BPFJIT_TMP1, 0,
    303 	    BPFJIT_TMP1, 0,
    304 	    SLJIT_IMM, 24);
    305 	if (status != SLJIT_SUCCESS)
    306 		return status;
    307 
    308 	/* A = A + tmp1; */
    309 	status = sljit_emit_op2(compiler,
    310 	    SLJIT_ADD,
    311 	    BPFJIT_A, 0,
    312 	    BPFJIT_A, 0,
    313 	    BPFJIT_TMP1, 0);
    314 	if (status != SLJIT_SUCCESS)
    315 		return status;
    316 
    317 	/* tmp1 = buf[k+2]; */
    318 	status = sljit_emit_op1(compiler,
    319 	    SLJIT_MOV_UB,
    320 	    BPFJIT_TMP1, 0,
    321 	    SLJIT_MEM1(BPFJIT_BUF), k+2);
    322 	if (status != SLJIT_SUCCESS)
    323 		return status;
    324 
    325 	/* tmp2 = tmp2 << 16; */
    326 	status = sljit_emit_op2(compiler,
    327 	    SLJIT_SHL,
    328 	    BPFJIT_TMP2, 0,
    329 	    BPFJIT_TMP2, 0,
    330 	    SLJIT_IMM, 16);
    331 	if (status != SLJIT_SUCCESS)
    332 		return status;
    333 
    334 	/* A = A + tmp2; */
    335 	status = sljit_emit_op2(compiler,
    336 	    SLJIT_ADD,
    337 	    BPFJIT_A, 0,
    338 	    BPFJIT_A, 0,
    339 	    BPFJIT_TMP2, 0);
    340 	if (status != SLJIT_SUCCESS)
    341 		return status;
    342 
    343 	/* tmp1 = tmp1 << 8; */
    344 	status = sljit_emit_op2(compiler,
    345 	    SLJIT_SHL,
    346 	    BPFJIT_TMP1, 0,
    347 	    BPFJIT_TMP1, 0,
    348 	    SLJIT_IMM, 8);
    349 	if (status != SLJIT_SUCCESS)
    350 		return status;
    351 
    352 	/* A = A + tmp1; */
    353 	status = sljit_emit_op2(compiler,
    354 	    SLJIT_ADD,
    355 	    BPFJIT_A, 0,
    356 	    BPFJIT_A, 0,
    357 	    BPFJIT_TMP1, 0);
    358 	return status;
    359 }
    360 
    361 #ifdef _KERNEL
    362 /*
    363  * Generate m_xword/m_xhalf/m_xbyte call.
    364  *
    365  * pc is one of:
    366  * BPF_LD+BPF_W+BPF_ABS    A <- P[k:4]
    367  * BPF_LD+BPF_H+BPF_ABS    A <- P[k:2]
    368  * BPF_LD+BPF_B+BPF_ABS    A <- P[k:1]
    369  * BPF_LD+BPF_W+BPF_IND    A <- P[X+k:4]
    370  * BPF_LD+BPF_H+BPF_IND    A <- P[X+k:2]
    371  * BPF_LD+BPF_B+BPF_IND    A <- P[X+k:1]
    372  * BPF_LDX+BPF_B+BPF_MSH   X <- 4*(P[k:1]&0xf)
    373  *
    374  * dst must be BPFJIT_A for BPF_LD instructions and BPFJIT_X
    375  * or any of BPFJIT_TMP* registrers for BPF_MSH instruction.
    376  */
    377 static int
    378 emit_xcall(struct sljit_compiler* compiler, struct bpf_insn *pc,
    379     int dst, sljit_w dstw, struct sljit_jump **ret0_jump,
    380     uint32_t (*fn)(const struct mbuf *, uint32_t, int *))
    381 {
    382 #if BPFJIT_X != SLJIT_TEMPORARY_EREG1 || \
    383     BPFJIT_X == SLJIT_RETURN_REG
    384 #error "Not supported assignment of registers."
    385 #endif
    386 	int status;
    387 
    388 	/*
    389 	 * The third argument of fn is an address on stack.
    390 	 */
    391 	const int arg3_offset = 0;
    392 
    393 	if (BPF_CLASS(pc->code) == BPF_LDX) {
    394 		/* save A */
    395 		status = sljit_emit_op1(compiler,
    396 		    SLJIT_MOV,
    397 		    BPFJIT_KERN_TMP, 0,
    398 		    BPFJIT_A, 0);
    399 		if (status != SLJIT_SUCCESS)
    400 			return status;
    401 	}
    402 
    403 	/*
    404 	 * Prepare registers for fn(buf, k, &err) call.
    405 	 */
    406 	status = sljit_emit_op1(compiler,
    407 	    SLJIT_MOV,
    408 	    SLJIT_TEMPORARY_REG1, 0,
    409 	    BPFJIT_BUF, 0);
    410 	if (status != SLJIT_SUCCESS)
    411 		return status;
    412 
    413 	if (BPF_CLASS(pc->code) == BPF_LD && BPF_MODE(pc->code) == BPF_IND) {
    414 		status = sljit_emit_op2(compiler,
    415 		    SLJIT_ADD,
    416 		    SLJIT_TEMPORARY_REG2, 0,
    417 		    BPFJIT_X, 0,
    418 		    SLJIT_IMM, (uint32_t)pc->k);
    419 	} else {
    420 		status = sljit_emit_op1(compiler,
    421 		    SLJIT_MOV,
    422 		    SLJIT_TEMPORARY_REG2, 0,
    423 		    SLJIT_IMM, (uint32_t)pc->k);
    424 	}
    425 
    426 	if (status != SLJIT_SUCCESS)
    427 		return status;
    428 
    429 	status = sljit_get_local_base(compiler,
    430 	    SLJIT_TEMPORARY_REG3, 0, arg3_offset);
    431 	if (status != SLJIT_SUCCESS)
    432 		return status;
    433 
    434 	/* fn(buf, k, &err); */
    435 	status = sljit_emit_ijump(compiler,
    436 	    SLJIT_CALL3,
    437 	    SLJIT_IMM, SLJIT_FUNC_OFFSET(fn));
    438 
    439 	if (BPF_CLASS(pc->code) == BPF_LDX) {
    440 
    441 		/* move return value to dst */
    442 		BPFJIT_ASSERT(dst != SLJIT_RETURN_REG);
    443 		status = sljit_emit_op1(compiler,
    444 		    SLJIT_MOV,
    445 		    dst, dstw,
    446 		    SLJIT_RETURN_REG, 0);
    447 		if (status != SLJIT_SUCCESS)
    448 			return status;
    449 
    450 		/* restore A */
    451 		status = sljit_emit_op1(compiler,
    452 		    SLJIT_MOV,
    453 		    BPFJIT_A, 0,
    454 		    BPFJIT_KERN_TMP, 0);
    455 		if (status != SLJIT_SUCCESS)
    456 			return status;
    457 
    458 	} else if (dst != SLJIT_RETURN_REG) {
    459 		status = sljit_emit_op1(compiler,
    460 		    SLJIT_MOV,
    461 		    dst, dstw,
    462 		    SLJIT_RETURN_REG, 0);
    463 		if (status != SLJIT_SUCCESS)
    464 			return status;
    465 	}
    466 
    467 	/* tmp3 = *err; */
    468 	status = sljit_emit_op1(compiler,
    469 	    SLJIT_MOV_UI,
    470 	    SLJIT_TEMPORARY_REG3, 0,
    471 	    SLJIT_MEM1(SLJIT_LOCALS_REG), arg3_offset);
    472 	if (status != SLJIT_SUCCESS)
    473 		return status;
    474 
    475 	/* if (tmp3 != 0) return 0; */
    476 	*ret0_jump = sljit_emit_cmp(compiler,
    477 	    SLJIT_C_NOT_EQUAL,
    478 	    SLJIT_TEMPORARY_REG3, 0,
    479 	    SLJIT_IMM, 0);
    480 	if (*ret0_jump == NULL)
    481 		return SLJIT_ERR_ALLOC_FAILED;
    482 
    483 	return status;
    484 }
    485 #endif
    486 
    487 /*
    488  * Generate code for
    489  * BPF_LD+BPF_W+BPF_ABS    A <- P[k:4]
    490  * BPF_LD+BPF_H+BPF_ABS    A <- P[k:2]
    491  * BPF_LD+BPF_B+BPF_ABS    A <- P[k:1]
    492  * BPF_LD+BPF_W+BPF_IND    A <- P[X+k:4]
    493  * BPF_LD+BPF_H+BPF_IND    A <- P[X+k:2]
    494  * BPF_LD+BPF_B+BPF_IND    A <- P[X+k:1]
    495  */
    496 static int
    497 emit_pkt_read(struct sljit_compiler* compiler,
    498     struct bpf_insn *pc, struct sljit_jump *to_mchain_jump,
    499     struct sljit_jump **ret0, size_t *ret0_size)
    500 {
    501 	int status;
    502 	uint32_t width;
    503 	struct sljit_jump *jump;
    504 #ifdef _KERNEL
    505 	struct sljit_label *label;
    506 	struct sljit_jump *over_mchain_jump;
    507 	const bool check_zero_buflen = (to_mchain_jump != NULL);
    508 #endif
    509 	const uint32_t k = pc->k;
    510 
    511 #ifdef _KERNEL
    512 	if (to_mchain_jump == NULL) {
    513 		to_mchain_jump = sljit_emit_cmp(compiler,
    514 		    SLJIT_C_EQUAL,
    515 		    BPFJIT_BUFLEN, 0,
    516 		    SLJIT_IMM, 0);
    517 		if (to_mchain_jump == NULL)
    518   			return SLJIT_ERR_ALLOC_FAILED;
    519 	}
    520 #endif
    521 
    522 	width = read_width(pc);
    523 
    524 	if (BPF_MODE(pc->code) == BPF_IND) {
    525 		/* tmp1 = buflen - (pc->k + width); */
    526 		status = sljit_emit_op2(compiler,
    527 		    SLJIT_SUB,
    528 		    BPFJIT_TMP1, 0,
    529 		    BPFJIT_BUFLEN, 0,
    530 		    SLJIT_IMM, k + width);
    531 		if (status != SLJIT_SUCCESS)
    532 			return status;
    533 
    534 		/* buf += X; */
    535 		status = sljit_emit_op2(compiler,
    536 		    SLJIT_ADD,
    537 		    BPFJIT_BUF, 0,
    538 		    BPFJIT_BUF, 0,
    539 		    BPFJIT_X, 0);
    540 		if (status != SLJIT_SUCCESS)
    541 			return status;
    542 
    543 		/* if (tmp1 < X) return 0; */
    544 		jump = sljit_emit_cmp(compiler,
    545 		    SLJIT_C_LESS,
    546 		    BPFJIT_TMP1, 0,
    547 		    BPFJIT_X, 0);
    548 		if (jump == NULL)
    549   			return SLJIT_ERR_ALLOC_FAILED;
    550 		ret0[(*ret0_size)++] = jump;
    551 	}
    552 
    553 	switch (width) {
    554 	case 4:
    555 		status = emit_read32(compiler, k);
    556 		break;
    557 	case 2:
    558 		status = emit_read16(compiler, k);
    559 		break;
    560 	case 1:
    561 		status = emit_read8(compiler, k);
    562 		break;
    563 	}
    564 
    565 	if (status != SLJIT_SUCCESS)
    566 		return status;
    567 
    568 	if (BPF_MODE(pc->code) == BPF_IND) {
    569 		/* buf -= X; */
    570 		status = sljit_emit_op2(compiler,
    571 		    SLJIT_SUB,
    572 		    BPFJIT_BUF, 0,
    573 		    BPFJIT_BUF, 0,
    574 		    BPFJIT_X, 0);
    575 		if (status != SLJIT_SUCCESS)
    576 			return status;
    577 	}
    578 
    579 #ifdef _KERNEL
    580 	over_mchain_jump = sljit_emit_jump(compiler, SLJIT_JUMP);
    581 	if (over_mchain_jump == NULL)
    582   		return SLJIT_ERR_ALLOC_FAILED;
    583 
    584 	/* entry point to mchain handler */
    585 	label = sljit_emit_label(compiler);
    586 	if (label == NULL)
    587   		return SLJIT_ERR_ALLOC_FAILED;
    588 	sljit_set_label(to_mchain_jump, label);
    589 
    590 	if (check_zero_buflen) {
    591 		/* if (buflen != 0) return 0; */
    592 		jump = sljit_emit_cmp(compiler,
    593 		    SLJIT_C_NOT_EQUAL,
    594 		    BPFJIT_BUFLEN, 0,
    595 		    SLJIT_IMM, 0);
    596 		if (jump == NULL)
    597 			return SLJIT_ERR_ALLOC_FAILED;
    598 		ret0[(*ret0_size)++] = jump;
    599 	}
    600 
    601 	switch (width) {
    602 	case 4:
    603 		status = emit_xcall(compiler, pc, BPFJIT_A, 0, &jump, &m_xword);
    604 		break;
    605 	case 2:
    606 		status = emit_xcall(compiler, pc, BPFJIT_A, 0, &jump, &m_xhalf);
    607 		break;
    608 	case 1:
    609 		status = emit_xcall(compiler, pc, BPFJIT_A, 0, &jump, &m_xbyte);
    610 		break;
    611 	}
    612 
    613 	if (status != SLJIT_SUCCESS)
    614 		return status;
    615 
    616 	ret0[(*ret0_size)++] = jump;
    617 
    618 	label = sljit_emit_label(compiler);
    619 	if (label == NULL)
    620 		return SLJIT_ERR_ALLOC_FAILED;
    621 	sljit_set_label(over_mchain_jump, label);
    622 #endif
    623 
    624 	return status;
    625 }
    626 
    627 /*
    628  * Generate code for BPF_LDX+BPF_B+BPF_MSH    X <- 4*(P[k:1]&0xf).
    629  */
    630 static int
    631 emit_msh(struct sljit_compiler* compiler,
    632     struct bpf_insn *pc, struct sljit_jump *to_mchain_jump,
    633     struct sljit_jump **ret0, size_t *ret0_size)
    634 {
    635 	int status;
    636 #ifdef _KERNEL
    637 	struct sljit_label *label;
    638 	struct sljit_jump *jump, *over_mchain_jump;
    639 	const bool check_zero_buflen = (to_mchain_jump != NULL);
    640 #endif
    641 	const uint32_t k = pc->k;
    642 
    643 #ifdef _KERNEL
    644 	if (to_mchain_jump == NULL) {
    645 		to_mchain_jump = sljit_emit_cmp(compiler,
    646 		    SLJIT_C_EQUAL,
    647 		    BPFJIT_BUFLEN, 0,
    648 		    SLJIT_IMM, 0);
    649 		if (to_mchain_jump == NULL)
    650  			return SLJIT_ERR_ALLOC_FAILED;
    651 	}
    652 #endif
    653 
    654 	/* tmp1 = buf[k] */
    655 	status = sljit_emit_op1(compiler,
    656 	    SLJIT_MOV_UB,
    657 	    BPFJIT_TMP1, 0,
    658 	    SLJIT_MEM1(BPFJIT_BUF), k);
    659 	if (status != SLJIT_SUCCESS)
    660 		return status;
    661 
    662 	/* tmp1 &= 0xf */
    663 	status = sljit_emit_op2(compiler,
    664 	    SLJIT_AND,
    665 	    BPFJIT_TMP1, 0,
    666 	    BPFJIT_TMP1, 0,
    667 	    SLJIT_IMM, 0xf);
    668 	if (status != SLJIT_SUCCESS)
    669 		return status;
    670 
    671 	/* tmp1 = tmp1 << 2 */
    672 	status = sljit_emit_op2(compiler,
    673 	    SLJIT_SHL,
    674 	    BPFJIT_X, 0,
    675 	    BPFJIT_TMP1, 0,
    676 	    SLJIT_IMM, 2);
    677 	if (status != SLJIT_SUCCESS)
    678 		return status;
    679 
    680 #ifdef _KERNEL
    681 	over_mchain_jump = sljit_emit_jump(compiler, SLJIT_JUMP);
    682 	if (over_mchain_jump == NULL)
    683 		return SLJIT_ERR_ALLOC_FAILED;
    684 
    685 	/* entry point to mchain handler */
    686 	label = sljit_emit_label(compiler);
    687 	if (label == NULL)
    688 		return SLJIT_ERR_ALLOC_FAILED;
    689 	sljit_set_label(to_mchain_jump, label);
    690 
    691 	if (check_zero_buflen) {
    692 		/* if (buflen != 0) return 0; */
    693 		jump = sljit_emit_cmp(compiler,
    694 		    SLJIT_C_NOT_EQUAL,
    695 		    BPFJIT_BUFLEN, 0,
    696 		    SLJIT_IMM, 0);
    697 		if (jump == NULL)
    698   			return SLJIT_ERR_ALLOC_FAILED;
    699 		ret0[(*ret0_size)++] = jump;
    700 	}
    701 
    702 	status = emit_xcall(compiler, pc, BPFJIT_TMP1, 0, &jump, &m_xbyte);
    703 	if (status != SLJIT_SUCCESS)
    704 		return status;
    705 	ret0[(*ret0_size)++] = jump;
    706 
    707 	/* tmp1 &= 0xf */
    708 	status = sljit_emit_op2(compiler,
    709 	    SLJIT_AND,
    710 	    BPFJIT_TMP1, 0,
    711 	    BPFJIT_TMP1, 0,
    712 	    SLJIT_IMM, 0xf);
    713 	if (status != SLJIT_SUCCESS)
    714 		return status;
    715 
    716 	/* tmp1 = tmp1 << 2 */
    717 	status = sljit_emit_op2(compiler,
    718 	    SLJIT_SHL,
    719 	    BPFJIT_X, 0,
    720 	    BPFJIT_TMP1, 0,
    721 	    SLJIT_IMM, 2);
    722 	if (status != SLJIT_SUCCESS)
    723 		return status;
    724 
    725 
    726 	label = sljit_emit_label(compiler);
    727 	if (label == NULL)
    728 		return SLJIT_ERR_ALLOC_FAILED;
    729 	sljit_set_label(over_mchain_jump, label);
    730 #endif
    731 
    732 	return status;
    733 }
    734 
    735 static int
    736 emit_pow2_division(struct sljit_compiler* compiler, uint32_t k)
    737 {
    738 	int shift = 0;
    739 	int status = SLJIT_SUCCESS;
    740 
    741 	while (k > 1) {
    742 		k >>= 1;
    743 		shift++;
    744 	}
    745 
    746 	BPFJIT_ASSERT(k == 1 && shift < 32);
    747 
    748 	if (shift != 0) {
    749 		status = sljit_emit_op2(compiler,
    750 		    SLJIT_LSHR|SLJIT_INT_OP,
    751 		    BPFJIT_A, 0,
    752 		    BPFJIT_A, 0,
    753 		    SLJIT_IMM, shift);
    754 	}
    755 
    756 	return status;
    757 }
    758 
    759 #if !defined(BPFJIT_USE_UDIV)
    760 static sljit_uw
    761 divide(sljit_uw x, sljit_uw y)
    762 {
    763 
    764 	return (uint32_t)x / (uint32_t)y;
    765 }
    766 #endif
    767 
    768 /*
    769  * Generate A = A / div.
    770  * divt,divw are either SLJIT_IMM,pc->k or BPFJIT_X,0.
    771  */
    772 static int
    773 emit_division(struct sljit_compiler* compiler, int divt, sljit_w divw)
    774 {
    775 	int status;
    776 
    777 #if BPFJIT_X == SLJIT_TEMPORARY_REG1 || \
    778     BPFJIT_X == SLJIT_RETURN_REG     || \
    779     BPFJIT_X == SLJIT_TEMPORARY_REG2 || \
    780     BPFJIT_A == SLJIT_TEMPORARY_REG2
    781 #error "Not supported assignment of registers."
    782 #endif
    783 
    784 #if BPFJIT_A != SLJIT_TEMPORARY_REG1
    785 	status = sljit_emit_op1(compiler,
    786 	    SLJIT_MOV,
    787 	    SLJIT_TEMPORARY_REG1, 0,
    788 	    BPFJIT_A, 0);
    789 	if (status != SLJIT_SUCCESS)
    790 		return status;
    791 #endif
    792 
    793 	status = sljit_emit_op1(compiler,
    794 	    SLJIT_MOV,
    795 	    SLJIT_TEMPORARY_REG2, 0,
    796 	    divt, divw);
    797 	if (status != SLJIT_SUCCESS)
    798 		return status;
    799 
    800 #if defined(BPFJIT_USE_UDIV)
    801 	status = sljit_emit_op0(compiler, SLJIT_UDIV|SLJIT_INT_OP);
    802 
    803 #if BPFJIT_A != SLJIT_TEMPORARY_REG1
    804 	status = sljit_emit_op1(compiler,
    805 	    SLJIT_MOV,
    806 	    BPFJIT_A, 0,
    807 	    SLJIT_TEMPORARY_REG1, 0);
    808 	if (status != SLJIT_SUCCESS)
    809 		return status;
    810 #endif
    811 #else
    812 	status = sljit_emit_ijump(compiler,
    813 	    SLJIT_CALL2,
    814 	    SLJIT_IMM, SLJIT_FUNC_OFFSET(divide));
    815 
    816 #if BPFJIT_A != SLJIT_RETURN_REG
    817 	status = sljit_emit_op1(compiler,
    818 	    SLJIT_MOV,
    819 	    BPFJIT_A, 0,
    820 	    SLJIT_RETURN_REG, 0);
    821 	if (status != SLJIT_SUCCESS)
    822 		return status;
    823 #endif
    824 #endif
    825 
    826 	return status;
    827 }
    828 
    829 /*
    830  * Count BPF_RET instructions.
    831  */
    832 static size_t
    833 count_returns(struct bpf_insn *insns, size_t insn_count)
    834 {
    835 	size_t i;
    836 	size_t rv;
    837 
    838 	rv = 0;
    839 	for (i = 0; i < insn_count; i++) {
    840 		if (BPF_CLASS(insns[i].code) == BPF_RET)
    841 			rv++;
    842 	}
    843 
    844 	return rv;
    845 }
    846 
    847 /*
    848  * Return true if pc is a "read from packet" instruction.
    849  * If length is not NULL and return value is true, *length will
    850  * be set to a safe length required to read a packet.
    851  */
    852 static bool
    853 read_pkt_insn(struct bpf_insn *pc, uint32_t *length)
    854 {
    855 	bool rv;
    856 	uint32_t width;
    857 
    858 	switch (BPF_CLASS(pc->code)) {
    859 	default:
    860 		rv = false;
    861 		break;
    862 
    863 	case BPF_LD:
    864 		rv = BPF_MODE(pc->code) == BPF_ABS ||
    865 		     BPF_MODE(pc->code) == BPF_IND;
    866 		if (rv)
    867 			width = read_width(pc);
    868 		break;
    869 
    870 	case BPF_LDX:
    871 		rv = pc->code == (BPF_LDX|BPF_B|BPF_MSH);
    872 		width = 1;
    873 		break;
    874 	}
    875 
    876 	if (rv && length != NULL) {
    877 		*length = (pc->k > UINT32_MAX - width) ?
    878 		    UINT32_MAX : pc->k + width;
    879 	}
    880 
    881 	return rv;
    882 }
    883 
    884 /*
    885  * Set bj_check_length for all "read from packet" instructions
    886  * in a linear block of instructions [from, to).
    887  */
    888 static void
    889 set_check_length(struct bpf_insn *insns, struct bpfjit_insn_data *insn_dat,
    890     size_t from, size_t to, uint32_t length)
    891 {
    892 
    893 	for (; from < to; from++) {
    894 		if (read_pkt_insn(&insns[from], NULL)) {
    895 			insn_dat[from].bj_aux.bj_rdata.bj_check_length = length;
    896 			length = 0;
    897 		}
    898 	}
    899 }
    900 
    901 /*
    902  * The function divides instructions into blocks. Destination of a jump
    903  * instruction starts a new block. BPF_RET and BPF_JMP instructions
    904  * terminate a block. Blocks are linear, that is, there are no jumps out
    905  * from the middle of a block and there are no jumps in to the middle of
    906  * a block.
    907  * If a block has one or more "read from packet" instructions,
    908  * bj_check_length will be set to one value for the whole block and that
    909  * value will be equal to the greatest value of safe lengths of "read from
    910  * packet" instructions inside the block.
    911  */
    912 static int
    913 optimize(struct bpf_insn *insns,
    914     struct bpfjit_insn_data *insn_dat, size_t insn_count)
    915 {
    916 	size_t i;
    917 	size_t first_read;
    918 	bool unreachable;
    919 	uint32_t jt, jf;
    920 	uint32_t length, safe_length;
    921 	struct bpfjit_jump *jmp, *jtf;
    922 
    923 	for (i = 0; i < insn_count; i++)
    924 		SLIST_INIT(&insn_dat[i].bj_jumps);
    925 
    926 	safe_length = 0;
    927 	unreachable = false;
    928 	first_read = SIZE_MAX;
    929 
    930 	for (i = 0; i < insn_count; i++) {
    931 
    932 		if (!SLIST_EMPTY(&insn_dat[i].bj_jumps)) {
    933 			unreachable = false;
    934 
    935 			set_check_length(insns, insn_dat,
    936 			    first_read, i, safe_length);
    937 			first_read = SIZE_MAX;
    938 
    939 			safe_length = UINT32_MAX;
    940 			SLIST_FOREACH(jmp, &insn_dat[i].bj_jumps, bj_entries) {
    941 				if (jmp->bj_safe_length < safe_length)
    942 					safe_length = jmp->bj_safe_length;
    943 			}
    944 		}
    945 
    946 		insn_dat[i].bj_unreachable = unreachable;
    947 		if (unreachable)
    948 			continue;
    949 
    950 		if (read_pkt_insn(&insns[i], &length)) {
    951 			if (first_read == SIZE_MAX)
    952 				first_read = i;
    953 			if (length > safe_length)
    954 				safe_length = length;
    955 		}
    956 
    957 		switch (BPF_CLASS(insns[i].code)) {
    958 		case BPF_RET:
    959 			unreachable = true;
    960 			continue;
    961 
    962 		case BPF_JMP:
    963 			if (insns[i].code == (BPF_JMP|BPF_JA)) {
    964 				jt = jf = insns[i].k;
    965 			} else {
    966 				jt = insns[i].jt;
    967 				jf = insns[i].jf;
    968 			}
    969 
    970 			if (jt >= insn_count - (i + 1) ||
    971 			    jf >= insn_count - (i + 1)) {
    972 				return -1;
    973 			}
    974 
    975 			if (jt > 0 && jf > 0)
    976 				unreachable = true;
    977 
    978 			jtf = insn_dat[i].bj_aux.bj_jdata.bj_jtf;
    979 
    980 			jtf[0].bj_jump = NULL;
    981 			jtf[0].bj_safe_length = safe_length;
    982 			SLIST_INSERT_HEAD(&insn_dat[i + 1 + jt].bj_jumps,
    983 			    &jtf[0], bj_entries);
    984 
    985 			if (jf != jt) {
    986 				jtf[1].bj_jump = NULL;
    987 				jtf[1].bj_safe_length = safe_length;
    988 				SLIST_INSERT_HEAD(&insn_dat[i + 1 + jf].bj_jumps,
    989 				    &jtf[1], bj_entries);
    990 			}
    991 
    992 			continue;
    993 		}
    994 	}
    995 
    996 	set_check_length(insns, insn_dat, first_read, insn_count, safe_length);
    997 
    998 	return 0;
    999 }
   1000 
   1001 /*
   1002  * Count out-of-bounds and division by zero jumps.
   1003  *
   1004  * insn_dat should be initialized by optimize().
   1005  */
   1006 static size_t
   1007 get_ret0_size(struct bpf_insn *insns, struct bpfjit_insn_data *insn_dat,
   1008     size_t insn_count)
   1009 {
   1010 	size_t rv = 0;
   1011 	size_t i;
   1012 
   1013 	for (i = 0; i < insn_count; i++) {
   1014 
   1015 		if (read_pkt_insn(&insns[i], NULL)) {
   1016 			if (insn_dat[i].bj_aux.bj_rdata.bj_check_length > 0)
   1017 				rv++;
   1018 #ifdef _KERNEL
   1019 			rv++;
   1020 #endif
   1021 		}
   1022 
   1023 		if (insns[i].code == (BPF_LD|BPF_IND|BPF_B) ||
   1024 		    insns[i].code == (BPF_LD|BPF_IND|BPF_H) ||
   1025 		    insns[i].code == (BPF_LD|BPF_IND|BPF_W)) {
   1026 			rv++;
   1027 		}
   1028 
   1029 		if (insns[i].code == (BPF_ALU|BPF_DIV|BPF_X))
   1030 			rv++;
   1031 
   1032 		if (insns[i].code == (BPF_ALU|BPF_DIV|BPF_K) &&
   1033 		    insns[i].k == 0) {
   1034 			rv++;
   1035 		}
   1036 	}
   1037 
   1038 	return rv;
   1039 }
   1040 
   1041 /*
   1042  * Convert BPF_ALU operations except BPF_NEG and BPF_DIV to sljit operation.
   1043  */
   1044 static int
   1045 bpf_alu_to_sljit_op(struct bpf_insn *pc)
   1046 {
   1047 
   1048 	/*
   1049 	 * Note: all supported 64bit arches have 32bit multiply
   1050 	 * instruction so SLJIT_INT_OP doesn't have any overhead.
   1051 	 */
   1052 	switch (BPF_OP(pc->code)) {
   1053 	case BPF_ADD: return SLJIT_ADD;
   1054 	case BPF_SUB: return SLJIT_SUB;
   1055 	case BPF_MUL: return SLJIT_MUL|SLJIT_INT_OP;
   1056 	case BPF_OR:  return SLJIT_OR;
   1057 	case BPF_AND: return SLJIT_AND;
   1058 	case BPF_LSH: return SLJIT_SHL;
   1059 	case BPF_RSH: return SLJIT_LSHR|SLJIT_INT_OP;
   1060 	default:
   1061 		BPFJIT_ASSERT(false);
   1062 		return 0;
   1063 	}
   1064 }
   1065 
   1066 /*
   1067  * Convert BPF_JMP operations except BPF_JA to sljit condition.
   1068  */
   1069 static int
   1070 bpf_jmp_to_sljit_cond(struct bpf_insn *pc, bool negate)
   1071 {
   1072 	/*
   1073 	 * Note: all supported 64bit arches have 32bit comparison
   1074 	 * instructions so SLJIT_INT_OP doesn't have any overhead.
   1075 	 */
   1076 	int rv = SLJIT_INT_OP;
   1077 
   1078 	switch (BPF_OP(pc->code)) {
   1079 	case BPF_JGT:
   1080 		rv |= negate ? SLJIT_C_LESS_EQUAL : SLJIT_C_GREATER;
   1081 		break;
   1082 	case BPF_JGE:
   1083 		rv |= negate ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL;
   1084 		break;
   1085 	case BPF_JEQ:
   1086 		rv |= negate ? SLJIT_C_NOT_EQUAL : SLJIT_C_EQUAL;
   1087 		break;
   1088 	case BPF_JSET:
   1089 		rv |= negate ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL;
   1090 		break;
   1091 	default:
   1092 		BPFJIT_ASSERT(false);
   1093 	}
   1094 
   1095 	return rv;
   1096 }
   1097 
   1098 static unsigned int
   1099 bpfjit_optimization_hints(struct bpf_insn *insns, size_t insn_count)
   1100 {
   1101 	unsigned int rv = BPFJIT_INIT_A;
   1102 	struct bpf_insn *pc;
   1103 	unsigned int minm, maxm;
   1104 
   1105 	BPFJIT_ASSERT(BPF_MEMWORDS - 1 <= 0xff);
   1106 
   1107 	maxm = 0;
   1108 	minm = BPF_MEMWORDS - 1;
   1109 
   1110 	for (pc = insns; pc != insns + insn_count; pc++) {
   1111 		switch (BPF_CLASS(pc->code)) {
   1112 		case BPF_LD:
   1113 			if (BPF_MODE(pc->code) == BPF_IND)
   1114 				rv |= BPFJIT_INIT_X;
   1115 			if (BPF_MODE(pc->code) == BPF_MEM &&
   1116 			    (uint32_t)pc->k < BPF_MEMWORDS) {
   1117 				if (pc->k > maxm)
   1118 					maxm = pc->k;
   1119 				if (pc->k < minm)
   1120 					minm = pc->k;
   1121 			}
   1122 			continue;
   1123 		case BPF_LDX:
   1124 			rv |= BPFJIT_INIT_X;
   1125 			if (BPF_MODE(pc->code) == BPF_MEM &&
   1126 			    (uint32_t)pc->k < BPF_MEMWORDS) {
   1127 				if (pc->k > maxm)
   1128 					maxm = pc->k;
   1129 				if (pc->k < minm)
   1130 					minm = pc->k;
   1131 			}
   1132 			continue;
   1133 		case BPF_ST:
   1134 			if ((uint32_t)pc->k < BPF_MEMWORDS) {
   1135 				if (pc->k > maxm)
   1136 					maxm = pc->k;
   1137 				if (pc->k < minm)
   1138 					minm = pc->k;
   1139 			}
   1140 			continue;
   1141 		case BPF_STX:
   1142 			rv |= BPFJIT_INIT_X;
   1143 			if ((uint32_t)pc->k < BPF_MEMWORDS) {
   1144 				if (pc->k > maxm)
   1145 					maxm = pc->k;
   1146 				if (pc->k < minm)
   1147 					minm = pc->k;
   1148 			}
   1149 			continue;
   1150 		case BPF_ALU:
   1151 			if (pc->code == (BPF_ALU|BPF_NEG))
   1152 				continue;
   1153 			if (BPF_SRC(pc->code) == BPF_X)
   1154 				rv |= BPFJIT_INIT_X;
   1155 			continue;
   1156 		case BPF_JMP:
   1157 			if (pc->code == (BPF_JMP|BPF_JA))
   1158 				continue;
   1159 			if (BPF_SRC(pc->code) == BPF_X)
   1160 				rv |= BPFJIT_INIT_X;
   1161 			continue;
   1162 		case BPF_RET:
   1163 			continue;
   1164 		case BPF_MISC:
   1165 			rv |= BPFJIT_INIT_X;
   1166 			continue;
   1167 		default:
   1168 			BPFJIT_ASSERT(false);
   1169 		}
   1170 	}
   1171 
   1172 	return rv | (maxm << 8) | minm;
   1173 }
   1174 
   1175 /*
   1176  * Convert BPF_K and BPF_X to sljit register.
   1177  */
   1178 static int
   1179 kx_to_reg(struct bpf_insn *pc)
   1180 {
   1181 
   1182 	switch (BPF_SRC(pc->code)) {
   1183 	case BPF_K: return SLJIT_IMM;
   1184 	case BPF_X: return BPFJIT_X;
   1185 	default:
   1186 		BPFJIT_ASSERT(false);
   1187 		return 0;
   1188 	}
   1189 }
   1190 
   1191 static sljit_w
   1192 kx_to_reg_arg(struct bpf_insn *pc)
   1193 {
   1194 
   1195 	switch (BPF_SRC(pc->code)) {
   1196 	case BPF_K: return (uint32_t)pc->k; /* SLJIT_IMM, pc->k, */
   1197 	case BPF_X: return 0;               /* BPFJIT_X, 0,      */
   1198 	default:
   1199 		BPFJIT_ASSERT(false);
   1200 		return 0;
   1201 	}
   1202 }
   1203 
   1204 bpfjit_function_t
   1205 bpfjit_generate_code(struct bpf_insn *insns, size_t insn_count)
   1206 {
   1207 	void *rv;
   1208 	size_t i;
   1209 	int status;
   1210 	int branching, negate;
   1211 	unsigned int rval, mode, src;
   1212 	int ntmp;
   1213 	unsigned int locals_size;
   1214 	unsigned int minm, maxm; /* min/max k for M[k] */
   1215 	size_t mem_locals_start; /* start of M[] array */
   1216 	unsigned int opts;
   1217 	struct bpf_insn *pc;
   1218 	struct sljit_compiler* compiler;
   1219 
   1220 	/* a list of jumps to a normal return from a generated function */
   1221 	struct sljit_jump **returns;
   1222 	size_t returns_size, returns_maxsize;
   1223 
   1224 	/* a list of jumps to out-of-bound return from a generated function */
   1225 	struct sljit_jump **ret0;
   1226 	size_t ret0_size, ret0_maxsize;
   1227 
   1228 	struct bpfjit_insn_data *insn_dat;
   1229 
   1230 	/* for local use */
   1231 	struct sljit_label *label;
   1232 	struct sljit_jump *jump;
   1233 	struct bpfjit_jump *bjump, *jtf;
   1234 
   1235 	struct sljit_jump *to_mchain_jump;
   1236 
   1237 	uint32_t jt, jf;
   1238 
   1239 	rv = NULL;
   1240 	compiler = NULL;
   1241 	insn_dat = NULL;
   1242 	returns = NULL;
   1243 	ret0 = NULL;
   1244 
   1245 	opts = bpfjit_optimization_hints(insns, insn_count);
   1246 	minm = opts & 0xff;
   1247 	maxm = (opts >> 8) & 0xff;
   1248 	mem_locals_start = mem_local_offset(0, 0);
   1249 	locals_size = (minm <= maxm) ?
   1250 	    mem_local_offset(maxm + 1, minm) : mem_locals_start;
   1251 
   1252 	ntmp = 4;
   1253 #ifdef _KERNEL
   1254 	ntmp += 1; /* for BPFJIT_KERN_TMP */
   1255 #endif
   1256 
   1257 	returns_maxsize = count_returns(insns, insn_count);
   1258 	if (returns_maxsize  == 0)
   1259 		goto fail;
   1260 
   1261 	insn_dat = BPFJIT_MALLOC(insn_count * sizeof(insn_dat[0]));
   1262 	if (insn_dat == NULL)
   1263 		goto fail;
   1264 
   1265 	if (optimize(insns, insn_dat, insn_count) < 0)
   1266 		goto fail;
   1267 
   1268 	ret0_size = 0;
   1269 	ret0_maxsize = get_ret0_size(insns, insn_dat, insn_count);
   1270 	if (ret0_maxsize > 0) {
   1271 		ret0 = BPFJIT_MALLOC(ret0_maxsize * sizeof(ret0[0]));
   1272 		if (ret0 == NULL)
   1273 			goto fail;
   1274 	}
   1275 
   1276 	returns_size = 0;
   1277 	returns = BPFJIT_MALLOC(returns_maxsize * sizeof(returns[0]));
   1278 	if (returns == NULL)
   1279 		goto fail;
   1280 
   1281 	compiler = sljit_create_compiler();
   1282 	if (compiler == NULL)
   1283 		goto fail;
   1284 
   1285 #if !defined(_KERNEL) && defined(SLJIT_VERBOSE) && SLJIT_VERBOSE
   1286 	sljit_compiler_verbose(compiler, stderr);
   1287 #endif
   1288 
   1289 	status = sljit_emit_enter(compiler, 3, ntmp, 3, locals_size);
   1290 	if (status != SLJIT_SUCCESS)
   1291 		goto fail;
   1292 
   1293 	for (i = mem_locals_start; i < locals_size; i+= sizeof(uint32_t)) {
   1294 		status = sljit_emit_op1(compiler,
   1295 		    SLJIT_MOV_UI,
   1296 		    SLJIT_MEM1(SLJIT_LOCALS_REG), i,
   1297 		    SLJIT_IMM, 0);
   1298 		if (status != SLJIT_SUCCESS)
   1299 			goto fail;
   1300 	}
   1301 
   1302 	if (opts & BPFJIT_INIT_A) {
   1303 		/* A = 0; */
   1304 		status = sljit_emit_op1(compiler,
   1305 		    SLJIT_MOV,
   1306 		    BPFJIT_A, 0,
   1307 		    SLJIT_IMM, 0);
   1308 		if (status != SLJIT_SUCCESS)
   1309 			goto fail;
   1310 	}
   1311 
   1312 	if (opts & BPFJIT_INIT_X) {
   1313 		/* X = 0; */
   1314 		status = sljit_emit_op1(compiler,
   1315 		    SLJIT_MOV,
   1316 		    BPFJIT_X, 0,
   1317 		    SLJIT_IMM, 0);
   1318 		if (status != SLJIT_SUCCESS)
   1319 			goto fail;
   1320 	}
   1321 
   1322 	for (i = 0; i < insn_count; i++) {
   1323 		if (insn_dat[i].bj_unreachable)
   1324 			continue;
   1325 
   1326 		to_mchain_jump = NULL;
   1327 
   1328 		/*
   1329 		 * Resolve jumps to the current insn.
   1330 		 */
   1331 		label = NULL;
   1332 		SLIST_FOREACH(bjump, &insn_dat[i].bj_jumps, bj_entries) {
   1333 			if (bjump->bj_jump != NULL) {
   1334 				if (label == NULL)
   1335 					label = sljit_emit_label(compiler);
   1336 				if (label == NULL)
   1337 					goto fail;
   1338 				sljit_set_label(bjump->bj_jump, label);
   1339 			}
   1340 		}
   1341 
   1342 		if (read_pkt_insn(&insns[i], NULL) &&
   1343 		    insn_dat[i].bj_aux.bj_rdata.bj_check_length > 0) {
   1344 			/* if (buflen < bj_check_length) return 0; */
   1345 			jump = sljit_emit_cmp(compiler,
   1346 			    SLJIT_C_LESS,
   1347 			    BPFJIT_BUFLEN, 0,
   1348 			    SLJIT_IMM,
   1349 			    insn_dat[i].bj_aux.bj_rdata.bj_check_length);
   1350 			if (jump == NULL)
   1351 		  		goto fail;
   1352 #ifdef _KERNEL
   1353 			to_mchain_jump = jump;
   1354 #else
   1355 			ret0[ret0_size++] = jump;
   1356 #endif
   1357 		}
   1358 
   1359 		pc = &insns[i];
   1360 		switch (BPF_CLASS(pc->code)) {
   1361 
   1362 		default:
   1363 			goto fail;
   1364 
   1365 		case BPF_LD:
   1366 			/* BPF_LD+BPF_IMM          A <- k */
   1367 			if (pc->code == (BPF_LD|BPF_IMM)) {
   1368 				status = sljit_emit_op1(compiler,
   1369 				    SLJIT_MOV,
   1370 				    BPFJIT_A, 0,
   1371 				    SLJIT_IMM, (uint32_t)pc->k);
   1372 				if (status != SLJIT_SUCCESS)
   1373 					goto fail;
   1374 
   1375 				continue;
   1376 			}
   1377 
   1378 			/* BPF_LD+BPF_MEM          A <- M[k] */
   1379 			if (pc->code == (BPF_LD|BPF_MEM)) {
   1380 				if (pc->k < minm || pc->k > maxm)
   1381 					goto fail;
   1382 				status = sljit_emit_op1(compiler,
   1383 				    SLJIT_MOV_UI,
   1384 				    BPFJIT_A, 0,
   1385 				    SLJIT_MEM1(SLJIT_LOCALS_REG),
   1386 				    mem_local_offset(pc->k, minm));
   1387 				if (status != SLJIT_SUCCESS)
   1388 					goto fail;
   1389 
   1390 				continue;
   1391 			}
   1392 
   1393 			/* BPF_LD+BPF_W+BPF_LEN    A <- len */
   1394 			if (pc->code == (BPF_LD|BPF_W|BPF_LEN)) {
   1395 				status = sljit_emit_op1(compiler,
   1396 				    SLJIT_MOV,
   1397 				    BPFJIT_A, 0,
   1398 				    BPFJIT_WIRELEN, 0);
   1399 				if (status != SLJIT_SUCCESS)
   1400 					goto fail;
   1401 
   1402 				continue;
   1403 			}
   1404 
   1405 			mode = BPF_MODE(pc->code);
   1406 			if (mode != BPF_ABS && mode != BPF_IND)
   1407 				goto fail;
   1408 
   1409 			status = emit_pkt_read(compiler, pc,
   1410 			    to_mchain_jump, ret0, &ret0_size);
   1411 			if (status != SLJIT_SUCCESS)
   1412 				goto fail;
   1413 
   1414 			continue;
   1415 
   1416 		case BPF_LDX:
   1417 			mode = BPF_MODE(pc->code);
   1418 
   1419 			/* BPF_LDX+BPF_W+BPF_IMM    X <- k */
   1420 			if (mode == BPF_IMM) {
   1421 				if (BPF_SIZE(pc->code) != BPF_W)
   1422 					goto fail;
   1423 				status = sljit_emit_op1(compiler,
   1424 				    SLJIT_MOV,
   1425 				    BPFJIT_X, 0,
   1426 				    SLJIT_IMM, (uint32_t)pc->k);
   1427 				if (status != SLJIT_SUCCESS)
   1428 					goto fail;
   1429 
   1430 				continue;
   1431 			}
   1432 
   1433 			/* BPF_LDX+BPF_W+BPF_LEN    X <- len */
   1434 			if (mode == BPF_LEN) {
   1435 				if (BPF_SIZE(pc->code) != BPF_W)
   1436 					goto fail;
   1437 				status = sljit_emit_op1(compiler,
   1438 				    SLJIT_MOV,
   1439 				    BPFJIT_X, 0,
   1440 				    BPFJIT_WIRELEN, 0);
   1441 				if (status != SLJIT_SUCCESS)
   1442 					goto fail;
   1443 
   1444 				continue;
   1445 			}
   1446 
   1447 			/* BPF_LDX+BPF_W+BPF_MEM    X <- M[k] */
   1448 			if (mode == BPF_MEM) {
   1449 				if (BPF_SIZE(pc->code) != BPF_W)
   1450 					goto fail;
   1451 				if (pc->k < minm || pc->k > maxm)
   1452 					goto fail;
   1453 				status = sljit_emit_op1(compiler,
   1454 				    SLJIT_MOV_UI,
   1455 				    BPFJIT_X, 0,
   1456 				    SLJIT_MEM1(SLJIT_LOCALS_REG),
   1457 				    mem_local_offset(pc->k, minm));
   1458 				if (status != SLJIT_SUCCESS)
   1459 					goto fail;
   1460 
   1461 				continue;
   1462 			}
   1463 
   1464 			/* BPF_LDX+BPF_B+BPF_MSH    X <- 4*(P[k:1]&0xf) */
   1465 			if (mode != BPF_MSH || BPF_SIZE(pc->code) != BPF_B)
   1466 				goto fail;
   1467 
   1468 			status = emit_msh(compiler, pc,
   1469 			    to_mchain_jump, ret0, &ret0_size);
   1470 			if (status != SLJIT_SUCCESS)
   1471 				goto fail;
   1472 
   1473 			continue;
   1474 
   1475 		case BPF_ST:
   1476 			if (pc->code != BPF_ST || pc->k < minm || pc->k > maxm)
   1477 				goto fail;
   1478 
   1479 			status = sljit_emit_op1(compiler,
   1480 			    SLJIT_MOV_UI,
   1481 			    SLJIT_MEM1(SLJIT_LOCALS_REG),
   1482 			    mem_local_offset(pc->k, minm),
   1483 			    BPFJIT_A, 0);
   1484 			if (status != SLJIT_SUCCESS)
   1485 				goto fail;
   1486 
   1487 			continue;
   1488 
   1489 		case BPF_STX:
   1490 			if (pc->code != BPF_STX || pc->k < minm || pc->k > maxm)
   1491 				goto fail;
   1492 
   1493 			status = sljit_emit_op1(compiler,
   1494 			    SLJIT_MOV_UI,
   1495 			    SLJIT_MEM1(SLJIT_LOCALS_REG),
   1496 			    mem_local_offset(pc->k, minm),
   1497 			    BPFJIT_X, 0);
   1498 			if (status != SLJIT_SUCCESS)
   1499 				goto fail;
   1500 
   1501 			continue;
   1502 
   1503 		case BPF_ALU:
   1504 
   1505 			if (pc->code == (BPF_ALU|BPF_NEG)) {
   1506 				status = sljit_emit_op1(compiler,
   1507 				    SLJIT_NEG,
   1508 				    BPFJIT_A, 0,
   1509 				    BPFJIT_A, 0);
   1510 				if (status != SLJIT_SUCCESS)
   1511 					goto fail;
   1512 
   1513 				continue;
   1514 			}
   1515 
   1516 			if (BPF_OP(pc->code) != BPF_DIV) {
   1517 				status = sljit_emit_op2(compiler,
   1518 				    bpf_alu_to_sljit_op(pc),
   1519 				    BPFJIT_A, 0,
   1520 				    BPFJIT_A, 0,
   1521 				    kx_to_reg(pc), kx_to_reg_arg(pc));
   1522 				if (status != SLJIT_SUCCESS)
   1523 					goto fail;
   1524 
   1525 				continue;
   1526 			}
   1527 
   1528 			/* BPF_DIV */
   1529 
   1530 			src = BPF_SRC(pc->code);
   1531 			if (src != BPF_X && src != BPF_K)
   1532 				goto fail;
   1533 
   1534 			/* division by zero? */
   1535 			if (src == BPF_X) {
   1536 				jump = sljit_emit_cmp(compiler,
   1537 				    SLJIT_C_EQUAL|SLJIT_INT_OP,
   1538 				    BPFJIT_X, 0,
   1539 				    SLJIT_IMM, 0);
   1540 				if (jump == NULL)
   1541 					goto fail;
   1542 				ret0[ret0_size++] = jump;
   1543 			} else if (pc->k == 0) {
   1544 				jump = sljit_emit_jump(compiler, SLJIT_JUMP);
   1545 				if (jump == NULL)
   1546 					goto fail;
   1547 				ret0[ret0_size++] = jump;
   1548 			}
   1549 
   1550 			if (src == BPF_X) {
   1551 				status = emit_division(compiler, BPFJIT_X, 0);
   1552 				if (status != SLJIT_SUCCESS)
   1553 					goto fail;
   1554 			} else if (pc->k != 0) {
   1555 				if (pc->k & (pc->k - 1)) {
   1556 				    status = emit_division(compiler,
   1557 				        SLJIT_IMM, (uint32_t)pc->k);
   1558 				} else {
   1559     				    status = emit_pow2_division(compiler,
   1560 				        (uint32_t)pc->k);
   1561 				}
   1562 				if (status != SLJIT_SUCCESS)
   1563 					goto fail;
   1564 			}
   1565 
   1566 			continue;
   1567 
   1568 		case BPF_JMP:
   1569 
   1570 			if (pc->code == (BPF_JMP|BPF_JA)) {
   1571 				jt = jf = pc->k;
   1572 			} else {
   1573 				jt = pc->jt;
   1574 				jf = pc->jf;
   1575 			}
   1576 
   1577 			negate = (jt == 0) ? 1 : 0;
   1578 			branching = (jt == jf) ? 0 : 1;
   1579 			jtf = insn_dat[i].bj_aux.bj_jdata.bj_jtf;
   1580 
   1581 			if (branching) {
   1582 				if (BPF_OP(pc->code) != BPF_JSET) {
   1583 					jump = sljit_emit_cmp(compiler,
   1584 					    bpf_jmp_to_sljit_cond(pc, negate),
   1585 					    BPFJIT_A, 0,
   1586 					    kx_to_reg(pc), kx_to_reg_arg(pc));
   1587 				} else {
   1588 					status = sljit_emit_op2(compiler,
   1589 					    SLJIT_AND,
   1590 					    BPFJIT_TMP1, 0,
   1591 					    BPFJIT_A, 0,
   1592 					    kx_to_reg(pc), kx_to_reg_arg(pc));
   1593 					if (status != SLJIT_SUCCESS)
   1594 						goto fail;
   1595 
   1596 					jump = sljit_emit_cmp(compiler,
   1597 					    bpf_jmp_to_sljit_cond(pc, negate),
   1598 					    BPFJIT_TMP1, 0,
   1599 					    SLJIT_IMM, 0);
   1600 				}
   1601 
   1602 				if (jump == NULL)
   1603 					goto fail;
   1604 
   1605 				BPFJIT_ASSERT(jtf[negate].bj_jump == NULL);
   1606 				jtf[negate].bj_jump = jump;
   1607 			}
   1608 
   1609 			if (!branching || (jt != 0 && jf != 0)) {
   1610 				jump = sljit_emit_jump(compiler, SLJIT_JUMP);
   1611 				if (jump == NULL)
   1612 					goto fail;
   1613 
   1614 				BPFJIT_ASSERT(jtf[branching].bj_jump == NULL);
   1615 				jtf[branching].bj_jump = jump;
   1616 			}
   1617 
   1618 			continue;
   1619 
   1620 		case BPF_RET:
   1621 
   1622 			rval = BPF_RVAL(pc->code);
   1623 			if (rval == BPF_X)
   1624 				goto fail;
   1625 
   1626 			/* BPF_RET+BPF_K    accept k bytes */
   1627 			if (rval == BPF_K) {
   1628 				status = sljit_emit_op1(compiler,
   1629 				    SLJIT_MOV,
   1630 				    BPFJIT_A, 0,
   1631 				    SLJIT_IMM, (uint32_t)pc->k);
   1632 				if (status != SLJIT_SUCCESS)
   1633 					goto fail;
   1634 			}
   1635 
   1636 			/* BPF_RET+BPF_A    accept A bytes */
   1637 			if (rval == BPF_A) {
   1638 #if BPFJIT_A != SLJIT_RETURN_REG
   1639 				status = sljit_emit_op1(compiler,
   1640 				    SLJIT_MOV,
   1641 				    SLJIT_RETURN_REG, 0,
   1642 				    BPFJIT_A, 0);
   1643 				if (status != SLJIT_SUCCESS)
   1644 					goto fail;
   1645 #endif
   1646 			}
   1647 
   1648 			/*
   1649 			 * Save a jump to a normal return. If the program
   1650 			 * ends with BPF_RET, no jump is needed because
   1651 			 * the normal return is generated right after the
   1652 			 * last instruction.
   1653 			 */
   1654 			if (i != insn_count - 1) {
   1655 				jump = sljit_emit_jump(compiler, SLJIT_JUMP);
   1656 				if (jump == NULL)
   1657 					goto fail;
   1658 				returns[returns_size++] = jump;
   1659 			}
   1660 
   1661 			continue;
   1662 
   1663 		case BPF_MISC:
   1664 
   1665 			if (pc->code == (BPF_MISC|BPF_TAX)) {
   1666 				status = sljit_emit_op1(compiler,
   1667 				    SLJIT_MOV_UI,
   1668 				    BPFJIT_X, 0,
   1669 				    BPFJIT_A, 0);
   1670 				if (status != SLJIT_SUCCESS)
   1671 					goto fail;
   1672 
   1673 				continue;
   1674 			}
   1675 
   1676 			if (pc->code == (BPF_MISC|BPF_TXA)) {
   1677 				status = sljit_emit_op1(compiler,
   1678 				    SLJIT_MOV,
   1679 				    BPFJIT_A, 0,
   1680 				    BPFJIT_X, 0);
   1681 				if (status != SLJIT_SUCCESS)
   1682 					goto fail;
   1683 
   1684 				continue;
   1685 			}
   1686 
   1687 			goto fail;
   1688 		} /* switch */
   1689 	} /* main loop */
   1690 
   1691 	BPFJIT_ASSERT(ret0_size == ret0_maxsize);
   1692 	BPFJIT_ASSERT(returns_size <= returns_maxsize);
   1693 
   1694 	if (returns_size > 0) {
   1695 		label = sljit_emit_label(compiler);
   1696 		if (label == NULL)
   1697 			goto fail;
   1698 		for (i = 0; i < returns_size; i++)
   1699 			sljit_set_label(returns[i], label);
   1700 	}
   1701 
   1702 	status = sljit_emit_return(compiler,
   1703 	    SLJIT_MOV_UI,
   1704 	    BPFJIT_A, 0);
   1705 	if (status != SLJIT_SUCCESS)
   1706 		goto fail;
   1707 
   1708 	if (ret0_size > 0) {
   1709 		label = sljit_emit_label(compiler);
   1710 		if (label == NULL)
   1711 			goto fail;
   1712 
   1713 		for (i = 0; i < ret0_size; i++)
   1714 			sljit_set_label(ret0[i], label);
   1715 
   1716 		status = sljit_emit_op1(compiler,
   1717 		    SLJIT_MOV,
   1718 		    SLJIT_RETURN_REG, 0,
   1719 		    SLJIT_IMM, 0);
   1720 		if (status != SLJIT_SUCCESS)
   1721 			goto fail;
   1722 
   1723 		status = sljit_emit_return(compiler,
   1724 		    SLJIT_MOV_UI,
   1725 		    SLJIT_RETURN_REG, 0);
   1726 		if (status != SLJIT_SUCCESS)
   1727 			goto fail;
   1728 	}
   1729 
   1730 	rv = sljit_generate_code(compiler);
   1731 
   1732 fail:
   1733 	if (compiler != NULL)
   1734 		sljit_free_compiler(compiler);
   1735 
   1736 	if (insn_dat != NULL)
   1737 		BPFJIT_FREE(insn_dat);
   1738 
   1739 	if (returns != NULL)
   1740 		BPFJIT_FREE(returns);
   1741 
   1742 	if (ret0 != NULL)
   1743 		BPFJIT_FREE(ret0);
   1744 
   1745 	return (bpfjit_function_t)rv;
   1746 }
   1747 
   1748 void
   1749 bpfjit_free_code(bpfjit_function_t code)
   1750 {
   1751 
   1752 	sljit_free_code((void *)code);
   1753 }
   1754