/*	$NetBSD: sljitNativeX86_64.c,v 1.4 2019/01/20 23:14:16 alnsn Exp $	*/

/*
 *    Stack-less Just-In-Time compiler
 *
 *    Copyright Zoltan Herczeg (hzmester (at) freemail.hu). All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification, are
 * permitted provided that the following conditions are met:
 *
 *   1. Redistributions of source code must retain the above copyright notice, this list of
 *      conditions and the following disclaimer.
 *
 *   2. Redistributions in binary form must reproduce the above copyright notice, this list
 *      of conditions and the following disclaimer in the documentation and/or other materials
 *      provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
 * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/* x86 64-bit arch dependent functions. */

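/* Loads a full 64 bit immediate into reg using the 10 byte REX prefixed MOV_r_i32 form. */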
static sljit_s32 emit_load_imm64(struct sljit_compiler *compiler, sljit_s32 reg, sljit_sw imm)
{
	sljit_u8 *inst;

	inst = (sljit_u8*)ensure_buf(compiler, 1 + 2 + sizeof(sljit_sw));
	FAIL_IF(!inst);
	INC_SIZE(2 + sizeof(sljit_sw));
	*inst++ = REX_W | ((reg_map[reg] <= 7) ? 0 : REX_B);
	*inst++ = MOV_r_i32 + (reg_map[reg] & 0x7);
	sljit_unaligned_store_sw(inst, imm);
	return SLJIT_SUCCESS;
}

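/* Emits a far jump or call: the target is loaded into TMP_REG3 (r9) with a 10 byte
   mov (REX_W | REX_B, MOV_r_i32 + 1, imm64), followed by a 3 byte indirect call/jmp
   through r9. A conditional jump is emitted as an inverted short Jcc that skips
   these 10 + 3 bytes. */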
static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_s32 type)
{
	if (type < SLJIT_JUMP) {
		/* Invert type. */
		*code_ptr++ = get_jump_code(type ^ 0x1) - 0x10;
		*code_ptr++ = 10 + 3;
	}

	SLJIT_ASSERT(reg_map[TMP_REG3] == 9);
	*code_ptr++ = REX_W | REX_B;
	*code_ptr++ = MOV_r_i32 + 1;
	jump->addr = (sljit_uw)code_ptr;

	if (jump->flags & JUMP_LABEL)
		jump->flags |= PATCH_MD;
	else
		sljit_unaligned_store_sw(code_ptr, jump->u.target);

	code_ptr += sizeof(sljit_sw);
	*code_ptr++ = REX_B;
	*code_ptr++ = GROUP_FF;
	*code_ptr++ = (type >= SLJIT_FAST_CALL) ? (MOD_REG | CALL_rm | 1) : (MOD_REG | JMP_rm | 1);

	return code_ptr;
}

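/* Function prologue: pushes the registers that must be preserved, moves the incoming
   arguments into saved registers and allocates the 16 byte aligned local frame. */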
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler,
	sljit_s32 options, sljit_s32 args, sljit_s32 scratches, sljit_s32 saveds,
	sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
{
	sljit_s32 i, tmp, size, saved_register_size;
	sljit_u8 *inst;

	CHECK_ERROR();
	CHECK(check_sljit_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size));
	set_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size);

#ifdef _WIN64
	/* Two/four register slots for parameters plus space for xmm6 register if needed. */
	if (fscratches >= 6 || fsaveds >= 1)
		compiler->locals_offset = 6 * sizeof(sljit_sw);
	else
		compiler->locals_offset = ((scratches > 2) ? 4 : 2) * sizeof(sljit_sw);
#endif

	/* Including the return address saved by the call instruction. */
	saved_register_size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1);

	tmp = saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - saveds) : SLJIT_FIRST_SAVED_REG;
	for (i = SLJIT_S0; i >= tmp; i--) {
		size = reg_map[i] >= 8 ? 2 : 1;
		inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
		FAIL_IF(!inst);
		INC_SIZE(size);
		if (reg_map[i] >= 8)
			*inst++ = REX_B;
		PUSH_REG(reg_lmap[i]);
	}

	for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--) {
		size = reg_map[i] >= 8 ? 2 : 1;
		inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
		FAIL_IF(!inst);
		INC_SIZE(size);
		if (reg_map[i] >= 8)
			*inst++ = REX_B;
		PUSH_REG(reg_lmap[i]);
	}

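	/* Move the incoming arguments (rdi/rsi/rdx on SysV, rcx/rdx/r8 on WIN64) into
	   the saved registers S0..S2. */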
	if (args > 0) {
		size = args * 3;
		inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
		FAIL_IF(!inst);

		INC_SIZE(size);

#ifndef _WIN64
		if (args > 0) {
			*inst++ = REX_W;
			*inst++ = MOV_r_rm;
			*inst++ = MOD_REG | (reg_map[SLJIT_S0] << 3) | 0x7 /* rdi */;
		}
		if (args > 1) {
			*inst++ = REX_W | REX_R;
			*inst++ = MOV_r_rm;
			*inst++ = MOD_REG | (reg_lmap[SLJIT_S1] << 3) | 0x6 /* rsi */;
		}
		if (args > 2) {
			*inst++ = REX_W | REX_R;
			*inst++ = MOV_r_rm;
			*inst++ = MOD_REG | (reg_lmap[SLJIT_S2] << 3) | 0x2 /* rdx */;
		}
#else
		if (args > 0) {
			*inst++ = REX_W;
			*inst++ = MOV_r_rm;
			*inst++ = MOD_REG | (reg_map[SLJIT_S0] << 3) | 0x1 /* rcx */;
		}
		if (args > 1) {
			*inst++ = REX_W;
			*inst++ = MOV_r_rm;
			*inst++ = MOD_REG | (reg_map[SLJIT_S1] << 3) | 0x2 /* rdx */;
		}
		if (args > 2) {
			*inst++ = REX_W | REX_B;
			*inst++ = MOV_r_rm;
			*inst++ = MOD_REG | (reg_map[SLJIT_S2] << 3) | 0x0 /* r8 */;
		}
#endif
	}

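	/* Round the frame size up so that rsp stays 16 byte aligned after the pushes;
	   saved_register_size already includes the return address pushed by the caller. */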
	local_size = ((local_size + SLJIT_LOCALS_OFFSET + saved_register_size + 15) & ~15) - saved_register_size;
	compiler->local_size = local_size;

#ifdef _WIN64
	if (local_size > 1024) {
		/* Allocate a small amount of stack for the sljit_grow_stack call, which grows the stack. */
		inst = (sljit_u8*)ensure_buf(compiler, 1 + 4 + (3 + sizeof(sljit_s32)));
		FAIL_IF(!inst);
		INC_SIZE(4 + (3 + sizeof(sljit_s32)));
		*inst++ = REX_W;
		*inst++ = GROUP_BINARY_83;
		*inst++ = MOD_REG | SUB | reg_map[SLJIT_SP];
		/* Allocated size for registers must be divisible by 8. */
		SLJIT_ASSERT(!(saved_register_size & 0x7));
		/* Aligned to 16 bytes. */
		if (saved_register_size & 0x8) {
			*inst++ = 5 * sizeof(sljit_sw);
			local_size -= 5 * sizeof(sljit_sw);
		} else {
			*inst++ = 4 * sizeof(sljit_sw);
			local_size -= 4 * sizeof(sljit_sw);
		}
		/* Second instruction. */
		SLJIT_ASSERT(reg_map[SLJIT_R0] < 8);
		*inst++ = REX_W;
		*inst++ = MOV_rm_i32;
		*inst++ = MOD_REG | reg_lmap[SLJIT_R0];
		sljit_unaligned_store_s32(inst, local_size);
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
			|| (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
		compiler->skip_checks = 1;
#endif
		FAIL_IF(sljit_emit_ijump(compiler, SLJIT_CALL1, SLJIT_IMM, SLJIT_FUNC_OFFSET(sljit_grow_stack)));
	}
#endif

	if (local_size > 0) {
		if (local_size <= 127) {
			inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
			FAIL_IF(!inst);
			INC_SIZE(4);
			*inst++ = REX_W;
			*inst++ = GROUP_BINARY_83;
			*inst++ = MOD_REG | SUB | reg_map[SLJIT_SP];
			*inst++ = local_size;
		}
		else {
			inst = (sljit_u8*)ensure_buf(compiler, 1 + 7);
			FAIL_IF(!inst);
			INC_SIZE(7);
			*inst++ = REX_W;
			*inst++ = GROUP_BINARY_81;
			*inst++ = MOD_REG | SUB | reg_map[SLJIT_SP];
			sljit_unaligned_store_s32(inst, local_size);
			inst += sizeof(sljit_s32);
		}
	}

#ifdef _WIN64
	/* Save xmm6 register: movaps [rsp + 0x20], xmm6 */
	if (fscratches >= 6 || fsaveds >= 1) {
		inst = (sljit_u8*)ensure_buf(compiler, 1 + 5);
		FAIL_IF(!inst);
		INC_SIZE(5);
		*inst++ = GROUP_0F;
		sljit_unaligned_store_s32(inst, 0x20247429);
	}
#endif

	return SLJIT_SUCCESS;
}

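/* Computes the same frame layout as sljit_emit_enter but emits no code, so later
   emitters see the same local_size as an already generated prologue. */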
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler,
	sljit_s32 options, sljit_s32 args, sljit_s32 scratches, sljit_s32 saveds,
	sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
{
	sljit_s32 saved_register_size;

	CHECK_ERROR();
	CHECK(check_sljit_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size));
	set_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size);

#ifdef _WIN64
	/* Two/four register slots for parameters plus space for xmm6 register if needed. */
	if (fscratches >= 6 || fsaveds >= 1)
		compiler->locals_offset = 6 * sizeof(sljit_sw);
	else
		compiler->locals_offset = ((scratches > 2) ? 4 : 2) * sizeof(sljit_sw);
#endif

	/* Including the return address saved by the call instruction. */
	saved_register_size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1);
	compiler->local_size = ((local_size + SLJIT_LOCALS_OFFSET + saved_register_size + 15) & ~15) - saved_register_size;
	return SLJIT_SUCCESS;
}

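/* Function epilogue: restores xmm6 on WIN64, releases the local frame, pops the
   preserved registers in reverse order and returns. */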
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 i, tmp, size;
	sljit_u8 *inst;

	CHECK_ERROR();
	CHECK(check_sljit_emit_return(compiler, op, src, srcw));

	FAIL_IF(emit_mov_before_return(compiler, op, src, srcw));

#ifdef _WIN64
	/* Restore xmm6 register: movaps xmm6, [rsp + 0x20] */
	if (compiler->fscratches >= 6 || compiler->fsaveds >= 1) {
		inst = (sljit_u8*)ensure_buf(compiler, 1 + 5);
		FAIL_IF(!inst);
		INC_SIZE(5);
		*inst++ = GROUP_0F;
		sljit_unaligned_store_s32(inst, 0x20247428);
	}
#endif

	if (compiler->local_size > 0) {
		if (compiler->local_size <= 127) {
			inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
			FAIL_IF(!inst);
			INC_SIZE(4);
			*inst++ = REX_W;
			*inst++ = GROUP_BINARY_83;
			*inst++ = MOD_REG | ADD | 4;
			*inst = compiler->local_size;
		}
		else {
			inst = (sljit_u8*)ensure_buf(compiler, 1 + 7);
			FAIL_IF(!inst);
			INC_SIZE(7);
			*inst++ = REX_W;
			*inst++ = GROUP_BINARY_81;
			*inst++ = MOD_REG | ADD | 4;
			sljit_unaligned_store_s32(inst, compiler->local_size);
		}
	}

	tmp = compiler->scratches;
	for (i = SLJIT_FIRST_SAVED_REG; i <= tmp; i++) {
		size = reg_map[i] >= 8 ? 2 : 1;
		inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
		FAIL_IF(!inst);
		INC_SIZE(size);
		if (reg_map[i] >= 8)
			*inst++ = REX_B;
		POP_REG(reg_lmap[i]);
	}

	tmp = compiler->saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - compiler->saveds) : SLJIT_FIRST_SAVED_REG;
	for (i = tmp; i <= SLJIT_S0; i++) {
		size = reg_map[i] >= 8 ? 2 : 1;
		inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
		FAIL_IF(!inst);
		INC_SIZE(size);
		if (reg_map[i] >= 8)
			*inst++ = REX_B;
		POP_REG(reg_lmap[i]);
	}

	inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
	FAIL_IF(!inst);
	INC_SIZE(1);
	RET();
	return SLJIT_SUCCESS;
}

/* --------------------------------------------------------------------- */
/*  Operators                                                            */
/* --------------------------------------------------------------------- */

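/* Emits an optional REX prefix, a single opcode byte and a 32 bit immediate. */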
static sljit_s32 emit_do_imm32(struct sljit_compiler *compiler, sljit_u8 rex, sljit_u8 opcode, sljit_sw imm)
{
	sljit_u8 *inst;
	sljit_s32 length = 1 + (rex ? 1 : 0) + sizeof(sljit_s32);

	inst = (sljit_u8*)ensure_buf(compiler, 1 + length);
	FAIL_IF(!inst);
	INC_SIZE(length);
	if (rex)
		*inst++ = rex;
	*inst++ = opcode;
	sljit_unaligned_store_s32(inst, imm);
	return SLJIT_SUCCESS;
}

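/* General instruction encoder: computes the full instruction length (prefixes, REX,
   opcode bytes, mod r/m, optional SIB, displacement and immediate), reserves the
   buffer and emits everything except the main opcode byte(s), which the caller
   stores through the returned pointer. */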
static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_s32 size,
	/* The register or immediate operand. */
	sljit_s32 a, sljit_sw imma,
	/* The general operand (not immediate). */
	sljit_s32 b, sljit_sw immb)
{
	sljit_u8 *inst;
	sljit_u8 *buf_ptr;
	sljit_u8 rex = 0;
	sljit_s32 flags = size & ~0xf;
	sljit_s32 inst_size;

	/* The immediate operand must be 32 bit. */
	SLJIT_ASSERT(!(a & SLJIT_IMM) || compiler->mode32 || IS_HALFWORD(imma));
	/* Both cannot be switched on. */
	SLJIT_ASSERT((flags & (EX86_BIN_INS | EX86_SHIFT_INS)) != (EX86_BIN_INS | EX86_SHIFT_INS));
	/* Size flags not allowed for typed instructions. */
	SLJIT_ASSERT(!(flags & (EX86_BIN_INS | EX86_SHIFT_INS)) || (flags & (EX86_BYTE_ARG | EX86_HALF_ARG)) == 0);
	/* Both size flags cannot be switched on. */
	SLJIT_ASSERT((flags & (EX86_BYTE_ARG | EX86_HALF_ARG)) != (EX86_BYTE_ARG | EX86_HALF_ARG));
	/* SSE2 and immediate is not possible. */
	SLJIT_ASSERT(!(a & SLJIT_IMM) || !(flags & EX86_SSE2));
	SLJIT_ASSERT((flags & (EX86_PREF_F2 | EX86_PREF_F3)) != (EX86_PREF_F2 | EX86_PREF_F3)
		&& (flags & (EX86_PREF_F2 | EX86_PREF_66)) != (EX86_PREF_F2 | EX86_PREF_66)
		&& (flags & (EX86_PREF_F3 | EX86_PREF_66)) != (EX86_PREF_F3 | EX86_PREF_66));

	size &= 0xf;
	inst_size = size;

	if (!compiler->mode32 && !(flags & EX86_NO_REXW))
		rex |= REX_W;
	else if (flags & EX86_REX)
		rex |= REX;

	if (flags & (EX86_PREF_F2 | EX86_PREF_F3))
		inst_size++;
	if (flags & EX86_PREF_66)
		inst_size++;

	/* Calculate size of b. */
	inst_size += 1; /* mod r/m byte. */
	if (b & SLJIT_MEM) {
		if (!(b & OFFS_REG_MASK)) {
			if (NOT_HALFWORD(immb)) {
				PTR_FAIL_IF(emit_load_imm64(compiler, TMP_REG3, immb));
				immb = 0;
				if (b & REG_MASK)
					b |= TO_OFFS_REG(TMP_REG3);
				else
					b |= TMP_REG3;
			}
			else if (reg_lmap[b & REG_MASK] == 4)
				b |= TO_OFFS_REG(SLJIT_SP);
		}

		if ((b & REG_MASK) == SLJIT_UNUSED)
			inst_size += 1 + sizeof(sljit_s32); /* SIB byte required to avoid RIP based addressing. */
		else {
			if (reg_map[b & REG_MASK] >= 8)
				rex |= REX_B;

			if (immb != 0 && (!(b & OFFS_REG_MASK) || (b & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_SP))) {
				/* Immediate operand. */
				if (immb <= 127 && immb >= -128)
					inst_size += sizeof(sljit_s8);
				else
					inst_size += sizeof(sljit_s32);
			}
			else if (reg_lmap[b & REG_MASK] == 5)
				inst_size += sizeof(sljit_s8);

			if ((b & OFFS_REG_MASK) != SLJIT_UNUSED) {
				inst_size += 1; /* SIB byte. */
				if (reg_map[OFFS_REG(b)] >= 8)
					rex |= REX_X;
			}
		}
	}
	else if (!(flags & EX86_SSE2_OP2) && reg_map[b] >= 8)
		rex |= REX_B;

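	/* Calculate size of a: an immediate contributes its encoded width, a register
	   operand only a possible REX_R bit. */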
	if (a & SLJIT_IMM) {
		if (flags & EX86_BIN_INS) {
			if (imma <= 127 && imma >= -128) {
				inst_size += 1;
				flags |= EX86_BYTE_ARG;
			} else
				inst_size += 4;
		}
		else if (flags & EX86_SHIFT_INS) {
			imma &= compiler->mode32 ? 0x1f : 0x3f;
			if (imma != 1) {
				inst_size ++;
				flags |= EX86_BYTE_ARG;
			}
		} else if (flags & EX86_BYTE_ARG)
			inst_size++;
		else if (flags & EX86_HALF_ARG)
			inst_size += sizeof(short);
		else
			inst_size += sizeof(sljit_s32);
	}
	else {
		SLJIT_ASSERT(!(flags & EX86_SHIFT_INS) || a == SLJIT_PREF_SHIFT_REG);
		/* reg_map[SLJIT_PREF_SHIFT_REG] is less than 8. */
		if (!(flags & EX86_SSE2_OP1) && reg_map[a] >= 8)
			rex |= REX_R;
	}

	if (rex)
		inst_size++;

	inst = (sljit_u8*)ensure_buf(compiler, 1 + inst_size);
	PTR_FAIL_IF(!inst);

	/* Encoding the byte. */
	INC_SIZE(inst_size);
	if (flags & EX86_PREF_F2)
		*inst++ = 0xf2;
	if (flags & EX86_PREF_F3)
		*inst++ = 0xf3;
	if (flags & EX86_PREF_66)
		*inst++ = 0x66;
	if (rex)
		*inst++ = rex;
	buf_ptr = inst + size;

	/* Encode mod/rm byte. */
	if (!(flags & EX86_SHIFT_INS)) {
		if ((flags & EX86_BIN_INS) && (a & SLJIT_IMM))
			*inst = (flags & EX86_BYTE_ARG) ? GROUP_BINARY_83 : GROUP_BINARY_81;

		if ((a & SLJIT_IMM) || (a == 0))
			*buf_ptr = 0;
		else if (!(flags & EX86_SSE2_OP1))
			*buf_ptr = reg_lmap[a] << 3;
		else
			*buf_ptr = a << 3;
	}
	else {
		if (a & SLJIT_IMM) {
			if (imma == 1)
				*inst = GROUP_SHIFT_1;
			else
				*inst = GROUP_SHIFT_N;
		} else
			*inst = GROUP_SHIFT_CL;
		*buf_ptr = 0;
	}

	if (!(b & SLJIT_MEM))
		*buf_ptr++ |= MOD_REG + ((!(flags & EX86_SSE2_OP2)) ? reg_lmap[b] : b);
	else if ((b & REG_MASK) != SLJIT_UNUSED) {
		if ((b & OFFS_REG_MASK) == SLJIT_UNUSED || (b & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_SP)) {
			if (immb != 0 || reg_lmap[b & REG_MASK] == 5) {
				if (immb <= 127 && immb >= -128)
					*buf_ptr |= 0x40;
				else
					*buf_ptr |= 0x80;
			}

			if ((b & OFFS_REG_MASK) == SLJIT_UNUSED)
				*buf_ptr++ |= reg_lmap[b & REG_MASK];
			else {
				*buf_ptr++ |= 0x04;
				*buf_ptr++ = reg_lmap[b & REG_MASK] | (reg_lmap[OFFS_REG(b)] << 3);
			}

			if (immb != 0 || reg_lmap[b & REG_MASK] == 5) {
				if (immb <= 127 && immb >= -128)
					*buf_ptr++ = immb; /* 8 bit displacement. */
				else {
					sljit_unaligned_store_s32(buf_ptr, immb); /* 32 bit displacement. */
					buf_ptr += sizeof(sljit_s32);
				}
			}
		}
		else {
			if (reg_lmap[b & REG_MASK] == 5)
				*buf_ptr |= 0x40;
			*buf_ptr++ |= 0x04;
			*buf_ptr++ = reg_lmap[b & REG_MASK] | (reg_lmap[OFFS_REG(b)] << 3) | (immb << 6);
			if (reg_lmap[b & REG_MASK] == 5)
				*buf_ptr++ = 0;
		}
	}
	else {
		*buf_ptr++ |= 0x04;
		*buf_ptr++ = 0x25;
		sljit_unaligned_store_s32(buf_ptr, immb); /* 32 bit displacement. */
		buf_ptr += sizeof(sljit_s32);
	}

	if (a & SLJIT_IMM) {
		if (flags & EX86_BYTE_ARG)
			*buf_ptr = imma;
		else if (flags & EX86_HALF_ARG)
			sljit_unaligned_store_s16(buf_ptr, imma);
		else if (!(flags & EX86_SHIFT_INS))
			sljit_unaligned_store_s32(buf_ptr, imma);
	}

	return !(flags & EX86_SHIFT_INS) ? inst : (inst + 1);
}

/* --------------------------------------------------------------------- */
/*  Call / return instructions                                           */
/* --------------------------------------------------------------------- */

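/* Moves SLJIT_R0 (and SLJIT_R2 for SLJIT_CALL3) into the first (and third) argument
   registers before a call; SLJIT_R1 already resides in the second argument register
   on both ABIs. */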
static SLJIT_INLINE sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 type)
{
	sljit_u8 *inst;

#ifndef _WIN64
	SLJIT_ASSERT(reg_map[SLJIT_R1] == 6 && reg_map[SLJIT_R0] < 8 && reg_map[SLJIT_R2] < 8);

	inst = (sljit_u8*)ensure_buf(compiler, 1 + ((type < SLJIT_CALL3) ? 3 : 6));
	FAIL_IF(!inst);
	INC_SIZE((type < SLJIT_CALL3) ? 3 : 6);
	if (type >= SLJIT_CALL3) {
		*inst++ = REX_W;
		*inst++ = MOV_r_rm;
		*inst++ = MOD_REG | (0x2 /* rdx */ << 3) | reg_lmap[SLJIT_R2];
	}
	*inst++ = REX_W;
	*inst++ = MOV_r_rm;
	*inst++ = MOD_REG | (0x7 /* rdi */ << 3) | reg_lmap[SLJIT_R0];
#else
	SLJIT_ASSERT(reg_map[SLJIT_R1] == 2 && reg_map[SLJIT_R0] < 8 && reg_map[SLJIT_R2] < 8);

	inst = (sljit_u8*)ensure_buf(compiler, 1 + ((type < SLJIT_CALL3) ? 3 : 6));
	FAIL_IF(!inst);
	INC_SIZE((type < SLJIT_CALL3) ? 3 : 6);
	if (type >= SLJIT_CALL3) {
		*inst++ = REX_W | REX_R;
		*inst++ = MOV_r_rm;
		*inst++ = MOD_REG | (0x0 /* r8 */ << 3) | reg_lmap[SLJIT_R2];
	}
	*inst++ = REX_W;
	*inst++ = MOV_r_rm;
	*inst++ = MOD_REG | (0x1 /* rcx */ << 3) | reg_lmap[SLJIT_R0];
#endif
	return SLJIT_SUCCESS;
}

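/* Pops the return address pushed by a fast call into dst (register or memory). */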
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
{
	sljit_u8 *inst;

	CHECK_ERROR();
	CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw));
	ADJUST_LOCAL_OFFSET(dst, dstw);

	/* For UNUSED dst. Uncommon, but possible. */
	if (dst == SLJIT_UNUSED)
		dst = TMP_REG1;

	if (FAST_IS_REG(dst)) {
		if (reg_map[dst] < 8) {
			inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
			FAIL_IF(!inst);
			INC_SIZE(1);
			POP_REG(reg_lmap[dst]);
			return SLJIT_SUCCESS;
		}

		inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
		FAIL_IF(!inst);
		INC_SIZE(2);
		*inst++ = REX_B;
		POP_REG(reg_lmap[dst]);
		return SLJIT_SUCCESS;
	}

	/* REX_W is not necessary (dst is not immediate). */
	compiler->mode32 = 1;
	inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
	FAIL_IF(!inst);
	*inst++ = POP_rm;
	return SLJIT_SUCCESS;
}

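/* Pushes the return address taken from src (register, memory or immediate) and
   returns through it. */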
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_return(struct sljit_compiler *compiler, sljit_s32 src, sljit_sw srcw)
{
	sljit_u8 *inst;

	CHECK_ERROR();
	CHECK(check_sljit_emit_fast_return(compiler, src, srcw));
	ADJUST_LOCAL_OFFSET(src, srcw);

	if ((src & SLJIT_IMM) && NOT_HALFWORD(srcw)) {
		FAIL_IF(emit_load_imm64(compiler, TMP_REG1, srcw));
		src = TMP_REG1;
	}

	if (FAST_IS_REG(src)) {
		if (reg_map[src] < 8) {
			inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + 1);
			FAIL_IF(!inst);

			INC_SIZE(1 + 1);
			PUSH_REG(reg_lmap[src]);
		}
		else {
			inst = (sljit_u8*)ensure_buf(compiler, 1 + 2 + 1);
			FAIL_IF(!inst);

			INC_SIZE(2 + 1);
			*inst++ = REX_B;
			PUSH_REG(reg_lmap[src]);
		}
	}
	else if (src & SLJIT_MEM) {
		/* REX_W is not necessary (src is not immediate). */
		compiler->mode32 = 1;
		inst = emit_x86_instruction(compiler, 1, 0, 0, src, srcw);
		FAIL_IF(!inst);
		*inst++ = GROUP_FF;
		*inst |= PUSH_rm;

		inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
		FAIL_IF(!inst);
		INC_SIZE(1);
	}
	else {
		SLJIT_ASSERT(IS_HALFWORD(srcw));
		/* SLJIT_IMM. */
		inst = (sljit_u8*)ensure_buf(compiler, 1 + 5 + 1);
		FAIL_IF(!inst);

		INC_SIZE(5 + 1);
		*inst++ = PUSH_i32;
		sljit_unaligned_store_s32(inst, srcw);
		inst += sizeof(sljit_s32);
	}

	RET();
	return SLJIT_SUCCESS;
}


/* --------------------------------------------------------------------- */
/*  Extend input                                                         */
/* --------------------------------------------------------------------- */

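/* Loads a 32 bit value into a 64 bit destination, either sign extended with MOVSXD
   or zero extended with a plain 32 bit mov. */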
static sljit_s32 emit_mov_int(struct sljit_compiler *compiler, sljit_s32 sign,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_u8* inst;
	sljit_s32 dst_r;

	compiler->mode32 = 0;

	if (dst == SLJIT_UNUSED && !(src & SLJIT_MEM))
		return SLJIT_SUCCESS; /* Empty instruction. */

	if (src & SLJIT_IMM) {
		if (FAST_IS_REG(dst)) {
			if (sign || ((sljit_uw)srcw <= 0x7fffffff)) {
				inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, (sljit_sw)(sljit_s32)srcw, dst, dstw);
				FAIL_IF(!inst);
				*inst = MOV_rm_i32;
				return SLJIT_SUCCESS;
			}
			return emit_load_imm64(compiler, dst, srcw);
		}
		compiler->mode32 = 1;
		inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, (sljit_sw)(sljit_s32)srcw, dst, dstw);
		FAIL_IF(!inst);
		*inst = MOV_rm_i32;
		compiler->mode32 = 0;
		return SLJIT_SUCCESS;
	}

	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;

	if ((dst & SLJIT_MEM) && FAST_IS_REG(src))
		dst_r = src;
	else {
		if (sign) {
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, src, srcw);
			FAIL_IF(!inst);
			*inst++ = MOVSXD_r_rm;
		} else {
			compiler->mode32 = 1;
			FAIL_IF(emit_mov(compiler, dst_r, 0, src, srcw));
			compiler->mode32 = 0;
		}
	}

	if (dst & SLJIT_MEM) {
		compiler->mode32 = 1;
		inst = emit_x86_instruction(compiler, 1, dst_r, 0, dst, dstw);
		FAIL_IF(!inst);
		*inst = MOV_rm_r;
		compiler->mode32 = 0;
	}

	return SLJIT_SUCCESS;
}