/*	$NetBSD: sljitNativeX86_32.c,v 1.6 2019/01/20 23:14:16 alnsn Exp $	*/

/*
 *    Stack-less Just-In-Time compiler
 *
 *    Copyright Zoltan Herczeg (hzmester (at) freemail.hu). All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification, are
 * permitted provided that the following conditions are met:
 *
 *   1. Redistributions of source code must retain the above copyright notice, this list of
 *      conditions and the following disclaimer.
 *
 *   2. Redistributions in binary form must reproduce the above copyright notice, this list
 *      of conditions and the following disclaimer in the documentation and/or other materials
 *      provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
 * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/* x86 32-bit arch dependent functions. */

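/* Emits a single-byte opcode followed by a machine word immediate (for
   example the mov reg, imm32 or push imm32 forms): the opcode byte is
   written as-is and the immediate is stored unaligned right after it. */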
static sljit_s32 emit_do_imm(struct sljit_compiler *compiler, sljit_u8 opcode, sljit_sw imm)
{
	sljit_u8 *inst;

	inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + sizeof(sljit_sw));
	FAIL_IF(!inst);
	INC_SIZE(1 + sizeof(sljit_sw));
	*inst++ = opcode;
	sljit_unaligned_store_sw(inst, imm);
	return SLJIT_SUCCESS;
}

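/* Emits the opcode part of a "far" (32 bit displacement) jump or call and
   reserves the 4 byte relocation field: the rel32 JMP form, the rel32 CALL
   form, or the two byte 0F 8x form for conditional jumps. jump->addr is
   advanced so it points at the displacement field. Jumps to a label are only
   marked (PATCH_MW) and patched later; jumps to an absolute target get their
   relative offset computed immediately. */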
static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_s32 type, sljit_sw executable_offset)
{
	if (type == SLJIT_JUMP) {
		*code_ptr++ = JMP_i32;
		jump->addr++;
	}
	else if (type >= SLJIT_FAST_CALL) {
		*code_ptr++ = CALL_i32;
		jump->addr++;
	}
	else {
		*code_ptr++ = GROUP_0F;
		*code_ptr++ = get_jump_code(type);
		jump->addr += 2;
	}

	if (jump->flags & JUMP_LABEL)
		jump->flags |= PATCH_MW;
	else
		sljit_unaligned_store_sw(code_ptr, jump->u.target - (jump->addr + 4) - (sljit_uw)executable_offset);
	code_ptr += 4;

	return code_ptr;
}

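/* Function prologue. Saves TMP_REG1 and the register-mapped saved registers
   with PUSH, loads the incoming arguments into SLJIT_S0..SLJIT_S2 (from the
   fastcall argument registers, otherwise from the caller's stack frame), and
   finally allocates local_size bytes while honouring the platform's stack
   alignment requirements. */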
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler,
	sljit_s32 options, sljit_s32 args, sljit_s32 scratches, sljit_s32 saveds,
	sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
{
	sljit_s32 size;
	sljit_u8 *inst;

	CHECK_ERROR();
	CHECK(check_sljit_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size));
	set_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size);

	compiler->args = args;

#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
	/* [esp+0] for saving temporaries and third argument for calls. */
	compiler->saveds_offset = 1 * sizeof(sljit_sw);
#else
	/* [esp+0] for saving temporaries and space for maximum three arguments. */
	if (scratches <= 1)
		compiler->saveds_offset = 1 * sizeof(sljit_sw);
	else
		compiler->saveds_offset = ((scratches == 2) ? 2 : 3) * sizeof(sljit_sw);
#endif

	if (scratches > 3)
		compiler->saveds_offset += ((scratches > (3 + 6)) ? 6 : (scratches - 3)) * sizeof(sljit_sw);

	compiler->locals_offset = compiler->saveds_offset;

	if (saveds > 3)
		compiler->locals_offset += (saveds - 3) * sizeof(sljit_sw);

	if (options & SLJIT_F64_ALIGNMENT)
		compiler->locals_offset = (compiler->locals_offset + sizeof(sljit_f64) - 1) & ~(sizeof(sljit_f64) - 1);

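	/* Rough frame layout from esp upwards: a small area for temporaries and
	   outgoing call arguments, stack slots for the scratch registers that are
	   not mapped to hardware registers, the extra saved registers starting at
	   saveds_offset, and the user's local area starting at locals_offset. */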
	size = 1 + (scratches > 9 ? (scratches - 9) : 0) + (saveds <= 3 ? saveds : 3);
#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
	size += (args > 0 ? (args * 2) : 0) + (args > 2 ? 2 : 0);
#else
	size += (args > 0 ? (2 + args * 3) : 0);
#endif
	inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
	FAIL_IF(!inst);

	INC_SIZE(size);
	PUSH_REG(reg_map[TMP_REG1]);
#if !(defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
	if (args > 0) {
		*inst++ = MOV_r_rm;
		*inst++ = MOD_REG | (reg_map[TMP_REG1] << 3) | 0x4 /* esp */;
	}
#endif
	if (saveds > 2 || scratches > 9)
		PUSH_REG(reg_map[SLJIT_S2]);
	if (saveds > 1 || scratches > 10)
		PUSH_REG(reg_map[SLJIT_S1]);
	if (saveds > 0 || scratches > 11)
		PUSH_REG(reg_map[SLJIT_S0]);

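	/* Load the incoming arguments into the saved registers. Under fastcall
	   the first two arguments already arrive in registers (copied here from
	   SLJIT_R2 and SLJIT_R1) and only the third is read from the stack; in
	   the cdecl path all arguments are loaded relative to TMP_REG1, which
	   captured the entry value of esp before the pushes above. */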
#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
	if (args > 0) {
		*inst++ = MOV_r_rm;
		*inst++ = MOD_REG | (reg_map[SLJIT_S0] << 3) | reg_map[SLJIT_R2];
	}
	if (args > 1) {
		*inst++ = MOV_r_rm;
		*inst++ = MOD_REG | (reg_map[SLJIT_S1] << 3) | reg_map[SLJIT_R1];
	}
	if (args > 2) {
		*inst++ = MOV_r_rm;
		*inst++ = MOD_DISP8 | (reg_map[SLJIT_S2] << 3) | 0x4 /* esp */;
		*inst++ = 0x24;
		*inst++ = sizeof(sljit_sw) * (3 + 2); /* saveds >= 3 as well. */
	}
#else
	if (args > 0) {
		*inst++ = MOV_r_rm;
		*inst++ = MOD_DISP8 | (reg_map[SLJIT_S0] << 3) | reg_map[TMP_REG1];
		*inst++ = sizeof(sljit_sw) * 2;
	}
	if (args > 1) {
		*inst++ = MOV_r_rm;
		*inst++ = MOD_DISP8 | (reg_map[SLJIT_S1] << 3) | reg_map[TMP_REG1];
		*inst++ = sizeof(sljit_sw) * 3;
	}
	if (args > 2) {
		*inst++ = MOV_r_rm;
		*inst++ = MOD_DISP8 | (reg_map[SLJIT_S2] << 3) | reg_map[TMP_REG1];
		*inst++ = sizeof(sljit_sw) * 4;
	}
#endif

	SLJIT_ASSERT(SLJIT_LOCALS_OFFSET > 0);

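	/* On macOS the IA-32 ABI requires esp to be 16 byte aligned at call
	   sites, so the local size is rounded together with the pushed registers;
	   elsewhere it is only rounded up to sljit_f64 or sljit_sw granularity. */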
#if defined(__APPLE__)
	/* Ignore pushed registers and SLJIT_LOCALS_OFFSET when computing the aligned local size. */
	saveds = (2 + (scratches > 9 ? (scratches - 9) : 0) + (saveds <= 3 ? saveds : 3)) * sizeof(sljit_uw);
	local_size = ((SLJIT_LOCALS_OFFSET + saveds + local_size + 15) & ~15) - saveds;
#else
	if (options & SLJIT_F64_ALIGNMENT)
		local_size = SLJIT_LOCALS_OFFSET + ((local_size + sizeof(sljit_f64) - 1) & ~(sizeof(sljit_f64) - 1));
	else
		local_size = SLJIT_LOCALS_OFFSET + ((local_size + sizeof(sljit_sw) - 1) & ~(sizeof(sljit_sw) - 1));
#endif

	compiler->local_size = local_size;

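	/* On Windows the stack is committed page by page through guard pages, so
	   allocations larger than 1024 bytes first call sljit_grow_stack(), which
	   touches the pages about to be used before esp is moved below them. */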
#ifdef _WIN32
	if (local_size > 1024) {
#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
		FAIL_IF(emit_do_imm(compiler, MOV_r_i32 + reg_map[SLJIT_R0], local_size));
#else
		/* Space for a single argument. This amount is excluded when the stack is allocated below. */
		local_size -= sizeof(sljit_sw);
		FAIL_IF(emit_do_imm(compiler, MOV_r_i32 + reg_map[SLJIT_R0], local_size));
		FAIL_IF(emit_non_cum_binary(compiler, SUB_r_rm, SUB_rm_r, SUB, SUB_EAX_i32,
			SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, sizeof(sljit_sw)));
#endif
		FAIL_IF(sljit_emit_ijump(compiler, SLJIT_CALL1, SLJIT_IMM, SLJIT_FUNC_OFFSET(sljit_grow_stack)));
	}
#endif

	SLJIT_ASSERT(local_size > 0);

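	/* When SLJIT_F64_ALIGNMENT is requested, the entry esp is preserved in
	   TMP_REG1, esp is rounded down to an sljit_f64 boundary after the locals
	   are allocated, and the saved esp is stored at the top of the local area
	   so sljit_emit_return() can restore it. */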
#if !defined(__APPLE__)
	if (options & SLJIT_F64_ALIGNMENT) {
		EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_SP, 0);

		/* Some space might be allocated during sljit_grow_stack() above on WIN32. */
		FAIL_IF(emit_non_cum_binary(compiler, SUB_r_rm, SUB_rm_r, SUB, SUB_EAX_i32,
			SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, local_size + sizeof(sljit_sw)));

#if defined _WIN32 && !(defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
		if (compiler->local_size > 1024)
			FAIL_IF(emit_cum_binary(compiler, ADD_r_rm, ADD_rm_r, ADD, ADD_EAX_i32,
				TMP_REG1, 0, TMP_REG1, 0, SLJIT_IMM, sizeof(sljit_sw)));
#endif

		inst = (sljit_u8*)ensure_buf(compiler, 1 + 6);
		FAIL_IF(!inst);

		INC_SIZE(6);
		inst[0] = GROUP_BINARY_81;
		inst[1] = MOD_REG | AND | reg_map[SLJIT_SP];
		sljit_unaligned_store_sw(inst + 2, ~(sizeof(sljit_f64) - 1));

		/* The real local size must be used. */
		return emit_mov(compiler, SLJIT_MEM1(SLJIT_SP), compiler->local_size, TMP_REG1, 0);
	}
#endif
	return emit_non_cum_binary(compiler, SUB_r_rm, SUB_rm_r, SUB, SUB_EAX_i32,
		SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, local_size);
}

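/* Records the same frame layout as sljit_emit_enter() without emitting any
   code; only the bookkeeping fields (offsets and local size) are updated. */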
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler,
	sljit_s32 options, sljit_s32 args, sljit_s32 scratches, sljit_s32 saveds,
	sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
{
	CHECK_ERROR();
	CHECK(check_sljit_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size));
	set_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size);

	compiler->args = args;

#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
	/* [esp+0] for saving temporaries and third argument for calls. */
	compiler->saveds_offset = 1 * sizeof(sljit_sw);
#else
	/* [esp+0] for saving temporaries and space for maximum three arguments. */
	if (scratches <= 1)
		compiler->saveds_offset = 1 * sizeof(sljit_sw);
	else
		compiler->saveds_offset = ((scratches == 2) ? 2 : 3) * sizeof(sljit_sw);
#endif

	if (scratches > 3)
		compiler->saveds_offset += ((scratches > (3 + 6)) ? 6 : (scratches - 3)) * sizeof(sljit_sw);

	compiler->locals_offset = compiler->saveds_offset;

	if (saveds > 3)
		compiler->locals_offset += (saveds - 3) * sizeof(sljit_sw);

	if (options & SLJIT_F64_ALIGNMENT)
		compiler->locals_offset = (compiler->locals_offset + sizeof(sljit_f64) - 1) & ~(sizeof(sljit_f64) - 1);

#if defined(__APPLE__)
	saveds = (2 + (scratches > 9 ? (scratches - 9) : 0) + (saveds <= 3 ? saveds : 3)) * sizeof(sljit_uw);
	compiler->local_size = ((SLJIT_LOCALS_OFFSET + saveds + local_size + 15) & ~15) - saveds;
#else
	if (options & SLJIT_F64_ALIGNMENT)
		compiler->local_size = SLJIT_LOCALS_OFFSET + ((local_size + sizeof(sljit_f64) - 1) & ~(sizeof(sljit_f64) - 1));
	else
		compiler->local_size = SLJIT_LOCALS_OFFSET + ((local_size + sizeof(sljit_sw) - 1) & ~(sizeof(sljit_sw) - 1));
#endif
	return SLJIT_SUCCESS;
}

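/* Function epilogue. Moves the return value into place, releases the local
   area (either by restoring the esp saved for SLJIT_F64_ALIGNMENT or by
   adding local_size back to esp), pops the saved registers, and returns.
   Under fastcall with more than two arguments, RET imm16 also pops the
   stack-passed argument. */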
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 size;
	sljit_u8 *inst;

	CHECK_ERROR();
	CHECK(check_sljit_emit_return(compiler, op, src, srcw));
	SLJIT_ASSERT(compiler->args >= 0);

	FAIL_IF(emit_mov_before_return(compiler, op, src, srcw));

	SLJIT_ASSERT(compiler->local_size > 0);

#if !defined(__APPLE__)
	if (compiler->options & SLJIT_F64_ALIGNMENT)
		EMIT_MOV(compiler, SLJIT_SP, 0, SLJIT_MEM1(SLJIT_SP), compiler->local_size)
	else
		FAIL_IF(emit_cum_binary(compiler, ADD_r_rm, ADD_rm_r, ADD, ADD_EAX_i32,
			SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, compiler->local_size));
#else
	FAIL_IF(emit_cum_binary(compiler, ADD_r_rm, ADD_rm_r, ADD, ADD_EAX_i32,
		SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, compiler->local_size));
#endif

	size = 2 + (compiler->scratches > 7 ? (compiler->scratches - 7) : 0) +
		(compiler->saveds <= 3 ? compiler->saveds : 3);
#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
	if (compiler->args > 2)
		size += 2;
#else
	if (compiler->args > 0)
		size += 2;
#endif
	inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
	FAIL_IF(!inst);

	INC_SIZE(size);

	if (compiler->saveds > 0 || compiler->scratches > 11)
		POP_REG(reg_map[SLJIT_S0]);
	if (compiler->saveds > 1 || compiler->scratches > 10)
		POP_REG(reg_map[SLJIT_S1]);
	if (compiler->saveds > 2 || compiler->scratches > 9)
		POP_REG(reg_map[SLJIT_S2]);
	POP_REG(reg_map[TMP_REG1]);
#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
	if (compiler->args > 2)
		RET_I16(sizeof(sljit_sw));
	else
		RET();
#else
	RET();
#endif

	return SLJIT_SUCCESS;
}

/* --------------------------------------------------------------------- */
/*  Operators                                                            */
/* --------------------------------------------------------------------- */

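/* Central instruction encoder. Computes the instruction length from the
   operands, reserves the buffer space, and emits the optional 66/F2/F3
   prefixes, the ModR/M byte, an optional SIB byte, the displacement and the
   immediate. The caller fills in the opcode byte(s) through the returned
   pointer. */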
/* Size contains the flags as well. */
static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_s32 size,
	/* The register or immediate operand. */
	sljit_s32 a, sljit_sw imma,
	/* The general operand (not immediate). */
	sljit_s32 b, sljit_sw immb)
{
	sljit_u8 *inst;
	sljit_u8 *buf_ptr;
	sljit_s32 flags = size & ~0xf;
	sljit_s32 inst_size;

	/* Both cannot be switched on. */
	SLJIT_ASSERT((flags & (EX86_BIN_INS | EX86_SHIFT_INS)) != (EX86_BIN_INS | EX86_SHIFT_INS));
	/* Size flags not allowed for typed instructions. */
	SLJIT_ASSERT(!(flags & (EX86_BIN_INS | EX86_SHIFT_INS)) || (flags & (EX86_BYTE_ARG | EX86_HALF_ARG)) == 0);
	/* Both size flags cannot be switched on. */
	SLJIT_ASSERT((flags & (EX86_BYTE_ARG | EX86_HALF_ARG)) != (EX86_BYTE_ARG | EX86_HALF_ARG));
	/* SSE2 and immediate is not possible. */
	SLJIT_ASSERT(!(a & SLJIT_IMM) || !(flags & EX86_SSE2));
	SLJIT_ASSERT((flags & (EX86_PREF_F2 | EX86_PREF_F3)) != (EX86_PREF_F2 | EX86_PREF_F3)
		&& (flags & (EX86_PREF_F2 | EX86_PREF_66)) != (EX86_PREF_F2 | EX86_PREF_66)
		&& (flags & (EX86_PREF_F3 | EX86_PREF_66)) != (EX86_PREF_F3 | EX86_PREF_66));

	size &= 0xf;
	inst_size = size;

	if (flags & (EX86_PREF_F2 | EX86_PREF_F3))
		inst_size++;
	if (flags & EX86_PREF_66)
		inst_size++;

	/* Calculate size of b. */
	inst_size += 1; /* mod r/m byte. */
	if (b & SLJIT_MEM) {
		if ((b & REG_MASK) == SLJIT_UNUSED)
			inst_size += sizeof(sljit_sw);
		else if (immb != 0 && !(b & OFFS_REG_MASK)) {
			/* Immediate operand. */
			if (immb <= 127 && immb >= -128)
				inst_size += sizeof(sljit_s8);
			else
				inst_size += sizeof(sljit_sw);
		}

		if ((b & REG_MASK) == SLJIT_SP && !(b & OFFS_REG_MASK))
			b |= TO_OFFS_REG(SLJIT_SP);

		if ((b & OFFS_REG_MASK) != SLJIT_UNUSED)
			inst_size += 1; /* SIB byte. */
	}

	/* Calculate size of a. */
	if (a & SLJIT_IMM) {
		if (flags & EX86_BIN_INS) {
			if (imma <= 127 && imma >= -128) {
				inst_size += 1;
				flags |= EX86_BYTE_ARG;
			} else
				inst_size += 4;
		}
		else if (flags & EX86_SHIFT_INS) {
			imma &= 0x1f;
			if (imma != 1) {
				inst_size++;
				flags |= EX86_BYTE_ARG;
			}
		} else if (flags & EX86_BYTE_ARG)
			inst_size++;
		else if (flags & EX86_HALF_ARG)
			inst_size += sizeof(short);
		else
			inst_size += sizeof(sljit_sw);
	}
	else
		SLJIT_ASSERT(!(flags & EX86_SHIFT_INS) || a == SLJIT_PREF_SHIFT_REG);

	inst = (sljit_u8*)ensure_buf(compiler, 1 + inst_size);
	PTR_FAIL_IF(!inst);

	/* Encoding the byte. */
	INC_SIZE(inst_size);
	if (flags & EX86_PREF_F2)
		*inst++ = 0xf2;
	if (flags & EX86_PREF_F3)
		*inst++ = 0xf3;
	if (flags & EX86_PREF_66)
		*inst++ = 0x66;

	buf_ptr = inst + size;

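	/* ModR/M notes: mod bits 01/10 select an 8 or 32 bit displacement, rm
	   value 4 escapes to a SIB byte, and rm value 5 with mod 00 encodes a
	   bare 32 bit address. The reg field holds either the register operand
	   or the /digit extension of a group opcode. */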
	/* Encode mod/rm byte. */
	if (!(flags & EX86_SHIFT_INS)) {
		if ((flags & EX86_BIN_INS) && (a & SLJIT_IMM))
			*inst = (flags & EX86_BYTE_ARG) ? GROUP_BINARY_83 : GROUP_BINARY_81;

		if ((a & SLJIT_IMM) || (a == 0))
			*buf_ptr = 0;
		else if (!(flags & EX86_SSE2_OP1))
			*buf_ptr = reg_map[a] << 3;
		else
			*buf_ptr = a << 3;
	}
	else {
		if (a & SLJIT_IMM) {
			if (imma == 1)
				*inst = GROUP_SHIFT_1;
			else
				*inst = GROUP_SHIFT_N;
		} else
			*inst = GROUP_SHIFT_CL;
		*buf_ptr = 0;
	}

	if (!(b & SLJIT_MEM))
		*buf_ptr++ |= MOD_REG + ((!(flags & EX86_SSE2_OP2)) ? reg_map[b] : b);
	else if ((b & REG_MASK) != SLJIT_UNUSED) {
		if ((b & OFFS_REG_MASK) == SLJIT_UNUSED || (b & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_SP)) {
			if (immb != 0) {
				if (immb <= 127 && immb >= -128)
					*buf_ptr |= 0x40;
				else
					*buf_ptr |= 0x80;
			}

			if ((b & OFFS_REG_MASK) == SLJIT_UNUSED)
				*buf_ptr++ |= reg_map[b & REG_MASK];
			else {
				*buf_ptr++ |= 0x04;
				*buf_ptr++ = reg_map[b & REG_MASK] | (reg_map[OFFS_REG(b)] << 3);
			}

			if (immb != 0) {
				if (immb <= 127 && immb >= -128)
					*buf_ptr++ = immb; /* 8 bit displacement. */
				else {
					sljit_unaligned_store_sw(buf_ptr, immb); /* 32 bit displacement. */
					buf_ptr += sizeof(sljit_sw);
				}
			}
		}
		else {
			*buf_ptr++ |= 0x04;
			*buf_ptr++ = reg_map[b & REG_MASK] | (reg_map[OFFS_REG(b)] << 3) | (immb << 6);
		}
	}
	else {
		*buf_ptr++ |= 0x05;
		sljit_unaligned_store_sw(buf_ptr, immb); /* 32 bit displacement. */
		buf_ptr += sizeof(sljit_sw);
	}

	if (a & SLJIT_IMM) {
		if (flags & EX86_BYTE_ARG)
			*buf_ptr = imma;
		else if (flags & EX86_HALF_ARG)
			sljit_unaligned_store_s16(buf_ptr, imma);
		else if (!(flags & EX86_SHIFT_INS))
			sljit_unaligned_store_sw(buf_ptr, imma);
	}

	return !(flags & EX86_SHIFT_INS) ? inst : (inst + 1);
}

/* --------------------------------------------------------------------- */
/*  Call / return instructions                                           */
/* --------------------------------------------------------------------- */

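/* Places the call arguments according to the calling convention in use.
   Under fastcall a third argument (in SLJIT_R2) is pushed on the stack and
   SLJIT_R0 is then moved into SLJIT_R2; otherwise the first three argument
   registers are stored into the outgoing-argument area at [esp], [esp+4]
   and [esp+8]. */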
static SLJIT_INLINE sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 type)
{
	sljit_u8 *inst;

#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
	inst = (sljit_u8*)ensure_buf(compiler, type >= SLJIT_CALL3 ? 1 + 2 + 1 : 1 + 2);
	FAIL_IF(!inst);
	INC_SIZE(type >= SLJIT_CALL3 ? 2 + 1 : 2);

	if (type >= SLJIT_CALL3)
		PUSH_REG(reg_map[SLJIT_R2]);
	*inst++ = MOV_r_rm;
	*inst++ = MOD_REG | (reg_map[SLJIT_R2] << 3) | reg_map[SLJIT_R0];
#else
	inst = (sljit_u8*)ensure_buf(compiler, 1 + 4 * (type - SLJIT_CALL0));
	FAIL_IF(!inst);
	INC_SIZE(4 * (type - SLJIT_CALL0));

	*inst++ = MOV_rm_r;
	*inst++ = MOD_DISP8 | (reg_map[SLJIT_R0] << 3) | 0x4 /* SIB */;
	*inst++ = (0x4 /* none */ << 3) | reg_map[SLJIT_SP];
	*inst++ = 0;
	if (type >= SLJIT_CALL2) {
		*inst++ = MOV_rm_r;
		*inst++ = MOD_DISP8 | (reg_map[SLJIT_R1] << 3) | 0x4 /* SIB */;
		*inst++ = (0x4 /* none */ << 3) | reg_map[SLJIT_SP];
		*inst++ = sizeof(sljit_sw);
	}
	if (type >= SLJIT_CALL3) {
		*inst++ = MOV_rm_r;
		*inst++ = MOD_DISP8 | (reg_map[SLJIT_R2] << 3) | 0x4 /* SIB */;
		*inst++ = (0x4 /* none */ << 3) | reg_map[SLJIT_SP];
		*inst++ = 2 * sizeof(sljit_sw);
	}
#endif
	return SLJIT_SUCCESS;
}

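/* Entry point of a "fast call": pops the return address pushed by the caller
   into dst, so that sljit_emit_fast_return() can jump back to it later. */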
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
{
	sljit_u8 *inst;

	CHECK_ERROR();
	CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw));
	ADJUST_LOCAL_OFFSET(dst, dstw);

	CHECK_EXTRA_REGS(dst, dstw, (void)0);

	/* For UNUSED dst. Uncommon, but possible. */
	if (dst == SLJIT_UNUSED)
		dst = TMP_REG1;

	if (FAST_IS_REG(dst)) {
		/* Unused dest is possible here. */
		inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
		FAIL_IF(!inst);

		INC_SIZE(1);
		POP_REG(reg_map[dst]);
		return SLJIT_SUCCESS;
	}

	/* Memory. */
	inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
	FAIL_IF(!inst);
	*inst++ = POP_rm;
	return SLJIT_SUCCESS;
}

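/* Counterpart of sljit_emit_fast_enter(): pushes the saved return address
   (register, memory or immediate form) back onto the stack and executes RET,
   which transfers control to that address. */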
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_return(struct sljit_compiler *compiler, sljit_s32 src, sljit_sw srcw)
{
	sljit_u8 *inst;

	CHECK_ERROR();
	CHECK(check_sljit_emit_fast_return(compiler, src, srcw));
	ADJUST_LOCAL_OFFSET(src, srcw);

	CHECK_EXTRA_REGS(src, srcw, (void)0);

	if (FAST_IS_REG(src)) {
		inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + 1);
		FAIL_IF(!inst);

		INC_SIZE(1 + 1);
		PUSH_REG(reg_map[src]);
	}
	else if (src & SLJIT_MEM) {
		inst = emit_x86_instruction(compiler, 1, 0, 0, src, srcw);
		FAIL_IF(!inst);
		*inst++ = GROUP_FF;
		*inst |= PUSH_rm;

		inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
		FAIL_IF(!inst);
		INC_SIZE(1);
	}
	else {
		/* SLJIT_IMM. */
		inst = (sljit_u8*)ensure_buf(compiler, 1 + 5 + 1);
		FAIL_IF(!inst);

		INC_SIZE(5 + 1);
		*inst++ = PUSH_i32;
		sljit_unaligned_store_sw(inst, srcw);
		inst += sizeof(sljit_sw);
	}

	RET();
	return SLJIT_SUCCESS;
}
    605