sljitNativeX86_32.c revision 1.1.1.5
/*
 *    Stack-less Just-In-Time compiler
 *
 *    Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification, are
 * permitted provided that the following conditions are met:
 *
 *   1. Redistributions of source code must retain the above copyright notice, this list of
 *      conditions and the following disclaimer.
 *
 *   2. Redistributions in binary form must reproduce the above copyright notice, this list
 *      of conditions and the following disclaimer in the documentation and/or other materials
 *      provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
 * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/* x86 32-bit arch dependent functions. */

static sljit_s32 emit_do_imm(struct sljit_compiler *compiler, sljit_u8 opcode, sljit_sw imm)
{
	sljit_u8 *inst;

	inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + sizeof(sljit_sw));
	FAIL_IF(!inst);
	INC_SIZE(1 + sizeof(sljit_sw));
	*inst++ = opcode;
	sljit_unaligned_store_sw(inst, imm);
	return SLJIT_SUCCESS;
}
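
/* Added note (hedged sketch, not part of the original source): emit_do_imm()
   writes one opcode byte followed by a machine-word immediate.  The Win32
   stack-growth path in sljit_emit_enter() below uses it with
   MOV_r_i32 + reg_map[reg], which on x86-32 encodes as B8+r imm32
   ("mov r32, imm32").  The block below is compiled out and only illustrates
   the call, assuming the surrounding sljit macros. */
#if 0
static sljit_s32 example_load_constant(struct sljit_compiler *compiler)
{
	/* With SLJIT_R0 mapped to eax this emits B8 78 56 34 12,
	   i.e. "mov eax, 0x12345678". */
	return emit_do_imm(compiler, MOV_r_i32 + reg_map[SLJIT_R0], 0x12345678);
}
#endif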

static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_s32 type, sljit_sw executable_offset)
{
	if (type == SLJIT_JUMP) {
		*code_ptr++ = JMP_i32;
		jump->addr++;
	}
	else if (type >= SLJIT_FAST_CALL) {
		*code_ptr++ = CALL_i32;
		jump->addr++;
	}
	else {
		*code_ptr++ = GROUP_0F;
		*code_ptr++ = get_jump_code(type);
		jump->addr += 2;
	}

	if (jump->flags & JUMP_LABEL)
		jump->flags |= PATCH_MW;
	else
		sljit_unaligned_store_sw(code_ptr, jump->u.target - (jump->addr + 4) - (sljit_uw)executable_offset);
	code_ptr += 4;

	return code_ptr;
}
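
/* Added note (hedged): the rel32 forms emitted above are E9 rel32 (JMP_i32),
   E8 rel32 (CALL_i32) and 0F 8x rel32 (conditional jumps), so jump->addr is
   advanced past the one or two opcode bytes to point at the 32-bit field that
   is either patched later (PATCH_MW) or filled in here.  The displacement is
   relative to the first byte after the instruction, as in this stand-alone,
   compiled-out sketch: */
#if 0
static void example_store_rel32(sljit_u8 *imm_field, sljit_uw target, sljit_sw executable_offset)
{
	/* The field is 4 bytes wide; executable_offset maps the write address
	   to the address the code will actually run at. */
	sljit_sw next_inst = (sljit_sw)(imm_field + 4) + executable_offset;
	sljit_unaligned_store_sw(imm_field, (sljit_sw)target - next_inst);
}
#endif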

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler,
	sljit_s32 options, sljit_s32 args, sljit_s32 scratches, sljit_s32 saveds,
	sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
{
	sljit_s32 size;
	sljit_u8 *inst;

	CHECK_ERROR();
	CHECK(check_sljit_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size));
	set_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size);

	compiler->args = args;

#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
	/* [esp+0] for saving temporaries and third argument for calls. */
	compiler->saveds_offset = 1 * sizeof(sljit_sw);
#else
	/* [esp+0] for saving temporaries and space for maximum three arguments. */
	if (scratches <= 1)
		compiler->saveds_offset = 1 * sizeof(sljit_sw);
	else
		compiler->saveds_offset = ((scratches == 2) ? 2 : 3) * sizeof(sljit_sw);
#endif

	if (scratches > 3)
		compiler->saveds_offset += ((scratches > (3 + 6)) ? 6 : (scratches - 3)) * sizeof(sljit_sw);

	compiler->locals_offset = compiler->saveds_offset;

	if (saveds > 3)
		compiler->locals_offset += (saveds - 3) * sizeof(sljit_sw);

	if (options & SLJIT_F64_ALIGNMENT)
		compiler->locals_offset = (compiler->locals_offset + sizeof(sljit_f64) - 1) & ~(sizeof(sljit_f64) - 1);

	size = 1 + (scratches > 9 ? (scratches - 9) : 0) + (saveds <= 3 ? saveds : 3);
#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
	size += (args > 0 ? (args * 2) : 0) + (args > 2 ? 2 : 0);
#else
	size += (args > 0 ? (2 + args * 3) : 0);
#endif
	inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
	FAIL_IF(!inst);

	INC_SIZE(size);
	PUSH_REG(reg_map[TMP_REG1]);
#if !(defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
	if (args > 0) {
		*inst++ = MOV_r_rm;
		*inst++ = MOD_REG | (reg_map[TMP_REG1] << 3) | 0x4 /* esp */;
	}
#endif
	if (saveds > 2 || scratches > 9)
		PUSH_REG(reg_map[SLJIT_S2]);
	if (saveds > 1 || scratches > 10)
		PUSH_REG(reg_map[SLJIT_S1]);
	if (saveds > 0 || scratches > 11)
		PUSH_REG(reg_map[SLJIT_S0]);

#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
	if (args > 0) {
		*inst++ = MOV_r_rm;
		*inst++ = MOD_REG | (reg_map[SLJIT_S0] << 3) | reg_map[SLJIT_R2];
	}
	if (args > 1) {
		*inst++ = MOV_r_rm;
		*inst++ = MOD_REG | (reg_map[SLJIT_S1] << 3) | reg_map[SLJIT_R1];
	}
	if (args > 2) {
		*inst++ = MOV_r_rm;
		*inst++ = MOD_DISP8 | (reg_map[SLJIT_S2] << 3) | 0x4 /* esp */;
		*inst++ = 0x24;
		*inst++ = sizeof(sljit_sw) * (3 + 2); /* saveds >= 3 as well. */
	}
#else
	if (args > 0) {
		*inst++ = MOV_r_rm;
		*inst++ = MOD_DISP8 | (reg_map[SLJIT_S0] << 3) | reg_map[TMP_REG1];
		*inst++ = sizeof(sljit_sw) * 2;
	}
	if (args > 1) {
		*inst++ = MOV_r_rm;
		*inst++ = MOD_DISP8 | (reg_map[SLJIT_S1] << 3) | reg_map[TMP_REG1];
		*inst++ = sizeof(sljit_sw) * 3;
	}
	if (args > 2) {
		*inst++ = MOV_r_rm;
		*inst++ = MOD_DISP8 | (reg_map[SLJIT_S2] << 3) | reg_map[TMP_REG1];
		*inst++ = sizeof(sljit_sw) * 4;
	}
#endif

	SLJIT_ASSERT(SLJIT_LOCALS_OFFSET > 0);

#if defined(__APPLE__)
	/* Ignore pushed registers and SLJIT_LOCALS_OFFSET when computing the aligned local size. */
	saveds = (2 + (scratches > 9 ? (scratches - 9) : 0) + (saveds <= 3 ? saveds : 3)) * sizeof(sljit_uw);
	local_size = ((SLJIT_LOCALS_OFFSET + saveds + local_size + 15) & ~15) - saveds;
#else
	if (options & SLJIT_F64_ALIGNMENT)
		local_size = SLJIT_LOCALS_OFFSET + ((local_size + sizeof(sljit_f64) - 1) & ~(sizeof(sljit_f64) - 1));
	else
		local_size = SLJIT_LOCALS_OFFSET + ((local_size + sizeof(sljit_sw) - 1) & ~(sizeof(sljit_sw) - 1));
#endif

	compiler->local_size = local_size;

#ifdef _WIN32
	if (local_size > 1024) {
#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
		FAIL_IF(emit_do_imm(compiler, MOV_r_i32 + reg_map[SLJIT_R0], local_size));
#else
		/* Space for a single argument. This amount is excluded when the stack is allocated below. */
		local_size -= sizeof(sljit_sw);
		FAIL_IF(emit_do_imm(compiler, MOV_r_i32 + reg_map[SLJIT_R0], local_size));
		FAIL_IF(emit_non_cum_binary(compiler, SUB_r_rm, SUB_rm_r, SUB, SUB_EAX_i32,
			SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, sizeof(sljit_sw)));
#endif
		FAIL_IF(sljit_emit_ijump(compiler, SLJIT_CALL1, SLJIT_IMM, SLJIT_FUNC_OFFSET(sljit_grow_stack)));
	}
#endif

	SLJIT_ASSERT(local_size > 0);

#if !defined(__APPLE__)
	if (options & SLJIT_F64_ALIGNMENT) {
		EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_SP, 0);

		/* Some space might be allocated by sljit_grow_stack() above on WIN32. */
		FAIL_IF(emit_non_cum_binary(compiler, SUB_r_rm, SUB_rm_r, SUB, SUB_EAX_i32,
			SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, local_size + sizeof(sljit_sw)));

#if defined _WIN32 && !(defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
		if (compiler->local_size > 1024)
			FAIL_IF(emit_cum_binary(compiler, ADD_r_rm, ADD_rm_r, ADD, ADD_EAX_i32,
				TMP_REG1, 0, TMP_REG1, 0, SLJIT_IMM, sizeof(sljit_sw)));
#endif

		inst = (sljit_u8*)ensure_buf(compiler, 1 + 6);
		FAIL_IF(!inst);

		INC_SIZE(6);
		inst[0] = GROUP_BINARY_81;
		inst[1] = MOD_REG | AND | reg_map[SLJIT_SP];
		sljit_unaligned_store_sw(inst + 2, ~(sizeof(sljit_f64) - 1));

		/* The real local size must be used. */
		return emit_mov(compiler, SLJIT_MEM1(SLJIT_SP), compiler->local_size, TMP_REG1, 0);
	}
#endif
	return emit_non_cum_binary(compiler, SUB_r_rm, SUB_rm_r, SUB, SUB_EAX_i32,
		SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, local_size);
}
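
/* Added usage sketch (hedged): callers pair sljit_emit_enter() with
   sljit_emit_return() through the public API; the prologue above pushes the
   required saved registers, loads up to three arguments into SLJIT_S0..S2
   and reserves local_size bytes.  The compiled-out block below builds a
   trivial "return first argument + 1" function and assumes the sljit.h entry
   points of this version (integer argument count,
   sljit_create_compiler(allocator_data)). */
#if 0
static void *example_build_add_one(void)
{
	struct sljit_compiler *compiler = sljit_create_compiler(NULL);
	void *code;

	/* 1 argument, 1 scratch, 1 saved register, no float registers, no locals. */
	sljit_emit_enter(compiler, 0, 1, 1, 1, 0, 0, 0);
	sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R0, 0, SLJIT_S0, 0, SLJIT_IMM, 1);
	sljit_emit_return(compiler, SLJIT_MOV, SLJIT_R0, 0);

	code = sljit_generate_code(compiler);
	sljit_free_compiler(compiler);
	return code;
}
#endif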

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler,
	sljit_s32 options, sljit_s32 args, sljit_s32 scratches, sljit_s32 saveds,
	sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
{
	CHECK_ERROR();
	CHECK(check_sljit_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size));
	set_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size);

	compiler->args = args;

#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
	/* [esp+0] for saving temporaries and third argument for calls. */
	compiler->saveds_offset = 1 * sizeof(sljit_sw);
#else
	/* [esp+0] for saving temporaries and space for maximum three arguments. */
	if (scratches <= 1)
		compiler->saveds_offset = 1 * sizeof(sljit_sw);
	else
		compiler->saveds_offset = ((scratches == 2) ? 2 : 3) * sizeof(sljit_sw);
#endif

	if (scratches > 3)
		compiler->saveds_offset += ((scratches > (3 + 6)) ? 6 : (scratches - 3)) * sizeof(sljit_sw);

	compiler->locals_offset = compiler->saveds_offset;

	if (saveds > 3)
		compiler->locals_offset += (saveds - 3) * sizeof(sljit_sw);

	if (options & SLJIT_F64_ALIGNMENT)
		compiler->locals_offset = (compiler->locals_offset + sizeof(sljit_f64) - 1) & ~(sizeof(sljit_f64) - 1);

#if defined(__APPLE__)
	saveds = (2 + (scratches > 9 ? (scratches - 9) : 0) + (saveds <= 3 ? saveds : 3)) * sizeof(sljit_uw);
	compiler->local_size = ((SLJIT_LOCALS_OFFSET + saveds + local_size + 15) & ~15) - saveds;
#else
	if (options & SLJIT_F64_ALIGNMENT)
		compiler->local_size = SLJIT_LOCALS_OFFSET + ((local_size + sizeof(sljit_f64) - 1) & ~(sizeof(sljit_f64) - 1));
	else
		compiler->local_size = SLJIT_LOCALS_OFFSET + ((local_size + sizeof(sljit_sw) - 1) & ~(sizeof(sljit_sw) - 1));
#endif
	return SLJIT_SUCCESS;
}

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 size;
	sljit_u8 *inst;

	CHECK_ERROR();
	CHECK(check_sljit_emit_return(compiler, op, src, srcw));
	SLJIT_ASSERT(compiler->args >= 0);

	FAIL_IF(emit_mov_before_return(compiler, op, src, srcw));

	SLJIT_ASSERT(compiler->local_size > 0);

#if !defined(__APPLE__)
	if (compiler->options & SLJIT_F64_ALIGNMENT)
		EMIT_MOV(compiler, SLJIT_SP, 0, SLJIT_MEM1(SLJIT_SP), compiler->local_size)
	else
		FAIL_IF(emit_cum_binary(compiler, ADD_r_rm, ADD_rm_r, ADD, ADD_EAX_i32,
			SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, compiler->local_size));
#else
	FAIL_IF(emit_cum_binary(compiler, ADD_r_rm, ADD_rm_r, ADD, ADD_EAX_i32,
		SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, compiler->local_size));
#endif

	size = 2 + (compiler->scratches > 7 ? (compiler->scratches - 7) : 0) +
		(compiler->saveds <= 3 ? compiler->saveds : 3);
#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
	if (compiler->args > 2)
		size += 2;
#else
	if (compiler->args > 0)
		size += 2;
#endif
	inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
	FAIL_IF(!inst);

	INC_SIZE(size);

	if (compiler->saveds > 0 || compiler->scratches > 11)
		POP_REG(reg_map[SLJIT_S0]);
	if (compiler->saveds > 1 || compiler->scratches > 10)
		POP_REG(reg_map[SLJIT_S1]);
	if (compiler->saveds > 2 || compiler->scratches > 9)
		POP_REG(reg_map[SLJIT_S2]);
	POP_REG(reg_map[TMP_REG1]);
#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
	if (compiler->args > 2)
		RET_I16(sizeof(sljit_sw));
	else
		RET();
#else
	RET();
#endif

	return SLJIT_SUCCESS;
}
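
/* Added note (hedged): with fastcall and more than two arguments the third
   argument was pushed by the caller, so the epilogue above returns with
   "ret imm16" (C2 iw) to pop that stack word; otherwise a plain "ret" (C3)
   is used.  Compiled-out sketch of the two byte patterns: */
#if 0
static void example_ret_forms(sljit_u8 *inst)
{
	inst[0] = 0xc3;					/* ret */

	inst[1] = 0xc2;					/* ret imm16 ... */
	sljit_unaligned_store_s16(inst + 2, sizeof(sljit_sw));	/* ... popping one extra word */
}
#endif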

/* --------------------------------------------------------------------- */
/*  Operators                                                            */
/* --------------------------------------------------------------------- */

/* Size contains the flags as well. */
static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_s32 size,
	/* The register or immediate operand. */
	sljit_s32 a, sljit_sw imma,
	/* The general operand (not immediate). */
	sljit_s32 b, sljit_sw immb)
{
	sljit_u8 *inst;
	sljit_u8 *buf_ptr;
	sljit_s32 flags = size & ~0xf;
	sljit_s32 inst_size;

	/* Both cannot be switched on. */
	SLJIT_ASSERT((flags & (EX86_BIN_INS | EX86_SHIFT_INS)) != (EX86_BIN_INS | EX86_SHIFT_INS));
	/* Size flags not allowed for typed instructions. */
	SLJIT_ASSERT(!(flags & (EX86_BIN_INS | EX86_SHIFT_INS)) || (flags & (EX86_BYTE_ARG | EX86_HALF_ARG)) == 0);
	/* Both size flags cannot be switched on. */
	SLJIT_ASSERT((flags & (EX86_BYTE_ARG | EX86_HALF_ARG)) != (EX86_BYTE_ARG | EX86_HALF_ARG));
	/* SSE2 and immediate is not possible. */
	SLJIT_ASSERT(!(a & SLJIT_IMM) || !(flags & EX86_SSE2));
	SLJIT_ASSERT((flags & (EX86_PREF_F2 | EX86_PREF_F3)) != (EX86_PREF_F2 | EX86_PREF_F3)
		&& (flags & (EX86_PREF_F2 | EX86_PREF_66)) != (EX86_PREF_F2 | EX86_PREF_66)
		&& (flags & (EX86_PREF_F3 | EX86_PREF_66)) != (EX86_PREF_F3 | EX86_PREF_66));

	size &= 0xf;
	inst_size = size;

	if (flags & (EX86_PREF_F2 | EX86_PREF_F3))
		inst_size++;
	if (flags & EX86_PREF_66)
		inst_size++;

	/* Calculate size of b. */
	inst_size += 1; /* mod r/m byte. */
	if (b & SLJIT_MEM) {
		if ((b & REG_MASK) == SLJIT_UNUSED)
			inst_size += sizeof(sljit_sw);
		else if (immb != 0 && !(b & OFFS_REG_MASK)) {
			/* Immediate operand. */
			if (immb <= 127 && immb >= -128)
				inst_size += sizeof(sljit_s8);
			else
				inst_size += sizeof(sljit_sw);
		}

		if ((b & REG_MASK) == SLJIT_SP && !(b & OFFS_REG_MASK))
			b |= TO_OFFS_REG(SLJIT_SP);

		if ((b & OFFS_REG_MASK) != SLJIT_UNUSED)
			inst_size += 1; /* SIB byte. */
	}

	/* Calculate size of a. */
	if (a & SLJIT_IMM) {
		if (flags & EX86_BIN_INS) {
			if (imma <= 127 && imma >= -128) {
				inst_size += 1;
				flags |= EX86_BYTE_ARG;
			} else
				inst_size += 4;
		}
		else if (flags & EX86_SHIFT_INS) {
			imma &= 0x1f;
			if (imma != 1) {
				inst_size++;
				flags |= EX86_BYTE_ARG;
			}
		} else if (flags & EX86_BYTE_ARG)
			inst_size++;
		else if (flags & EX86_HALF_ARG)
			inst_size += sizeof(short);
		else
			inst_size += sizeof(sljit_sw);
	}
	else
		SLJIT_ASSERT(!(flags & EX86_SHIFT_INS) || a == SLJIT_PREF_SHIFT_REG);

	inst = (sljit_u8*)ensure_buf(compiler, 1 + inst_size);
	PTR_FAIL_IF(!inst);

	/* Encoding the byte. */
	INC_SIZE(inst_size);
	if (flags & EX86_PREF_F2)
		*inst++ = 0xf2;
	if (flags & EX86_PREF_F3)
		*inst++ = 0xf3;
	if (flags & EX86_PREF_66)
		*inst++ = 0x66;

	buf_ptr = inst + size;

	/* Encode mod/rm byte. */
	if (!(flags & EX86_SHIFT_INS)) {
		if ((flags & EX86_BIN_INS) && (a & SLJIT_IMM))
			*inst = (flags & EX86_BYTE_ARG) ? GROUP_BINARY_83 : GROUP_BINARY_81;

		if ((a & SLJIT_IMM) || (a == 0))
			*buf_ptr = 0;
		else if (!(flags & EX86_SSE2_OP1))
			*buf_ptr = reg_map[a] << 3;
		else
			*buf_ptr = a << 3;
	}
	else {
		if (a & SLJIT_IMM) {
			if (imma == 1)
				*inst = GROUP_SHIFT_1;
			else
				*inst = GROUP_SHIFT_N;
		} else
			*inst = GROUP_SHIFT_CL;
		*buf_ptr = 0;
	}

	if (!(b & SLJIT_MEM))
		*buf_ptr++ |= MOD_REG + ((!(flags & EX86_SSE2_OP2)) ? reg_map[b] : b);
	else if ((b & REG_MASK) != SLJIT_UNUSED) {
		if ((b & OFFS_REG_MASK) == SLJIT_UNUSED || (b & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_SP)) {
			if (immb != 0) {
				if (immb <= 127 && immb >= -128)
					*buf_ptr |= 0x40;
				else
					*buf_ptr |= 0x80;
			}

			if ((b & OFFS_REG_MASK) == SLJIT_UNUSED)
				*buf_ptr++ |= reg_map[b & REG_MASK];
			else {
				*buf_ptr++ |= 0x04;
				*buf_ptr++ = reg_map[b & REG_MASK] | (reg_map[OFFS_REG(b)] << 3);
			}

			if (immb != 0) {
				if (immb <= 127 && immb >= -128)
					*buf_ptr++ = immb; /* 8 bit displacement. */
				else {
					sljit_unaligned_store_sw(buf_ptr, immb); /* 32 bit displacement. */
					buf_ptr += sizeof(sljit_sw);
				}
			}
		}
		else {
			*buf_ptr++ |= 0x04;
			*buf_ptr++ = reg_map[b & REG_MASK] | (reg_map[OFFS_REG(b)] << 3) | (immb << 6);
		}
	}
	else {
		*buf_ptr++ |= 0x05;
		sljit_unaligned_store_sw(buf_ptr, immb); /* 32 bit displacement. */
		buf_ptr += sizeof(sljit_sw);
	}

	if (a & SLJIT_IMM) {
		if (flags & EX86_BYTE_ARG)
			*buf_ptr = imma;
		else if (flags & EX86_HALF_ARG)
			sljit_unaligned_store_s16(buf_ptr, imma);
		else if (!(flags & EX86_SHIFT_INS))
			sljit_unaligned_store_sw(buf_ptr, imma);
	}

	return !(flags & EX86_SHIFT_INS) ? inst : (inst + 1);
}
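
/* Added worked example (hedged): emit_x86_instruction() composes the standard
   x86 mod/rm byte mod(2) | reg(3) | rm(3); rm = 100b requests a SIB byte
   scale(2) | index(3) | base(3), and mod = 01b/10b selects an 8/32 bit
   displacement.  For a SLJIT_MEM1(SLJIT_SP) operand with a small offset the
   result is e.g. "mov eax, [esp+8]" = 8B 44 24 08, as in this compiled-out
   sketch: */
#if 0
static void example_encode_mov_eax_esp8(sljit_u8 *inst)
{
	inst[0] = MOV_r_rm;				/* 8B /r */
	inst[1] = MOD_DISP8 | (0 /* eax */ << 3) | 0x04;	/* mod=01, rm=SIB */
	inst[2] = 0x24;					/* scale=0, index=none, base=esp */
	inst[3] = 8;					/* disp8 */
}
#endif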

/* --------------------------------------------------------------------- */
/*  Call / return instructions                                           */
/* --------------------------------------------------------------------- */

static SLJIT_INLINE sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 type)
{
	sljit_u8 *inst;

#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
	inst = (sljit_u8*)ensure_buf(compiler, type >= SLJIT_CALL3 ? 1 + 2 + 1 : 1 + 2);
	FAIL_IF(!inst);
	INC_SIZE(type >= SLJIT_CALL3 ? 2 + 1 : 2);

	if (type >= SLJIT_CALL3)
		PUSH_REG(reg_map[SLJIT_R2]);
	*inst++ = MOV_r_rm;
	*inst++ = MOD_REG | (reg_map[SLJIT_R2] << 3) | reg_map[SLJIT_R0];
#else
	inst = (sljit_u8*)ensure_buf(compiler, 1 + 4 * (type - SLJIT_CALL0));
	FAIL_IF(!inst);
	INC_SIZE(4 * (type - SLJIT_CALL0));

	*inst++ = MOV_rm_r;
	*inst++ = MOD_DISP8 | (reg_map[SLJIT_R0] << 3) | 0x4 /* SIB */;
	*inst++ = (0x4 /* none */ << 3) | reg_map[SLJIT_SP];
	*inst++ = 0;
	if (type >= SLJIT_CALL2) {
		*inst++ = MOV_rm_r;
		*inst++ = MOD_DISP8 | (reg_map[SLJIT_R1] << 3) | 0x4 /* SIB */;
		*inst++ = (0x4 /* none */ << 3) | reg_map[SLJIT_SP];
		*inst++ = sizeof(sljit_sw);
	}
	if (type >= SLJIT_CALL3) {
		*inst++ = MOV_rm_r;
		*inst++ = MOD_DISP8 | (reg_map[SLJIT_R2] << 3) | 0x4 /* SIB */;
		*inst++ = (0x4 /* none */ << 3) | reg_map[SLJIT_SP];
		*inst++ = 2 * sizeof(sljit_sw);
	}
#endif
	return SLJIT_SUCCESS;
}
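
/* Added usage sketch (hedged): the public API expects the arguments already
   loaded into SLJIT_R0..SLJIT_R2 before a SLJIT_CALL0..3 ijump;
   call_with_args() above then moves them into the ABI slots (ecx/edx plus one
   pushed word for fastcall, [esp+0..8] for the plain cdecl layout).  The
   compiled-out block assumes a hypothetical callee() declared elsewhere with
   the matching calling convention. */
#if 0
extern sljit_sw callee(sljit_sw a, sljit_sw b);	/* hypothetical */

static sljit_s32 example_emit_call2(struct sljit_compiler *compiler)
{
	FAIL_IF(sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 10));
	FAIL_IF(sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, 20));
	return sljit_emit_ijump(compiler, SLJIT_CALL2, SLJIT_IMM, SLJIT_FUNC_OFFSET(callee));
}
#endif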

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
{
	sljit_u8 *inst;

	CHECK_ERROR();
	CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw));
	ADJUST_LOCAL_OFFSET(dst, dstw);

	CHECK_EXTRA_REGS(dst, dstw, (void)0);

	/* For UNUSED dst. Uncommon, but possible. */
	if (dst == SLJIT_UNUSED)
		dst = TMP_REG1;

	if (FAST_IS_REG(dst)) {
		/* Unused dest is possible here. */
		inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
		FAIL_IF(!inst);

		INC_SIZE(1);
		POP_REG(reg_map[dst]);
		return SLJIT_SUCCESS;
	}

	/* Memory. */
	inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
	FAIL_IF(!inst);
	*inst++ = POP_rm;
	return SLJIT_SUCCESS;
}
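
/* Added usage sketch (hedged): a SLJIT_FAST_CALL pushes its return address,
   so a fast-called helper grabs it with sljit_emit_fast_enter() on entry and
   hands it back to sljit_emit_fast_return() (below) on exit.  Compiled-out
   sketch of the pairing: */
#if 0
static sljit_s32 example_fast_helper_frame(struct sljit_compiler *compiler)
{
	FAIL_IF(sljit_emit_fast_enter(compiler, SLJIT_R2, 0));	/* pop the return address */
	/* ... helper body ... */
	return sljit_emit_fast_return(compiler, SLJIT_R2, 0);	/* push it back and ret */
}
#endif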

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_return(struct sljit_compiler *compiler, sljit_s32 src, sljit_sw srcw)
{
	sljit_u8 *inst;

	CHECK_ERROR();
	CHECK(check_sljit_emit_fast_return(compiler, src, srcw));
	ADJUST_LOCAL_OFFSET(src, srcw);

	CHECK_EXTRA_REGS(src, srcw, (void)0);

	if (FAST_IS_REG(src)) {
		inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + 1);
		FAIL_IF(!inst);

		INC_SIZE(1 + 1);
		PUSH_REG(reg_map[src]);
	}
	else if (src & SLJIT_MEM) {
		inst = emit_x86_instruction(compiler, 1, 0, 0, src, srcw);
		FAIL_IF(!inst);
		*inst++ = GROUP_FF;
		*inst |= PUSH_rm;

		inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
		FAIL_IF(!inst);
		INC_SIZE(1);
	}
	else {
		/* SLJIT_IMM. */
		inst = (sljit_u8*)ensure_buf(compiler, 1 + 5 + 1);
		FAIL_IF(!inst);

		INC_SIZE(5 + 1);
		*inst++ = PUSH_i32;
		sljit_unaligned_store_sw(inst, srcw);
		inst += sizeof(sljit_sw);
	}

	RET();
	return SLJIT_SUCCESS;
}
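
/* Added note (hedged): the SLJIT_IMM case above emits "push imm32; ret"
   (68 id, C3), i.e. the return machinery is reused as an absolute jump to
   srcw.  Compiled-out sketch of the six resulting bytes: */
#if 0
static void example_push_imm_ret(sljit_u8 *inst, sljit_sw target)
{
	inst[0] = PUSH_i32;			/* 68 */
	sljit_unaligned_store_sw(inst + 1, target);
	inst[5] = 0xc3;				/* ret */
}
#endif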