/*	$NetBSD: sljitNativeX86_32.c,v 1.6 2019/01/20 23:14:16 alnsn Exp $	*/

/*
 *    Stack-less Just-In-Time compiler
 *
 *    Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification, are
 * permitted provided that the following conditions are met:
 *
 *   1. Redistributions of source code must retain the above copyright notice, this list of
 *      conditions and the following disclaimer.
 *
 *   2. Redistributions in binary form must reproduce the above copyright notice, this list
 *      of conditions and the following disclaimer in the documentation and/or other materials
 *      provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
 * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/* x86 32-bit arch dependent functions. */

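/* Emits a one-byte opcode followed by a word-sized immediate. For example
   (a sketch, assuming SLJIT_R0 maps to eax): emit_do_imm(compiler,
   MOV_r_i32 + reg_map[SLJIT_R0], 1024) assembles to B8 00 04 00 00,
   i.e. mov eax, 1024. */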
static sljit_s32 emit_do_imm(struct sljit_compiler *compiler, sljit_u8 opcode, sljit_sw imm)
{
	sljit_u8 *inst;

	inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + sizeof(sljit_sw));
	FAIL_IF(!inst);
	INC_SIZE(1 + sizeof(sljit_sw));
	*inst++ = opcode;
	sljit_unaligned_store_sw(inst, imm);
	return SLJIT_SUCCESS;
}

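/* A far (32 bit displacement) jump is encoded as E9 rel32 for jmp, E8 rel32
   for call, or 0F 8x rel32 for a conditional jump. jump->addr is advanced
   past the opcode bytes so that it points at the displacement field. */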
static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_s32 type, sljit_sw executable_offset)
{
	if (type == SLJIT_JUMP) {
		*code_ptr++ = JMP_i32;
		jump->addr++;
	}
	else if (type >= SLJIT_FAST_CALL) {
		*code_ptr++ = CALL_i32;
		jump->addr++;
	}
	else {
		*code_ptr++ = GROUP_0F;
		*code_ptr++ = get_jump_code(type);
		jump->addr += 2;
	}

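	/* The rel32 displacement is relative to the end of the instruction,
	   hence the jump->addr + 4 term; jumps to labels are left to be
	   patched once the label addresses are known. */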
	if (jump->flags & JUMP_LABEL)
		jump->flags |= PATCH_MW;
	else
		sljit_unaligned_store_sw(code_ptr, jump->u.target - (jump->addr + 4) - (sljit_uw)executable_offset);
	code_ptr += 4;

	return code_ptr;
}

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler,
	sljit_s32 options, sljit_s32 args, sljit_s32 scratches, sljit_s32 saveds,
	sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
{
	sljit_s32 size;
	sljit_u8 *inst;

	CHECK_ERROR();
	CHECK(check_sljit_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size));
	set_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size);

	compiler->args = args;

#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
	/* [esp+0] for saving temporaries and third argument for calls. */
	compiler->saveds_offset = 1 * sizeof(sljit_sw);
#else
	/* [esp+0] for saving temporaries and space for maximum three arguments. */
	if (scratches <= 1)
		compiler->saveds_offset = 1 * sizeof(sljit_sw);
	else
		compiler->saveds_offset = ((scratches == 2) ? 2 : 3) * sizeof(sljit_sw);
#endif

	if (scratches > 3)
		compiler->saveds_offset += ((scratches > (3 + 6)) ? 6 : (scratches - 3)) * sizeof(sljit_sw);

	compiler->locals_offset = compiler->saveds_offset;

	if (saveds > 3)
		compiler->locals_offset += (saveds - 3) * sizeof(sljit_sw);

	if (options & SLJIT_F64_ALIGNMENT)
		compiler->locals_offset = (compiler->locals_offset + sizeof(sljit_f64) - 1) & ~(sizeof(sljit_f64) - 1);

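	/* Number of prologue bytes: each PUSH below is a single byte, and the
	   argument moves emitted further down account for the rest. */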
	size = 1 + (scratches > 9 ? (scratches - 9) : 0) + (saveds <= 3 ? saveds : 3);
#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
	size += (args > 0 ? (args * 2) : 0) + (args > 2 ? 2 : 0);
#else
	size += (args > 0 ? (2 + args * 3) : 0);
#endif
	inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
	FAIL_IF(!inst);

	INC_SIZE(size);
	PUSH_REG(reg_map[TMP_REG1]);
#if !(defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
	if (args > 0) {
		*inst++ = MOV_r_rm;
		*inst++ = MOD_REG | (reg_map[TMP_REG1] << 3) | 0x4 /* esp */;
	}
#endif
	if (saveds > 2 || scratches > 9)
		PUSH_REG(reg_map[SLJIT_S2]);
	if (saveds > 1 || scratches > 10)
		PUSH_REG(reg_map[SLJIT_S1]);
	if (saveds > 0 || scratches > 11)
		PUSH_REG(reg_map[SLJIT_S0]);

#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
	if (args > 0) {
		*inst++ = MOV_r_rm;
		*inst++ = MOD_REG | (reg_map[SLJIT_S0] << 3) | reg_map[SLJIT_R2];
	}
	if (args > 1) {
		*inst++ = MOV_r_rm;
		*inst++ = MOD_REG | (reg_map[SLJIT_S1] << 3) | reg_map[SLJIT_R1];
	}
	if (args > 2) {
		*inst++ = MOV_r_rm;
		*inst++ = MOD_DISP8 | (reg_map[SLJIT_S2] << 3) | 0x4 /* esp */;
		*inst++ = 0x24;
		*inst++ = sizeof(sljit_sw) * (3 + 2); /* saveds >= 3 as well. */
	}
#else
	if (args > 0) {
		*inst++ = MOV_r_rm;
		*inst++ = MOD_DISP8 | (reg_map[SLJIT_S0] << 3) | reg_map[TMP_REG1];
		*inst++ = sizeof(sljit_sw) * 2;
	}
	if (args > 1) {
		*inst++ = MOV_r_rm;
		*inst++ = MOD_DISP8 | (reg_map[SLJIT_S1] << 3) | reg_map[TMP_REG1];
		*inst++ = sizeof(sljit_sw) * 3;
	}
	if (args > 2) {
		*inst++ = MOV_r_rm;
		*inst++ = MOD_DISP8 | (reg_map[SLJIT_S2] << 3) | reg_map[TMP_REG1];
		*inst++ = sizeof(sljit_sw) * 4;
	}
#endif

	SLJIT_ASSERT(SLJIT_LOCALS_OFFSET > 0);

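	/* The macOS ABI requires the stack to stay 16 byte aligned at call
	   sites, so the frame size is rounded up to a multiple of 16. */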
#if defined(__APPLE__)
	/* Ignore pushed registers and SLJIT_LOCALS_OFFSET when computing the aligned local size. */
	saveds = (2 + (scratches > 9 ? (scratches - 9) : 0) + (saveds <= 3 ? saveds : 3)) * sizeof(sljit_uw);
	local_size = ((SLJIT_LOCALS_OFFSET + saveds + local_size + 15) & ~15) - saveds;
#else
	if (options & SLJIT_F64_ALIGNMENT)
		local_size = SLJIT_LOCALS_OFFSET + ((local_size + sizeof(sljit_f64) - 1) & ~(sizeof(sljit_f64) - 1));
	else
		local_size = SLJIT_LOCALS_OFFSET + ((local_size + sizeof(sljit_sw) - 1) & ~(sizeof(sljit_sw) - 1));
#endif

	compiler->local_size = local_size;

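	/* Windows commits stack pages lazily through a guard page, so larger
	   frames must be probed page by page (sljit_grow_stack) before esp
	   is lowered past the committed area. */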
#ifdef _WIN32
	if (local_size > 1024) {
#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
		FAIL_IF(emit_do_imm(compiler, MOV_r_i32 + reg_map[SLJIT_R0], local_size));
#else
		/* Space for a single argument. This amount is excluded when the stack is allocated below. */
		local_size -= sizeof(sljit_sw);
		FAIL_IF(emit_do_imm(compiler, MOV_r_i32 + reg_map[SLJIT_R0], local_size));
		FAIL_IF(emit_non_cum_binary(compiler, SUB_r_rm, SUB_rm_r, SUB, SUB_EAX_i32,
			SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, sizeof(sljit_sw)));
#endif
		FAIL_IF(sljit_emit_ijump(compiler, SLJIT_CALL1, SLJIT_IMM, SLJIT_FUNC_OFFSET(sljit_grow_stack)));
	}
#endif

	SLJIT_ASSERT(local_size > 0);

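	/* Run-time double alignment: the entry esp is saved in TMP_REG1, esp
	   is rounded down to a multiple of sizeof(sljit_f64) with an AND, and
	   the saved value is spilled to [esp + local_size] so that the
	   epilogue can restore it with a single mov. */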
#if !defined(__APPLE__)
	if (options & SLJIT_F64_ALIGNMENT) {
		EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_SP, 0);

		/* Some space might be allocated during sljit_grow_stack() above on WIN32. */
		FAIL_IF(emit_non_cum_binary(compiler, SUB_r_rm, SUB_rm_r, SUB, SUB_EAX_i32,
			SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, local_size + sizeof(sljit_sw)));

#if defined _WIN32 && !(defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
		if (compiler->local_size > 1024)
			FAIL_IF(emit_cum_binary(compiler, ADD_r_rm, ADD_rm_r, ADD, ADD_EAX_i32,
				TMP_REG1, 0, TMP_REG1, 0, SLJIT_IMM, sizeof(sljit_sw)));
#endif

		inst = (sljit_u8*)ensure_buf(compiler, 1 + 6);
		FAIL_IF(!inst);

		INC_SIZE(6);
		inst[0] = GROUP_BINARY_81;
		inst[1] = MOD_REG | AND | reg_map[SLJIT_SP];
		sljit_unaligned_store_sw(inst + 2, ~(sizeof(sljit_f64) - 1));

		/* The real local size must be used. */
		return emit_mov(compiler, SLJIT_MEM1(SLJIT_SP), compiler->local_size, TMP_REG1, 0);
	}
#endif
	return emit_non_cum_binary(compiler, SUB_r_rm, SUB_rm_r, SUB, SUB_EAX_i32,
		SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, local_size);
}

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler,
	sljit_s32 options, sljit_s32 args, sljit_s32 scratches, sljit_s32 saveds,
	sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
{
	CHECK_ERROR();
	CHECK(check_sljit_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size));
	set_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size);

	compiler->args = args;

#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
	/* [esp+0] for saving temporaries and third argument for calls. */
	compiler->saveds_offset = 1 * sizeof(sljit_sw);
#else
	/* [esp+0] for saving temporaries and space for maximum three arguments. */
	if (scratches <= 1)
		compiler->saveds_offset = 1 * sizeof(sljit_sw);
	else
		compiler->saveds_offset = ((scratches == 2) ? 2 : 3) * sizeof(sljit_sw);
#endif

	if (scratches > 3)
		compiler->saveds_offset += ((scratches > (3 + 6)) ? 6 : (scratches - 3)) * sizeof(sljit_sw);

	compiler->locals_offset = compiler->saveds_offset;

	if (saveds > 3)
		compiler->locals_offset += (saveds - 3) * sizeof(sljit_sw);

	if (options & SLJIT_F64_ALIGNMENT)
		compiler->locals_offset = (compiler->locals_offset + sizeof(sljit_f64) - 1) & ~(sizeof(sljit_f64) - 1);

#if defined(__APPLE__)
	saveds = (2 + (scratches > 9 ? (scratches - 9) : 0) + (saveds <= 3 ? saveds : 3)) * sizeof(sljit_uw);
	compiler->local_size = ((SLJIT_LOCALS_OFFSET + saveds + local_size + 15) & ~15) - saveds;
#else
	if (options & SLJIT_F64_ALIGNMENT)
		compiler->local_size = SLJIT_LOCALS_OFFSET + ((local_size + sizeof(sljit_f64) - 1) & ~(sizeof(sljit_f64) - 1));
	else
		compiler->local_size = SLJIT_LOCALS_OFFSET + ((local_size + sizeof(sljit_sw) - 1) & ~(sizeof(sljit_sw) - 1));
#endif
	return SLJIT_SUCCESS;
}

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 size;
	sljit_u8 *inst;

	CHECK_ERROR();
	CHECK(check_sljit_emit_return(compiler, op, src, srcw));
	SLJIT_ASSERT(compiler->args >= 0);

	FAIL_IF(emit_mov_before_return(compiler, op, src, srcw));

	SLJIT_ASSERT(compiler->local_size > 0);

#if !defined(__APPLE__)
	if (compiler->options & SLJIT_F64_ALIGNMENT)
		EMIT_MOV(compiler, SLJIT_SP, 0, SLJIT_MEM1(SLJIT_SP), compiler->local_size)
	else
		FAIL_IF(emit_cum_binary(compiler, ADD_r_rm, ADD_rm_r, ADD, ADD_EAX_i32,
			SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, compiler->local_size));
#else
	FAIL_IF(emit_cum_binary(compiler, ADD_r_rm, ADD_rm_r, ADD, ADD_EAX_i32,
		SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, compiler->local_size));
#endif

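	/* Buffer space reserved for the epilogue: the POPs below are one byte
	   each and RET is one byte (three bytes as RET imm16). */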
	size = 2 + (compiler->scratches > 7 ? (compiler->scratches - 7) : 0) +
		(compiler->saveds <= 3 ? compiler->saveds : 3);
#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
	if (compiler->args > 2)
		size += 2;
#else
	if (compiler->args > 0)
		size += 2;
#endif
	inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
	FAIL_IF(!inst);

	INC_SIZE(size);

	if (compiler->saveds > 0 || compiler->scratches > 11)
		POP_REG(reg_map[SLJIT_S0]);
	if (compiler->saveds > 1 || compiler->scratches > 10)
		POP_REG(reg_map[SLJIT_S1]);
	if (compiler->saveds > 2 || compiler->scratches > 9)
		POP_REG(reg_map[SLJIT_S2]);
	POP_REG(reg_map[TMP_REG1]);
#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
	if (compiler->args > 2)
		RET_I16(sizeof(sljit_sw));
	else
		RET();
#else
	RET();
#endif

	return SLJIT_SUCCESS;
}

/* --------------------------------------------------------------------- */
/*  Operators                                                            */
/* --------------------------------------------------------------------- */

/* The size argument packs the instruction flags in its upper bits; the
   low four bits hold the opcode length in bytes. */
static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_s32 size,
	/* The register or immediate operand. */
	sljit_s32 a, sljit_sw imma,
	/* The general operand (not immediate). */
	sljit_s32 b, sljit_sw immb)
{
	sljit_u8 *inst;
	sljit_u8 *buf_ptr;
	sljit_s32 flags = size & ~0xf;
	sljit_s32 inst_size;

	/* Both cannot be switched on. */
	SLJIT_ASSERT((flags & (EX86_BIN_INS | EX86_SHIFT_INS)) != (EX86_BIN_INS | EX86_SHIFT_INS));
	/* Size flags not allowed for typed instructions. */
	SLJIT_ASSERT(!(flags & (EX86_BIN_INS | EX86_SHIFT_INS)) || (flags & (EX86_BYTE_ARG | EX86_HALF_ARG)) == 0);
	/* Both size flags cannot be switched on. */
	SLJIT_ASSERT((flags & (EX86_BYTE_ARG | EX86_HALF_ARG)) != (EX86_BYTE_ARG | EX86_HALF_ARG));
	/* SSE2 and immediate is not possible. */
	SLJIT_ASSERT(!(a & SLJIT_IMM) || !(flags & EX86_SSE2));
	SLJIT_ASSERT((flags & (EX86_PREF_F2 | EX86_PREF_F3)) != (EX86_PREF_F2 | EX86_PREF_F3)
		&& (flags & (EX86_PREF_F2 | EX86_PREF_66)) != (EX86_PREF_F2 | EX86_PREF_66)
		&& (flags & (EX86_PREF_F3 | EX86_PREF_66)) != (EX86_PREF_F3 | EX86_PREF_66));

	size &= 0xf;
	inst_size = size;

	if (flags & (EX86_PREF_F2 | EX86_PREF_F3))
		inst_size++;
	if (flags & EX86_PREF_66)
		inst_size++;

	/* Calculate size of b. */
	inst_size += 1; /* mod r/m byte. */
	if (b & SLJIT_MEM) {
		if ((b & REG_MASK) == SLJIT_UNUSED)
			inst_size += sizeof(sljit_sw);
		else if (immb != 0 && !(b & OFFS_REG_MASK)) {
			/* Immediate operand. */
			if (immb <= 127 && immb >= -128)
				inst_size += sizeof(sljit_s8);
			else
				inst_size += sizeof(sljit_sw);
		}

		if ((b & REG_MASK) == SLJIT_SP && !(b & OFFS_REG_MASK))
			b |= TO_OFFS_REG(SLJIT_SP);

		if ((b & OFFS_REG_MASK) != SLJIT_UNUSED)
			inst_size += 1; /* SIB byte. */
	}

	/* Calculate size of a. */
	if (a & SLJIT_IMM) {
		if (flags & EX86_BIN_INS) {
			if (imma <= 127 && imma >= -128) {
				inst_size += 1;
				flags |= EX86_BYTE_ARG;
			} else
				inst_size += 4;
		}
		else if (flags & EX86_SHIFT_INS) {
			imma &= 0x1f;
			if (imma != 1) {
				inst_size++;
				flags |= EX86_BYTE_ARG;
			}
		} else if (flags & EX86_BYTE_ARG)
			inst_size++;
		else if (flags & EX86_HALF_ARG)
			inst_size += sizeof(short);
		else
			inst_size += sizeof(sljit_sw);
	}
	else
		SLJIT_ASSERT(!(flags & EX86_SHIFT_INS) || a == SLJIT_PREF_SHIFT_REG);

	inst = (sljit_u8*)ensure_buf(compiler, 1 + inst_size);
	PTR_FAIL_IF(!inst);

	/* Encode the instruction. */
	INC_SIZE(inst_size);
	if (flags & EX86_PREF_F2)
		*inst++ = 0xf2;
	if (flags & EX86_PREF_F3)
		*inst++ = 0xf3;
	if (flags & EX86_PREF_66)
		*inst++ = 0x66;

	buf_ptr = inst + size;

	/* Encode mod/rm byte. */
	if (!(flags & EX86_SHIFT_INS)) {
		if ((flags & EX86_BIN_INS) && (a & SLJIT_IMM))
			*inst = (flags & EX86_BYTE_ARG) ? GROUP_BINARY_83 : GROUP_BINARY_81;

		if ((a & SLJIT_IMM) || (a == 0))
			*buf_ptr = 0;
		else if (!(flags & EX86_SSE2_OP1))
			*buf_ptr = reg_map[a] << 3;
		else
			*buf_ptr = a << 3;
	}
	else {
		if (a & SLJIT_IMM) {
			if (imma == 1)
				*inst = GROUP_SHIFT_1;
			else
				*inst = GROUP_SHIFT_N;
		} else
			*inst = GROUP_SHIFT_CL;
		*buf_ptr = 0;
	}

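	/* ModRM mod field: 11 selects a register operand; 00, 01 (0x40) and
	   10 (0x80) select a memory operand with no, 8 bit or 32 bit
	   displacement; rm = 100 escapes to a SIB byte and mod 00 with
	   rm = 101 means disp32 alone. E.g. 8B 44 24 08 is mov eax, [esp+8]. */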
	if (!(b & SLJIT_MEM))
		*buf_ptr++ |= MOD_REG + ((!(flags & EX86_SSE2_OP2)) ? reg_map[b] : b);
	else if ((b & REG_MASK) != SLJIT_UNUSED) {
		if ((b & OFFS_REG_MASK) == SLJIT_UNUSED || (b & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_SP)) {
			if (immb != 0) {
				if (immb <= 127 && immb >= -128)
					*buf_ptr |= 0x40;
				else
					*buf_ptr |= 0x80;
			}

			if ((b & OFFS_REG_MASK) == SLJIT_UNUSED)
				*buf_ptr++ |= reg_map[b & REG_MASK];
			else {
				*buf_ptr++ |= 0x04;
				*buf_ptr++ = reg_map[b & REG_MASK] | (reg_map[OFFS_REG(b)] << 3);
			}

			if (immb != 0) {
				if (immb <= 127 && immb >= -128)
					*buf_ptr++ = immb; /* 8 bit displacement. */
				else {
					sljit_unaligned_store_sw(buf_ptr, immb); /* 32 bit displacement. */
					buf_ptr += sizeof(sljit_sw);
				}
			}
		}
		else {
			*buf_ptr++ |= 0x04;
			*buf_ptr++ = reg_map[b & REG_MASK] | (reg_map[OFFS_REG(b)] << 3) | (immb << 6);
		}
	}
	else {
		*buf_ptr++ |= 0x05;
		sljit_unaligned_store_sw(buf_ptr, immb); /* 32 bit displacement. */
		buf_ptr += sizeof(sljit_sw);
	}

	if (a & SLJIT_IMM) {
		if (flags & EX86_BYTE_ARG)
			*buf_ptr = imma;
		else if (flags & EX86_HALF_ARG)
			sljit_unaligned_store_s16(buf_ptr, imma);
		else if (!(flags & EX86_SHIFT_INS))
			sljit_unaligned_store_sw(buf_ptr, imma);
	}

	return !(flags & EX86_SHIFT_INS) ? inst : (inst + 1);
}

/* --------------------------------------------------------------------- */
/*  Call / return instructions                                           */
/* --------------------------------------------------------------------- */

static SLJIT_INLINE sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 type)
{
	sljit_u8 *inst;

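	/* With fastcall the first two arguments travel in ecx and edx
	   (SLJIT_R2 and SLJIT_R1 in the register map), so only a third
	   argument needs to be pushed; the cdecl path below stores all
	   arguments into the outgoing area at [esp], [esp+4] and [esp+8]. */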
#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
	inst = (sljit_u8*)ensure_buf(compiler, type >= SLJIT_CALL3 ? 1 + 2 + 1 : 1 + 2);
	FAIL_IF(!inst);
	INC_SIZE(type >= SLJIT_CALL3 ? 2 + 1 : 2);

	if (type >= SLJIT_CALL3)
		PUSH_REG(reg_map[SLJIT_R2]);
	*inst++ = MOV_r_rm;
	*inst++ = MOD_REG | (reg_map[SLJIT_R2] << 3) | reg_map[SLJIT_R0];
#else
	inst = (sljit_u8*)ensure_buf(compiler, 1 + 4 * (type - SLJIT_CALL0));
	FAIL_IF(!inst);
	INC_SIZE(4 * (type - SLJIT_CALL0));

	*inst++ = MOV_rm_r;
	*inst++ = MOD_DISP8 | (reg_map[SLJIT_R0] << 3) | 0x4 /* SIB */;
	*inst++ = (0x4 /* none */ << 3) | reg_map[SLJIT_SP];
	*inst++ = 0;
	if (type >= SLJIT_CALL2) {
		*inst++ = MOV_rm_r;
		*inst++ = MOD_DISP8 | (reg_map[SLJIT_R1] << 3) | 0x4 /* SIB */;
		*inst++ = (0x4 /* none */ << 3) | reg_map[SLJIT_SP];
		*inst++ = sizeof(sljit_sw);
	}
	if (type >= SLJIT_CALL3) {
		*inst++ = MOV_rm_r;
		*inst++ = MOD_DISP8 | (reg_map[SLJIT_R2] << 3) | 0x4 /* SIB */;
		*inst++ = (0x4 /* none */ << 3) | reg_map[SLJIT_SP];
		*inst++ = 2 * sizeof(sljit_sw);
	}
#endif
	return SLJIT_SUCCESS;
}

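/* The return address pushed by the CALL of a fast call is popped into dst,
   so the callee can later return to it with sljit_emit_fast_return. */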
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
{
	sljit_u8 *inst;

	CHECK_ERROR();
	CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw));
	ADJUST_LOCAL_OFFSET(dst, dstw);

	CHECK_EXTRA_REGS(dst, dstw, (void)0);

	/* For UNUSED dst. Uncommon, but possible. */
	if (dst == SLJIT_UNUSED)
		dst = TMP_REG1;

	if (FAST_IS_REG(dst)) {
		/* Unused dest is possible here. */
		inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
		FAIL_IF(!inst);

		INC_SIZE(1);
		POP_REG(reg_map[dst]);
		return SLJIT_SUCCESS;
	}

	/* Memory. */
	inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
	FAIL_IF(!inst);
	*inst++ = POP_rm;
	return SLJIT_SUCCESS;
}

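/* Puts the saved return address back onto the stack and executes RET,
   transferring control to that address. */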
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_return(struct sljit_compiler *compiler, sljit_s32 src, sljit_sw srcw)
{
	sljit_u8 *inst;

	CHECK_ERROR();
	CHECK(check_sljit_emit_fast_return(compiler, src, srcw));
	ADJUST_LOCAL_OFFSET(src, srcw);

	CHECK_EXTRA_REGS(src, srcw, (void)0);

	if (FAST_IS_REG(src)) {
		inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + 1);
		FAIL_IF(!inst);

		INC_SIZE(1 + 1);
		PUSH_REG(reg_map[src]);
	}
	else if (src & SLJIT_MEM) {
		inst = emit_x86_instruction(compiler, 1, 0, 0, src, srcw);
		FAIL_IF(!inst);
		*inst++ = GROUP_FF;
		*inst |= PUSH_rm;

		inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
		FAIL_IF(!inst);
		INC_SIZE(1);
	}
	else {
		/* SLJIT_IMM. */
		inst = (sljit_u8*)ensure_buf(compiler, 1 + 5 + 1);
		FAIL_IF(!inst);

		INC_SIZE(5 + 1);
		*inst++ = PUSH_i32;
		sljit_unaligned_store_sw(inst, srcw);
		inst += sizeof(sljit_sw);
	}

	RET();
	return SLJIT_SUCCESS;
}
    604  1.1  alnsn }
    605