/*	$NetBSD: sljitNativeX86_common.c,v 1.10 2021/11/30 12:32:09 christos Exp $	*/

/*
 *    Stack-less Just-In-Time compiler
 *
 *    Copyright Zoltan Herczeg (hzmester (at) freemail.hu). All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification, are
 * permitted provided that the following conditions are met:
 *
 *   1. Redistributions of source code must retain the above copyright notice, this list of
 *      conditions and the following disclaimer.
 *
 *   2. Redistributions in binary form must reproduce the above copyright notice, this list
 *      of conditions and the following disclaimer in the documentation and/or other materials
 *      provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
 * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
     28   1.1     alnsn 
/* Returns a human-readable platform name. SLJIT_CPUINFO is a string
   literal macro, concatenated onto the base name at compile time. */
SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void)
{
	return "x86" SLJIT_CPUINFO;
}
     33   1.1     alnsn 
/*
   32b register indexes:
     0 - EAX
     1 - ECX
     2 - EDX
     3 - EBX
     4 - none
     5 - EBP
     6 - ESI
     7 - EDI
*/

/*
   64b register indexes:
     0 - RAX
     1 - RCX
     2 - RDX
     3 - RBX
     4 - none
     5 - RBP
     6 - RSI
     7 - RDI
     8 - R8   - From now on REX prefix is required
     9 - R9
    10 - R10
    11 - R11
    12 - R12
    13 - R13
    14 - R14
    15 - R15
*/
     65   1.1     alnsn 
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)

/* Last register + 1. */
#define TMP_REG1	(SLJIT_NUMBER_OF_REGISTERS + 2)

/* Maps SLJIT register numbers to x86 machine register encodings.
   NOTE(review): the zero entries appear to be registers without a
   dedicated machine register (see CHECK_EXTRA_REGS below) — confirm. */
static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 3] = {
	0, 0, 2, 1, 0, 0, 0, 0, 0, 0, 7, 6, 3, 4, 5
};

/* On x86-32 the SLJIT_R3..SLJIT_S3 registers are kept in memory:
   rewrite such an operand (p, w) into a stack slot relative to
   SLJIT_SP, then run `do`. Scratch registers are addressed from
   saveds_offset, saved registers from locals_offset. */
#define CHECK_EXTRA_REGS(p, w, do) \
	if (p >= SLJIT_R3 && p <= SLJIT_S3) { \
		if (p <= compiler->scratches) \
			w = compiler->saveds_offset - ((p) - SLJIT_R2) * (sljit_sw)sizeof(sljit_sw); \
		else \
			w = compiler->locals_offset + ((p) - SLJIT_S2) * (sljit_sw)sizeof(sljit_sw); \
		p = SLJIT_MEM1(SLJIT_SP); \
		do; \
	}

#else /* SLJIT_CONFIG_X86_32 */

/* Last register + 1. */
#define TMP_REG1	(SLJIT_NUMBER_OF_REGISTERS + 2)
#define TMP_REG2	(SLJIT_NUMBER_OF_REGISTERS + 3)
#define TMP_REG3	(SLJIT_NUMBER_OF_REGISTERS + 4)

/* Note: r12 & 0x7 == 0b100, which is decoded as "SIB byte present".
   Note: avoid using r12 and r13 for memory addressing;
   therefore r12 is better for SAVED_EREG than SAVED_REG. */
#ifndef _WIN64
/* System V AMD64: 1st argument passed in rdi, 2nd in rsi, 3rd in rdx. */
static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 5] = {
	0, 0, 6, 1, 8, 11, 10, 12, 5, 13, 14, 15, 3, 4, 2, 7, 9
};
/* low-map. reg_map & 0x7. */
static const sljit_u8 reg_lmap[SLJIT_NUMBER_OF_REGISTERS + 5] = {
	0, 0, 6, 1, 0, 3,  2,  4,  5,  5,  6,  7, 3, 4, 2, 7, 1
};
#else
/* Win64: 1st argument passed in rcx, 2nd in rdx, 3rd in r8. */
static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 5] = {
	0, 0, 2, 1, 11, 12, 5, 13, 14, 15, 7, 6, 3, 4, 10, 8, 9
};
/* low-map. reg_map & 0x7. */
static const sljit_u8 reg_lmap[SLJIT_NUMBER_OF_REGISTERS + 5] = {
	0, 0, 2, 1, 3,  4,  5,  5, 6,  7,  7, 6, 3, 4, 2,  0, 1
};
#endif

/* REX prefix bits (x86-64 instruction prefix 0100WRXB). */
#define REX_W		0x48
#define REX_R		0x44
#define REX_X		0x42
#define REX_B		0x41
#define REX		0x40

#ifndef _WIN64
#define HALFWORD_MAX 0x7fffffffl
#define HALFWORD_MIN -0x80000000l
#else
#define HALFWORD_MAX 0x7fffffffll
#define HALFWORD_MIN -0x80000000ll
#endif

/* True when the value fits in a sign-extended 32-bit immediate. */
#define IS_HALFWORD(x)		((x) <= HALFWORD_MAX && (x) >= HALFWORD_MIN)
#define NOT_HALFWORD(x)		((x) > HALFWORD_MAX || (x) < HALFWORD_MIN)

/* All SLJIT registers map to machine registers on x86-64. */
#define CHECK_EXTRA_REGS(p, w, do)

#endif /* SLJIT_CONFIG_X86_32 */
    135   1.1     alnsn 
/* Temporary floating point register index. */
#define TMP_FREG	(0)

/* Size flags for emit_x86_instruction: */
#define EX86_BIN_INS		0x0010
#define EX86_SHIFT_INS		0x0020
#define EX86_REX		0x0040
#define EX86_NO_REXW		0x0080
#define EX86_BYTE_ARG		0x0100
#define EX86_HALF_ARG		0x0200
#define EX86_PREF_66		0x0400
#define EX86_PREF_F2		0x0800
#define EX86_PREF_F3		0x1000
#define EX86_SSE2_OP1		0x2000
#define EX86_SSE2_OP2		0x4000
#define EX86_SSE2		(EX86_SSE2_OP1 | EX86_SSE2_OP2)
    151   1.1     alnsn 
/* --------------------------------------------------------------------- */
/*  Instruction forms                                                    */
/* --------------------------------------------------------------------- */

/* Naming convention: <mnemonic>_<dst>_<src>, where r = general register,
   rm = register or memory (ModRM encoded), i8/i32 = immediate,
   x = XMM register, xm = XMM register or memory.
   Values marked BINARY / SHIFT / GROUP_xx are /digit opcode extensions
   pre-shifted into the ModRM reg field (bits 3..5). */

#define ADD		(/* BINARY */ 0 << 3)
#define ADD_EAX_i32	0x05
#define ADD_r_rm	0x03
#define ADD_rm_r	0x01
#define ADDSD_x_xm	0x58
#define ADC		(/* BINARY */ 2 << 3)
#define ADC_EAX_i32	0x15
#define ADC_r_rm	0x13
#define ADC_rm_r	0x11
#define AND		(/* BINARY */ 4 << 3)
#define AND_EAX_i32	0x25
#define AND_r_rm	0x23
#define AND_rm_r	0x21
#define ANDPD_x_xm	0x54
#define BSR_r_rm	(/* GROUP_0F */ 0xbd)
#define CALL_i32	0xe8
#define CALL_rm		(/* GROUP_FF */ 2 << 3)
#define CDQ		0x99
#define CMOVNE_r_rm	(/* GROUP_0F */ 0x45)
#define CMP		(/* BINARY */ 7 << 3)
#define CMP_EAX_i32	0x3d
#define CMP_r_rm	0x3b
#define CMP_rm_r	0x39
#define CVTPD2PS_x_xm	0x5a
#define CVTSI2SD_x_rm	0x2a
#define CVTTSD2SI_r_xm	0x2c
#define DIV		(/* GROUP_F7 */ 6 << 3)
#define DIVSD_x_xm	0x5e
#define INT3		0xcc
#define IDIV		(/* GROUP_F7 */ 7 << 3)
#define IMUL		(/* GROUP_F7 */ 5 << 3)
#define IMUL_r_rm	(/* GROUP_0F */ 0xaf)
#define IMUL_r_rm_i8	0x6b
#define IMUL_r_rm_i32	0x69
#define JE_i8		0x74
#define JNE_i8		0x75
#define JMP_i8		0xeb
#define JMP_i32		0xe9
#define JMP_rm		(/* GROUP_FF */ 4 << 3)
#define LEA_r_m		0x8d
#define MOV_r_rm	0x8b
#define MOV_r_i32	0xb8
#define MOV_rm_r	0x89
#define MOV_rm_i32	0xc7
#define MOV_rm8_i8	0xc6
#define MOV_rm8_r8	0x88
#define MOVSD_x_xm	0x10
#define MOVSD_xm_x	0x11
#define MOVSXD_r_rm	0x63
#define MOVSX_r_rm8	(/* GROUP_0F */ 0xbe)
#define MOVSX_r_rm16	(/* GROUP_0F */ 0xbf)
#define MOVZX_r_rm8	(/* GROUP_0F */ 0xb6)
#define MOVZX_r_rm16	(/* GROUP_0F */ 0xb7)
#define MUL		(/* GROUP_F7 */ 4 << 3)
#define MULSD_x_xm	0x59
#define NEG_rm		(/* GROUP_F7 */ 3 << 3)
#define NOP		0x90
#define NOT_rm		(/* GROUP_F7 */ 2 << 3)
#define OR		(/* BINARY */ 1 << 3)
#define OR_r_rm		0x0b
#define OR_EAX_i32	0x0d
#define OR_rm_r		0x09
#define OR_rm8_r8	0x08
#define POP_r		0x58
#define POP_rm		0x8f
#define POPF		0x9d
#define PUSH_i32	0x68
#define PUSH_r		0x50
#define PUSH_rm		(/* GROUP_FF */ 6 << 3)
#define PUSHF		0x9c
#define RET_near	0xc3
#define RET_i16		0xc2
#define SBB		(/* BINARY */ 3 << 3)
#define SBB_EAX_i32	0x1d
#define SBB_r_rm	0x1b
#define SBB_rm_r	0x19
#define SAR		(/* SHIFT */ 7 << 3)
#define SHL		(/* SHIFT */ 4 << 3)
#define SHR		(/* SHIFT */ 5 << 3)
#define SUB		(/* BINARY */ 5 << 3)
#define SUB_EAX_i32	0x2d
#define SUB_r_rm	0x2b
#define SUB_rm_r	0x29
#define SUBSD_x_xm	0x5c
#define TEST_EAX_i32	0xa9
#define TEST_rm_r	0x85
#define UCOMISD_x_xm	0x2e
#define UNPCKLPD_x_xm	0x14
#define XCHG_EAX_r	0x90
#define XCHG_r_rm	0x87
#define XOR		(/* BINARY */ 6 << 3)
#define XOR_EAX_i32	0x35
#define XOR_r_rm	0x33
#define XOR_rm_r	0x31
#define XORPD_x_xm	0x57

/* Escape / group opcode bytes; the operation within a group is selected
   by the ModRM reg field (see the /digit defines above). */
#define GROUP_0F	0x0f
#define GROUP_F7	0xf7
#define GROUP_FF	0xff
#define GROUP_BINARY_81	0x81
#define GROUP_BINARY_83	0x83
#define GROUP_SHIFT_1	0xd1
#define GROUP_SHIFT_N	0xc1
#define GROUP_SHIFT_CL	0xd3

/* ModRM mod field values: register-direct / 8-bit displacement. */
#define MOD_REG		0xc0
#define MOD_DISP8	0x40

/* Writes the fragment length byte consumed by sljit_generate_code
   and accounts for it in the total code size. */
#define INC_SIZE(s)			(*inst++ = (s), compiler->size += (s))

#define PUSH_REG(r)			(*inst++ = (PUSH_r + (r)))
#define POP_REG(r)			(*inst++ = (POP_r + (r)))
#define RET()				(*inst++ = (RET_near))
#define RET_I16(n)			(*inst++ = (RET_i16), *inst++ = n, *inst++ = 0)
/* r32, r/m32 */
#define MOV_RM(mod, reg, rm)		(*inst++ = (MOV_r_rm), *inst++ = (mod) << 6 | (reg) << 3 | (rm))
    272   1.5     alnsn 
/* Multithreading does not affect these static variables, since they store
   built-in CPU features. Therefore they can be overwritten by different threads
   if they detect the CPU features in the same time. */
#if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
/* -1 = not yet probed; filled in by get_cpu_features(). */
static sljit_s32 cpu_has_sse2 = -1;
#endif
static sljit_s32 cpu_has_cmov = -1;

/* MSVC needs an intrinsics header for __cpuid(). */
#ifdef _WIN32_WCE
#include <cmnintrin.h>
#elif defined(_MSC_VER) && _MSC_VER >= 1400
#include <intrin.h>
#endif
    286   1.1     alnsn 
/******************************************************/
/*    Unaligned-store functions                       */
/******************************************************/

/* Stores go through SLJIT_MEMCPY so writing displacements/immediates
   into the instruction stream at arbitrary byte offsets avoids
   undefined behavior from misaligned pointer stores. */

static SLJIT_INLINE void sljit_unaligned_store_s16(void *addr, sljit_s16 value)
{
	SLJIT_MEMCPY(addr, &value, sizeof(value));
}

static SLJIT_INLINE void sljit_unaligned_store_s32(void *addr, sljit_s32 value)
{
	SLJIT_MEMCPY(addr, &value, sizeof(value));
}

static SLJIT_INLINE void sljit_unaligned_store_sw(void *addr, sljit_sw value)
{
	SLJIT_MEMCPY(addr, &value, sizeof(value));
}

/******************************************************/
/*    Utility functions                               */
/******************************************************/
    309   1.9     alnsn 
/* Executes CPUID leaf 1 and caches the EDX feature bits in the
   cpu_has_* globals (SSE2 = bit 26, CMOV = bit 15).
   Three implementations: MSVC intrinsic, GCC-style inline asm,
   and MSVC Intel-syntax inline asm as the fallback. */
static void get_cpu_features(void)
{
	sljit_u32 features;

#if defined(_MSC_VER) && _MSC_VER >= 1400

	int CPUInfo[4];
	__cpuid(CPUInfo, 1);
	features = (sljit_u32)CPUInfo[3];

#elif defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__SUNPRO_C) || defined(__lint__)

	/* AT&T syntax. */
	__asm__ (
		"movl $0x1, %%eax\n"
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		/* On x86-32, there is no red zone, so this
		   should work (no need for a local variable).
		   ebx is saved/restored by hand because it may be
		   the PIC register and cannot go in the clobber list. */
		"push %%ebx\n"
#endif
		"cpuid\n"
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		"pop %%ebx\n"
#endif
		"movl %%edx, %0\n"
		: "=g" (features)
		:
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		: "%eax", "%ecx", "%edx"
#else
		: "%rax", "%rbx", "%rcx", "%rdx"
#endif
	);

#else /* _MSC_VER && _MSC_VER >= 1400 */

	/* Intel syntax. */
	__asm {
		mov eax, 1
		cpuid
		mov features, edx
	}

#endif /* _MSC_VER && _MSC_VER >= 1400 */

#if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
	cpu_has_sse2 = (features >> 26) & 0x1;
#endif
	cpu_has_cmov = (features >> 15) & 0x1;
}
    360   1.5     alnsn 
    361   1.8     alnsn static sljit_u8 get_jump_code(sljit_s32 type)
    362   1.1     alnsn {
    363   1.1     alnsn 	switch (type) {
    364   1.8     alnsn 	case SLJIT_EQUAL:
    365   1.8     alnsn 	case SLJIT_EQUAL_F64:
    366   1.5     alnsn 		return 0x84 /* je */;
    367   1.1     alnsn 
    368   1.8     alnsn 	case SLJIT_NOT_EQUAL:
    369   1.8     alnsn 	case SLJIT_NOT_EQUAL_F64:
    370   1.5     alnsn 		return 0x85 /* jne */;
    371   1.1     alnsn 
    372   1.8     alnsn 	case SLJIT_LESS:
    373   1.8     alnsn 	case SLJIT_LESS_F64:
    374   1.5     alnsn 		return 0x82 /* jc */;
    375   1.1     alnsn 
    376   1.8     alnsn 	case SLJIT_GREATER_EQUAL:
    377   1.8     alnsn 	case SLJIT_GREATER_EQUAL_F64:
    378   1.5     alnsn 		return 0x83 /* jae */;
    379   1.1     alnsn 
    380   1.8     alnsn 	case SLJIT_GREATER:
    381   1.8     alnsn 	case SLJIT_GREATER_F64:
    382   1.5     alnsn 		return 0x87 /* jnbe */;
    383   1.1     alnsn 
    384   1.8     alnsn 	case SLJIT_LESS_EQUAL:
    385   1.8     alnsn 	case SLJIT_LESS_EQUAL_F64:
    386   1.5     alnsn 		return 0x86 /* jbe */;
    387   1.1     alnsn 
    388   1.8     alnsn 	case SLJIT_SIG_LESS:
    389   1.5     alnsn 		return 0x8c /* jl */;
    390   1.1     alnsn 
    391   1.8     alnsn 	case SLJIT_SIG_GREATER_EQUAL:
    392   1.5     alnsn 		return 0x8d /* jnl */;
    393   1.1     alnsn 
    394   1.8     alnsn 	case SLJIT_SIG_GREATER:
    395   1.5     alnsn 		return 0x8f /* jnle */;
    396   1.1     alnsn 
    397   1.8     alnsn 	case SLJIT_SIG_LESS_EQUAL:
    398   1.5     alnsn 		return 0x8e /* jle */;
    399   1.1     alnsn 
    400   1.8     alnsn 	case SLJIT_OVERFLOW:
    401   1.8     alnsn 	case SLJIT_MUL_OVERFLOW:
    402   1.5     alnsn 		return 0x80 /* jo */;
    403   1.1     alnsn 
    404   1.8     alnsn 	case SLJIT_NOT_OVERFLOW:
    405   1.8     alnsn 	case SLJIT_MUL_NOT_OVERFLOW:
    406   1.5     alnsn 		return 0x81 /* jno */;
    407   1.1     alnsn 
    408   1.8     alnsn 	case SLJIT_UNORDERED_F64:
    409   1.5     alnsn 		return 0x8a /* jp */;
    410   1.1     alnsn 
    411   1.8     alnsn 	case SLJIT_ORDERED_F64:
    412   1.5     alnsn 		return 0x8b /* jpo */;
    413   1.1     alnsn 	}
    414   1.1     alnsn 	return 0;
    415   1.1     alnsn }
    416   1.1     alnsn 
/* Defined in the architecture specific file included below
   (sljitNativeX86_32.c / sljitNativeX86_64.c). */
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_s32 type, sljit_sw executable_offset);
#else
static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_s32 type);
#endif
    422   1.1     alnsn 
/* Emits the opcode byte(s) of a jump/call in rel8 or rel32 form and
   reserves space for the displacement, which is patched later in
   sljit_generate_code (PATCH_MB = 8-bit, PATCH_MW = word/dword).
   jump->addr is advanced past the opcode so it points at the
   displacement field. Returns the advanced code_ptr. */
static sljit_u8* generate_near_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_u8 *code, sljit_s32 type, sljit_sw executable_offset)
{
	sljit_s32 short_jump;
	sljit_uw label_addr;

	if (jump->flags & JUMP_LABEL)
		label_addr = (sljit_uw)(code + jump->u.label->size);
	else
		label_addr = jump->u.target - executable_offset;

	/* +2 == size of the shortest jump form (opcode + rel8). */
	short_jump = (sljit_sw)(label_addr - (jump->addr + 2)) >= -128 && (sljit_sw)(label_addr - (jump->addr + 2)) <= 127;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	/* Target is out of rel32 range: use the absolute (far) form. */
	if ((sljit_sw)(label_addr - (jump->addr + 1)) > HALFWORD_MAX || (sljit_sw)(label_addr - (jump->addr + 1)) < HALFWORD_MIN)
		return generate_far_jump_code(jump, code_ptr, type);
#endif

	if (type == SLJIT_JUMP) {
		if (short_jump)
			*code_ptr++ = JMP_i8;
		else
			*code_ptr++ = JMP_i32;
		jump->addr++;
	}
	else if (type >= SLJIT_FAST_CALL) {
		/* CALL has no rel8 form. */
		short_jump = 0;
		*code_ptr++ = CALL_i32;
		jump->addr++;
	}
	else if (short_jump) {
		/* Two-byte Jcc (0x0f 0x8x) minus 0x10 gives the
		   one-byte rel8 form (0x7x). */
		*code_ptr++ = get_jump_code(type) - 0x10;
		jump->addr++;
	}
	else {
		*code_ptr++ = GROUP_0F;
		*code_ptr++ = get_jump_code(type);
		jump->addr += 2;
	}

	if (short_jump) {
		jump->flags |= PATCH_MB;
		code_ptr += sizeof(sljit_s8);
	} else {
		jump->flags |= PATCH_MW;
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		code_ptr += sizeof(sljit_sw);
#else
		code_ptr += sizeof(sljit_s32);
#endif
	}

	return code_ptr;
}
    476   1.1     alnsn 
/* Second pass of code generation: concatenates the byte fragments
   recorded in the compiler's buffers into executable memory, resolves
   label addresses and patches jump displacements.
   Fragment format: a length byte; if > 0, that many literal code bytes
   follow; if 0, the next byte is a tag (>= 2: jump record of type
   tag - 2, 0: label record, 1: const record).
   Returns the executable entry point, or NULL on allocation failure. */
SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler)
{
	struct sljit_memory_fragment *buf;
	sljit_u8 *code;
	sljit_u8 *code_ptr;
	sljit_u8 *buf_ptr;
	sljit_u8 *buf_end;
	sljit_u8 len;
	sljit_sw executable_offset;
	sljit_sw jump_addr;

	struct sljit_label *label;
	struct sljit_jump *jump;
	struct sljit_const *const_;

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_generate_code(compiler));
	reverse_buf(compiler);

	/* Second code generation pass. */
	code = (sljit_u8*)SLJIT_MALLOC_EXEC(compiler->size);
	PTR_FAIL_WITH_EXEC_IF(code);
	buf = compiler->buf;

	code_ptr = code;
	label = compiler->labels;
	jump = compiler->jumps;
	const_ = compiler->consts;
	/* Difference between the writable and executable mapping (if any). */
	executable_offset = SLJIT_EXEC_OFFSET(code);

	do {
		buf_ptr = buf->memory;
		buf_end = buf_ptr + buf->used_size;
		do {
			len = *buf_ptr++;
			if (len > 0) {
				/* The code is already generated. */
				SLJIT_MEMCPY(code_ptr, buf_ptr, len);
				code_ptr += len;
				buf_ptr += len;
			}
			else {
				if (*buf_ptr >= 2) {
					/* Jump record: emit opcode bytes now, leave
					   the displacement for the patch loop below. */
					jump->addr = (sljit_uw)code_ptr;
					if (!(jump->flags & SLJIT_REWRITABLE_JUMP))
						code_ptr = generate_near_jump_code(jump, code_ptr, code, *buf_ptr - 2, executable_offset);
					else {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
						code_ptr = generate_far_jump_code(jump, code_ptr, *buf_ptr - 2, executable_offset);
#else
						code_ptr = generate_far_jump_code(jump, code_ptr, *buf_ptr - 2);
#endif
					}
					jump = jump->next;
				}
				else if (*buf_ptr == 0) {
					/* Label record: store the executable address. */
					label->addr = ((sljit_uw)code_ptr) + executable_offset;
					label->size = code_ptr - code;
					label = label->next;
				}
				else { /* *buf_ptr is 1 */
					/* Const record: the constant word was just emitted. */
					const_->addr = ((sljit_uw)code_ptr) - sizeof(sljit_sw);
					const_ = const_->next;
				}
				buf_ptr++;
			}
		} while (buf_ptr < buf_end);
		SLJIT_ASSERT(buf_ptr == buf_end);
		buf = buf->next;
	} while (buf);

	SLJIT_ASSERT(!label);
	SLJIT_ASSERT(!jump);
	SLJIT_ASSERT(!const_);

	/* Patch loop: fill in the jump displacements reserved above.
	   Displacements are relative to the end of the displacement field
	   in the executable mapping, hence the executable_offset term. */
	jump = compiler->jumps;
	while (jump) {
		jump_addr = jump->addr + executable_offset;

		if (jump->flags & PATCH_MB) {
			SLJIT_ASSERT((sljit_sw)(jump->u.label->addr - (jump_addr + sizeof(sljit_s8))) >= -128 && (sljit_sw)(jump->u.label->addr - (jump_addr + sizeof(sljit_s8))) <= 127);
			*(sljit_u8*)jump->addr = (sljit_u8)(jump->u.label->addr - (jump_addr + sizeof(sljit_s8)));
		} else if (jump->flags & PATCH_MW) {
			if (jump->flags & JUMP_LABEL) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
				sljit_unaligned_store_sw((void*)jump->addr, (sljit_sw)(jump->u.label->addr - (jump_addr + sizeof(sljit_sw))));
#else
				SLJIT_ASSERT((sljit_sw)(jump->u.label->addr - (jump_addr + sizeof(sljit_s32))) >= HALFWORD_MIN && (sljit_sw)(jump->u.label->addr - (jump_addr + sizeof(sljit_s32))) <= HALFWORD_MAX);
				sljit_unaligned_store_s32((void*)jump->addr, (sljit_s32)(jump->u.label->addr - (jump_addr + sizeof(sljit_s32))));
#endif
			}
			else {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
				sljit_unaligned_store_sw((void*)jump->addr, (sljit_sw)(jump->u.target - (jump_addr + sizeof(sljit_sw))));
#else
				SLJIT_ASSERT((sljit_sw)(jump->u.target - (jump_addr + sizeof(sljit_s32))) >= HALFWORD_MIN && (sljit_sw)(jump->u.target - (jump_addr + sizeof(sljit_s32))) <= HALFWORD_MAX);
				sljit_unaligned_store_s32((void*)jump->addr, (sljit_s32)(jump->u.target - (jump_addr + sizeof(sljit_s32))));
#endif
			}
		}
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		else if (jump->flags & PATCH_MD)
			/* Absolute 64-bit address (far jump form). */
			sljit_unaligned_store_sw((void*)jump->addr, jump->u.label->addr);
#endif

		jump = jump->next;
	}

	/* Some space may be wasted because of short jumps. */
	SLJIT_ASSERT(code_ptr <= code + compiler->size);
	compiler->error = SLJIT_ERR_COMPILED;
	compiler->executable_offset = executable_offset;
	compiler->executable_size = code_ptr - code;
	return (void*)(code + executable_offset);
}
    592   1.1     alnsn 
/* --------------------------------------------------------------------- */
/*  Operators                                                            */
/* --------------------------------------------------------------------- */

/* Emitters for two-operand ALU instructions; cum = commutative
   (add/and/or/xor...), non_cum = order-sensitive (sub/sbb/cmp...).
   Bodies follow later in this file / in the architecture include. */
static sljit_s32 emit_cum_binary(struct sljit_compiler *compiler,
	sljit_u8 op_rm, sljit_u8 op_mr, sljit_u8 op_imm, sljit_u8 op_eax_imm,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w);

static sljit_s32 emit_non_cum_binary(struct sljit_compiler *compiler,
	sljit_u8 op_rm, sljit_u8 op_mr, sljit_u8 op_imm, sljit_u8 op_eax_imm,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w);

static sljit_s32 emit_mov(struct sljit_compiler *compiler,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw);

/* Convenience wrapper: propagate emit_mov failure to the caller. */
#define EMIT_MOV(compiler, dst, dstw, src, srcw) \
	FAIL_IF(emit_mov(compiler, dst, dstw, src, srcw));
    615   1.1     alnsn 
    616   1.1     alnsn #ifdef _WIN32
    617   1.1     alnsn #include <malloc.h>
    618   1.1     alnsn 
    619   1.5     alnsn static void SLJIT_CALL sljit_grow_stack(sljit_sw local_size)
    620   1.1     alnsn {
    621   1.1     alnsn 	/* Workaround for calling the internal _chkstk() function on Windows.
    622   1.1     alnsn 	This function touches all 4k pages belongs to the requested stack space,
    623   1.1     alnsn 	which size is passed in local_size. This is necessary on Windows where
    624   1.1     alnsn 	the stack can only grow in 4k steps. However, this function just burn
    625   1.5     alnsn 	CPU cycles if the stack is large enough. However, you don't know it in
    626   1.5     alnsn 	advance, so it must always be called. I think this is a bad design in
    627   1.5     alnsn 	general even if it has some reasons. */
    628   1.8     alnsn 	*(volatile sljit_s32*)alloca(local_size) = 0;
    629   1.1     alnsn }
    630   1.1     alnsn 
    631   1.1     alnsn #endif
    632   1.1     alnsn 
    633   1.1     alnsn #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
    634   1.1     alnsn #include "sljitNativeX86_32.c"
    635   1.1     alnsn #else
    636   1.1     alnsn #include "sljitNativeX86_64.c"
    637   1.1     alnsn #endif
    638   1.1     alnsn 
    639   1.8     alnsn static sljit_s32 emit_mov(struct sljit_compiler *compiler,
    640   1.8     alnsn 	sljit_s32 dst, sljit_sw dstw,
    641   1.8     alnsn 	sljit_s32 src, sljit_sw srcw)
    642   1.1     alnsn {
    643   1.8     alnsn 	sljit_u8* inst;
    644   1.1     alnsn 
    645   1.1     alnsn 	if (dst == SLJIT_UNUSED) {
    646   1.1     alnsn 		/* No destination, doesn't need to setup flags. */
    647   1.1     alnsn 		if (src & SLJIT_MEM) {
    648   1.5     alnsn 			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src, srcw);
    649   1.5     alnsn 			FAIL_IF(!inst);
    650   1.5     alnsn 			*inst = MOV_r_rm;
    651   1.1     alnsn 		}
    652   1.1     alnsn 		return SLJIT_SUCCESS;
    653   1.1     alnsn 	}
    654   1.5     alnsn 	if (FAST_IS_REG(src)) {
    655   1.5     alnsn 		inst = emit_x86_instruction(compiler, 1, src, 0, dst, dstw);
    656   1.5     alnsn 		FAIL_IF(!inst);
    657   1.5     alnsn 		*inst = MOV_rm_r;
    658   1.1     alnsn 		return SLJIT_SUCCESS;
    659   1.1     alnsn 	}
    660   1.1     alnsn 	if (src & SLJIT_IMM) {
    661   1.5     alnsn 		if (FAST_IS_REG(dst)) {
    662   1.1     alnsn #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
    663   1.5     alnsn 			return emit_do_imm(compiler, MOV_r_i32 + reg_map[dst], srcw);
    664   1.1     alnsn #else
    665   1.1     alnsn 			if (!compiler->mode32) {
    666   1.1     alnsn 				if (NOT_HALFWORD(srcw))
    667   1.1     alnsn 					return emit_load_imm64(compiler, dst, srcw);
    668   1.1     alnsn 			}
    669   1.1     alnsn 			else
    670   1.5     alnsn 				return emit_do_imm32(compiler, (reg_map[dst] >= 8) ? REX_B : 0, MOV_r_i32 + reg_lmap[dst], srcw);
    671   1.1     alnsn #endif
    672   1.1     alnsn 		}
    673   1.1     alnsn #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
    674   1.1     alnsn 		if (!compiler->mode32 && NOT_HALFWORD(srcw)) {
    675   1.1     alnsn 			FAIL_IF(emit_load_imm64(compiler, TMP_REG2, srcw));
    676   1.5     alnsn 			inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, dst, dstw);
    677   1.5     alnsn 			FAIL_IF(!inst);
    678   1.5     alnsn 			*inst = MOV_rm_r;
    679   1.1     alnsn 			return SLJIT_SUCCESS;
    680   1.1     alnsn 		}
    681   1.1     alnsn #endif
    682   1.5     alnsn 		inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, dstw);
    683   1.5     alnsn 		FAIL_IF(!inst);
    684   1.5     alnsn 		*inst = MOV_rm_i32;
    685   1.1     alnsn 		return SLJIT_SUCCESS;
    686   1.1     alnsn 	}
    687   1.5     alnsn 	if (FAST_IS_REG(dst)) {
    688   1.5     alnsn 		inst = emit_x86_instruction(compiler, 1, dst, 0, src, srcw);
    689   1.5     alnsn 		FAIL_IF(!inst);
    690   1.5     alnsn 		*inst = MOV_r_rm;
    691   1.1     alnsn 		return SLJIT_SUCCESS;
    692   1.1     alnsn 	}
    693   1.1     alnsn 
    694   1.1     alnsn 	/* Memory to memory move. Requires two instruction. */
    695   1.5     alnsn 	inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src, srcw);
    696   1.5     alnsn 	FAIL_IF(!inst);
    697   1.5     alnsn 	*inst = MOV_r_rm;
    698   1.5     alnsn 	inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
    699   1.5     alnsn 	FAIL_IF(!inst);
    700   1.5     alnsn 	*inst = MOV_rm_r;
    701   1.1     alnsn 	return SLJIT_SUCCESS;
    702   1.1     alnsn }
    703   1.1     alnsn 
    704   1.8     alnsn SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op)
    705   1.1     alnsn {
    706   1.8     alnsn 	sljit_u8 *inst;
    707   1.1     alnsn #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
    708   1.8     alnsn 	sljit_s32 size;
    709   1.1     alnsn #endif
    710   1.1     alnsn 
    711   1.1     alnsn 	CHECK_ERROR();
    712   1.8     alnsn 	CHECK(check_sljit_emit_op0(compiler, op));
    713   1.1     alnsn 
    714   1.1     alnsn 	switch (GET_OPCODE(op)) {
    715   1.1     alnsn 	case SLJIT_BREAKPOINT:
    716   1.8     alnsn 		inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
    717   1.5     alnsn 		FAIL_IF(!inst);
    718   1.1     alnsn 		INC_SIZE(1);
    719   1.5     alnsn 		*inst = INT3;
    720   1.1     alnsn 		break;
    721   1.1     alnsn 	case SLJIT_NOP:
    722   1.8     alnsn 		inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
    723   1.5     alnsn 		FAIL_IF(!inst);
    724   1.1     alnsn 		INC_SIZE(1);
    725   1.5     alnsn 		*inst = NOP;
    726   1.1     alnsn 		break;
    727   1.8     alnsn 	case SLJIT_LMUL_UW:
    728   1.8     alnsn 	case SLJIT_LMUL_SW:
    729   1.8     alnsn 	case SLJIT_DIVMOD_UW:
    730   1.8     alnsn 	case SLJIT_DIVMOD_SW:
    731   1.8     alnsn 	case SLJIT_DIV_UW:
    732   1.8     alnsn 	case SLJIT_DIV_SW:
    733   1.1     alnsn #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
    734   1.1     alnsn #ifdef _WIN64
    735   1.9     alnsn 		SLJIT_ASSERT(
    736   1.8     alnsn 			reg_map[SLJIT_R0] == 0
    737   1.8     alnsn 			&& reg_map[SLJIT_R1] == 2
    738   1.9     alnsn 			&& reg_map[TMP_REG1] > 7);
    739   1.1     alnsn #else
    740   1.9     alnsn 		SLJIT_ASSERT(
    741   1.8     alnsn 			reg_map[SLJIT_R0] == 0
    742   1.8     alnsn 			&& reg_map[SLJIT_R1] < 7
    743   1.9     alnsn 			&& reg_map[TMP_REG1] == 2);
    744   1.1     alnsn #endif
    745   1.8     alnsn 		compiler->mode32 = op & SLJIT_I32_OP;
    746   1.1     alnsn #endif
    747   1.8     alnsn 		SLJIT_COMPILE_ASSERT((SLJIT_DIVMOD_UW & 0x2) == 0 && SLJIT_DIV_UW - 0x2 == SLJIT_DIVMOD_UW, bad_div_opcode_assignments);
    748   1.1     alnsn 
    749   1.1     alnsn 		op = GET_OPCODE(op);
    750   1.8     alnsn 		if ((op | 0x2) == SLJIT_DIV_UW) {
    751   1.1     alnsn #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64)
    752   1.8     alnsn 			EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_R1, 0);
    753   1.8     alnsn 			inst = emit_x86_instruction(compiler, 1, SLJIT_R1, 0, SLJIT_R1, 0);
    754   1.1     alnsn #else
    755   1.5     alnsn 			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, TMP_REG1, 0);
    756   1.1     alnsn #endif
    757   1.5     alnsn 			FAIL_IF(!inst);
    758   1.5     alnsn 			*inst = XOR_r_rm;
    759   1.1     alnsn 		}
    760   1.1     alnsn 
    761   1.8     alnsn 		if ((op | 0x2) == SLJIT_DIV_SW) {
    762   1.1     alnsn #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64)
    763   1.8     alnsn 			EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_R1, 0);
    764   1.1     alnsn #endif
    765   1.1     alnsn 
    766   1.1     alnsn #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
    767   1.8     alnsn 			inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
    768   1.5     alnsn 			FAIL_IF(!inst);
    769   1.1     alnsn 			INC_SIZE(1);
    770   1.5     alnsn 			*inst = CDQ;
    771   1.1     alnsn #else
    772   1.1     alnsn 			if (compiler->mode32) {
    773   1.8     alnsn 				inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
    774   1.5     alnsn 				FAIL_IF(!inst);
    775   1.1     alnsn 				INC_SIZE(1);
    776   1.5     alnsn 				*inst = CDQ;
    777   1.1     alnsn 			} else {
    778   1.8     alnsn 				inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
    779   1.5     alnsn 				FAIL_IF(!inst);
    780   1.1     alnsn 				INC_SIZE(2);
    781   1.5     alnsn 				*inst++ = REX_W;
    782   1.5     alnsn 				*inst = CDQ;
    783   1.1     alnsn 			}
    784   1.1     alnsn #endif
    785   1.1     alnsn 		}
    786   1.1     alnsn 
    787   1.1     alnsn #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
    788   1.8     alnsn 		inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
    789   1.5     alnsn 		FAIL_IF(!inst);
    790   1.1     alnsn 		INC_SIZE(2);
    791   1.5     alnsn 		*inst++ = GROUP_F7;
    792   1.8     alnsn 		*inst = MOD_REG | ((op >= SLJIT_DIVMOD_UW) ? reg_map[TMP_REG1] : reg_map[SLJIT_R1]);
    793   1.1     alnsn #else
    794   1.1     alnsn #ifdef _WIN64
    795   1.8     alnsn 		size = (!compiler->mode32 || op >= SLJIT_DIVMOD_UW) ? 3 : 2;
    796   1.1     alnsn #else
    797   1.1     alnsn 		size = (!compiler->mode32) ? 3 : 2;
    798   1.1     alnsn #endif
    799   1.8     alnsn 		inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
    800   1.5     alnsn 		FAIL_IF(!inst);
    801   1.1     alnsn 		INC_SIZE(size);
    802   1.1     alnsn #ifdef _WIN64
    803   1.1     alnsn 		if (!compiler->mode32)
    804   1.8     alnsn 			*inst++ = REX_W | ((op >= SLJIT_DIVMOD_UW) ? REX_B : 0);
    805   1.8     alnsn 		else if (op >= SLJIT_DIVMOD_UW)
    806   1.5     alnsn 			*inst++ = REX_B;
    807   1.5     alnsn 		*inst++ = GROUP_F7;
    808   1.8     alnsn 		*inst = MOD_REG | ((op >= SLJIT_DIVMOD_UW) ? reg_lmap[TMP_REG1] : reg_lmap[SLJIT_R1]);
    809   1.1     alnsn #else
    810   1.1     alnsn 		if (!compiler->mode32)
    811   1.5     alnsn 			*inst++ = REX_W;
    812   1.5     alnsn 		*inst++ = GROUP_F7;
    813   1.8     alnsn 		*inst = MOD_REG | reg_map[SLJIT_R1];
    814   1.1     alnsn #endif
    815   1.1     alnsn #endif
    816   1.1     alnsn 		switch (op) {
    817   1.8     alnsn 		case SLJIT_LMUL_UW:
    818   1.5     alnsn 			*inst |= MUL;
    819   1.1     alnsn 			break;
    820   1.8     alnsn 		case SLJIT_LMUL_SW:
    821   1.5     alnsn 			*inst |= IMUL;
    822   1.1     alnsn 			break;
    823   1.8     alnsn 		case SLJIT_DIVMOD_UW:
    824   1.8     alnsn 		case SLJIT_DIV_UW:
    825   1.5     alnsn 			*inst |= DIV;
    826   1.1     alnsn 			break;
    827   1.8     alnsn 		case SLJIT_DIVMOD_SW:
    828   1.8     alnsn 		case SLJIT_DIV_SW:
    829   1.5     alnsn 			*inst |= IDIV;
    830   1.1     alnsn 			break;
    831   1.1     alnsn 		}
    832   1.1     alnsn #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) && !defined(_WIN64)
    833   1.8     alnsn 		if (op <= SLJIT_DIVMOD_SW)
    834   1.8     alnsn 			EMIT_MOV(compiler, SLJIT_R1, 0, TMP_REG1, 0);
    835   1.8     alnsn #else
    836   1.8     alnsn 		if (op >= SLJIT_DIV_UW)
    837   1.8     alnsn 			EMIT_MOV(compiler, SLJIT_R1, 0, TMP_REG1, 0);
    838   1.1     alnsn #endif
    839   1.1     alnsn 		break;
    840   1.1     alnsn 	}
    841   1.1     alnsn 
    842   1.1     alnsn 	return SLJIT_SUCCESS;
    843   1.1     alnsn }
    844   1.1     alnsn 
    845   1.1     alnsn #define ENCODE_PREFIX(prefix) \
    846   1.1     alnsn 	do { \
    847   1.8     alnsn 		inst = (sljit_u8*)ensure_buf(compiler, 1 + 1); \
    848   1.5     alnsn 		FAIL_IF(!inst); \
    849   1.5     alnsn 		INC_SIZE(1); \
    850   1.5     alnsn 		*inst = (prefix); \
    851   1.1     alnsn 	} while (0)
    852   1.1     alnsn 
    853   1.8     alnsn static sljit_s32 emit_mov_byte(struct sljit_compiler *compiler, sljit_s32 sign,
    854   1.8     alnsn 	sljit_s32 dst, sljit_sw dstw,
    855   1.8     alnsn 	sljit_s32 src, sljit_sw srcw)
    856   1.1     alnsn {
    857   1.8     alnsn 	sljit_u8* inst;
    858   1.8     alnsn 	sljit_s32 dst_r;
    859   1.1     alnsn #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
    860   1.8     alnsn 	sljit_s32 work_r;
    861   1.1     alnsn #endif
    862   1.1     alnsn 
    863   1.1     alnsn #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
    864   1.1     alnsn 	compiler->mode32 = 0;
    865   1.1     alnsn #endif
    866   1.1     alnsn 
    867   1.1     alnsn 	if (dst == SLJIT_UNUSED && !(src & SLJIT_MEM))
    868   1.1     alnsn 		return SLJIT_SUCCESS; /* Empty instruction. */
    869   1.1     alnsn 
    870   1.1     alnsn 	if (src & SLJIT_IMM) {
    871   1.5     alnsn 		if (FAST_IS_REG(dst)) {
    872   1.1     alnsn #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
    873   1.5     alnsn 			return emit_do_imm(compiler, MOV_r_i32 + reg_map[dst], srcw);
    874   1.1     alnsn #else
    875   1.5     alnsn 			inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, 0);
    876   1.5     alnsn 			FAIL_IF(!inst);
    877   1.5     alnsn 			*inst = MOV_rm_i32;
    878   1.5     alnsn 			return SLJIT_SUCCESS;
    879   1.1     alnsn #endif
    880   1.1     alnsn 		}
    881   1.5     alnsn 		inst = emit_x86_instruction(compiler, 1 | EX86_BYTE_ARG | EX86_NO_REXW, SLJIT_IMM, srcw, dst, dstw);
    882   1.5     alnsn 		FAIL_IF(!inst);
    883   1.5     alnsn 		*inst = MOV_rm8_i8;
    884   1.1     alnsn 		return SLJIT_SUCCESS;
    885   1.1     alnsn 	}
    886   1.1     alnsn 
    887   1.5     alnsn 	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
    888   1.1     alnsn 
    889   1.5     alnsn 	if ((dst & SLJIT_MEM) && FAST_IS_REG(src)) {
    890   1.1     alnsn #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
    891   1.1     alnsn 		if (reg_map[src] >= 4) {
    892   1.5     alnsn 			SLJIT_ASSERT(dst_r == TMP_REG1);
    893   1.5     alnsn 			EMIT_MOV(compiler, TMP_REG1, 0, src, 0);
    894   1.1     alnsn 		} else
    895   1.1     alnsn 			dst_r = src;
    896   1.1     alnsn #else
    897   1.1     alnsn 		dst_r = src;
    898   1.1     alnsn #endif
    899   1.1     alnsn 	}
    900   1.1     alnsn #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
    901   1.5     alnsn 	else if (FAST_IS_REG(src) && reg_map[src] >= 4) {
    902   1.1     alnsn 		/* src, dst are registers. */
    903   1.5     alnsn 		SLJIT_ASSERT(SLOW_IS_REG(dst));
    904   1.1     alnsn 		if (reg_map[dst] < 4) {
    905   1.1     alnsn 			if (dst != src)
    906   1.1     alnsn 				EMIT_MOV(compiler, dst, 0, src, 0);
    907   1.5     alnsn 			inst = emit_x86_instruction(compiler, 2, dst, 0, dst, 0);
    908   1.5     alnsn 			FAIL_IF(!inst);
    909   1.5     alnsn 			*inst++ = GROUP_0F;
    910   1.5     alnsn 			*inst = sign ? MOVSX_r_rm8 : MOVZX_r_rm8;
    911   1.1     alnsn 		}
    912   1.1     alnsn 		else {
    913   1.1     alnsn 			if (dst != src)
    914   1.1     alnsn 				EMIT_MOV(compiler, dst, 0, src, 0);
    915   1.1     alnsn 			if (sign) {
    916   1.1     alnsn 				/* shl reg, 24 */
    917   1.5     alnsn 				inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 24, dst, 0);
    918   1.5     alnsn 				FAIL_IF(!inst);
    919   1.5     alnsn 				*inst |= SHL;
    920   1.5     alnsn 				/* sar reg, 24 */
    921   1.5     alnsn 				inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 24, dst, 0);
    922   1.5     alnsn 				FAIL_IF(!inst);
    923   1.5     alnsn 				*inst |= SAR;
    924   1.1     alnsn 			}
    925   1.1     alnsn 			else {
    926   1.5     alnsn 				inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 0xff, dst, 0);
    927   1.5     alnsn 				FAIL_IF(!inst);
    928   1.5     alnsn 				*(inst + 1) |= AND;
    929   1.1     alnsn 			}
    930   1.1     alnsn 		}
    931   1.1     alnsn 		return SLJIT_SUCCESS;
    932   1.1     alnsn 	}
    933   1.1     alnsn #endif
    934   1.1     alnsn 	else {
    935   1.1     alnsn 		/* src can be memory addr or reg_map[src] < 4 on x86_32 architectures. */
    936   1.5     alnsn 		inst = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw);
    937   1.5     alnsn 		FAIL_IF(!inst);
    938   1.5     alnsn 		*inst++ = GROUP_0F;
    939   1.5     alnsn 		*inst = sign ? MOVSX_r_rm8 : MOVZX_r_rm8;
    940   1.1     alnsn 	}
    941   1.1     alnsn 
    942   1.1     alnsn 	if (dst & SLJIT_MEM) {
    943   1.1     alnsn #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
    944   1.5     alnsn 		if (dst_r == TMP_REG1) {
    945   1.1     alnsn 			/* Find a non-used register, whose reg_map[src] < 4. */
    946   1.8     alnsn 			if ((dst & REG_MASK) == SLJIT_R0) {
    947   1.8     alnsn 				if ((dst & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_R1))
    948   1.8     alnsn 					work_r = SLJIT_R2;
    949   1.1     alnsn 				else
    950   1.8     alnsn 					work_r = SLJIT_R1;
    951   1.1     alnsn 			}
    952   1.1     alnsn 			else {
    953   1.8     alnsn 				if ((dst & OFFS_REG_MASK) != TO_OFFS_REG(SLJIT_R0))
    954   1.8     alnsn 					work_r = SLJIT_R0;
    955   1.8     alnsn 				else if ((dst & REG_MASK) == SLJIT_R1)
    956   1.8     alnsn 					work_r = SLJIT_R2;
    957   1.1     alnsn 				else
    958   1.8     alnsn 					work_r = SLJIT_R1;
    959   1.1     alnsn 			}
    960   1.1     alnsn 
    961   1.8     alnsn 			if (work_r == SLJIT_R0) {
    962   1.5     alnsn 				ENCODE_PREFIX(XCHG_EAX_r + reg_map[TMP_REG1]);
    963   1.1     alnsn 			}
    964   1.1     alnsn 			else {
    965   1.5     alnsn 				inst = emit_x86_instruction(compiler, 1, work_r, 0, dst_r, 0);
    966   1.5     alnsn 				FAIL_IF(!inst);
    967   1.5     alnsn 				*inst = XCHG_r_rm;
    968   1.1     alnsn 			}
    969   1.1     alnsn 
    970   1.5     alnsn 			inst = emit_x86_instruction(compiler, 1, work_r, 0, dst, dstw);
    971   1.5     alnsn 			FAIL_IF(!inst);
    972   1.5     alnsn 			*inst = MOV_rm8_r8;
    973   1.1     alnsn 
    974   1.8     alnsn 			if (work_r == SLJIT_R0) {
    975   1.5     alnsn 				ENCODE_PREFIX(XCHG_EAX_r + reg_map[TMP_REG1]);
    976   1.1     alnsn 			}
    977   1.1     alnsn 			else {
    978   1.5     alnsn 				inst = emit_x86_instruction(compiler, 1, work_r, 0, dst_r, 0);
    979   1.5     alnsn 				FAIL_IF(!inst);
    980   1.5     alnsn 				*inst = XCHG_r_rm;
    981   1.1     alnsn 			}
    982   1.1     alnsn 		}
    983   1.1     alnsn 		else {
    984   1.5     alnsn 			inst = emit_x86_instruction(compiler, 1, dst_r, 0, dst, dstw);
    985   1.5     alnsn 			FAIL_IF(!inst);
    986   1.5     alnsn 			*inst = MOV_rm8_r8;
    987   1.1     alnsn 		}
    988   1.1     alnsn #else
    989   1.5     alnsn 		inst = emit_x86_instruction(compiler, 1 | EX86_REX | EX86_NO_REXW, dst_r, 0, dst, dstw);
    990   1.5     alnsn 		FAIL_IF(!inst);
    991   1.5     alnsn 		*inst = MOV_rm8_r8;
    992   1.1     alnsn #endif
    993   1.1     alnsn 	}
    994   1.1     alnsn 
    995   1.1     alnsn 	return SLJIT_SUCCESS;
    996   1.1     alnsn }
    997   1.1     alnsn 
    998   1.8     alnsn static sljit_s32 emit_mov_half(struct sljit_compiler *compiler, sljit_s32 sign,
    999   1.8     alnsn 	sljit_s32 dst, sljit_sw dstw,
   1000   1.8     alnsn 	sljit_s32 src, sljit_sw srcw)
   1001   1.1     alnsn {
   1002   1.8     alnsn 	sljit_u8* inst;
   1003   1.8     alnsn 	sljit_s32 dst_r;
   1004   1.1     alnsn 
   1005   1.1     alnsn #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
   1006   1.1     alnsn 	compiler->mode32 = 0;
   1007   1.1     alnsn #endif
   1008   1.1     alnsn 
   1009   1.1     alnsn 	if (dst == SLJIT_UNUSED && !(src & SLJIT_MEM))
   1010   1.1     alnsn 		return SLJIT_SUCCESS; /* Empty instruction. */
   1011   1.1     alnsn 
   1012   1.1     alnsn 	if (src & SLJIT_IMM) {
   1013   1.5     alnsn 		if (FAST_IS_REG(dst)) {
   1014   1.1     alnsn #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
   1015   1.5     alnsn 			return emit_do_imm(compiler, MOV_r_i32 + reg_map[dst], srcw);
   1016   1.1     alnsn #else
   1017   1.5     alnsn 			inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, 0);
   1018   1.5     alnsn 			FAIL_IF(!inst);
   1019   1.5     alnsn 			*inst = MOV_rm_i32;
   1020   1.5     alnsn 			return SLJIT_SUCCESS;
   1021   1.1     alnsn #endif
   1022   1.1     alnsn 		}
   1023   1.5     alnsn 		inst = emit_x86_instruction(compiler, 1 | EX86_HALF_ARG | EX86_NO_REXW | EX86_PREF_66, SLJIT_IMM, srcw, dst, dstw);
   1024   1.5     alnsn 		FAIL_IF(!inst);
   1025   1.5     alnsn 		*inst = MOV_rm_i32;
   1026   1.1     alnsn 		return SLJIT_SUCCESS;
   1027   1.1     alnsn 	}
   1028   1.1     alnsn 
   1029   1.5     alnsn 	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
   1030   1.1     alnsn 
   1031   1.5     alnsn 	if ((dst & SLJIT_MEM) && FAST_IS_REG(src))
   1032   1.1     alnsn 		dst_r = src;
   1033   1.1     alnsn 	else {
   1034   1.5     alnsn 		inst = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw);
   1035   1.5     alnsn 		FAIL_IF(!inst);
   1036   1.5     alnsn 		*inst++ = GROUP_0F;
   1037   1.5     alnsn 		*inst = sign ? MOVSX_r_rm16 : MOVZX_r_rm16;
   1038   1.1     alnsn 	}
   1039   1.1     alnsn 
   1040   1.1     alnsn 	if (dst & SLJIT_MEM) {
   1041   1.5     alnsn 		inst = emit_x86_instruction(compiler, 1 | EX86_NO_REXW | EX86_PREF_66, dst_r, 0, dst, dstw);
   1042   1.5     alnsn 		FAIL_IF(!inst);
   1043   1.5     alnsn 		*inst = MOV_rm_r;
   1044   1.1     alnsn 	}
   1045   1.1     alnsn 
   1046   1.1     alnsn 	return SLJIT_SUCCESS;
   1047   1.1     alnsn }
   1048   1.1     alnsn 
   1049   1.8     alnsn static sljit_s32 emit_unary(struct sljit_compiler *compiler, sljit_u8 opcode,
   1050   1.8     alnsn 	sljit_s32 dst, sljit_sw dstw,
   1051   1.8     alnsn 	sljit_s32 src, sljit_sw srcw)
   1052   1.1     alnsn {
   1053   1.8     alnsn 	sljit_u8* inst;
   1054   1.1     alnsn 
   1055   1.1     alnsn 	if (dst == SLJIT_UNUSED) {
   1056   1.5     alnsn 		EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
   1057   1.5     alnsn 		inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
   1058   1.5     alnsn 		FAIL_IF(!inst);
   1059   1.5     alnsn 		*inst++ = GROUP_F7;
   1060   1.5     alnsn 		*inst |= opcode;
   1061   1.1     alnsn 		return SLJIT_SUCCESS;
   1062   1.1     alnsn 	}
   1063   1.1     alnsn 	if (dst == src && dstw == srcw) {
   1064   1.1     alnsn 		/* Same input and output */
   1065   1.5     alnsn 		inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
   1066   1.5     alnsn 		FAIL_IF(!inst);
   1067   1.5     alnsn 		*inst++ = GROUP_F7;
   1068   1.5     alnsn 		*inst |= opcode;
   1069   1.1     alnsn 		return SLJIT_SUCCESS;
   1070   1.1     alnsn 	}
   1071   1.5     alnsn 	if (FAST_IS_REG(dst)) {
   1072   1.1     alnsn 		EMIT_MOV(compiler, dst, 0, src, srcw);
   1073   1.5     alnsn 		inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
   1074   1.5     alnsn 		FAIL_IF(!inst);
   1075   1.5     alnsn 		*inst++ = GROUP_F7;
   1076   1.5     alnsn 		*inst |= opcode;
   1077   1.1     alnsn 		return SLJIT_SUCCESS;
   1078   1.1     alnsn 	}
   1079   1.5     alnsn 	EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
   1080   1.5     alnsn 	inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
   1081   1.5     alnsn 	FAIL_IF(!inst);
   1082   1.5     alnsn 	*inst++ = GROUP_F7;
   1083   1.5     alnsn 	*inst |= opcode;
   1084   1.5     alnsn 	EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
   1085   1.1     alnsn 	return SLJIT_SUCCESS;
   1086   1.1     alnsn }
   1087   1.1     alnsn 
   1088   1.8     alnsn static sljit_s32 emit_not_with_flags(struct sljit_compiler *compiler,
   1089   1.8     alnsn 	sljit_s32 dst, sljit_sw dstw,
   1090   1.8     alnsn 	sljit_s32 src, sljit_sw srcw)
   1091   1.1     alnsn {
   1092   1.8     alnsn 	sljit_u8* inst;
   1093   1.1     alnsn 
   1094   1.1     alnsn 	if (dst == SLJIT_UNUSED) {
   1095   1.5     alnsn 		EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
   1096   1.5     alnsn 		inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
   1097   1.5     alnsn 		FAIL_IF(!inst);
   1098   1.5     alnsn 		*inst++ = GROUP_F7;
   1099   1.5     alnsn 		*inst |= NOT_rm;
   1100   1.5     alnsn 		inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, TMP_REG1, 0);
   1101   1.5     alnsn 		FAIL_IF(!inst);
   1102   1.5     alnsn 		*inst = OR_r_rm;
   1103   1.1     alnsn 		return SLJIT_SUCCESS;
   1104   1.1     alnsn 	}
   1105   1.5     alnsn 	if (FAST_IS_REG(dst)) {
   1106   1.1     alnsn 		EMIT_MOV(compiler, dst, 0, src, srcw);
   1107   1.5     alnsn 		inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
   1108   1.5     alnsn 		FAIL_IF(!inst);
   1109   1.5     alnsn 		*inst++ = GROUP_F7;
   1110   1.5     alnsn 		*inst |= NOT_rm;
   1111   1.5     alnsn 		inst = emit_x86_instruction(compiler, 1, dst, 0, dst, 0);
   1112   1.5     alnsn 		FAIL_IF(!inst);
   1113   1.5     alnsn 		*inst = OR_r_rm;
   1114   1.5     alnsn 		return SLJIT_SUCCESS;
   1115   1.5     alnsn 	}
   1116   1.5     alnsn 	EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
   1117   1.5     alnsn 	inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
   1118   1.5     alnsn 	FAIL_IF(!inst);
   1119   1.5     alnsn 	*inst++ = GROUP_F7;
   1120   1.5     alnsn 	*inst |= NOT_rm;
   1121   1.5     alnsn 	inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, TMP_REG1, 0);
   1122   1.5     alnsn 	FAIL_IF(!inst);
   1123   1.5     alnsn 	*inst = OR_r_rm;
   1124   1.5     alnsn 	EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
   1125   1.1     alnsn 	return SLJIT_SUCCESS;
   1126   1.1     alnsn }
   1127   1.1     alnsn 
   1128   1.8     alnsn static sljit_s32 emit_clz(struct sljit_compiler *compiler, sljit_s32 op_flags,
   1129   1.8     alnsn 	sljit_s32 dst, sljit_sw dstw,
   1130   1.8     alnsn 	sljit_s32 src, sljit_sw srcw)
   1131   1.1     alnsn {
   1132   1.8     alnsn 	sljit_u8* inst;
   1133   1.8     alnsn 	sljit_s32 dst_r;
   1134   1.1     alnsn 
   1135   1.5     alnsn 	SLJIT_UNUSED_ARG(op_flags);
   1136   1.1     alnsn 	if (SLJIT_UNLIKELY(dst == SLJIT_UNUSED)) {
   1137   1.1     alnsn 		/* Just set the zero flag. */
   1138   1.5     alnsn 		EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
   1139   1.5     alnsn 		inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
   1140   1.5     alnsn 		FAIL_IF(!inst);
   1141   1.5     alnsn 		*inst++ = GROUP_F7;
   1142   1.5     alnsn 		*inst |= NOT_rm;
   1143   1.1     alnsn #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
   1144   1.5     alnsn 		inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 31, TMP_REG1, 0);
   1145   1.1     alnsn #else
   1146   1.8     alnsn 		inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, !(op_flags & SLJIT_I32_OP) ? 63 : 31, TMP_REG1, 0);
   1147   1.1     alnsn #endif
   1148   1.5     alnsn 		FAIL_IF(!inst);
   1149   1.5     alnsn 		*inst |= SHR;
   1150   1.1     alnsn 		return SLJIT_SUCCESS;
   1151   1.1     alnsn 	}
   1152   1.1     alnsn 
   1153   1.1     alnsn 	if (SLJIT_UNLIKELY(src & SLJIT_IMM)) {
   1154   1.5     alnsn 		EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, srcw);
   1155   1.5     alnsn 		src = TMP_REG1;
   1156   1.1     alnsn 		srcw = 0;
   1157   1.1     alnsn 	}
   1158   1.1     alnsn 
   1159   1.5     alnsn 	inst = emit_x86_instruction(compiler, 2, TMP_REG1, 0, src, srcw);
   1160   1.5     alnsn 	FAIL_IF(!inst);
   1161   1.5     alnsn 	*inst++ = GROUP_0F;
   1162   1.5     alnsn 	*inst = BSR_r_rm;
   1163   1.1     alnsn 
   1164   1.1     alnsn #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
   1165   1.5     alnsn 	if (FAST_IS_REG(dst))
   1166   1.1     alnsn 		dst_r = dst;
   1167   1.1     alnsn 	else {
   1168   1.1     alnsn 		/* Find an unused temporary register. */
   1169   1.8     alnsn 		if ((dst & REG_MASK) != SLJIT_R0 && (dst & OFFS_REG_MASK) != TO_OFFS_REG(SLJIT_R0))
   1170   1.8     alnsn 			dst_r = SLJIT_R0;
   1171   1.8     alnsn 		else if ((dst & REG_MASK) != SLJIT_R1 && (dst & OFFS_REG_MASK) != TO_OFFS_REG(SLJIT_R1))
   1172   1.8     alnsn 			dst_r = SLJIT_R1;
   1173   1.1     alnsn 		else
   1174   1.8     alnsn 			dst_r = SLJIT_R2;
   1175   1.1     alnsn 		EMIT_MOV(compiler, dst, dstw, dst_r, 0);
   1176   1.1     alnsn 	}
   1177   1.1     alnsn 	EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, 32 + 31);
   1178   1.1     alnsn #else
   1179   1.5     alnsn 	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2;
   1180   1.1     alnsn 	compiler->mode32 = 0;
   1181   1.8     alnsn 	EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, !(op_flags & SLJIT_I32_OP) ? 64 + 63 : 32 + 31);
   1182   1.8     alnsn 	compiler->mode32 = op_flags & SLJIT_I32_OP;
   1183   1.1     alnsn #endif
   1184   1.1     alnsn 
   1185   1.5     alnsn 	if (cpu_has_cmov == -1)
   1186   1.5     alnsn 		get_cpu_features();
   1187   1.5     alnsn 
   1188   1.5     alnsn 	if (cpu_has_cmov) {
   1189   1.5     alnsn 		inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG1, 0);
   1190   1.5     alnsn 		FAIL_IF(!inst);
   1191   1.5     alnsn 		*inst++ = GROUP_0F;
   1192   1.5     alnsn 		*inst = CMOVNE_r_rm;
   1193   1.5     alnsn 	} else {
   1194   1.5     alnsn #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
   1195   1.8     alnsn 		inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
   1196   1.5     alnsn 		FAIL_IF(!inst);
   1197   1.5     alnsn 		INC_SIZE(4);
   1198   1.5     alnsn 
   1199   1.5     alnsn 		*inst++ = JE_i8;
   1200   1.5     alnsn 		*inst++ = 2;
   1201   1.5     alnsn 		*inst++ = MOV_r_rm;
   1202   1.5     alnsn 		*inst++ = MOD_REG | (reg_map[dst_r] << 3) | reg_map[TMP_REG1];
   1203   1.5     alnsn #else
   1204   1.8     alnsn 		inst = (sljit_u8*)ensure_buf(compiler, 1 + 5);
   1205   1.5     alnsn 		FAIL_IF(!inst);
   1206   1.5     alnsn 		INC_SIZE(5);
   1207   1.5     alnsn 
   1208   1.5     alnsn 		*inst++ = JE_i8;
   1209   1.5     alnsn 		*inst++ = 3;
   1210   1.5     alnsn 		*inst++ = REX_W | (reg_map[dst_r] >= 8 ? REX_R : 0) | (reg_map[TMP_REG1] >= 8 ? REX_B : 0);
   1211   1.5     alnsn 		*inst++ = MOV_r_rm;
   1212   1.5     alnsn 		*inst++ = MOD_REG | (reg_lmap[dst_r] << 3) | reg_lmap[TMP_REG1];
   1213   1.5     alnsn #endif
   1214   1.5     alnsn 	}
   1215   1.1     alnsn 
   1216   1.1     alnsn #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
   1217   1.5     alnsn 	inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 31, dst_r, 0);
   1218   1.1     alnsn #else
   1219   1.8     alnsn 	inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, !(op_flags & SLJIT_I32_OP) ? 63 : 31, dst_r, 0);
   1220   1.1     alnsn #endif
   1221   1.5     alnsn 	FAIL_IF(!inst);
   1222   1.5     alnsn 	*(inst + 1) |= XOR;
   1223   1.1     alnsn 
   1224   1.1     alnsn #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
   1225   1.1     alnsn 	if (dst & SLJIT_MEM) {
   1226   1.5     alnsn 		inst = emit_x86_instruction(compiler, 1, dst_r, 0, dst, dstw);
   1227   1.5     alnsn 		FAIL_IF(!inst);
   1228   1.5     alnsn 		*inst = XCHG_r_rm;
   1229   1.1     alnsn 	}
   1230   1.1     alnsn #else
   1231   1.1     alnsn 	if (dst & SLJIT_MEM)
   1232   1.1     alnsn 		EMIT_MOV(compiler, dst, dstw, TMP_REG2, 0);
   1233   1.1     alnsn #endif
   1234   1.1     alnsn 	return SLJIT_SUCCESS;
   1235   1.1     alnsn }
   1236   1.1     alnsn 
   1237   1.8     alnsn SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op,
   1238   1.8     alnsn 	sljit_s32 dst, sljit_sw dstw,
   1239   1.8     alnsn 	sljit_s32 src, sljit_sw srcw)
   1240   1.8     alnsn {
   1241   1.8     alnsn 	sljit_s32 update = 0;
   1242   1.8     alnsn 	sljit_s32 op_flags = GET_ALL_FLAGS(op);
   1243   1.1     alnsn #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
   1244   1.8     alnsn 	sljit_s32 dst_is_ereg = 0;
   1245   1.8     alnsn 	sljit_s32 src_is_ereg = 0;
   1246   1.1     alnsn #else
   1247   1.5     alnsn #	define src_is_ereg 0
   1248   1.1     alnsn #endif
   1249   1.1     alnsn 
   1250   1.1     alnsn 	CHECK_ERROR();
   1251   1.8     alnsn 	CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw));
   1252   1.1     alnsn 	ADJUST_LOCAL_OFFSET(dst, dstw);
   1253   1.1     alnsn 	ADJUST_LOCAL_OFFSET(src, srcw);
   1254   1.1     alnsn 
   1255   1.1     alnsn 	CHECK_EXTRA_REGS(dst, dstw, dst_is_ereg = 1);
   1256   1.1     alnsn 	CHECK_EXTRA_REGS(src, srcw, src_is_ereg = 1);
   1257   1.1     alnsn #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
   1258   1.8     alnsn 	compiler->mode32 = op_flags & SLJIT_I32_OP;
   1259   1.1     alnsn #endif
   1260   1.1     alnsn 
   1261   1.5     alnsn 	op = GET_OPCODE(op);
   1262   1.5     alnsn 	if (op >= SLJIT_MOV && op <= SLJIT_MOVU_P) {
   1263   1.1     alnsn #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
   1264   1.1     alnsn 		compiler->mode32 = 0;
   1265   1.1     alnsn #endif
   1266   1.1     alnsn 
   1267   1.8     alnsn 		if (op_flags & SLJIT_I32_OP) {
   1268   1.5     alnsn 			if (FAST_IS_REG(src) && src == dst) {
   1269   1.5     alnsn 				if (!TYPE_CAST_NEEDED(op))
   1270   1.5     alnsn 					return SLJIT_SUCCESS;
   1271   1.5     alnsn 			}
   1272   1.5     alnsn #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
   1273   1.8     alnsn 			if (op == SLJIT_MOV_S32 && (src & SLJIT_MEM))
   1274   1.8     alnsn 				op = SLJIT_MOV_U32;
   1275   1.8     alnsn 			if (op == SLJIT_MOVU_S32 && (src & SLJIT_MEM))
   1276   1.8     alnsn 				op = SLJIT_MOVU_U32;
   1277   1.8     alnsn 			if (op == SLJIT_MOV_U32 && (src & SLJIT_IMM))
   1278   1.8     alnsn 				op = SLJIT_MOV_S32;
   1279   1.8     alnsn 			if (op == SLJIT_MOVU_U32 && (src & SLJIT_IMM))
   1280   1.8     alnsn 				op = SLJIT_MOVU_S32;
   1281   1.5     alnsn #endif
   1282   1.5     alnsn 		}
   1283   1.5     alnsn 
   1284   1.5     alnsn 		SLJIT_COMPILE_ASSERT(SLJIT_MOV + 8 == SLJIT_MOVU, movu_offset);
   1285   1.1     alnsn 		if (op >= SLJIT_MOVU) {
   1286   1.1     alnsn 			update = 1;
   1287   1.5     alnsn 			op -= 8;
   1288   1.1     alnsn 		}
   1289   1.1     alnsn 
   1290   1.1     alnsn 		if (src & SLJIT_IMM) {
   1291   1.1     alnsn 			switch (op) {
   1292   1.8     alnsn 			case SLJIT_MOV_U8:
   1293   1.8     alnsn 				srcw = (sljit_u8)srcw;
   1294   1.1     alnsn 				break;
   1295   1.8     alnsn 			case SLJIT_MOV_S8:
   1296   1.8     alnsn 				srcw = (sljit_s8)srcw;
   1297   1.1     alnsn 				break;
   1298   1.8     alnsn 			case SLJIT_MOV_U16:
   1299   1.8     alnsn 				srcw = (sljit_u16)srcw;
   1300   1.1     alnsn 				break;
   1301   1.8     alnsn 			case SLJIT_MOV_S16:
   1302   1.8     alnsn 				srcw = (sljit_s16)srcw;
   1303   1.1     alnsn 				break;
   1304   1.1     alnsn #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
   1305   1.8     alnsn 			case SLJIT_MOV_U32:
   1306   1.8     alnsn 				srcw = (sljit_u32)srcw;
   1307   1.1     alnsn 				break;
   1308   1.8     alnsn 			case SLJIT_MOV_S32:
   1309   1.8     alnsn 				srcw = (sljit_s32)srcw;
   1310   1.1     alnsn 				break;
   1311   1.1     alnsn #endif
   1312   1.1     alnsn 			}
   1313   1.1     alnsn #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
   1314   1.1     alnsn 			if (SLJIT_UNLIKELY(dst_is_ereg))
   1315   1.1     alnsn 				return emit_mov(compiler, dst, dstw, src, srcw);
   1316   1.1     alnsn #endif
   1317   1.1     alnsn 		}
   1318   1.1     alnsn 
   1319   1.1     alnsn #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
   1320   1.8     alnsn 		if (SLJIT_UNLIKELY(dst_is_ereg) && (!(op == SLJIT_MOV || op == SLJIT_MOV_U32 || op == SLJIT_MOV_S32 || op == SLJIT_MOV_P) || (src & SLJIT_MEM))) {
   1321   1.8     alnsn 			SLJIT_ASSERT(dst == SLJIT_MEM1(SLJIT_SP));
   1322   1.5     alnsn 			dst = TMP_REG1;
   1323   1.1     alnsn 		}
   1324   1.1     alnsn #endif
   1325   1.1     alnsn 
   1326   1.1     alnsn 		switch (op) {
   1327   1.1     alnsn 		case SLJIT_MOV:
   1328   1.5     alnsn 		case SLJIT_MOV_P:
   1329   1.1     alnsn #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
   1330   1.8     alnsn 		case SLJIT_MOV_U32:
   1331   1.8     alnsn 		case SLJIT_MOV_S32:
   1332   1.1     alnsn #endif
   1333   1.1     alnsn 			FAIL_IF(emit_mov(compiler, dst, dstw, src, srcw));
   1334   1.1     alnsn 			break;
   1335   1.8     alnsn 		case SLJIT_MOV_U8:
   1336   1.5     alnsn 			FAIL_IF(emit_mov_byte(compiler, 0, dst, dstw, src, srcw));
   1337   1.1     alnsn 			break;
   1338   1.8     alnsn 		case SLJIT_MOV_S8:
   1339   1.5     alnsn 			FAIL_IF(emit_mov_byte(compiler, 1, dst, dstw, src, srcw));
   1340   1.1     alnsn 			break;
   1341   1.8     alnsn 		case SLJIT_MOV_U16:
   1342   1.5     alnsn 			FAIL_IF(emit_mov_half(compiler, 0, dst, dstw, src, srcw));
   1343   1.1     alnsn 			break;
   1344   1.8     alnsn 		case SLJIT_MOV_S16:
   1345   1.5     alnsn 			FAIL_IF(emit_mov_half(compiler, 1, dst, dstw, src, srcw));
   1346   1.1     alnsn 			break;
   1347   1.1     alnsn #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
   1348   1.8     alnsn 		case SLJIT_MOV_U32:
   1349   1.5     alnsn 			FAIL_IF(emit_mov_int(compiler, 0, dst, dstw, src, srcw));
   1350   1.1     alnsn 			break;
   1351   1.8     alnsn 		case SLJIT_MOV_S32:
   1352   1.5     alnsn 			FAIL_IF(emit_mov_int(compiler, 1, dst, dstw, src, srcw));
   1353   1.1     alnsn 			break;
   1354   1.1     alnsn #endif
   1355   1.1     alnsn 		}
   1356   1.1     alnsn 
   1357   1.1     alnsn #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
   1358   1.5     alnsn 		if (SLJIT_UNLIKELY(dst_is_ereg) && dst == TMP_REG1)
   1359   1.8     alnsn 			return emit_mov(compiler, SLJIT_MEM1(SLJIT_SP), dstw, TMP_REG1, 0);
   1360   1.1     alnsn #endif
   1361   1.1     alnsn 
   1362   1.9     alnsn 		if (SLJIT_UNLIKELY(update) && (src & SLJIT_MEM) && !src_is_ereg && (src & REG_MASK)) {
   1363   1.9     alnsn 			if ((src & OFFS_REG_MASK) != 0) {
   1364   1.9     alnsn 				FAIL_IF(emit_cum_binary(compiler, ADD_r_rm, ADD_rm_r, ADD, ADD_EAX_i32,
   1365   1.9     alnsn 						(src & REG_MASK), 0, (src & REG_MASK), 0, OFFS_REG(dst), 0));
   1366   1.9     alnsn 			}
   1367   1.9     alnsn 			else if (srcw != 0) {
   1368   1.9     alnsn 				FAIL_IF(emit_cum_binary(compiler, ADD_r_rm, ADD_rm_r, ADD, ADD_EAX_i32,
   1369   1.9     alnsn 						(src & REG_MASK), 0, (src & REG_MASK), 0, SLJIT_IMM, srcw));
   1370   1.9     alnsn 			}
   1371   1.9     alnsn 		}
   1372   1.9     alnsn 
   1373   1.9     alnsn 		if (SLJIT_UNLIKELY(update) && (dst & SLJIT_MEM) && (dst & REG_MASK)) {
   1374   1.9     alnsn 			if ((dst & OFFS_REG_MASK) != 0) {
   1375   1.9     alnsn 				FAIL_IF(emit_cum_binary(compiler, ADD_r_rm, ADD_rm_r, ADD, ADD_EAX_i32,
   1376   1.9     alnsn 						(dst & REG_MASK), 0, (dst & REG_MASK), 0, OFFS_REG(dst), 0));
   1377   1.9     alnsn 			}
   1378   1.9     alnsn 			else if (dstw != 0) {
   1379   1.9     alnsn 				FAIL_IF(emit_cum_binary(compiler, ADD_r_rm, ADD_rm_r, ADD, ADD_EAX_i32,
   1380   1.9     alnsn 						(dst & REG_MASK), 0, (dst & REG_MASK), 0, SLJIT_IMM, dstw));
   1381   1.9     alnsn 			}
   1382   1.1     alnsn 		}
   1383   1.1     alnsn 		return SLJIT_SUCCESS;
   1384   1.1     alnsn 	}
   1385   1.1     alnsn 
   1386   1.5     alnsn 	switch (op) {
   1387   1.1     alnsn 	case SLJIT_NOT:
   1388   1.9     alnsn 		if (SLJIT_UNLIKELY(op_flags & SLJIT_SET_Z))
   1389   1.1     alnsn 			return emit_not_with_flags(compiler, dst, dstw, src, srcw);
   1390   1.5     alnsn 		return emit_unary(compiler, NOT_rm, dst, dstw, src, srcw);
   1391   1.1     alnsn 
   1392   1.1     alnsn 	case SLJIT_NEG:
   1393   1.5     alnsn 		return emit_unary(compiler, NEG_rm, dst, dstw, src, srcw);
   1394   1.1     alnsn 
   1395   1.1     alnsn 	case SLJIT_CLZ:
   1396   1.5     alnsn 		return emit_clz(compiler, op_flags, dst, dstw, src, srcw);
   1397   1.1     alnsn 	}
   1398   1.1     alnsn 
   1399   1.1     alnsn 	return SLJIT_SUCCESS;
   1400   1.1     alnsn 
   1401   1.1     alnsn #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
   1402   1.5     alnsn #	undef src_is_ereg
   1403   1.1     alnsn #endif
   1404   1.1     alnsn }
   1405   1.1     alnsn 
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)

/* Emits a group-1 ALU operation with an immediate on arg/argw.  On 64 bit
   targets an immediate outside the signed 32 bit range cannot be encoded
   directly, so it is first loaded into TMP_REG2 and the register form
   (op_mr) is used instead.  Unhygienic: expands to an if/else using the
   local 'inst' and may FAIL_IF-return from the enclosing function. */
#define BINARY_IMM(op_imm, op_mr, immw, arg, argw) \
	if (IS_HALFWORD(immw) || compiler->mode32) { \
		inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, immw, arg, argw); \
		FAIL_IF(!inst); \
		*(inst + 1) |= (op_imm); \
	} \
	else { \
		FAIL_IF(emit_load_imm64(compiler, TMP_REG2, immw)); \
		inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, arg, argw); \
		FAIL_IF(!inst); \
		*inst = (op_mr); \
	}

/* Short-form ALU op with imm32 when the destination is EAX/RAX; adds the
   REX.W prefix in 64 bit mode. */
#define BINARY_EAX_IMM(op_eax_imm, immw) \
	FAIL_IF(emit_do_imm32(compiler, (!compiler->mode32) ? REX_W : 0, (op_eax_imm), immw))

#else

/* 32 bit target: every immediate fits, emit the immediate form directly.
   Same unhygienic expansion caveats as the 64 bit variant above. */
#define BINARY_IMM(op_imm, op_mr, immw, arg, argw) \
	inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, immw, arg, argw); \
	FAIL_IF(!inst); \
	*(inst + 1) |= (op_imm);

/* Short-form ALU op with imm32 when the destination is EAX. */
#define BINARY_EAX_IMM(op_eax_imm, immw) \
	FAIL_IF(emit_do_imm(compiler, (op_eax_imm), immw))

#endif
   1435   1.1     alnsn 
/* Emits a commutative (cumulative) binary operation: dst = src1 op src2.
   Because the operation commutes, "dst == src2" can be handled by applying
   src1 onto dst in place.
     op_rm      - opcode of the "reg, reg/mem" form
     op_mr      - opcode of the "reg/mem, reg" form
     op_imm     - opcode extension (/digit) for the immediate form
     op_eax_imm - short opcode for the EAX/RAX + imm32 form */
static sljit_s32 emit_cum_binary(struct sljit_compiler *compiler,
	sljit_u8 op_rm, sljit_u8 op_mr, sljit_u8 op_imm, sljit_u8 op_eax_imm,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	sljit_u8* inst;

	/* No destination: the operation is still performed (into TMP_REG1),
	   presumably so the caller gets the status flags — the result itself
	   is discarded. */
	if (dst == SLJIT_UNUSED) {
		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
		if (src2 & SLJIT_IMM) {
			BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
		}
		else {
			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = op_rm;
		}
		return SLJIT_SUCCESS;
	}

	/* In-place form: dst op= src2. */
	if (dst == src1 && dstw == src1w) {
		if (src2 & SLJIT_IMM) {
			/* Immediates in [-128, 127] use the shorter imm8 encoding,
			   so the EAX short form is only chosen outside that range. */
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
			if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
#else
			if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128)) {
#endif
				BINARY_EAX_IMM(op_eax_imm, src2w);
			}
			else {
				BINARY_IMM(op_imm, op_mr, src2w, dst, dstw);
			}
		}
		else if (FAST_IS_REG(dst)) {
			inst = emit_x86_instruction(compiler, 1, dst, dstw, src2, src2w);
			FAIL_IF(!inst);
			*inst = op_rm;
		}
		else if (FAST_IS_REG(src2)) {
			/* Special exception for sljit_emit_op_flags. */
			inst = emit_x86_instruction(compiler, 1, src2, src2w, dst, dstw);
			FAIL_IF(!inst);
			*inst = op_mr;
		}
		else {
			/* Both operands in memory: stage src2 through TMP_REG1. */
			EMIT_MOV(compiler, TMP_REG1, 0, src2, src2w);
			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
			FAIL_IF(!inst);
			*inst = op_mr;
		}
		return SLJIT_SUCCESS;
	}

	/* Only for cumulative operations. */
	if (dst == src2 && dstw == src2w) {
		if (src1 & SLJIT_IMM) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
			if ((dst == SLJIT_R0) && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) {
#else
			if ((dst == SLJIT_R0) && (src1w > 127 || src1w < -128)) {
#endif
				BINARY_EAX_IMM(op_eax_imm, src1w);
			}
			else {
				BINARY_IMM(op_imm, op_mr, src1w, dst, dstw);
			}
		}
		else if (FAST_IS_REG(dst)) {
			inst = emit_x86_instruction(compiler, 1, dst, dstw, src1, src1w);
			FAIL_IF(!inst);
			*inst = op_rm;
		}
		else if (FAST_IS_REG(src1)) {
			inst = emit_x86_instruction(compiler, 1, src1, src1w, dst, dstw);
			FAIL_IF(!inst);
			*inst = op_mr;
		}
		else {
			/* Both operands in memory: stage src1 through TMP_REG1. */
			EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
			FAIL_IF(!inst);
			*inst = op_mr;
		}
		return SLJIT_SUCCESS;
	}

	/* General version. */
	if (FAST_IS_REG(dst)) {
		/* Compute directly into the destination register. */
		EMIT_MOV(compiler, dst, 0, src1, src1w);
		if (src2 & SLJIT_IMM) {
			BINARY_IMM(op_imm, op_mr, src2w, dst, 0);
		}
		else {
			inst = emit_x86_instruction(compiler, 1, dst, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = op_rm;
		}
	}
	else {
		/* This version requires less memory writing. */
		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
		if (src2 & SLJIT_IMM) {
			BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
		}
		else {
			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = op_rm;
		}
		EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
	}

	return SLJIT_SUCCESS;
}
   1551   1.1     alnsn 
/* Emits a non-commutative binary operation (e.g. SUB): dst = src1 op src2.
   Unlike emit_cum_binary there is no "dst == src2" fast path, and the
   general version must avoid clobbering src2 before it is read (hence the
   "dst != src2" guard below).  Opcode parameters mirror emit_cum_binary. */
static sljit_s32 emit_non_cum_binary(struct sljit_compiler *compiler,
	sljit_u8 op_rm, sljit_u8 op_mr, sljit_u8 op_imm, sljit_u8 op_eax_imm,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	sljit_u8* inst;

	/* No destination: perform the operation into TMP_REG1 (result is
	   discarded; presumably only the flags matter to the caller). */
	if (dst == SLJIT_UNUSED) {
		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
		if (src2 & SLJIT_IMM) {
			BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
		}
		else {
			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = op_rm;
		}
		return SLJIT_SUCCESS;
	}

	/* In-place form: dst op= src2. */
	if (dst == src1 && dstw == src1w) {
		if (src2 & SLJIT_IMM) {
			/* EAX short form only pays off when imm8 cannot be used. */
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
			if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
#else
			if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128)) {
#endif
				BINARY_EAX_IMM(op_eax_imm, src2w);
			}
			else {
				BINARY_IMM(op_imm, op_mr, src2w, dst, dstw);
			}
		}
		else if (FAST_IS_REG(dst)) {
			inst = emit_x86_instruction(compiler, 1, dst, dstw, src2, src2w);
			FAIL_IF(!inst);
			*inst = op_rm;
		}
		else if (FAST_IS_REG(src2)) {
			inst = emit_x86_instruction(compiler, 1, src2, src2w, dst, dstw);
			FAIL_IF(!inst);
			*inst = op_mr;
		}
		else {
			/* Both operands in memory: stage src2 through TMP_REG1. */
			EMIT_MOV(compiler, TMP_REG1, 0, src2, src2w);
			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
			FAIL_IF(!inst);
			*inst = op_mr;
		}
		return SLJIT_SUCCESS;
	}

	/* General version. */
	if (FAST_IS_REG(dst) && dst != src2) {
		/* Safe to build the result in dst: src2 is not overwritten. */
		EMIT_MOV(compiler, dst, 0, src1, src1w);
		if (src2 & SLJIT_IMM) {
			BINARY_IMM(op_imm, op_mr, src2w, dst, 0);
		}
		else {
			inst = emit_x86_instruction(compiler, 1, dst, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = op_rm;
		}
	}
	else {
		/* This version requires less memory writing. */
		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
		if (src2 & SLJIT_IMM) {
			BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
		}
		else {
			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = op_rm;
		}
		EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
	}

	return SLJIT_SUCCESS;
}
   1633   1.1     alnsn 
/* Emits a signed multiplication: dst = src1 * src2, using the two-operand
   IMUL (reg, reg/mem) and the immediate IMUL forms (reg, reg/mem, imm8 and
   reg, reg/mem, imm32).  The product is built in a register (dst when
   possible, otherwise TMP_REG1) and stored to a memory destination last. */
static sljit_s32 emit_mul(struct sljit_compiler *compiler,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	sljit_u8* inst;
	sljit_s32 dst_r;

	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;

	/* Register destination. */
	if (dst_r == src1 && !(src2 & SLJIT_IMM)) {
		/* dst_r already holds src1: IMUL dst_r, src2. */
		inst = emit_x86_instruction(compiler, 2, dst_r, 0, src2, src2w);
		FAIL_IF(!inst);
		*inst++ = GROUP_0F;
		*inst = IMUL_r_rm;
	}
	else if (dst_r == src2 && !(src1 & SLJIT_IMM)) {
		/* Multiplication commutes: IMUL dst_r, src1. */
		inst = emit_x86_instruction(compiler, 2, dst_r, 0, src1, src1w);
		FAIL_IF(!inst);
		*inst++ = GROUP_0F;
		*inst = IMUL_r_rm;
	}
	else if (src1 & SLJIT_IMM) {
		if (src2 & SLJIT_IMM) {
			/* Two immediates: materialize src2 in dst_r first. */
			EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, src2w);
			src2 = dst_r;
			src2w = 0;
		}

		if (src1w <= 127 && src1w >= -128) {
			/* IMUL r, r/m, imm8 — the immediate byte follows. */
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = IMUL_r_rm_i8;
			inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
			FAIL_IF(!inst);
			INC_SIZE(1);
			*inst = (sljit_s8)src1w;
		}
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		else {
			/* IMUL r, r/m, imm32 — the 4 byte immediate follows. */
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = IMUL_r_rm_i32;
			inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
			FAIL_IF(!inst);
			INC_SIZE(4);
			sljit_unaligned_store_sw(inst, src1w);
		}
#else
		else if (IS_HALFWORD(src1w)) {
			/* IMUL r, r/m, imm32 (sign-extended on 64 bit). */
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = IMUL_r_rm_i32;
			inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
			FAIL_IF(!inst);
			INC_SIZE(4);
			sljit_unaligned_store_s32(inst, (sljit_s32)src1w);
		}
		else {
			/* 64 bit immediate: load it into TMP_REG2 and use the
			   register form. */
			EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_IMM, src1w);
			if (dst_r != src2)
				EMIT_MOV(compiler, dst_r, 0, src2, src2w);
			inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0);
			FAIL_IF(!inst);
			*inst++ = GROUP_0F;
			*inst = IMUL_r_rm;
		}
#endif
	}
	else if (src2 & SLJIT_IMM) {
		/* Note: src1 is NOT immediate. */

		if (src2w <= 127 && src2w >= -128) {
			/* IMUL r, r/m, imm8. */
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
			FAIL_IF(!inst);
			*inst = IMUL_r_rm_i8;
			inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
			FAIL_IF(!inst);
			INC_SIZE(1);
			*inst = (sljit_s8)src2w;
		}
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		else {
			/* IMUL r, r/m, imm32. */
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
			FAIL_IF(!inst);
			*inst = IMUL_r_rm_i32;
			inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
			FAIL_IF(!inst);
			INC_SIZE(4);
			sljit_unaligned_store_sw(inst, src2w);
		}
#else
		else if (IS_HALFWORD(src2w)) {
			/* IMUL r, r/m, imm32 (sign-extended on 64 bit). */
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
			FAIL_IF(!inst);
			*inst = IMUL_r_rm_i32;
			inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
			FAIL_IF(!inst);
			INC_SIZE(4);
			sljit_unaligned_store_s32(inst, (sljit_s32)src2w);
		}
		else {
			/* 64 bit immediate: stage it in TMP_REG2. */
			EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_IMM, src2w);
			if (dst_r != src1)
				EMIT_MOV(compiler, dst_r, 0, src1, src1w);
			inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0);
			FAIL_IF(!inst);
			*inst++ = GROUP_0F;
			*inst = IMUL_r_rm;
		}
#endif
	}
	else {
		/* Neither argument is immediate. */
		if (ADDRESSING_DEPENDS_ON(src2, dst_r))
			dst_r = TMP_REG1;
		EMIT_MOV(compiler, dst_r, 0, src1, src1w);
		inst = emit_x86_instruction(compiler, 2, dst_r, 0, src2, src2w);
		FAIL_IF(!inst);
		*inst++ = GROUP_0F;
		*inst = IMUL_r_rm;
	}

	/* Memory destination: store the product computed in TMP_REG1. */
	if (dst_r == TMP_REG1)
		EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);

	return SLJIT_SUCCESS;
}
   1763   1.1     alnsn 
/* Attempts to emit an addition as a single LEA instruction (which, unlike
   ADD, does not modify the status flags).  Handles reg+reg and
   reg+immediate operand combinations; returns SLJIT_ERR_UNSUPPORTED for
   anything else (including in-place forms), letting the caller fall back
   to the normal ADD path. */
static sljit_s32 emit_lea_binary(struct sljit_compiler *compiler,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	sljit_u8* inst;
	sljit_s32 dst_r, done = 0;

	/* These cases better be left to handled by normal way. */
	if (dst == src1 && dstw == src1w)
		return SLJIT_ERR_UNSUPPORTED;
	if (dst == src2 && dstw == src2w)
		return SLJIT_ERR_UNSUPPORTED;

	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;

	if (FAST_IS_REG(src1)) {
		if (FAST_IS_REG(src2)) {
			/* LEA dst_r, [src1 + src2]. */
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM2(src1, src2), 0);
			FAIL_IF(!inst);
			*inst = LEA_r_m;
			done = 1;
		}
		/* LEA dst_r, [src1 + imm]; on 64 bit only when the displacement
		   fits in the 32 bit encoding. */
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		if ((src2 & SLJIT_IMM) && (compiler->mode32 || IS_HALFWORD(src2w))) {
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src1), (sljit_s32)src2w);
#else
		if (src2 & SLJIT_IMM) {
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src1), src2w);
#endif
			FAIL_IF(!inst);
			*inst = LEA_r_m;
			done = 1;
		}
	}
	else if (FAST_IS_REG(src2)) {
		/* Mirror case: LEA dst_r, [src2 + imm]. */
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		if ((src1 & SLJIT_IMM) && (compiler->mode32 || IS_HALFWORD(src1w))) {
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src2), (sljit_s32)src1w);
#else
		if (src1 & SLJIT_IMM) {
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src2), src1w);
#endif
			FAIL_IF(!inst);
			*inst = LEA_r_m;
			done = 1;
		}
	}

	if (done) {
		/* Memory destination: store the result computed in TMP_REG1. */
		if (dst_r == TMP_REG1)
			return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
		return SLJIT_SUCCESS;
	}
	return SLJIT_ERR_UNSUPPORTED;
}
   1820   1.1     alnsn 
/* Emits a CMP instruction for src1 vs src2 (a subtraction that only sets
   the status flags; nothing is stored).  Picks the shortest usable
   encoding: the EAX+imm32 short form, the immediate form, a register form,
   or — when both operands are in memory or immediate — a staged form
   through TMP_REG1. */
static sljit_s32 emit_cmp_binary(struct sljit_compiler *compiler,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	sljit_u8* inst;

	/* CMP EAX/RAX, imm32 short form; only worthwhile when imm8 cannot be
	   used, and on 64 bit only when the immediate fits in 32 bits. */
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
#else
	if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128)) {
#endif
		BINARY_EAX_IMM(CMP_EAX_i32, src2w);
		return SLJIT_SUCCESS;
	}

	if (FAST_IS_REG(src1)) {
		if (src2 & SLJIT_IMM) {
			BINARY_IMM(CMP, CMP_rm_r, src2w, src1, 0);
		}
		else {
			inst = emit_x86_instruction(compiler, 1, src1, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = CMP_r_rm;
		}
		return SLJIT_SUCCESS;
	}

	/* src1 in memory, src2 in a register: CMP r/m, r. */
	if (FAST_IS_REG(src2) && !(src1 & SLJIT_IMM)) {
		inst = emit_x86_instruction(compiler, 1, src2, 0, src1, src1w);
		FAIL_IF(!inst);
		*inst = CMP_rm_r;
		return SLJIT_SUCCESS;
	}

	if (src2 & SLJIT_IMM) {
		/* Two immediates: materialize src1 in TMP_REG1 first. */
		if (src1 & SLJIT_IMM) {
			EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
			src1 = TMP_REG1;
			src1w = 0;
		}
		BINARY_IMM(CMP, CMP_rm_r, src2w, src1, src1w);
	}
	else {
		/* Both operands in memory: stage src1 through TMP_REG1. */
		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
		inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
		FAIL_IF(!inst);
		*inst = CMP_r_rm;
	}
	return SLJIT_SUCCESS;
}
   1871   1.1     alnsn 
/* Emits a TEST instruction (flags-only bitwise AND of src1 and src2).
   Either operand may be a register, memory reference or immediate;
   TEST is symmetric, so the operands are swapped freely to find an
   encodable form before falling back to TMP_REG1. */
static sljit_s32 emit_test_binary(struct sljit_compiler *compiler,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	sljit_u8* inst;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	/* Short TEST EAX, imm32 form when src1 is R0 and the immediate does
	   not fit in 8 bits (and fits in 32 bits on 64 bit targets). */
	if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
#else
	if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128)) {
#endif
		BINARY_EAX_IMM(TEST_EAX_i32, src2w);
		return SLJIT_SUCCESS;
	}

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	/* Mirror case: R0 as the second operand. */
	if (src2 == SLJIT_R0 && (src1 & SLJIT_IMM) && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) {
#else
	if (src2 == SLJIT_R0 && (src1 & SLJIT_IMM) && (src1w > 127 || src1w < -128)) {
#endif
		BINARY_EAX_IMM(TEST_EAX_i32, src1w);
		return SLJIT_SUCCESS;
	}

	if (!(src1 & SLJIT_IMM)) {
		if (src2 & SLJIT_IMM) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
			/* TEST r/m, imm32 (GROUP_F7 /0) if the immediate fits,
			   otherwise load it into TMP_REG2 and use TEST r/m, r. */
			if (IS_HALFWORD(src2w) || compiler->mode32) {
				inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, src1w);
				FAIL_IF(!inst);
				*inst = GROUP_F7;
			}
			else {
				FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src2w));
				inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, src1, src1w);
				FAIL_IF(!inst);
				*inst = TEST_rm_r;
			}
#else
			inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, src1w);
			FAIL_IF(!inst);
			*inst = GROUP_F7;
#endif
			return SLJIT_SUCCESS;
		}
		else if (FAST_IS_REG(src1)) {
			inst = emit_x86_instruction(compiler, 1, src1, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = TEST_rm_r;
			return SLJIT_SUCCESS;
		}
	}

	/* Symmetric handling with the operands swapped. */
	if (!(src2 & SLJIT_IMM)) {
		if (src1 & SLJIT_IMM) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
			if (IS_HALFWORD(src1w) || compiler->mode32) {
				inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src1w, src2, src2w);
				FAIL_IF(!inst);
				*inst = GROUP_F7;
			}
			else {
				FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src1w));
				inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, src2, src2w);
				FAIL_IF(!inst);
				*inst = TEST_rm_r;
			}
#else
			inst = emit_x86_instruction(compiler, 1, src1, src1w, src2, src2w);
			FAIL_IF(!inst);
			*inst = GROUP_F7;
#endif
			return SLJIT_SUCCESS;
		}
		else if (FAST_IS_REG(src2)) {
			inst = emit_x86_instruction(compiler, 1, src2, 0, src1, src1w);
			FAIL_IF(!inst);
			*inst = TEST_rm_r;
			return SLJIT_SUCCESS;
		}
	}

	/* General case: load src1 into TMP_REG1 and test against src2. */
	EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
	if (src2 & SLJIT_IMM) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		if (IS_HALFWORD(src2w) || compiler->mode32) {
			inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, TMP_REG1, 0);
			FAIL_IF(!inst);
			*inst = GROUP_F7;
		}
		else {
			FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src2w));
			inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, TMP_REG1, 0);
			FAIL_IF(!inst);
			*inst = TEST_rm_r;
		}
#else
		inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, TMP_REG1, 0);
		FAIL_IF(!inst);
		*inst = GROUP_F7;
#endif
	}
	else {
		inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
		FAIL_IF(!inst);
		*inst = TEST_rm_r;
	}
	return SLJIT_SUCCESS;
}
   1981   1.1     alnsn 
/* Emits dst = src1 shifted by src2; `mode` selects the shift kind
   (SHL/SHR/SAR, OR-ed into the ModRM reg field of a GROUP_SHIFT
   opcode). On x86 a variable shift count must live in ecx
   (SLJIT_PREF_SHIFT_REG), so the non-immediate cases shuffle values
   through TMP_REG1 (and TMP_REG2 or the stack) to preserve the
   previous contents of ecx. */
static sljit_s32 emit_shift(struct sljit_compiler *compiler,
	sljit_u8 mode,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	sljit_u8* inst;

	/* Easy cases: the count is an immediate or is already in ecx. */
	if ((src2 & SLJIT_IMM) || (src2 == SLJIT_PREF_SHIFT_REG)) {
		if (dst == src1 && dstw == src1w) {
			/* Shift in place. */
			inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, dst, dstw);
			FAIL_IF(!inst);
			*inst |= mode;
			return SLJIT_SUCCESS;
		}
		if (dst == SLJIT_UNUSED) {
			/* Result discarded: shift a scratch copy (performed for the
			   flag side effects only). */
			EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
			inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, TMP_REG1, 0);
			FAIL_IF(!inst);
			*inst |= mode;
			return SLJIT_SUCCESS;
		}
		if (dst == SLJIT_PREF_SHIFT_REG && src2 == SLJIT_PREF_SHIFT_REG) {
			/* Destination and count are both ecx: shift in TMP_REG1,
			   then move the result into ecx. */
			EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
			inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
			FAIL_IF(!inst);
			*inst |= mode;
			EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
			return SLJIT_SUCCESS;
		}
		if (FAST_IS_REG(dst)) {
			EMIT_MOV(compiler, dst, 0, src1, src1w);
			inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, dst, 0);
			FAIL_IF(!inst);
			*inst |= mode;
			return SLJIT_SUCCESS;
		}

		/* Destination in memory: compute in TMP_REG1, then store. */
		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
		inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, TMP_REG1, 0);
		FAIL_IF(!inst);
		*inst |= mode;
		EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
		return SLJIT_SUCCESS;
	}

	/* The count is a variable that is not in ecx yet. */
	if (dst == SLJIT_PREF_SHIFT_REG) {
		/* Destination is ecx: shift src1 in TMP_REG1 with the count in
		   ecx, then move the result into ecx. */
		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
		inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
		FAIL_IF(!inst);
		*inst |= mode;
		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
	}
	else if (FAST_IS_REG(dst) && dst != src2 && !ADDRESSING_DEPENDS_ON(src2, dst)) {
		/* Save the old ecx in TMP_REG1 around the shift. */
		if (src1 != dst)
			EMIT_MOV(compiler, dst, 0, src1, src1w);
		EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_PREF_SHIFT_REG, 0);
		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
		inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, dst, 0);
		FAIL_IF(!inst);
		*inst |= mode;
		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
	}
	else {
		/* This case is complex since ecx itself may be used for
		   addressing, and this case must be supported as well. */
		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_PREF_SHIFT_REG, 0);
#else
		/* No second temporary register on x86-32: save ecx on the stack. */
		EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), 0, SLJIT_PREF_SHIFT_REG, 0);
#endif
		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
		inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
		FAIL_IF(!inst);
		*inst |= mode;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG2, 0);
#else
		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, SLJIT_MEM1(SLJIT_SP), 0);
#endif
		EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
	}

	return SLJIT_SUCCESS;
}
   2069   1.1     alnsn 
   2070   1.8     alnsn static sljit_s32 emit_shift_with_flags(struct sljit_compiler *compiler,
   2071   1.8     alnsn 	sljit_u8 mode, sljit_s32 set_flags,
   2072   1.8     alnsn 	sljit_s32 dst, sljit_sw dstw,
   2073   1.8     alnsn 	sljit_s32 src1, sljit_sw src1w,
   2074   1.8     alnsn 	sljit_s32 src2, sljit_sw src2w)
   2075   1.1     alnsn {
   2076   1.1     alnsn 	/* The CPU does not set flags if the shift count is 0. */
   2077   1.1     alnsn 	if (src2 & SLJIT_IMM) {
   2078   1.1     alnsn #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
   2079   1.1     alnsn 		if ((src2w & 0x3f) != 0 || (compiler->mode32 && (src2w & 0x1f) != 0))
   2080   1.1     alnsn 			return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);
   2081   1.1     alnsn #else
   2082   1.1     alnsn 		if ((src2w & 0x1f) != 0)
   2083   1.1     alnsn 			return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);
   2084   1.1     alnsn #endif
   2085   1.1     alnsn 		if (!set_flags)
   2086   1.1     alnsn 			return emit_mov(compiler, dst, dstw, src1, src1w);
   2087   1.1     alnsn 		/* OR dst, src, 0 */
   2088   1.5     alnsn 		return emit_cum_binary(compiler, OR_r_rm, OR_rm_r, OR, OR_EAX_i32,
   2089   1.1     alnsn 			dst, dstw, src1, src1w, SLJIT_IMM, 0);
   2090   1.1     alnsn 	}
   2091   1.1     alnsn 
   2092   1.1     alnsn 	if (!set_flags)
   2093   1.1     alnsn 		return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);
   2094   1.1     alnsn 
   2095   1.5     alnsn 	if (!FAST_IS_REG(dst))
   2096   1.1     alnsn 		FAIL_IF(emit_cmp_binary(compiler, src1, src1w, SLJIT_IMM, 0));
   2097   1.1     alnsn 
   2098   1.1     alnsn 	FAIL_IF(emit_shift(compiler,mode, dst, dstw, src1, src1w, src2, src2w));
   2099   1.1     alnsn 
   2100   1.5     alnsn 	if (FAST_IS_REG(dst))
   2101   1.1     alnsn 		return emit_cmp_binary(compiler, dst, dstw, SLJIT_IMM, 0);
   2102   1.1     alnsn 	return SLJIT_SUCCESS;
   2103   1.1     alnsn }
   2104   1.1     alnsn 
/* Public entry point for two-operand integer operations (add/sub with
   and without carry, multiply, logical ops and shifts). Validates the
   arguments, normalizes local-variable and virtual-register operands,
   then dispatches to the specialized emitters above. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	CHECK_ERROR();
	CHECK(check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
	ADJUST_LOCAL_OFFSET(dst, dstw);
	ADJUST_LOCAL_OFFSET(src1, src1w);
	ADJUST_LOCAL_OFFSET(src2, src2w);

	CHECK_EXTRA_REGS(dst, dstw, (void)0);
	CHECK_EXTRA_REGS(src1, src1w, (void)0);
	CHECK_EXTRA_REGS(src2, src2w, (void)0);
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	/* Operand size for the whole operation. */
	compiler->mode32 = op & SLJIT_I32_OP;
#endif

	switch (GET_OPCODE(op)) {
	case SLJIT_ADD:
		/* Without flag requirements a single LEA may do the add. */
		if (!HAS_FLAGS(op)) {
			if (emit_lea_binary(compiler, dst, dstw, src1, src1w, src2, src2w) != SLJIT_ERR_UNSUPPORTED)
				return compiler->error;
		}
		return emit_cum_binary(compiler, ADD_r_rm, ADD_rm_r, ADD, ADD_EAX_i32,
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_ADDC:
		return emit_cum_binary(compiler, ADC_r_rm, ADC_rm_r, ADC, ADC_EAX_i32,
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_SUB:
		/* Subtracting an immediate without flags can be LEA with the
		   negated displacement. */
		if (!HAS_FLAGS(op)) {
			if ((src2 & SLJIT_IMM) && emit_lea_binary(compiler, dst, dstw, src1, src1w, SLJIT_IMM, -src2w) != SLJIT_ERR_UNSUPPORTED)
				return compiler->error;
		}

		/* SUB with an unused destination is a compare. */
		if (dst == SLJIT_UNUSED)
			return emit_cmp_binary(compiler, src1, src1w, src2, src2w);
		return emit_non_cum_binary(compiler, SUB_r_rm, SUB_rm_r, SUB, SUB_EAX_i32,
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_SUBC:
		return emit_non_cum_binary(compiler, SBB_r_rm, SBB_rm_r, SBB, SBB_EAX_i32,
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_MUL:
		return emit_mul(compiler, dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_AND:
		/* AND with an unused destination is a bit test. */
		if (dst == SLJIT_UNUSED)
			return emit_test_binary(compiler, src1, src1w, src2, src2w);
		return emit_cum_binary(compiler, AND_r_rm, AND_rm_r, AND, AND_EAX_i32,
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_OR:
		return emit_cum_binary(compiler, OR_r_rm, OR_rm_r, OR, OR_EAX_i32,
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_XOR:
		return emit_cum_binary(compiler, XOR_r_rm, XOR_rm_r, XOR, XOR_EAX_i32,
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_SHL:
		return emit_shift_with_flags(compiler, SHL, HAS_FLAGS(op),
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_LSHR:
		return emit_shift_with_flags(compiler, SHR, HAS_FLAGS(op),
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_ASHR:
		return emit_shift_with_flags(compiler, SAR, HAS_FLAGS(op),
			dst, dstw, src1, src1w, src2, src2w);
	}

	return SLJIT_SUCCESS;
}
   2173   1.1     alnsn 
   2174   1.8     alnsn SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 reg)
   2175   1.1     alnsn {
   2176   1.8     alnsn 	CHECK_REG_INDEX(check_sljit_get_register_index(reg));
   2177   1.1     alnsn #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
   2178   1.9     alnsn 	if (reg >= SLJIT_R3 && reg <= SLJIT_R8)
   2179   1.1     alnsn 		return -1;
   2180   1.1     alnsn #endif
   2181   1.1     alnsn 	return reg_map[reg];
   2182   1.1     alnsn }
   2183   1.1     alnsn 
/* Float registers map one-to-one to XMM register numbers, so the
   abstract index is returned unchanged. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_float_register_index(sljit_s32 reg)
{
	CHECK_REG_INDEX(check_sljit_get_float_register_index(reg));
	return reg;
}
   2189   1.5     alnsn 
/* Copies `size` bytes of caller-supplied raw machine code directly
   into the generated code buffer. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler,
	void *instruction, sljit_s32 size)
{
	sljit_u8 *inst;

	CHECK_ERROR();
	CHECK(check_sljit_emit_op_custom(compiler, instruction, size));

	/* Reserve the record header byte plus the instruction bytes. */
	inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
	FAIL_IF(!inst);
	INC_SIZE(size);
	SLJIT_MEMCPY(inst, instruction, size);
	return SLJIT_SUCCESS;
}
   2204   1.1     alnsn 
   2205   1.1     alnsn /* --------------------------------------------------------------------- */
   2206   1.1     alnsn /*  Floating point operators                                             */
   2207   1.1     alnsn /* --------------------------------------------------------------------- */
   2208   1.1     alnsn 
/* Backing storage for the SSE2 constant pool: 3 words of slack so a
   16 byte aligned region can be carved out, plus 2 * 16 bytes of
   constants (sign/abs masks, filled in by init_compiler()). */
static sljit_s32 sse2_data[3 + (4 + 4) * 2];
/* 16 byte aligned view into sse2_data, set up by init_compiler(). */
static sljit_s32 *sse2_buffer;
   2212   1.1     alnsn 
   2213   1.2     alnsn static void init_compiler(void)
   2214   1.1     alnsn {
   2215   1.8     alnsn 	sse2_buffer = (sljit_s32*)(((sljit_uw)sse2_data + 15) & ~0xf);
   2216   1.5     alnsn 	/* Single precision constants. */
   2217   1.5     alnsn 	sse2_buffer[0] = 0x80000000;
   2218   1.5     alnsn 	sse2_buffer[4] = 0x7fffffff;
   2219   1.5     alnsn 	/* Double precision constants. */
   2220   1.5     alnsn 	sse2_buffer[8] = 0;
   2221   1.5     alnsn 	sse2_buffer[9] = 0x80000000;
   2222   1.5     alnsn 	sse2_buffer[12] = 0xffffffff;
   2223   1.5     alnsn 	sse2_buffer[13] = 0x7fffffff;
   2224   1.1     alnsn }
   2225   1.1     alnsn 
/* Reports whether floating point code generation is available. The
   answer can be forced at build time (SLJIT_IS_FPU_AVAILABLE),
   detected at runtime via CPUID (SLJIT_DETECT_SSE2), or assumed. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_is_fpu_available(void)
{
#ifdef SLJIT_IS_FPU_AVAILABLE
	return SLJIT_IS_FPU_AVAILABLE;
#elif (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
	/* cpu_has_sse2 is -1 until the first CPUID probe. */
	if (cpu_has_sse2 == -1)
		get_cpu_features();
	return cpu_has_sse2;
#else /* SLJIT_DETECT_SSE2 */
	return 1;
#endif /* SLJIT_DETECT_SSE2 */
}
   2238   1.1     alnsn 
   2239   1.8     alnsn static sljit_s32 emit_sse2(struct sljit_compiler *compiler, sljit_u8 opcode,
   2240   1.8     alnsn 	sljit_s32 single, sljit_s32 xmm1, sljit_s32 xmm2, sljit_sw xmm2w)
   2241   1.1     alnsn {
   2242   1.8     alnsn 	sljit_u8 *inst;
   2243   1.1     alnsn 
   2244   1.5     alnsn 	inst = emit_x86_instruction(compiler, 2 | (single ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2, xmm1, 0, xmm2, xmm2w);
   2245   1.5     alnsn 	FAIL_IF(!inst);
   2246   1.5     alnsn 	*inst++ = GROUP_0F;
   2247   1.5     alnsn 	*inst = opcode;
   2248   1.1     alnsn 	return SLJIT_SUCCESS;
   2249   1.1     alnsn }
   2250   1.1     alnsn 
   2251   1.8     alnsn static sljit_s32 emit_sse2_logic(struct sljit_compiler *compiler, sljit_u8 opcode,
   2252   1.8     alnsn 	sljit_s32 pref66, sljit_s32 xmm1, sljit_s32 xmm2, sljit_sw xmm2w)
   2253   1.1     alnsn {
   2254   1.8     alnsn 	sljit_u8 *inst;
   2255   1.1     alnsn 
   2256   1.5     alnsn 	inst = emit_x86_instruction(compiler, 2 | (pref66 ? EX86_PREF_66 : 0) | EX86_SSE2, xmm1, 0, xmm2, xmm2w);
   2257   1.5     alnsn 	FAIL_IF(!inst);
   2258   1.5     alnsn 	*inst++ = GROUP_0F;
   2259   1.5     alnsn 	*inst = opcode;
   2260   1.1     alnsn 	return SLJIT_SUCCESS;
   2261   1.1     alnsn }
   2262   1.1     alnsn 
/* Loads a float/double from src into XMM register dst
   (MOVSS when `single`, MOVSD otherwise). */
static SLJIT_INLINE sljit_s32 emit_sse2_load(struct sljit_compiler *compiler,
	sljit_s32 single, sljit_s32 dst, sljit_s32 src, sljit_sw srcw)
{
	return emit_sse2(compiler, MOVSD_x_xm, single, dst, src, srcw);
}
   2268   1.1     alnsn 
/* Stores a float/double from XMM register src into dst
   (MOVSS when `single`, MOVSD otherwise). */
static SLJIT_INLINE sljit_s32 emit_sse2_store(struct sljit_compiler *compiler,
	sljit_s32 single, sljit_s32 dst, sljit_sw dstw, sljit_s32 src)
{
	return emit_sse2(compiler, MOVSD_xm_x, single, src, dst, dstw);
}
   2274   1.1     alnsn 
/* Converts a float/double to a signed integer with truncation
   (CVTTSS2SI / CVTTSD2SI) and stores the result in dst. */
static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 dst_r = SLOW_IS_REG(dst) ? dst : TMP_REG1;
	sljit_u8 *inst;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	/* Full word-width conversion needs 64 bit operand size. */
	if (GET_OPCODE(op) == SLJIT_CONV_SW_FROM_F64)
		compiler->mode32 = 0;
#endif

	/* F3 prefix selects the single precision form, F2 the double. */
	inst = emit_x86_instruction(compiler, 2 | ((op & SLJIT_F32_OP) ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2_OP2, dst_r, 0, src, srcw);
	FAIL_IF(!inst);
	*inst++ = GROUP_0F;
	*inst = CVTTSD2SI_r_xm;

	/* Move the converted value out of the temporary when dst is memory. */
	if (dst_r == TMP_REG1 && dst != SLJIT_UNUSED)
		return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
	return SLJIT_SUCCESS;
}
   2296   1.8     alnsn 
/* Converts a signed integer to a float/double (CVTSI2SS / CVTSI2SD)
   and stores the result in dst. */
static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG;
	sljit_u8 *inst;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	/* A full machine-word source requires 64 bit operand size. */
	if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_SW)
		compiler->mode32 = 0;
#endif

	if (src & SLJIT_IMM) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		/* For a 32 bit source operation only the low 32 bits matter. */
		if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32)
			srcw = (sljit_s32)srcw;
#endif
		/* CVTSI2SD cannot take an immediate: stage it in TMP_REG1. */
		EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
		src = TMP_REG1;
		srcw = 0;
	}

	/* F3 prefix selects the single precision form, F2 the double. */
	inst = emit_x86_instruction(compiler, 2 | ((op & SLJIT_F32_OP) ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2_OP1, dst_r, 0, src, srcw);
	FAIL_IF(!inst);
	*inst++ = GROUP_0F;
	*inst = CVTSI2SD_x_rm;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	/* Restore the default operand size for subsequent float code. */
	compiler->mode32 = 1;
#endif
	if (dst_r == TMP_FREG)
		return emit_sse2_store(compiler, op & SLJIT_F32_OP, dst, dstw, TMP_FREG);
	return SLJIT_SUCCESS;
}
   2331   1.8     alnsn 
/* Emits UCOMISS/UCOMISD to compare src1 with src2, setting the CPU
   flags. src1 is staged in TMP_FREG when it is not a register,
   because the first UCOMIS* operand must be an XMM register. */
static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	if (!FAST_IS_REG(src1)) {
		FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, TMP_FREG, src1, src1w));
		src1 = TMP_FREG;
	}
	/* The 66 prefix selects the double precision form. */
	return emit_sse2_logic(compiler, UCOMISD_x_xm, !(op & SLJIT_F32_OP), src1, src2, src2w);
}
   2342   1.1     alnsn 
/* Public entry point for single-operand float operations: move,
   precision/integer conversions, compare, negate and absolute value.
   Negate/abs are implemented with XORPD/ANDPD against the bit masks
   prepared in sse2_buffer by init_compiler(). */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 dst_r;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	/* Float operations default to 32 bit operand size. */
	compiler->mode32 = 1;
#endif

	CHECK_ERROR();
	/* Dispatches the conversion/compare sub-opcodes to the helpers
	   above; only move, neg and abs continue below. */
	SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw);

	if (GET_OPCODE(op) == SLJIT_MOV_F64) {
		if (FAST_IS_REG(dst))
			return emit_sse2_load(compiler, op & SLJIT_F32_OP, dst, src, srcw);
		if (FAST_IS_REG(src))
			return emit_sse2_store(compiler, op & SLJIT_F32_OP, dst, dstw, src);
		/* Memory to memory: go through TMP_FREG. */
		FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, TMP_FREG, src, srcw));
		return emit_sse2_store(compiler, op & SLJIT_F32_OP, dst, dstw, TMP_FREG);
	}

	if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_F32) {
		dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG;
		if (FAST_IS_REG(src)) {
			/* We overwrite the high bits of source. From SLJIT point of view,
			   this is not an issue.
			   Note: In SSE3, we could also use MOVDDUP and MOVSLDUP. */
			FAIL_IF(emit_sse2_logic(compiler, UNPCKLPD_x_xm, op & SLJIT_F32_OP, src, src, 0));
		}
		else {
			/* Load with the opposite precision of the destination. */
			FAIL_IF(emit_sse2_load(compiler, !(op & SLJIT_F32_OP), TMP_FREG, src, srcw));
			src = TMP_FREG;
		}

		FAIL_IF(emit_sse2_logic(compiler, CVTPD2PS_x_xm, op & SLJIT_F32_OP, dst_r, src, 0));
		if (dst_r == TMP_FREG)
			return emit_sse2_store(compiler, op & SLJIT_F32_OP, dst, dstw, TMP_FREG);
		return SLJIT_SUCCESS;
	}

	/* Negate / absolute value: bring the operand into dst_r first. */
	if (SLOW_IS_REG(dst)) {
		dst_r = dst;
		if (dst != src)
			FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, dst_r, src, srcw));
	}
	else {
		dst_r = TMP_FREG;
		FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, dst_r, src, srcw));
	}

	switch (GET_OPCODE(op)) {
	case SLJIT_NEG_F64:
		/* Flip the sign bit: single mask at sse2_buffer, double at +8. */
		FAIL_IF(emit_sse2_logic(compiler, XORPD_x_xm, 1, dst_r, SLJIT_MEM0(), (sljit_sw)(op & SLJIT_F32_OP ? sse2_buffer : sse2_buffer + 8)));
		break;

	case SLJIT_ABS_F64:
		/* Clear the sign bit: single mask at +4, double at +12. */
		FAIL_IF(emit_sse2_logic(compiler, ANDPD_x_xm, 1, dst_r, SLJIT_MEM0(), (sljit_sw)(op & SLJIT_F32_OP ? sse2_buffer + 4 : sse2_buffer + 12)));
		break;
	}

	if (dst_r == TMP_FREG)
		return emit_sse2_store(compiler, op & SLJIT_F32_OP, dst, dstw, TMP_FREG);
	return SLJIT_SUCCESS;
}
   2408   1.1     alnsn 
/* Emits a two-operand floating point operation (ADD/SUB/MUL/DIV, in F32 or
   F64 precision selected by the SLJIT_F32_OP bit of 'op').  The operation is
   computed in an SSE2 register: dst itself when dst is a register, otherwise
   TMP_FREG, which is stored back to dst/dstw at the end.
   Returns SLJIT_SUCCESS or an error code from a failed emission. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	sljit_s32 dst_r;

	CHECK_ERROR();
	CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
	ADJUST_LOCAL_OFFSET(dst, dstw);
	ADJUST_LOCAL_OFFSET(src1, src1w);
	ADJUST_LOCAL_OFFSET(src2, src2w);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	/* SSE2 scalar ops never need REX.W. */
	compiler->mode32 = 1;
#endif

	/* Select dst_r and make it hold src1, handling the aliasing cases. */
	if (FAST_IS_REG(dst)) {
		dst_r = dst;
		if (dst == src1)
			; /* Do nothing here. */
		else if (dst == src2 && (op == SLJIT_ADD_F64 || op == SLJIT_MUL_F64)) {
			/* Swap arguments: add/mul are commutative, so dst already
			   holds one operand and we only need to read the other.
			   NOTE(review): this tests the full 'op', so the F32
			   variants never take this shortcut and fall through to
			   the TMP_FREG path below — presumably a missed
			   optimization rather than a correctness issue; confirm
			   against upstream sljit. */
			src2 = src1;
			src2w = src1w;
		}
		else if (dst != src2)
			FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, dst_r, src1, src1w));
		else {
			/* dst aliases src2 for a non-commutative op: compute in
			   TMP_FREG so src2 is not overwritten before it is read. */
			dst_r = TMP_FREG;
			FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, TMP_FREG, src1, src1w));
		}
	}
	else {
		dst_r = TMP_FREG;
		FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, TMP_FREG, src1, src1w));
	}

	/* dst_r now holds src1; fold src2 into it with the proper SSE2 opcode. */
	switch (GET_OPCODE(op)) {
	case SLJIT_ADD_F64:
		FAIL_IF(emit_sse2(compiler, ADDSD_x_xm, op & SLJIT_F32_OP, dst_r, src2, src2w));
		break;

	case SLJIT_SUB_F64:
		FAIL_IF(emit_sse2(compiler, SUBSD_x_xm, op & SLJIT_F32_OP, dst_r, src2, src2w));
		break;

	case SLJIT_MUL_F64:
		FAIL_IF(emit_sse2(compiler, MULSD_x_xm, op & SLJIT_F32_OP, dst_r, src2, src2w));
		break;

	case SLJIT_DIV_F64:
		FAIL_IF(emit_sse2(compiler, DIVSD_x_xm, op & SLJIT_F32_OP, dst_r, src2, src2w));
		break;
	}

	/* Spill the temporary back to a non-register destination. */
	if (dst_r == TMP_FREG)
		return emit_sse2_store(compiler, op & SLJIT_F32_OP, dst, dstw, TMP_FREG);
	return SLJIT_SUCCESS;
}
   2469   1.1     alnsn 
   2470   1.1     alnsn /* --------------------------------------------------------------------- */
   2471   1.1     alnsn /*  Conditional instructions                                             */
   2472   1.1     alnsn /* --------------------------------------------------------------------- */
   2473   1.1     alnsn 
   2474   1.1     alnsn SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler)
   2475   1.1     alnsn {
   2476   1.8     alnsn 	sljit_u8 *inst;
   2477   1.1     alnsn 	struct sljit_label *label;
   2478   1.1     alnsn 
   2479   1.1     alnsn 	CHECK_ERROR_PTR();
   2480   1.8     alnsn 	CHECK_PTR(check_sljit_emit_label(compiler));
   2481   1.1     alnsn 
   2482   1.1     alnsn 	if (compiler->last_label && compiler->last_label->size == compiler->size)
   2483   1.1     alnsn 		return compiler->last_label;
   2484   1.1     alnsn 
   2485   1.1     alnsn 	label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label));
   2486   1.1     alnsn 	PTR_FAIL_IF(!label);
   2487   1.1     alnsn 	set_label(label, compiler);
   2488   1.1     alnsn 
   2489   1.8     alnsn 	inst = (sljit_u8*)ensure_buf(compiler, 2);
   2490   1.5     alnsn 	PTR_FAIL_IF(!inst);
   2491   1.1     alnsn 
   2492   1.5     alnsn 	*inst++ = 0;
   2493   1.5     alnsn 	*inst++ = 0;
   2494   1.1     alnsn 
   2495   1.1     alnsn 	return label;
   2496   1.1     alnsn }
   2497   1.1     alnsn 
/* Creates a not-yet-resolved jump or call of the given 'type'.  Only a
   worst case amount of space is reserved and a marker recorded here; the
   final instruction bytes are produced later by the code generation pass.
   Returns NULL on allocation failure. */
SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type)
{
	sljit_u8 *inst;
	struct sljit_jump *jump;

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_jump(compiler, type));

	jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
	PTR_FAIL_IF_NULL(jump);
	set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
	type &= 0xff;

	/* Calls marshal their arguments into the ABI locations first. */
	if (type >= SLJIT_CALL1)
		PTR_FAIL_IF(call_with_args(compiler, type));

	/* Worst case size. */
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	compiler->size += (type >= SLJIT_JUMP) ? 5 : 6;
#else
	compiler->size += (type >= SLJIT_JUMP) ? (10 + 3) : (2 + 10 + 3);
#endif

	/* Zero-length marker byte followed by an encoded type tag
	   (NOTE(review): presumably consumed by the generator pass earlier
	   in this file; the same 'type + 2' tag is written by
	   sljit_emit_ijump for immediate targets). */
	inst = (sljit_u8*)ensure_buf(compiler, 2);
	PTR_FAIL_IF_NULL(inst);

	*inst++ = 0;
	*inst++ = type + 2;
	return jump;
}
   2528   1.1     alnsn 
/* Emits an indirect jump or call whose target comes from a register,
   memory operand, or immediate.  An immediate target is recorded as a
   patchable sljit_jump; any other operand is encoded as a group-0xFF
   call/jmp through the operand. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw)
{
	sljit_u8 *inst;
	struct sljit_jump *jump;

	CHECK_ERROR();
	CHECK(check_sljit_emit_ijump(compiler, type, src, srcw));
	ADJUST_LOCAL_OFFSET(src, srcw);

	CHECK_EXTRA_REGS(src, srcw, (void)0);

	if (type >= SLJIT_CALL1) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
		/* NOTE(review): SLJIT_R2 is presumably clobbered by the
		   fastcall argument setup in call_with_args, so the target is
		   preserved in TMP_REG1 first — confirm against call_with_args. */
		if (src == SLJIT_R2) {
			EMIT_MOV(compiler, TMP_REG1, 0, src, 0);
			src = TMP_REG1;
		}
		/* A stack-relative target must be rebased by one word when the
		   argument setup adjusts the stack (calls with >= 3 args). */
		if (src == SLJIT_MEM1(SLJIT_SP) && type >= SLJIT_CALL3)
			srcw += sizeof(sljit_sw);
#endif
#endif
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) && defined(_WIN64)
		/* Same protection for the Win64 calling convention. */
		if (src == SLJIT_R2) {
			EMIT_MOV(compiler, TMP_REG1, 0, src, 0);
			src = TMP_REG1;
		}
#endif
		FAIL_IF(call_with_args(compiler, type));
	}

	if (src == SLJIT_IMM) {
		/* Known target address: record a patchable jump like
		   sljit_emit_jump does. */
		jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
		FAIL_IF_NULL(jump);
		set_jump(jump, compiler, JUMP_ADDR);
		jump->u.target = srcw;

		/* Worst case size. */
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		compiler->size += 5;
#else
		compiler->size += 10 + 3;
#endif

		/* Zero-length marker plus the encoded type tag for the
		   code generation pass. */
		inst = (sljit_u8*)ensure_buf(compiler, 2);
		FAIL_IF_NULL(inst);

		*inst++ = 0;
		*inst++ = type + 2;
	}
	else {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		/* REX_W is not necessary (src is not immediate). */
		compiler->mode32 = 1;
#endif
		/* Encode call/jmp r/m (opcode group 0xFF). */
		inst = emit_x86_instruction(compiler, 1, 0, 0, src, srcw);
		FAIL_IF(!inst);
		*inst++ = GROUP_FF;
		*inst |= (type >= SLJIT_FAST_CALL) ? CALL_rm : JMP_rm;
	}
	return SLJIT_SUCCESS;
}
   2591   1.1     alnsn 
/* Materializes the current condition flag selected by 'type' as 0/1 and
   either moves it to dst (op == MOV variants) or combines it with dst
   using the binary operation in 'op' (e.g. OR).  Uses setcc + movzx on
   most paths; on 32-bit x86 registers without an addressable low byte
   (reg_map > 4) need xchg-with-eax or cmov based fallbacks. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw,
	sljit_s32 type)
{
	sljit_u8 *inst;
	sljit_u8 cond_set = 0;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	sljit_s32 reg;
#endif
	/* ADJUST_LOCAL_OFFSET and CHECK_EXTRA_REGS might overwrite these values. */
	sljit_s32 dst_save = dst;
	sljit_sw dstw_save = dstw;

	CHECK_ERROR();
	CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, src, srcw, type));
	SLJIT_UNUSED_ARG(srcw);

	if (dst == SLJIT_UNUSED)
		return SLJIT_SUCCESS;

	ADJUST_LOCAL_OFFSET(dst, dstw);
	CHECK_EXTRA_REGS(dst, dstw, (void)0);

	type &= 0xff;
	/* setcc = jcc + 0x10. */
	cond_set = get_jump_code(type) + 0x10;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	/* Fast path: OR the flag bit directly into dst (dst == src, flags
	   not requested).  setcc TMP_REG1b; or dst8, TMP_REG1b. */
	if (GET_OPCODE(op) == SLJIT_OR && !GET_ALL_FLAGS(op) && FAST_IS_REG(dst) && dst == src) {
		inst = (sljit_u8*)ensure_buf(compiler, 1 + 4 + 3);
		FAIL_IF(!inst);
		INC_SIZE(4 + 3);
		/* Set low register to conditional flag. */
		*inst++ = (reg_map[TMP_REG1] <= 7) ? REX : REX_B;
		*inst++ = GROUP_0F;
		*inst++ = cond_set;
		*inst++ = MOD_REG | reg_lmap[TMP_REG1];
		*inst++ = REX | (reg_map[TMP_REG1] <= 7 ? 0 : REX_R) | (reg_map[dst] <= 7 ? 0 : REX_B);
		*inst++ = OR_rm8_r8;
		*inst++ = MOD_REG | (reg_lmap[TMP_REG1] << 3) | reg_lmap[dst];
		return SLJIT_SUCCESS;
	}

	/* Build the 0/1 value in dst directly for a plain MOV to a register,
	   otherwise in TMP_REG1. */
	reg = (op == SLJIT_MOV && FAST_IS_REG(dst)) ? dst : TMP_REG1;

	inst = (sljit_u8*)ensure_buf(compiler, 1 + 4 + 4);
	FAIL_IF(!inst);
	INC_SIZE(4 + 4);
	/* Set low register to conditional flag. */
	*inst++ = (reg_map[reg] <= 7) ? REX : REX_B;
	*inst++ = GROUP_0F;
	*inst++ = cond_set;
	*inst++ = MOD_REG | reg_lmap[reg];
	*inst++ = REX_W | (reg_map[reg] <= 7 ? 0 : (REX_B | REX_R));
	/* The movzx instruction does not affect flags. */
	*inst++ = GROUP_0F;
	*inst++ = MOVZX_r_rm8;
	*inst = MOD_REG | (reg_lmap[reg] << 3) | reg_lmap[reg];

	if (reg != TMP_REG1)
		return SLJIT_SUCCESS;

	if (GET_OPCODE(op) < SLJIT_ADD) {
		/* Plain move (possibly with a conversion) to dst. */
		compiler->mode32 = GET_OPCODE(op) != SLJIT_MOV;
		return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
	}
	/* Binary op: delegate to sljit_emit_op2 with the flag in TMP_REG1,
	   using the operands saved before they were rewritten above. */
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
		|| (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
	compiler->skip_checks = 1;
#endif
	return sljit_emit_op2(compiler, op, dst_save, dstw_save, dst_save, dstw_save, TMP_REG1, 0);

#else
	/* The SLJIT_CONFIG_X86_32 code path starts here. */
	if (GET_OPCODE(op) < SLJIT_ADD && FAST_IS_REG(dst)) {
		if (reg_map[dst] <= 4) {
			/* Low byte is accessible. */
			inst = (sljit_u8*)ensure_buf(compiler, 1 + 3 + 3);
			FAIL_IF(!inst);
			INC_SIZE(3 + 3);
			/* Set low byte to conditional flag. */
			*inst++ = GROUP_0F;
			*inst++ = cond_set;
			*inst++ = MOD_REG | reg_map[dst];

			*inst++ = GROUP_0F;
			*inst++ = MOVZX_r_rm8;
			*inst = MOD_REG | (reg_map[dst] << 3) | reg_map[dst];
			return SLJIT_SUCCESS;
		}

		/* Low byte is not accessible. */
		if (cpu_has_cmov == -1)
			get_cpu_features();

		if (cpu_has_cmov) {
			/* dst = 0; if (cond) dst = 1 — done without touching
			   any byte register. */
			EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, 1);
			/* a xor reg, reg operation would overwrite the flags. */
			EMIT_MOV(compiler, dst, 0, SLJIT_IMM, 0);

			inst = (sljit_u8*)ensure_buf(compiler, 1 + 3);
			FAIL_IF(!inst);
			INC_SIZE(3);

			*inst++ = GROUP_0F;
			/* cmovcc = setcc - 0x50. */
			*inst++ = cond_set - 0x50;
			*inst++ = MOD_REG | (reg_map[dst] << 3) | reg_map[TMP_REG1];
			return SLJIT_SUCCESS;
		}

		/* No cmov either: borrow al via xchg, setcc into it, widen,
		   then restore eax.  xchg also swaps the previous TMP_REG1
		   value back at the end. */
		inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + 3 + 3 + 1);
		FAIL_IF(!inst);
		INC_SIZE(1 + 3 + 3 + 1);
		*inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
		/* Set al to conditional flag. */
		*inst++ = GROUP_0F;
		*inst++ = cond_set;
		*inst++ = MOD_REG | 0 /* eax */;

		*inst++ = GROUP_0F;
		*inst++ = MOVZX_r_rm8;
		*inst++ = MOD_REG | (reg_map[dst] << 3) | 0 /* eax */;
		*inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
		return SLJIT_SUCCESS;
	}

	/* OR-into-dst fast path (32-bit): requires a byte-addressable dst. */
	if (GET_OPCODE(op) == SLJIT_OR && !GET_ALL_FLAGS(op) && FAST_IS_REG(dst) && dst == src && reg_map[dst] <= 4) {
		SLJIT_ASSERT(reg_map[SLJIT_R0] == 0);

		if (dst != SLJIT_R0) {
			/* Scratch byte register: al (swapped with TMP_REG1). */
			inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + 3 + 2 + 1);
			FAIL_IF(!inst);
			INC_SIZE(1 + 3 + 2 + 1);
			/* Set low register to conditional flag. */
			*inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
			*inst++ = GROUP_0F;
			*inst++ = cond_set;
			*inst++ = MOD_REG | 0 /* eax */;
			*inst++ = OR_rm8_r8;
			*inst++ = MOD_REG | (0 /* eax */ << 3) | reg_map[dst];
			*inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
		}
		else {
			/* dst is eax itself: scratch through cl instead. */
			inst = (sljit_u8*)ensure_buf(compiler, 1 + 2 + 3 + 2 + 2);
			FAIL_IF(!inst);
			INC_SIZE(2 + 3 + 2 + 2);
			/* Set low register to conditional flag. */
			*inst++ = XCHG_r_rm;
			*inst++ = MOD_REG | (1 /* ecx */ << 3) | reg_map[TMP_REG1];
			*inst++ = GROUP_0F;
			*inst++ = cond_set;
			*inst++ = MOD_REG | 1 /* ecx */;
			*inst++ = OR_rm8_r8;
			*inst++ = MOD_REG | (1 /* ecx */ << 3) | 0 /* eax */;
			*inst++ = XCHG_r_rm;
			*inst++ = MOD_REG | (1 /* ecx */ << 3) | reg_map[TMP_REG1];
		}
		return SLJIT_SUCCESS;
	}

	/* Set TMP_REG1 to the bit. */
	inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + 3 + 3 + 1);
	FAIL_IF(!inst);
	INC_SIZE(1 + 3 + 3 + 1);
	*inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
	/* Set al to conditional flag. */
	*inst++ = GROUP_0F;
	*inst++ = cond_set;
	*inst++ = MOD_REG | 0 /* eax */;

	*inst++ = GROUP_0F;
	*inst++ = MOVZX_r_rm8;
	*inst++ = MOD_REG | (0 << 3) /* eax */ | 0 /* eax */;

	*inst++ = XCHG_EAX_r + reg_map[TMP_REG1];

	if (GET_OPCODE(op) < SLJIT_ADD)
		return emit_mov(compiler, dst, dstw, TMP_REG1, 0);

	/* Binary op: combine the flag in TMP_REG1 with the saved operands. */
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
		|| (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
	compiler->skip_checks = 1;
#endif
	return sljit_emit_op2(compiler, op, dst_save, dstw_save, dst_save, dstw_save, TMP_REG1, 0);
#endif /* SLJIT_CONFIG_X86_64 */
}
   2780   1.1     alnsn 
/* Computes the address of a local area slot (SLJIT_SP + offset) into dst.
   On x86-64 an offset that does not fit in a signed 32-bit immediate is
   first loaded into TMP_REG1 so a single lea can form the sum. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_local_base(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw offset)
{
	CHECK_ERROR();
	CHECK(check_sljit_get_local_base(compiler, dst, dstw, offset));
	ADJUST_LOCAL_OFFSET(dst, dstw);

	CHECK_EXTRA_REGS(dst, dstw, (void)0);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	/* Address arithmetic is done at full width. */
	compiler->mode32 = 0;
#endif

	/* Rebase 'offset' the same way a SLJIT_MEM1(SLJIT_SP) access would be. */
	ADJUST_LOCAL_OFFSET(SLJIT_MEM1(SLJIT_SP), offset);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	if (NOT_HALFWORD(offset)) {
		FAIL_IF(emit_load_imm64(compiler, TMP_REG1, offset));
#if (defined SLJIT_DEBUG && SLJIT_DEBUG)
		/* lea with two registers is always encodable. */
		SLJIT_ASSERT(emit_lea_binary(compiler, dst, dstw, SLJIT_SP, 0, TMP_REG1, 0) != SLJIT_ERR_UNSUPPORTED);
		return compiler->error;
#else
		return emit_lea_binary(compiler, dst, dstw, SLJIT_SP, 0, TMP_REG1, 0);
#endif
	}
#endif

	/* lea dst, [sp + offset]; a plain mov suffices for offset == 0. */
	if (offset != 0)
		return emit_lea_binary(compiler, dst, dstw, SLJIT_SP, 0, SLJIT_IMM, offset);
	return emit_mov(compiler, dst, dstw, SLJIT_SP, 0);
}
   2811   1.1     alnsn 
/* Emits code loading init_value into dst and records a sljit_const so the
   embedded immediate can be rewritten later with sljit_set_const.
   Returns NULL on failure. */
SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value)
{
	sljit_u8 *inst;
	struct sljit_const *const_;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	sljit_s32 reg;
#endif

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value));
	ADJUST_LOCAL_OFFSET(dst, dstw);

	CHECK_EXTRA_REGS(dst, dstw, (void)0);

	const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const));
	PTR_FAIL_IF(!const_);
	set_const(const_, compiler);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	/* 64-bit: the constant must go through a register (movabs); a memory
	   dst is filled from TMP_REG1 after the marker below. */
	compiler->mode32 = 0;
	reg = SLOW_IS_REG(dst) ? dst : TMP_REG1;

	if (emit_load_imm64(compiler, reg, init_value))
		return NULL;
#else
	/* 32-bit: mov can store the immediate to any destination directly. */
	if (dst == SLJIT_UNUSED)
		dst = TMP_REG1;

	if (emit_mov(compiler, dst, dstw, SLJIT_IMM, init_value))
		return NULL;
#endif

	/* Zero-length marker; the 1 tag presumably identifies a const record
	   for the code generation pass (labels use 0, jumps type + 2). */
	inst = (sljit_u8*)ensure_buf(compiler, 2);
	PTR_FAIL_IF(!inst);

	*inst++ = 0;
	*inst++ = 1;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	if (dst & SLJIT_MEM)
		if (emit_mov(compiler, dst, dstw, TMP_REG1, 0))
			return NULL;
#endif

	return const_;
}
   2858   1.1     alnsn 
   2859   1.9     alnsn SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset)
   2860   1.1     alnsn {
   2861   1.9     alnsn 	SLJIT_UNUSED_ARG(executable_offset);
   2862   1.1     alnsn #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
   2863   1.9     alnsn 	sljit_unaligned_store_sw((void*)addr, new_target - (addr + 4) - (sljit_uw)executable_offset);
   2864   1.1     alnsn #else
   2865   1.9     alnsn 	sljit_unaligned_store_sw((void*)addr, (sljit_sw) new_target);
   2866   1.1     alnsn #endif
   2867   1.1     alnsn }
   2868   1.1     alnsn 
   2869   1.9     alnsn SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset)
   2870   1.1     alnsn {
   2871   1.9     alnsn 	SLJIT_UNUSED_ARG(executable_offset);
   2872   1.9     alnsn 	sljit_unaligned_store_sw((void*)addr, new_constant);
   2873   1.1     alnsn }
   2874   1.8     alnsn 
   2875   1.8     alnsn SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_x86_is_sse2_available(void)
   2876   1.8     alnsn {
   2877   1.8     alnsn #if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
   2878   1.8     alnsn 	if (cpu_has_sse2 == -1)
   2879   1.8     alnsn 		get_cpu_features();
   2880   1.8     alnsn 	return cpu_has_sse2;
   2881   1.8     alnsn #else
   2882   1.8     alnsn 	return 1;
   2883   1.8     alnsn #endif
   2884   1.8     alnsn }
   2885   1.8     alnsn 
   2886   1.8     alnsn SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_x86_is_cmov_available(void)
   2887   1.8     alnsn {
   2888   1.8     alnsn 	if (cpu_has_cmov == -1)
   2889   1.8     alnsn 		get_cpu_features();
   2890   1.8     alnsn 	return cpu_has_cmov;
   2891   1.8     alnsn }
   2892   1.8     alnsn 
/* x86 specific extension: emits cmovcc dst_reg, src for the condition in
   'type'.  Callers must have verified CMOV support with
   sljit_x86_is_cmov_available.  Since cmov has no immediate form, an
   immediate source is first materialized in TMP_REG1. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_x86_emit_cmov(struct sljit_compiler *compiler,
	sljit_s32 type,
	sljit_s32 dst_reg,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_u8* inst;

	CHECK_ERROR();
#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
	CHECK_ARGUMENT(sljit_x86_is_cmov_available());
	CHECK_ARGUMENT(!(type & ~(0xff | SLJIT_I32_OP)));
	CHECK_ARGUMENT((type & 0xff) >= SLJIT_EQUAL && (type & 0xff) <= SLJIT_ORDERED_F64);
	CHECK_ARGUMENT(FUNCTION_CHECK_IS_REG(dst_reg & ~SLJIT_I32_OP));
	FUNCTION_CHECK_SRC(src, srcw);

	/* The condition must be one the previous operation actually set. */
	if ((type & 0xff) <= SLJIT_NOT_ZERO)
		CHECK_ARGUMENT(compiler->last_flags & SLJIT_SET_Z);
	else
		CHECK_ARGUMENT((type & 0xff) == (compiler->last_flags & 0xff));
#endif
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
	if (SLJIT_UNLIKELY(!!compiler->verbose)) {
		fprintf(compiler->verbose, "  x86_cmov%s %s%s, ",
			!(dst_reg & SLJIT_I32_OP) ? "" : ".i",
			jump_names[type & 0xff], JUMP_POSTFIX(type));
		sljit_verbose_reg(compiler, dst_reg & ~SLJIT_I32_OP);
		fprintf(compiler->verbose, ", ");
		sljit_verbose_param(compiler, src, srcw);
		fprintf(compiler->verbose, "\n");
	}
#endif

	ADJUST_LOCAL_OFFSET(src, srcw);
	CHECK_EXTRA_REGS(src, srcw, (void)0);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	/* Operand width follows the SLJIT_I32_OP bit of dst_reg. */
	compiler->mode32 = dst_reg & SLJIT_I32_OP;
#endif
	dst_reg &= ~SLJIT_I32_OP;

	/* cmov has no immediate form; load the constant into TMP_REG1. */
	if (SLJIT_UNLIKELY(src & SLJIT_IMM)) {
		EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, srcw);
		src = TMP_REG1;
		srcw = 0;
	}

	inst = emit_x86_instruction(compiler, 2, dst_reg, 0, src, srcw);
	FAIL_IF(!inst);
	*inst++ = GROUP_0F;
	/* cmovcc = jcc - 0x40. */
	*inst = get_jump_code(type & 0xff) - 0x40;
	return SLJIT_SUCCESS;
}
   2945