Home | History | Annotate | Line # | Download | only in sljit_src
sljitNativeX86_common.c revision 1.1.1.4
      1 /*
      2  *    Stack-less Just-In-Time compiler
      3  *
      4  *    Copyright 2009-2012 Zoltan Herczeg (hzmester (at) freemail.hu). All rights reserved.
      5  *
      6  * Redistribution and use in source and binary forms, with or without modification, are
      7  * permitted provided that the following conditions are met:
      8  *
      9  *   1. Redistributions of source code must retain the above copyright notice, this list of
     10  *      conditions and the following disclaimer.
     11  *
     12  *   2. Redistributions in binary form must reproduce the above copyright notice, this list
     13  *      of conditions and the following disclaimer in the documentation and/or other materials
     14  *      provided with the distribution.
     15  *
     16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
     17  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
     18  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
     19  * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
     20  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
     21  * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
     22  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     23  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
     24  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     25  */
     26 
     27 SLJIT_API_FUNC_ATTRIBUTE SLJIT_CONST char* sljit_get_platform_name(void)
     28 {
     29 	return "x86" SLJIT_CPUINFO;
     30 }
     31 
     32 /*
     33    32b register indexes:
     34      0 - EAX
     35      1 - ECX
     36      2 - EDX
     37      3 - EBX
     38      4 - none
     39      5 - EBP
     40      6 - ESI
     41      7 - EDI
     42 */
     43 
     44 /*
     45    64b register indexes:
     46      0 - RAX
     47      1 - RCX
     48      2 - RDX
     49      3 - RBX
     50      4 - none
     51      5 - RBP
     52      6 - RSI
     53      7 - RDI
     54      8 - R8   - From now on REX prefix is required
     55      9 - R9
     56     10 - R10
     57     11 - R11
     58     12 - R12
     59     13 - R13
     60     14 - R14
     61     15 - R15
     62 */
     63 
     64 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
     65 
     66 /* Last register + 1. */
     67 #define TMP_REG1	(SLJIT_NO_REGISTERS + 1)
     68 
     69 static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 2] = {
     70 	0, 0, 2, 1, 0, 0, 3, 6, 7, 0, 0, 4, 5
     71 };
     72 
     73 #define CHECK_EXTRA_REGS(p, w, do) \
     74 	if (p >= SLJIT_TEMPORARY_EREG1 && p <= SLJIT_TEMPORARY_EREG2) { \
     75 		w = compiler->scratches_start + (p - SLJIT_TEMPORARY_EREG1) * sizeof(sljit_sw); \
     76 		p = SLJIT_MEM1(SLJIT_LOCALS_REG); \
     77 		do; \
     78 	} \
     79 	else if (p >= SLJIT_SAVED_EREG1 && p <= SLJIT_SAVED_EREG2) { \
     80 		w = compiler->saveds_start + (p - SLJIT_SAVED_EREG1) * sizeof(sljit_sw); \
     81 		p = SLJIT_MEM1(SLJIT_LOCALS_REG); \
     82 		do; \
     83 	}
     84 
     85 #else /* SLJIT_CONFIG_X86_32 */
     86 
     87 /* Last register + 1. */
     88 #define TMP_REG1	(SLJIT_NO_REGISTERS + 1)
     89 #define TMP_REG2	(SLJIT_NO_REGISTERS + 2)
     90 #define TMP_REG3	(SLJIT_NO_REGISTERS + 3)
     91 
     92 /* Note: r12 & 0x7 == 0b100, which decoded as SIB byte present
     93    Note: avoid to use r12 and r13 for memory addessing
     94    therefore r12 is better for SAVED_EREG than SAVED_REG. */
     95 #ifndef _WIN64
     96 /* 1st passed in rdi, 2nd argument passed in rsi, 3rd in rdx. */
     97 static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 4] = {
     98 	0, 0, 6, 1, 8, 11, 3, 15, 14, 13, 12, 4, 2, 7, 9
     99 };
    100 /* low-map. reg_map & 0x7. */
    101 static SLJIT_CONST sljit_ub reg_lmap[SLJIT_NO_REGISTERS + 4] = {
    102 	0, 0, 6, 1, 0, 3,  3, 7,  6,  5,  4,  4, 2, 7, 1
    103 };
    104 #else
    105 /* 1st passed in rcx, 2nd argument passed in rdx, 3rd in r8. */
    106 static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 4] = {
    107 	0, 0, 2, 1, 11, 13, 3, 6, 7, 14, 15, 4, 10, 8, 9
    108 };
    109 /* low-map. reg_map & 0x7. */
    110 static SLJIT_CONST sljit_ub reg_lmap[SLJIT_NO_REGISTERS + 4] = {
    111 	0, 0, 2, 1, 3,  5,  3, 6, 7,  6,  7, 4, 2,  0, 1
    112 };
    113 #endif
    114 
    115 #define REX_W		0x48
    116 #define REX_R		0x44
    117 #define REX_X		0x42
    118 #define REX_B		0x41
    119 #define REX		0x40
    120 
    121 #ifndef _WIN64
    122 #define HALFWORD_MAX 0x7fffffffl
    123 #define HALFWORD_MIN -0x80000000l
    124 #else
    125 #define HALFWORD_MAX 0x7fffffffll
    126 #define HALFWORD_MIN -0x80000000ll
    127 #endif
    128 
    129 #define IS_HALFWORD(x)		((x) <= HALFWORD_MAX && (x) >= HALFWORD_MIN)
    130 #define NOT_HALFWORD(x)		((x) > HALFWORD_MAX || (x) < HALFWORD_MIN)
    131 
    132 #define CHECK_EXTRA_REGS(p, w, do)
    133 
    134 #endif /* SLJIT_CONFIG_X86_32 */
    135 
    136 #if (defined SLJIT_SSE2 && SLJIT_SSE2)
    137 #define TMP_FREG	(0)
    138 #endif
    139 
    140 /* Size flags for emit_x86_instruction: */
    141 #define EX86_BIN_INS		0x0010
    142 #define EX86_SHIFT_INS		0x0020
    143 #define EX86_REX		0x0040
    144 #define EX86_NO_REXW		0x0080
    145 #define EX86_BYTE_ARG		0x0100
    146 #define EX86_HALF_ARG		0x0200
    147 #define EX86_PREF_66		0x0400
    148 
    149 #if (defined SLJIT_SSE2 && SLJIT_SSE2)
    150 #define EX86_SSE2		0x0800
    151 #define EX86_PREF_F2		0x1000
    152 #define EX86_PREF_F3		0x2000
    153 #endif
    154 
    155 /* --------------------------------------------------------------------- */
    156 /*  Instrucion forms                                                     */
    157 /* --------------------------------------------------------------------- */
    158 
    159 #define ADD		(/* BINARY */ 0 << 3)
    160 #define ADD_EAX_i32	0x05
    161 #define ADD_r_rm	0x03
    162 #define ADD_rm_r	0x01
    163 #define ADDSD_x_xm	0x58
    164 #define ADC		(/* BINARY */ 2 << 3)
    165 #define ADC_EAX_i32	0x15
    166 #define ADC_r_rm	0x13
    167 #define ADC_rm_r	0x11
    168 #define AND		(/* BINARY */ 4 << 3)
    169 #define AND_EAX_i32	0x25
    170 #define AND_r_rm	0x23
    171 #define AND_rm_r	0x21
    172 #define ANDPD_x_xm	0x54
    173 #define BSR_r_rm	(/* GROUP_0F */ 0xbd)
    174 #define CALL_i32	0xe8
    175 #define CALL_rm		(/* GROUP_FF */ 2 << 3)
    176 #define CDQ		0x99
    177 #define CMOVNE_r_rm	(/* GROUP_0F */ 0x45)
    178 #define CMP		(/* BINARY */ 7 << 3)
    179 #define CMP_EAX_i32	0x3d
    180 #define CMP_r_rm	0x3b
    181 #define CMP_rm_r	0x39
    182 #define DIV		(/* GROUP_F7 */ 6 << 3)
    183 #define DIVSD_x_xm	0x5e
    184 #define INT3		0xcc
    185 #define IDIV		(/* GROUP_F7 */ 7 << 3)
    186 #define IMUL		(/* GROUP_F7 */ 5 << 3)
    187 #define IMUL_r_rm	(/* GROUP_0F */ 0xaf)
    188 #define IMUL_r_rm_i8	0x6b
    189 #define IMUL_r_rm_i32	0x69
    190 #define JE_i8		0x74
    191 #define JMP_i8		0xeb
    192 #define JMP_i32		0xe9
    193 #define JMP_rm		(/* GROUP_FF */ 4 << 3)
    194 #define LEA_r_m		0x8d
    195 #define MOV_r_rm	0x8b
    196 #define MOV_r_i32	0xb8
    197 #define MOV_rm_r	0x89
    198 #define MOV_rm_i32	0xc7
    199 #define MOV_rm8_i8	0xc6
    200 #define MOV_rm8_r8	0x88
    201 #define MOVSD_x_xm	0x10
    202 #define MOVSD_xm_x	0x11
    203 #define MOVSXD_r_rm	0x63
    204 #define MOVSX_r_rm8	(/* GROUP_0F */ 0xbe)
    205 #define MOVSX_r_rm16	(/* GROUP_0F */ 0xbf)
    206 #define MOVZX_r_rm8	(/* GROUP_0F */ 0xb6)
    207 #define MOVZX_r_rm16	(/* GROUP_0F */ 0xb7)
    208 #define MUL		(/* GROUP_F7 */ 4 << 3)
    209 #define MULSD_x_xm	0x59
    210 #define NEG_rm		(/* GROUP_F7 */ 3 << 3)
    211 #define NOP		0x90
    212 #define NOT_rm		(/* GROUP_F7 */ 2 << 3)
    213 #define OR		(/* BINARY */ 1 << 3)
    214 #define OR_r_rm		0x0b
    215 #define OR_EAX_i32	0x0d
    216 #define OR_rm_r		0x09
    217 #define OR_rm8_r8	0x08
    218 #define POP_r		0x58
    219 #define POP_rm		0x8f
    220 #define POPF		0x9d
    221 #define PUSH_i32	0x68
    222 #define PUSH_r		0x50
    223 #define PUSH_rm		(/* GROUP_FF */ 6 << 3)
    224 #define PUSHF		0x9c
    225 #define RET_near	0xc3
    226 #define RET_i16		0xc2
    227 #define SBB		(/* BINARY */ 3 << 3)
    228 #define SBB_EAX_i32	0x1d
    229 #define SBB_r_rm	0x1b
    230 #define SBB_rm_r	0x19
    231 #define SAR		(/* SHIFT */ 7 << 3)
    232 #define SHL		(/* SHIFT */ 4 << 3)
    233 #define SHR		(/* SHIFT */ 5 << 3)
    234 #define SUB		(/* BINARY */ 5 << 3)
    235 #define SUB_EAX_i32	0x2d
    236 #define SUB_r_rm	0x2b
    237 #define SUB_rm_r	0x29
    238 #define SUBSD_x_xm	0x5c
    239 #define TEST_EAX_i32	0xa9
    240 #define TEST_rm_r	0x85
    241 #define UCOMISD_x_xm	0x2e
    242 #define XCHG_EAX_r	0x90
    243 #define XCHG_r_rm	0x87
    244 #define XOR		(/* BINARY */ 6 << 3)
    245 #define XOR_EAX_i32	0x35
    246 #define XOR_r_rm	0x33
    247 #define XOR_rm_r	0x31
    248 #define XORPD_x_xm	0x57
    249 
    250 #define GROUP_0F	0x0f
    251 #define GROUP_F7	0xf7
    252 #define GROUP_FF	0xff
    253 #define GROUP_BINARY_81	0x81
    254 #define GROUP_BINARY_83	0x83
    255 #define GROUP_SHIFT_1	0xd1
    256 #define GROUP_SHIFT_N	0xc1
    257 #define GROUP_SHIFT_CL	0xd3
    258 
    259 #define MOD_REG		0xc0
    260 #define MOD_DISP8	0x40
    261 
    262 #define INC_SIZE(s)			(*inst++ = (s), compiler->size += (s))
    263 
    264 #define PUSH_REG(r)			(*inst++ = (PUSH_r + (r)))
    265 #define POP_REG(r)			(*inst++ = (POP_r + (r)))
    266 #define RET()				(*inst++ = (RET_near))
    267 #define RET_I16(n)			(*inst++ = (RET_i16), *inst++ = n, *inst++ = 0)
    268 /* r32, r/m32 */
    269 #define MOV_RM(mod, reg, rm)		(*inst++ = (MOV_r_rm), *inst++ = (mod) << 6 | (reg) << 3 | (rm))
    270 
    271 /* Multithreading does not affect these static variables, since they store
    272    built-in CPU features. Therefore they can be overwritten by different threads
    273    if they detect the CPU features in the same time. */
    274 #if (defined SLJIT_SSE2 && SLJIT_SSE2) && (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
    275 static sljit_si cpu_has_sse2 = -1;
    276 #endif
    277 static sljit_si cpu_has_cmov = -1;
    278 
    279 #if defined(_MSC_VER) && _MSC_VER >= 1400
    280 #include <intrin.h>
    281 #endif
    282 
    283 static void get_cpu_features(void)
    284 {
    285 	sljit_ui features;
    286 
    287 #if defined(_MSC_VER) && _MSC_VER >= 1400
    288 
    289 	int CPUInfo[4];
    290 	__cpuid(CPUInfo, 1);
    291 	features = (sljit_ui)CPUInfo[3];
    292 
    293 #elif defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__SUNPRO_C)
    294 
    295 	/* AT&T syntax. */
    296 	__asm__ (
    297 		"movl $0x1, %%eax\n"
    298 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
    299 		/* On x86-32, there is no red zone, so this
    300 		   should work (no need for a local variable). */
    301 		"push %%ebx\n"
    302 #endif
    303 		"cpuid\n"
    304 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
    305 		"pop %%ebx\n"
    306 #endif
    307 		"movl %%edx, %0\n"
    308 		: "=g" (features)
    309 		:
    310 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
    311 		: "%eax", "%ecx", "%edx"
    312 #else
    313 		: "%rax", "%rbx", "%rcx", "%rdx"
    314 #endif
    315 	);
    316 
    317 #else /* _MSC_VER && _MSC_VER >= 1400 */
    318 
    319 	/* Intel syntax. */
    320 	__asm {
    321 		mov eax, 1
    322 		cpuid
    323 		mov features, edx
    324 	}
    325 
    326 #endif /* _MSC_VER && _MSC_VER >= 1400 */
    327 
    328 #if (defined SLJIT_SSE2 && SLJIT_SSE2) && (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
    329 	cpu_has_sse2 = (features >> 26) & 0x1;
    330 #endif
    331 	cpu_has_cmov = (features >> 15) & 0x1;
    332 }
    333 
    334 static sljit_ub get_jump_code(sljit_si type)
    335 {
    336 	switch (type) {
    337 	case SLJIT_C_EQUAL:
    338 	case SLJIT_C_FLOAT_EQUAL:
    339 		return 0x84 /* je */;
    340 
    341 	case SLJIT_C_NOT_EQUAL:
    342 	case SLJIT_C_FLOAT_NOT_EQUAL:
    343 		return 0x85 /* jne */;
    344 
    345 	case SLJIT_C_LESS:
    346 	case SLJIT_C_FLOAT_LESS:
    347 		return 0x82 /* jc */;
    348 
    349 	case SLJIT_C_GREATER_EQUAL:
    350 	case SLJIT_C_FLOAT_GREATER_EQUAL:
    351 		return 0x83 /* jae */;
    352 
    353 	case SLJIT_C_GREATER:
    354 	case SLJIT_C_FLOAT_GREATER:
    355 		return 0x87 /* jnbe */;
    356 
    357 	case SLJIT_C_LESS_EQUAL:
    358 	case SLJIT_C_FLOAT_LESS_EQUAL:
    359 		return 0x86 /* jbe */;
    360 
    361 	case SLJIT_C_SIG_LESS:
    362 		return 0x8c /* jl */;
    363 
    364 	case SLJIT_C_SIG_GREATER_EQUAL:
    365 		return 0x8d /* jnl */;
    366 
    367 	case SLJIT_C_SIG_GREATER:
    368 		return 0x8f /* jnle */;
    369 
    370 	case SLJIT_C_SIG_LESS_EQUAL:
    371 		return 0x8e /* jle */;
    372 
    373 	case SLJIT_C_OVERFLOW:
    374 	case SLJIT_C_MUL_OVERFLOW:
    375 		return 0x80 /* jo */;
    376 
    377 	case SLJIT_C_NOT_OVERFLOW:
    378 	case SLJIT_C_MUL_NOT_OVERFLOW:
    379 		return 0x81 /* jno */;
    380 
    381 	case SLJIT_C_FLOAT_UNORDERED:
    382 		return 0x8a /* jp */;
    383 
    384 	case SLJIT_C_FLOAT_ORDERED:
    385 		return 0x8b /* jpo */;
    386 	}
    387 	return 0;
    388 }
    389 
    390 static sljit_ub* generate_far_jump_code(struct sljit_jump *jump, sljit_ub *code_ptr, sljit_si type);
    391 
    392 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
    393 static sljit_ub* generate_fixed_jump(sljit_ub *code_ptr, sljit_sw addr, sljit_si type);
    394 #endif
    395 
    396 static sljit_ub* generate_near_jump_code(struct sljit_jump *jump, sljit_ub *code_ptr, sljit_ub *code, sljit_si type)
    397 {
    398 	sljit_si short_jump;
    399 	sljit_uw label_addr;
    400 
    401 	if (jump->flags & JUMP_LABEL)
    402 		label_addr = (sljit_uw)(code + jump->u.label->size);
    403 	else
    404 		label_addr = jump->u.target;
    405 	short_jump = (sljit_sw)(label_addr - (jump->addr + 2)) >= -128 && (sljit_sw)(label_addr - (jump->addr + 2)) <= 127;
    406 
    407 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
    408 	if ((sljit_sw)(label_addr - (jump->addr + 1)) > HALFWORD_MAX || (sljit_sw)(label_addr - (jump->addr + 1)) < HALFWORD_MIN)
    409 		return generate_far_jump_code(jump, code_ptr, type);
    410 #endif
    411 
    412 	if (type == SLJIT_JUMP) {
    413 		if (short_jump)
    414 			*code_ptr++ = JMP_i8;
    415 		else
    416 			*code_ptr++ = JMP_i32;
    417 		jump->addr++;
    418 	}
    419 	else if (type >= SLJIT_FAST_CALL) {
    420 		short_jump = 0;
    421 		*code_ptr++ = CALL_i32;
    422 		jump->addr++;
    423 	}
    424 	else if (short_jump) {
    425 		*code_ptr++ = get_jump_code(type) - 0x10;
    426 		jump->addr++;
    427 	}
    428 	else {
    429 		*code_ptr++ = GROUP_0F;
    430 		*code_ptr++ = get_jump_code(type);
    431 		jump->addr += 2;
    432 	}
    433 
    434 	if (short_jump) {
    435 		jump->flags |= PATCH_MB;
    436 		code_ptr += sizeof(sljit_sb);
    437 	} else {
    438 		jump->flags |= PATCH_MW;
    439 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
    440 		code_ptr += sizeof(sljit_sw);
    441 #else
    442 		code_ptr += sizeof(sljit_si);
    443 #endif
    444 	}
    445 
    446 	return code_ptr;
    447 }
    448 
    449 SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler)
    450 {
    451 	struct sljit_memory_fragment *buf;
    452 	sljit_ub *code;
    453 	sljit_ub *code_ptr;
    454 	sljit_ub *buf_ptr;
    455 	sljit_ub *buf_end;
    456 	sljit_ub len;
    457 
    458 	struct sljit_label *label;
    459 	struct sljit_jump *jump;
    460 	struct sljit_const *const_;
    461 
    462 	CHECK_ERROR_PTR();
    463 	check_sljit_generate_code(compiler);
    464 	reverse_buf(compiler);
    465 
    466 	/* Second code generation pass. */
    467 	code = (sljit_ub*)SLJIT_MALLOC_EXEC(compiler->size);
    468 	PTR_FAIL_WITH_EXEC_IF(code);
    469 	buf = compiler->buf;
    470 
    471 	code_ptr = code;
    472 	label = compiler->labels;
    473 	jump = compiler->jumps;
    474 	const_ = compiler->consts;
    475 	do {
    476 		buf_ptr = buf->memory;
    477 		buf_end = buf_ptr + buf->used_size;
    478 		do {
    479 			len = *buf_ptr++;
    480 			if (len > 0) {
    481 				/* The code is already generated. */
    482 				SLJIT_MEMMOVE(code_ptr, buf_ptr, len);
    483 				code_ptr += len;
    484 				buf_ptr += len;
    485 			}
    486 			else {
    487 				if (*buf_ptr >= 4) {
    488 					jump->addr = (sljit_uw)code_ptr;
    489 					if (!(jump->flags & SLJIT_REWRITABLE_JUMP))
    490 						code_ptr = generate_near_jump_code(jump, code_ptr, code, *buf_ptr - 4);
    491 					else
    492 						code_ptr = generate_far_jump_code(jump, code_ptr, *buf_ptr - 4);
    493 					jump = jump->next;
    494 				}
    495 				else if (*buf_ptr == 0) {
    496 					label->addr = (sljit_uw)code_ptr;
    497 					label->size = code_ptr - code;
    498 					label = label->next;
    499 				}
    500 				else if (*buf_ptr == 1) {
    501 					const_->addr = ((sljit_uw)code_ptr) - sizeof(sljit_sw);
    502 					const_ = const_->next;
    503 				}
    504 				else {
    505 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
    506 					*code_ptr++ = (*buf_ptr == 2) ? CALL_i32 : JMP_i32;
    507 					buf_ptr++;
    508 					*(sljit_sw*)code_ptr = *(sljit_sw*)buf_ptr - ((sljit_sw)code_ptr + sizeof(sljit_sw));
    509 					code_ptr += sizeof(sljit_sw);
    510 					buf_ptr += sizeof(sljit_sw) - 1;
    511 #else
    512 					code_ptr = generate_fixed_jump(code_ptr, *(sljit_sw*)(buf_ptr + 1), *buf_ptr);
    513 					buf_ptr += sizeof(sljit_sw);
    514 #endif
    515 				}
    516 				buf_ptr++;
    517 			}
    518 		} while (buf_ptr < buf_end);
    519 		SLJIT_ASSERT(buf_ptr == buf_end);
    520 		buf = buf->next;
    521 	} while (buf);
    522 
    523 	SLJIT_ASSERT(!label);
    524 	SLJIT_ASSERT(!jump);
    525 	SLJIT_ASSERT(!const_);
    526 
    527 	jump = compiler->jumps;
    528 	while (jump) {
    529 		if (jump->flags & PATCH_MB) {
    530 			SLJIT_ASSERT((sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_sb))) >= -128 && (sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_sb))) <= 127);
    531 			*(sljit_ub*)jump->addr = (sljit_ub)(jump->u.label->addr - (jump->addr + sizeof(sljit_sb)));
    532 		} else if (jump->flags & PATCH_MW) {
    533 			if (jump->flags & JUMP_LABEL) {
    534 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
    535 				*(sljit_sw*)jump->addr = (sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_sw)));
    536 #else
    537 				SLJIT_ASSERT((sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_si))) >= HALFWORD_MIN && (sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_si))) <= HALFWORD_MAX);
    538 				*(sljit_si*)jump->addr = (sljit_si)(jump->u.label->addr - (jump->addr + sizeof(sljit_si)));
    539 #endif
    540 			}
    541 			else {
    542 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
    543 				*(sljit_sw*)jump->addr = (sljit_sw)(jump->u.target - (jump->addr + sizeof(sljit_sw)));
    544 #else
    545 				SLJIT_ASSERT((sljit_sw)(jump->u.target - (jump->addr + sizeof(sljit_si))) >= HALFWORD_MIN && (sljit_sw)(jump->u.target - (jump->addr + sizeof(sljit_si))) <= HALFWORD_MAX);
    546 				*(sljit_si*)jump->addr = (sljit_si)(jump->u.target - (jump->addr + sizeof(sljit_si)));
    547 #endif
    548 			}
    549 		}
    550 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
    551 		else if (jump->flags & PATCH_MD)
    552 			*(sljit_sw*)jump->addr = jump->u.label->addr;
    553 #endif
    554 
    555 		jump = jump->next;
    556 	}
    557 
    558 	/* Maybe we waste some space because of short jumps. */
    559 	SLJIT_ASSERT(code_ptr <= code + compiler->size);
    560 	compiler->error = SLJIT_ERR_COMPILED;
    561 	compiler->executable_size = code_ptr - code;
    562 	return (void*)code;
    563 }
    564 
    565 /* --------------------------------------------------------------------- */
    566 /*  Operators                                                            */
    567 /* --------------------------------------------------------------------- */
    568 
    569 static sljit_si emit_cum_binary(struct sljit_compiler *compiler,
    570 	sljit_ub op_rm, sljit_ub op_mr, sljit_ub op_imm, sljit_ub op_eax_imm,
    571 	sljit_si dst, sljit_sw dstw,
    572 	sljit_si src1, sljit_sw src1w,
    573 	sljit_si src2, sljit_sw src2w);
    574 
    575 static sljit_si emit_non_cum_binary(struct sljit_compiler *compiler,
    576 	sljit_ub op_rm, sljit_ub op_mr, sljit_ub op_imm, sljit_ub op_eax_imm,
    577 	sljit_si dst, sljit_sw dstw,
    578 	sljit_si src1, sljit_sw src1w,
    579 	sljit_si src2, sljit_sw src2w);
    580 
    581 static sljit_si emit_mov(struct sljit_compiler *compiler,
    582 	sljit_si dst, sljit_sw dstw,
    583 	sljit_si src, sljit_sw srcw);
    584 
    585 static SLJIT_INLINE sljit_si emit_save_flags(struct sljit_compiler *compiler)
    586 {
    587 	sljit_ub *inst;
    588 
    589 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
    590 	inst = (sljit_ub*)ensure_buf(compiler, 1 + 5);
    591 	FAIL_IF(!inst);
    592 	INC_SIZE(5);
    593 #else
    594 	inst = (sljit_ub*)ensure_buf(compiler, 1 + 6);
    595 	FAIL_IF(!inst);
    596 	INC_SIZE(6);
    597 	*inst++ = REX_W;
    598 #endif
    599 	*inst++ = LEA_r_m; /* lea esp/rsp, [esp/rsp + sizeof(sljit_sw)] */
    600 	*inst++ = 0x64;
    601 	*inst++ = 0x24;
    602 	*inst++ = (sljit_ub)sizeof(sljit_sw);
    603 	*inst++ = PUSHF;
    604 	compiler->flags_saved = 1;
    605 	return SLJIT_SUCCESS;
    606 }
    607 
    608 static SLJIT_INLINE sljit_si emit_restore_flags(struct sljit_compiler *compiler, sljit_si keep_flags)
    609 {
    610 	sljit_ub *inst;
    611 
    612 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
    613 	inst = (sljit_ub*)ensure_buf(compiler, 1 + 5);
    614 	FAIL_IF(!inst);
    615 	INC_SIZE(5);
    616 	*inst++ = POPF;
    617 #else
    618 	inst = (sljit_ub*)ensure_buf(compiler, 1 + 6);
    619 	FAIL_IF(!inst);
    620 	INC_SIZE(6);
    621 	*inst++ = POPF;
    622 	*inst++ = REX_W;
    623 #endif
    624 	*inst++ = LEA_r_m; /* lea esp/rsp, [esp/rsp - sizeof(sljit_sw)] */
    625 	*inst++ = 0x64;
    626 	*inst++ = 0x24;
    627 	*inst++ = (sljit_ub)-(sljit_sb)sizeof(sljit_sw);
    628 	compiler->flags_saved = keep_flags;
    629 	return SLJIT_SUCCESS;
    630 }
    631 
    632 #ifdef _WIN32
    633 #include <malloc.h>
    634 
    635 static void SLJIT_CALL sljit_grow_stack(sljit_sw local_size)
    636 {
    637 	/* Workaround for calling the internal _chkstk() function on Windows.
    638 	This function touches all 4k pages belongs to the requested stack space,
    639 	which size is passed in local_size. This is necessary on Windows where
    640 	the stack can only grow in 4k steps. However, this function just burn
    641 	CPU cycles if the stack is large enough. However, you don't know it in
    642 	advance, so it must always be called. I think this is a bad design in
    643 	general even if it has some reasons. */
    644 	*(volatile sljit_si*)alloca(local_size) = 0;
    645 }
    646 
    647 #endif
    648 
    649 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
    650 #include "sljitNativeX86_32.c"
    651 #else
    652 #include "sljitNativeX86_64.c"
    653 #endif
    654 
    655 static sljit_si emit_mov(struct sljit_compiler *compiler,
    656 	sljit_si dst, sljit_sw dstw,
    657 	sljit_si src, sljit_sw srcw)
    658 {
    659 	sljit_ub* inst;
    660 
    661 	if (dst == SLJIT_UNUSED) {
    662 		/* No destination, doesn't need to setup flags. */
    663 		if (src & SLJIT_MEM) {
    664 			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src, srcw);
    665 			FAIL_IF(!inst);
    666 			*inst = MOV_r_rm;
    667 		}
    668 		return SLJIT_SUCCESS;
    669 	}
    670 	if (FAST_IS_REG(src)) {
    671 		inst = emit_x86_instruction(compiler, 1, src, 0, dst, dstw);
    672 		FAIL_IF(!inst);
    673 		*inst = MOV_rm_r;
    674 		return SLJIT_SUCCESS;
    675 	}
    676 	if (src & SLJIT_IMM) {
    677 		if (FAST_IS_REG(dst)) {
    678 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
    679 			return emit_do_imm(compiler, MOV_r_i32 + reg_map[dst], srcw);
    680 #else
    681 			if (!compiler->mode32) {
    682 				if (NOT_HALFWORD(srcw))
    683 					return emit_load_imm64(compiler, dst, srcw);
    684 			}
    685 			else
    686 				return emit_do_imm32(compiler, (reg_map[dst] >= 8) ? REX_B : 0, MOV_r_i32 + reg_lmap[dst], srcw);
    687 #endif
    688 		}
    689 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
    690 		if (!compiler->mode32 && NOT_HALFWORD(srcw)) {
    691 			FAIL_IF(emit_load_imm64(compiler, TMP_REG2, srcw));
    692 			inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, dst, dstw);
    693 			FAIL_IF(!inst);
    694 			*inst = MOV_rm_r;
    695 			return SLJIT_SUCCESS;
    696 		}
    697 #endif
    698 		inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, dstw);
    699 		FAIL_IF(!inst);
    700 		*inst = MOV_rm_i32;
    701 		return SLJIT_SUCCESS;
    702 	}
    703 	if (FAST_IS_REG(dst)) {
    704 		inst = emit_x86_instruction(compiler, 1, dst, 0, src, srcw);
    705 		FAIL_IF(!inst);
    706 		*inst = MOV_r_rm;
    707 		return SLJIT_SUCCESS;
    708 	}
    709 
    710 	/* Memory to memory move. Requires two instruction. */
    711 	inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src, srcw);
    712 	FAIL_IF(!inst);
    713 	*inst = MOV_r_rm;
    714 	inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
    715 	FAIL_IF(!inst);
    716 	*inst = MOV_rm_r;
    717 	return SLJIT_SUCCESS;
    718 }
    719 
    720 #define EMIT_MOV(compiler, dst, dstw, src, srcw) \
    721 	FAIL_IF(emit_mov(compiler, dst, dstw, src, srcw));
    722 
    723 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler, sljit_si op)
    724 {
    725 	sljit_ub *inst;
    726 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
    727 	sljit_si size;
    728 #endif
    729 
    730 	CHECK_ERROR();
    731 	check_sljit_emit_op0(compiler, op);
    732 
    733 	switch (GET_OPCODE(op)) {
    734 	case SLJIT_BREAKPOINT:
    735 		inst = (sljit_ub*)ensure_buf(compiler, 1 + 1);
    736 		FAIL_IF(!inst);
    737 		INC_SIZE(1);
    738 		*inst = INT3;
    739 		break;
    740 	case SLJIT_NOP:
    741 		inst = (sljit_ub*)ensure_buf(compiler, 1 + 1);
    742 		FAIL_IF(!inst);
    743 		INC_SIZE(1);
    744 		*inst = NOP;
    745 		break;
    746 	case SLJIT_UMUL:
    747 	case SLJIT_SMUL:
    748 	case SLJIT_UDIV:
    749 	case SLJIT_SDIV:
    750 		compiler->flags_saved = 0;
    751 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
    752 #ifdef _WIN64
    753 		SLJIT_COMPILE_ASSERT(
    754 			reg_map[SLJIT_SCRATCH_REG1] == 0
    755 			&& reg_map[SLJIT_SCRATCH_REG2] == 2
    756 			&& reg_map[TMP_REG1] > 7,
    757 			invalid_register_assignment_for_div_mul);
    758 #else
    759 		SLJIT_COMPILE_ASSERT(
    760 			reg_map[SLJIT_SCRATCH_REG1] == 0
    761 			&& reg_map[SLJIT_SCRATCH_REG2] < 7
    762 			&& reg_map[TMP_REG1] == 2,
    763 			invalid_register_assignment_for_div_mul);
    764 #endif
    765 		compiler->mode32 = op & SLJIT_INT_OP;
    766 #endif
    767 
    768 		op = GET_OPCODE(op);
    769 		if (op == SLJIT_UDIV) {
    770 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64)
    771 			EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_SCRATCH_REG2, 0);
    772 			inst = emit_x86_instruction(compiler, 1, SLJIT_SCRATCH_REG2, 0, SLJIT_SCRATCH_REG2, 0);
    773 #else
    774 			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, TMP_REG1, 0);
    775 #endif
    776 			FAIL_IF(!inst);
    777 			*inst = XOR_r_rm;
    778 		}
    779 
    780 		if (op == SLJIT_SDIV) {
    781 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64)
    782 			EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_SCRATCH_REG2, 0);
    783 #endif
    784 
    785 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
    786 			inst = (sljit_ub*)ensure_buf(compiler, 1 + 1);
    787 			FAIL_IF(!inst);
    788 			INC_SIZE(1);
    789 			*inst = CDQ;
    790 #else
    791 			if (compiler->mode32) {
    792 				inst = (sljit_ub*)ensure_buf(compiler, 1 + 1);
    793 				FAIL_IF(!inst);
    794 				INC_SIZE(1);
    795 				*inst = CDQ;
    796 			} else {
    797 				inst = (sljit_ub*)ensure_buf(compiler, 1 + 2);
    798 				FAIL_IF(!inst);
    799 				INC_SIZE(2);
    800 				*inst++ = REX_W;
    801 				*inst = CDQ;
    802 			}
    803 #endif
    804 		}
    805 
    806 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
    807 		inst = (sljit_ub*)ensure_buf(compiler, 1 + 2);
    808 		FAIL_IF(!inst);
    809 		INC_SIZE(2);
    810 		*inst++ = GROUP_F7;
    811 		*inst = MOD_REG | ((op >= SLJIT_UDIV) ? reg_map[TMP_REG1] : reg_map[SLJIT_SCRATCH_REG2]);
    812 #else
    813 #ifdef _WIN64
    814 		size = (!compiler->mode32 || op >= SLJIT_UDIV) ? 3 : 2;
    815 #else
    816 		size = (!compiler->mode32) ? 3 : 2;
    817 #endif
    818 		inst = (sljit_ub*)ensure_buf(compiler, 1 + size);
    819 		FAIL_IF(!inst);
    820 		INC_SIZE(size);
    821 #ifdef _WIN64
    822 		if (!compiler->mode32)
    823 			*inst++ = REX_W | ((op >= SLJIT_UDIV) ? REX_B : 0);
    824 		else if (op >= SLJIT_UDIV)
    825 			*inst++ = REX_B;
    826 		*inst++ = GROUP_F7;
    827 		*inst = MOD_REG | ((op >= SLJIT_UDIV) ? reg_lmap[TMP_REG1] : reg_lmap[SLJIT_SCRATCH_REG2]);
    828 #else
    829 		if (!compiler->mode32)
    830 			*inst++ = REX_W;
    831 		*inst++ = GROUP_F7;
    832 		*inst = MOD_REG | reg_map[SLJIT_SCRATCH_REG2];
    833 #endif
    834 #endif
    835 		switch (op) {
    836 		case SLJIT_UMUL:
    837 			*inst |= MUL;
    838 			break;
    839 		case SLJIT_SMUL:
    840 			*inst |= IMUL;
    841 			break;
    842 		case SLJIT_UDIV:
    843 			*inst |= DIV;
    844 			break;
    845 		case SLJIT_SDIV:
    846 			*inst |= IDIV;
    847 			break;
    848 		}
    849 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) && !defined(_WIN64)
    850 		EMIT_MOV(compiler, SLJIT_SCRATCH_REG2, 0, TMP_REG1, 0);
    851 #endif
    852 		break;
    853 	}
    854 
    855 	return SLJIT_SUCCESS;
    856 }
    857 
    858 #define ENCODE_PREFIX(prefix) \
    859 	do { \
    860 		inst = (sljit_ub*)ensure_buf(compiler, 1 + 1); \
    861 		FAIL_IF(!inst); \
    862 		INC_SIZE(1); \
    863 		*inst = (prefix); \
    864 	} while (0)
    865 
    866 static sljit_si emit_mov_byte(struct sljit_compiler *compiler, sljit_si sign,
    867 	sljit_si dst, sljit_sw dstw,
    868 	sljit_si src, sljit_sw srcw)
    869 {
    870 	sljit_ub* inst;
    871 	sljit_si dst_r;
    872 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
    873 	sljit_si work_r;
    874 #endif
    875 
    876 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
    877 	compiler->mode32 = 0;
    878 #endif
    879 
    880 	if (dst == SLJIT_UNUSED && !(src & SLJIT_MEM))
    881 		return SLJIT_SUCCESS; /* Empty instruction. */
    882 
    883 	if (src & SLJIT_IMM) {
    884 		if (FAST_IS_REG(dst)) {
    885 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
    886 			return emit_do_imm(compiler, MOV_r_i32 + reg_map[dst], srcw);
    887 #else
    888 			inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, 0);
    889 			FAIL_IF(!inst);
    890 			*inst = MOV_rm_i32;
    891 			return SLJIT_SUCCESS;
    892 #endif
    893 		}
    894 		inst = emit_x86_instruction(compiler, 1 | EX86_BYTE_ARG | EX86_NO_REXW, SLJIT_IMM, srcw, dst, dstw);
    895 		FAIL_IF(!inst);
    896 		*inst = MOV_rm8_i8;
    897 		return SLJIT_SUCCESS;
    898 	}
    899 
    900 	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
    901 
    902 	if ((dst & SLJIT_MEM) && FAST_IS_REG(src)) {
    903 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
    904 		if (reg_map[src] >= 4) {
    905 			SLJIT_ASSERT(dst_r == TMP_REG1);
    906 			EMIT_MOV(compiler, TMP_REG1, 0, src, 0);
    907 		} else
    908 			dst_r = src;
    909 #else
    910 		dst_r = src;
    911 #endif
    912 	}
    913 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
    914 	else if (FAST_IS_REG(src) && reg_map[src] >= 4) {
    915 		/* src, dst are registers. */
    916 		SLJIT_ASSERT(SLOW_IS_REG(dst));
    917 		if (reg_map[dst] < 4) {
    918 			if (dst != src)
    919 				EMIT_MOV(compiler, dst, 0, src, 0);
    920 			inst = emit_x86_instruction(compiler, 2, dst, 0, dst, 0);
    921 			FAIL_IF(!inst);
    922 			*inst++ = GROUP_0F;
    923 			*inst = sign ? MOVSX_r_rm8 : MOVZX_r_rm8;
    924 		}
    925 		else {
    926 			if (dst != src)
    927 				EMIT_MOV(compiler, dst, 0, src, 0);
    928 			if (sign) {
    929 				/* shl reg, 24 */
    930 				inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 24, dst, 0);
    931 				FAIL_IF(!inst);
    932 				*inst |= SHL;
    933 				/* sar reg, 24 */
    934 				inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 24, dst, 0);
    935 				FAIL_IF(!inst);
    936 				*inst |= SAR;
    937 			}
    938 			else {
    939 				inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 0xff, dst, 0);
    940 				FAIL_IF(!inst);
    941 				*(inst + 1) |= AND;
    942 			}
    943 		}
    944 		return SLJIT_SUCCESS;
    945 	}
    946 #endif
    947 	else {
    948 		/* src can be memory addr or reg_map[src] < 4 on x86_32 architectures. */
    949 		inst = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw);
    950 		FAIL_IF(!inst);
    951 		*inst++ = GROUP_0F;
    952 		*inst = sign ? MOVSX_r_rm8 : MOVZX_r_rm8;
    953 	}
    954 
    955 	if (dst & SLJIT_MEM) {
    956 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
    957 		if (dst_r == TMP_REG1) {
    958 			/* Find a non-used register, whose reg_map[src] < 4. */
    959 			if ((dst & REG_MASK) == SLJIT_SCRATCH_REG1) {
    960 				if ((dst & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_SCRATCH_REG2))
    961 					work_r = SLJIT_SCRATCH_REG3;
    962 				else
    963 					work_r = SLJIT_SCRATCH_REG2;
    964 			}
    965 			else {
    966 				if ((dst & OFFS_REG_MASK) != TO_OFFS_REG(SLJIT_SCRATCH_REG1))
    967 					work_r = SLJIT_SCRATCH_REG1;
    968 				else if ((dst & REG_MASK) == SLJIT_SCRATCH_REG2)
    969 					work_r = SLJIT_SCRATCH_REG3;
    970 				else
    971 					work_r = SLJIT_SCRATCH_REG2;
    972 			}
    973 
    974 			if (work_r == SLJIT_SCRATCH_REG1) {
    975 				ENCODE_PREFIX(XCHG_EAX_r + reg_map[TMP_REG1]);
    976 			}
    977 			else {
    978 				inst = emit_x86_instruction(compiler, 1, work_r, 0, dst_r, 0);
    979 				FAIL_IF(!inst);
    980 				*inst = XCHG_r_rm;
    981 			}
    982 
    983 			inst = emit_x86_instruction(compiler, 1, work_r, 0, dst, dstw);
    984 			FAIL_IF(!inst);
    985 			*inst = MOV_rm8_r8;
    986 
    987 			if (work_r == SLJIT_SCRATCH_REG1) {
    988 				ENCODE_PREFIX(XCHG_EAX_r + reg_map[TMP_REG1]);
    989 			}
    990 			else {
    991 				inst = emit_x86_instruction(compiler, 1, work_r, 0, dst_r, 0);
    992 				FAIL_IF(!inst);
    993 				*inst = XCHG_r_rm;
    994 			}
    995 		}
    996 		else {
    997 			inst = emit_x86_instruction(compiler, 1, dst_r, 0, dst, dstw);
    998 			FAIL_IF(!inst);
    999 			*inst = MOV_rm8_r8;
   1000 		}
   1001 #else
   1002 		inst = emit_x86_instruction(compiler, 1 | EX86_REX | EX86_NO_REXW, dst_r, 0, dst, dstw);
   1003 		FAIL_IF(!inst);
   1004 		*inst = MOV_rm8_r8;
   1005 #endif
   1006 	}
   1007 
   1008 	return SLJIT_SUCCESS;
   1009 }
   1010 
   1011 static sljit_si emit_mov_half(struct sljit_compiler *compiler, sljit_si sign,
   1012 	sljit_si dst, sljit_sw dstw,
   1013 	sljit_si src, sljit_sw srcw)
   1014 {
   1015 	sljit_ub* inst;
   1016 	sljit_si dst_r;
   1017 
   1018 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
   1019 	compiler->mode32 = 0;
   1020 #endif
   1021 
   1022 	if (dst == SLJIT_UNUSED && !(src & SLJIT_MEM))
   1023 		return SLJIT_SUCCESS; /* Empty instruction. */
   1024 
   1025 	if (src & SLJIT_IMM) {
   1026 		if (FAST_IS_REG(dst)) {
   1027 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
   1028 			return emit_do_imm(compiler, MOV_r_i32 + reg_map[dst], srcw);
   1029 #else
   1030 			inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, 0);
   1031 			FAIL_IF(!inst);
   1032 			*inst = MOV_rm_i32;
   1033 			return SLJIT_SUCCESS;
   1034 #endif
   1035 		}
   1036 		inst = emit_x86_instruction(compiler, 1 | EX86_HALF_ARG | EX86_NO_REXW | EX86_PREF_66, SLJIT_IMM, srcw, dst, dstw);
   1037 		FAIL_IF(!inst);
   1038 		*inst = MOV_rm_i32;
   1039 		return SLJIT_SUCCESS;
   1040 	}
   1041 
   1042 	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
   1043 
   1044 	if ((dst & SLJIT_MEM) && FAST_IS_REG(src))
   1045 		dst_r = src;
   1046 	else {
   1047 		inst = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw);
   1048 		FAIL_IF(!inst);
   1049 		*inst++ = GROUP_0F;
   1050 		*inst = sign ? MOVSX_r_rm16 : MOVZX_r_rm16;
   1051 	}
   1052 
   1053 	if (dst & SLJIT_MEM) {
   1054 		inst = emit_x86_instruction(compiler, 1 | EX86_NO_REXW | EX86_PREF_66, dst_r, 0, dst, dstw);
   1055 		FAIL_IF(!inst);
   1056 		*inst = MOV_rm_r;
   1057 	}
   1058 
   1059 	return SLJIT_SUCCESS;
   1060 }
   1061 
   1062 static sljit_si emit_unary(struct sljit_compiler *compiler, sljit_ub opcode,
   1063 	sljit_si dst, sljit_sw dstw,
   1064 	sljit_si src, sljit_sw srcw)
   1065 {
   1066 	sljit_ub* inst;
   1067 
   1068 	if (dst == SLJIT_UNUSED) {
   1069 		EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
   1070 		inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
   1071 		FAIL_IF(!inst);
   1072 		*inst++ = GROUP_F7;
   1073 		*inst |= opcode;
   1074 		return SLJIT_SUCCESS;
   1075 	}
   1076 	if (dst == src && dstw == srcw) {
   1077 		/* Same input and output */
   1078 		inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
   1079 		FAIL_IF(!inst);
   1080 		*inst++ = GROUP_F7;
   1081 		*inst |= opcode;
   1082 		return SLJIT_SUCCESS;
   1083 	}
   1084 	if (FAST_IS_REG(dst)) {
   1085 		EMIT_MOV(compiler, dst, 0, src, srcw);
   1086 		inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
   1087 		FAIL_IF(!inst);
   1088 		*inst++ = GROUP_F7;
   1089 		*inst |= opcode;
   1090 		return SLJIT_SUCCESS;
   1091 	}
   1092 	EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
   1093 	inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
   1094 	FAIL_IF(!inst);
   1095 	*inst++ = GROUP_F7;
   1096 	*inst |= opcode;
   1097 	EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
   1098 	return SLJIT_SUCCESS;
   1099 }
   1100 
   1101 static sljit_si emit_not_with_flags(struct sljit_compiler *compiler,
   1102 	sljit_si dst, sljit_sw dstw,
   1103 	sljit_si src, sljit_sw srcw)
   1104 {
   1105 	sljit_ub* inst;
   1106 
   1107 	if (dst == SLJIT_UNUSED) {
   1108 		EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
   1109 		inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
   1110 		FAIL_IF(!inst);
   1111 		*inst++ = GROUP_F7;
   1112 		*inst |= NOT_rm;
   1113 		inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, TMP_REG1, 0);
   1114 		FAIL_IF(!inst);
   1115 		*inst = OR_r_rm;
   1116 		return SLJIT_SUCCESS;
   1117 	}
   1118 	if (FAST_IS_REG(dst)) {
   1119 		EMIT_MOV(compiler, dst, 0, src, srcw);
   1120 		inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
   1121 		FAIL_IF(!inst);
   1122 		*inst++ = GROUP_F7;
   1123 		*inst |= NOT_rm;
   1124 		inst = emit_x86_instruction(compiler, 1, dst, 0, dst, 0);
   1125 		FAIL_IF(!inst);
   1126 		*inst = OR_r_rm;
   1127 		return SLJIT_SUCCESS;
   1128 	}
   1129 	EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
   1130 	inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
   1131 	FAIL_IF(!inst);
   1132 	*inst++ = GROUP_F7;
   1133 	*inst |= NOT_rm;
   1134 	inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, TMP_REG1, 0);
   1135 	FAIL_IF(!inst);
   1136 	*inst = OR_r_rm;
   1137 	EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
   1138 	return SLJIT_SUCCESS;
   1139 }
   1140 
   1141 static sljit_si emit_clz(struct sljit_compiler *compiler, sljit_si op_flags,
   1142 	sljit_si dst, sljit_sw dstw,
   1143 	sljit_si src, sljit_sw srcw)
   1144 {
   1145 	sljit_ub* inst;
   1146 	sljit_si dst_r;
   1147 
   1148 	SLJIT_UNUSED_ARG(op_flags);
   1149 	if (SLJIT_UNLIKELY(dst == SLJIT_UNUSED)) {
   1150 		/* Just set the zero flag. */
   1151 		EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
   1152 		inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
   1153 		FAIL_IF(!inst);
   1154 		*inst++ = GROUP_F7;
   1155 		*inst |= NOT_rm;
   1156 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
   1157 		inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 31, TMP_REG1, 0);
   1158 #else
   1159 		inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, !(op_flags & SLJIT_INT_OP) ? 63 : 31, TMP_REG1, 0);
   1160 #endif
   1161 		FAIL_IF(!inst);
   1162 		*inst |= SHR;
   1163 		return SLJIT_SUCCESS;
   1164 	}
   1165 
   1166 	if (SLJIT_UNLIKELY(src & SLJIT_IMM)) {
   1167 		EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, srcw);
   1168 		src = TMP_REG1;
   1169 		srcw = 0;
   1170 	}
   1171 
   1172 	inst = emit_x86_instruction(compiler, 2, TMP_REG1, 0, src, srcw);
   1173 	FAIL_IF(!inst);
   1174 	*inst++ = GROUP_0F;
   1175 	*inst = BSR_r_rm;
   1176 
   1177 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
   1178 	if (FAST_IS_REG(dst))
   1179 		dst_r = dst;
   1180 	else {
   1181 		/* Find an unused temporary register. */
   1182 		if ((dst & REG_MASK) != SLJIT_SCRATCH_REG1 && (dst & OFFS_REG_MASK) != TO_OFFS_REG(SLJIT_SCRATCH_REG1))
   1183 			dst_r = SLJIT_SCRATCH_REG1;
   1184 		else if ((dst & REG_MASK) != SLJIT_SCRATCH_REG2 && (dst & OFFS_REG_MASK) != TO_OFFS_REG(SLJIT_SCRATCH_REG2))
   1185 			dst_r = SLJIT_SCRATCH_REG2;
   1186 		else
   1187 			dst_r = SLJIT_SCRATCH_REG3;
   1188 		EMIT_MOV(compiler, dst, dstw, dst_r, 0);
   1189 	}
   1190 	EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, 32 + 31);
   1191 #else
   1192 	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2;
   1193 	compiler->mode32 = 0;
   1194 	EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, !(op_flags & SLJIT_INT_OP) ? 64 + 63 : 32 + 31);
   1195 	compiler->mode32 = op_flags & SLJIT_INT_OP;
   1196 #endif
   1197 
   1198 	if (cpu_has_cmov == -1)
   1199 		get_cpu_features();
   1200 
   1201 	if (cpu_has_cmov) {
   1202 		inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG1, 0);
   1203 		FAIL_IF(!inst);
   1204 		*inst++ = GROUP_0F;
   1205 		*inst = CMOVNE_r_rm;
   1206 	} else {
   1207 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
   1208 		inst = (sljit_ub*)ensure_buf(compiler, 1 + 4);
   1209 		FAIL_IF(!inst);
   1210 		INC_SIZE(4);
   1211 
   1212 		*inst++ = JE_i8;
   1213 		*inst++ = 2;
   1214 		*inst++ = MOV_r_rm;
   1215 		*inst++ = MOD_REG | (reg_map[dst_r] << 3) | reg_map[TMP_REG1];
   1216 #else
   1217 		inst = (sljit_ub*)ensure_buf(compiler, 1 + 5);
   1218 		FAIL_IF(!inst);
   1219 		INC_SIZE(5);
   1220 
   1221 		*inst++ = JE_i8;
   1222 		*inst++ = 3;
   1223 		*inst++ = REX_W | (reg_map[dst_r] >= 8 ? REX_R : 0) | (reg_map[TMP_REG1] >= 8 ? REX_B : 0);
   1224 		*inst++ = MOV_r_rm;
   1225 		*inst++ = MOD_REG | (reg_lmap[dst_r] << 3) | reg_lmap[TMP_REG1];
   1226 #endif
   1227 	}
   1228 
   1229 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
   1230 	inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 31, dst_r, 0);
   1231 #else
   1232 	inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, !(op_flags & SLJIT_INT_OP) ? 63 : 31, dst_r, 0);
   1233 #endif
   1234 	FAIL_IF(!inst);
   1235 	*(inst + 1) |= XOR;
   1236 
   1237 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
   1238 	if (dst & SLJIT_MEM) {
   1239 		inst = emit_x86_instruction(compiler, 1, dst_r, 0, dst, dstw);
   1240 		FAIL_IF(!inst);
   1241 		*inst = XCHG_r_rm;
   1242 	}
   1243 #else
   1244 	if (dst & SLJIT_MEM)
   1245 		EMIT_MOV(compiler, dst, dstw, TMP_REG2, 0);
   1246 #endif
   1247 	return SLJIT_SUCCESS;
   1248 }
   1249 
   1250 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op1(struct sljit_compiler *compiler, sljit_si op,
   1251 	sljit_si dst, sljit_sw dstw,
   1252 	sljit_si src, sljit_sw srcw)
   1253 {
   1254 	sljit_ub* inst;
   1255 	sljit_si update = 0;
   1256 	sljit_si op_flags = GET_ALL_FLAGS(op);
   1257 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
   1258 	sljit_si dst_is_ereg = 0;
   1259 	sljit_si src_is_ereg = 0;
   1260 #else
   1261 #	define src_is_ereg 0
   1262 #endif
   1263 
   1264 	CHECK_ERROR();
   1265 	check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw);
   1266 	ADJUST_LOCAL_OFFSET(dst, dstw);
   1267 	ADJUST_LOCAL_OFFSET(src, srcw);
   1268 
   1269 	CHECK_EXTRA_REGS(dst, dstw, dst_is_ereg = 1);
   1270 	CHECK_EXTRA_REGS(src, srcw, src_is_ereg = 1);
   1271 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
   1272 	compiler->mode32 = op_flags & SLJIT_INT_OP;
   1273 #endif
   1274 
   1275 	op = GET_OPCODE(op);
   1276 	if (op >= SLJIT_MOV && op <= SLJIT_MOVU_P) {
   1277 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
   1278 		compiler->mode32 = 0;
   1279 #endif
   1280 
   1281 		if (op_flags & SLJIT_INT_OP) {
   1282 			if (FAST_IS_REG(src) && src == dst) {
   1283 				if (!TYPE_CAST_NEEDED(op))
   1284 					return SLJIT_SUCCESS;
   1285 			}
   1286 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
   1287 			if (op == SLJIT_MOV_SI && (src & SLJIT_MEM))
   1288 				op = SLJIT_MOV_UI;
   1289 			if (op == SLJIT_MOVU_SI && (src & SLJIT_MEM))
   1290 				op = SLJIT_MOVU_UI;
   1291 			if (op == SLJIT_MOV_UI && (src & SLJIT_IMM))
   1292 				op = SLJIT_MOV_SI;
   1293 			if (op == SLJIT_MOVU_UI && (src & SLJIT_IMM))
   1294 				op = SLJIT_MOVU_SI;
   1295 #endif
   1296 		}
   1297 
   1298 		SLJIT_COMPILE_ASSERT(SLJIT_MOV + 8 == SLJIT_MOVU, movu_offset);
   1299 		if (op >= SLJIT_MOVU) {
   1300 			update = 1;
   1301 			op -= 8;
   1302 		}
   1303 
   1304 		if (src & SLJIT_IMM) {
   1305 			switch (op) {
   1306 			case SLJIT_MOV_UB:
   1307 				srcw = (sljit_ub)srcw;
   1308 				break;
   1309 			case SLJIT_MOV_SB:
   1310 				srcw = (sljit_sb)srcw;
   1311 				break;
   1312 			case SLJIT_MOV_UH:
   1313 				srcw = (sljit_uh)srcw;
   1314 				break;
   1315 			case SLJIT_MOV_SH:
   1316 				srcw = (sljit_sh)srcw;
   1317 				break;
   1318 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
   1319 			case SLJIT_MOV_UI:
   1320 				srcw = (sljit_ui)srcw;
   1321 				break;
   1322 			case SLJIT_MOV_SI:
   1323 				srcw = (sljit_si)srcw;
   1324 				break;
   1325 #endif
   1326 			}
   1327 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
   1328 			if (SLJIT_UNLIKELY(dst_is_ereg))
   1329 				return emit_mov(compiler, dst, dstw, src, srcw);
   1330 #endif
   1331 		}
   1332 
   1333 		if (SLJIT_UNLIKELY(update) && (src & SLJIT_MEM) && !src_is_ereg && (src & REG_MASK) && (srcw != 0 || (src & OFFS_REG_MASK) != 0)) {
   1334 			inst = emit_x86_instruction(compiler, 1, src & REG_MASK, 0, src, srcw);
   1335 			FAIL_IF(!inst);
   1336 			*inst = LEA_r_m;
   1337 			src &= SLJIT_MEM | 0xf;
   1338 			srcw = 0;
   1339 		}
   1340 
   1341 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
   1342 		if (SLJIT_UNLIKELY(dst_is_ereg) && (!(op == SLJIT_MOV || op == SLJIT_MOV_UI || op == SLJIT_MOV_SI || op == SLJIT_MOV_P) || (src & SLJIT_MEM))) {
   1343 			SLJIT_ASSERT(dst == SLJIT_MEM1(SLJIT_LOCALS_REG));
   1344 			dst = TMP_REG1;
   1345 		}
   1346 #endif
   1347 
   1348 		switch (op) {
   1349 		case SLJIT_MOV:
   1350 		case SLJIT_MOV_P:
   1351 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
   1352 		case SLJIT_MOV_UI:
   1353 		case SLJIT_MOV_SI:
   1354 #endif
   1355 			FAIL_IF(emit_mov(compiler, dst, dstw, src, srcw));
   1356 			break;
   1357 		case SLJIT_MOV_UB:
   1358 			FAIL_IF(emit_mov_byte(compiler, 0, dst, dstw, src, srcw));
   1359 			break;
   1360 		case SLJIT_MOV_SB:
   1361 			FAIL_IF(emit_mov_byte(compiler, 1, dst, dstw, src, srcw));
   1362 			break;
   1363 		case SLJIT_MOV_UH:
   1364 			FAIL_IF(emit_mov_half(compiler, 0, dst, dstw, src, srcw));
   1365 			break;
   1366 		case SLJIT_MOV_SH:
   1367 			FAIL_IF(emit_mov_half(compiler, 1, dst, dstw, src, srcw));
   1368 			break;
   1369 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
   1370 		case SLJIT_MOV_UI:
   1371 			FAIL_IF(emit_mov_int(compiler, 0, dst, dstw, src, srcw));
   1372 			break;
   1373 		case SLJIT_MOV_SI:
   1374 			FAIL_IF(emit_mov_int(compiler, 1, dst, dstw, src, srcw));
   1375 			break;
   1376 #endif
   1377 		}
   1378 
   1379 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
   1380 		if (SLJIT_UNLIKELY(dst_is_ereg) && dst == TMP_REG1)
   1381 			return emit_mov(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), dstw, TMP_REG1, 0);
   1382 #endif
   1383 
   1384 		if (SLJIT_UNLIKELY(update) && (dst & SLJIT_MEM) && (dst & REG_MASK) && (dstw != 0 || (dst & OFFS_REG_MASK) != 0)) {
   1385 			inst = emit_x86_instruction(compiler, 1, dst & REG_MASK, 0, dst, dstw);
   1386 			FAIL_IF(!inst);
   1387 			*inst = LEA_r_m;
   1388 		}
   1389 		return SLJIT_SUCCESS;
   1390 	}
   1391 
   1392 	if (SLJIT_UNLIKELY(GET_FLAGS(op_flags)))
   1393 		compiler->flags_saved = 0;
   1394 
   1395 	switch (op) {
   1396 	case SLJIT_NOT:
   1397 		if (SLJIT_UNLIKELY(op_flags & SLJIT_SET_E))
   1398 			return emit_not_with_flags(compiler, dst, dstw, src, srcw);
   1399 		return emit_unary(compiler, NOT_rm, dst, dstw, src, srcw);
   1400 
   1401 	case SLJIT_NEG:
   1402 		if (SLJIT_UNLIKELY(op_flags & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
   1403 			FAIL_IF(emit_save_flags(compiler));
   1404 		return emit_unary(compiler, NEG_rm, dst, dstw, src, srcw);
   1405 
   1406 	case SLJIT_CLZ:
   1407 		if (SLJIT_UNLIKELY(op_flags & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
   1408 			FAIL_IF(emit_save_flags(compiler));
   1409 		return emit_clz(compiler, op_flags, dst, dstw, src, srcw);
   1410 	}
   1411 
   1412 	return SLJIT_SUCCESS;
   1413 
   1414 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
   1415 #	undef src_is_ereg
   1416 #endif
   1417 }
   1418 
   1419 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
   1420 
   1421 #define BINARY_IMM(op_imm, op_mr, immw, arg, argw) \
   1422 	if (IS_HALFWORD(immw) || compiler->mode32) { \
   1423 		inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, immw, arg, argw); \
   1424 		FAIL_IF(!inst); \
   1425 		*(inst + 1) |= (op_imm); \
   1426 	} \
   1427 	else { \
   1428 		FAIL_IF(emit_load_imm64(compiler, TMP_REG2, immw)); \
   1429 		inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, arg, argw); \
   1430 		FAIL_IF(!inst); \
   1431 		*inst = (op_mr); \
   1432 	}
   1433 
   1434 #define BINARY_EAX_IMM(op_eax_imm, immw) \
   1435 	FAIL_IF(emit_do_imm32(compiler, (!compiler->mode32) ? REX_W : 0, (op_eax_imm), immw))
   1436 
   1437 #else
   1438 
   1439 #define BINARY_IMM(op_imm, op_mr, immw, arg, argw) \
   1440 	inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, immw, arg, argw); \
   1441 	FAIL_IF(!inst); \
   1442 	*(inst + 1) |= (op_imm);
   1443 
   1444 #define BINARY_EAX_IMM(op_eax_imm, immw) \
   1445 	FAIL_IF(emit_do_imm(compiler, (op_eax_imm), immw))
   1446 
   1447 #endif
   1448 
   1449 static sljit_si emit_cum_binary(struct sljit_compiler *compiler,
   1450 	sljit_ub op_rm, sljit_ub op_mr, sljit_ub op_imm, sljit_ub op_eax_imm,
   1451 	sljit_si dst, sljit_sw dstw,
   1452 	sljit_si src1, sljit_sw src1w,
   1453 	sljit_si src2, sljit_sw src2w)
   1454 {
   1455 	sljit_ub* inst;
   1456 
   1457 	if (dst == SLJIT_UNUSED) {
   1458 		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
   1459 		if (src2 & SLJIT_IMM) {
   1460 			BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
   1461 		}
   1462 		else {
   1463 			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
   1464 			FAIL_IF(!inst);
   1465 			*inst = op_rm;
   1466 		}
   1467 		return SLJIT_SUCCESS;
   1468 	}
   1469 
   1470 	if (dst == src1 && dstw == src1w) {
   1471 		if (src2 & SLJIT_IMM) {
   1472 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
   1473 			if ((dst == SLJIT_SCRATCH_REG1) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
   1474 #else
   1475 			if ((dst == SLJIT_SCRATCH_REG1) && (src2w > 127 || src2w < -128)) {
   1476 #endif
   1477 				BINARY_EAX_IMM(op_eax_imm, src2w);
   1478 			}
   1479 			else {
   1480 				BINARY_IMM(op_imm, op_mr, src2w, dst, dstw);
   1481 			}
   1482 		}
   1483 		else if (FAST_IS_REG(dst)) {
   1484 			inst = emit_x86_instruction(compiler, 1, dst, dstw, src2, src2w);
   1485 			FAIL_IF(!inst);
   1486 			*inst = op_rm;
   1487 		}
   1488 		else if (FAST_IS_REG(src2)) {
   1489 			/* Special exception for sljit_emit_op_flags. */
   1490 			inst = emit_x86_instruction(compiler, 1, src2, src2w, dst, dstw);
   1491 			FAIL_IF(!inst);
   1492 			*inst = op_mr;
   1493 		}
   1494 		else {
   1495 			EMIT_MOV(compiler, TMP_REG1, 0, src2, src2w);
   1496 			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
   1497 			FAIL_IF(!inst);
   1498 			*inst = op_mr;
   1499 		}
   1500 		return SLJIT_SUCCESS;
   1501 	}
   1502 
   1503 	/* Only for cumulative operations. */
   1504 	if (dst == src2 && dstw == src2w) {
   1505 		if (src1 & SLJIT_IMM) {
   1506 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
   1507 			if ((dst == SLJIT_SCRATCH_REG1) && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) {
   1508 #else
   1509 			if ((dst == SLJIT_SCRATCH_REG1) && (src1w > 127 || src1w < -128)) {
   1510 #endif
   1511 				BINARY_EAX_IMM(op_eax_imm, src1w);
   1512 			}
   1513 			else {
   1514 				BINARY_IMM(op_imm, op_mr, src1w, dst, dstw);
   1515 			}
   1516 		}
   1517 		else if (FAST_IS_REG(dst)) {
   1518 			inst = emit_x86_instruction(compiler, 1, dst, dstw, src1, src1w);
   1519 			FAIL_IF(!inst);
   1520 			*inst = op_rm;
   1521 		}
   1522 		else if (FAST_IS_REG(src1)) {
   1523 			inst = emit_x86_instruction(compiler, 1, src1, src1w, dst, dstw);
   1524 			FAIL_IF(!inst);
   1525 			*inst = op_mr;
   1526 		}
   1527 		else {
   1528 			EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
   1529 			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
   1530 			FAIL_IF(!inst);
   1531 			*inst = op_mr;
   1532 		}
   1533 		return SLJIT_SUCCESS;
   1534 	}
   1535 
   1536 	/* General version. */
   1537 	if (FAST_IS_REG(dst)) {
   1538 		EMIT_MOV(compiler, dst, 0, src1, src1w);
   1539 		if (src2 & SLJIT_IMM) {
   1540 			BINARY_IMM(op_imm, op_mr, src2w, dst, 0);
   1541 		}
   1542 		else {
   1543 			inst = emit_x86_instruction(compiler, 1, dst, 0, src2, src2w);
   1544 			FAIL_IF(!inst);
   1545 			*inst = op_rm;
   1546 		}
   1547 	}
   1548 	else {
   1549 		/* This version requires less memory writing. */
   1550 		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
   1551 		if (src2 & SLJIT_IMM) {
   1552 			BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
   1553 		}
   1554 		else {
   1555 			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
   1556 			FAIL_IF(!inst);
   1557 			*inst = op_rm;
   1558 		}
   1559 		EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
   1560 	}
   1561 
   1562 	return SLJIT_SUCCESS;
   1563 }
   1564 
   1565 static sljit_si emit_non_cum_binary(struct sljit_compiler *compiler,
   1566 	sljit_ub op_rm, sljit_ub op_mr, sljit_ub op_imm, sljit_ub op_eax_imm,
   1567 	sljit_si dst, sljit_sw dstw,
   1568 	sljit_si src1, sljit_sw src1w,
   1569 	sljit_si src2, sljit_sw src2w)
   1570 {
   1571 	sljit_ub* inst;
   1572 
   1573 	if (dst == SLJIT_UNUSED) {
   1574 		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
   1575 		if (src2 & SLJIT_IMM) {
   1576 			BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
   1577 		}
   1578 		else {
   1579 			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
   1580 			FAIL_IF(!inst);
   1581 			*inst = op_rm;
   1582 		}
   1583 		return SLJIT_SUCCESS;
   1584 	}
   1585 
   1586 	if (dst == src1 && dstw == src1w) {
   1587 		if (src2 & SLJIT_IMM) {
   1588 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
   1589 			if ((dst == SLJIT_SCRATCH_REG1) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
   1590 #else
   1591 			if ((dst == SLJIT_SCRATCH_REG1) && (src2w > 127 || src2w < -128)) {
   1592 #endif
   1593 				BINARY_EAX_IMM(op_eax_imm, src2w);
   1594 			}
   1595 			else {
   1596 				BINARY_IMM(op_imm, op_mr, src2w, dst, dstw);
   1597 			}
   1598 		}
   1599 		else if (FAST_IS_REG(dst)) {
   1600 			inst = emit_x86_instruction(compiler, 1, dst, dstw, src2, src2w);
   1601 			FAIL_IF(!inst);
   1602 			*inst = op_rm;
   1603 		}
   1604 		else if (FAST_IS_REG(src2)) {
   1605 			inst = emit_x86_instruction(compiler, 1, src2, src2w, dst, dstw);
   1606 			FAIL_IF(!inst);
   1607 			*inst = op_mr;
   1608 		}
   1609 		else {
   1610 			EMIT_MOV(compiler, TMP_REG1, 0, src2, src2w);
   1611 			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
   1612 			FAIL_IF(!inst);
   1613 			*inst = op_mr;
   1614 		}
   1615 		return SLJIT_SUCCESS;
   1616 	}
   1617 
   1618 	/* General version. */
   1619 	if (FAST_IS_REG(dst) && dst != src2) {
   1620 		EMIT_MOV(compiler, dst, 0, src1, src1w);
   1621 		if (src2 & SLJIT_IMM) {
   1622 			BINARY_IMM(op_imm, op_mr, src2w, dst, 0);
   1623 		}
   1624 		else {
   1625 			inst = emit_x86_instruction(compiler, 1, dst, 0, src2, src2w);
   1626 			FAIL_IF(!inst);
   1627 			*inst = op_rm;
   1628 		}
   1629 	}
   1630 	else {
   1631 		/* This version requires less memory writing. */
   1632 		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
   1633 		if (src2 & SLJIT_IMM) {
   1634 			BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
   1635 		}
   1636 		else {
   1637 			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
   1638 			FAIL_IF(!inst);
   1639 			*inst = op_rm;
   1640 		}
   1641 		EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
   1642 	}
   1643 
   1644 	return SLJIT_SUCCESS;
   1645 }
   1646 
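/* emit_mul picks between the three IMUL encodings: the two-operand
   0F AF /r form (IMUL_r_rm) when no immediate is involved, the
   sign-extended 8-bit immediate form 6B /r ib (IMUL_r_rm_i8), and the
   32-bit immediate form 69 /r id (IMUL_r_rm_i32). For illustration
   (standard x86 encodings, not emitted verbatim below):
       imul eax, ecx, 16   ->  6B C1 10
       imul eax, ecx       ->  0F AF C1 */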
   1647 static sljit_si emit_mul(struct sljit_compiler *compiler,
   1648 	sljit_si dst, sljit_sw dstw,
   1649 	sljit_si src1, sljit_sw src1w,
   1650 	sljit_si src2, sljit_sw src2w)
   1651 {
   1652 	sljit_ub* inst;
   1653 	sljit_si dst_r;
   1654 
   1655 	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
   1656 
   1657 	/* Register destination. */
   1658 	if (dst_r == src1 && !(src2 & SLJIT_IMM)) {
   1659 		inst = emit_x86_instruction(compiler, 2, dst_r, 0, src2, src2w);
   1660 		FAIL_IF(!inst);
   1661 		*inst++ = GROUP_0F;
   1662 		*inst = IMUL_r_rm;
   1663 	}
   1664 	else if (dst_r == src2 && !(src1 & SLJIT_IMM)) {
   1665 		inst = emit_x86_instruction(compiler, 2, dst_r, 0, src1, src1w);
   1666 		FAIL_IF(!inst);
   1667 		*inst++ = GROUP_0F;
   1668 		*inst = IMUL_r_rm;
   1669 	}
   1670 	else if (src1 & SLJIT_IMM) {
   1671 		if (src2 & SLJIT_IMM) {
   1672 			EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, src2w);
   1673 			src2 = dst_r;
   1674 			src2w = 0;
   1675 		}
   1676 
   1677 		if (src1w <= 127 && src1w >= -128) {
   1678 			inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
   1679 			FAIL_IF(!inst);
   1680 			*inst = IMUL_r_rm_i8;
   1681 			inst = (sljit_ub*)ensure_buf(compiler, 1 + 1);
   1682 			FAIL_IF(!inst);
   1683 			INC_SIZE(1);
   1684 			*inst = (sljit_sb)src1w;
   1685 		}
   1686 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
   1687 		else {
   1688 			inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
   1689 			FAIL_IF(!inst);
   1690 			*inst = IMUL_r_rm_i32;
   1691 			inst = (sljit_ub*)ensure_buf(compiler, 1 + 4);
   1692 			FAIL_IF(!inst);
   1693 			INC_SIZE(4);
   1694 			*(sljit_sw*)inst = src1w;
   1695 		}
   1696 #else
   1697 		else if (IS_HALFWORD(src1w)) {
   1698 			inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
   1699 			FAIL_IF(!inst);
   1700 			*inst = IMUL_r_rm_i32;
   1701 			inst = (sljit_ub*)ensure_buf(compiler, 1 + 4);
   1702 			FAIL_IF(!inst);
   1703 			INC_SIZE(4);
   1704 			*(sljit_si*)inst = (sljit_si)src1w;
   1705 		}
   1706 		else {
   1707 			EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_IMM, src1w);
   1708 			if (dst_r != src2)
   1709 				EMIT_MOV(compiler, dst_r, 0, src2, src2w);
   1710 			inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0);
   1711 			FAIL_IF(!inst);
   1712 			*inst++ = GROUP_0F;
   1713 			*inst = IMUL_r_rm;
   1714 		}
   1715 #endif
   1716 	}
   1717 	else if (src2 & SLJIT_IMM) {
   1718 		/* Note: src1 is NOT immediate. */
   1719 
   1720 		if (src2w <= 127 && src2w >= -128) {
   1721 			inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
   1722 			FAIL_IF(!inst);
   1723 			*inst = IMUL_r_rm_i8;
   1724 			inst = (sljit_ub*)ensure_buf(compiler, 1 + 1);
   1725 			FAIL_IF(!inst);
   1726 			INC_SIZE(1);
   1727 			*inst = (sljit_sb)src2w;
   1728 		}
   1729 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
   1730 		else {
   1731 			inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
   1732 			FAIL_IF(!inst);
   1733 			*inst = IMUL_r_rm_i32;
   1734 			inst = (sljit_ub*)ensure_buf(compiler, 1 + 4);
   1735 			FAIL_IF(!inst);
   1736 			INC_SIZE(4);
   1737 			*(sljit_sw*)inst = src2w;
   1738 		}
   1739 #else
   1740 		else if (IS_HALFWORD(src2w)) {
   1741 			inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
   1742 			FAIL_IF(!inst);
   1743 			*inst = IMUL_r_rm_i32;
   1744 			inst = (sljit_ub*)ensure_buf(compiler, 1 + 4);
   1745 			FAIL_IF(!inst);
   1746 			INC_SIZE(4);
   1747 			*(sljit_si*)inst = (sljit_si)src2w;
   1748 		}
   1749 		else {
    1750 			EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_IMM, src2w);
   1751 			if (dst_r != src1)
   1752 				EMIT_MOV(compiler, dst_r, 0, src1, src1w);
   1753 			inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0);
   1754 			FAIL_IF(!inst);
   1755 			*inst++ = GROUP_0F;
   1756 			*inst = IMUL_r_rm;
   1757 		}
   1758 #endif
   1759 	}
   1760 	else {
   1761 		/* Neither argument is immediate. */
   1762 		if (ADDRESSING_DEPENDS_ON(src2, dst_r))
   1763 			dst_r = TMP_REG1;
   1764 		EMIT_MOV(compiler, dst_r, 0, src1, src1w);
   1765 		inst = emit_x86_instruction(compiler, 2, dst_r, 0, src2, src2w);
   1766 		FAIL_IF(!inst);
   1767 		*inst++ = GROUP_0F;
   1768 		*inst = IMUL_r_rm;
   1769 	}
   1770 
   1771 	if (dst_r == TMP_REG1)
   1772 		EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
   1773 
   1774 	return SLJIT_SUCCESS;
   1775 }
   1776 
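/* When no flags are requested, an addition can be emitted as a single
   LEA, which computes an effective address without touching EFLAGS,
   e.g. lea dst, [src1 + src2] for two registers or lea dst, [src1 + imm]
   for an immediate operand. sljit_emit_op2 tries this first for
   SLJIT_ADD (and for SLJIT_SUB with a negated immediate). */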
   1777 static sljit_si emit_lea_binary(struct sljit_compiler *compiler, sljit_si keep_flags,
   1778 	sljit_si dst, sljit_sw dstw,
   1779 	sljit_si src1, sljit_sw src1w,
   1780 	sljit_si src2, sljit_sw src2w)
   1781 {
   1782 	sljit_ub* inst;
   1783 	sljit_si dst_r, done = 0;
   1784 
    1785 	/* These cases are better left to be handled the normal way. */
   1786 	if (!keep_flags) {
   1787 		if (dst == src1 && dstw == src1w)
   1788 			return SLJIT_ERR_UNSUPPORTED;
   1789 		if (dst == src2 && dstw == src2w)
   1790 			return SLJIT_ERR_UNSUPPORTED;
   1791 	}
   1792 
   1793 	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
   1794 
   1795 	if (FAST_IS_REG(src1)) {
   1796 		if (FAST_IS_REG(src2)) {
   1797 			inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM2(src1, src2), 0);
   1798 			FAIL_IF(!inst);
   1799 			*inst = LEA_r_m;
   1800 			done = 1;
   1801 		}
   1802 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
   1803 		if ((src2 & SLJIT_IMM) && (compiler->mode32 || IS_HALFWORD(src2w))) {
   1804 			inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src1), (sljit_si)src2w);
   1805 #else
   1806 		if (src2 & SLJIT_IMM) {
   1807 			inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src1), src2w);
   1808 #endif
   1809 			FAIL_IF(!inst);
   1810 			*inst = LEA_r_m;
   1811 			done = 1;
   1812 		}
   1813 	}
   1814 	else if (FAST_IS_REG(src2)) {
   1815 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
   1816 		if ((src1 & SLJIT_IMM) && (compiler->mode32 || IS_HALFWORD(src1w))) {
   1817 			inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src2), (sljit_si)src1w);
   1818 #else
   1819 		if (src1 & SLJIT_IMM) {
   1820 			inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src2), src1w);
   1821 #endif
   1822 			FAIL_IF(!inst);
   1823 			*inst = LEA_r_m;
   1824 			done = 1;
   1825 		}
   1826 	}
   1827 
   1828 	if (done) {
   1829 		if (dst_r == TMP_REG1)
   1830 			return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
   1831 		return SLJIT_SUCCESS;
   1832 	}
   1833 	return SLJIT_ERR_UNSUPPORTED;
   1834 }
   1835 
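/* Compare helper used when SLJIT_SUB has an unused destination. The
   first test below prefers the one-byte CMP EAX, imm32 form (3D id)
   when src1 is the EAX-mapped scratch register and the immediate does
   not fit the shorter sign-extended 8-bit encoding. */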
   1836 static sljit_si emit_cmp_binary(struct sljit_compiler *compiler,
   1837 	sljit_si src1, sljit_sw src1w,
   1838 	sljit_si src2, sljit_sw src2w)
   1839 {
   1840 	sljit_ub* inst;
   1841 
   1842 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
   1843 	if (src1 == SLJIT_SCRATCH_REG1 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
   1844 #else
   1845 	if (src1 == SLJIT_SCRATCH_REG1 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128)) {
   1846 #endif
   1847 		BINARY_EAX_IMM(CMP_EAX_i32, src2w);
   1848 		return SLJIT_SUCCESS;
   1849 	}
   1850 
   1851 	if (FAST_IS_REG(src1)) {
   1852 		if (src2 & SLJIT_IMM) {
   1853 			BINARY_IMM(CMP, CMP_rm_r, src2w, src1, 0);
   1854 		}
   1855 		else {
   1856 			inst = emit_x86_instruction(compiler, 1, src1, 0, src2, src2w);
   1857 			FAIL_IF(!inst);
   1858 			*inst = CMP_r_rm;
   1859 		}
   1860 		return SLJIT_SUCCESS;
   1861 	}
   1862 
   1863 	if (FAST_IS_REG(src2) && !(src1 & SLJIT_IMM)) {
   1864 		inst = emit_x86_instruction(compiler, 1, src2, 0, src1, src1w);
   1865 		FAIL_IF(!inst);
   1866 		*inst = CMP_rm_r;
   1867 		return SLJIT_SUCCESS;
   1868 	}
   1869 
   1870 	if (src2 & SLJIT_IMM) {
   1871 		if (src1 & SLJIT_IMM) {
   1872 			EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
   1873 			src1 = TMP_REG1;
   1874 			src1w = 0;
   1875 		}
   1876 		BINARY_IMM(CMP, CMP_rm_r, src2w, src1, src1w);
   1877 	}
   1878 	else {
   1879 		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
   1880 		inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
   1881 		FAIL_IF(!inst);
   1882 		*inst = CMP_r_rm;
   1883 	}
   1884 	return SLJIT_SUCCESS;
   1885 }
   1886 
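/* TEST helper used when SLJIT_AND has an unused destination. TEST has
   no sign-extended 8-bit immediate form (only F7 /0 with a full 32-bit
   immediate), and on x86-64 there is no 64-bit immediate at all, which
   is why non-halfword immediates are first loaded into TMP_REG2. */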
   1887 static sljit_si emit_test_binary(struct sljit_compiler *compiler,
   1888 	sljit_si src1, sljit_sw src1w,
   1889 	sljit_si src2, sljit_sw src2w)
   1890 {
   1891 	sljit_ub* inst;
   1892 
   1893 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
   1894 	if (src1 == SLJIT_SCRATCH_REG1 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
   1895 #else
   1896 	if (src1 == SLJIT_SCRATCH_REG1 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128)) {
   1897 #endif
   1898 		BINARY_EAX_IMM(TEST_EAX_i32, src2w);
   1899 		return SLJIT_SUCCESS;
   1900 	}
   1901 
   1902 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
    1903 	if (src2 == SLJIT_SCRATCH_REG1 && (src1 & SLJIT_IMM) && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) {
   1904 #else
   1905 	if (src2 == SLJIT_SCRATCH_REG1 && (src1 & SLJIT_IMM) && (src1w > 127 || src1w < -128)) {
   1906 #endif
   1907 		BINARY_EAX_IMM(TEST_EAX_i32, src1w);
   1908 		return SLJIT_SUCCESS;
   1909 	}
   1910 
   1911 	if (FAST_IS_REG(src1)) {
   1912 		if (src2 & SLJIT_IMM) {
   1913 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
   1914 			if (IS_HALFWORD(src2w) || compiler->mode32) {
   1915 				inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, 0);
   1916 				FAIL_IF(!inst);
   1917 				*inst = GROUP_F7;
   1918 			}
   1919 			else {
   1920 				FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src2w));
   1921 				inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, src1, 0);
   1922 				FAIL_IF(!inst);
   1923 				*inst = TEST_rm_r;
   1924 			}
   1925 #else
   1926 			inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, 0);
   1927 			FAIL_IF(!inst);
   1928 			*inst = GROUP_F7;
   1929 #endif
   1930 		}
   1931 		else {
   1932 			inst = emit_x86_instruction(compiler, 1, src1, 0, src2, src2w);
   1933 			FAIL_IF(!inst);
   1934 			*inst = TEST_rm_r;
   1935 		}
   1936 		return SLJIT_SUCCESS;
   1937 	}
   1938 
   1939 	if (FAST_IS_REG(src2)) {
   1940 		if (src1 & SLJIT_IMM) {
   1941 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
   1942 			if (IS_HALFWORD(src1w) || compiler->mode32) {
   1943 				inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src1w, src2, 0);
   1944 				FAIL_IF(!inst);
   1945 				*inst = GROUP_F7;
   1946 			}
   1947 			else {
   1948 				FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src1w));
   1949 				inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, src2, 0);
   1950 				FAIL_IF(!inst);
   1951 				*inst = TEST_rm_r;
   1952 			}
   1953 #else
   1954 			inst = emit_x86_instruction(compiler, 1, src1, src1w, src2, 0);
   1955 			FAIL_IF(!inst);
   1956 			*inst = GROUP_F7;
   1957 #endif
   1958 		}
   1959 		else {
   1960 			inst = emit_x86_instruction(compiler, 1, src2, 0, src1, src1w);
   1961 			FAIL_IF(!inst);
   1962 			*inst = TEST_rm_r;
   1963 		}
   1964 		return SLJIT_SUCCESS;
   1965 	}
   1966 
   1967 	EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
   1968 	if (src2 & SLJIT_IMM) {
   1969 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
   1970 		if (IS_HALFWORD(src2w) || compiler->mode32) {
   1971 			inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, TMP_REG1, 0);
   1972 			FAIL_IF(!inst);
   1973 			*inst = GROUP_F7;
   1974 		}
   1975 		else {
   1976 			FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src2w));
   1977 			inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, TMP_REG1, 0);
   1978 			FAIL_IF(!inst);
   1979 			*inst = TEST_rm_r;
   1980 		}
   1981 #else
   1982 		inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, TMP_REG1, 0);
   1983 		FAIL_IF(!inst);
   1984 		*inst = GROUP_F7;
   1985 #endif
   1986 	}
   1987 	else {
   1988 		inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
   1989 		FAIL_IF(!inst);
   1990 		*inst = TEST_rm_r;
   1991 	}
   1992 	return SLJIT_SUCCESS;
   1993 }
   1994 
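/* Variable shift counts must live in CL on x86, so SLJIT_PREF_SHIFT_REG
   maps to ecx. The cases below shuffle values through TMP_REG1 (and, in
   the worst case, spill ecx to TMP_REG2 on x86-64 or to the stack on
   x86-32) so that the shifted value and the count never collide. */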
   1995 static sljit_si emit_shift(struct sljit_compiler *compiler,
   1996 	sljit_ub mode,
   1997 	sljit_si dst, sljit_sw dstw,
   1998 	sljit_si src1, sljit_sw src1w,
   1999 	sljit_si src2, sljit_sw src2w)
   2000 {
   2001 	sljit_ub* inst;
   2002 
   2003 	if ((src2 & SLJIT_IMM) || (src2 == SLJIT_PREF_SHIFT_REG)) {
   2004 		if (dst == src1 && dstw == src1w) {
   2005 			inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, dst, dstw);
   2006 			FAIL_IF(!inst);
   2007 			*inst |= mode;
   2008 			return SLJIT_SUCCESS;
   2009 		}
   2010 		if (dst == SLJIT_UNUSED) {
   2011 			EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
   2012 			inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, TMP_REG1, 0);
   2013 			FAIL_IF(!inst);
   2014 			*inst |= mode;
   2015 			return SLJIT_SUCCESS;
   2016 		}
   2017 		if (dst == SLJIT_PREF_SHIFT_REG && src2 == SLJIT_PREF_SHIFT_REG) {
   2018 			EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
   2019 			inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
   2020 			FAIL_IF(!inst);
   2021 			*inst |= mode;
   2022 			EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
   2023 			return SLJIT_SUCCESS;
   2024 		}
   2025 		if (FAST_IS_REG(dst)) {
   2026 			EMIT_MOV(compiler, dst, 0, src1, src1w);
   2027 			inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, dst, 0);
   2028 			FAIL_IF(!inst);
   2029 			*inst |= mode;
   2030 			return SLJIT_SUCCESS;
   2031 		}
   2032 
   2033 		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
   2034 		inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, TMP_REG1, 0);
   2035 		FAIL_IF(!inst);
   2036 		*inst |= mode;
   2037 		EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
   2038 		return SLJIT_SUCCESS;
   2039 	}
   2040 
   2041 	if (dst == SLJIT_PREF_SHIFT_REG) {
   2042 		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
   2043 		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
   2044 		inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
   2045 		FAIL_IF(!inst);
   2046 		*inst |= mode;
   2047 		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
   2048 	}
   2049 	else if (FAST_IS_REG(dst) && dst != src2 && !ADDRESSING_DEPENDS_ON(src2, dst)) {
   2050 		if (src1 != dst)
   2051 			EMIT_MOV(compiler, dst, 0, src1, src1w);
   2052 		EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_PREF_SHIFT_REG, 0);
   2053 		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
   2054 		inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, dst, 0);
   2055 		FAIL_IF(!inst);
   2056 		*inst |= mode;
   2057 		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
   2058 	}
   2059 	else {
    2060 		/* This case is really difficult, since ecx itself may be used for
    2061 		   addressing, and we must ensure it works even in that case. */
   2062 		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
   2063 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
   2064 		EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_PREF_SHIFT_REG, 0);
   2065 #else
   2066 		/* [esp+0] contains the flags. */
   2067 		EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), sizeof(sljit_sw), SLJIT_PREF_SHIFT_REG, 0);
   2068 #endif
   2069 		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
   2070 		inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
   2071 		FAIL_IF(!inst);
   2072 		*inst |= mode;
   2073 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
   2074 		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG2, 0);
   2075 #else
   2076 		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), sizeof(sljit_sw));
   2077 #endif
   2078 		EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
   2079 	}
   2080 
   2081 	return SLJIT_SUCCESS;
   2082 }
   2083 
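/* The hardware masks shift counts to 5 bits (6 bits for 64-bit
   operands), and a masked count of zero leaves EFLAGS untouched, so a
   flag-setting shift by a constant zero must be emulated; the checks
   against 0x1f/0x3f below detect that case. */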
   2084 static sljit_si emit_shift_with_flags(struct sljit_compiler *compiler,
   2085 	sljit_ub mode, sljit_si set_flags,
   2086 	sljit_si dst, sljit_sw dstw,
   2087 	sljit_si src1, sljit_sw src1w,
   2088 	sljit_si src2, sljit_sw src2w)
   2089 {
   2090 	/* The CPU does not set flags if the shift count is 0. */
   2091 	if (src2 & SLJIT_IMM) {
   2092 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
   2093 		if ((src2w & 0x3f) != 0 || (compiler->mode32 && (src2w & 0x1f) != 0))
   2094 			return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);
   2095 #else
   2096 		if ((src2w & 0x1f) != 0)
   2097 			return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);
   2098 #endif
   2099 		if (!set_flags)
   2100 			return emit_mov(compiler, dst, dstw, src1, src1w);
   2101 		/* OR dst, src, 0 */
   2102 		return emit_cum_binary(compiler, OR_r_rm, OR_rm_r, OR, OR_EAX_i32,
   2103 			dst, dstw, src1, src1w, SLJIT_IMM, 0);
   2104 	}
   2105 
   2106 	if (!set_flags)
   2107 		return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);
   2108 
   2109 	if (!FAST_IS_REG(dst))
   2110 		FAIL_IF(emit_cmp_binary(compiler, src1, src1w, SLJIT_IMM, 0));
   2111 
    2112 	FAIL_IF(emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w));
   2113 
   2114 	if (FAST_IS_REG(dst))
   2115 		return emit_cmp_binary(compiler, dst, dstw, SLJIT_IMM, 0);
   2116 	return SLJIT_SUCCESS;
   2117 }
   2118 
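/* A minimal usage sketch (illustrative only, not part of this file;
   operand conventions follow this revision's sljit API):

       // r2 = r1 + 1 with no flags requested; since dst aliases
       // neither source, emit_lea_binary turns this into a single
       // flag-preserving LEA:
       sljit_emit_op2(compiler, SLJIT_ADD,
           SLJIT_SCRATCH_REG2, 0, SLJIT_SCRATCH_REG1, 0, SLJIT_IMM, 1);

   Requesting flags (e.g. SLJIT_ADD | SLJIT_SET_E) bypasses the LEA
   path and emits a real ADD. */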
   2119 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op2(struct sljit_compiler *compiler, sljit_si op,
   2120 	sljit_si dst, sljit_sw dstw,
   2121 	sljit_si src1, sljit_sw src1w,
   2122 	sljit_si src2, sljit_sw src2w)
   2123 {
   2124 	CHECK_ERROR();
   2125 	check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w);
   2126 	ADJUST_LOCAL_OFFSET(dst, dstw);
   2127 	ADJUST_LOCAL_OFFSET(src1, src1w);
   2128 	ADJUST_LOCAL_OFFSET(src2, src2w);
   2129 
   2130 	CHECK_EXTRA_REGS(dst, dstw, (void)0);
   2131 	CHECK_EXTRA_REGS(src1, src1w, (void)0);
   2132 	CHECK_EXTRA_REGS(src2, src2w, (void)0);
   2133 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
   2134 	compiler->mode32 = op & SLJIT_INT_OP;
   2135 #endif
   2136 
   2137 	if (GET_OPCODE(op) >= SLJIT_MUL) {
   2138 		if (SLJIT_UNLIKELY(GET_FLAGS(op)))
   2139 			compiler->flags_saved = 0;
   2140 		else if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
   2141 			FAIL_IF(emit_save_flags(compiler));
   2142 	}
   2143 
   2144 	switch (GET_OPCODE(op)) {
   2145 	case SLJIT_ADD:
   2146 		if (!GET_FLAGS(op)) {
   2147 			if (emit_lea_binary(compiler, op & SLJIT_KEEP_FLAGS, dst, dstw, src1, src1w, src2, src2w) != SLJIT_ERR_UNSUPPORTED)
   2148 				return compiler->error;
   2149 		}
   2150 		else
   2151 			compiler->flags_saved = 0;
   2152 		if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
   2153 			FAIL_IF(emit_save_flags(compiler));
   2154 		return emit_cum_binary(compiler, ADD_r_rm, ADD_rm_r, ADD, ADD_EAX_i32,
   2155 			dst, dstw, src1, src1w, src2, src2w);
   2156 	case SLJIT_ADDC:
   2157 		if (SLJIT_UNLIKELY(compiler->flags_saved)) /* C flag must be restored. */
   2158 			FAIL_IF(emit_restore_flags(compiler, 1));
   2159 		else if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS))
   2160 			FAIL_IF(emit_save_flags(compiler));
   2161 		if (SLJIT_UNLIKELY(GET_FLAGS(op)))
   2162 			compiler->flags_saved = 0;
   2163 		return emit_cum_binary(compiler, ADC_r_rm, ADC_rm_r, ADC, ADC_EAX_i32,
   2164 			dst, dstw, src1, src1w, src2, src2w);
   2165 	case SLJIT_SUB:
   2166 		if (!GET_FLAGS(op)) {
   2167 			if ((src2 & SLJIT_IMM) && emit_lea_binary(compiler, op & SLJIT_KEEP_FLAGS, dst, dstw, src1, src1w, SLJIT_IMM, -src2w) != SLJIT_ERR_UNSUPPORTED)
   2168 				return compiler->error;
   2169 		}
   2170 		else
   2171 			compiler->flags_saved = 0;
   2172 		if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
   2173 			FAIL_IF(emit_save_flags(compiler));
   2174 		if (dst == SLJIT_UNUSED)
   2175 			return emit_cmp_binary(compiler, src1, src1w, src2, src2w);
   2176 		return emit_non_cum_binary(compiler, SUB_r_rm, SUB_rm_r, SUB, SUB_EAX_i32,
   2177 			dst, dstw, src1, src1w, src2, src2w);
   2178 	case SLJIT_SUBC:
   2179 		if (SLJIT_UNLIKELY(compiler->flags_saved)) /* C flag must be restored. */
   2180 			FAIL_IF(emit_restore_flags(compiler, 1));
   2181 		else if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS))
   2182 			FAIL_IF(emit_save_flags(compiler));
   2183 		if (SLJIT_UNLIKELY(GET_FLAGS(op)))
   2184 			compiler->flags_saved = 0;
   2185 		return emit_non_cum_binary(compiler, SBB_r_rm, SBB_rm_r, SBB, SBB_EAX_i32,
   2186 			dst, dstw, src1, src1w, src2, src2w);
   2187 	case SLJIT_MUL:
   2188 		return emit_mul(compiler, dst, dstw, src1, src1w, src2, src2w);
   2189 	case SLJIT_AND:
   2190 		if (dst == SLJIT_UNUSED)
   2191 			return emit_test_binary(compiler, src1, src1w, src2, src2w);
   2192 		return emit_cum_binary(compiler, AND_r_rm, AND_rm_r, AND, AND_EAX_i32,
   2193 			dst, dstw, src1, src1w, src2, src2w);
   2194 	case SLJIT_OR:
   2195 		return emit_cum_binary(compiler, OR_r_rm, OR_rm_r, OR, OR_EAX_i32,
   2196 			dst, dstw, src1, src1w, src2, src2w);
   2197 	case SLJIT_XOR:
   2198 		return emit_cum_binary(compiler, XOR_r_rm, XOR_rm_r, XOR, XOR_EAX_i32,
   2199 			dst, dstw, src1, src1w, src2, src2w);
   2200 	case SLJIT_SHL:
   2201 		return emit_shift_with_flags(compiler, SHL, GET_FLAGS(op),
   2202 			dst, dstw, src1, src1w, src2, src2w);
   2203 	case SLJIT_LSHR:
   2204 		return emit_shift_with_flags(compiler, SHR, GET_FLAGS(op),
   2205 			dst, dstw, src1, src1w, src2, src2w);
   2206 	case SLJIT_ASHR:
   2207 		return emit_shift_with_flags(compiler, SAR, GET_FLAGS(op),
   2208 			dst, dstw, src1, src1w, src2, src2w);
   2209 	}
   2210 
   2211 	return SLJIT_SUCCESS;
   2212 }
   2213 
   2214 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_register_index(sljit_si reg)
   2215 {
   2216 	check_sljit_get_register_index(reg);
   2217 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
   2218 	if (reg == SLJIT_TEMPORARY_EREG1 || reg == SLJIT_TEMPORARY_EREG2
   2219 			|| reg == SLJIT_SAVED_EREG1 || reg == SLJIT_SAVED_EREG2)
   2220 		return -1;
   2221 #endif
   2222 	return reg_map[reg];
   2223 }
   2224 
   2225 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_float_register_index(sljit_si reg)
   2226 {
   2227 	check_sljit_get_float_register_index(reg);
   2228 	return reg;
   2229 }
   2230 
   2231 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_custom(struct sljit_compiler *compiler,
   2232 	void *instruction, sljit_si size)
   2233 {
   2234 	sljit_ub *inst;
   2235 
   2236 	CHECK_ERROR();
   2237 	check_sljit_emit_op_custom(compiler, instruction, size);
   2238 	SLJIT_ASSERT(size > 0 && size < 16);
   2239 
   2240 	inst = (sljit_ub*)ensure_buf(compiler, 1 + size);
   2241 	FAIL_IF(!inst);
   2242 	INC_SIZE(size);
   2243 	SLJIT_MEMMOVE(inst, instruction, size);
   2244 	return SLJIT_SUCCESS;
   2245 }
   2246 
   2247 /* --------------------------------------------------------------------- */
   2248 /*  Floating point operators                                             */
   2249 /* --------------------------------------------------------------------- */
   2250 
   2251 #if (defined SLJIT_SSE2 && SLJIT_SSE2)
   2252 
    2253 /* Up to 12 bytes of alignment slack + four 16-byte constant vectors. */
   2254 static sljit_si sse2_data[3 + (4 + 4) * 2];
   2255 static sljit_si *sse2_buffer;
   2256 
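/* The 16-byte aligned sse2_buffer holds the four constant vectors used
   by SLJIT_NEGD/SLJIT_ABSD below: a single-precision sign mask at +0
   and abs mask at +16, then the double-precision pair at +32 and +48.
   XORPD against a sign mask flips the sign bit; ANDPD against an abs
   mask clears it. */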
   2257 static void init_compiler(void)
   2258 {
   2259 	sse2_buffer = (sljit_si*)(((sljit_uw)sse2_data + 15) & ~0xf);
   2260 	/* Single precision constants. */
   2261 	sse2_buffer[0] = 0x80000000;
   2262 	sse2_buffer[4] = 0x7fffffff;
   2263 	/* Double precision constants. */
   2264 	sse2_buffer[8] = 0;
   2265 	sse2_buffer[9] = 0x80000000;
   2266 	sse2_buffer[12] = 0xffffffff;
   2267 	sse2_buffer[13] = 0x7fffffff;
   2268 }
   2269 
   2270 #endif
   2271 
   2272 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_is_fpu_available(void)
   2273 {
   2274 #ifdef SLJIT_IS_FPU_AVAILABLE
   2275 	return SLJIT_IS_FPU_AVAILABLE;
   2276 #elif (defined SLJIT_SSE2 && SLJIT_SSE2)
   2277 #if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
   2278 	if (cpu_has_sse2 == -1)
   2279 		get_cpu_features();
   2280 	return cpu_has_sse2;
   2281 #else /* SLJIT_DETECT_SSE2 */
   2282 	return 1;
   2283 #endif /* SLJIT_DETECT_SSE2 */
   2284 #else /* SLJIT_SSE2 */
   2285 	return 0;
   2286 #endif
   2287 }
   2288 
   2289 #if (defined SLJIT_SSE2 && SLJIT_SSE2)
   2290 
   2291 static sljit_si emit_sse2(struct sljit_compiler *compiler, sljit_ub opcode,
   2292 	sljit_si single, sljit_si xmm1, sljit_si xmm2, sljit_sw xmm2w)
   2293 {
   2294 	sljit_ub *inst;
   2295 
   2296 	inst = emit_x86_instruction(compiler, 2 | (single ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2, xmm1, 0, xmm2, xmm2w);
   2297 	FAIL_IF(!inst);
   2298 	*inst++ = GROUP_0F;
   2299 	*inst = opcode;
   2300 	return SLJIT_SUCCESS;
   2301 }
   2302 
   2303 static sljit_si emit_sse2_logic(struct sljit_compiler *compiler, sljit_ub opcode,
   2304 	sljit_si pref66, sljit_si xmm1, sljit_si xmm2, sljit_sw xmm2w)
   2305 {
   2306 	sljit_ub *inst;
   2307 
   2308 	inst = emit_x86_instruction(compiler, 2 | (pref66 ? EX86_PREF_66 : 0) | EX86_SSE2, xmm1, 0, xmm2, xmm2w);
   2309 	FAIL_IF(!inst);
   2310 	*inst++ = GROUP_0F;
   2311 	*inst = opcode;
   2312 	return SLJIT_SUCCESS;
   2313 }
   2314 
   2315 static SLJIT_INLINE sljit_si emit_sse2_load(struct sljit_compiler *compiler,
   2316 	sljit_si single, sljit_si dst, sljit_si src, sljit_sw srcw)
   2317 {
   2318 	return emit_sse2(compiler, MOVSD_x_xm, single, dst, src, srcw);
   2319 }
   2320 
   2321 static SLJIT_INLINE sljit_si emit_sse2_store(struct sljit_compiler *compiler,
   2322 	sljit_si single, sljit_si dst, sljit_sw dstw, sljit_si src)
   2323 {
   2324 	return emit_sse2(compiler, MOVSD_xm_x, single, src, dst, dstw);
   2325 }
   2326 
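/* The scalar loads/stores above reduce to MOVSS/MOVSD: in emit_sse2 the
   `single` flag selects the F3 (scalar single) versus F2 (scalar
   double) mandatory prefix in front of the 0F-escaped opcode. */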
   2327 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop1(struct sljit_compiler *compiler, sljit_si op,
   2328 	sljit_si dst, sljit_sw dstw,
   2329 	sljit_si src, sljit_sw srcw)
   2330 {
   2331 	sljit_si dst_r;
   2332 
   2333 	CHECK_ERROR();
   2334 	check_sljit_emit_fop1(compiler, op, dst, dstw, src, srcw);
   2335 
   2336 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
   2337 	compiler->mode32 = 1;
   2338 #endif
   2339 
   2340 	if (GET_OPCODE(op) == SLJIT_CMPD) {
   2341 		compiler->flags_saved = 0;
   2342 		if (FAST_IS_REG(dst))
   2343 			dst_r = dst;
   2344 		else {
   2345 			dst_r = TMP_FREG;
   2346 			FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, dst_r, dst, dstw));
   2347 		}
   2348 		return emit_sse2_logic(compiler, UCOMISD_x_xm, !(op & SLJIT_SINGLE_OP), dst_r, src, srcw);
   2349 	}
   2350 
   2351 	if (op == SLJIT_MOVD) {
   2352 		if (FAST_IS_REG(dst))
   2353 			return emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, dst, src, srcw);
   2354 		if (FAST_IS_REG(src))
   2355 			return emit_sse2_store(compiler, op & SLJIT_SINGLE_OP, dst, dstw, src);
   2356 		FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, TMP_FREG, src, srcw));
   2357 		return emit_sse2_store(compiler, op & SLJIT_SINGLE_OP, dst, dstw, TMP_FREG);
   2358 	}
   2359 
   2360 	if (SLOW_IS_REG(dst)) {
   2361 		dst_r = dst;
   2362 		if (dst != src)
   2363 			FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, dst_r, src, srcw));
   2364 	}
   2365 	else {
   2366 		dst_r = TMP_FREG;
   2367 		FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, dst_r, src, srcw));
   2368 	}
   2369 
   2370 	switch (GET_OPCODE(op)) {
   2371 	case SLJIT_NEGD:
   2372 		FAIL_IF(emit_sse2_logic(compiler, XORPD_x_xm, 1, dst_r, SLJIT_MEM0(), (sljit_sw)(op & SLJIT_SINGLE_OP ? sse2_buffer : sse2_buffer + 8)));
   2373 		break;
   2374 
   2375 	case SLJIT_ABSD:
   2376 		FAIL_IF(emit_sse2_logic(compiler, ANDPD_x_xm, 1, dst_r, SLJIT_MEM0(), (sljit_sw)(op & SLJIT_SINGLE_OP ? sse2_buffer + 4 : sse2_buffer + 12)));
   2377 		break;
   2378 	}
   2379 
   2380 	if (dst_r == TMP_FREG)
   2381 		return emit_sse2_store(compiler, op & SLJIT_SINGLE_OP, dst, dstw, TMP_FREG);
   2382 	return SLJIT_SUCCESS;
   2383 }
   2384 
   2385 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compiler, sljit_si op,
   2386 	sljit_si dst, sljit_sw dstw,
   2387 	sljit_si src1, sljit_sw src1w,
   2388 	sljit_si src2, sljit_sw src2w)
   2389 {
   2390 	sljit_si dst_r;
   2391 
   2392 	CHECK_ERROR();
   2393 	check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w);
   2394 
   2395 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
   2396 	compiler->mode32 = 1;
   2397 #endif
   2398 
   2399 	if (FAST_IS_REG(dst)) {
   2400 		dst_r = dst;
   2401 		if (dst == src1)
   2402 			; /* Do nothing here. */
   2403 		else if (dst == src2 && (op == SLJIT_ADDD || op == SLJIT_MULD)) {
   2404 			/* Swap arguments. */
   2405 			src2 = src1;
   2406 			src2w = src1w;
   2407 		}
   2408 		else if (dst != src2)
   2409 			FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, dst_r, src1, src1w));
   2410 		else {
   2411 			dst_r = TMP_FREG;
   2412 			FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, TMP_FREG, src1, src1w));
   2413 		}
   2414 	}
   2415 	else {
   2416 		dst_r = TMP_FREG;
   2417 		FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, TMP_FREG, src1, src1w));
   2418 	}
   2419 
   2420 	switch (GET_OPCODE(op)) {
   2421 	case SLJIT_ADDD:
   2422 		FAIL_IF(emit_sse2(compiler, ADDSD_x_xm, op & SLJIT_SINGLE_OP, dst_r, src2, src2w));
   2423 		break;
   2424 
   2425 	case SLJIT_SUBD:
   2426 		FAIL_IF(emit_sse2(compiler, SUBSD_x_xm, op & SLJIT_SINGLE_OP, dst_r, src2, src2w));
   2427 		break;
   2428 
   2429 	case SLJIT_MULD:
   2430 		FAIL_IF(emit_sse2(compiler, MULSD_x_xm, op & SLJIT_SINGLE_OP, dst_r, src2, src2w));
   2431 		break;
   2432 
   2433 	case SLJIT_DIVD:
   2434 		FAIL_IF(emit_sse2(compiler, DIVSD_x_xm, op & SLJIT_SINGLE_OP, dst_r, src2, src2w));
   2435 		break;
   2436 	}
   2437 
   2438 	if (dst_r == TMP_FREG)
   2439 		return emit_sse2_store(compiler, op & SLJIT_SINGLE_OP, dst, dstw, TMP_FREG);
   2440 	return SLJIT_SUCCESS;
   2441 }
   2442 
   2443 #else
   2444 
   2445 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop1(struct sljit_compiler *compiler, sljit_si op,
   2446 	sljit_si dst, sljit_sw dstw,
   2447 	sljit_si src, sljit_sw srcw)
   2448 {
   2449 	CHECK_ERROR();
    2450 	/* Should cause an assertion failure. */
   2451 	check_sljit_emit_fop1(compiler, op, dst, dstw, src, srcw);
   2452 	compiler->error = SLJIT_ERR_UNSUPPORTED;
   2453 	return SLJIT_ERR_UNSUPPORTED;
   2454 }
   2455 
   2456 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compiler, sljit_si op,
   2457 	sljit_si dst, sljit_sw dstw,
   2458 	sljit_si src1, sljit_sw src1w,
   2459 	sljit_si src2, sljit_sw src2w)
   2460 {
   2461 	CHECK_ERROR();
    2462 	/* Should cause an assertion failure. */
   2463 	check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w);
   2464 	compiler->error = SLJIT_ERR_UNSUPPORTED;
   2465 	return SLJIT_ERR_UNSUPPORTED;
   2466 }
   2467 
   2468 #endif
   2469 
   2470 /* --------------------------------------------------------------------- */
   2471 /*  Conditional instructions                                             */
   2472 /* --------------------------------------------------------------------- */
   2473 
   2474 SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler)
   2475 {
   2476 	sljit_ub *inst;
   2477 	struct sljit_label *label;
   2478 
   2479 	CHECK_ERROR_PTR();
   2480 	check_sljit_emit_label(compiler);
   2481 
   2482 	/* We should restore the flags before the label,
    2483 	   since other taken jumps have their own flags as well. */
   2484 	if (SLJIT_UNLIKELY(compiler->flags_saved))
   2485 		PTR_FAIL_IF(emit_restore_flags(compiler, 0));
   2486 
   2487 	if (compiler->last_label && compiler->last_label->size == compiler->size)
   2488 		return compiler->last_label;
   2489 
   2490 	label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label));
   2491 	PTR_FAIL_IF(!label);
   2492 	set_label(label, compiler);
   2493 
   2494 	inst = (sljit_ub*)ensure_buf(compiler, 2);
   2495 	PTR_FAIL_IF(!inst);
   2496 
   2497 	*inst++ = 0;
   2498 	*inst++ = 0;
   2499 
   2500 	return label;
   2501 }
   2502 
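/* Labels, jumps and constants are recorded in the code buffer as
   two-byte pseudo-instructions starting with a 0 length byte: {0, 0}
   marks a label, {0, 1} a constant (see sljit_emit_const below) and
   {0, type + 4} a jump. These markers are resolved to real addresses
   when the final code is generated. */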
   2503 SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_si type)
   2504 {
   2505 	sljit_ub *inst;
   2506 	struct sljit_jump *jump;
   2507 
   2508 	CHECK_ERROR_PTR();
   2509 	check_sljit_emit_jump(compiler, type);
   2510 
   2511 	if (SLJIT_UNLIKELY(compiler->flags_saved)) {
   2512 		if ((type & 0xff) <= SLJIT_JUMP)
   2513 			PTR_FAIL_IF(emit_restore_flags(compiler, 0));
   2514 		compiler->flags_saved = 0;
   2515 	}
   2516 
   2517 	jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
   2518 	PTR_FAIL_IF_NULL(jump);
   2519 	set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
   2520 	type &= 0xff;
   2521 
   2522 	if (type >= SLJIT_CALL1)
   2523 		PTR_FAIL_IF(call_with_args(compiler, type));
   2524 
   2525 	/* Worst case size. */
   2526 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
   2527 	compiler->size += (type >= SLJIT_JUMP) ? 5 : 6;
   2528 #else
   2529 	compiler->size += (type >= SLJIT_JUMP) ? (10 + 3) : (2 + 10 + 3);
   2530 #endif
   2531 
   2532 	inst = (sljit_ub*)ensure_buf(compiler, 2);
   2533 	PTR_FAIL_IF_NULL(inst);
   2534 
   2535 	*inst++ = 0;
   2536 	*inst++ = type + 4;
   2537 	return jump;
   2538 }
   2539 
   2540 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_ijump(struct sljit_compiler *compiler, sljit_si type, sljit_si src, sljit_sw srcw)
   2541 {
   2542 	sljit_ub *inst;
   2543 	struct sljit_jump *jump;
   2544 
   2545 	CHECK_ERROR();
   2546 	check_sljit_emit_ijump(compiler, type, src, srcw);
   2547 	ADJUST_LOCAL_OFFSET(src, srcw);
   2548 
   2549 	CHECK_EXTRA_REGS(src, srcw, (void)0);
   2550 
   2551 	if (SLJIT_UNLIKELY(compiler->flags_saved)) {
   2552 		if (type <= SLJIT_JUMP)
   2553 			FAIL_IF(emit_restore_flags(compiler, 0));
   2554 		compiler->flags_saved = 0;
   2555 	}
   2556 
   2557 	if (type >= SLJIT_CALL1) {
   2558 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
   2559 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
   2560 		if (src == SLJIT_SCRATCH_REG3) {
   2561 			EMIT_MOV(compiler, TMP_REG1, 0, src, 0);
   2562 			src = TMP_REG1;
   2563 		}
   2564 		if (src == SLJIT_MEM1(SLJIT_LOCALS_REG) && type >= SLJIT_CALL3)
   2565 			srcw += sizeof(sljit_sw);
   2566 #endif
   2567 #endif
   2568 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) && defined(_WIN64)
   2569 		if (src == SLJIT_SCRATCH_REG3) {
   2570 			EMIT_MOV(compiler, TMP_REG1, 0, src, 0);
   2571 			src = TMP_REG1;
   2572 		}
   2573 #endif
   2574 		FAIL_IF(call_with_args(compiler, type));
   2575 	}
   2576 
   2577 	if (src == SLJIT_IMM) {
   2578 		jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
   2579 		FAIL_IF_NULL(jump);
   2580 		set_jump(jump, compiler, JUMP_ADDR);
   2581 		jump->u.target = srcw;
   2582 
   2583 		/* Worst case size. */
   2584 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
   2585 		compiler->size += 5;
   2586 #else
   2587 		compiler->size += 10 + 3;
   2588 #endif
   2589 
   2590 		inst = (sljit_ub*)ensure_buf(compiler, 2);
   2591 		FAIL_IF_NULL(inst);
   2592 
   2593 		*inst++ = 0;
   2594 		*inst++ = type + 4;
   2595 	}
   2596 	else {
   2597 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
   2598 		/* REX_W is not necessary (src is not immediate). */
   2599 		compiler->mode32 = 1;
   2600 #endif
   2601 		inst = emit_x86_instruction(compiler, 1, 0, 0, src, srcw);
   2602 		FAIL_IF(!inst);
   2603 		*inst++ = GROUP_FF;
   2604 		*inst |= (type >= SLJIT_FAST_CALL) ? CALL_rm : JMP_rm;
   2605 	}
   2606 	return SLJIT_SUCCESS;
   2607 }
   2608 
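/* sljit_emit_op_flags relies on the regular layout of the x86
   condition-code opcodes: Jcc near is 0F 80+cc, SETcc is 0F 90+cc and
   CMOVcc is 0F 40+cc, hence the "setcc = jcc + 0x10" and
   "cmovcc = setcc - 0x50" arithmetic used below. */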
   2609 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_si op,
   2610 	sljit_si dst, sljit_sw dstw,
   2611 	sljit_si src, sljit_sw srcw,
   2612 	sljit_si type)
   2613 {
   2614 	sljit_ub *inst;
   2615 	sljit_ub cond_set = 0;
   2616 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
   2617 	sljit_si reg;
   2618 #else
    2619 	/* CHECK_EXTRA_REGS might overwrite these values. */
   2620 	sljit_si dst_save = dst;
   2621 	sljit_sw dstw_save = dstw;
   2622 #endif
   2623 
   2624 	CHECK_ERROR();
   2625 	check_sljit_emit_op_flags(compiler, op, dst, dstw, src, srcw, type);
   2626 
   2627 	if (dst == SLJIT_UNUSED)
   2628 		return SLJIT_SUCCESS;
   2629 
   2630 	ADJUST_LOCAL_OFFSET(dst, dstw);
   2631 	CHECK_EXTRA_REGS(dst, dstw, (void)0);
   2632 	if (SLJIT_UNLIKELY(compiler->flags_saved))
   2633 		FAIL_IF(emit_restore_flags(compiler, op & SLJIT_KEEP_FLAGS));
   2634 
   2635 	/* setcc = jcc + 0x10. */
   2636 	cond_set = get_jump_code(type) + 0x10;
   2637 
   2638 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
   2639 	if (GET_OPCODE(op) == SLJIT_OR && !GET_ALL_FLAGS(op) && FAST_IS_REG(dst) && dst == src) {
   2640 		inst = (sljit_ub*)ensure_buf(compiler, 1 + 4 + 3);
   2641 		FAIL_IF(!inst);
   2642 		INC_SIZE(4 + 3);
   2643 		/* Set low register to conditional flag. */
   2644 		*inst++ = (reg_map[TMP_REG1] <= 7) ? REX : REX_B;
   2645 		*inst++ = GROUP_0F;
   2646 		*inst++ = cond_set;
   2647 		*inst++ = MOD_REG | reg_lmap[TMP_REG1];
   2648 		*inst++ = REX | (reg_map[TMP_REG1] <= 7 ? 0 : REX_R) | (reg_map[dst] <= 7 ? 0 : REX_B);
   2649 		*inst++ = OR_rm8_r8;
   2650 		*inst++ = MOD_REG | (reg_lmap[TMP_REG1] << 3) | reg_lmap[dst];
   2651 		return SLJIT_SUCCESS;
   2652 	}
   2653 
   2654 	reg = (op == SLJIT_MOV && FAST_IS_REG(dst)) ? dst : TMP_REG1;
   2655 
   2656 	inst = (sljit_ub*)ensure_buf(compiler, 1 + 4 + 4);
   2657 	FAIL_IF(!inst);
   2658 	INC_SIZE(4 + 4);
   2659 	/* Set low register to conditional flag. */
   2660 	*inst++ = (reg_map[reg] <= 7) ? REX : REX_B;
   2661 	*inst++ = GROUP_0F;
   2662 	*inst++ = cond_set;
   2663 	*inst++ = MOD_REG | reg_lmap[reg];
   2664 	*inst++ = REX_W | (reg_map[reg] <= 7 ? 0 : (REX_B | REX_R));
   2665 	*inst++ = GROUP_0F;
   2666 	*inst++ = MOVZX_r_rm8;
   2667 	*inst = MOD_REG | (reg_lmap[reg] << 3) | reg_lmap[reg];
   2668 
   2669 	if (reg != TMP_REG1)
   2670 		return SLJIT_SUCCESS;
   2671 
   2672 	if (GET_OPCODE(op) < SLJIT_ADD) {
   2673 		compiler->mode32 = GET_OPCODE(op) != SLJIT_MOV;
   2674 		return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
   2675 	}
   2676 #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) || (defined SLJIT_DEBUG && SLJIT_DEBUG)
   2677 	compiler->skip_checks = 1;
   2678 #endif
   2679 	return sljit_emit_op2(compiler, op, dst, dstw, dst, dstw, TMP_REG1, 0);
   2680 #else /* SLJIT_CONFIG_X86_64 */
   2681 	if (GET_OPCODE(op) < SLJIT_ADD && FAST_IS_REG(dst)) {
   2682 		if (reg_map[dst] <= 4) {
   2683 			/* Low byte is accessible. */
   2684 			inst = (sljit_ub*)ensure_buf(compiler, 1 + 3 + 3);
   2685 			FAIL_IF(!inst);
   2686 			INC_SIZE(3 + 3);
   2687 			/* Set low byte to conditional flag. */
   2688 			*inst++ = GROUP_0F;
   2689 			*inst++ = cond_set;
   2690 			*inst++ = MOD_REG | reg_map[dst];
   2691 
   2692 			*inst++ = GROUP_0F;
   2693 			*inst++ = MOVZX_r_rm8;
   2694 			*inst = MOD_REG | (reg_map[dst] << 3) | reg_map[dst];
   2695 			return SLJIT_SUCCESS;
   2696 		}
   2697 
   2698 		/* Low byte is not accessible. */
   2699 		if (cpu_has_cmov == -1)
   2700 			get_cpu_features();
   2701 
   2702 		if (cpu_has_cmov) {
   2703 			EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, 1);
   2704 			/* a xor reg, reg operation would overwrite the flags. */
   2705 			EMIT_MOV(compiler, dst, 0, SLJIT_IMM, 0);
   2706 
   2707 			inst = (sljit_ub*)ensure_buf(compiler, 1 + 3);
   2708 			FAIL_IF(!inst);
   2709 			INC_SIZE(3);
   2710 
   2711 			*inst++ = GROUP_0F;
   2712 			/* cmovcc = setcc - 0x50. */
   2713 			*inst++ = cond_set - 0x50;
   2714 			*inst++ = MOD_REG | (reg_map[dst] << 3) | reg_map[TMP_REG1];
   2715 			return SLJIT_SUCCESS;
   2716 		}
   2717 
   2718 		inst = (sljit_ub*)ensure_buf(compiler, 1 + 1 + 3 + 3 + 1);
   2719 		FAIL_IF(!inst);
   2720 		INC_SIZE(1 + 3 + 3 + 1);
   2721 		*inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
   2722 		/* Set al to conditional flag. */
   2723 		*inst++ = GROUP_0F;
   2724 		*inst++ = cond_set;
   2725 		*inst++ = MOD_REG | 0 /* eax */;
   2726 
   2727 		*inst++ = GROUP_0F;
   2728 		*inst++ = MOVZX_r_rm8;
   2729 		*inst++ = MOD_REG | (reg_map[dst] << 3) | 0 /* eax */;
   2730 		*inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
   2731 		return SLJIT_SUCCESS;
   2732 	}
   2733 
   2734 	if (GET_OPCODE(op) == SLJIT_OR && !GET_ALL_FLAGS(op) && FAST_IS_REG(dst) && dst == src && reg_map[dst] <= 4) {
   2735 		SLJIT_COMPILE_ASSERT(reg_map[SLJIT_SCRATCH_REG1] == 0, scratch_reg1_must_be_eax);
   2736 		if (dst != SLJIT_SCRATCH_REG1) {
   2737 			inst = (sljit_ub*)ensure_buf(compiler, 1 + 1 + 3 + 2 + 1);
   2738 			FAIL_IF(!inst);
   2739 			INC_SIZE(1 + 3 + 2 + 1);
   2740 			/* Set low register to conditional flag. */
   2741 			*inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
   2742 			*inst++ = GROUP_0F;
   2743 			*inst++ = cond_set;
   2744 			*inst++ = MOD_REG | 0 /* eax */;
   2745 			*inst++ = OR_rm8_r8;
   2746 			*inst++ = MOD_REG | (0 /* eax */ << 3) | reg_map[dst];
   2747 			*inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
   2748 		}
   2749 		else {
   2750 			inst = (sljit_ub*)ensure_buf(compiler, 1 + 2 + 3 + 2 + 2);
   2751 			FAIL_IF(!inst);
   2752 			INC_SIZE(2 + 3 + 2 + 2);
   2753 			/* Set low register to conditional flag. */
   2754 			*inst++ = XCHG_r_rm;
   2755 			*inst++ = MOD_REG | (1 /* ecx */ << 3) | reg_map[TMP_REG1];
   2756 			*inst++ = GROUP_0F;
   2757 			*inst++ = cond_set;
   2758 			*inst++ = MOD_REG | 1 /* ecx */;
   2759 			*inst++ = OR_rm8_r8;
   2760 			*inst++ = MOD_REG | (1 /* ecx */ << 3) | 0 /* eax */;
   2761 			*inst++ = XCHG_r_rm;
   2762 			*inst++ = MOD_REG | (1 /* ecx */ << 3) | reg_map[TMP_REG1];
   2763 		}
   2764 		return SLJIT_SUCCESS;
   2765 	}
   2766 
   2767 	/* Set TMP_REG1 to the bit. */
   2768 	inst = (sljit_ub*)ensure_buf(compiler, 1 + 1 + 3 + 3 + 1);
   2769 	FAIL_IF(!inst);
   2770 	INC_SIZE(1 + 3 + 3 + 1);
   2771 	*inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
   2772 	/* Set al to conditional flag. */
   2773 	*inst++ = GROUP_0F;
   2774 	*inst++ = cond_set;
   2775 	*inst++ = MOD_REG | 0 /* eax */;
   2776 
   2777 	*inst++ = GROUP_0F;
   2778 	*inst++ = MOVZX_r_rm8;
   2779 	*inst++ = MOD_REG | (0 << 3) /* eax */ | 0 /* eax */;
   2780 
   2781 	*inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
   2782 
   2783 	if (GET_OPCODE(op) < SLJIT_ADD)
   2784 		return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
   2785 
   2786 #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) || (defined SLJIT_DEBUG && SLJIT_DEBUG)
   2787 	compiler->skip_checks = 1;
   2788 #endif
   2789 	return sljit_emit_op2(compiler, op, dst_save, dstw_save, dst_save, dstw_save, TMP_REG1, 0);
   2790 #endif /* SLJIT_CONFIG_X86_64 */
   2791 }
   2792 
   2793 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_local_base(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw, sljit_sw offset)
   2794 {
   2795 	CHECK_ERROR();
   2796 	check_sljit_get_local_base(compiler, dst, dstw, offset);
   2797 	ADJUST_LOCAL_OFFSET(dst, dstw);
   2798 
   2799 	CHECK_EXTRA_REGS(dst, dstw, (void)0);
   2800 
   2801 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
   2802 	compiler->mode32 = 0;
   2803 #endif
   2804 
   2805 	ADJUST_LOCAL_OFFSET(SLJIT_MEM1(SLJIT_LOCALS_REG), offset);
   2806 
   2807 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
   2808 	if (NOT_HALFWORD(offset)) {
   2809 		FAIL_IF(emit_load_imm64(compiler, TMP_REG1, offset));
   2810 #if (defined SLJIT_DEBUG && SLJIT_DEBUG)
   2811 		SLJIT_ASSERT(emit_lea_binary(compiler, SLJIT_KEEP_FLAGS, dst, dstw, SLJIT_LOCALS_REG, 0, TMP_REG1, 0) != SLJIT_ERR_UNSUPPORTED);
   2812 		return compiler->error;
   2813 #else
   2814 		return emit_lea_binary(compiler, SLJIT_KEEP_FLAGS, dst, dstw, SLJIT_LOCALS_REG, 0, TMP_REG1, 0);
   2815 #endif
   2816 	}
   2817 #endif
   2818 
   2819 	if (offset != 0)
   2820 		return emit_lea_binary(compiler, SLJIT_KEEP_FLAGS, dst, dstw, SLJIT_LOCALS_REG, 0, SLJIT_IMM, offset);
   2821 	return emit_mov(compiler, dst, dstw, SLJIT_LOCALS_REG, 0);
   2822 }
   2823 
   2824 SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw, sljit_sw init_value)
   2825 {
   2826 	sljit_ub *inst;
   2827 	struct sljit_const *const_;
   2828 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
   2829 	sljit_si reg;
   2830 #endif
   2831 
   2832 	CHECK_ERROR_PTR();
   2833 	check_sljit_emit_const(compiler, dst, dstw, init_value);
   2834 	ADJUST_LOCAL_OFFSET(dst, dstw);
   2835 
   2836 	CHECK_EXTRA_REGS(dst, dstw, (void)0);
   2837 
   2838 	const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const));
   2839 	PTR_FAIL_IF(!const_);
   2840 	set_const(const_, compiler);
   2841 
   2842 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
   2843 	compiler->mode32 = 0;
   2844 	reg = SLOW_IS_REG(dst) ? dst : TMP_REG1;
   2845 
   2846 	if (emit_load_imm64(compiler, reg, init_value))
   2847 		return NULL;
   2848 #else
   2849 	if (dst == SLJIT_UNUSED)
   2850 		dst = TMP_REG1;
   2851 
   2852 	if (emit_mov(compiler, dst, dstw, SLJIT_IMM, init_value))
   2853 		return NULL;
   2854 #endif
   2855 
   2856 	inst = (sljit_ub*)ensure_buf(compiler, 2);
   2857 	PTR_FAIL_IF(!inst);
   2858 
   2859 	*inst++ = 0;
   2860 	*inst++ = 1;
   2861 
   2862 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
   2863 	if (dst & SLJIT_MEM)
   2864 		if (emit_mov(compiler, dst, dstw, TMP_REG1, 0))
   2865 			return NULL;
   2866 #endif
   2867 
   2868 	return const_;
   2869 }
   2870 
   2871 SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_addr)
   2872 {
   2873 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
   2874 	*(sljit_sw*)addr = new_addr - (addr + 4);
   2875 #else
   2876 	*(sljit_uw*)addr = new_addr;
   2877 #endif
   2878 }
   2879 
   2880 SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant)
   2881 {
   2882 	*(sljit_sw*)addr = new_constant;
   2883 }
   2884