sljitNativeX86_common.c revision 1.4       1 /*
      2  *    Stack-less Just-In-Time compiler
      3  *
      4  *    Copyright 2009-2012 Zoltan Herczeg (hzmester (at) freemail.hu). All rights reserved.
      5  *
      6  * Redistribution and use in source and binary forms, with or without modification, are
      7  * permitted provided that the following conditions are met:
      8  *
      9  *   1. Redistributions of source code must retain the above copyright notice, this list of
     10  *      conditions and the following disclaimer.
     11  *
     12  *   2. Redistributions in binary form must reproduce the above copyright notice, this list
     13  *      of conditions and the following disclaimer in the documentation and/or other materials
     14  *      provided with the distribution.
     15  *
     16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
     17  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
     18  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
     19  * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
     20  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
     21  * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
     22  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     23  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
     24  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     25  */
     26 
     27 SLJIT_API_FUNC_ATTRIBUTE SLJIT_CONST char* sljit_get_platform_name(void)
     28 {
     29 	return "x86" SLJIT_CPUINFO;
     30 }
     31 
     32 /*
     33    32b register indexes:
     34      0 - EAX
     35      1 - ECX
     36      2 - EDX
     37      3 - EBX
     38      4 - none
     39      5 - EBP
     40      6 - ESI
     41      7 - EDI
     42 */
     43 
     44 /*
     45    64b register indexes:
     46      0 - RAX
     47      1 - RCX
     48      2 - RDX
     49      3 - RBX
     50      4 - none
     51      5 - RBP
     52      6 - RSI
     53      7 - RDI
     54      8 - R8   - From now on REX prefix is required
     55      9 - R9
     56     10 - R10
     57     11 - R11
     58     12 - R12
     59     13 - R13
     60     14 - R14
     61     15 - R15
     62 */
     63 
/* Architecture specific register assignment. Exactly one of the two branches
   below is compiled: the first for SLJIT_CONFIG_X86_32, the other otherwise. */
     64 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
     65 
     66 /* Last register + 1. */
     67 #define TMP_REGISTER	(SLJIT_NO_REGISTERS + 1)
     68 
/* Indexed by SLJIT register number (TMP_REGISTER is the last entry); each
   value is the x86 register index, as listed in the 32b table above, that is
   placed into the encoded instruction. */
     69 static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 2] = {
     70 	0, 0, 2, 1, 0, 0, 3, 6, 7, 0, 0, 4, 5
     71 };
     72 
/* If operand p is one of the EREG registers, rewrite the (p, w) pair into a
   memory operand based on SLJIT_LOCALS_REG, with the offset derived from
   compiler->temporaries_start or compiler->saveds_start, and then execute
   the statement passed as `do`.
   Expands to a bare if / else-if chain and needs a variable called
   `compiler` in scope; p and w must be assignable. */
     73 #define CHECK_EXTRA_REGS(p, w, do) \
     74 	if (p >= SLJIT_TEMPORARY_EREG1 && p <= SLJIT_TEMPORARY_EREG2) { \
     75 		w = compiler->temporaries_start + (p - SLJIT_TEMPORARY_EREG1) * sizeof(sljit_w); \
     76 		p = SLJIT_MEM1(SLJIT_LOCALS_REG); \
     77 		do; \
     78 	} \
     79 	else if (p >= SLJIT_SAVED_EREG1 && p <= SLJIT_SAVED_EREG2) { \
     80 		w = compiler->saveds_start + (p - SLJIT_SAVED_EREG1) * sizeof(sljit_w); \
     81 		p = SLJIT_MEM1(SLJIT_LOCALS_REG); \
     82 		do; \
     83 	}
     84 
     85 #else /* SLJIT_CONFIG_X86_32 */
     86 
     87 /* Last register + 1. */
     88 #define TMP_REGISTER	(SLJIT_NO_REGISTERS + 1)
     89 #define TMP_REG2	(SLJIT_NO_REGISTERS + 2)
     90 #define TMP_REG3	(SLJIT_NO_REGISTERS + 3)
     91 
     92 /* Note: r12 & 0x7 == 0b100, which is decoded as "SIB byte present".
   Note: avoid using r12 and r13 for memory addressing;
   therefore r12 is better for SAVED_EREG than SAVED_REG. */
     95 #ifndef _WIN64
     96 /* 1st passed in rdi, 2nd argument passed in rsi, 3rd in rdx. */
/* Indexed by SLJIT register number (followed by TMP_REGISTER, TMP_REG2 and
   TMP_REG3); values are the full 4 bit register indexes of the 64b table. */
     97 static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 4] = {
     98 	0, 0, 6, 1, 8, 11, 3, 15, 14, 13, 12, 4, 2, 7, 9
     99 };
    100 /* low-map. reg_map & 0x7. Must be kept in sync with reg_map by hand. */
    101 static SLJIT_CONST sljit_ub reg_lmap[SLJIT_NO_REGISTERS + 4] = {
    102 	0, 0, 6, 1, 0, 3,  3, 7,  6,  5,  4,  4, 2, 7, 1
    103 };
    104 #else
    105 /* 1st passed in rcx, 2nd argument passed in rdx, 3rd in r8. */
/* Same layout as the non-Windows table, with a different assignment. */
    106 static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 4] = {
    107 	0, 0, 2, 1, 11, 13, 3, 6, 7, 14, 15, 4, 10, 8, 9
    108 };
    109 /* low-map. reg_map & 0x7. Must be kept in sync with reg_map by hand. */
    110 static SLJIT_CONST sljit_ub reg_lmap[SLJIT_NO_REGISTERS + 4] = {
    111 	0, 0, 2, 1, 3,  5,  3, 6, 7,  6,  7, 4, 2,  0, 1
    112 };
    113 #endif
    114 
/* REX prefix byte values: the base value 0x40 with the W, R, X or B bit set. */
    115 #define REX_W		0x48
    116 #define REX_R		0x44
    117 #define REX_X		0x42
    118 #define REX_B		0x41
    119 #define REX		0x40
    120 
/* "Half word" integer types; both are plain int sized. */
    121 typedef unsigned int sljit_uhw;
    122 typedef int sljit_hw;
    123 
/* Range tests: does x fit / not fit into a signed 32 bit value? The argument
   is evaluated twice. */
    124 #define IS_HALFWORD(x)		((x) <= 0x7fffffffll && (x) >= -0x80000000ll)
    125 #define NOT_HALFWORD(x)		((x) > 0x7fffffffll || (x) < -0x80000000ll)
    126 
/* Expands to nothing in this configuration. */
    127 #define CHECK_EXTRA_REGS(p, w, do)
    128 
    129 #endif /* SLJIT_CONFIG_X86_32 */
    130 
/* Extra floating point register index, one past SLJIT_FLOAT_REG4; only
   defined when SLJIT_SSE2 is enabled. */
    131 #if (defined SLJIT_SSE2 && SLJIT_SSE2)
    132 #define TMP_FREG	(SLJIT_FLOAT_REG4 + 1)
    133 #endif
    134 
    135 /* Size flags for emit_x86_instruction: */
/* These single bit values are OR-ed into the size argument of
   emit_x86_instruction (e.g. "1 | EX86_BYTE_ARG | EX86_NO_REXW"). */
    136 #define EX86_BIN_INS		0x0010
    137 #define EX86_SHIFT_INS		0x0020
    138 #define EX86_REX		0x0040
    139 #define EX86_NO_REXW		0x0080
    140 #define EX86_BYTE_ARG		0x0100
    141 #define EX86_HALF_ARG		0x0200
    142 #define EX86_PREF_66		0x0400
    143 
    144 #if (defined SLJIT_SSE2 && SLJIT_SSE2)
    145 #define EX86_PREF_F2		0x0800
    146 #define EX86_SSE2		0x1000
    147 #endif
    148 
/* Start a record of s machine code bytes: store the length byte s through the
   local pointer (`buf` or `code`), advance it, and add s to compiler->size.
   Both macros evaluate s twice and rely on locals named `buf` / `code` and
   `compiler`. */
    149 #define INC_SIZE(s)			(*buf++ = (s), compiler->size += (s))
    150 #define INC_CSIZE(s)			(*code++ = (s), compiler->size += (s))
    151 
/* One-instruction emitters writing through the local pointer `buf`:
   push r (0x50 + r), pop r (0x58 + r), ret (0xc3) and ret imm16 (0xc2, with
   n as the low byte of the immediate and 0 as the high byte). r must be a
   hardware register index below 8. */
    152 #define PUSH_REG(r)			(*buf++ = (0x50 + (r)))
    153 #define POP_REG(r)			(*buf++ = (0x58 + (r)))
    154 #define RET()				(*buf++ = (0xc3))
    155 #define RETN(n)				(*buf++ = (0xc2), *buf++ = n, *buf++ = 0)
    156 /* r32, r/m32 */
/* mov reg, r/m (opcode 0x8b) followed by a ModRM byte built from mod/reg/rm. */
    157 #define MOV_RM(mod, reg, rm)		(*buf++ = (0x8b), *buf++ = (mod) << 6 | (reg) << 3 | (rm))
    158 
    159 static sljit_ub get_jump_code(int type)
    160 {
    161 	switch (type) {
    162 	case SLJIT_C_EQUAL:
    163 	case SLJIT_C_FLOAT_EQUAL:
    164 		return 0x84;
    165 
    166 	case SLJIT_C_NOT_EQUAL:
    167 	case SLJIT_C_FLOAT_NOT_EQUAL:
    168 		return 0x85;
    169 
    170 	case SLJIT_C_LESS:
    171 	case SLJIT_C_FLOAT_LESS:
    172 		return 0x82;
    173 
    174 	case SLJIT_C_GREATER_EQUAL:
    175 	case SLJIT_C_FLOAT_GREATER_EQUAL:
    176 		return 0x83;
    177 
    178 	case SLJIT_C_GREATER:
    179 	case SLJIT_C_FLOAT_GREATER:
    180 		return 0x87;
    181 
    182 	case SLJIT_C_LESS_EQUAL:
    183 	case SLJIT_C_FLOAT_LESS_EQUAL:
    184 		return 0x86;
    185 
    186 	case SLJIT_C_SIG_LESS:
    187 		return 0x8c;
    188 
    189 	case SLJIT_C_SIG_GREATER_EQUAL:
    190 		return 0x8d;
    191 
    192 	case SLJIT_C_SIG_GREATER:
    193 		return 0x8f;
    194 
    195 	case SLJIT_C_SIG_LESS_EQUAL:
    196 		return 0x8e;
    197 
    198 	case SLJIT_C_OVERFLOW:
    199 	case SLJIT_C_MUL_OVERFLOW:
    200 		return 0x80;
    201 
    202 	case SLJIT_C_NOT_OVERFLOW:
    203 	case SLJIT_C_MUL_NOT_OVERFLOW:
    204 		return 0x81;
    205 
    206 	case SLJIT_C_FLOAT_UNORDERED:
    207 		return 0x8a;
    208 
    209 	case SLJIT_C_FLOAT_ORDERED:
    210 		return 0x8b;
    211 	}
    212 	return 0;
    213 }
    214 
/* Forward declarations of the jump emitters called by
   generate_near_jump_code and sljit_generate_code before their definitions
   appear. generate_fixed_jump is only declared for the x86-64 configuration. */
    215 static sljit_ub* generate_far_jump_code(struct sljit_jump *jump, sljit_ub *code_ptr, int type);
    216 
    217 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
    218 static sljit_ub* generate_fixed_jump(sljit_ub *code_ptr, sljit_w addr, int type);
    219 #endif
    220 
    221 static sljit_ub* generate_near_jump_code(struct sljit_jump *jump, sljit_ub *code_ptr, sljit_ub *code, int type)
    222 {
    223 	int short_jump;
    224 	sljit_uw label_addr;
    225 
    226 	if (jump->flags & JUMP_LABEL)
    227 		label_addr = (sljit_uw)(code + jump->u.label->size);
    228 	else
    229 		label_addr = jump->u.target;
    230 	short_jump = (sljit_w)(label_addr - (jump->addr + 2)) >= -128 && (sljit_w)(label_addr - (jump->addr + 2)) <= 127;
    231 
    232 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
    233 	if ((sljit_w)(label_addr - (jump->addr + 1)) > 0x7fffffffll || (sljit_w)(label_addr - (jump->addr + 1)) < -0x80000000ll)
    234 		return generate_far_jump_code(jump, code_ptr, type);
    235 #endif
    236 
    237 	if (type == SLJIT_JUMP) {
    238 		if (short_jump)
    239 			*code_ptr++ = 0xeb;
    240 		else
    241 			*code_ptr++ = 0xe9;
    242 		jump->addr++;
    243 	}
    244 	else if (type >= SLJIT_FAST_CALL) {
    245 		short_jump = 0;
    246 		*code_ptr++ = 0xe8;
    247 		jump->addr++;
    248 	}
    249 	else if (short_jump) {
    250 		*code_ptr++ = get_jump_code(type) - 0x10;
    251 		jump->addr++;
    252 	}
    253 	else {
    254 		*code_ptr++ = 0x0f;
    255 		*code_ptr++ = get_jump_code(type);
    256 		jump->addr += 2;
    257 	}
    258 
    259 	if (short_jump) {
    260 		jump->flags |= PATCH_MB;
    261 		code_ptr += sizeof(sljit_b);
    262 	} else {
    263 		jump->flags |= PATCH_MW;
    264 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
    265 		code_ptr += sizeof(sljit_w);
    266 #else
    267 		code_ptr += sizeof(sljit_hw);
    268 #endif
    269 	}
    270 
    271 	return code_ptr;
    272 }
    273 
    274 SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler)
    275 {
    276 	struct sljit_memory_fragment *buf;
    277 	sljit_ub *code;
    278 	sljit_ub *code_ptr;
    279 	sljit_ub *buf_ptr;
    280 	sljit_ub *buf_end;
    281 	sljit_ub len;
    282 
    283 	struct sljit_label *label;
    284 	struct sljit_jump *jump;
    285 	struct sljit_const *const_;
    286 
    287 	CHECK_ERROR_PTR();
    288 	check_sljit_generate_code(compiler);
    289 	reverse_buf(compiler);
    290 
    291 	/* Second code generation pass. */
    292 	code = (sljit_ub*)SLJIT_MALLOC_EXEC(compiler->size);
    293 	PTR_FAIL_WITH_EXEC_IF(code);
    294 	buf = compiler->buf;
    295 
    296 	code_ptr = code;
    297 	label = compiler->labels;
    298 	jump = compiler->jumps;
    299 	const_ = compiler->consts;
    300 	do {
    301 		buf_ptr = buf->memory;
    302 		buf_end = buf_ptr + buf->used_size;
    303 		do {
    304 			len = *buf_ptr++;
    305 			if (len > 0) {
    306 				/* The code is already generated. */
    307 				SLJIT_MEMMOVE(code_ptr, buf_ptr, len);
    308 				code_ptr += len;
    309 				buf_ptr += len;
    310 			}
    311 			else {
    312 				if (*buf_ptr >= 4) {
    313 					jump->addr = (sljit_uw)code_ptr;
    314 					if (!(jump->flags & SLJIT_REWRITABLE_JUMP))
    315 						code_ptr = generate_near_jump_code(jump, code_ptr, code, *buf_ptr - 4);
    316 					else
    317 						code_ptr = generate_far_jump_code(jump, code_ptr, *buf_ptr - 4);
    318 					jump = jump->next;
    319 				}
    320 				else if (*buf_ptr == 0) {
    321 					label->addr = (sljit_uw)code_ptr;
    322 					label->size = code_ptr - code;
    323 					label = label->next;
    324 				}
    325 				else if (*buf_ptr == 1) {
    326 					const_->addr = ((sljit_uw)code_ptr) - sizeof(sljit_w);
    327 					const_ = const_->next;
    328 				}
    329 				else {
    330 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
    331 					*code_ptr++ = (*buf_ptr == 2) ? 0xe8 /* call */ : 0xe9 /* jmp */;
    332 					buf_ptr++;
    333 					*(sljit_w*)code_ptr = *(sljit_w*)buf_ptr - ((sljit_w)code_ptr + sizeof(sljit_w));
    334 					code_ptr += sizeof(sljit_w);
    335 					buf_ptr += sizeof(sljit_w) - 1;
    336 #else
    337 					code_ptr = generate_fixed_jump(code_ptr, *(sljit_w*)(buf_ptr + 1), *buf_ptr);
    338 					buf_ptr += sizeof(sljit_w);
    339 #endif
    340 				}
    341 				buf_ptr++;
    342 			}
    343 		} while (buf_ptr < buf_end);
    344 		SLJIT_ASSERT(buf_ptr == buf_end);
    345 		buf = buf->next;
    346 	} while (buf);
    347 
    348 	SLJIT_ASSERT(!label);
    349 	SLJIT_ASSERT(!jump);
    350 	SLJIT_ASSERT(!const_);
    351 
    352 	jump = compiler->jumps;
    353 	while (jump) {
    354 		if (jump->flags & PATCH_MB) {
    355 			SLJIT_ASSERT((sljit_w)(jump->u.label->addr - (jump->addr + sizeof(sljit_b))) >= -128 && (sljit_w)(jump->u.label->addr - (jump->addr + sizeof(sljit_b))) <= 127);
    356 			*(sljit_ub*)jump->addr = (sljit_ub)(jump->u.label->addr - (jump->addr + sizeof(sljit_b)));
    357 		} else if (jump->flags & PATCH_MW) {
    358 			if (jump->flags & JUMP_LABEL) {
    359 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
    360 				*(sljit_w*)jump->addr = (sljit_w)(jump->u.label->addr - (jump->addr + sizeof(sljit_w)));
    361 #else
    362 				SLJIT_ASSERT((sljit_w)(jump->u.label->addr - (jump->addr + sizeof(sljit_hw))) >= -0x80000000ll && (sljit_w)(jump->u.label->addr - (jump->addr + sizeof(sljit_hw))) <= 0x7fffffffll);
    363 				*(sljit_hw*)jump->addr = (sljit_hw)(jump->u.label->addr - (jump->addr + sizeof(sljit_hw)));
    364 #endif
    365 			}
    366 			else {
    367 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
    368 				*(sljit_w*)jump->addr = (sljit_w)(jump->u.target - (jump->addr + sizeof(sljit_w)));
    369 #else
    370 				SLJIT_ASSERT((sljit_w)(jump->u.target - (jump->addr + sizeof(sljit_hw))) >= -0x80000000ll && (sljit_w)(jump->u.target - (jump->addr + sizeof(sljit_hw))) <= 0x7fffffffll);
    371 				*(sljit_hw*)jump->addr = (sljit_hw)(jump->u.target - (jump->addr + sizeof(sljit_hw)));
    372 #endif
    373 			}
    374 		}
    375 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
    376 		else if (jump->flags & PATCH_MD)
    377 			*(sljit_w*)jump->addr = jump->u.label->addr;
    378 #endif
    379 
    380 		jump = jump->next;
    381 	}
    382 
    383 	/* Maybe we waste some space because of short jumps. */
    384 	SLJIT_ASSERT(code_ptr <= code + compiler->size);
    385 	compiler->error = SLJIT_ERR_COMPILED;
    386 	compiler->executable_size = compiler->size;
    387 	return (void*)code;
    388 }
    389 
    390 /* --------------------------------------------------------------------- */
    391 /*  Operators                                                            */
    392 /* --------------------------------------------------------------------- */
    393 
/* Forward declarations of emitters that are referenced before they are
   defined. Each takes the destination operand (dst, dstw) followed by the
   source operand(s); the two binary emitters additionally receive the four
   opcode bytes to choose from (op_rm, op_mr, op_imm, op_eax_imm). */
    394 static int emit_cum_binary(struct sljit_compiler *compiler,
    395 	sljit_ub op_rm, sljit_ub op_mr, sljit_ub op_imm, sljit_ub op_eax_imm,
    396 	int dst, sljit_w dstw,
    397 	int src1, sljit_w src1w,
    398 	int src2, sljit_w src2w);
    399 
    400 static int emit_non_cum_binary(struct sljit_compiler *compiler,
    401 	sljit_ub op_rm, sljit_ub op_mr, sljit_ub op_imm, sljit_ub op_eax_imm,
    402 	int dst, sljit_w dstw,
    403 	int src1, sljit_w src1w,
    404 	int src2, sljit_w src2w);
    405 
    406 static int emit_mov(struct sljit_compiler *compiler,
    407 	int dst, sljit_w dstw,
    408 	int src, sljit_w srcw);
    409 
    410 static SLJIT_INLINE int emit_save_flags(struct sljit_compiler *compiler)
    411 {
    412 	sljit_ub *buf;
    413 
    414 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
    415 	buf = (sljit_ub*)ensure_buf(compiler, 1 + 5);
    416 	FAIL_IF(!buf);
    417 	INC_SIZE(5);
    418 #else
    419 	buf = (sljit_ub*)ensure_buf(compiler, 1 + 6);
    420 	FAIL_IF(!buf);
    421 	INC_SIZE(6);
    422 	*buf++ = REX_W;
    423 #endif
    424 	*buf++ = 0x8d; /* lea esp/rsp, [esp/rsp + sizeof(sljit_w)] */
    425 	*buf++ = 0x64;
    426 	*buf++ = 0x24;
    427 	*buf++ = (sljit_ub)sizeof(sljit_w);
    428 	*buf++ = 0x9c; /* pushfd / pushfq */
    429 	compiler->flags_saved = 1;
    430 	return SLJIT_SUCCESS;
    431 }
    432 
    433 static SLJIT_INLINE int emit_restore_flags(struct sljit_compiler *compiler, int keep_flags)
    434 {
    435 	sljit_ub *buf;
    436 
    437 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
    438 	buf = (sljit_ub*)ensure_buf(compiler, 1 + 5);
    439 	FAIL_IF(!buf);
    440 	INC_SIZE(5);
    441 	*buf++ = 0x9d; /* popfd */
    442 #else
    443 	buf = (sljit_ub*)ensure_buf(compiler, 1 + 6);
    444 	FAIL_IF(!buf);
    445 	INC_SIZE(6);
    446 	*buf++ = 0x9d; /* popfq */
    447 	*buf++ = REX_W;
    448 #endif
    449 	*buf++ = 0x8d; /* lea esp/rsp, [esp/rsp - sizeof(sljit_w)] */
    450 	*buf++ = 0x64;
    451 	*buf++ = 0x24;
    452 	*buf++ = (sljit_ub)-(int)sizeof(sljit_w);
    453 	compiler->flags_saved = keep_flags;
    454 	return SLJIT_SUCCESS;
    455 }
    456 
    457 #ifdef _WIN32
    458 #include <malloc.h>
    459 
    460 static void SLJIT_CALL sljit_grow_stack(sljit_w local_size)
    461 {
    462 	/* Workaround for calling the internal _chkstk() function on Windows.
    463 	This function touches all 4k pages belongs to the requested stack space,
    464 	which size is passed in local_size. This is necessary on Windows where
    465 	the stack can only grow in 4k steps. However, this function just burn
    466 	CPU cycles if the stack is large enough, but you don't know it in advance.
    467 	I think this is a bad design even if it has some reasons. */
    468 	alloca(local_size);
    469 }
    470 
    471 #endif
    472 
    473 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
    474 #include "sljitNativeX86_32.c"
    475 #else
    476 #include "sljitNativeX86_64.c"
    477 #endif
    478 
    479 static int emit_mov(struct sljit_compiler *compiler,
    480 	int dst, sljit_w dstw,
    481 	int src, sljit_w srcw)
    482 {
    483 	sljit_ub* code;
    484 
    485 	if (dst == SLJIT_UNUSED) {
    486 		/* No destination, doesn't need to setup flags. */
    487 		if (src & SLJIT_MEM) {
    488 			code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src, srcw);
    489 			FAIL_IF(!code);
    490 			*code = 0x8b;
    491 		}
    492 		return SLJIT_SUCCESS;
    493 	}
    494 	if (src >= SLJIT_TEMPORARY_REG1 && src <= TMP_REGISTER) {
    495 		code = emit_x86_instruction(compiler, 1, src, 0, dst, dstw);
    496 		FAIL_IF(!code);
    497 		*code = 0x89;
    498 		return SLJIT_SUCCESS;
    499 	}
    500 	if (src & SLJIT_IMM) {
    501 		if (dst >= SLJIT_TEMPORARY_REG1 && dst <= TMP_REGISTER) {
    502 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
    503 			return emit_do_imm(compiler, 0xb8 + reg_map[dst], srcw);
    504 #else
    505 			if (!compiler->mode32) {
    506 				if (NOT_HALFWORD(srcw))
    507 					return emit_load_imm64(compiler, dst, srcw);
    508 			}
    509 			else
    510 				return emit_do_imm32(compiler, (reg_map[dst] >= 8) ? REX_B : 0, 0xb8 + reg_lmap[dst], srcw);
    511 #endif
    512 		}
    513 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
    514 		if (!compiler->mode32 && NOT_HALFWORD(srcw)) {
    515 			FAIL_IF(emit_load_imm64(compiler, TMP_REG2, srcw));
    516 			code = emit_x86_instruction(compiler, 1, TMP_REG2, 0, dst, dstw);
    517 			FAIL_IF(!code);
    518 			*code = 0x89;
    519 			return SLJIT_SUCCESS;
    520 		}
    521 #endif
    522 		code = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, dstw);
    523 		FAIL_IF(!code);
    524 		*code = 0xc7;
    525 		return SLJIT_SUCCESS;
    526 	}
    527 	if (dst >= SLJIT_TEMPORARY_REG1 && dst <= TMP_REGISTER) {
    528 		code = emit_x86_instruction(compiler, 1, dst, 0, src, srcw);
    529 		FAIL_IF(!code);
    530 		*code = 0x8b;
    531 		return SLJIT_SUCCESS;
    532 	}
    533 
    534 	/* Memory to memory move. Requires two instruction. */
    535 	code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src, srcw);
    536 	FAIL_IF(!code);
    537 	*code = 0x8b;
    538 	code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, dst, dstw);
    539 	FAIL_IF(!code);
    540 	*code = 0x89;
    541 	return SLJIT_SUCCESS;
    542 }
    543 
/* Calls emit_mov() with the given operands and hands its result to FAIL_IF
   (defined elsewhere; presumably it returns from the enclosing function on a
   non-zero result). Note that the expansion already ends with a semicolon,
   so "EMIT_MOV(...);" yields an extra empty statement. */
    544 #define EMIT_MOV(compiler, dst, dstw, src, srcw) \
    545 	FAIL_IF(emit_mov(compiler, dst, dstw, src, srcw));
    546 
/* Emits an operation that takes no explicit operands.

   SLJIT_BREAKPOINT and SLJIT_NOP produce the single byte instructions 0xcc
   and 0x90. SLJIT_UMUL / SLJIT_SMUL / SLJIT_UDIV / SLJIT_SDIV produce a
   0xf7 group instruction (/4 mul, /5 imul, /6 div, /7 idiv); these operate
   on SLJIT_TEMPORARY_REG1 and SLJIT_TEMPORARY_REG2, with TMP_REGISTER used
   for the shuffling that the fixed register usage of the hardware requires.
   Any other opcode emits nothing.

   Returns SLJIT_SUCCESS, or leaves through FAIL_IF / CHECK_ERROR on error. */
    547 SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op0(struct sljit_compiler *compiler, int op)
    548 {
    549 	sljit_ub *buf;
    550 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
    551 	int size;
    552 #endif
    553 
    554 	CHECK_ERROR();
    555 	check_sljit_emit_op0(compiler, op);
    556 
    557 	switch (GET_OPCODE(op)) {
    558 	case SLJIT_BREAKPOINT:
    559 		buf = (sljit_ub*)ensure_buf(compiler, 1 + 1);
    560 		FAIL_IF(!buf);
    561 		INC_SIZE(1);
    562 		*buf = 0xcc; /* int3 */
    563 		break;
    564 	case SLJIT_NOP:
    565 		buf = (sljit_ub*)ensure_buf(compiler, 1 + 1);
    566 		FAIL_IF(!buf);
    567 		INC_SIZE(1);
    568 		*buf = 0x90; /* nop */
    569 		break;
    570 	case SLJIT_UMUL:
    571 	case SLJIT_SMUL:
    572 	case SLJIT_UDIV:
    573 	case SLJIT_SDIV:
    574 		compiler->flags_saved = 0;
    575 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		/* The code below hard-codes the register layout checked here:
		   SLJIT_TEMPORARY_REG1 must be rax. On Win64 SLJIT_TEMPORARY_REG2
		   must be rdx and TMP_REGISTER must need a REX prefix; elsewhere
		   TMP_REGISTER must be rdx and SLJIT_TEMPORARY_REG2 must be
		   encodable without REX. */
    576 #ifdef _WIN64
    577 		SLJIT_COMPILE_ASSERT(
    578 			reg_map[SLJIT_TEMPORARY_REG1] == 0
    579 			&& reg_map[SLJIT_TEMPORARY_REG2] == 2
    580 			&& reg_map[TMP_REGISTER] > 7,
    581 			invalid_register_assignment_for_div_mul);
    582 #else
    583 		SLJIT_COMPILE_ASSERT(
    584 			reg_map[SLJIT_TEMPORARY_REG1] == 0
    585 			&& reg_map[SLJIT_TEMPORARY_REG2] < 7
    586 			&& reg_map[TMP_REGISTER] == 2,
    587 			invalid_register_assignment_for_div_mul);
    588 #endif
		/* Must be read before op is reduced to its bare opcode below. */
    589 		compiler->mode32 = op & SLJIT_INT_OP;
    590 #endif
    591 
    592 		op = GET_OPCODE(op);
    593 		if (op == SLJIT_UDIV) {
    594 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64)
			/* SLJIT_TEMPORARY_REG2 is edx/rdx here: move the divisor out of
			   the way, then clear it as the upper half of the dividend. */
    595 			EMIT_MOV(compiler, TMP_REGISTER, 0, SLJIT_TEMPORARY_REG2, 0);
    596 			buf = emit_x86_instruction(compiler, 1, SLJIT_TEMPORARY_REG2, 0, SLJIT_TEMPORARY_REG2, 0);
    597 #else
			/* TMP_REGISTER is rdx here: clear the upper half of the dividend. */
    598 			buf = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, TMP_REGISTER, 0);
    599 #endif
    600 			FAIL_IF(!buf);
    601 			*buf = 0x33; /* xor r, r/m */
    602 		}
    603 
    604 		if (op == SLJIT_SDIV) {
    605 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64)
			/* Save the divisor before edx/rdx is overwritten by the sign
			   extension below. */
    606 			EMIT_MOV(compiler, TMP_REGISTER, 0, SLJIT_TEMPORARY_REG2, 0);
    607 #endif
    608 
    609 			/* CDQ instruction (CQO when prefixed with REX.W): sign extends
			   eax/rax into edx/rdx. */
    610 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
    611 			buf = (sljit_ub*)ensure_buf(compiler, 1 + 1);
    612 			FAIL_IF(!buf);
    613 			INC_SIZE(1);
    614 			*buf = 0x99;
    615 #else
    616 			if (compiler->mode32) {
    617 				buf = (sljit_ub*)ensure_buf(compiler, 1 + 1);
    618 				FAIL_IF(!buf);
    619 				INC_SIZE(1);
    620 				*buf = 0x99;
    621 			} else {
    622 				buf = (sljit_ub*)ensure_buf(compiler, 1 + 2);
    623 				FAIL_IF(!buf);
    624 				INC_SIZE(2);
    625 				*buf++ = REX_W;
    626 				*buf = 0x99;
    627 			}
    628 #endif
    629 		}
    630 
		/* Emit "0xf7 ModRM" with a register operand (mod == 3). The /digit
		   selecting the actual operation is OR-ed into the ModRM byte by
		   the switch that follows, so buf must keep pointing at that byte. */
    631 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
    632 		buf = (sljit_ub*)ensure_buf(compiler, 1 + 2);
    633 		FAIL_IF(!buf);
    634 		INC_SIZE(2);
    635 		*buf++ = 0xf7;
		/* Divisions use the copy in TMP_REGISTER, multiplications use
		   SLJIT_TEMPORARY_REG2 directly. */
    636 		*buf = 0xc0 | ((op >= SLJIT_UDIV) ? reg_map[TMP_REGISTER] : reg_map[SLJIT_TEMPORARY_REG2]);
    637 #else
    638 #ifdef _WIN64
		/* A prefix byte is needed for 64 bit operation (REX.W) and/or for
		   addressing TMP_REGISTER (REX.B) in case of divisions. */
    639 		size = (!compiler->mode32 || op >= SLJIT_UDIV) ? 3 : 2;
    640 #else
    641 		size = (!compiler->mode32) ? 3 : 2;
    642 #endif
    643 		buf = (sljit_ub*)ensure_buf(compiler, 1 + size);
    644 		FAIL_IF(!buf);
    645 		INC_SIZE(size);
    646 #ifdef _WIN64
    647 		if (!compiler->mode32)
    648 			*buf++ = REX_W | ((op >= SLJIT_UDIV) ? REX_B : 0);
    649 		else if (op >= SLJIT_UDIV)
    650 			*buf++ = REX_B;
    651 		*buf++ = 0xf7;
    652 		*buf = 0xc0 | ((op >= SLJIT_UDIV) ? reg_lmap[TMP_REGISTER] : reg_lmap[SLJIT_TEMPORARY_REG2]);
    653 #else
    654 		if (!compiler->mode32)
    655 			*buf++ = REX_W;
    656 		*buf++ = 0xf7;
    657 		*buf = 0xc0 | reg_map[SLJIT_TEMPORARY_REG2];
    658 #endif
    659 #endif
		/* Select the operation through the reg field of the ModRM byte. */
    660 		switch (op) {
    661 		case SLJIT_UMUL:
    662 			*buf |= 4 << 3; /* mul */
    663 			break;
    664 		case SLJIT_SMUL:
    665 			*buf |= 5 << 3; /* imul */
    666 			break;
    667 		case SLJIT_UDIV:
    668 			*buf |= 6 << 3; /* div */
    669 			break;
    670 		case SLJIT_SDIV:
    671 			*buf |= 7 << 3; /* idiv */
    672 			break;
    673 		}
    674 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) && !defined(_WIN64)
		/* The hardware leaves the second result in rdx (TMP_REGISTER in this
		   configuration): copy it into SLJIT_TEMPORARY_REG2. */
    675 		EMIT_MOV(compiler, SLJIT_TEMPORARY_REG2, 0, TMP_REGISTER, 0);
    676 #endif
    677 		break;
    678 	}
    679 
    680 	return SLJIT_SUCCESS;
    681 }
    682 
/* Appends the single byte `prefix` to the instruction stream as a record of
   its own: reserves buffer space with ensure_buf, writes the record length
   through INC_CSIZE and then the byte itself. Requires locals named `code`
   and `compiler`, and leaves the enclosing function through FAIL_IF when
   ensure_buf fails. */
    683 #define ENCODE_PREFIX(prefix) \
    684 	do { \
    685 		code = (sljit_ub*)ensure_buf(compiler, 1 + 1); \
    686 		FAIL_IF(!code); \
    687 		INC_CSIZE(1); \
    688 		*code = (prefix); \
    689 	} while (0)
    690 
/* Emits a byte sized move from (src, srcw) to (dst, dstw).

   sign  non-zero selects sign extension (movsx), zero selects zero
         extension (movzx) whenever the byte is widened into a register.

   Immediates are stored directly. Other sources are first brought into a
   register (the destination register, the source register itself, or
   TMP_REGISTER) and, for memory destinations, the low byte of that register
   is stored with "mov r/m8, r8". On x86-32 only the registers whose hardware
   index is below 4 have an addressable low byte, which is what the extra
   32 bit only paths below work around.

   Returns SLJIT_SUCCESS, or leaves through FAIL_IF on failure. */
    691 static int emit_mov_byte(struct sljit_compiler *compiler, int sign,
    692 	int dst, sljit_w dstw,
    693 	int src, sljit_w srcw)
    694 {
    695 	sljit_ub* code;
    696 	int dst_r;
    697 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
    698 	int work_r;
    699 #endif
    700 
    701 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
    702 	compiler->mode32 = 0;
    703 #endif
    704 
    705 	if (dst == SLJIT_UNUSED && !(src & SLJIT_MEM))
    706 		return SLJIT_SUCCESS; /* Empty instruction. */
    707 
    708 	if (src & SLJIT_IMM) {
    709 		if (dst >= SLJIT_TEMPORARY_REG1 && dst <= TMP_REGISTER) {
			/* Register destination: load srcw as a full width immediate. */
    710 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
    711 			return emit_do_imm(compiler, 0xb8 + reg_map[dst], srcw);
    712 #else
    713 			return emit_load_imm64(compiler, dst, srcw);
    714 #endif
    715 		}
    716 		code = emit_x86_instruction(compiler, 1 | EX86_BYTE_ARG | EX86_NO_REXW, SLJIT_IMM, srcw, dst, dstw);
    717 		FAIL_IF(!code);
    718 		*code = 0xc6; /* mov r/m8, imm8 */
    719 		return SLJIT_SUCCESS;
    720 	}
    721 
	/* Register that will hold the byte: dst itself when it is a register. */
    722 	dst_r = (dst >= SLJIT_TEMPORARY_REG1 && dst <= TMP_REGISTER) ? dst : TMP_REGISTER;
    723 
    724 	if ((dst & SLJIT_MEM) && src >= SLJIT_TEMPORARY_REG1 && src <= SLJIT_NO_REGISTERS) {
		/* Register to memory: no extension is needed, only a store below. */
    725 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
    726 		if (reg_map[src] >= 4) {
			/* src has no byte form: copy it into TMP_REGISTER; the store
			   code below deals with TMP_REGISTER specially. */
    727 			SLJIT_ASSERT(dst_r == TMP_REGISTER);
    728 			EMIT_MOV(compiler, TMP_REGISTER, 0, src, 0);
    729 		} else
    730 			dst_r = src;
    731 #else
    732 		dst_r = src;
    733 #endif
    734 	}
    735 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
    736 	else if (src >= SLJIT_TEMPORARY_REG1 && src <= SLJIT_NO_REGISTERS && reg_map[src] >= 4) {
    737 		/* src, dst are registers. */
    738 		SLJIT_ASSERT(dst >= SLJIT_TEMPORARY_REG1 && dst <= TMP_REGISTER);
    739 		if (reg_map[dst] < 4) {
			/* dst has a byte form: copy the whole register, then extend
			   dst from its own low byte. */
    740 			if (dst != src)
    741 				EMIT_MOV(compiler, dst, 0, src, 0);
    742 			code = emit_x86_instruction(compiler, 2, dst, 0, dst, 0);
    743 			FAIL_IF(!code);
    744 			*code++ = 0x0f;
    745 			*code = sign ? 0xbe : 0xb6; /* movsx / movzx r, r/m8 */
    746 		}
    747 		else {
			/* Neither register has a byte form: extend with shifts or a mask. */
    748 			if (dst != src)
    749 				EMIT_MOV(compiler, dst, 0, src, 0);
    750 			if (sign) {
    751 				/* shl reg, 24 */
    752 				code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 24, dst, 0);
    753 				FAIL_IF(!code);
    754 				*code |= 0x4 << 3;
    755 				code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 24, dst, 0);
    756 				FAIL_IF(!code);
    757 				/* sar reg, 24 */
    758 				*code |= 0x7 << 3;
    759 			}
    760 			else {
    761 				/* and dst, 0xff */
    762 				code = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 255, dst, 0);
    763 				FAIL_IF(!code);
    764 				*(code + 1) |= 0x4 << 3;
    765 			}
    766 		}
    767 		return SLJIT_SUCCESS;
    768 	}
    769 #endif
    770 	else {
    771 		/* src can be memory addr or reg_map[src] < 4 on x86_32 architectures. */
    772 		code = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw);
    773 		FAIL_IF(!code);
    774 		*code++ = 0x0f;
    775 		*code = sign ? 0xbe : 0xb6; /* movsx / movzx r, r/m8 */
    776 	}
    777 
    778 	if (dst & SLJIT_MEM) {
    779 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
    780 		if (dst_r == TMP_REGISTER) {
    781 			/* TMP_REGISTER has no byte form. Find a register with
			   reg_map[] < 4 that is used neither as base (dst & 0xf) nor as
			   index ((dst & 0xf0) >> 4) of the destination address. */
    782 			if ((dst & 0xf) == SLJIT_TEMPORARY_REG1) {
    783 				if ((dst & 0xf0) == (SLJIT_TEMPORARY_REG2 << 4))
    784 					work_r = SLJIT_TEMPORARY_REG3;
    785 				else
    786 					work_r = SLJIT_TEMPORARY_REG2;
    787 			}
    788 			else {
    789 				if ((dst & 0xf0) != (SLJIT_TEMPORARY_REG1 << 4))
    790 					work_r = SLJIT_TEMPORARY_REG1;
    791 				else if ((dst & 0xf) == SLJIT_TEMPORARY_REG2)
    792 					work_r = SLJIT_TEMPORARY_REG3;
    793 				else
    794 					work_r = SLJIT_TEMPORARY_REG2;
    795 			}
    796 
			/* Swap work_r with TMP_REGISTER so that the byte sits in work_r. */
    797 			if (work_r == SLJIT_TEMPORARY_REG1) {
				/* One byte form: xchg eax, reg (0x90 + reg). */
    798 				ENCODE_PREFIX(0x90 + reg_map[TMP_REGISTER]);
    799 			}
    800 			else {
    801 				code = emit_x86_instruction(compiler, 1, work_r, 0, dst_r, 0);
    802 				FAIL_IF(!code);
    803 				*code = 0x87; /* xchg r/m, r */
    804 			}
    805 
    806 			code = emit_x86_instruction(compiler, 1, work_r, 0, dst, dstw);
    807 			FAIL_IF(!code);
    808 			*code = 0x88; /* mov r/m8, r8 */
    809 
			/* Swap back to restore the original value of work_r. */
    810 			if (work_r == SLJIT_TEMPORARY_REG1) {
    811 				ENCODE_PREFIX(0x90 + reg_map[TMP_REGISTER]);
    812 			}
    813 			else {
    814 				code = emit_x86_instruction(compiler, 1, work_r, 0, dst_r, 0);
    815 				FAIL_IF(!code);
    816 				*code = 0x87; /* xchg r/m, r */
    817 			}
    818 		}
    819 		else {
    820 			code = emit_x86_instruction(compiler, 1, dst_r, 0, dst, dstw);
    821 			FAIL_IF(!code);
    822 			*code = 0x88; /* mov r/m8, r8 */
    823 		}
    824 #else
    825 		code = emit_x86_instruction(compiler, 1 | EX86_REX | EX86_NO_REXW, dst_r, 0, dst, dstw);
    826 		FAIL_IF(!code);
    827 		*code = 0x88; /* mov r/m8, r8 */
    828 #endif
    829 	}
    830 
    831 	return SLJIT_SUCCESS;
    832 }
    833 
    834 static int emit_mov_half(struct sljit_compiler *compiler, int sign,
    835 	int dst, sljit_w dstw,
    836 	int src, sljit_w srcw)
    837 {
    838 	sljit_ub* code;
    839 	int dst_r;
    840 
    841 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
    842 	compiler->mode32 = 0;
    843 #endif
    844 
    845 	if (dst == SLJIT_UNUSED && !(src & SLJIT_MEM))
    846 		return SLJIT_SUCCESS; /* Empty instruction. */
    847 
    848 	if (src & SLJIT_IMM) {
    849 		if (dst >= SLJIT_TEMPORARY_REG1 && dst <= TMP_REGISTER) {
    850 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
    851 			return emit_do_imm(compiler, 0xb8 + reg_map[dst], srcw);
    852 #else
    853 			return emit_load_imm64(compiler, dst, srcw);
    854 #endif
    855 		}
    856 		code = emit_x86_instruction(compiler, 1 | EX86_HALF_ARG | EX86_NO_REXW | EX86_PREF_66, SLJIT_IMM, srcw, dst, dstw);
    857 		FAIL_IF(!code);
    858 		*code = 0xc7;
    859 		return SLJIT_SUCCESS;
    860 	}
    861 
    862 	dst_r = (dst >= SLJIT_TEMPORARY_REG1 && dst <= TMP_REGISTER) ? dst : TMP_REGISTER;
    863 
    864 	if ((dst & SLJIT_MEM) && (src >= SLJIT_TEMPORARY_REG1 && src <= SLJIT_NO_REGISTERS))
    865 		dst_r = src;
    866 	else {
    867 		code = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw);
    868 		FAIL_IF(!code);
    869 		*code++ = 0x0f;
    870 		*code = sign ? 0xbf : 0xb7;
    871 	}
    872 
    873 	if (dst & SLJIT_MEM) {
    874 		code = emit_x86_instruction(compiler, 1 | EX86_NO_REXW | EX86_PREF_66, dst_r, 0, dst, dstw);
    875 		FAIL_IF(!code);
    876 		*code = 0x89;
    877 	}
    878 
    879 	return SLJIT_SUCCESS;
    880 }
    881 
    882 static int emit_unary(struct sljit_compiler *compiler, int un_index,
    883 	int dst, sljit_w dstw,
    884 	int src, sljit_w srcw)
    885 {
    886 	sljit_ub* code;
    887 
    888 	if (dst == SLJIT_UNUSED) {
    889 		EMIT_MOV(compiler, TMP_REGISTER, 0, src, srcw);
    890 		code = emit_x86_instruction(compiler, 1, 0, 0, TMP_REGISTER, 0);
    891 		FAIL_IF(!code);
    892 		*code++ = 0xf7;
    893 		*code |= (un_index) << 3;
    894 		return SLJIT_SUCCESS;
    895 	}
    896 	if (dst == src && dstw == srcw) {
    897 		/* Same input and output */
    898 		code = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
    899 		FAIL_IF(!code);
    900 		*code++ = 0xf7;
    901 		*code |= (un_index) << 3;
    902 		return SLJIT_SUCCESS;
    903 	}
    904 	if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) {
    905 		EMIT_MOV(compiler, dst, 0, src, srcw);
    906 		code = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
    907 		FAIL_IF(!code);
    908 		*code++ = 0xf7;
    909 		*code |= (un_index) << 3;
    910 		return SLJIT_SUCCESS;
    911 	}
    912 	EMIT_MOV(compiler, TMP_REGISTER, 0, src, srcw);
    913 	code = emit_x86_instruction(compiler, 1, 0, 0, TMP_REGISTER, 0);
    914 	FAIL_IF(!code);
    915 	*code++ = 0xf7;
    916 	*code |= (un_index) << 3;
    917 	EMIT_MOV(compiler, dst, dstw, TMP_REGISTER, 0);
    918 	return SLJIT_SUCCESS;
    919 }
    920 
    921 static int emit_not_with_flags(struct sljit_compiler *compiler,
    922 	int dst, sljit_w dstw,
    923 	int src, sljit_w srcw)
    924 {
    925 	sljit_ub* code;
    926 
    927 	if (dst == SLJIT_UNUSED) {
    928 		EMIT_MOV(compiler, TMP_REGISTER, 0, src, srcw);
    929 		code = emit_x86_instruction(compiler, 1, 0, 0, TMP_REGISTER, 0);
    930 		FAIL_IF(!code);
    931 		*code++ = 0xf7;
    932 		*code |= 0x2 << 3;
    933 		code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, TMP_REGISTER, 0);
    934 		FAIL_IF(!code);
    935 		*code = 0x0b;
    936 		return SLJIT_SUCCESS;
    937 	}
    938 	if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) {
    939 		EMIT_MOV(compiler, dst, 0, src, srcw);
    940 		code = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
    941 		FAIL_IF(!code);
    942 		*code++ = 0xf7;
    943 		*code |= 0x2 << 3;
    944 		code = emit_x86_instruction(compiler, 1, dst, 0, dst, 0);
    945 		FAIL_IF(!code);
    946 		*code = 0x0b;
    947 		return SLJIT_SUCCESS;
    948 	}
    949 	EMIT_MOV(compiler, TMP_REGISTER, 0, src, srcw);
    950 	code = emit_x86_instruction(compiler, 1, 0, 0, TMP_REGISTER, 0);
    951 	FAIL_IF(!code);
    952 	*code++ = 0xf7;
    953 	*code |= 0x2 << 3;
    954 	code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, TMP_REGISTER, 0);
    955 	FAIL_IF(!code);
    956 	*code = 0x0b;
    957 	EMIT_MOV(compiler, dst, dstw, TMP_REGISTER, 0);
    958 	return SLJIT_SUCCESS;
    959 }
    960 
    961 static int emit_clz(struct sljit_compiler *compiler, int op,
    962 	int dst, sljit_w dstw,
    963 	int src, sljit_w srcw)
    964 {
    965 	sljit_ub* code;
    966 	int dst_r;
    967 
    968 	SLJIT_UNUSED_ARG(op);
    969 	if (SLJIT_UNLIKELY(dst == SLJIT_UNUSED)) {
    970 		/* Just set the zero flag. */
    971 		EMIT_MOV(compiler, TMP_REGISTER, 0, src, srcw);
    972 		code = emit_x86_instruction(compiler, 1, 0, 0, TMP_REGISTER, 0);
    973 		FAIL_IF(!code);
    974 		*code++ = 0xf7;
    975 		*code |= 0x2 << 3;
    976 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
    977 		code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 31, TMP_REGISTER, 0);
    978 #else
    979 		code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, !(op & SLJIT_INT_OP) ? 63 : 31, TMP_REGISTER, 0);
    980 #endif
    981 		FAIL_IF(!code);
    982 		*code |= 0x5 << 3;
    983 		return SLJIT_SUCCESS;
    984 	}
    985 
    986 	if (SLJIT_UNLIKELY(src & SLJIT_IMM)) {
    987 		EMIT_MOV(compiler, TMP_REGISTER, 0, src, srcw);
    988 		src = TMP_REGISTER;
    989 		srcw = 0;
    990 	}
    991 
    992 	code = emit_x86_instruction(compiler, 2, TMP_REGISTER, 0, src, srcw);
    993 	FAIL_IF(!code);
    994 	*code++ = 0x0f;
    995 	*code = 0xbd;
    996 
    997 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
    998 	if (dst >= SLJIT_TEMPORARY_REG1 && dst <= TMP_REGISTER)
    999 		dst_r = dst;
   1000 	else {
   1001 		/* Find an unused temporary register. */
   1002 		if ((dst & 0xf) != SLJIT_TEMPORARY_REG1 && (dst & 0xf0) != (SLJIT_TEMPORARY_REG1 << 4))
   1003 			dst_r = SLJIT_TEMPORARY_REG1;
   1004 		else if ((dst & 0xf) != SLJIT_TEMPORARY_REG2 && (dst & 0xf0) != (SLJIT_TEMPORARY_REG2 << 4))
   1005 			dst_r = SLJIT_TEMPORARY_REG2;
   1006 		else
   1007 			dst_r = SLJIT_TEMPORARY_REG3;
   1008 		EMIT_MOV(compiler, dst, dstw, dst_r, 0);
   1009 	}
   1010 	EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, 32 + 31);
   1011 #else
   1012 	dst_r = (dst >= SLJIT_TEMPORARY_REG1 && dst <= TMP_REGISTER) ? dst : TMP_REG2;
   1013 	compiler->mode32 = 0;
   1014 	EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, !(op & SLJIT_INT_OP) ? 64 + 63 : 32 + 31);
   1015 	compiler->mode32 = op & SLJIT_INT_OP;
   1016 #endif
   1017 
   1018 	code = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REGISTER, 0);
   1019 	FAIL_IF(!code);
   1020 	*code++ = 0x0f;
   1021 	*code = 0x45;
   1022 
   1023 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
   1024 	code = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 31, dst_r, 0);
   1025 #else
   1026 	code = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, !(op & SLJIT_INT_OP) ? 63 : 31, dst_r, 0);
   1027 #endif
   1028 	FAIL_IF(!code);
   1029 	*(code + 1) |= 0x6 << 3;
   1030 
   1031 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
   1032 	if (dst & SLJIT_MEM) {
   1033 		code = emit_x86_instruction(compiler, 1, dst_r, 0, dst, dstw);
   1034 		FAIL_IF(!code);
   1035 		*code = 0x87;
   1036 	}
   1037 #else
   1038 	if (dst & SLJIT_MEM)
   1039 		EMIT_MOV(compiler, dst, dstw, TMP_REG2, 0);
   1040 #endif
   1041 	return SLJIT_SUCCESS;
   1042 }
   1043 
   1044 SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op1(struct sljit_compiler *compiler, int op,
   1045 	int dst, sljit_w dstw,
   1046 	int src, sljit_w srcw)
   1047 {
   1048 	sljit_ub* code;
   1049 	int update = 0;
   1050 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
   1051 	int dst_is_ereg = 0;
   1052 	int src_is_ereg = 0;
   1053 #else
   1054 	#define src_is_ereg 0
   1055 #endif
   1056 
   1057 	CHECK_ERROR();
   1058 	check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw);
   1059 	ADJUST_LOCAL_OFFSET(dst, dstw);
   1060 	ADJUST_LOCAL_OFFSET(src, srcw);
   1061 
   1062 	CHECK_EXTRA_REGS(dst, dstw, dst_is_ereg = 1);
   1063 	CHECK_EXTRA_REGS(src, srcw, src_is_ereg = 1);
   1064 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
   1065 	compiler->mode32 = op & SLJIT_INT_OP;
   1066 #endif
   1067 
   1068 	if (GET_OPCODE(op) >= SLJIT_MOV && GET_OPCODE(op) <= SLJIT_MOVU_SI) {
   1069 		op = GET_OPCODE(op);
   1070 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
   1071 		compiler->mode32 = 0;
   1072 #endif
   1073 
   1074 		SLJIT_COMPILE_ASSERT(SLJIT_MOV + 7 == SLJIT_MOVU, movu_offset);
   1075 		if (op >= SLJIT_MOVU) {
   1076 			update = 1;
   1077 			op -= 7;
   1078 		}
   1079 
   1080 		if (src & SLJIT_IMM) {
   1081 			switch (op) {
   1082 			case SLJIT_MOV_UB:
   1083 				srcw = (unsigned char)srcw;
   1084 				break;
   1085 			case SLJIT_MOV_SB:
   1086 				srcw = (signed char)srcw;
   1087 				break;
   1088 			case SLJIT_MOV_UH:
   1089 				srcw = (unsigned short)srcw;
   1090 				break;
   1091 			case SLJIT_MOV_SH:
   1092 				srcw = (signed short)srcw;
   1093 				break;
   1094 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
   1095 			case SLJIT_MOV_UI:
   1096 				srcw = (unsigned int)srcw;
   1097 				break;
   1098 			case SLJIT_MOV_SI:
   1099 				srcw = (signed int)srcw;
   1100 				break;
   1101 #endif
   1102 			}
   1103 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
   1104 			if (SLJIT_UNLIKELY(dst_is_ereg))
   1105 				return emit_mov(compiler, dst, dstw, src, srcw);
   1106 #endif
   1107 		}
   1108 
   1109 		if (SLJIT_UNLIKELY(update) && (src & SLJIT_MEM) && !src_is_ereg && (src & 0xf) && (srcw != 0 || (src & 0xf0) != 0)) {
   1110 			code = emit_x86_instruction(compiler, 1, src & 0xf, 0, src, srcw);
   1111 			FAIL_IF(!code);
   1112 			*code = 0x8d;
   1113 			src &= SLJIT_MEM | 0xf;
   1114 			srcw = 0;
   1115 		}
   1116 
   1117 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
   1118 		if (SLJIT_UNLIKELY(dst_is_ereg) && (!(op == SLJIT_MOV || op == SLJIT_MOV_UI || op == SLJIT_MOV_SI) || (src & SLJIT_MEM))) {
   1119 			SLJIT_ASSERT(dst == SLJIT_MEM1(SLJIT_LOCALS_REG));
   1120 			dst = TMP_REGISTER;
   1121 		}
   1122 #endif
   1123 
   1124 		switch (op) {
   1125 		case SLJIT_MOV:
   1126 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
   1127 		case SLJIT_MOV_UI:
   1128 		case SLJIT_MOV_SI:
   1129 #endif
   1130 			FAIL_IF(emit_mov(compiler, dst, dstw, src, srcw));
   1131 			break;
   1132 		case SLJIT_MOV_UB:
   1133 			FAIL_IF(emit_mov_byte(compiler, 0, dst, dstw, src, (src & SLJIT_IMM) ? (unsigned char)srcw : srcw));
   1134 			break;
   1135 		case SLJIT_MOV_SB:
   1136 			FAIL_IF(emit_mov_byte(compiler, 1, dst, dstw, src, (src & SLJIT_IMM) ? (signed char)srcw : srcw));
   1137 			break;
   1138 		case SLJIT_MOV_UH:
   1139 			FAIL_IF(emit_mov_half(compiler, 0, dst, dstw, src, (src & SLJIT_IMM) ? (unsigned short)srcw : srcw));
   1140 			break;
   1141 		case SLJIT_MOV_SH:
   1142 			FAIL_IF(emit_mov_half(compiler, 1, dst, dstw, src, (src & SLJIT_IMM) ? (signed short)srcw : srcw));
   1143 			break;
   1144 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
   1145 		case SLJIT_MOV_UI:
   1146 			FAIL_IF(emit_mov_int(compiler, 0, dst, dstw, src, (src & SLJIT_IMM) ? (unsigned int)srcw : srcw));
   1147 			break;
   1148 		case SLJIT_MOV_SI:
   1149 			FAIL_IF(emit_mov_int(compiler, 1, dst, dstw, src, (src & SLJIT_IMM) ? (signed int)srcw : srcw));
   1150 			break;
   1151 #endif
   1152 		}
   1153 
   1154 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
   1155 		if (SLJIT_UNLIKELY(dst_is_ereg) && dst == TMP_REGISTER)
   1156 			return emit_mov(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), dstw, TMP_REGISTER, 0);
   1157 #endif
   1158 
   1159 		if (SLJIT_UNLIKELY(update) && (dst & SLJIT_MEM) && (dst & 0xf) && (dstw != 0 || (dst & 0xf0) != 0)) {
   1160 			code = emit_x86_instruction(compiler, 1, dst & 0xf, 0, dst, dstw);
   1161 			FAIL_IF(!code);
   1162 			*code = 0x8d;
   1163 		}
   1164 		return SLJIT_SUCCESS;
   1165 	}
   1166 
   1167 	if (SLJIT_UNLIKELY(GET_FLAGS(op)))
   1168 		compiler->flags_saved = 0;
   1169 
   1170 	switch (GET_OPCODE(op)) {
   1171 	case SLJIT_NOT:
   1172 		if (SLJIT_UNLIKELY(op & SLJIT_SET_E))
   1173 			return emit_not_with_flags(compiler, dst, dstw, src, srcw);
   1174 		return emit_unary(compiler, 0x2, dst, dstw, src, srcw);
   1175 
   1176 	case SLJIT_NEG:
   1177 		if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
   1178 			FAIL_IF(emit_save_flags(compiler));
   1179 		return emit_unary(compiler, 0x3, dst, dstw, src, srcw);
   1180 
   1181 	case SLJIT_CLZ:
   1182 		if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
   1183 			FAIL_IF(emit_save_flags(compiler));
   1184 		return emit_clz(compiler, op, dst, dstw, src, srcw);
   1185 	}
   1186 
   1187 	return SLJIT_SUCCESS;
   1188 
   1189 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
   1190 	#undef src_is_ereg
   1191 #endif
   1192 }
   1193 
/*
 * Helper macros for binary instructions with an immediate operand.
 *
 * Both macros expect `compiler` to be in scope; BINARY_IMM also expects a
 * `sljit_ub* code` local, and both leave the enclosing function through
 * FAIL_IF on failure.
 *
 * BINARY_IMM(_op_imm_, _op_mr_, immw, arg, argw)
 *     Applies the operation to (arg, argw) with the immediate immw. _op_imm_
 *     is OR-ed into the byte that follows the opcode of the immediate form.
 *     On x86-64, an immediate that is not IS_HALFWORD() while mode32 is off
 *     is first loaded into TMP_REG2 and the register form _op_mr_ is used.
 *
 * BINARY_EAX_IMM(_op_eax_imm_, immw)
 *     Emits the opcode _op_eax_imm_ followed by the immediate (on x86-64
 *     preceded by REX_W when mode32 is off).
 *
 * BINARY_IMM is wrapped in do { } while (0) so that it behaves as a single
 * statement and can be used safely in an unbraced if / else.
 */
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)

#define BINARY_IMM(_op_imm_, _op_mr_, immw, arg, argw) \
	do { \
		if (IS_HALFWORD(immw) || compiler->mode32) { \
			code = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, immw, arg, argw); \
			FAIL_IF(!code); \
			*(code + 1) |= (_op_imm_); \
		} \
		else { \
			FAIL_IF(emit_load_imm64(compiler, TMP_REG2, immw)); \
			code = emit_x86_instruction(compiler, 1, TMP_REG2, 0, arg, argw); \
			FAIL_IF(!code); \
			*code = (_op_mr_); \
		} \
	} while (0)

#define BINARY_EAX_IMM(_op_eax_imm_, immw) \
	FAIL_IF(emit_do_imm32(compiler, (!compiler->mode32) ? REX_W : 0, (_op_eax_imm_), immw))

#else

#define BINARY_IMM(_op_imm_, _op_mr_, immw, arg, argw) \
	do { \
		code = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, immw, arg, argw); \
		FAIL_IF(!code); \
		*(code + 1) |= (_op_imm_); \
	} while (0)

#define BINARY_EAX_IMM(_op_eax_imm_, immw) \
	FAIL_IF(emit_do_imm(compiler, (_op_eax_imm_), immw))

#endif
   1223 
   1224 static int emit_cum_binary(struct sljit_compiler *compiler,
   1225 	sljit_ub op_rm, sljit_ub op_mr, sljit_ub op_imm, sljit_ub op_eax_imm,
   1226 	int dst, sljit_w dstw,
   1227 	int src1, sljit_w src1w,
   1228 	int src2, sljit_w src2w)
   1229 {
   1230 	sljit_ub* code;
   1231 
   1232 	if (dst == SLJIT_UNUSED) {
   1233 		EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
   1234 		if (src2 & SLJIT_IMM) {
   1235 			BINARY_IMM(op_imm, op_mr, src2w, TMP_REGISTER, 0);
   1236 		}
   1237 		else {
   1238 			code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src2, src2w);
   1239 			FAIL_IF(!code);
   1240 			*code = op_rm;
   1241 		}
   1242 		return SLJIT_SUCCESS;
   1243 	}
   1244 
   1245 	if (dst == src1 && dstw == src1w) {
   1246 		if (src2 & SLJIT_IMM) {
   1247 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
   1248 			if ((dst == SLJIT_TEMPORARY_REG1) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
   1249 #else
   1250 			if ((dst == SLJIT_TEMPORARY_REG1) && (src2w > 127 || src2w < -128)) {
   1251 #endif
   1252 				BINARY_EAX_IMM(op_eax_imm, src2w);
   1253 			}
   1254 			else {
   1255 				BINARY_IMM(op_imm, op_mr, src2w, dst, dstw);
   1256 			}
   1257 		}
   1258 		else if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) {
   1259 			code = emit_x86_instruction(compiler, 1, dst, dstw, src2, src2w);
   1260 			FAIL_IF(!code);
   1261 			*code = op_rm;
   1262 		}
   1263 		else if (src2 >= SLJIT_TEMPORARY_REG1 && src2 <= TMP_REGISTER) {
   1264 			/* Special exception for sljit_emit_cond_value. */
   1265 			code = emit_x86_instruction(compiler, 1, src2, src2w, dst, dstw);
   1266 			FAIL_IF(!code);
   1267 			*code = op_mr;
   1268 		}
   1269 		else {
   1270 			EMIT_MOV(compiler, TMP_REGISTER, 0, src2, src2w);
   1271 			code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, dst, dstw);
   1272 			FAIL_IF(!code);
   1273 			*code = op_mr;
   1274 		}
   1275 		return SLJIT_SUCCESS;
   1276 	}
   1277 
   1278 	/* Only for cumulative operations. */
   1279 	if (dst == src2 && dstw == src2w) {
   1280 		if (src1 & SLJIT_IMM) {
   1281 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
   1282 			if ((dst == SLJIT_TEMPORARY_REG1) && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) {
   1283 #else
   1284 			if ((dst == SLJIT_TEMPORARY_REG1) && (src1w > 127 || src1w < -128)) {
   1285 #endif
   1286 				BINARY_EAX_IMM(op_eax_imm, src1w);
   1287 			}
   1288 			else {
   1289 				BINARY_IMM(op_imm, op_mr, src1w, dst, dstw);
   1290 			}
   1291 		}
   1292 		else if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) {
   1293 			code = emit_x86_instruction(compiler, 1, dst, dstw, src1, src1w);
   1294 			FAIL_IF(!code);
   1295 			*code = op_rm;
   1296 		}
   1297 		else if (src1 >= SLJIT_TEMPORARY_REG1 && src1 <= SLJIT_NO_REGISTERS) {
   1298 			code = emit_x86_instruction(compiler, 1, src1, src1w, dst, dstw);
   1299 			FAIL_IF(!code);
   1300 			*code = op_mr;
   1301 		}
   1302 		else {
   1303 			EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
   1304 			code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, dst, dstw);
   1305 			FAIL_IF(!code);
   1306 			*code = op_mr;
   1307 		}
   1308 		return SLJIT_SUCCESS;
   1309 	}
   1310 
   1311 	/* General version. */
   1312 	if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) {
   1313 		EMIT_MOV(compiler, dst, 0, src1, src1w);
   1314 		if (src2 & SLJIT_IMM) {
   1315 			BINARY_IMM(op_imm, op_mr, src2w, dst, 0);
   1316 		}
   1317 		else {
   1318 			code = emit_x86_instruction(compiler, 1, dst, 0, src2, src2w);
   1319 			FAIL_IF(!code);
   1320 			*code = op_rm;
   1321 		}
   1322 	}
   1323 	else {
   1324 		/* This version requires less memory writing. */
   1325 		EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
   1326 		if (src2 & SLJIT_IMM) {
   1327 			BINARY_IMM(op_imm, op_mr, src2w, TMP_REGISTER, 0);
   1328 		}
   1329 		else {
   1330 			code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src2, src2w);
   1331 			FAIL_IF(!code);
   1332 			*code = op_rm;
   1333 		}
   1334 		EMIT_MOV(compiler, dst, dstw, TMP_REGISTER, 0);
   1335 	}
   1336 
   1337 	return SLJIT_SUCCESS;
   1338 }
   1339 
   1340 static int emit_non_cum_binary(struct sljit_compiler *compiler,
   1341 	sljit_ub op_rm, sljit_ub op_mr, sljit_ub op_imm, sljit_ub op_eax_imm,
   1342 	int dst, sljit_w dstw,
   1343 	int src1, sljit_w src1w,
   1344 	int src2, sljit_w src2w)
   1345 {
   1346 	sljit_ub* code;
   1347 
   1348 	if (dst == SLJIT_UNUSED) {
   1349 		EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
   1350 		if (src2 & SLJIT_IMM) {
   1351 			BINARY_IMM(op_imm, op_mr, src2w, TMP_REGISTER, 0);
   1352 		}
   1353 		else {
   1354 			code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src2, src2w);
   1355 			FAIL_IF(!code);
   1356 			*code = op_rm;
   1357 		}
   1358 		return SLJIT_SUCCESS;
   1359 	}
   1360 
   1361 	if (dst == src1 && dstw == src1w) {
   1362 		if (src2 & SLJIT_IMM) {
   1363 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
   1364 			if ((dst == SLJIT_TEMPORARY_REG1) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
   1365 #else
   1366 			if ((dst == SLJIT_TEMPORARY_REG1) && (src2w > 127 || src2w < -128)) {
   1367 #endif
   1368 				BINARY_EAX_IMM(op_eax_imm, src2w);
   1369 			}
   1370 			else {
   1371 				BINARY_IMM(op_imm, op_mr, src2w, dst, dstw);
   1372 			}
   1373 		}
   1374 		else if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) {
   1375 			code = emit_x86_instruction(compiler, 1, dst, dstw, src2, src2w);
   1376 			FAIL_IF(!code);
   1377 			*code = op_rm;
   1378 		}
   1379 		else if (src2 >= SLJIT_TEMPORARY_REG1 && src2 <= SLJIT_NO_REGISTERS) {
   1380 			code = emit_x86_instruction(compiler, 1, src2, src2w, dst, dstw);
   1381 			FAIL_IF(!code);
   1382 			*code = op_mr;
   1383 		}
   1384 		else {
   1385 			EMIT_MOV(compiler, TMP_REGISTER, 0, src2, src2w);
   1386 			code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, dst, dstw);
   1387 			FAIL_IF(!code);
   1388 			*code = op_mr;
   1389 		}
   1390 		return SLJIT_SUCCESS;
   1391 	}
   1392 
   1393 	/* General version. */
   1394 	if ((dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) && dst != src2) {
   1395 		EMIT_MOV(compiler, dst, 0, src1, src1w);
   1396 		if (src2 & SLJIT_IMM) {
   1397 			BINARY_IMM(op_imm, op_mr, src2w, dst, 0);
   1398 		}
   1399 		else {
   1400 			code = emit_x86_instruction(compiler, 1, dst, 0, src2, src2w);
   1401 			FAIL_IF(!code);
   1402 			*code = op_rm;
   1403 		}
   1404 	}
   1405 	else {
   1406 		/* This version requires less memory writing. */
   1407 		EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
   1408 		if (src2 & SLJIT_IMM) {
   1409 			BINARY_IMM(op_imm, op_mr, src2w, TMP_REGISTER, 0);
   1410 		}
   1411 		else {
   1412 			code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src2, src2w);
   1413 			FAIL_IF(!code);
   1414 			*code = op_rm;
   1415 		}
   1416 		EMIT_MOV(compiler, dst, dstw, TMP_REGISTER, 0);
   1417 	}
   1418 
   1419 	return SLJIT_SUCCESS;
   1420 }
   1421 
   1422 static int emit_mul(struct sljit_compiler *compiler,
   1423 	int dst, sljit_w dstw,
   1424 	int src1, sljit_w src1w,
   1425 	int src2, sljit_w src2w)
   1426 {
   1427 	sljit_ub* code;
   1428 	int dst_r;
   1429 
   1430 	dst_r = (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) ? dst : TMP_REGISTER;
   1431 
   1432 	/* Register destination. */
   1433 	if (dst_r == src1 && !(src2 & SLJIT_IMM)) {
   1434 		code = emit_x86_instruction(compiler, 2, dst_r, 0, src2, src2w);
   1435 		FAIL_IF(!code);
   1436 		*code++ = 0x0f;
   1437 		*code = 0xaf;
   1438 	}
   1439 	else if (dst_r == src2 && !(src1 & SLJIT_IMM)) {
   1440 		code = emit_x86_instruction(compiler, 2, dst_r, 0, src1, src1w);
   1441 		FAIL_IF(!code);
   1442 		*code++ = 0x0f;
   1443 		*code = 0xaf;
   1444 	}
   1445 	else if (src1 & SLJIT_IMM) {
   1446 		if (src2 & SLJIT_IMM) {
   1447 			EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, src2w);
   1448 			src2 = dst_r;
   1449 			src2w = 0;
   1450 		}
   1451 
   1452 		if (src1w <= 127 && src1w >= -128) {
   1453 			code = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
   1454 			FAIL_IF(!code);
   1455 			*code = 0x6b;
   1456 			code = (sljit_ub*)ensure_buf(compiler, 1 + 1);
   1457 			FAIL_IF(!code);
   1458 			INC_CSIZE(1);
   1459 			*code = (sljit_b)src1w;
   1460 		}
   1461 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
   1462 		else {
   1463 			code = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
   1464 			FAIL_IF(!code);
   1465 			*code = 0x69;
   1466 			code = (sljit_ub*)ensure_buf(compiler, 1 + 4);
   1467 			FAIL_IF(!code);
   1468 			INC_CSIZE(4);
   1469 			*(sljit_w*)code = src1w;
   1470 		}
   1471 #else
   1472 		else if (IS_HALFWORD(src1w)) {
   1473 			code = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
   1474 			FAIL_IF(!code);
   1475 			*code = 0x69;
   1476 			code = (sljit_ub*)ensure_buf(compiler, 1 + 4);
   1477 			FAIL_IF(!code);
   1478 			INC_CSIZE(4);
   1479 			*(sljit_hw*)code = (sljit_hw)src1w;
   1480 		}
   1481 		else {
   1482 			EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_IMM, src1w);
   1483 			if (dst_r != src2)
   1484 				EMIT_MOV(compiler, dst_r, 0, src2, src2w);
   1485 			code = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0);
   1486 			FAIL_IF(!code);
   1487 			*code++ = 0x0f;
   1488 			*code = 0xaf;
   1489 		}
   1490 #endif
   1491 	}
   1492 	else if (src2 & SLJIT_IMM) {
   1493 		/* Note: src1 is NOT immediate. */
   1494 
   1495 		if (src2w <= 127 && src2w >= -128) {
   1496 			code = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
   1497 			FAIL_IF(!code);
   1498 			*code = 0x6b;
   1499 			code = (sljit_ub*)ensure_buf(compiler, 1 + 1);
   1500 			FAIL_IF(!code);
   1501 			INC_CSIZE(1);
   1502 			*code = (sljit_b)src2w;
   1503 		}
   1504 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
   1505 		else {
   1506 			code = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
   1507 			FAIL_IF(!code);
   1508 			*code = 0x69;
   1509 			code = (sljit_ub*)ensure_buf(compiler, 1 + 4);
   1510 			FAIL_IF(!code);
   1511 			INC_CSIZE(4);
   1512 			*(sljit_w*)code = src2w;
   1513 		}
   1514 #else
   1515 		else if (IS_HALFWORD(src2w)) {
   1516 			code = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
   1517 			FAIL_IF(!code);
   1518 			*code = 0x69;
   1519 			code = (sljit_ub*)ensure_buf(compiler, 1 + 4);
   1520 			FAIL_IF(!code);
   1521 			INC_CSIZE(4);
   1522 			*(sljit_hw*)code = (sljit_hw)src2w;
   1523 		}
   1524 		else {
   1525 			EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_IMM, src1w);
   1526 			if (dst_r != src1)
   1527 				EMIT_MOV(compiler, dst_r, 0, src1, src1w);
   1528 			code = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0);
   1529 			FAIL_IF(!code);
   1530 			*code++ = 0x0f;
   1531 			*code = 0xaf;
   1532 		}
   1533 #endif
   1534 	}
   1535 	else {
   1536 		/* Neither argument is immediate. */
   1537 		if (ADDRESSING_DEPENDS_ON(src2, dst_r))
   1538 			dst_r = TMP_REGISTER;
   1539 		EMIT_MOV(compiler, dst_r, 0, src1, src1w);
   1540 		code = emit_x86_instruction(compiler, 2, dst_r, 0, src2, src2w);
   1541 		FAIL_IF(!code);
   1542 		*code++ = 0x0f;
   1543 		*code = 0xaf;
   1544 	}
   1545 
   1546 	if (dst_r == TMP_REGISTER)
   1547 		EMIT_MOV(compiler, dst, dstw, TMP_REGISTER, 0);
   1548 
   1549 	return SLJIT_SUCCESS;
   1550 }
   1551 
   1552 static int emit_lea_binary(struct sljit_compiler *compiler,
   1553 	int dst, sljit_w dstw,
   1554 	int src1, sljit_w src1w,
   1555 	int src2, sljit_w src2w)
   1556 {
   1557 	sljit_ub* code;
   1558 	int dst_r, done = 0;
   1559 
   1560 	/* These cases better be left to handled by normal way. */
   1561 	if (dst == src1 && dstw == src1w)
   1562 		return SLJIT_ERR_UNSUPPORTED;
   1563 	if (dst == src2 && dstw == src2w)
   1564 		return SLJIT_ERR_UNSUPPORTED;
   1565 
   1566 	dst_r = (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) ? dst : TMP_REGISTER;
   1567 
   1568 	if (src1 >= SLJIT_TEMPORARY_REG1 && src1 <= SLJIT_NO_REGISTERS) {
   1569 		if ((src2 >= SLJIT_TEMPORARY_REG1 && src2 <= SLJIT_NO_REGISTERS) || src2 == TMP_REGISTER) {
   1570 			code = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM2(src1, src2), 0);
   1571 			FAIL_IF(!code);
   1572 			*code = 0x8d;
   1573 			done = 1;
   1574 		}
   1575 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
   1576 		if ((src2 & SLJIT_IMM) && (compiler->mode32 || IS_HALFWORD(src2w))) {
   1577 			code = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src1), (int)src2w);
   1578 #else
   1579 		if (src2 & SLJIT_IMM) {
   1580 			code = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src1), src2w);
   1581 #endif
   1582 			FAIL_IF(!code);
   1583 			*code = 0x8d;
   1584 			done = 1;
   1585 		}
   1586 	}
   1587 	else if (src2 >= SLJIT_TEMPORARY_REG1 && src2 <= SLJIT_NO_REGISTERS) {
   1588 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
   1589 		if ((src1 & SLJIT_IMM) && (compiler->mode32 || IS_HALFWORD(src1w))) {
   1590 			code = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src2), (int)src1w);
   1591 #else
   1592 		if (src1 & SLJIT_IMM) {
   1593 			code = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src2), src1w);
   1594 #endif
   1595 			FAIL_IF(!code);
   1596 			*code = 0x8d;
   1597 			done = 1;
   1598 		}
   1599 	}
   1600 
   1601 	if (done) {
   1602 		if (dst_r == TMP_REGISTER)
   1603 			return emit_mov(compiler, dst, dstw, TMP_REGISTER, 0);
   1604 		return SLJIT_SUCCESS;
   1605 	}
   1606 	return SLJIT_ERR_UNSUPPORTED;
   1607 }
   1608 
   1609 static int emit_cmp_binary(struct sljit_compiler *compiler,
   1610 	int src1, sljit_w src1w,
   1611 	int src2, sljit_w src2w)
   1612 {
   1613 	sljit_ub* code;
   1614 
   1615 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
   1616 	if (src1 == SLJIT_TEMPORARY_REG1 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
   1617 #else
   1618 	if (src1 == SLJIT_TEMPORARY_REG1 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128)) {
   1619 #endif
   1620 		BINARY_EAX_IMM(0x3d, src2w);
   1621 		return SLJIT_SUCCESS;
   1622 	}
   1623 
   1624 	if (src1 >= SLJIT_TEMPORARY_REG1 && src1 <= SLJIT_NO_REGISTERS) {
   1625 		if (src2 & SLJIT_IMM) {
   1626 			BINARY_IMM(0x7 << 3, 0x39, src2w, src1, 0);
   1627 		}
   1628 		else {
   1629 			code = emit_x86_instruction(compiler, 1, src1, 0, src2, src2w);
   1630 			FAIL_IF(!code);
   1631 			*code = 0x3b;
   1632 		}
   1633 		return SLJIT_SUCCESS;
   1634 	}
   1635 
   1636 	if (src2 >= SLJIT_TEMPORARY_REG1 && src2 <= SLJIT_NO_REGISTERS && !(src1 & SLJIT_IMM)) {
   1637 		code = emit_x86_instruction(compiler, 1, src2, 0, src1, src1w);
   1638 		FAIL_IF(!code);
   1639 		*code = 0x39;
   1640 		return SLJIT_SUCCESS;
   1641 	}
   1642 
   1643 	if (src2 & SLJIT_IMM) {
   1644 		if (src1 & SLJIT_IMM) {
   1645 			EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
   1646 			src1 = TMP_REGISTER;
   1647 			src1w = 0;
   1648 		}
   1649 		BINARY_IMM(0x7 << 3, 0x39, src2w, src1, src1w);
   1650 	}
   1651 	else {
   1652 		EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
   1653 		code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src2, src2w);
   1654 		FAIL_IF(!code);
   1655 		*code = 0x3b;
   1656 	}
   1657 	return SLJIT_SUCCESS;
   1658 }
   1659 
   1660 static int emit_test_binary(struct sljit_compiler *compiler,
   1661 	int src1, sljit_w src1w,
   1662 	int src2, sljit_w src2w)
   1663 {
   1664 	sljit_ub* code;
   1665 
   1666 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
   1667 	if (src1 == SLJIT_TEMPORARY_REG1 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
   1668 #else
   1669 	if (src1 == SLJIT_TEMPORARY_REG1 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128)) {
   1670 #endif
   1671 		BINARY_EAX_IMM(0xa9, src2w);
   1672 		return SLJIT_SUCCESS;
   1673 	}
   1674 
   1675 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
   1676 	if (src2 == SLJIT_TEMPORARY_REG1 && (src2 & SLJIT_IMM) && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) {
   1677 #else
   1678 	if (src2 == SLJIT_TEMPORARY_REG1 && (src1 & SLJIT_IMM) && (src1w > 127 || src1w < -128)) {
   1679 #endif
   1680 		BINARY_EAX_IMM(0xa9, src1w);
   1681 		return SLJIT_SUCCESS;
   1682 	}
   1683 
   1684 	if (src1 >= SLJIT_TEMPORARY_REG1 && src1 <= SLJIT_NO_REGISTERS) {
   1685 		if (src2 & SLJIT_IMM) {
   1686 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
   1687 			if (IS_HALFWORD(src2w) || compiler->mode32) {
   1688 				code = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, 0);
   1689 				FAIL_IF(!code);
   1690 				*code = 0xf7;
   1691 			}
   1692 			else {
   1693 				FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src2w));
   1694 				code = emit_x86_instruction(compiler, 1, TMP_REG2, 0, src1, 0);
   1695 				FAIL_IF(!code);
   1696 				*code = 0x85;
   1697 			}
   1698 #else
   1699 			code = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, 0);
   1700 			FAIL_IF(!code);
   1701 			*code = 0xf7;
   1702 #endif
   1703 		}
   1704 		else {
   1705 			code = emit_x86_instruction(compiler, 1, src1, 0, src2, src2w);
   1706 			FAIL_IF(!code);
   1707 			*code = 0x85;
   1708 		}
   1709 		return SLJIT_SUCCESS;
   1710 	}
   1711 
   1712 	if (src2 >= SLJIT_TEMPORARY_REG1 && src2 <= SLJIT_NO_REGISTERS) {
   1713 		if (src1 & SLJIT_IMM) {
   1714 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
   1715 			if (IS_HALFWORD(src1w) || compiler->mode32) {
   1716 				code = emit_x86_instruction(compiler, 1, SLJIT_IMM, src1w, src2, 0);
   1717 				FAIL_IF(!code);
   1718 				*code = 0xf7;
   1719 			}
   1720 			else {
   1721 				FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src1w));
   1722 				code = emit_x86_instruction(compiler, 1, TMP_REG2, 0, src2, 0);
   1723 				FAIL_IF(!code);
   1724 				*code = 0x85;
   1725 			}
   1726 #else
   1727 			code = emit_x86_instruction(compiler, 1, src1, src1w, src2, 0);
   1728 			FAIL_IF(!code);
   1729 			*code = 0xf7;
   1730 #endif
   1731 		}
   1732 		else {
   1733 			code = emit_x86_instruction(compiler, 1, src2, 0, src1, src1w);
   1734 			FAIL_IF(!code);
   1735 			*code = 0x85;
   1736 		}
   1737 		return SLJIT_SUCCESS;
   1738 	}
   1739 
   1740 	EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
   1741 	if (src2 & SLJIT_IMM) {
   1742 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
   1743 		if (IS_HALFWORD(src2w) || compiler->mode32) {
   1744 			code = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, TMP_REGISTER, 0);
   1745 			FAIL_IF(!code);
   1746 			*code = 0xf7;
   1747 		}
   1748 		else {
   1749 			FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src2w));
   1750 			code = emit_x86_instruction(compiler, 1, TMP_REG2, 0, TMP_REGISTER, 0);
   1751 			FAIL_IF(!code);
   1752 			*code = 0x85;
   1753 		}
   1754 #else
   1755 		code = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, TMP_REGISTER, 0);
   1756 		FAIL_IF(!code);
   1757 		*code = 0xf7;
   1758 #endif
   1759 	}
   1760 	else {
   1761 		code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src2, src2w);
   1762 		FAIL_IF(!code);
   1763 		*code = 0x85;
   1764 	}
   1765 	return SLJIT_SUCCESS;
   1766 }
   1767 
   /* Emits a shift of src1 by src2 and stores the result in dst.
    *
    * mode is OR-ed into the byte that emit_x86_instruction() returns, which is
    * how the caller selects the shift kind. The count is used as given when
    * src2 is an immediate or already SLJIT_PREF_SHIFT_REG; otherwise it is moved
    * into SLJIT_PREF_SHIFT_REG first, and the previous contents of that register
    * are saved and restored around the shift unless the register is the
    * destination itself.
    *
    * Returns SLJIT_SUCCESS once every step has been emitted; FAIL_IF leaves the
    * function early when emit_x86_instruction() yields NULL.
    */
   1768 static int emit_shift(struct sljit_compiler *compiler,
   1769 	sljit_ub mode,
   1770 	int dst, sljit_w dstw,
   1771 	int src1, sljit_w src1w,
   1772 	int src2, sljit_w src2w)
   1773 {
   1774 	sljit_ub* code;
   1775 
        	/* The count can be used directly: immediate or already in the shift register. */
   1776 	if ((src2 & SLJIT_IMM) || (src2 == SLJIT_PREF_SHIFT_REG)) {
        		/* Destination and first source are the same operand: shift in place. */
   1777 		if (dst == src1 && dstw == src1w) {
   1778 			code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, dst, dstw);
   1779 			FAIL_IF(!code);
   1780 			*code |= mode;
   1781 			return SLJIT_SUCCESS;
   1782 		}
        		/* No destination: shift a scratch copy and do not store the result. */
   1783 		if (dst == SLJIT_UNUSED) {
   1784 			EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
   1785 			code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, TMP_REGISTER, 0);
   1786 			FAIL_IF(!code);
   1787 			*code |= mode;
   1788 			return SLJIT_SUCCESS;
   1789 		}
        		/* The shift register is both count and destination: the value must be
        		   shifted elsewhere (TMP_REGISTER) and copied in afterwards. */
   1790 		if (dst == SLJIT_PREF_SHIFT_REG && src2 == SLJIT_PREF_SHIFT_REG) {
   1791 			EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
   1792 			code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REGISTER, 0);
   1793 			FAIL_IF(!code);
   1794 			*code |= mode;
   1795 			EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REGISTER, 0);
   1796 			return SLJIT_SUCCESS;
   1797 		}
        		/* Register destination: load src1 into it and shift it in place. */
   1798 		if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) {
   1799 			EMIT_MOV(compiler, dst, 0, src1, src1w);
   1800 			code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, dst, 0);
   1801 			FAIL_IF(!code);
   1802 			*code |= mode;
   1803 			return SLJIT_SUCCESS;
   1804 		}
   1805 
        		/* Remaining destinations: compute in TMP_REGISTER, then store to dst. */
   1806 		EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
   1807 		code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, TMP_REGISTER, 0);
   1808 		FAIL_IF(!code);
   1809 		*code |= mode;
   1810 		EMIT_MOV(compiler, dst, dstw, TMP_REGISTER, 0);
   1811 		return SLJIT_SUCCESS;
   1812 	}
   1813 
        	/* From here on the count has to be loaded into SLJIT_PREF_SHIFT_REG. */
   1814 	if (dst == SLJIT_PREF_SHIFT_REG) {
        		/* The shift register is overwritten by the result anyway, so its old
        		   value is not preserved. src1 is read before the count is loaded. */
   1815 		EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
   1816 		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
   1817 		code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REGISTER, 0);
   1818 		FAIL_IF(!code);
   1819 		*code |= mode;
   1820 		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REGISTER, 0);
   1821 	}
   1822 	else if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS && dst != src2 && !ADDRESSING_DEPENDS_ON(src2, dst)) {
        		/* dst is a register that the count operand does not involve, so dst can
        		   be written before src2 is read; TMP_REGISTER keeps the old value of
        		   the shift register. */
   1823 		if (src1 != dst)
   1824 			EMIT_MOV(compiler, dst, 0, src1, src1w);
   1825 		EMIT_MOV(compiler, TMP_REGISTER, 0, SLJIT_PREF_SHIFT_REG, 0);
   1826 		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
   1827 		code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, dst, 0);
   1828 		FAIL_IF(!code);
   1829 		*code |= mode;
   1830 		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REGISTER, 0);
   1831 	}
   1832 	else {
   1833 		/* This case is really difficult, since ecx itself may be used for
   1834 		   addressing, and we must ensure it works even in that case. */
        		/* TMP_REGISTER carries the value being shifted, so the old shift register
        		   is kept in TMP_REG2 (x86-64) or in a stack slot (x86-32). */
   1835 		EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
   1836 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
   1837 		EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_PREF_SHIFT_REG, 0);
   1838 #else
   1839 		/* [esp+0] contains the flags. */
   1840 		EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), sizeof(sljit_w), SLJIT_PREF_SHIFT_REG, 0);
   1841 #endif
   1842 		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
   1843 		code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REGISTER, 0);
   1844 		FAIL_IF(!code);
   1845 		*code |= mode;
        		/* Restore the shift register before dst is written, since dst may be
        		   addressed through it. */
   1846 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
   1847 		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG2, 0);
   1848 #else
   1849 		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), sizeof(sljit_w));
   1850 #endif
   1851 		EMIT_MOV(compiler, dst, dstw, TMP_REGISTER, 0);
   1852 	}
   1853 
   1854 	return SLJIT_SUCCESS;
   1855 }
   1856 
   1857 static int emit_shift_with_flags(struct sljit_compiler *compiler,
   1858 	sljit_ub mode, int set_flags,
   1859 	int dst, sljit_w dstw,
   1860 	int src1, sljit_w src1w,
   1861 	int src2, sljit_w src2w)
   1862 {
   1863 	/* The CPU does not set flags if the shift count is 0. */
   1864 	if (src2 & SLJIT_IMM) {
   1865 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
   1866 		if ((src2w & 0x3f) != 0 || (compiler->mode32 && (src2w & 0x1f) != 0))
   1867 			return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);
   1868 #else
   1869 		if ((src2w & 0x1f) != 0)
   1870 			return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);
   1871 #endif
   1872 		if (!set_flags)
   1873 			return emit_mov(compiler, dst, dstw, src1, src1w);
   1874 		/* OR dst, src, 0 */
   1875 		return emit_cum_binary(compiler, 0x0b, 0x09, 0x1 << 3, 0x0d,
   1876 			dst, dstw, src1, src1w, SLJIT_IMM, 0);
   1877 	}
   1878 
   1879 	if (!set_flags)
   1880 		return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);
   1881 
   1882 	if (!(dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS))
   1883 		FAIL_IF(emit_cmp_binary(compiler, src1, src1w, SLJIT_IMM, 0));
   1884 
   1885 	FAIL_IF(emit_shift(compiler,mode, dst, dstw, src1, src1w, src2, src2w));
   1886 
   1887 	if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS)
   1888 		return emit_cmp_binary(compiler, dst, dstw, SLJIT_IMM, 0);
   1889 	return SLJIT_SUCCESS;
   1890 }
   1891 
   /* Emits a two-operand integer operation: dst = src1 <op> src2.
    *
    * The opcode part of op (GET_OPCODE) selects the emitter; the flag part
    * (GET_FLAGS, SLJIT_KEEP_FLAGS) drives the bookkeeping of compiler->flags_saved
    * so that flags are saved before an instruction that must not disturb them and
    * the saved copy is dropped when the instruction defines new flags.
    *
    * Returns the status of the emitter that was called; an opcode that matches no
    * case falls through to SLJIT_SUCCESS.
    */
   1892 SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op2(struct sljit_compiler *compiler, int op,
   1893 	int dst, sljit_w dstw,
   1894 	int src1, sljit_w src1w,
   1895 	int src2, sljit_w src2w)
   1896 {
   1897 	CHECK_ERROR();
   1898 	check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w);
   1899 	ADJUST_LOCAL_OFFSET(dst, dstw);
   1900 	ADJUST_LOCAL_OFFSET(src1, src1w);
   1901 	ADJUST_LOCAL_OFFSET(src2, src2w);
   1902 
   1903 	CHECK_EXTRA_REGS(dst, dstw, (void)0);
   1904 	CHECK_EXTRA_REGS(src1, src1w, (void)0);
   1905 	CHECK_EXTRA_REGS(src2, src2w, (void)0);
   1906 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
        	/* The operand width for the emitters follows the SLJIT_INT_OP bit of op. */
   1907 	compiler->mode32 = op & SLJIT_INT_OP;
   1908 #endif
   1909 
        	/* Opcodes numbered SLJIT_MUL and above share this flag handling; the
        	   cases for the lower opcodes below do their own. */
   1910 	if (GET_OPCODE(op) >= SLJIT_MUL) {
   1911 		if (SLJIT_UNLIKELY(GET_FLAGS(op)))
   1912 			compiler->flags_saved = 0;
   1913 		else if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
   1914 			FAIL_IF(emit_save_flags(compiler));
   1915 	}
   1916 
   1917 	switch (GET_OPCODE(op)) {
   1918 	case SLJIT_ADD:
        		/* Without requested flags, try the LEA form first; only when it reports
        		   SLJIT_ERR_UNSUPPORTED is a real ADD emitted. */
   1919 		if (!GET_FLAGS(op)) {
   1920 			if (emit_lea_binary(compiler, dst, dstw, src1, src1w, src2, src2w) != SLJIT_ERR_UNSUPPORTED)
   1921 				return compiler->error;
   1922 		}
   1923 		else
   1924 			compiler->flags_saved = 0;
   1925 		if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
   1926 			FAIL_IF(emit_save_flags(compiler));
   1927 		return emit_cum_binary(compiler, 0x03, 0x01, 0x0 << 3, 0x05,
   1928 			dst, dstw, src1, src1w, src2, src2w);
   1929 	case SLJIT_ADDC:
   1930 		if (SLJIT_UNLIKELY(compiler->flags_saved)) /* C flag must be restored. */
   1931 			FAIL_IF(emit_restore_flags(compiler, 1));
   1932 		else if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS))
   1933 			FAIL_IF(emit_save_flags(compiler));
   1934 		if (SLJIT_UNLIKELY(GET_FLAGS(op)))
   1935 			compiler->flags_saved = 0;
   1936 		return emit_cum_binary(compiler, 0x13, 0x11, 0x2 << 3, 0x15,
   1937 			dst, dstw, src1, src1w, src2, src2w);
   1938 	case SLJIT_SUB:
        		/* Subtracting an immediate without flags is tried as LEA with the
        		   negated immediate. */
   1939 		if (!GET_FLAGS(op)) {
   1940 			if ((src2 & SLJIT_IMM) && emit_lea_binary(compiler, dst, dstw, src1, src1w, SLJIT_IMM, -src2w) != SLJIT_ERR_UNSUPPORTED)
   1941 				return compiler->error;
   1942 		}
   1943 		else
   1944 			compiler->flags_saved = 0;
   1945 		if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
   1946 			FAIL_IF(emit_save_flags(compiler));
        		/* No destination: emit a compare instead of a subtract. */
   1947 		if (dst == SLJIT_UNUSED)
   1948 			return emit_cmp_binary(compiler, src1, src1w, src2, src2w);
   1949 		return emit_non_cum_binary(compiler, 0x2b, 0x29, 0x5 << 3, 0x2d,
   1950 			dst, dstw, src1, src1w, src2, src2w);
   1951 	case SLJIT_SUBC:
   1952 		if (SLJIT_UNLIKELY(compiler->flags_saved)) /* C flag must be restored. */
   1953 			FAIL_IF(emit_restore_flags(compiler, 1));
   1954 		else if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS))
   1955 			FAIL_IF(emit_save_flags(compiler));
   1956 		if (SLJIT_UNLIKELY(GET_FLAGS(op)))
   1957 			compiler->flags_saved = 0;
   1958 		return emit_non_cum_binary(compiler, 0x1b, 0x19, 0x3 << 3, 0x1d,
   1959 			dst, dstw, src1, src1w, src2, src2w);
   1960 	case SLJIT_MUL:
   1961 		return emit_mul(compiler, dst, dstw, src1, src1w, src2, src2w);
   1962 	case SLJIT_AND:
        		/* No destination: emit a test instead of an AND. */
   1963 		if (dst == SLJIT_UNUSED)
   1964 			return emit_test_binary(compiler, src1, src1w, src2, src2w);
   1965 		return emit_cum_binary(compiler, 0x23, 0x21, 0x4 << 3, 0x25,
   1966 			dst, dstw, src1, src1w, src2, src2w);
   1967 	case SLJIT_OR:
   1968 		return emit_cum_binary(compiler, 0x0b, 0x09, 0x1 << 3, 0x0d,
   1969 			dst, dstw, src1, src1w, src2, src2w);
   1970 	case SLJIT_XOR:
   1971 		return emit_cum_binary(compiler, 0x33, 0x31, 0x6 << 3, 0x35,
   1972 			dst, dstw, src1, src1w, src2, src2w);
        	/* The three shifts differ only in the mode value handed to the shift emitter. */
   1973 	case SLJIT_SHL:
   1974 		return emit_shift_with_flags(compiler, 0x4 << 3, GET_FLAGS(op),
   1975 			dst, dstw, src1, src1w, src2, src2w);
   1976 	case SLJIT_LSHR:
   1977 		return emit_shift_with_flags(compiler, 0x5 << 3, GET_FLAGS(op),
   1978 			dst, dstw, src1, src1w, src2, src2w);
   1979 	case SLJIT_ASHR:
   1980 		return emit_shift_with_flags(compiler, 0x7 << 3, GET_FLAGS(op),
   1981 			dst, dstw, src1, src1w, src2, src2w);
   1982 	}
   1983 
   1984 	return SLJIT_SUCCESS;
   1985 }
   1986 
   /* Returns the reg_map[] entry for the given SLJIT register.
    * On x86-32 the four registers SLJIT_TEMPORARY_EREG1/2 and SLJIT_SAVED_EREG1/2
    * yield -1 instead (presumably because they have no machine register behind
    * them; confirm against the register definitions).
    */
   1987 SLJIT_API_FUNC_ATTRIBUTE int sljit_get_register_index(int reg)
   1988 {
   1989 	check_sljit_get_register_index(reg);
   1990 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
   1991 	if (reg == SLJIT_TEMPORARY_EREG1 || reg == SLJIT_TEMPORARY_EREG2
   1992 			|| reg == SLJIT_SAVED_EREG1 || reg == SLJIT_SAVED_EREG2)
   1993 		return -1;
   1994 #endif
   1995 	return reg_map[reg];
   1996 }
   1997 
   /* Appends a caller-supplied machine instruction to the generated code.
    *
    * instruction points to the raw bytes and size is their count; the assertion
    * requires 0 < size < 16. The bytes are copied unchanged into a freshly
    * reserved piece of the code buffer.
    *
    * Returns SLJIT_SUCCESS, or fails through FAIL_IF when ensure_buf() cannot
    * provide the space.
    */
   1998 SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op_custom(struct sljit_compiler *compiler,
   1999 	void *instruction, int size)
   2000 {
   2001 	sljit_ub *buf;
   2002 
   2003 	CHECK_ERROR();
   2004 	check_sljit_emit_op_custom(compiler, instruction, size);
   2005 	SLJIT_ASSERT(size > 0 && size < 16);
   2006 
        	/* One byte more than the instruction is reserved; INC_SIZE (defined
        	   elsewhere) is applied before the copy. */
   2007 	buf = (sljit_ub*)ensure_buf(compiler, 1 + size);
   2008 	FAIL_IF(!buf);
   2009 	INC_SIZE(size);
   2010 	SLJIT_MEMMOVE(buf, instruction, size);
   2011 	return SLJIT_SUCCESS;
   2012 }
   2013 
   2014 /* --------------------------------------------------------------------- */
   2015 /*  Floating point operators                                             */
   2016 /* --------------------------------------------------------------------- */
   2017 
   2018 #if (defined SLJIT_SSE2 && SLJIT_SSE2)
   2019 
   2020 /* Alignment slack (3 words) + 2 * 16 bytes of constants (4 words each). */
   2021 static sljit_i sse2_data[3 + 4 + 4];
        /* 16-byte aligned view into sse2_data; set up by init_compiler(). */
   2022 static sljit_i *sse2_buffer;
   2023 
        /* Prepares the two 16-byte constants used by the floating point emitters.
           Words that are not assigned here keep the zero value of static storage.
           - sse2_buffer[0..3]: only bit 31 of word 1 is set; sljit_emit_fop1 uses it
             with opcode 0x57 for SLJIT_FNEG.
           - sse2_buffer[4..7]: words 4 and 5 form a mask with only bit 31 of word 5
             clear; sljit_emit_fop1 uses it with opcode 0x54 for SLJIT_FABS. */
   2024 static void init_compiler(void)
   2025 {
        	/* Round the address of sse2_data up to the next multiple of 16. */
   2026 	sse2_buffer = (sljit_i*)(((sljit_uw)sse2_data + 15) & ~0xf);
   2027 	sse2_buffer[0] = 0;
   2028 	sse2_buffer[1] = 0x80000000;
   2029 	sse2_buffer[4] = 0xffffffff;
   2030 	sse2_buffer[5] = 0x7fffffff;
   2031 }
   2032 
   2033 #endif
   2034 
   /* Reports whether floating point code can be generated.
    *
    * - SLJIT_SSE2 disabled: always 0.
    * - SLJIT_SSE2 enabled without SLJIT_DETECT_SSE2: always 1.
    * - With SLJIT_DETECT_SSE2: executes CPUID with eax = 1 and returns bit 26 of
    *   the edx result (the SSE2 feature bit). The answer is kept in a function
    *   static, so CPUID runs only while that static still holds -1.
    */
   2035 SLJIT_API_FUNC_ATTRIBUTE int sljit_is_fpu_available(void)
   2036 {
   2037 #if (defined SLJIT_SSE2 && SLJIT_SSE2)
   2038 #if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
        	/* -1 means "not probed yet". */
   2039 	static int sse2_available = -1;
   2040 	int features;
   2041 
   2042 	if (sse2_available != -1)
   2043 		return sse2_available;
   2044 
        	/* Both variants save and restore ebx around CPUID by hand and copy the
        	   edx result into features. */
   2045 #ifdef __GNUC__
   2046 	/* AT&T syntax. */
   2047 	asm (
   2048 		"pushl %%ebx\n"
   2049 		"movl $0x1, %%eax\n"
   2050 		"cpuid\n"
   2051 		"popl %%ebx\n"
   2052 		"movl %%edx, %0\n"
   2053 		: "=g" (features)
   2054 		:
   2055 		: "%eax", "%ecx", "%edx"
   2056 	);
   2057 #elif defined(_MSC_VER) || defined(__BORLANDC__)
   2058 	/* Intel syntax. */
   2059 	__asm {
   2060 		mov eax, 1
   2061 		push ebx
   2062 		cpuid
   2063 		pop ebx
   2064 		mov features, edx
   2065 	}
   2066 #else
   2067 	#error "SLJIT_DETECT_SSE2 is not implemented for this C compiler"
   2068 #endif
   2069 	sse2_available = (features >> 26) & 0x1;
   2070 	return sse2_available;
   2071 #else
   2072 	return 1;
   2073 #endif
   2074 #else
   2075 	return 0;
   2076 #endif
   2077 }
   2078 
   2079 #if (defined SLJIT_SSE2 && SLJIT_SSE2)
   2080 
   /* Emits an SSE2 instruction with the two-byte opcode 0x0f <opcode>, requesting
    * the EX86_PREF_F2 prefix from emit_x86_instruction().
    * xmm1 is passed as the first operand with a zero offset; xmm2/xmm2w describe
    * the second operand.
    * Returns SLJIT_SUCCESS, or fails through FAIL_IF when no buffer is returned.
    */
   2081 static int emit_sse2(struct sljit_compiler *compiler, sljit_ub opcode,
   2082 	int xmm1, int xmm2, sljit_w xmm2w)
   2083 {
   2084 	sljit_ub *buf;
   2085 
        	/* Two opcode bytes are requested and filled in below. */
   2086 	buf = emit_x86_instruction(compiler, 2 | EX86_PREF_F2 | EX86_SSE2, xmm1, 0, xmm2, xmm2w);
   2087 	FAIL_IF(!buf);
   2088 	*buf++ = 0x0f;
   2089 	*buf = opcode;
   2090 	return SLJIT_SUCCESS;
   2091 }
   2092 
   /* Same as emit_sse2(), except that the EX86_PREF_66 prefix is requested
    * instead of EX86_PREF_F2. In this file it is used with opcodes 0x2e, 0x57 and
    * 0x54 (compare, xor and and).
    * Returns SLJIT_SUCCESS, or fails through FAIL_IF when no buffer is returned.
    */
   2093 static int emit_sse2_logic(struct sljit_compiler *compiler, sljit_ub opcode,
   2094 	int xmm1, int xmm2, sljit_w xmm2w)
   2095 {
   2096 	sljit_ub *buf;
   2097 
   2098 	buf = emit_x86_instruction(compiler, 2 | EX86_PREF_66 | EX86_SSE2, xmm1, 0, xmm2, xmm2w);
   2099 	FAIL_IF(!buf);
   2100 	*buf++ = 0x0f;
   2101 	*buf = opcode;
   2102 	return SLJIT_SUCCESS;
   2103 }
   2104 
   /* Loads the operand src/srcw into the float register dst (opcode 0x10 through
    * emit_sse2()). Returns the status of emit_sse2(). */
   2105 static SLJIT_INLINE int emit_sse2_load(struct sljit_compiler *compiler,
   2106 	int dst, int src, sljit_w srcw)
   2107 {
   2108 	return emit_sse2(compiler, 0x10, dst, src, srcw);
   2109 }
   2110 
   /* Stores the float register src into the operand dst/dstw (opcode 0x11 through
    * emit_sse2()); note that the register is passed in the first operand slot.
    * Returns the status of emit_sse2(). */
   2111 static SLJIT_INLINE int emit_sse2_store(struct sljit_compiler *compiler,
   2112 	int dst, sljit_w dstw, int src)
   2113 {
   2114 	return emit_sse2(compiler, 0x11, src, dst, dstw);
   2115 }
   2116 
   /* Emits a one-operand floating point operation (SSE2 build).
    *
    * - SLJIT_FCMP: compares dst with src (opcode 0x2e); dst is first loaded into
    *   TMP_FREG when it is not a float register. The saved-flags state is dropped.
    * - SLJIT_FMOV: a load, a store, or a load into TMP_FREG followed by a store,
    *   depending on which side is a float register.
    * - SLJIT_FNEG / SLJIT_FABS: the source is brought into a register (dst itself
    *   or TMP_FREG) and combined with a constant from sse2_buffer; the result is
    *   stored back when TMP_FREG was used.
    *
    * Returns SLJIT_SUCCESS or the status of the failing emitter.
    */
   2117 SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fop1(struct sljit_compiler *compiler, int op,
   2118 	int dst, sljit_w dstw,
   2119 	int src, sljit_w srcw)
   2120 {
   2121 	int dst_r;
   2122 
   2123 	CHECK_ERROR();
   2124 	check_sljit_emit_fop1(compiler, op, dst, dstw, src, srcw);
   2125 
   2126 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
   2127 	compiler->mode32 = 1;
   2128 #endif
   2129 
   2130 	if (GET_OPCODE(op) == SLJIT_FCMP) {
        		/* The compare defines new flags, so a saved copy is no longer current. */
   2131 		compiler->flags_saved = 0;
   2132 		if (dst >= SLJIT_FLOAT_REG1 && dst <= SLJIT_FLOAT_REG4)
   2133 			dst_r = dst;
   2134 		else {
   2135 			dst_r = TMP_FREG;
   2136 			FAIL_IF(emit_sse2_load(compiler, dst_r, dst, dstw));
   2137 		}
   2138 		return emit_sse2_logic(compiler, 0x2e, dst_r, src, srcw);
   2139 	}
   2140 
   2141 	if (op == SLJIT_FMOV) {
   2142 		if (dst >= SLJIT_FLOAT_REG1 && dst <= SLJIT_FLOAT_REG4)
   2143 			return emit_sse2_load(compiler, dst, src, srcw);
   2144 		if (src >= SLJIT_FLOAT_REG1 && src <= SLJIT_FLOAT_REG4)
   2145 			return emit_sse2_store(compiler, dst, dstw, src);
        		/* Neither side is a float register: go through TMP_FREG. */
   2146 		FAIL_IF(emit_sse2_load(compiler, TMP_FREG, src, srcw));
   2147 		return emit_sse2_store(compiler, dst, dstw, TMP_FREG);
   2148 	}
   2149 
        	/* Bring the source into the register the operation will work on. */
   2150 	if (dst >= SLJIT_FLOAT_REG1 && dst <= SLJIT_FLOAT_REG4) {
   2151 		dst_r = dst;
   2152 		if (dst != src)
   2153 			FAIL_IF(emit_sse2_load(compiler, dst_r, src, srcw));
   2154 	}
   2155 	else {
   2156 		dst_r = TMP_FREG;
   2157 		FAIL_IF(emit_sse2_load(compiler, dst_r, src, srcw));
   2158 	}
   2159 
   2160 	switch (op) {
   2161 	case SLJIT_FNEG:
        		/* Opcode 0x57 with the first 16-byte constant of sse2_buffer. */
   2162 		FAIL_IF(emit_sse2_logic(compiler, 0x57, dst_r, SLJIT_MEM0(), (sljit_w)sse2_buffer));
   2163 		break;
   2164 
   2165 	case SLJIT_FABS:
        		/* Opcode 0x54 with the second 16-byte constant of sse2_buffer. */
   2166 		FAIL_IF(emit_sse2_logic(compiler, 0x54, dst_r, SLJIT_MEM0(), (sljit_w)(sse2_buffer + 4)));
   2167 		break;
   2168 	}
   2169 
   2170 	if (dst_r == TMP_FREG)
   2171 		return emit_sse2_store(compiler, dst, dstw, TMP_FREG);
   2172 	return SLJIT_SUCCESS;
   2173 }
   2174 
   /* Emits a two-operand floating point operation (SSE2 build):
    * dst = src1 <op> src2 for SLJIT_FADD, SLJIT_FSUB, SLJIT_FMUL and SLJIT_FDIV.
    *
    * The instruction works on a register that already holds src1. That register
    * is dst when dst is a float register and using it does not destroy src2;
    * otherwise TMP_FREG is used and the result is stored to dst afterwards.
    *
    * Returns SLJIT_SUCCESS or the status of the failing emitter.
    */
   2175 SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fop2(struct sljit_compiler *compiler, int op,
   2176 	int dst, sljit_w dstw,
   2177 	int src1, sljit_w src1w,
   2178 	int src2, sljit_w src2w)
   2179 {
   2180 	int dst_r;
   2181 
   2182 	CHECK_ERROR();
   2183 	check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w);
   2184 
   2185 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
   2186 	compiler->mode32 = 1;
   2187 #endif
   2188 
   2189 	if (dst >= SLJIT_FLOAT_REG1 && dst <= SLJIT_FLOAT_REG4) {
   2190 		dst_r = dst;
   2191 		if (dst == src1)
   2192 			; /* Do nothing here. */
   2193 		else if (dst == src2 && (op == SLJIT_FADD || op == SLJIT_FMUL)) {
   2194 			/* Swap arguments. */
        			/* dst already holds src2 and these two operations are applied with
        			   the operands exchanged, so src1 becomes the second operand. */
   2195 			src2 = src1;
   2196 			src2w = src1w;
   2197 		}
   2198 		else if (dst != src2)
   2199 			FAIL_IF(emit_sse2_load(compiler, dst_r, src1, src1w));
   2200 		else {
        			/* dst == src2 for FSUB/FDIV: loading src1 into dst would destroy
        			   src2, so the work is done in TMP_FREG. */
   2201 			dst_r = TMP_FREG;
   2202 			FAIL_IF(emit_sse2_load(compiler, TMP_FREG, src1, src1w));
   2203 		}
   2204 	}
   2205 	else {
   2206 		dst_r = TMP_FREG;
   2207 		FAIL_IF(emit_sse2_load(compiler, TMP_FREG, src1, src1w));
   2208 	}
   2209 
        	/* The four cases differ only in the opcode byte passed to emit_sse2(). */
   2210 	switch (op) {
   2211 	case SLJIT_FADD:
   2212 		FAIL_IF(emit_sse2(compiler, 0x58, dst_r, src2, src2w));
   2213 		break;
   2214 
   2215 	case SLJIT_FSUB:
   2216 		FAIL_IF(emit_sse2(compiler, 0x5c, dst_r, src2, src2w));
   2217 		break;
   2218 
   2219 	case SLJIT_FMUL:
   2220 		FAIL_IF(emit_sse2(compiler, 0x59, dst_r, src2, src2w));
   2221 		break;
   2222 
   2223 	case SLJIT_FDIV:
   2224 		FAIL_IF(emit_sse2(compiler, 0x5e, dst_r, src2, src2w));
   2225 		break;
   2226 	}
   2227 
   2228 	if (dst_r == TMP_FREG)
   2229 		return emit_sse2_store(compiler, dst, dstw, TMP_FREG);
   2230 	return SLJIT_SUCCESS;
   2231 }
   2232 
   2233 #else
   2234 
   /* Variant used when SLJIT_SSE2 is not enabled: no code is emitted.
    * Records SLJIT_ERR_UNSUPPORTED in compiler->error and returns it. */
   2235 SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fop1(struct sljit_compiler *compiler, int op,
   2236 	int dst, sljit_w dstw,
   2237 	int src, sljit_w srcw)
   2238 {
   2239 	CHECK_ERROR();
   2240 	/* Should cause an assertion fail. */
   2241 	check_sljit_emit_fop1(compiler, op, dst, dstw, src, srcw);
   2242 	compiler->error = SLJIT_ERR_UNSUPPORTED;
   2243 	return SLJIT_ERR_UNSUPPORTED;
   2244 }
   2245 
   /* Variant used when SLJIT_SSE2 is not enabled: no code is emitted.
    * Records SLJIT_ERR_UNSUPPORTED in compiler->error and returns it. */
   2246 SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fop2(struct sljit_compiler *compiler, int op,
   2247 	int dst, sljit_w dstw,
   2248 	int src1, sljit_w src1w,
   2249 	int src2, sljit_w src2w)
   2250 {
   2251 	CHECK_ERROR();
   2252 	/* Should cause an assertion fail. */
   2253 	check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w);
   2254 	compiler->error = SLJIT_ERR_UNSUPPORTED;
   2255 	return SLJIT_ERR_UNSUPPORTED;
   2256 }
   2257 
   2258 #endif
   2259 
   2260 /* --------------------------------------------------------------------- */
   2261 /*  Conditional instructions                                             */
   2262 /* --------------------------------------------------------------------- */
   2263 
   /* Creates a label at the current position of the instruction stream.
    *
    * Saved flags are restored first. If the most recent label was created at the
    * same compiler->size, that label is returned again instead of a new one.
    * Otherwise a label is allocated, registered with set_label(), and a two-byte
    * record (0, 0) is appended to the code buffer.
    *
    * Returns the label, or leaves through the PTR_FAIL_IF checks on failure.
    */
   2264 SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler)
   2265 {
   2266 	sljit_ub *buf;
   2267 	struct sljit_label *label;
   2268 
   2269 	CHECK_ERROR_PTR();
   2270 	check_sljit_emit_label(compiler);
   2271 
   2272 	/* We should restore the flags before the label,
   2273 	   since other taken jumps have their own flags as well. */
   2274 	if (SLJIT_UNLIKELY(compiler->flags_saved))
   2275 		PTR_FAIL_IF(emit_restore_flags(compiler, 0));
   2276 
        	/* Nothing was emitted since the last label: reuse it. */
   2277 	if (compiler->last_label && compiler->last_label->size == compiler->size)
   2278 		return compiler->last_label;
   2279 
   2280 	label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label));
   2281 	PTR_FAIL_IF(!label);
   2282 	set_label(label, compiler);
   2283 
   2284 	buf = (sljit_ub*)ensure_buf(compiler, 2);
   2285 	PTR_FAIL_IF(!buf);
   2286 
        	/* Record in the code buffer: a 0 byte followed by the tag 0
        	   (sljit_emit_const writes tag 1, sljit_emit_jump writes type + 4). */
   2287 	*buf++ = 0;
   2288 	*buf++ = 0;
   2289 
   2290 	return label;
   2291 }
   2292 
   /* Creates a jump or call whose target is set later.
    *
    * The low 8 bits of type give the jump kind; SLJIT_REWRITABLE_JUMP is passed on
    * to set_jump(). No instruction bytes are written here: the worst-case length
    * is added to compiler->size and a two-byte record (0, type + 4) is appended to
    * the code buffer.
    *
    * Returns the jump, or leaves through the PTR_FAIL_IF checks on failure.
    */
   2293 SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, int type)
   2294 {
   2295 	sljit_ub *buf;
   2296 	struct sljit_jump *jump;
   2297 
   2298 	CHECK_ERROR_PTR();
   2299 	check_sljit_emit_jump(compiler, type);
   2300 
        	/* Kinds up to SLJIT_JUMP get the saved flags restored; for the others
        	   the saved state is simply dropped. */
   2301 	if (SLJIT_UNLIKELY(compiler->flags_saved)) {
   2302 		if ((type & 0xff) <= SLJIT_JUMP)
   2303 			PTR_FAIL_IF(emit_restore_flags(compiler, 0));
   2304 		compiler->flags_saved = 0;
   2305 	}
   2306 
   2307 	jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
   2308 	PTR_FAIL_IF_NULL(jump);
   2309 	set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
   2310 	type &= 0xff;
   2311 
        	/* Calls with arguments need their argument set-up emitted first. */
   2312 	if (type >= SLJIT_CALL1)
   2313 		PTR_FAIL_IF(call_with_args(compiler, type));
   2314 
   2315 	/* Worst case size. */
        	/* Kinds below SLJIT_JUMP are counted with a larger size than
        	   SLJIT_JUMP and above (6 vs 5 bytes on x86-32, 15 vs 13 on x86-64). */
   2316 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
   2317 	compiler->size += (type >= SLJIT_JUMP) ? 5 : 6;
   2318 #else
   2319 	compiler->size += (type >= SLJIT_JUMP) ? (10 + 3) : (2 + 10 + 3);
   2320 #endif
   2321 
   2322 	buf = (sljit_ub*)ensure_buf(compiler, 2);
   2323 	PTR_FAIL_IF_NULL(buf);
   2324 
        	/* Record in the code buffer: a 0 byte followed by the tag type + 4. */
   2325 	*buf++ = 0;
   2326 	*buf++ = type + 4;
   2327 	return jump;
   2328 }
   2329 
   /* Emits a jump or call whose target is given by the operand src/srcw.
    *
    * - src == SLJIT_IMM: srcw is a fixed address. A jump object marked JUMP_ADDR
    *   is created with that target, the worst-case size is reserved and a record
    *   (0, type + 4) is appended, in the same way as sljit_emit_jump() does.
    * - otherwise: an indirect instruction with opcode 0xff is emitted on the
    *   register or memory operand; the /2 form is used for type >=
    *   SLJIT_FAST_CALL and the /4 form for the rest.
    *
    * Returns SLJIT_SUCCESS or an error through FAIL_IF / EMIT_MOV.
    */
   2330 SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_ijump(struct sljit_compiler *compiler, int type, int src, sljit_w srcw)
   2331 {
   2332 	sljit_ub *code;
   2333 	struct sljit_jump *jump;
   2334 
   2335 	CHECK_ERROR();
   2336 	check_sljit_emit_ijump(compiler, type, src, srcw);
   2337 	ADJUST_LOCAL_OFFSET(src, srcw);
   2338 
   2339 	CHECK_EXTRA_REGS(src, srcw, (void)0);
   2340 
        	/* Kinds up to SLJIT_JUMP get the saved flags restored; for the others
        	   the saved state is simply dropped. */
   2341 	if (SLJIT_UNLIKELY(compiler->flags_saved)) {
   2342 		if (type <= SLJIT_JUMP)
   2343 			FAIL_IF(emit_restore_flags(compiler, 0));
   2344 		compiler->flags_saved = 0;
   2345 	}
   2346 
   2347 	if (type >= SLJIT_CALL1) {
   2348 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
   2349 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
        		/* The target is copied out of SLJIT_TEMPORARY_REG3 before
        		   call_with_args() runs (presumably because the argument set-up
        		   overwrites that register; confirm in call_with_args). */
   2350 		if (src == SLJIT_TEMPORARY_REG3) {
   2351 			EMIT_MOV(compiler, TMP_REGISTER, 0, src, 0);
   2352 			src = TMP_REGISTER;
   2353 		}
        		/* NOTE(review): the one-word offset correction suggests that
        		   call_with_args() moves the stack pointer by one word for
        		   SLJIT_CALL3 and above; confirm in call_with_args. */
   2354 		if (src == SLJIT_MEM1(SLJIT_LOCALS_REG) && type >= SLJIT_CALL3)
   2355 			srcw += sizeof(sljit_w);
   2356 #endif
   2357 #endif
   2358 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) && defined(_WIN64)
        		/* Same precaution for SLJIT_TEMPORARY_REG3 on Win64. */
   2359 		if (src == SLJIT_TEMPORARY_REG3) {
   2360 			EMIT_MOV(compiler, TMP_REGISTER, 0, src, 0);
   2361 			src = TMP_REGISTER;
   2362 		}
   2363 #endif
   2364 		FAIL_IF(call_with_args(compiler, type));
   2365 	}
   2366 
   2367 	if (src == SLJIT_IMM) {
   2368 		jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
   2369 		FAIL_IF_NULL(jump);
   2370 		set_jump(jump, compiler, JUMP_ADDR);
   2371 		jump->u.target = srcw;
   2372 
   2373 		/* Worst case size. */
   2374 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
   2375 		compiler->size += 5;
   2376 #else
   2377 		compiler->size += 10 + 3;
   2378 #endif
   2379 
   2380 		code = (sljit_ub*)ensure_buf(compiler, 2);
   2381 		FAIL_IF_NULL(code);
   2382 
        		/* Record in the code buffer: a 0 byte followed by the tag type + 4. */
   2383 		*code++ = 0;
   2384 		*code++ = type + 4;
   2385 	}
   2386 	else {
   2387 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
   2388 		/* REX_W is not necessary (src is not immediate). */
   2389 		compiler->mode32 = 1;
   2390 #endif
   2391 		code = emit_x86_instruction(compiler, 1, 0, 0, src, srcw);
   2392 		FAIL_IF(!code);
        		/* Opcode 0xff; the byte after it receives the /2 or /4 selector. */
   2393 		*code++ = 0xff;
   2394 		*code |= (type >= SLJIT_FAST_CALL) ? (2 << 3) : (4 << 3);
   2395 	}
   2396 	return SLJIT_SUCCESS;
   2397 }
   2398 
   /* Materialises the truth value (0 or 1) of the condition `type` and either
    * moves it to dst (op == SLJIT_MOV) or combines it with dst through
    * sljit_emit_op2() (any other op).
    *
    * The value is produced with a byte-set instruction (0x0f <cond_set>) followed,
    * where a full-width value is needed, by a zero extension (0x0f 0xb6). Nothing
    * is emitted when dst is SLJIT_UNUSED.
    *
    * Returns SLJIT_SUCCESS, the status of sljit_emit_op2(), or an error through
    * FAIL_IF / EMIT_MOV.
    */
   2399 SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_cond_value(struct sljit_compiler *compiler, int op, int dst, sljit_w dstw, int type)
   2400 {
   2401 	sljit_ub *buf;
   2402 	sljit_ub cond_set = 0;
        	/* dst/dstw as given by the caller, before the adjustments below; these
        	   are what sljit_emit_op2() receives, since it adjusts them itself. */
   2403 	int dst_save = dst;
   2404 	sljit_w dstw_save = dstw;
   2405 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
   2406 	int reg;
   2407 #endif
   2408 
   2409 	CHECK_ERROR();
   2410 	check_sljit_emit_cond_value(compiler, op, dst, dstw, type);
   2411 
   2412 	if (dst == SLJIT_UNUSED)
   2413 		return SLJIT_SUCCESS;
   2414 
   2415 	ADJUST_LOCAL_OFFSET(dst, dstw);
   2416 	CHECK_EXTRA_REGS(dst, dstw, (void)0);
   2417 	if (SLJIT_UNLIKELY(compiler->flags_saved))
   2418 		FAIL_IF(emit_restore_flags(compiler, op & SLJIT_KEEP_FLAGS));
   2419 
        	/* Select the second opcode byte of the byte-set instruction for the
        	   requested condition. Integer and float conditions that test the same
        	   flags share a value. */
   2420 	switch (type) {
   2421 	case SLJIT_C_EQUAL:
   2422 	case SLJIT_C_FLOAT_EQUAL:
   2423 		cond_set = 0x94;
   2424 		break;
   2425 
   2426 	case SLJIT_C_NOT_EQUAL:
   2427 	case SLJIT_C_FLOAT_NOT_EQUAL:
   2428 		cond_set = 0x95;
   2429 		break;
   2430 
   2431 	case SLJIT_C_LESS:
   2432 	case SLJIT_C_FLOAT_LESS:
   2433 		cond_set = 0x92;
   2434 		break;
   2435 
   2436 	case SLJIT_C_GREATER_EQUAL:
   2437 	case SLJIT_C_FLOAT_GREATER_EQUAL:
   2438 		cond_set = 0x93;
   2439 		break;
   2440 
   2441 	case SLJIT_C_GREATER:
   2442 	case SLJIT_C_FLOAT_GREATER:
   2443 		cond_set = 0x97;
   2444 		break;
   2445 
   2446 	case SLJIT_C_LESS_EQUAL:
   2447 	case SLJIT_C_FLOAT_LESS_EQUAL:
   2448 		cond_set = 0x96;
   2449 		break;
   2450 
   2451 	case SLJIT_C_SIG_LESS:
   2452 		cond_set = 0x9c;
   2453 		break;
   2454 
   2455 	case SLJIT_C_SIG_GREATER_EQUAL:
   2456 		cond_set = 0x9d;
   2457 		break;
   2458 
   2459 	case SLJIT_C_SIG_GREATER:
   2460 		cond_set = 0x9f;
   2461 		break;
   2462 
   2463 	case SLJIT_C_SIG_LESS_EQUAL:
   2464 		cond_set = 0x9e;
   2465 		break;
   2466 
   2467 	case SLJIT_C_OVERFLOW:
   2468 	case SLJIT_C_MUL_OVERFLOW:
   2469 		cond_set = 0x90;
   2470 		break;
   2471 
   2472 	case SLJIT_C_NOT_OVERFLOW:
   2473 	case SLJIT_C_MUL_NOT_OVERFLOW:
   2474 		cond_set = 0x91;
   2475 		break;
   2476 
   2477 	case SLJIT_C_FLOAT_UNORDERED:
   2478 		cond_set = 0x9a;
   2479 		break;
   2480 
   2481 	case SLJIT_C_FLOAT_ORDERED:
   2482 		cond_set = 0x9b;
   2483 		break;
   2484 	}
   2485 
   2486 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
        	/* x86-64: the value is built directly in dst when this is a plain move
        	   into a register, and in TMP_REGISTER in every other case. */
   2487 	reg = (op == SLJIT_MOV && dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) ? dst : TMP_REGISTER;
   2488 
   2489 	buf = (sljit_ub*)ensure_buf(compiler, 1 + 4 + 4);
   2490 	FAIL_IF(!buf);
   2491 	INC_SIZE(4 + 4);
   2492 	/* Set low register to conditional flag. */
        	/* A REX prefix is always written: a bare 0x40 for the low eight
        	   registers, REX_B for the others. */
   2493 	*buf++ = (reg_map[reg] <= 7) ? 0x40 : REX_B;
   2494 	*buf++ = 0x0f;
   2495 	*buf++ = cond_set;
   2496 	*buf++ = 0xC0 | reg_lmap[reg];
        	/* Zero-extend the byte to the full register (REX_W, 0x0f 0xb6, reg <- reg). */
   2497 	*buf++ = REX_W | (reg_map[reg] <= 7 ? 0 : (REX_B | REX_R));
   2498 	*buf++ = 0x0f;
   2499 	*buf++ = 0xb6;
   2500 	*buf = 0xC0 | (reg_lmap[reg] << 3) | reg_lmap[reg];
   2501 
   2502 	if (reg == TMP_REGISTER) {
   2503 		if (op == SLJIT_MOV) {
   2504 			compiler->mode32 = 0;
   2505 			EMIT_MOV(compiler, dst, dstw, TMP_REGISTER, 0);
   2506 		}
   2507 		else {
        			/* dst = dst <op> TMP_REGISTER, using the caller's original operand. */
   2508 #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) || (defined SLJIT_DEBUG && SLJIT_DEBUG)
   2509 			compiler->skip_checks = 1;
   2510 #endif
   2511 			return sljit_emit_op2(compiler, op, dst_save, dstw_save, dst_save, dstw_save, TMP_REGISTER, 0);
   2512 		}
   2513 	}
   2514 #else
        	/* x86-32: the byte-set instruction is applied directly only to
        	   SLJIT_TEMPORARY_REG1..3; every other destination borrows
        	   SLJIT_TEMPORARY_REG1 (register number 0 in the encodings below). */
   2515 	if (op == SLJIT_MOV) {
   2516 		if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_TEMPORARY_REG3) {
   2517 			buf = (sljit_ub*)ensure_buf(compiler, 1 + 3 + 3);
   2518 			FAIL_IF(!buf);
   2519 			INC_SIZE(3 + 3);
   2520 			/* Set low byte to conditional flag. */
   2521 			*buf++ = 0x0f;
   2522 			*buf++ = cond_set;
   2523 			*buf++ = 0xC0 | reg_map[dst];
   2524 
        			/* Zero-extend the byte to the whole register. */
   2525 			*buf++ = 0x0f;
   2526 			*buf++ = 0xb6;
   2527 			*buf = 0xC0 | (reg_map[dst] << 3) | reg_map[dst];
   2528 		}
   2529 		else {
        			/* Keep the borrowed register's value in TMP_REGISTER meanwhile. */
   2530 			EMIT_MOV(compiler, TMP_REGISTER, 0, SLJIT_TEMPORARY_REG1, 0);
   2531 
   2532 			buf = (sljit_ub*)ensure_buf(compiler, 1 + 3 + 3);
   2533 			FAIL_IF(!buf);
   2534 			INC_SIZE(3 + 3);
   2535 			/* Set al to conditional flag. */
   2536 			*buf++ = 0x0f;
   2537 			*buf++ = cond_set;
   2538 			*buf++ = 0xC0;
   2539 
   2540 			*buf++ = 0x0f;
   2541 			*buf++ = 0xb6;
        			/* Zero-extend al straight into dst when dst is one of the remaining
        			   registers; otherwise extend within the borrowed register and
        			   move the result to dst. */
   2542 			if (dst >= SLJIT_SAVED_REG1 && dst <= SLJIT_NO_REGISTERS)
   2543 				*buf = 0xC0 | (reg_map[dst] << 3);
   2544 			else {
   2545 				*buf = 0xC0;
   2546 				EMIT_MOV(compiler, dst, dstw, SLJIT_TEMPORARY_REG1, 0);
   2547 			}
   2548 
        			/* Give the borrowed register its value back. */
   2549 			EMIT_MOV(compiler, SLJIT_TEMPORARY_REG1, 0, TMP_REGISTER, 0);
   2550 		}
   2551 	}
   2552 	else {
   2553 		if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_TEMPORARY_REG3) {
        			/* TMP_REGISTER receives the old dst, then only the low byte of dst
        			   is replaced by the condition value (no zero extension here).
        			   NOTE(review): with these operands the final dst <op> TMP_REGISTER
        			   looks correct for an OR-like op only; confirm which ops
        			   check_sljit_emit_cond_value() lets through. */
   2554 			EMIT_MOV(compiler, TMP_REGISTER, 0, dst, 0);
   2555 			buf = (sljit_ub*)ensure_buf(compiler, 1 + 3);
   2556 			FAIL_IF(!buf);
   2557 			INC_SIZE(3);
   2558 
   2559 			*buf++ = 0x0f;
   2560 			*buf++ = cond_set;
   2561 			*buf++ = 0xC0 | reg_map[dst];
   2562 		}
   2563 		else {
   2564 			EMIT_MOV(compiler, TMP_REGISTER, 0, SLJIT_TEMPORARY_REG1, 0);
   2565 
   2566 			buf = (sljit_ub*)ensure_buf(compiler, 1 + 3 + 3 + 1);
   2567 			FAIL_IF(!buf);
   2568 			INC_SIZE(3 + 3 + 1);
   2569 			/* Set al to conditional flag. */
   2570 			*buf++ = 0x0f;
   2571 			*buf++ = cond_set;
   2572 			*buf++ = 0xC0;
   2573 
   2574 			*buf++ = 0x0f;
   2575 			*buf++ = 0xb6;
   2576 			*buf++ = 0xC0;
   2577 
        			/* One-byte exchange of the borrowed register with TMP_REGISTER:
        			   afterwards TMP_REGISTER holds the 0/1 value and the borrowed
        			   register has its old contents again. */
   2578 			*buf++ = 0x90 + reg_map[TMP_REGISTER];
   2579 		}
   2580 #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) || (defined SLJIT_DEBUG && SLJIT_DEBUG)
   2581 		compiler->skip_checks = 1;
   2582 #endif
   2583 		return sljit_emit_op2(compiler, op, dst_save, dstw_save, dst_save, dstw_save, TMP_REGISTER, 0);
   2584 	}
   2585 #endif
   2586 
   2587 	return SLJIT_SUCCESS;
   2588 }
   2589 
   /* Stores the address SLJIT_LOCALS_REG + offset into dst.
    *
    * offset is first passed through ADJUST_LOCAL_OFFSET like any other operand
    * based on SLJIT_LOCALS_REG. The address is then formed with an LEA (immediate
    * offset, or on x86-64 an offset loaded into TMP_REGISTER when it does not fit
    * a halfword) or with a plain move when the offset is zero.
    *
    * Returns the status of the emitter used.
    */
   2590 SLJIT_API_FUNC_ATTRIBUTE int sljit_get_local_base(struct sljit_compiler *compiler, int dst, sljit_w dstw, sljit_w offset)
   2591 {
   2592 	CHECK_ERROR();
   2593 	check_sljit_get_local_base(compiler, dst, dstw, offset);
   2594 	ADJUST_LOCAL_OFFSET(dst, dstw);
   2595 
   2596 	CHECK_EXTRA_REGS(dst, dstw, (void)0);
   2597 
   2598 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
        	/* Addresses are computed at full width. */
   2599 	compiler->mode32 = 0;
   2600 #endif
   2601 
   2602 	ADJUST_LOCAL_OFFSET(SLJIT_MEM1(SLJIT_LOCALS_REG), offset);
   2603 
   2604 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
   2605 	if (NOT_HALFWORD(offset)) {
   2606 		FAIL_IF(emit_load_imm64(compiler, TMP_REGISTER, offset));
        		/* The debug build additionally asserts that the LEA form is
        		   supported for this operand combination. */
   2607 #if (defined SLJIT_DEBUG && SLJIT_DEBUG)
   2608 		SLJIT_ASSERT(emit_lea_binary(compiler, dst, dstw, SLJIT_LOCALS_REG, 0, TMP_REGISTER, 0) != SLJIT_ERR_UNSUPPORTED);
   2609 		return compiler->error;
   2610 #else
   2611 		return emit_lea_binary(compiler, dst, dstw, SLJIT_LOCALS_REG, 0, TMP_REGISTER, 0);
   2612 #endif
   2613 	}
   2614 #endif
   2615 
   2616 	if (offset != 0)
   2617 		return emit_lea_binary(compiler, dst, dstw, SLJIT_LOCALS_REG, 0, SLJIT_IMM, offset);
   2618 	return emit_mov(compiler, dst, dstw, SLJIT_LOCALS_REG, 0);
   2619 }
   2620 
   /* Emits a load of init_value into dst and returns a handle for the constant.
    *
    * A sljit_const object is allocated and registered with set_const(), the load
    * is emitted, and a two-byte record (0, 1) is appended to the code buffer
    * right after the load.
    * On x86-64 the value is loaded with emit_load_imm64() into dst when dst is a
    * register, otherwise into TMP_REGISTER and then moved to dst.
    * On x86-32 a single move of the immediate is emitted; an unused dst is
    * replaced by TMP_REGISTER.
    *
    * Returns the constant object, or NULL on failure.
    */
   2621 SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, int dst, sljit_w dstw, sljit_w init_value)
   2622 {
   2623 	sljit_ub *buf;
   2624 	struct sljit_const *const_;
   2625 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
   2626 	int reg;
   2627 #endif
   2628 
   2629 	CHECK_ERROR_PTR();
   2630 	check_sljit_emit_const(compiler, dst, dstw, init_value);
   2631 	ADJUST_LOCAL_OFFSET(dst, dstw);
   2632 
   2633 	CHECK_EXTRA_REGS(dst, dstw, (void)0);
   2634 
   2635 	const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const));
   2636 	PTR_FAIL_IF(!const_);
   2637 	set_const(const_, compiler);
   2638 
   2639 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
   2640 	compiler->mode32 = 0;
   2641 	reg = (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) ? dst : TMP_REGISTER;
   2642 
   2643 	if (emit_load_imm64(compiler, reg, init_value))
   2644 		return NULL;
   2645 #else
   2646 	if (dst == SLJIT_UNUSED)
   2647 		dst = TMP_REGISTER;
   2648 
   2649 	if (emit_mov(compiler, dst, dstw, SLJIT_IMM, init_value))
   2650 		return NULL;
   2651 #endif
   2652 
   2653 	buf = (sljit_ub*)ensure_buf(compiler, 2);
   2654 	PTR_FAIL_IF(!buf);
   2655 
        	/* Record in the code buffer: a 0 byte followed by the tag 1. It is
        	   written directly after the load and before any follow-up move. */
   2656 	*buf++ = 0;
   2657 	*buf++ = 1;
   2658 
   2659 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
        	/* The value went through TMP_REGISTER: deliver it to the real dst. */
   2660 	if (reg == TMP_REGISTER && dst != SLJIT_UNUSED)
   2661 		if (emit_mov(compiler, dst, dstw, TMP_REGISTER, 0))
   2662 			return NULL;
   2663 #endif
   2664 
   2665 	return const_;
   2666 }
   2667 
   /* Overwrites the target stored at addr inside already generated code.
    * x86-32: a word-sized displacement relative to the end of the field is
    * written (new_addr - (addr + 4)).
    * x86-64: new_addr itself is written as an absolute word-sized value. */
   2668 SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_addr)
   2669 {
   2670 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
   2671 	*(sljit_w*)addr = new_addr - (addr + 4);
   2672 #else
   2673 	*(sljit_uw*)addr = new_addr;
   2674 #endif
   2675 }
   2676 
   /* Overwrites the word-sized constant stored at addr with new_constant. */
   2677 SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_w new_constant)
   2678 {
   2679 	*(sljit_w*)addr = new_constant;
   2680 }
   2681