1 /*
2 * Stack-less Just-In-Time compiler
3 *
4 * Copyright Zoltan Herczeg (hzmester (at) freemail.hu). All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without modification, are
7 * permitted provided that the following conditions are met:
8 *
9 * 1. Redistributions of source code must retain the above copyright notice, this list of
10 * conditions and the following disclaimer.
11 *
12 * 2. Redistributions in binary form must reproduce the above copyright notice, this list
13 * of conditions and the following disclaimer in the documentation and/or other materials
14 * provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
17 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
19 * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
21 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
22 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
24 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */
26
27 SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void)
28 {
29 return "x86" SLJIT_CPUINFO;
30 }
31
32 /*
33 32b register indexes:
34 0 - EAX
35 1 - ECX
36 2 - EDX
37 3 - EBX
38 4 - none
39 5 - EBP
40 6 - ESI
41 7 - EDI
42 */
43
44 /*
45 64b register indexes:
46 0 - RAX
47 1 - RCX
48 2 - RDX
49 3 - RBX
50 4 - none
51 5 - RBP
52 6 - RSI
53 7 - RDI
54 8 - R8 - from this point on a REX prefix is required
55 9 - R9
56 10 - R10
57 11 - R11
58 12 - R12
59 13 - R13
60 14 - R14
61 15 - R15
62 */
63
64 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
65
66 /* Last register + 1. */
67 #define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2)
68
69 static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 3] = {
70 0, 0, 2, 1, 0, 0, 0, 0, 0, 0, 7, 6, 3, 4, 5
71 };
72
73 #define CHECK_EXTRA_REGS(p, w, do) \
74 if (p >= SLJIT_R3 && p <= SLJIT_S3) { \
75 if (p <= compiler->scratches) \
76 w = compiler->saveds_offset - ((p) - SLJIT_R2) * (sljit_sw)sizeof(sljit_sw); \
77 else \
78 w = compiler->locals_offset + ((p) - SLJIT_S2) * (sljit_sw)sizeof(sljit_sw); \
79 p = SLJIT_MEM1(SLJIT_SP); \
80 do; \
81 }
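/* Roughly speaking: on x86-32 there are not enough hardware registers for all
   SLJIT virtual registers, so SLJIT_R3..SLJIT_S3 (whose reg_map entries above
   are 0) live in stack slots. CHECK_EXTRA_REGS rewrites such an operand into
   an SLJIT_MEM1(SLJIT_SP) access with the matching offset and evaluates "do"
   (typically a flag assignment) so the caller knows the rewrite happened. */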
82
83 #else /* SLJIT_CONFIG_X86_32 */
84
85 /* Last register + 1. */
86 #define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2)
87 #define TMP_REG2 (SLJIT_NUMBER_OF_REGISTERS + 3)
88 #define TMP_REG3 (SLJIT_NUMBER_OF_REGISTERS + 4)
89
90 /* Note: r12 & 0x7 == 0b100, which is decoded as a SIB byte being present.
91    Note: avoid using r12 and r13 for memory addressing;
92    therefore r12 is better suited for SAVED_EREG than SAVED_REG. */
93 #ifndef _WIN64
94 /* 1st argument passed in rdi, 2nd in rsi, 3rd in rdx. */
95 static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 5] = {
96 0, 0, 6, 1, 8, 11, 10, 12, 5, 13, 14, 15, 3, 4, 2, 7, 9
97 };
98 /* low-map. reg_map & 0x7. */
99 static const sljit_u8 reg_lmap[SLJIT_NUMBER_OF_REGISTERS + 5] = {
100 0, 0, 6, 1, 0, 3, 2, 4, 5, 5, 6, 7, 3, 4, 2, 7, 1
101 };
102 #else
103 /* 1st argument passed in rcx, 2nd in rdx, 3rd in r8. */
104 static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 5] = {
105 0, 0, 2, 1, 11, 12, 5, 13, 14, 15, 7, 6, 3, 4, 10, 8, 9
106 };
107 /* low-map. reg_map & 0x7. */
108 static const sljit_u8 reg_lmap[SLJIT_NUMBER_OF_REGISTERS + 5] = {
109 0, 0, 2, 1, 3, 4, 5, 5, 6, 7, 7, 6, 3, 4, 2, 0, 1
110 };
111 #endif
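/* Why r12 and r13 are awkward as base registers (see the note above): in a
   ModRM byte the r/m field value 4 (r12 & 0x7) does not name a register but
   signals that a SIB byte follows, and the value 5 (r13 & 0x7) with mod == 00
   means "disp32 only". Memory operands based on r12 therefore always need an
   extra SIB byte, and ones based on r13 always need an explicit displacement,
   making those encodings one byte longer. */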
112
113 #define REX_W 0x48
114 #define REX_R 0x44
115 #define REX_X 0x42
116 #define REX_B 0x41
117 #define REX 0x40
118
119 #ifndef _WIN64
120 #define HALFWORD_MAX 0x7fffffffl
121 #define HALFWORD_MIN -0x80000000l
122 #else
123 #define HALFWORD_MAX 0x7fffffffll
124 #define HALFWORD_MIN -0x80000000ll
125 #endif
126
127 #define IS_HALFWORD(x) ((x) <= HALFWORD_MAX && (x) >= HALFWORD_MIN)
128 #define NOT_HALFWORD(x) ((x) > HALFWORD_MAX || (x) < HALFWORD_MIN)
129
130 #define CHECK_EXTRA_REGS(p, w, do)
131
132 #endif /* SLJIT_CONFIG_X86_32 */
133
134 #define TMP_FREG (0)
135
136 /* Size flags for emit_x86_instruction: */
137 #define EX86_BIN_INS 0x0010
138 #define EX86_SHIFT_INS 0x0020
139 #define EX86_REX 0x0040
140 #define EX86_NO_REXW 0x0080
141 #define EX86_BYTE_ARG 0x0100
142 #define EX86_HALF_ARG 0x0200
143 #define EX86_PREF_66 0x0400
144 #define EX86_PREF_F2 0x0800
145 #define EX86_PREF_F3 0x1000
146 #define EX86_SSE2_OP1 0x2000
147 #define EX86_SSE2_OP2 0x4000
148 #define EX86_SSE2 (EX86_SSE2_OP1 | EX86_SSE2_OP2)
149
150 /* --------------------------------------------------------------------- */
151 /*  Instruction forms                                                     */
152 /* --------------------------------------------------------------------- */
153
154 #define ADD (/* BINARY */ 0 << 3)
155 #define ADD_EAX_i32 0x05
156 #define ADD_r_rm 0x03
157 #define ADD_rm_r 0x01
158 #define ADDSD_x_xm 0x58
159 #define ADC (/* BINARY */ 2 << 3)
160 #define ADC_EAX_i32 0x15
161 #define ADC_r_rm 0x13
162 #define ADC_rm_r 0x11
163 #define AND (/* BINARY */ 4 << 3)
164 #define AND_EAX_i32 0x25
165 #define AND_r_rm 0x23
166 #define AND_rm_r 0x21
167 #define ANDPD_x_xm 0x54
168 #define BSR_r_rm (/* GROUP_0F */ 0xbd)
169 #define CALL_i32 0xe8
170 #define CALL_rm (/* GROUP_FF */ 2 << 3)
171 #define CDQ 0x99
172 #define CMOVNE_r_rm (/* GROUP_0F */ 0x45)
173 #define CMP (/* BINARY */ 7 << 3)
174 #define CMP_EAX_i32 0x3d
175 #define CMP_r_rm 0x3b
176 #define CMP_rm_r 0x39
177 #define CVTPD2PS_x_xm 0x5a
178 #define CVTSI2SD_x_rm 0x2a
179 #define CVTTSD2SI_r_xm 0x2c
180 #define DIV (/* GROUP_F7 */ 6 << 3)
181 #define DIVSD_x_xm 0x5e
182 #define INT3 0xcc
183 #define IDIV (/* GROUP_F7 */ 7 << 3)
184 #define IMUL (/* GROUP_F7 */ 5 << 3)
185 #define IMUL_r_rm (/* GROUP_0F */ 0xaf)
186 #define IMUL_r_rm_i8 0x6b
187 #define IMUL_r_rm_i32 0x69
188 #define JE_i8 0x74
189 #define JNE_i8 0x75
190 #define JMP_i8 0xeb
191 #define JMP_i32 0xe9
192 #define JMP_rm (/* GROUP_FF */ 4 << 3)
193 #define LEA_r_m 0x8d
194 #define MOV_r_rm 0x8b
195 #define MOV_r_i32 0xb8
196 #define MOV_rm_r 0x89
197 #define MOV_rm_i32 0xc7
198 #define MOV_rm8_i8 0xc6
199 #define MOV_rm8_r8 0x88
200 #define MOVSD_x_xm 0x10
201 #define MOVSD_xm_x 0x11
202 #define MOVSXD_r_rm 0x63
203 #define MOVSX_r_rm8 (/* GROUP_0F */ 0xbe)
204 #define MOVSX_r_rm16 (/* GROUP_0F */ 0xbf)
205 #define MOVZX_r_rm8 (/* GROUP_0F */ 0xb6)
206 #define MOVZX_r_rm16 (/* GROUP_0F */ 0xb7)
207 #define MUL (/* GROUP_F7 */ 4 << 3)
208 #define MULSD_x_xm 0x59
209 #define NEG_rm (/* GROUP_F7 */ 3 << 3)
210 #define NOP 0x90
211 #define NOT_rm (/* GROUP_F7 */ 2 << 3)
212 #define OR (/* BINARY */ 1 << 3)
213 #define OR_r_rm 0x0b
214 #define OR_EAX_i32 0x0d
215 #define OR_rm_r 0x09
216 #define OR_rm8_r8 0x08
217 #define POP_r 0x58
218 #define POP_rm 0x8f
219 #define POPF 0x9d
220 #define PUSH_i32 0x68
221 #define PUSH_r 0x50
222 #define PUSH_rm (/* GROUP_FF */ 6 << 3)
223 #define PUSHF 0x9c
224 #define RET_near 0xc3
225 #define RET_i16 0xc2
226 #define SBB (/* BINARY */ 3 << 3)
227 #define SBB_EAX_i32 0x1d
228 #define SBB_r_rm 0x1b
229 #define SBB_rm_r 0x19
230 #define SAR (/* SHIFT */ 7 << 3)
231 #define SHL (/* SHIFT */ 4 << 3)
232 #define SHR (/* SHIFT */ 5 << 3)
233 #define SUB (/* BINARY */ 5 << 3)
234 #define SUB_EAX_i32 0x2d
235 #define SUB_r_rm 0x2b
236 #define SUB_rm_r 0x29
237 #define SUBSD_x_xm 0x5c
238 #define TEST_EAX_i32 0xa9
239 #define TEST_rm_r 0x85
240 #define UCOMISD_x_xm 0x2e
241 #define UNPCKLPD_x_xm 0x14
242 #define XCHG_EAX_r 0x90
243 #define XCHG_r_rm 0x87
244 #define XOR (/* BINARY */ 6 << 3)
245 #define XOR_EAX_i32 0x35
246 #define XOR_r_rm 0x33
247 #define XOR_rm_r 0x31
248 #define XORPD_x_xm 0x57
249
250 #define GROUP_0F 0x0f
251 #define GROUP_F7 0xf7
252 #define GROUP_FF 0xff
253 #define GROUP_BINARY_81 0x81
254 #define GROUP_BINARY_83 0x83
255 #define GROUP_SHIFT_1 0xd1
256 #define GROUP_SHIFT_N 0xc1
257 #define GROUP_SHIFT_CL 0xd3
258
259 #define MOD_REG 0xc0
260 #define MOD_DISP8 0x40
261
262 #define INC_SIZE(s) (*inst++ = (s), compiler->size += (s))
263
264 #define PUSH_REG(r) (*inst++ = (PUSH_r + (r)))
265 #define POP_REG(r) (*inst++ = (POP_r + (r)))
266 #define RET() (*inst++ = (RET_near))
267 #define RET_I16(n) (*inst++ = (RET_i16), *inst++ = n, *inst++ = 0)
268 /* r32, r/m32 */
269 #define MOV_RM(mod, reg, rm) (*inst++ = (MOV_r_rm), *inst++ = (mod) << 6 | (reg) << 3 | (rm))
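
/* Minimal sketch (assuming SLJIT_CONFIG_X86_64) of how the tables and opcode
   definitions above combine into machine code: a 64-bit register-to-register
   MOV needs a REX.W prefix, REX.R/REX.B bits for registers whose reg_map
   value is >= 8, and a ModRM byte built from the low three bits (reg_lmap).
   example_encode_mov_reg_reg is a hypothetical helper, for illustration only. */
#if 0
static sljit_u8* example_encode_mov_reg_reg(sljit_u8 *code, sljit_s32 dst, sljit_s32 src)
{
	sljit_u8 rex = REX_W;

	if (reg_map[dst] >= 8)
		rex |= REX_R;	/* dst goes into the ModRM reg field. */
	if (reg_map[src] >= 8)
		rex |= REX_B;	/* src goes into the ModRM r/m field. */

	*code++ = rex;
	*code++ = MOV_r_rm;
	*code++ = MOD_REG | (reg_lmap[dst] << 3) | reg_lmap[src];
	return code;
}
#endif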
270
271 /* Multithreading does not affect these static variables, since they store
272    built-in CPU features. They may be written by several threads that detect
273    the CPU features at the same time, but the value stored is always the same. */
274 #if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
275 static sljit_s32 cpu_has_sse2 = -1;
276 #endif
277 static sljit_s32 cpu_has_cmov = -1;
278
279 #ifdef _WIN32_WCE
280 #include <cmnintrin.h>
281 #elif defined(_MSC_VER) && _MSC_VER >= 1400
282 #include <intrin.h>
283 #endif
284
285 /******************************************************/
286 /* Unaligned-store functions */
287 /******************************************************/
288
289 static SLJIT_INLINE void sljit_unaligned_store_s16(void *addr, sljit_s16 value)
290 {
291 SLJIT_MEMCPY(addr, &value, sizeof(value));
292 }
293
294 static SLJIT_INLINE void sljit_unaligned_store_s32(void *addr, sljit_s32 value)
295 {
296 SLJIT_MEMCPY(addr, &value, sizeof(value));
297 }
298
299 static SLJIT_INLINE void sljit_unaligned_store_sw(void *addr, sljit_sw value)
300 {
301 SLJIT_MEMCPY(addr, &value, sizeof(value));
302 }
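
/* A minimal usage sketch (illustration only; example_patch_rel32 is a
   hypothetical helper): this is the pattern the generator uses to patch a
   32-bit relative displacement into already emitted code. The memcpy-based
   stores above keep this well defined even though the patched address is
   usually not naturally aligned. */
#if 0
static void example_patch_rel32(sljit_u8 *imm_addr, sljit_u8 *target)
{
	/* The displacement is relative to the end of the 4-byte immediate. */
	sljit_unaligned_store_s32((void*)imm_addr, (sljit_s32)(target - (imm_addr + sizeof(sljit_s32))));
}
#endif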
303
304 /******************************************************/
305 /* Utility functions */
306 /******************************************************/
307
308 static void get_cpu_features(void)
309 {
310 sljit_u32 features;
311
312 #if defined(_MSC_VER) && _MSC_VER >= 1400
313
314 int CPUInfo[4];
315 __cpuid(CPUInfo, 1);
316 features = (sljit_u32)CPUInfo[3];
317
318 #elif defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__SUNPRO_C)
319
320 /* AT&T syntax. */
321 __asm__ (
322 "movl $0x1, %%eax\n"
323 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
324 /* On x86-32, there is no red zone, so this
325 should work (no need for a local variable). */
326 "push %%ebx\n"
327 #endif
328 "cpuid\n"
329 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
330 "pop %%ebx\n"
331 #endif
332 "movl %%edx, %0\n"
333 : "=g" (features)
334 :
335 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
336 : "%eax", "%ecx", "%edx"
337 #else
338 : "%rax", "%rbx", "%rcx", "%rdx"
339 #endif
340 );
341
342 #else /* _MSC_VER && _MSC_VER >= 1400 */
343
344 /* Intel syntax. */
345 __asm {
346 mov eax, 1
347 cpuid
348 mov features, edx
349 }
350
351 #endif /* _MSC_VER && _MSC_VER >= 1400 */
352
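/* CPUID leaf 1 feature bits in EDX: bit 15 is CMOV, bit 26 is SSE2. */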
353 #if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
354 cpu_has_sse2 = (features >> 26) & 0x1;
355 #endif
356 cpu_has_cmov = (features >> 15) & 0x1;
357 }
358
359 static sljit_u8 get_jump_code(sljit_s32 type)
360 {
361 switch (type) {
362 case SLJIT_EQUAL:
363 case SLJIT_EQUAL_F64:
364 return 0x84 /* je */;
365
366 case SLJIT_NOT_EQUAL:
367 case SLJIT_NOT_EQUAL_F64:
368 return 0x85 /* jne */;
369
370 case SLJIT_LESS:
371 case SLJIT_LESS_F64:
372 return 0x82 /* jc */;
373
374 case SLJIT_GREATER_EQUAL:
375 case SLJIT_GREATER_EQUAL_F64:
376 return 0x83 /* jae */;
377
378 case SLJIT_GREATER:
379 case SLJIT_GREATER_F64:
380 return 0x87 /* jnbe */;
381
382 case SLJIT_LESS_EQUAL:
383 case SLJIT_LESS_EQUAL_F64:
384 return 0x86 /* jbe */;
385
386 case SLJIT_SIG_LESS:
387 return 0x8c /* jl */;
388
389 case SLJIT_SIG_GREATER_EQUAL:
390 return 0x8d /* jnl */;
391
392 case SLJIT_SIG_GREATER:
393 return 0x8f /* jnle */;
394
395 case SLJIT_SIG_LESS_EQUAL:
396 return 0x8e /* jle */;
397
398 case SLJIT_OVERFLOW:
399 case SLJIT_MUL_OVERFLOW:
400 return 0x80 /* jo */;
401
402 case SLJIT_NOT_OVERFLOW:
403 case SLJIT_MUL_NOT_OVERFLOW:
404 return 0x81 /* jno */;
405
406 case SLJIT_UNORDERED_F64:
407 return 0x8a /* jp */;
408
409 case SLJIT_ORDERED_F64:
410 return 0x8b /* jpo */;
411 }
412 return 0;
413 }
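
/* Note on the values above: they are the second bytes of the two-byte near
   conditional jumps (0F 80 .. 0F 8F). The corresponding short forms use the
   single-byte opcodes 0x70 .. 0x7F, i.e. near opcode - 0x10, which is what
   generate_near_jump_code relies on when it emits a short jump. */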
414
415 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
416 static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_s32 type, sljit_sw executable_offset);
417 #else
418 static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_s32 type);
419 #endif
420
421 static sljit_u8* generate_near_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_u8 *code, sljit_s32 type, sljit_sw executable_offset)
422 {
423 sljit_s32 short_jump;
424 sljit_uw label_addr;
425
426 if (jump->flags & JUMP_LABEL)
427 label_addr = (sljit_uw)(code + jump->u.label->size);
428 else
429 label_addr = jump->u.target - executable_offset;
430
431 short_jump = (sljit_sw)(label_addr - (jump->addr + 2)) >= -128 && (sljit_sw)(label_addr - (jump->addr + 2)) <= 127;
432
433 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
434 if ((sljit_sw)(label_addr - (jump->addr + 1)) > HALFWORD_MAX || (sljit_sw)(label_addr - (jump->addr + 1)) < HALFWORD_MIN)
435 return generate_far_jump_code(jump, code_ptr, type);
436 #endif
437
438 if (type == SLJIT_JUMP) {
439 if (short_jump)
440 *code_ptr++ = JMP_i8;
441 else
442 *code_ptr++ = JMP_i32;
443 jump->addr++;
444 }
445 else if (type >= SLJIT_FAST_CALL) {
446 short_jump = 0;
447 *code_ptr++ = CALL_i32;
448 jump->addr++;
449 }
450 else if (short_jump) {
451 *code_ptr++ = get_jump_code(type) - 0x10;
452 jump->addr++;
453 }
454 else {
455 *code_ptr++ = GROUP_0F;
456 *code_ptr++ = get_jump_code(type);
457 jump->addr += 2;
458 }
459
460 if (short_jump) {
461 jump->flags |= PATCH_MB;
462 code_ptr += sizeof(sljit_s8);
463 } else {
464 jump->flags |= PATCH_MW;
465 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
466 code_ptr += sizeof(sljit_sw);
467 #else
468 code_ptr += sizeof(sljit_s32);
469 #endif
470 }
471
472 return code_ptr;
473 }
474
475 SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler)
476 {
477 struct sljit_memory_fragment *buf;
478 sljit_u8 *code;
479 sljit_u8 *code_ptr;
480 sljit_u8 *buf_ptr;
481 sljit_u8 *buf_end;
482 sljit_u8 len;
483 sljit_sw executable_offset;
484 sljit_sw jump_addr;
485
486 struct sljit_label *label;
487 struct sljit_jump *jump;
488 struct sljit_const *const_;
489
490 CHECK_ERROR_PTR();
491 CHECK_PTR(check_sljit_generate_code(compiler));
492 reverse_buf(compiler);
493
494 /* Second code generation pass. */
495 code = (sljit_u8*)SLJIT_MALLOC_EXEC(compiler->size);
496 PTR_FAIL_WITH_EXEC_IF(code);
497 buf = compiler->buf;
498
499 code_ptr = code;
500 label = compiler->labels;
501 jump = compiler->jumps;
502 const_ = compiler->consts;
503 executable_offset = SLJIT_EXEC_OFFSET(code);
504
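	/* Each buffer holds a sequence of records, as read by the loop below:
	   either a non-zero length byte followed by that many bytes of generated
	   code, or a zero byte followed by a tag: 0 for a label, 1 for a constant,
	   and (jump type + 2) for a jump. */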
505 do {
506 buf_ptr = buf->memory;
507 buf_end = buf_ptr + buf->used_size;
508 do {
509 len = *buf_ptr++;
510 if (len > 0) {
511 /* The code is already generated. */
512 SLJIT_MEMCPY(code_ptr, buf_ptr, len);
513 code_ptr += len;
514 buf_ptr += len;
515 }
516 else {
517 if (*buf_ptr >= 2) {
518 jump->addr = (sljit_uw)code_ptr;
519 if (!(jump->flags & SLJIT_REWRITABLE_JUMP))
520 code_ptr = generate_near_jump_code(jump, code_ptr, code, *buf_ptr - 2, executable_offset);
521 else {
522 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
523 code_ptr = generate_far_jump_code(jump, code_ptr, *buf_ptr - 2, executable_offset);
524 #else
525 code_ptr = generate_far_jump_code(jump, code_ptr, *buf_ptr - 2);
526 #endif
527 }
528 jump = jump->next;
529 }
530 else if (*buf_ptr == 0) {
531 label->addr = ((sljit_uw)code_ptr) + executable_offset;
532 label->size = code_ptr - code;
533 label = label->next;
534 }
535 else { /* *buf_ptr is 1 */
536 const_->addr = ((sljit_uw)code_ptr) - sizeof(sljit_sw);
537 const_ = const_->next;
538 }
539 buf_ptr++;
540 }
541 } while (buf_ptr < buf_end);
542 SLJIT_ASSERT(buf_ptr == buf_end);
543 buf = buf->next;
544 } while (buf);
545
546 SLJIT_ASSERT(!label);
547 SLJIT_ASSERT(!jump);
548 SLJIT_ASSERT(!const_);
549
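	/* Second walk over the jumps: now that every label address is known, fill
	   in the 8-bit (PATCH_MB) and 32-bit (PATCH_MW) relative displacements,
	   and on x86-64 the 64-bit absolute addresses (PATCH_MD), left open above. */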
550 jump = compiler->jumps;
551 while (jump) {
552 jump_addr = jump->addr + executable_offset;
553
554 if (jump->flags & PATCH_MB) {
555 SLJIT_ASSERT((sljit_sw)(jump->u.label->addr - (jump_addr + sizeof(sljit_s8))) >= -128 && (sljit_sw)(jump->u.label->addr - (jump_addr + sizeof(sljit_s8))) <= 127);
556 *(sljit_u8*)jump->addr = (sljit_u8)(jump->u.label->addr - (jump_addr + sizeof(sljit_s8)));
557 } else if (jump->flags & PATCH_MW) {
558 if (jump->flags & JUMP_LABEL) {
559 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
560 sljit_unaligned_store_sw((void*)jump->addr, (sljit_sw)(jump->u.label->addr - (jump_addr + sizeof(sljit_sw))));
561 #else
562 SLJIT_ASSERT((sljit_sw)(jump->u.label->addr - (jump_addr + sizeof(sljit_s32))) >= HALFWORD_MIN && (sljit_sw)(jump->u.label->addr - (jump_addr + sizeof(sljit_s32))) <= HALFWORD_MAX);
563 sljit_unaligned_store_s32((void*)jump->addr, (sljit_s32)(jump->u.label->addr - (jump_addr + sizeof(sljit_s32))));
564 #endif
565 }
566 else {
567 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
568 sljit_unaligned_store_sw((void*)jump->addr, (sljit_sw)(jump->u.target - (jump_addr + sizeof(sljit_sw))));
569 #else
570 SLJIT_ASSERT((sljit_sw)(jump->u.target - (jump_addr + sizeof(sljit_s32))) >= HALFWORD_MIN && (sljit_sw)(jump->u.target - (jump_addr + sizeof(sljit_s32))) <= HALFWORD_MAX);
571 sljit_unaligned_store_s32((void*)jump->addr, (sljit_s32)(jump->u.target - (jump_addr + sizeof(sljit_s32))));
572 #endif
573 }
574 }
575 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
576 else if (jump->flags & PATCH_MD)
577 sljit_unaligned_store_sw((void*)jump->addr, jump->u.label->addr);
578 #endif
579
580 jump = jump->next;
581 }
582
583 /* Some space may be wasted because of short jumps. */
584 SLJIT_ASSERT(code_ptr <= code + compiler->size);
585 compiler->error = SLJIT_ERR_COMPILED;
586 compiler->executable_offset = executable_offset;
587 compiler->executable_size = code_ptr - code;
588 return (void*)(code + executable_offset);
589 }
590
591 /* --------------------------------------------------------------------- */
592 /* Operators */
593 /* --------------------------------------------------------------------- */
594
595 static sljit_s32 emit_cum_binary(struct sljit_compiler *compiler,
596 sljit_u8 op_rm, sljit_u8 op_mr, sljit_u8 op_imm, sljit_u8 op_eax_imm,
597 sljit_s32 dst, sljit_sw dstw,
598 sljit_s32 src1, sljit_sw src1w,
599 sljit_s32 src2, sljit_sw src2w);
600
601 static sljit_s32 emit_non_cum_binary(struct sljit_compiler *compiler,
602 sljit_u8 op_rm, sljit_u8 op_mr, sljit_u8 op_imm, sljit_u8 op_eax_imm,
603 sljit_s32 dst, sljit_sw dstw,
604 sljit_s32 src1, sljit_sw src1w,
605 sljit_s32 src2, sljit_sw src2w);
606
607 static sljit_s32 emit_mov(struct sljit_compiler *compiler,
608 sljit_s32 dst, sljit_sw dstw,
609 sljit_s32 src, sljit_sw srcw);
610
611 #define EMIT_MOV(compiler, dst, dstw, src, srcw) \
612 FAIL_IF(emit_mov(compiler, dst, dstw, src, srcw));
613
614 #ifdef _WIN32
615 #include <malloc.h>
616
617 static void SLJIT_CALL sljit_grow_stack(sljit_sw local_size)
618 {
619 /* Workaround for calling the internal _chkstk() function on Windows.
620    This function touches all 4k pages that belong to the requested stack space,
621    whose size is passed in local_size. This is necessary on Windows, where
622    the stack can only grow in 4k steps. If the stack is already large enough,
623    the function merely burns CPU cycles; since that cannot be known in
624    advance, it must always be called. I think this is a bad design in
625    general, even if there are reasons for it. */
626 *(volatile sljit_s32*)alloca(local_size) = 0;
627 }
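
/* Illustration only: a rough, explicit equivalent of the alloca() trick above,
   assuming a 4K page size (example_grow_stack is a hypothetical helper).
   Touching one byte in every page of the reserved area is what makes the OS
   commit the stack pages one guard page at a time. */
#if 0
static void SLJIT_CALL example_grow_stack(sljit_sw local_size)
{
	volatile sljit_u8 *probe = (volatile sljit_u8 *)alloca(local_size);
	sljit_sw offset;

	for (offset = 0; offset < local_size; offset += 4096)
		probe[offset] = 0;
}
#endif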
628
629 #endif
630
631 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
632 #include "sljitNativeX86_32.c"
633 #else
634 #include "sljitNativeX86_64.c"
635 #endif
636
637 static sljit_s32 emit_mov(struct sljit_compiler *compiler,
638 sljit_s32 dst, sljit_sw dstw,
639 sljit_s32 src, sljit_sw srcw)
640 {
641 sljit_u8* inst;
642
643 if (dst == SLJIT_UNUSED) {
644 /* No destination; no need to set up flags. */
645 if (src & SLJIT_MEM) {
646 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src, srcw);
647 FAIL_IF(!inst);
648 *inst = MOV_r_rm;
649 }
650 return SLJIT_SUCCESS;
651 }
652 if (FAST_IS_REG(src)) {
653 inst = emit_x86_instruction(compiler, 1, src, 0, dst, dstw);
654 FAIL_IF(!inst);
655 *inst = MOV_rm_r;
656 return SLJIT_SUCCESS;
657 }
658 if (src & SLJIT_IMM) {
659 if (FAST_IS_REG(dst)) {
660 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
661 return emit_do_imm(compiler, MOV_r_i32 + reg_map[dst], srcw);
662 #else
663 if (!compiler->mode32) {
664 if (NOT_HALFWORD(srcw))
665 return emit_load_imm64(compiler, dst, srcw);
666 }
667 else
668 return emit_do_imm32(compiler, (reg_map[dst] >= 8) ? REX_B : 0, MOV_r_i32 + reg_lmap[dst], srcw);
669 #endif
670 }
671 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
672 if (!compiler->mode32 && NOT_HALFWORD(srcw)) {
673 FAIL_IF(emit_load_imm64(compiler, TMP_REG2, srcw));
674 inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, dst, dstw);
675 FAIL_IF(!inst);
676 *inst = MOV_rm_r;
677 return SLJIT_SUCCESS;
678 }
679 #endif
680 inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, dstw);
681 FAIL_IF(!inst);
682 *inst = MOV_rm_i32;
683 return SLJIT_SUCCESS;
684 }
685 if (FAST_IS_REG(dst)) {
686 inst = emit_x86_instruction(compiler, 1, dst, 0, src, srcw);
687 FAIL_IF(!inst);
688 *inst = MOV_r_rm;
689 return SLJIT_SUCCESS;
690 }
691
692 /* Memory to memory move. Requires two instructions. */
693 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src, srcw);
694 FAIL_IF(!inst);
695 *inst = MOV_r_rm;
696 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
697 FAIL_IF(!inst);
698 *inst = MOV_rm_r;
699 return SLJIT_SUCCESS;
700 }
701
702 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op)
703 {
704 sljit_u8 *inst;
705 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
706 sljit_s32 size;
707 #endif
708
709 CHECK_ERROR();
710 CHECK(check_sljit_emit_op0(compiler, op));
711
712 switch (GET_OPCODE(op)) {
713 case SLJIT_BREAKPOINT:
714 inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
715 FAIL_IF(!inst);
716 INC_SIZE(1);
717 *inst = INT3;
718 break;
719 case SLJIT_NOP:
720 inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
721 FAIL_IF(!inst);
722 INC_SIZE(1);
723 *inst = NOP;
724 break;
725 case SLJIT_LMUL_UW:
726 case SLJIT_LMUL_SW:
727 case SLJIT_DIVMOD_UW:
728 case SLJIT_DIVMOD_SW:
729 case SLJIT_DIV_UW:
730 case SLJIT_DIV_SW:
731 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
732 #ifdef _WIN64
733 SLJIT_ASSERT(
734 reg_map[SLJIT_R0] == 0
735 && reg_map[SLJIT_R1] == 2
736 && reg_map[TMP_REG1] > 7);
737 #else
738 SLJIT_ASSERT(
739 reg_map[SLJIT_R0] == 0
740 && reg_map[SLJIT_R1] < 7
741 && reg_map[TMP_REG1] == 2);
742 #endif
743 compiler->mode32 = op & SLJIT_I32_OP;
744 #endif
745 SLJIT_COMPILE_ASSERT((SLJIT_DIVMOD_UW & 0x2) == 0 && SLJIT_DIV_UW - 0x2 == SLJIT_DIVMOD_UW, bad_div_opcode_assignments);
746
747 op = GET_OPCODE(op);
748 if ((op | 0x2) == SLJIT_DIV_UW) {
749 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64)
750 EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_R1, 0);
751 inst = emit_x86_instruction(compiler, 1, SLJIT_R1, 0, SLJIT_R1, 0);
752 #else
753 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, TMP_REG1, 0);
754 #endif
755 FAIL_IF(!inst);
756 *inst = XOR_r_rm;
757 }
758
759 if ((op | 0x2) == SLJIT_DIV_SW) {
760 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64)
761 EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_R1, 0);
762 #endif
763
764 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
765 inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
766 FAIL_IF(!inst);
767 INC_SIZE(1);
768 *inst = CDQ;
769 #else
770 if (compiler->mode32) {
771 inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
772 FAIL_IF(!inst);
773 INC_SIZE(1);
774 *inst = CDQ;
775 } else {
776 inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
777 FAIL_IF(!inst);
778 INC_SIZE(2);
779 *inst++ = REX_W;
780 *inst = CDQ;
781 }
782 #endif
783 }
784
785 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
786 inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
787 FAIL_IF(!inst);
788 INC_SIZE(2);
789 *inst++ = GROUP_F7;
790 *inst = MOD_REG | ((op >= SLJIT_DIVMOD_UW) ? reg_map[TMP_REG1] : reg_map[SLJIT_R1]);
791 #else
792 #ifdef _WIN64
793 size = (!compiler->mode32 || op >= SLJIT_DIVMOD_UW) ? 3 : 2;
794 #else
795 size = (!compiler->mode32) ? 3 : 2;
796 #endif
797 inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
798 FAIL_IF(!inst);
799 INC_SIZE(size);
800 #ifdef _WIN64
801 if (!compiler->mode32)
802 *inst++ = REX_W | ((op >= SLJIT_DIVMOD_UW) ? REX_B : 0);
803 else if (op >= SLJIT_DIVMOD_UW)
804 *inst++ = REX_B;
805 *inst++ = GROUP_F7;
806 *inst = MOD_REG | ((op >= SLJIT_DIVMOD_UW) ? reg_lmap[TMP_REG1] : reg_lmap[SLJIT_R1]);
807 #else
808 if (!compiler->mode32)
809 *inst++ = REX_W;
810 *inst++ = GROUP_F7;
811 *inst = MOD_REG | reg_map[SLJIT_R1];
812 #endif
813 #endif
814 switch (op) {
815 case SLJIT_LMUL_UW:
816 *inst |= MUL;
817 break;
818 case SLJIT_LMUL_SW:
819 *inst |= IMUL;
820 break;
821 case SLJIT_DIVMOD_UW:
822 case SLJIT_DIV_UW:
823 *inst |= DIV;
824 break;
825 case SLJIT_DIVMOD_SW:
826 case SLJIT_DIV_SW:
827 *inst |= IDIV;
828 break;
829 }
830 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) && !defined(_WIN64)
831 if (op <= SLJIT_DIVMOD_SW)
832 EMIT_MOV(compiler, SLJIT_R1, 0, TMP_REG1, 0);
833 #else
834 if (op >= SLJIT_DIV_UW)
835 EMIT_MOV(compiler, SLJIT_R1, 0, TMP_REG1, 0);
836 #endif
837 break;
838 }
839
840 return SLJIT_SUCCESS;
841 }
842
843 #define ENCODE_PREFIX(prefix) \
844 do { \
845 inst = (sljit_u8*)ensure_buf(compiler, 1 + 1); \
846 FAIL_IF(!inst); \
847 INC_SIZE(1); \
848 *inst = (prefix); \
849 } while (0)
850
851 static sljit_s32 emit_mov_byte(struct sljit_compiler *compiler, sljit_s32 sign,
852 sljit_s32 dst, sljit_sw dstw,
853 sljit_s32 src, sljit_sw srcw)
854 {
855 sljit_u8* inst;
856 sljit_s32 dst_r;
857 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
858 sljit_s32 work_r;
859 #endif
860
861 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
862 compiler->mode32 = 0;
863 #endif
864
865 if (dst == SLJIT_UNUSED && !(src & SLJIT_MEM))
866 return SLJIT_SUCCESS; /* Empty instruction. */
867
868 if (src & SLJIT_IMM) {
869 if (FAST_IS_REG(dst)) {
870 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
871 return emit_do_imm(compiler, MOV_r_i32 + reg_map[dst], srcw);
872 #else
873 inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, 0);
874 FAIL_IF(!inst);
875 *inst = MOV_rm_i32;
876 return SLJIT_SUCCESS;
877 #endif
878 }
879 inst = emit_x86_instruction(compiler, 1 | EX86_BYTE_ARG | EX86_NO_REXW, SLJIT_IMM, srcw, dst, dstw);
880 FAIL_IF(!inst);
881 *inst = MOV_rm8_i8;
882 return SLJIT_SUCCESS;
883 }
884
885 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
886
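	/* On x86-32 only EAX, ECX, EDX and EBX (reg_map < 4) have directly
	   addressable low-byte registers (AL..BL); the other r/m encodings select
	   AH..BH instead, so the cases below first move the value into one of the
	   low four registers when necessary. */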
887 if ((dst & SLJIT_MEM) && FAST_IS_REG(src)) {
888 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
889 if (reg_map[src] >= 4) {
890 SLJIT_ASSERT(dst_r == TMP_REG1);
891 EMIT_MOV(compiler, TMP_REG1, 0, src, 0);
892 } else
893 dst_r = src;
894 #else
895 dst_r = src;
896 #endif
897 }
898 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
899 else if (FAST_IS_REG(src) && reg_map[src] >= 4) {
900 /* src, dst are registers. */
901 SLJIT_ASSERT(SLOW_IS_REG(dst));
902 if (reg_map[dst] < 4) {
903 if (dst != src)
904 EMIT_MOV(compiler, dst, 0, src, 0);
905 inst = emit_x86_instruction(compiler, 2, dst, 0, dst, 0);
906 FAIL_IF(!inst);
907 *inst++ = GROUP_0F;
908 *inst = sign ? MOVSX_r_rm8 : MOVZX_r_rm8;
909 }
910 else {
911 if (dst != src)
912 EMIT_MOV(compiler, dst, 0, src, 0);
913 if (sign) {
914 /* shl reg, 24 */
915 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 24, dst, 0);
916 FAIL_IF(!inst);
917 *inst |= SHL;
918 /* sar reg, 24 */
919 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 24, dst, 0);
920 FAIL_IF(!inst);
921 *inst |= SAR;
922 }
923 else {
924 inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 0xff, dst, 0);
925 FAIL_IF(!inst);
926 *(inst + 1) |= AND;
927 }
928 }
929 return SLJIT_SUCCESS;
930 }
931 #endif
932 else {
933 /* src is either a memory address or a register with reg_map[src] < 4 on the x86-32 architecture. */
934 inst = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw);
935 FAIL_IF(!inst);
936 *inst++ = GROUP_0F;
937 *inst = sign ? MOVSX_r_rm8 : MOVZX_r_rm8;
938 }
939
940 if (dst & SLJIT_MEM) {
941 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
942 if (dst_r == TMP_REG1) {
943 /* Find an unused register whose reg_map value is < 4. */
944 if ((dst & REG_MASK) == SLJIT_R0) {
945 if ((dst & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_R1))
946 work_r = SLJIT_R2;
947 else
948 work_r = SLJIT_R1;
949 }
950 else {
951 if ((dst & OFFS_REG_MASK) != TO_OFFS_REG(SLJIT_R0))
952 work_r = SLJIT_R0;
953 else if ((dst & REG_MASK) == SLJIT_R1)
954 work_r = SLJIT_R2;
955 else
956 work_r = SLJIT_R1;
957 }
958
959 if (work_r == SLJIT_R0) {
960 ENCODE_PREFIX(XCHG_EAX_r + reg_map[TMP_REG1]);
961 }
962 else {
963 inst = emit_x86_instruction(compiler, 1, work_r, 0, dst_r, 0);
964 FAIL_IF(!inst);
965 *inst = XCHG_r_rm;
966 }
967
968 inst = emit_x86_instruction(compiler, 1, work_r, 0, dst, dstw);
969 FAIL_IF(!inst);
970 *inst = MOV_rm8_r8;
971
972 if (work_r == SLJIT_R0) {
973 ENCODE_PREFIX(XCHG_EAX_r + reg_map[TMP_REG1]);
974 }
975 else {
976 inst = emit_x86_instruction(compiler, 1, work_r, 0, dst_r, 0);
977 FAIL_IF(!inst);
978 *inst = XCHG_r_rm;
979 }
980 }
981 else {
982 inst = emit_x86_instruction(compiler, 1, dst_r, 0, dst, dstw);
983 FAIL_IF(!inst);
984 *inst = MOV_rm8_r8;
985 }
986 #else
987 inst = emit_x86_instruction(compiler, 1 | EX86_REX | EX86_NO_REXW, dst_r, 0, dst, dstw);
988 FAIL_IF(!inst);
989 *inst = MOV_rm8_r8;
990 #endif
991 }
992
993 return SLJIT_SUCCESS;
994 }
995
996 static sljit_s32 emit_mov_half(struct sljit_compiler *compiler, sljit_s32 sign,
997 sljit_s32 dst, sljit_sw dstw,
998 sljit_s32 src, sljit_sw srcw)
999 {
1000 sljit_u8* inst;
1001 sljit_s32 dst_r;
1002
1003 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1004 compiler->mode32 = 0;
1005 #endif
1006
1007 if (dst == SLJIT_UNUSED && !(src & SLJIT_MEM))
1008 return SLJIT_SUCCESS; /* Empty instruction. */
1009
1010 if (src & SLJIT_IMM) {
1011 if (FAST_IS_REG(dst)) {
1012 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1013 return emit_do_imm(compiler, MOV_r_i32 + reg_map[dst], srcw);
1014 #else
1015 inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, 0);
1016 FAIL_IF(!inst);
1017 *inst = MOV_rm_i32;
1018 return SLJIT_SUCCESS;
1019 #endif
1020 }
1021 inst = emit_x86_instruction(compiler, 1 | EX86_HALF_ARG | EX86_NO_REXW | EX86_PREF_66, SLJIT_IMM, srcw, dst, dstw);
1022 FAIL_IF(!inst);
1023 *inst = MOV_rm_i32;
1024 return SLJIT_SUCCESS;
1025 }
1026
1027 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
1028
1029 if ((dst & SLJIT_MEM) && FAST_IS_REG(src))
1030 dst_r = src;
1031 else {
1032 inst = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw);
1033 FAIL_IF(!inst);
1034 *inst++ = GROUP_0F;
1035 *inst = sign ? MOVSX_r_rm16 : MOVZX_r_rm16;
1036 }
1037
1038 if (dst & SLJIT_MEM) {
1039 inst = emit_x86_instruction(compiler, 1 | EX86_NO_REXW | EX86_PREF_66, dst_r, 0, dst, dstw);
1040 FAIL_IF(!inst);
1041 *inst = MOV_rm_r;
1042 }
1043
1044 return SLJIT_SUCCESS;
1045 }
1046
1047 static sljit_s32 emit_unary(struct sljit_compiler *compiler, sljit_u8 opcode,
1048 sljit_s32 dst, sljit_sw dstw,
1049 sljit_s32 src, sljit_sw srcw)
1050 {
1051 sljit_u8* inst;
1052
1053 if (dst == SLJIT_UNUSED) {
1054 EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
1055 inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
1056 FAIL_IF(!inst);
1057 *inst++ = GROUP_F7;
1058 *inst |= opcode;
1059 return SLJIT_SUCCESS;
1060 }
1061 if (dst == src && dstw == srcw) {
1062 /* Same input and output */
1063 inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
1064 FAIL_IF(!inst);
1065 *inst++ = GROUP_F7;
1066 *inst |= opcode;
1067 return SLJIT_SUCCESS;
1068 }
1069 if (FAST_IS_REG(dst)) {
1070 EMIT_MOV(compiler, dst, 0, src, srcw);
1071 inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
1072 FAIL_IF(!inst);
1073 *inst++ = GROUP_F7;
1074 *inst |= opcode;
1075 return SLJIT_SUCCESS;
1076 }
1077 EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
1078 inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
1079 FAIL_IF(!inst);
1080 *inst++ = GROUP_F7;
1081 *inst |= opcode;
1082 EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
1083 return SLJIT_SUCCESS;
1084 }
1085
1086 static sljit_s32 emit_not_with_flags(struct sljit_compiler *compiler,
1087 sljit_s32 dst, sljit_sw dstw,
1088 sljit_s32 src, sljit_sw srcw)
1089 {
1090 sljit_u8* inst;
1091
1092 if (dst == SLJIT_UNUSED) {
1093 EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
1094 inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
1095 FAIL_IF(!inst);
1096 *inst++ = GROUP_F7;
1097 *inst |= NOT_rm;
1098 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, TMP_REG1, 0);
1099 FAIL_IF(!inst);
1100 *inst = OR_r_rm;
1101 return SLJIT_SUCCESS;
1102 }
1103 if (FAST_IS_REG(dst)) {
1104 EMIT_MOV(compiler, dst, 0, src, srcw);
1105 inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
1106 FAIL_IF(!inst);
1107 *inst++ = GROUP_F7;
1108 *inst |= NOT_rm;
1109 inst = emit_x86_instruction(compiler, 1, dst, 0, dst, 0);
1110 FAIL_IF(!inst);
1111 *inst = OR_r_rm;
1112 return SLJIT_SUCCESS;
1113 }
1114 EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
1115 inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
1116 FAIL_IF(!inst);
1117 *inst++ = GROUP_F7;
1118 *inst |= NOT_rm;
1119 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, TMP_REG1, 0);
1120 FAIL_IF(!inst);
1121 *inst = OR_r_rm;
1122 EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
1123 return SLJIT_SUCCESS;
1124 }
1125
1126 static sljit_s32 emit_clz(struct sljit_compiler *compiler, sljit_s32 op_flags,
1127 sljit_s32 dst, sljit_sw dstw,
1128 sljit_s32 src, sljit_sw srcw)
1129 {
1130 sljit_u8* inst;
1131 sljit_s32 dst_r;
1132
1133 SLJIT_UNUSED_ARG(op_flags);
1134 if (SLJIT_UNLIKELY(dst == SLJIT_UNUSED)) {
1135 /* Just set the zero flag. */
1136 EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
1137 inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
1138 FAIL_IF(!inst);
1139 *inst++ = GROUP_F7;
1140 *inst |= NOT_rm;
1141 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1142 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 31, TMP_REG1, 0);
1143 #else
1144 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, !(op_flags & SLJIT_I32_OP) ? 63 : 31, TMP_REG1, 0);
1145 #endif
1146 FAIL_IF(!inst);
1147 *inst |= SHR;
1148 return SLJIT_SUCCESS;
1149 }
1150
1151 if (SLJIT_UNLIKELY(src & SLJIT_IMM)) {
1152 EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, srcw);
1153 src = TMP_REG1;
1154 srcw = 0;
1155 }
1156
1157 inst = emit_x86_instruction(compiler, 2, TMP_REG1, 0, src, srcw);
1158 FAIL_IF(!inst);
1159 *inst++ = GROUP_0F;
1160 *inst = BSR_r_rm;
1161
1162 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1163 if (FAST_IS_REG(dst))
1164 dst_r = dst;
1165 else {
1166 /* Find an unused temporary register. */
1167 if ((dst & REG_MASK) != SLJIT_R0 && (dst & OFFS_REG_MASK) != TO_OFFS_REG(SLJIT_R0))
1168 dst_r = SLJIT_R0;
1169 else if ((dst & REG_MASK) != SLJIT_R1 && (dst & OFFS_REG_MASK) != TO_OFFS_REG(SLJIT_R1))
1170 dst_r = SLJIT_R1;
1171 else
1172 dst_r = SLJIT_R2;
1173 EMIT_MOV(compiler, dst, dstw, dst_r, 0);
1174 }
1175 EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, 32 + 31);
1176 #else
1177 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2;
1178 compiler->mode32 = 0;
1179 EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, !(op_flags & SLJIT_I32_OP) ? 64 + 63 : 32 + 31);
1180 compiler->mode32 = op_flags & SLJIT_I32_OP;
1181 #endif
1182
1183 if (cpu_has_cmov == -1)
1184 get_cpu_features();
1185
1186 if (cpu_has_cmov) {
1187 inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG1, 0);
1188 FAIL_IF(!inst);
1189 *inst++ = GROUP_0F;
1190 *inst = CMOVNE_r_rm;
1191 } else {
1192 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1193 inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
1194 FAIL_IF(!inst);
1195 INC_SIZE(4);
1196
1197 *inst++ = JE_i8;
1198 *inst++ = 2;
1199 *inst++ = MOV_r_rm;
1200 *inst++ = MOD_REG | (reg_map[dst_r] << 3) | reg_map[TMP_REG1];
1201 #else
1202 inst = (sljit_u8*)ensure_buf(compiler, 1 + 5);
1203 FAIL_IF(!inst);
1204 INC_SIZE(5);
1205
1206 *inst++ = JE_i8;
1207 *inst++ = 3;
1208 *inst++ = REX_W | (reg_map[dst_r] >= 8 ? REX_R : 0) | (reg_map[TMP_REG1] >= 8 ? REX_B : 0);
1209 *inst++ = MOV_r_rm;
1210 *inst++ = MOD_REG | (reg_lmap[dst_r] << 3) | reg_lmap[TMP_REG1];
1211 #endif
1212 }
1213
1214 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1215 inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 31, dst_r, 0);
1216 #else
1217 inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, !(op_flags & SLJIT_I32_OP) ? 63 : 31, dst_r, 0);
1218 #endif
1219 FAIL_IF(!inst);
1220 *(inst + 1) |= XOR;
1221
1222 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1223 if (dst & SLJIT_MEM) {
1224 inst = emit_x86_instruction(compiler, 1, dst_r, 0, dst, dstw);
1225 FAIL_IF(!inst);
1226 *inst = XCHG_r_rm;
1227 }
1228 #else
1229 if (dst & SLJIT_MEM)
1230 EMIT_MOV(compiler, dst, dstw, TMP_REG2, 0);
1231 #endif
1232 return SLJIT_SUCCESS;
1233 }
1234
1235 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op,
1236 sljit_s32 dst, sljit_sw dstw,
1237 sljit_s32 src, sljit_sw srcw)
1238 {
1239 sljit_s32 update = 0;
1240 sljit_s32 op_flags = GET_ALL_FLAGS(op);
1241 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1242 sljit_s32 dst_is_ereg = 0;
1243 sljit_s32 src_is_ereg = 0;
1244 #else
1245 # define src_is_ereg 0
1246 #endif
1247
1248 CHECK_ERROR();
1249 CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw));
1250 ADJUST_LOCAL_OFFSET(dst, dstw);
1251 ADJUST_LOCAL_OFFSET(src, srcw);
1252
1253 CHECK_EXTRA_REGS(dst, dstw, dst_is_ereg = 1);
1254 CHECK_EXTRA_REGS(src, srcw, src_is_ereg = 1);
1255 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1256 compiler->mode32 = op_flags & SLJIT_I32_OP;
1257 #endif
1258
1259 op = GET_OPCODE(op);
1260 if (op >= SLJIT_MOV && op <= SLJIT_MOVU_P) {
1261 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1262 compiler->mode32 = 0;
1263 #endif
1264
1265 if (op_flags & SLJIT_I32_OP) {
1266 if (FAST_IS_REG(src) && src == dst) {
1267 if (!TYPE_CAST_NEEDED(op))
1268 return SLJIT_SUCCESS;
1269 }
1270 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1271 if (op == SLJIT_MOV_S32 && (src & SLJIT_MEM))
1272 op = SLJIT_MOV_U32;
1273 if (op == SLJIT_MOVU_S32 && (src & SLJIT_MEM))
1274 op = SLJIT_MOVU_U32;
1275 if (op == SLJIT_MOV_U32 && (src & SLJIT_IMM))
1276 op = SLJIT_MOV_S32;
1277 if (op == SLJIT_MOVU_U32 && (src & SLJIT_IMM))
1278 op = SLJIT_MOVU_S32;
1279 #endif
1280 }
1281
1282 SLJIT_COMPILE_ASSERT(SLJIT_MOV + 8 == SLJIT_MOVU, movu_offset);
1283 if (op >= SLJIT_MOVU) {
1284 update = 1;
1285 op -= 8;
1286 }
1287
1288 if (src & SLJIT_IMM) {
1289 switch (op) {
1290 case SLJIT_MOV_U8:
1291 srcw = (sljit_u8)srcw;
1292 break;
1293 case SLJIT_MOV_S8:
1294 srcw = (sljit_s8)srcw;
1295 break;
1296 case SLJIT_MOV_U16:
1297 srcw = (sljit_u16)srcw;
1298 break;
1299 case SLJIT_MOV_S16:
1300 srcw = (sljit_s16)srcw;
1301 break;
1302 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1303 case SLJIT_MOV_U32:
1304 srcw = (sljit_u32)srcw;
1305 break;
1306 case SLJIT_MOV_S32:
1307 srcw = (sljit_s32)srcw;
1308 break;
1309 #endif
1310 }
1311 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1312 if (SLJIT_UNLIKELY(dst_is_ereg))
1313 return emit_mov(compiler, dst, dstw, src, srcw);
1314 #endif
1315 }
1316
1317 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1318 if (SLJIT_UNLIKELY(dst_is_ereg) && (!(op == SLJIT_MOV || op == SLJIT_MOV_U32 || op == SLJIT_MOV_S32 || op == SLJIT_MOV_P) || (src & SLJIT_MEM))) {
1319 SLJIT_ASSERT(dst == SLJIT_MEM1(SLJIT_SP));
1320 dst = TMP_REG1;
1321 }
1322 #endif
1323
1324 switch (op) {
1325 case SLJIT_MOV:
1326 case SLJIT_MOV_P:
1327 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1328 case SLJIT_MOV_U32:
1329 case SLJIT_MOV_S32:
1330 #endif
1331 FAIL_IF(emit_mov(compiler, dst, dstw, src, srcw));
1332 break;
1333 case SLJIT_MOV_U8:
1334 FAIL_IF(emit_mov_byte(compiler, 0, dst, dstw, src, srcw));
1335 break;
1336 case SLJIT_MOV_S8:
1337 FAIL_IF(emit_mov_byte(compiler, 1, dst, dstw, src, srcw));
1338 break;
1339 case SLJIT_MOV_U16:
1340 FAIL_IF(emit_mov_half(compiler, 0, dst, dstw, src, srcw));
1341 break;
1342 case SLJIT_MOV_S16:
1343 FAIL_IF(emit_mov_half(compiler, 1, dst, dstw, src, srcw));
1344 break;
1345 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1346 case SLJIT_MOV_U32:
1347 FAIL_IF(emit_mov_int(compiler, 0, dst, dstw, src, srcw));
1348 break;
1349 case SLJIT_MOV_S32:
1350 FAIL_IF(emit_mov_int(compiler, 1, dst, dstw, src, srcw));
1351 break;
1352 #endif
1353 }
1354
1355 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1356 if (SLJIT_UNLIKELY(dst_is_ereg) && dst == TMP_REG1)
1357 return emit_mov(compiler, SLJIT_MEM1(SLJIT_SP), dstw, TMP_REG1, 0);
1358 #endif
1359
1360 if (SLJIT_UNLIKELY(update) && (src & SLJIT_MEM) && !src_is_ereg && (src & REG_MASK)) {
1361 if ((src & OFFS_REG_MASK) != 0) {
1362 FAIL_IF(emit_cum_binary(compiler, ADD_r_rm, ADD_rm_r, ADD, ADD_EAX_i32,
1363 (src & REG_MASK), 0, (src & REG_MASK), 0, OFFS_REG(dst), 0));
1364 }
1365 else if (srcw != 0) {
1366 FAIL_IF(emit_cum_binary(compiler, ADD_r_rm, ADD_rm_r, ADD, ADD_EAX_i32,
1367 (src & REG_MASK), 0, (src & REG_MASK), 0, SLJIT_IMM, srcw));
1368 }
1369 }
1370
1371 if (SLJIT_UNLIKELY(update) && (dst & SLJIT_MEM) && (dst & REG_MASK)) {
1372 if ((dst & OFFS_REG_MASK) != 0) {
1373 FAIL_IF(emit_cum_binary(compiler, ADD_r_rm, ADD_rm_r, ADD, ADD_EAX_i32,
1374 (dst & REG_MASK), 0, (dst & REG_MASK), 0, OFFS_REG(dst), 0));
1375 }
1376 else if (dstw != 0) {
1377 FAIL_IF(emit_cum_binary(compiler, ADD_r_rm, ADD_rm_r, ADD, ADD_EAX_i32,
1378 (dst & REG_MASK), 0, (dst & REG_MASK), 0, SLJIT_IMM, dstw));
1379 }
1380 }
1381 return SLJIT_SUCCESS;
1382 }
1383
1384 switch (op) {
1385 case SLJIT_NOT:
1386 if (SLJIT_UNLIKELY(op_flags & SLJIT_SET_Z))
1387 return emit_not_with_flags(compiler, dst, dstw, src, srcw);
1388 return emit_unary(compiler, NOT_rm, dst, dstw, src, srcw);
1389
1390 case SLJIT_NEG:
1391 return emit_unary(compiler, NEG_rm, dst, dstw, src, srcw);
1392
1393 case SLJIT_CLZ:
1394 return emit_clz(compiler, op_flags, dst, dstw, src, srcw);
1395 }
1396
1397 return SLJIT_SUCCESS;
1398
1399 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1400 # undef src_is_ereg
1401 #endif
1402 }
1403
1404 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1405
1406 #define BINARY_IMM(op_imm, op_mr, immw, arg, argw) \
1407 if (IS_HALFWORD(immw) || compiler->mode32) { \
1408 inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, immw, arg, argw); \
1409 FAIL_IF(!inst); \
1410 *(inst + 1) |= (op_imm); \
1411 } \
1412 else { \
1413 FAIL_IF(emit_load_imm64(compiler, TMP_REG2, immw)); \
1414 inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, arg, argw); \
1415 FAIL_IF(!inst); \
1416 *inst = (op_mr); \
1417 }
1418
1419 #define BINARY_EAX_IMM(op_eax_imm, immw) \
1420 FAIL_IF(emit_do_imm32(compiler, (!compiler->mode32) ? REX_W : 0, (op_eax_imm), immw))
1421
1422 #else
1423
1424 #define BINARY_IMM(op_imm, op_mr, immw, arg, argw) \
1425 inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, immw, arg, argw); \
1426 FAIL_IF(!inst); \
1427 *(inst + 1) |= (op_imm);
1428
1429 #define BINARY_EAX_IMM(op_eax_imm, immw) \
1430 FAIL_IF(emit_do_imm(compiler, (op_eax_imm), immw))
1431
1432 #endif
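
/* BINARY_IMM emits "op r/m, imm" (on x86-64 loading the immediate into
   TMP_REG2 first when it does not fit in a sign-extended 32-bit field), while
   BINARY_EAX_IMM uses the shorter EAX/RAX-specific "op eax, imm32" form; the
   callers below only pick the latter when the destination is SLJIT_R0 and the
   immediate does not fit in a sign-extended 8-bit field. */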
1433
1434 static sljit_s32 emit_cum_binary(struct sljit_compiler *compiler,
1435 sljit_u8 op_rm, sljit_u8 op_mr, sljit_u8 op_imm, sljit_u8 op_eax_imm,
1436 sljit_s32 dst, sljit_sw dstw,
1437 sljit_s32 src1, sljit_sw src1w,
1438 sljit_s32 src2, sljit_sw src2w)
1439 {
1440 sljit_u8* inst;
1441
1442 if (dst == SLJIT_UNUSED) {
1443 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
1444 if (src2 & SLJIT_IMM) {
1445 BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
1446 }
1447 else {
1448 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
1449 FAIL_IF(!inst);
1450 *inst = op_rm;
1451 }
1452 return SLJIT_SUCCESS;
1453 }
1454
1455 if (dst == src1 && dstw == src1w) {
1456 if (src2 & SLJIT_IMM) {
1457 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1458 if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
1459 #else
1460 if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128)) {
1461 #endif
1462 BINARY_EAX_IMM(op_eax_imm, src2w);
1463 }
1464 else {
1465 BINARY_IMM(op_imm, op_mr, src2w, dst, dstw);
1466 }
1467 }
1468 else if (FAST_IS_REG(dst)) {
1469 inst = emit_x86_instruction(compiler, 1, dst, dstw, src2, src2w);
1470 FAIL_IF(!inst);
1471 *inst = op_rm;
1472 }
1473 else if (FAST_IS_REG(src2)) {
1474 /* Special exception for sljit_emit_op_flags. */
1475 inst = emit_x86_instruction(compiler, 1, src2, src2w, dst, dstw);
1476 FAIL_IF(!inst);
1477 *inst = op_mr;
1478 }
1479 else {
1480 EMIT_MOV(compiler, TMP_REG1, 0, src2, src2w);
1481 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
1482 FAIL_IF(!inst);
1483 *inst = op_mr;
1484 }
1485 return SLJIT_SUCCESS;
1486 }
1487
1488 /* Only for cumulative operations. */
1489 if (dst == src2 && dstw == src2w) {
1490 if (src1 & SLJIT_IMM) {
1491 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1492 if ((dst == SLJIT_R0) && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) {
1493 #else
1494 if ((dst == SLJIT_R0) && (src1w > 127 || src1w < -128)) {
1495 #endif
1496 BINARY_EAX_IMM(op_eax_imm, src1w);
1497 }
1498 else {
1499 BINARY_IMM(op_imm, op_mr, src1w, dst, dstw);
1500 }
1501 }
1502 else if (FAST_IS_REG(dst)) {
1503 inst = emit_x86_instruction(compiler, 1, dst, dstw, src1, src1w);
1504 FAIL_IF(!inst);
1505 *inst = op_rm;
1506 }
1507 else if (FAST_IS_REG(src1)) {
1508 inst = emit_x86_instruction(compiler, 1, src1, src1w, dst, dstw);
1509 FAIL_IF(!inst);
1510 *inst = op_mr;
1511 }
1512 else {
1513 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
1514 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
1515 FAIL_IF(!inst);
1516 *inst = op_mr;
1517 }
1518 return SLJIT_SUCCESS;
1519 }
1520
1521 /* General version. */
1522 if (FAST_IS_REG(dst)) {
1523 EMIT_MOV(compiler, dst, 0, src1, src1w);
1524 if (src2 & SLJIT_IMM) {
1525 BINARY_IMM(op_imm, op_mr, src2w, dst, 0);
1526 }
1527 else {
1528 inst = emit_x86_instruction(compiler, 1, dst, 0, src2, src2w);
1529 FAIL_IF(!inst);
1530 *inst = op_rm;
1531 }
1532 }
1533 else {
1534 /* This version requires fewer memory writes. */
1535 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
1536 if (src2 & SLJIT_IMM) {
1537 BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
1538 }
1539 else {
1540 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
1541 FAIL_IF(!inst);
1542 *inst = op_rm;
1543 }
1544 EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
1545 }
1546
1547 return SLJIT_SUCCESS;
1548 }
1549
1550 static sljit_s32 emit_non_cum_binary(struct sljit_compiler *compiler,
1551 sljit_u8 op_rm, sljit_u8 op_mr, sljit_u8 op_imm, sljit_u8 op_eax_imm,
1552 sljit_s32 dst, sljit_sw dstw,
1553 sljit_s32 src1, sljit_sw src1w,
1554 sljit_s32 src2, sljit_sw src2w)
1555 {
1556 sljit_u8* inst;
1557
1558 if (dst == SLJIT_UNUSED) {
1559 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
1560 if (src2 & SLJIT_IMM) {
1561 BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
1562 }
1563 else {
1564 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
1565 FAIL_IF(!inst);
1566 *inst = op_rm;
1567 }
1568 return SLJIT_SUCCESS;
1569 }
1570
1571 if (dst == src1 && dstw == src1w) {
1572 if (src2 & SLJIT_IMM) {
1573 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1574 if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
1575 #else
1576 if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128)) {
1577 #endif
1578 BINARY_EAX_IMM(op_eax_imm, src2w);
1579 }
1580 else {
1581 BINARY_IMM(op_imm, op_mr, src2w, dst, dstw);
1582 }
1583 }
1584 else if (FAST_IS_REG(dst)) {
1585 inst = emit_x86_instruction(compiler, 1, dst, dstw, src2, src2w);
1586 FAIL_IF(!inst);
1587 *inst = op_rm;
1588 }
1589 else if (FAST_IS_REG(src2)) {
1590 inst = emit_x86_instruction(compiler, 1, src2, src2w, dst, dstw);
1591 FAIL_IF(!inst);
1592 *inst = op_mr;
1593 }
1594 else {
1595 EMIT_MOV(compiler, TMP_REG1, 0, src2, src2w);
1596 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
1597 FAIL_IF(!inst);
1598 *inst = op_mr;
1599 }
1600 return SLJIT_SUCCESS;
1601 }
1602
1603 /* General version. */
1604 if (FAST_IS_REG(dst) && dst != src2) {
1605 EMIT_MOV(compiler, dst, 0, src1, src1w);
1606 if (src2 & SLJIT_IMM) {
1607 BINARY_IMM(op_imm, op_mr, src2w, dst, 0);
1608 }
1609 else {
1610 inst = emit_x86_instruction(compiler, 1, dst, 0, src2, src2w);
1611 FAIL_IF(!inst);
1612 *inst = op_rm;
1613 }
1614 }
1615 else {
1616 /* This version requires fewer memory writes. */
1617 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
1618 if (src2 & SLJIT_IMM) {
1619 BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
1620 }
1621 else {
1622 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
1623 FAIL_IF(!inst);
1624 *inst = op_rm;
1625 }
1626 EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
1627 }
1628
1629 return SLJIT_SUCCESS;
1630 }
1631
1632 static sljit_s32 emit_mul(struct sljit_compiler *compiler,
1633 sljit_s32 dst, sljit_sw dstw,
1634 sljit_s32 src1, sljit_sw src1w,
1635 sljit_s32 src2, sljit_sw src2w)
1636 {
1637 sljit_u8* inst;
1638 sljit_s32 dst_r;
1639
1640 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
1641
1642 /* Register destination. */
1643 if (dst_r == src1 && !(src2 & SLJIT_IMM)) {
1644 inst = emit_x86_instruction(compiler, 2, dst_r, 0, src2, src2w);
1645 FAIL_IF(!inst);
1646 *inst++ = GROUP_0F;
1647 *inst = IMUL_r_rm;
1648 }
1649 else if (dst_r == src2 && !(src1 & SLJIT_IMM)) {
1650 inst = emit_x86_instruction(compiler, 2, dst_r, 0, src1, src1w);
1651 FAIL_IF(!inst);
1652 *inst++ = GROUP_0F;
1653 *inst = IMUL_r_rm;
1654 }
1655 else if (src1 & SLJIT_IMM) {
1656 if (src2 & SLJIT_IMM) {
1657 EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, src2w);
1658 src2 = dst_r;
1659 src2w = 0;
1660 }
1661
1662 if (src1w <= 127 && src1w >= -128) {
1663 inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
1664 FAIL_IF(!inst);
1665 *inst = IMUL_r_rm_i8;
1666 inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
1667 FAIL_IF(!inst);
1668 INC_SIZE(1);
1669 *inst = (sljit_s8)src1w;
1670 }
1671 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1672 else {
1673 inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
1674 FAIL_IF(!inst);
1675 *inst = IMUL_r_rm_i32;
1676 inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
1677 FAIL_IF(!inst);
1678 INC_SIZE(4);
1679 sljit_unaligned_store_sw(inst, src1w);
1680 }
1681 #else
1682 else if (IS_HALFWORD(src1w)) {
1683 inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
1684 FAIL_IF(!inst);
1685 *inst = IMUL_r_rm_i32;
1686 inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
1687 FAIL_IF(!inst);
1688 INC_SIZE(4);
1689 sljit_unaligned_store_s32(inst, (sljit_s32)src1w);
1690 }
1691 else {
1692 EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_IMM, src1w);
1693 if (dst_r != src2)
1694 EMIT_MOV(compiler, dst_r, 0, src2, src2w);
1695 inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0);
1696 FAIL_IF(!inst);
1697 *inst++ = GROUP_0F;
1698 *inst = IMUL_r_rm;
1699 }
1700 #endif
1701 }
1702 else if (src2 & SLJIT_IMM) {
1703 /* Note: src1 is NOT immediate. */
1704
1705 if (src2w <= 127 && src2w >= -128) {
1706 inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
1707 FAIL_IF(!inst);
1708 *inst = IMUL_r_rm_i8;
1709 inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
1710 FAIL_IF(!inst);
1711 INC_SIZE(1);
1712 *inst = (sljit_s8)src2w;
1713 }
1714 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1715 else {
1716 inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
1717 FAIL_IF(!inst);
1718 *inst = IMUL_r_rm_i32;
1719 inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
1720 FAIL_IF(!inst);
1721 INC_SIZE(4);
1722 sljit_unaligned_store_sw(inst, src2w);
1723 }
1724 #else
1725 else if (IS_HALFWORD(src2w)) {
1726 inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
1727 FAIL_IF(!inst);
1728 *inst = IMUL_r_rm_i32;
1729 inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
1730 FAIL_IF(!inst);
1731 INC_SIZE(4);
1732 sljit_unaligned_store_s32(inst, (sljit_s32)src2w);
1733 }
1734 else {
1735 EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_IMM, src2w);
1736 if (dst_r != src1)
1737 EMIT_MOV(compiler, dst_r, 0, src1, src1w);
1738 inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0);
1739 FAIL_IF(!inst);
1740 *inst++ = GROUP_0F;
1741 *inst = IMUL_r_rm;
1742 }
1743 #endif
1744 }
1745 else {
1746 /* Neither argument is immediate. */
1747 if (ADDRESSING_DEPENDS_ON(src2, dst_r))
1748 dst_r = TMP_REG1;
1749 EMIT_MOV(compiler, dst_r, 0, src1, src1w);
1750 inst = emit_x86_instruction(compiler, 2, dst_r, 0, src2, src2w);
1751 FAIL_IF(!inst);
1752 *inst++ = GROUP_0F;
1753 *inst = IMUL_r_rm;
1754 }
1755
1756 if (dst_r == TMP_REG1)
1757 EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
1758
1759 return SLJIT_SUCCESS;
1760 }
1761
1762 static sljit_s32 emit_lea_binary(struct sljit_compiler *compiler,
1763 sljit_s32 dst, sljit_sw dstw,
1764 sljit_s32 src1, sljit_sw src1w,
1765 sljit_s32 src2, sljit_sw src2w)
1766 {
1767 sljit_u8* inst;
1768 sljit_s32 dst_r, done = 0;
1769
1770 /* These cases are better handled by the normal code path. */
1771 if (dst == src1 && dstw == src1w)
1772 return SLJIT_ERR_UNSUPPORTED;
1773 if (dst == src2 && dstw == src2w)
1774 return SLJIT_ERR_UNSUPPORTED;
1775
1776 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
1777
1778 if (FAST_IS_REG(src1)) {
1779 if (FAST_IS_REG(src2)) {
1780 inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM2(src1, src2), 0);
1781 FAIL_IF(!inst);
1782 *inst = LEA_r_m;
1783 done = 1;
1784 }
1785 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1786 if ((src2 & SLJIT_IMM) && (compiler->mode32 || IS_HALFWORD(src2w))) {
1787 inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src1), (sljit_s32)src2w);
1788 #else
1789 if (src2 & SLJIT_IMM) {
1790 inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src1), src2w);
1791 #endif
1792 FAIL_IF(!inst);
1793 *inst = LEA_r_m;
1794 done = 1;
1795 }
1796 }
1797 else if (FAST_IS_REG(src2)) {
1798 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1799 if ((src1 & SLJIT_IMM) && (compiler->mode32 || IS_HALFWORD(src1w))) {
1800 inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src2), (sljit_s32)src1w);
1801 #else
1802 if (src1 & SLJIT_IMM) {
1803 inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src2), src1w);
1804 #endif
1805 FAIL_IF(!inst);
1806 *inst = LEA_r_m;
1807 done = 1;
1808 }
1809 }
1810
1811 if (done) {
1812 if (dst_r == TMP_REG1)
1813 return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
1814 return SLJIT_SUCCESS;
1815 }
1816 return SLJIT_ERR_UNSUPPORTED;
1817 }
1818
1819 static sljit_s32 emit_cmp_binary(struct sljit_compiler *compiler,
1820 sljit_s32 src1, sljit_sw src1w,
1821 sljit_s32 src2, sljit_sw src2w)
1822 {
1823 sljit_u8* inst;
1824
1825 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1826 if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
1827 #else
1828 if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128)) {
1829 #endif
1830 BINARY_EAX_IMM(CMP_EAX_i32, src2w);
1831 return SLJIT_SUCCESS;
1832 }
1833
1834 if (FAST_IS_REG(src1)) {
1835 if (src2 & SLJIT_IMM) {
1836 BINARY_IMM(CMP, CMP_rm_r, src2w, src1, 0);
1837 }
1838 else {
1839 inst = emit_x86_instruction(compiler, 1, src1, 0, src2, src2w);
1840 FAIL_IF(!inst);
1841 *inst = CMP_r_rm;
1842 }
1843 return SLJIT_SUCCESS;
1844 }
1845
1846 if (FAST_IS_REG(src2) && !(src1 & SLJIT_IMM)) {
1847 inst = emit_x86_instruction(compiler, 1, src2, 0, src1, src1w);
1848 FAIL_IF(!inst);
1849 *inst = CMP_rm_r;
1850 return SLJIT_SUCCESS;
1851 }
1852
1853 if (src2 & SLJIT_IMM) {
1854 if (src1 & SLJIT_IMM) {
1855 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
1856 src1 = TMP_REG1;
1857 src1w = 0;
1858 }
1859 BINARY_IMM(CMP, CMP_rm_r, src2w, src1, src1w);
1860 }
1861 else {
1862 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
1863 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
1864 FAIL_IF(!inst);
1865 *inst = CMP_r_rm;
1866 }
1867 return SLJIT_SUCCESS;
1868 }
1869
1870 static sljit_s32 emit_test_binary(struct sljit_compiler *compiler,
1871 sljit_s32 src1, sljit_sw src1w,
1872 sljit_s32 src2, sljit_sw src2w)
1873 {
1874 sljit_u8* inst;
1875
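	/* TEST is symmetric, so the short TEST EAX, imm32 accumulator form can be
	   used when either operand is EAX and the other is a large immediate. */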
1876 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1877 if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
1878 #else
1879 if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128)) {
1880 #endif
1881 BINARY_EAX_IMM(TEST_EAX_i32, src2w);
1882 return SLJIT_SUCCESS;
1883 }
1884
1885 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1886 if (src2 == SLJIT_R0 && (src1 & SLJIT_IMM) && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) {
1887 #else
1888 if (src2 == SLJIT_R0 && (src1 & SLJIT_IMM) && (src1w > 127 || src1w < -128)) {
1889 #endif
1890 BINARY_EAX_IMM(TEST_EAX_i32, src1w);
1891 return SLJIT_SUCCESS;
1892 }
1893
1894 if (!(src1 & SLJIT_IMM)) {
1895 if (src2 & SLJIT_IMM) {
1896 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1897 if (IS_HALFWORD(src2w) || compiler->mode32) {
1898 inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, src1w);
1899 FAIL_IF(!inst);
1900 *inst = GROUP_F7;
1901 }
1902 else {
1903 FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src2w));
1904 inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, src1, src1w);
1905 FAIL_IF(!inst);
1906 *inst = TEST_rm_r;
1907 }
1908 #else
1909 inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, src1w);
1910 FAIL_IF(!inst);
1911 *inst = GROUP_F7;
1912 #endif
1913 return SLJIT_SUCCESS;
1914 }
1915 else if (FAST_IS_REG(src1)) {
1916 inst = emit_x86_instruction(compiler, 1, src1, 0, src2, src2w);
1917 FAIL_IF(!inst);
1918 *inst = TEST_rm_r;
1919 return SLJIT_SUCCESS;
1920 }
1921 }
1922
1923 if (!(src2 & SLJIT_IMM)) {
1924 if (src1 & SLJIT_IMM) {
1925 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1926 if (IS_HALFWORD(src1w) || compiler->mode32) {
1927 inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src1w, src2, src2w);
1928 FAIL_IF(!inst);
1929 *inst = GROUP_F7;
1930 }
1931 else {
1932 FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src1w));
1933 inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, src2, src2w);
1934 FAIL_IF(!inst);
1935 *inst = TEST_rm_r;
1936 }
1937 #else
1938 inst = emit_x86_instruction(compiler, 1, src1, src1w, src2, src2w);
1939 FAIL_IF(!inst);
1940 *inst = GROUP_F7;
1941 #endif
1942 return SLJIT_SUCCESS;
1943 }
1944 else if (FAST_IS_REG(src2)) {
1945 inst = emit_x86_instruction(compiler, 1, src2, 0, src1, src1w);
1946 FAIL_IF(!inst);
1947 *inst = TEST_rm_r;
1948 return SLJIT_SUCCESS;
1949 }
1950 }
1951
1952 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
1953 if (src2 & SLJIT_IMM) {
1954 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1955 if (IS_HALFWORD(src2w) || compiler->mode32) {
1956 inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, TMP_REG1, 0);
1957 FAIL_IF(!inst);
1958 *inst = GROUP_F7;
1959 }
1960 else {
1961 FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src2w));
1962 inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, TMP_REG1, 0);
1963 FAIL_IF(!inst);
1964 *inst = TEST_rm_r;
1965 }
1966 #else
1967 inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, TMP_REG1, 0);
1968 FAIL_IF(!inst);
1969 *inst = GROUP_F7;
1970 #endif
1971 }
1972 else {
1973 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
1974 FAIL_IF(!inst);
1975 *inst = TEST_rm_r;
1976 }
1977 return SLJIT_SUCCESS;
1978 }
1979
1980 static sljit_s32 emit_shift(struct sljit_compiler *compiler,
1981 sljit_u8 mode,
1982 sljit_s32 dst, sljit_sw dstw,
1983 sljit_s32 src1, sljit_sw src1w,
1984 sljit_s32 src2, sljit_sw src2w)
1985 {
1986 sljit_u8* inst;
1987
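	/* An immediate count, or a count already in ecx (SLJIT_PREF_SHIFT_REG), can be
	   used directly; any other count register first has to be moved into ecx while
	   its previous contents are preserved. */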
1988 if ((src2 & SLJIT_IMM) || (src2 == SLJIT_PREF_SHIFT_REG)) {
1989 if (dst == src1 && dstw == src1w) {
1990 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, dst, dstw);
1991 FAIL_IF(!inst);
1992 *inst |= mode;
1993 return SLJIT_SUCCESS;
1994 }
1995 if (dst == SLJIT_UNUSED) {
1996 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
1997 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, TMP_REG1, 0);
1998 FAIL_IF(!inst);
1999 *inst |= mode;
2000 return SLJIT_SUCCESS;
2001 }
2002 if (dst == SLJIT_PREF_SHIFT_REG && src2 == SLJIT_PREF_SHIFT_REG) {
2003 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
2004 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
2005 FAIL_IF(!inst);
2006 *inst |= mode;
2007 EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
2008 return SLJIT_SUCCESS;
2009 }
2010 if (FAST_IS_REG(dst)) {
2011 EMIT_MOV(compiler, dst, 0, src1, src1w);
2012 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, dst, 0);
2013 FAIL_IF(!inst);
2014 *inst |= mode;
2015 return SLJIT_SUCCESS;
2016 }
2017
2018 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
2019 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, TMP_REG1, 0);
2020 FAIL_IF(!inst);
2021 *inst |= mode;
2022 EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
2023 return SLJIT_SUCCESS;
2024 }
2025
2026 if (dst == SLJIT_PREF_SHIFT_REG) {
2027 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
2028 EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
2029 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
2030 FAIL_IF(!inst);
2031 *inst |= mode;
2032 EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
2033 }
2034 else if (FAST_IS_REG(dst) && dst != src2 && !ADDRESSING_DEPENDS_ON(src2, dst)) {
2035 if (src1 != dst)
2036 EMIT_MOV(compiler, dst, 0, src1, src1w);
2037 EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_PREF_SHIFT_REG, 0);
2038 EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
2039 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, dst, 0);
2040 FAIL_IF(!inst);
2041 *inst |= mode;
2042 EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
2043 }
2044 else {
2045 		/* This case is complex, since ecx itself may be used for
2046 		   addressing, and that case must be supported as well. */
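		/* e.g. the destination may be SLJIT_MEM1(SLJIT_PREF_SHIFT_REG): the old ecx
		   value is saved (to TMP_REG2 on 64 bit, to a stack slot on 32 bit), the
		   shift is performed in TMP_REG1, and ecx is restored before the final store. */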
2047 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
2048 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2049 EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_PREF_SHIFT_REG, 0);
2050 #else
2051 EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), 0, SLJIT_PREF_SHIFT_REG, 0);
2052 #endif
2053 EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
2054 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
2055 FAIL_IF(!inst);
2056 *inst |= mode;
2057 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2058 EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG2, 0);
2059 #else
2060 EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, SLJIT_MEM1(SLJIT_SP), 0);
2061 #endif
2062 EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
2063 }
2064
2065 return SLJIT_SUCCESS;
2066 }
2067
2068 static sljit_s32 emit_shift_with_flags(struct sljit_compiler *compiler,
2069 sljit_u8 mode, sljit_s32 set_flags,
2070 sljit_s32 dst, sljit_sw dstw,
2071 sljit_s32 src1, sljit_sw src1w,
2072 sljit_s32 src2, sljit_sw src2w)
2073 {
2074 /* The CPU does not set flags if the shift count is 0. */
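	/* A shift by an immediate zero therefore cannot produce them: emit a plain
	   move, or an OR with zero when the flags are requested. */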
2075 if (src2 & SLJIT_IMM) {
2076 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2077 if ((src2w & 0x3f) != 0 || (compiler->mode32 && (src2w & 0x1f) != 0))
2078 return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);
2079 #else
2080 if ((src2w & 0x1f) != 0)
2081 return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);
2082 #endif
2083 if (!set_flags)
2084 return emit_mov(compiler, dst, dstw, src1, src1w);
2085 /* OR dst, src, 0 */
2086 return emit_cum_binary(compiler, OR_r_rm, OR_rm_r, OR, OR_EAX_i32,
2087 dst, dstw, src1, src1w, SLJIT_IMM, 0);
2088 }
2089
2090 if (!set_flags)
2091 return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);
2092
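	/* The value is compared with zero so the requested flags are correct even when
	   the run-time shift count is zero: the compare is done after the shift when the
	   result ends up in a register, and before it (on src1) when the destination is
	   in memory. */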
2093 if (!FAST_IS_REG(dst))
2094 FAIL_IF(emit_cmp_binary(compiler, src1, src1w, SLJIT_IMM, 0));
2095
2096 	FAIL_IF(emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w));
2097
2098 if (FAST_IS_REG(dst))
2099 return emit_cmp_binary(compiler, dst, dstw, SLJIT_IMM, 0);
2100 return SLJIT_SUCCESS;
2101 }
2102
2103 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op,
2104 sljit_s32 dst, sljit_sw dstw,
2105 sljit_s32 src1, sljit_sw src1w,
2106 sljit_s32 src2, sljit_sw src2w)
2107 {
2108 CHECK_ERROR();
2109 CHECK(check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
2110 ADJUST_LOCAL_OFFSET(dst, dstw);
2111 ADJUST_LOCAL_OFFSET(src1, src1w);
2112 ADJUST_LOCAL_OFFSET(src2, src2w);
2113
2114 CHECK_EXTRA_REGS(dst, dstw, (void)0);
2115 CHECK_EXTRA_REGS(src1, src1w, (void)0);
2116 CHECK_EXTRA_REGS(src2, src2w, (void)0);
2117 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2118 compiler->mode32 = op & SLJIT_I32_OP;
2119 #endif
2120
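	/* Several operations have cheaper forms: ADD/SUB without flag requests may
	   become LEA, and SUB/AND with an unused destination degenerate to CMP/TEST. */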
2121 switch (GET_OPCODE(op)) {
2122 case SLJIT_ADD:
2123 if (!HAS_FLAGS(op)) {
2124 if (emit_lea_binary(compiler, dst, dstw, src1, src1w, src2, src2w) != SLJIT_ERR_UNSUPPORTED)
2125 return compiler->error;
2126 }
2127 return emit_cum_binary(compiler, ADD_r_rm, ADD_rm_r, ADD, ADD_EAX_i32,
2128 dst, dstw, src1, src1w, src2, src2w);
2129 case SLJIT_ADDC:
2130 return emit_cum_binary(compiler, ADC_r_rm, ADC_rm_r, ADC, ADC_EAX_i32,
2131 dst, dstw, src1, src1w, src2, src2w);
2132 case SLJIT_SUB:
2133 if (!HAS_FLAGS(op)) {
2134 if ((src2 & SLJIT_IMM) && emit_lea_binary(compiler, dst, dstw, src1, src1w, SLJIT_IMM, -src2w) != SLJIT_ERR_UNSUPPORTED)
2135 return compiler->error;
2136 }
2137
2138 if (dst == SLJIT_UNUSED)
2139 return emit_cmp_binary(compiler, src1, src1w, src2, src2w);
2140 return emit_non_cum_binary(compiler, SUB_r_rm, SUB_rm_r, SUB, SUB_EAX_i32,
2141 dst, dstw, src1, src1w, src2, src2w);
2142 case SLJIT_SUBC:
2143 return emit_non_cum_binary(compiler, SBB_r_rm, SBB_rm_r, SBB, SBB_EAX_i32,
2144 dst, dstw, src1, src1w, src2, src2w);
2145 case SLJIT_MUL:
2146 return emit_mul(compiler, dst, dstw, src1, src1w, src2, src2w);
2147 case SLJIT_AND:
2148 if (dst == SLJIT_UNUSED)
2149 return emit_test_binary(compiler, src1, src1w, src2, src2w);
2150 return emit_cum_binary(compiler, AND_r_rm, AND_rm_r, AND, AND_EAX_i32,
2151 dst, dstw, src1, src1w, src2, src2w);
2152 case SLJIT_OR:
2153 return emit_cum_binary(compiler, OR_r_rm, OR_rm_r, OR, OR_EAX_i32,
2154 dst, dstw, src1, src1w, src2, src2w);
2155 case SLJIT_XOR:
2156 return emit_cum_binary(compiler, XOR_r_rm, XOR_rm_r, XOR, XOR_EAX_i32,
2157 dst, dstw, src1, src1w, src2, src2w);
2158 case SLJIT_SHL:
2159 return emit_shift_with_flags(compiler, SHL, HAS_FLAGS(op),
2160 dst, dstw, src1, src1w, src2, src2w);
2161 case SLJIT_LSHR:
2162 return emit_shift_with_flags(compiler, SHR, HAS_FLAGS(op),
2163 dst, dstw, src1, src1w, src2, src2w);
2164 case SLJIT_ASHR:
2165 return emit_shift_with_flags(compiler, SAR, HAS_FLAGS(op),
2166 dst, dstw, src1, src1w, src2, src2w);
2167 }
2168
2169 return SLJIT_SUCCESS;
2170 }
2171
2172 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 reg)
2173 {
2174 CHECK_REG_INDEX(check_sljit_get_register_index(reg));
2175 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
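	/* Registers above SLJIT_R2 are mapped onto the stack frame on x86-32
	   (see CHECK_EXTRA_REGS), so they have no machine register index. */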
2176 if (reg >= SLJIT_R3 && reg <= SLJIT_R8)
2177 return -1;
2178 #endif
2179 return reg_map[reg];
2180 }
2181
2182 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_float_register_index(sljit_s32 reg)
2183 {
2184 CHECK_REG_INDEX(check_sljit_get_float_register_index(reg));
2185 return reg;
2186 }
2187
2188 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler,
2189 void *instruction, sljit_s32 size)
2190 {
2191 sljit_u8 *inst;
2192
2193 CHECK_ERROR();
2194 CHECK(check_sljit_emit_op_custom(compiler, instruction, size));
2195
2196 inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
2197 FAIL_IF(!inst);
2198 INC_SIZE(size);
2199 SLJIT_MEMCPY(inst, instruction, size);
2200 return SLJIT_SUCCESS;
2201 }
2202
2203 /* --------------------------------------------------------------------- */
2204 /* Floating point operators */
2205 /* --------------------------------------------------------------------- */
2206
2207 /* Alignment + 2 * 16 bytes. */
2208 static sljit_s32 sse2_data[3 + (4 + 4) * 2];
2209 static sljit_s32 *sse2_buffer;
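/* sse2_buffer is the 16 byte aligned view of sse2_data: words [0] and [4] hold the
   single precision sign and absolute value masks, words [8..9] and [12..13] the
   double precision ones. They are applied with XORPD (negate) and ANDPD (abs)
   in sljit_emit_fop1. */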
2210
2211 static void init_compiler(void)
2212 {
2213 sse2_buffer = (sljit_s32*)(((sljit_uw)sse2_data + 15) & ~0xf);
2214 /* Single precision constants. */
2215 sse2_buffer[0] = 0x80000000;
2216 sse2_buffer[4] = 0x7fffffff;
2217 /* Double precision constants. */
2218 sse2_buffer[8] = 0;
2219 sse2_buffer[9] = 0x80000000;
2220 sse2_buffer[12] = 0xffffffff;
2221 sse2_buffer[13] = 0x7fffffff;
2222 }
2223
2224 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_is_fpu_available(void)
2225 {
2226 #ifdef SLJIT_IS_FPU_AVAILABLE
2227 return SLJIT_IS_FPU_AVAILABLE;
2228 #elif (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
2229 if (cpu_has_sse2 == -1)
2230 get_cpu_features();
2231 return cpu_has_sse2;
2232 #else /* SLJIT_DETECT_SSE2 */
2233 return 1;
2234 #endif /* SLJIT_DETECT_SSE2 */
2235 }
2236
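/* Scalar SSE2 arithmetic: the F3 prefix selects the single precision (..SS) and
   the F2 prefix the double precision (..SD) form of the 0F-escaped opcode. */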
2237 static sljit_s32 emit_sse2(struct sljit_compiler *compiler, sljit_u8 opcode,
2238 sljit_s32 single, sljit_s32 xmm1, sljit_s32 xmm2, sljit_sw xmm2w)
2239 {
2240 sljit_u8 *inst;
2241
2242 inst = emit_x86_instruction(compiler, 2 | (single ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2, xmm1, 0, xmm2, xmm2w);
2243 FAIL_IF(!inst);
2244 *inst++ = GROUP_0F;
2245 *inst = opcode;
2246 return SLJIT_SUCCESS;
2247 }
2248
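/* Logic/compare style SSE2 instructions: the 66 prefix selects the packed double or
   double precision form (e.g. ANDPD, XORPD, UCOMISD); without it the single
   precision variant is emitted. */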
2249 static sljit_s32 emit_sse2_logic(struct sljit_compiler *compiler, sljit_u8 opcode,
2250 sljit_s32 pref66, sljit_s32 xmm1, sljit_s32 xmm2, sljit_sw xmm2w)
2251 {
2252 sljit_u8 *inst;
2253
2254 inst = emit_x86_instruction(compiler, 2 | (pref66 ? EX86_PREF_66 : 0) | EX86_SSE2, xmm1, 0, xmm2, xmm2w);
2255 FAIL_IF(!inst);
2256 *inst++ = GROUP_0F;
2257 *inst = opcode;
2258 return SLJIT_SUCCESS;
2259 }
2260
2261 static SLJIT_INLINE sljit_s32 emit_sse2_load(struct sljit_compiler *compiler,
2262 sljit_s32 single, sljit_s32 dst, sljit_s32 src, sljit_sw srcw)
2263 {
2264 return emit_sse2(compiler, MOVSD_x_xm, single, dst, src, srcw);
2265 }
2266
2267 static SLJIT_INLINE sljit_s32 emit_sse2_store(struct sljit_compiler *compiler,
2268 sljit_s32 single, sljit_s32 dst, sljit_sw dstw, sljit_s32 src)
2269 {
2270 return emit_sse2(compiler, MOVSD_xm_x, single, src, dst, dstw);
2271 }
2272
2273 static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler *compiler, sljit_s32 op,
2274 sljit_s32 dst, sljit_sw dstw,
2275 sljit_s32 src, sljit_sw srcw)
2276 {
2277 sljit_s32 dst_r = SLOW_IS_REG(dst) ? dst : TMP_REG1;
2278 sljit_u8 *inst;
2279
2280 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2281 if (GET_OPCODE(op) == SLJIT_CONV_SW_FROM_F64)
2282 compiler->mode32 = 0;
2283 #endif
2284
2285 inst = emit_x86_instruction(compiler, 2 | ((op & SLJIT_F32_OP) ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2_OP2, dst_r, 0, src, srcw);
2286 FAIL_IF(!inst);
2287 *inst++ = GROUP_0F;
2288 *inst = CVTTSD2SI_r_xm;
2289
2290 if (dst_r == TMP_REG1 && dst != SLJIT_UNUSED)
2291 return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
2292 return SLJIT_SUCCESS;
2293 }
2294
2295 static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op,
2296 sljit_s32 dst, sljit_sw dstw,
2297 sljit_s32 src, sljit_sw srcw)
2298 {
2299 sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG;
2300 sljit_u8 *inst;
2301
2302 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2303 if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_SW)
2304 compiler->mode32 = 0;
2305 #endif
2306
2307 if (src & SLJIT_IMM) {
2308 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2309 if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32)
2310 srcw = (sljit_s32)srcw;
2311 #endif
2312 EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
2313 src = TMP_REG1;
2314 srcw = 0;
2315 }
2316
2317 inst = emit_x86_instruction(compiler, 2 | ((op & SLJIT_F32_OP) ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2_OP1, dst_r, 0, src, srcw);
2318 FAIL_IF(!inst);
2319 *inst++ = GROUP_0F;
2320 *inst = CVTSI2SD_x_rm;
2321
2322 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2323 compiler->mode32 = 1;
2324 #endif
2325 if (dst_r == TMP_FREG)
2326 return emit_sse2_store(compiler, op & SLJIT_F32_OP, dst, dstw, TMP_FREG);
2327 return SLJIT_SUCCESS;
2328 }
2329
2330 static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_s32 op,
2331 sljit_s32 src1, sljit_sw src1w,
2332 sljit_s32 src2, sljit_sw src2w)
2333 {
2334 if (!FAST_IS_REG(src1)) {
2335 FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, TMP_FREG, src1, src1w));
2336 src1 = TMP_FREG;
2337 }
2338 return emit_sse2_logic(compiler, UCOMISD_x_xm, !(op & SLJIT_F32_OP), src1, src2, src2w);
2339 }
2340
2341 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op,
2342 sljit_s32 dst, sljit_sw dstw,
2343 sljit_s32 src, sljit_sw srcw)
2344 {
2345 sljit_s32 dst_r;
2346
2347 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2348 compiler->mode32 = 1;
2349 #endif
2350
2351 CHECK_ERROR();
2352 SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw);
2353
2354 if (GET_OPCODE(op) == SLJIT_MOV_F64) {
2355 if (FAST_IS_REG(dst))
2356 return emit_sse2_load(compiler, op & SLJIT_F32_OP, dst, src, srcw);
2357 if (FAST_IS_REG(src))
2358 return emit_sse2_store(compiler, op & SLJIT_F32_OP, dst, dstw, src);
2359 FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, TMP_FREG, src, srcw));
2360 return emit_sse2_store(compiler, op & SLJIT_F32_OP, dst, dstw, TMP_FREG);
2361 }
2362
2363 if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_F32) {
2364 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG;
2365 if (FAST_IS_REG(src)) {
2366 			/* We overwrite the high bits of the source. From the SLJIT point of view,
2367 			   this is not an issue.
2368 Note: In SSE3, we could also use MOVDDUP and MOVSLDUP. */
2369 FAIL_IF(emit_sse2_logic(compiler, UNPCKLPD_x_xm, op & SLJIT_F32_OP, src, src, 0));
2370 }
2371 else {
2372 FAIL_IF(emit_sse2_load(compiler, !(op & SLJIT_F32_OP), TMP_FREG, src, srcw));
2373 src = TMP_FREG;
2374 }
2375
2376 FAIL_IF(emit_sse2_logic(compiler, CVTPD2PS_x_xm, op & SLJIT_F32_OP, dst_r, src, 0));
2377 if (dst_r == TMP_FREG)
2378 return emit_sse2_store(compiler, op & SLJIT_F32_OP, dst, dstw, TMP_FREG);
2379 return SLJIT_SUCCESS;
2380 }
2381
2382 if (SLOW_IS_REG(dst)) {
2383 dst_r = dst;
2384 if (dst != src)
2385 FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, dst_r, src, srcw));
2386 }
2387 else {
2388 dst_r = TMP_FREG;
2389 FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, dst_r, src, srcw));
2390 }
2391
2392 switch (GET_OPCODE(op)) {
2393 case SLJIT_NEG_F64:
2394 FAIL_IF(emit_sse2_logic(compiler, XORPD_x_xm, 1, dst_r, SLJIT_MEM0(), (sljit_sw)(op & SLJIT_F32_OP ? sse2_buffer : sse2_buffer + 8)));
2395 break;
2396
2397 case SLJIT_ABS_F64:
2398 FAIL_IF(emit_sse2_logic(compiler, ANDPD_x_xm, 1, dst_r, SLJIT_MEM0(), (sljit_sw)(op & SLJIT_F32_OP ? sse2_buffer + 4 : sse2_buffer + 12)));
2399 break;
2400 }
2401
2402 if (dst_r == TMP_FREG)
2403 return emit_sse2_store(compiler, op & SLJIT_F32_OP, dst, dstw, TMP_FREG);
2404 return SLJIT_SUCCESS;
2405 }
2406
2407 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op,
2408 sljit_s32 dst, sljit_sw dstw,
2409 sljit_s32 src1, sljit_sw src1w,
2410 sljit_s32 src2, sljit_sw src2w)
2411 {
2412 sljit_s32 dst_r;
2413
2414 CHECK_ERROR();
2415 CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
2416 ADJUST_LOCAL_OFFSET(dst, dstw);
2417 ADJUST_LOCAL_OFFSET(src1, src1w);
2418 ADJUST_LOCAL_OFFSET(src2, src2w);
2419
2420 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2421 compiler->mode32 = 1;
2422 #endif
2423
2424 if (FAST_IS_REG(dst)) {
2425 dst_r = dst;
2426 if (dst == src1)
2427 ; /* Do nothing here. */
2428 else if (dst == src2 && (op == SLJIT_ADD_F64 || op == SLJIT_MUL_F64)) {
2429 /* Swap arguments. */
2430 src2 = src1;
2431 src2w = src1w;
2432 }
2433 else if (dst != src2)
2434 FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, dst_r, src1, src1w));
2435 else {
2436 dst_r = TMP_FREG;
2437 FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, TMP_FREG, src1, src1w));
2438 }
2439 }
2440 else {
2441 dst_r = TMP_FREG;
2442 FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, TMP_FREG, src1, src1w));
2443 }
2444
2445 switch (GET_OPCODE(op)) {
2446 case SLJIT_ADD_F64:
2447 FAIL_IF(emit_sse2(compiler, ADDSD_x_xm, op & SLJIT_F32_OP, dst_r, src2, src2w));
2448 break;
2449
2450 case SLJIT_SUB_F64:
2451 FAIL_IF(emit_sse2(compiler, SUBSD_x_xm, op & SLJIT_F32_OP, dst_r, src2, src2w));
2452 break;
2453
2454 case SLJIT_MUL_F64:
2455 FAIL_IF(emit_sse2(compiler, MULSD_x_xm, op & SLJIT_F32_OP, dst_r, src2, src2w));
2456 break;
2457
2458 case SLJIT_DIV_F64:
2459 FAIL_IF(emit_sse2(compiler, DIVSD_x_xm, op & SLJIT_F32_OP, dst_r, src2, src2w));
2460 break;
2461 }
2462
2463 if (dst_r == TMP_FREG)
2464 return emit_sse2_store(compiler, op & SLJIT_F32_OP, dst, dstw, TMP_FREG);
2465 return SLJIT_SUCCESS;
2466 }
2467
2468 /* --------------------------------------------------------------------- */
2469 /* Conditional instructions */
2470 /* --------------------------------------------------------------------- */
2471
2472 SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler)
2473 {
2474 sljit_u8 *inst;
2475 struct sljit_label *label;
2476
2477 CHECK_ERROR_PTR();
2478 CHECK_PTR(check_sljit_emit_label(compiler));
2479
2480 if (compiler->last_label && compiler->last_label->size == compiler->size)
2481 return compiler->last_label;
2482
2483 label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label));
2484 PTR_FAIL_IF(!label);
2485 set_label(label, compiler);
2486
2487 inst = (sljit_u8*)ensure_buf(compiler, 2);
2488 PTR_FAIL_IF(!inst);
2489
2490 *inst++ = 0;
2491 *inst++ = 0;
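	/* A zero length byte marks a special record in the instruction stream; the next
	   byte identifies it (0: label, 1: const, type + 2: jump, see sljit_emit_jump and
	   sljit_emit_const below) and is resolved when the final code is generated. */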
2492
2493 return label;
2494 }
2495
2496 SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type)
2497 {
2498 sljit_u8 *inst;
2499 struct sljit_jump *jump;
2500
2501 CHECK_ERROR_PTR();
2502 CHECK_PTR(check_sljit_emit_jump(compiler, type));
2503
2504 jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
2505 PTR_FAIL_IF_NULL(jump);
2506 set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
2507 type &= 0xff;
2508
2509 if (type >= SLJIT_CALL1)
2510 PTR_FAIL_IF(call_with_args(compiler, type));
2511
2512 /* Worst case size. */
2513 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
2514 compiler->size += (type >= SLJIT_JUMP) ? 5 : 6;
2515 #else
2516 compiler->size += (type >= SLJIT_JUMP) ? (10 + 3) : (2 + 10 + 3);
2517 #endif
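	/* Presumably 10 bytes for a MOV reg, imm64, 3 bytes for an indirect jump or call
	   through the register, and 2 extra bytes for the short conditional jump that may
	   precede them. */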
2518
2519 inst = (sljit_u8*)ensure_buf(compiler, 2);
2520 PTR_FAIL_IF_NULL(inst);
2521
2522 *inst++ = 0;
2523 *inst++ = type + 2;
2524 return jump;
2525 }
2526
2527 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw)
2528 {
2529 sljit_u8 *inst;
2530 struct sljit_jump *jump;
2531
2532 CHECK_ERROR();
2533 CHECK(check_sljit_emit_ijump(compiler, type, src, srcw));
2534 ADJUST_LOCAL_OFFSET(src, srcw);
2535
2536 CHECK_EXTRA_REGS(src, srcw, (void)0);
2537
2538 if (type >= SLJIT_CALL1) {
2539 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
2540 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
2541 if (src == SLJIT_R2) {
2542 EMIT_MOV(compiler, TMP_REG1, 0, src, 0);
2543 src = TMP_REG1;
2544 }
2545 if (src == SLJIT_MEM1(SLJIT_SP) && type >= SLJIT_CALL3)
2546 srcw += sizeof(sljit_sw);
2547 #endif
2548 #endif
2549 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) && defined(_WIN64)
2550 if (src == SLJIT_R2) {
2551 EMIT_MOV(compiler, TMP_REG1, 0, src, 0);
2552 src = TMP_REG1;
2553 }
2554 #endif
2555 FAIL_IF(call_with_args(compiler, type));
2556 }
2557
2558 if (src == SLJIT_IMM) {
2559 jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
2560 FAIL_IF_NULL(jump);
2561 set_jump(jump, compiler, JUMP_ADDR);
2562 jump->u.target = srcw;
2563
2564 /* Worst case size. */
2565 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
2566 compiler->size += 5;
2567 #else
2568 compiler->size += 10 + 3;
2569 #endif
2570
2571 inst = (sljit_u8*)ensure_buf(compiler, 2);
2572 FAIL_IF_NULL(inst);
2573
2574 *inst++ = 0;
2575 *inst++ = type + 2;
2576 }
2577 else {
2578 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2579 /* REX_W is not necessary (src is not immediate). */
2580 compiler->mode32 = 1;
2581 #endif
2582 inst = emit_x86_instruction(compiler, 1, 0, 0, src, srcw);
2583 FAIL_IF(!inst);
2584 *inst++ = GROUP_FF;
2585 *inst |= (type >= SLJIT_FAST_CALL) ? CALL_rm : JMP_rm;
2586 }
2587 return SLJIT_SUCCESS;
2588 }
2589
2590 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op,
2591 sljit_s32 dst, sljit_sw dstw,
2592 sljit_s32 src, sljit_sw srcw,
2593 sljit_s32 type)
2594 {
2595 sljit_u8 *inst;
2596 sljit_u8 cond_set = 0;
2597 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2598 sljit_s32 reg;
2599 #endif
2600 /* ADJUST_LOCAL_OFFSET and CHECK_EXTRA_REGS might overwrite these values. */
2601 sljit_s32 dst_save = dst;
2602 sljit_sw dstw_save = dstw;
2603
2604 CHECK_ERROR();
2605 CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, src, srcw, type));
2606 SLJIT_UNUSED_ARG(srcw);
2607
2608 if (dst == SLJIT_UNUSED)
2609 return SLJIT_SUCCESS;
2610
2611 ADJUST_LOCAL_OFFSET(dst, dstw);
2612 CHECK_EXTRA_REGS(dst, dstw, (void)0);
2613
2614 type &= 0xff;
2615 /* setcc = jcc + 0x10. */
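	/* e.g. SLJIT_EQUAL: the near JE opcode (0F 84) becomes SETE (0F 94). */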
2616 cond_set = get_jump_code(type) + 0x10;
2617
2618 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2619 if (GET_OPCODE(op) == SLJIT_OR && !GET_ALL_FLAGS(op) && FAST_IS_REG(dst) && dst == src) {
2620 inst = (sljit_u8*)ensure_buf(compiler, 1 + 4 + 3);
2621 FAIL_IF(!inst);
2622 INC_SIZE(4 + 3);
2623 /* Set low register to conditional flag. */
2624 *inst++ = (reg_map[TMP_REG1] <= 7) ? REX : REX_B;
2625 *inst++ = GROUP_0F;
2626 *inst++ = cond_set;
2627 *inst++ = MOD_REG | reg_lmap[TMP_REG1];
2628 *inst++ = REX | (reg_map[TMP_REG1] <= 7 ? 0 : REX_R) | (reg_map[dst] <= 7 ? 0 : REX_B);
2629 *inst++ = OR_rm8_r8;
2630 *inst++ = MOD_REG | (reg_lmap[TMP_REG1] << 3) | reg_lmap[dst];
2631 return SLJIT_SUCCESS;
2632 }
2633
2634 reg = (op == SLJIT_MOV && FAST_IS_REG(dst)) ? dst : TMP_REG1;
2635
2636 inst = (sljit_u8*)ensure_buf(compiler, 1 + 4 + 4);
2637 FAIL_IF(!inst);
2638 INC_SIZE(4 + 4);
2639 /* Set low register to conditional flag. */
2640 *inst++ = (reg_map[reg] <= 7) ? REX : REX_B;
2641 *inst++ = GROUP_0F;
2642 *inst++ = cond_set;
2643 *inst++ = MOD_REG | reg_lmap[reg];
2644 *inst++ = REX_W | (reg_map[reg] <= 7 ? 0 : (REX_B | REX_R));
2645 /* The movzx instruction does not affect flags. */
2646 *inst++ = GROUP_0F;
2647 *inst++ = MOVZX_r_rm8;
2648 *inst = MOD_REG | (reg_lmap[reg] << 3) | reg_lmap[reg];
2649
2650 if (reg != TMP_REG1)
2651 return SLJIT_SUCCESS;
2652
2653 if (GET_OPCODE(op) < SLJIT_ADD) {
2654 compiler->mode32 = GET_OPCODE(op) != SLJIT_MOV;
2655 return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
2656 }
2657 #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
2658 || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
2659 compiler->skip_checks = 1;
2660 #endif
2661 return sljit_emit_op2(compiler, op, dst_save, dstw_save, dst_save, dstw_save, TMP_REG1, 0);
2662
2663 #else
2664 /* The SLJIT_CONFIG_X86_32 code path starts here. */
2665 if (GET_OPCODE(op) < SLJIT_ADD && FAST_IS_REG(dst)) {
2666 if (reg_map[dst] <= 4) {
2667 /* Low byte is accessible. */
2668 inst = (sljit_u8*)ensure_buf(compiler, 1 + 3 + 3);
2669 FAIL_IF(!inst);
2670 INC_SIZE(3 + 3);
2671 /* Set low byte to conditional flag. */
2672 *inst++ = GROUP_0F;
2673 *inst++ = cond_set;
2674 *inst++ = MOD_REG | reg_map[dst];
2675
2676 *inst++ = GROUP_0F;
2677 *inst++ = MOVZX_r_rm8;
2678 *inst = MOD_REG | (reg_map[dst] << 3) | reg_map[dst];
2679 return SLJIT_SUCCESS;
2680 }
2681
2682 /* Low byte is not accessible. */
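		/* In 32 bit mode ebp, esi and edi have no byte-addressable low register, so
		   setcc cannot target them directly; use cmov if available, otherwise go
		   through eax. */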
2683 if (cpu_has_cmov == -1)
2684 get_cpu_features();
2685
2686 if (cpu_has_cmov) {
2687 EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, 1);
2688 			/* A xor reg, reg operation would overwrite the flags. */
2689 EMIT_MOV(compiler, dst, 0, SLJIT_IMM, 0);
2690
2691 inst = (sljit_u8*)ensure_buf(compiler, 1 + 3);
2692 FAIL_IF(!inst);
2693 INC_SIZE(3);
2694
2695 *inst++ = GROUP_0F;
2696 /* cmovcc = setcc - 0x50. */
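			/* e.g. SETE (0F 94) becomes CMOVE (0F 44). */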
2697 *inst++ = cond_set - 0x50;
2698 *inst++ = MOD_REG | (reg_map[dst] << 3) | reg_map[TMP_REG1];
2699 return SLJIT_SUCCESS;
2700 }
2701
2702 inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + 3 + 3 + 1);
2703 FAIL_IF(!inst);
2704 INC_SIZE(1 + 3 + 3 + 1);
2705 *inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
2706 /* Set al to conditional flag. */
2707 *inst++ = GROUP_0F;
2708 *inst++ = cond_set;
2709 *inst++ = MOD_REG | 0 /* eax */;
2710
2711 *inst++ = GROUP_0F;
2712 *inst++ = MOVZX_r_rm8;
2713 *inst++ = MOD_REG | (reg_map[dst] << 3) | 0 /* eax */;
2714 *inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
2715 return SLJIT_SUCCESS;
2716 }
2717
2718 if (GET_OPCODE(op) == SLJIT_OR && !GET_ALL_FLAGS(op) && FAST_IS_REG(dst) && dst == src && reg_map[dst] <= 4) {
2719 SLJIT_ASSERT(reg_map[SLJIT_R0] == 0);
2720
2721 if (dst != SLJIT_R0) {
2722 inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + 3 + 2 + 1);
2723 FAIL_IF(!inst);
2724 INC_SIZE(1 + 3 + 2 + 1);
2725 /* Set low register to conditional flag. */
2726 *inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
2727 *inst++ = GROUP_0F;
2728 *inst++ = cond_set;
2729 *inst++ = MOD_REG | 0 /* eax */;
2730 *inst++ = OR_rm8_r8;
2731 *inst++ = MOD_REG | (0 /* eax */ << 3) | reg_map[dst];
2732 *inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
2733 }
2734 else {
2735 inst = (sljit_u8*)ensure_buf(compiler, 1 + 2 + 3 + 2 + 2);
2736 FAIL_IF(!inst);
2737 INC_SIZE(2 + 3 + 2 + 2);
2738 /* Set low register to conditional flag. */
2739 *inst++ = XCHG_r_rm;
2740 *inst++ = MOD_REG | (1 /* ecx */ << 3) | reg_map[TMP_REG1];
2741 *inst++ = GROUP_0F;
2742 *inst++ = cond_set;
2743 *inst++ = MOD_REG | 1 /* ecx */;
2744 *inst++ = OR_rm8_r8;
2745 *inst++ = MOD_REG | (1 /* ecx */ << 3) | 0 /* eax */;
2746 *inst++ = XCHG_r_rm;
2747 *inst++ = MOD_REG | (1 /* ecx */ << 3) | reg_map[TMP_REG1];
2748 }
2749 return SLJIT_SUCCESS;
2750 }
2751
2752 	/* Set TMP_REG1 to the conditional flag value (0 or 1). */
2753 inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + 3 + 3 + 1);
2754 FAIL_IF(!inst);
2755 INC_SIZE(1 + 3 + 3 + 1);
2756 *inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
2757 /* Set al to conditional flag. */
2758 *inst++ = GROUP_0F;
2759 *inst++ = cond_set;
2760 *inst++ = MOD_REG | 0 /* eax */;
2761
2762 *inst++ = GROUP_0F;
2763 *inst++ = MOVZX_r_rm8;
2764 *inst++ = MOD_REG | (0 << 3) /* eax */ | 0 /* eax */;
2765
2766 *inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
2767
2768 if (GET_OPCODE(op) < SLJIT_ADD)
2769 return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
2770
2771 #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
2772 || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
2773 compiler->skip_checks = 1;
2774 #endif
2775 return sljit_emit_op2(compiler, op, dst_save, dstw_save, dst_save, dstw_save, TMP_REG1, 0);
2776 #endif /* SLJIT_CONFIG_X86_64 */
2777 }
2778
2779 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_local_base(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw offset)
2780 {
2781 CHECK_ERROR();
2782 CHECK(check_sljit_get_local_base(compiler, dst, dstw, offset));
2783 ADJUST_LOCAL_OFFSET(dst, dstw);
2784
2785 CHECK_EXTRA_REGS(dst, dstw, (void)0);
2786
2787 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2788 compiler->mode32 = 0;
2789 #endif
2790
2791 ADJUST_LOCAL_OFFSET(SLJIT_MEM1(SLJIT_SP), offset);
2792
2793 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2794 if (NOT_HALFWORD(offset)) {
2795 FAIL_IF(emit_load_imm64(compiler, TMP_REG1, offset));
2796 #if (defined SLJIT_DEBUG && SLJIT_DEBUG)
2797 SLJIT_ASSERT(emit_lea_binary(compiler, dst, dstw, SLJIT_SP, 0, TMP_REG1, 0) != SLJIT_ERR_UNSUPPORTED);
2798 return compiler->error;
2799 #else
2800 return emit_lea_binary(compiler, dst, dstw, SLJIT_SP, 0, TMP_REG1, 0);
2801 #endif
2802 }
2803 #endif
2804
2805 if (offset != 0)
2806 return emit_lea_binary(compiler, dst, dstw, SLJIT_SP, 0, SLJIT_IMM, offset);
2807 return emit_mov(compiler, dst, dstw, SLJIT_SP, 0);
2808 }
2809
2810 SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value)
2811 {
2812 sljit_u8 *inst;
2813 struct sljit_const *const_;
2814 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2815 sljit_s32 reg;
2816 #endif
2817
2818 CHECK_ERROR_PTR();
2819 CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value));
2820 ADJUST_LOCAL_OFFSET(dst, dstw);
2821
2822 CHECK_EXTRA_REGS(dst, dstw, (void)0);
2823
2824 const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const));
2825 PTR_FAIL_IF(!const_);
2826 set_const(const_, compiler);
2827
2828 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2829 compiler->mode32 = 0;
2830 reg = SLOW_IS_REG(dst) ? dst : TMP_REG1;
2831
2832 if (emit_load_imm64(compiler, reg, init_value))
2833 return NULL;
2834 #else
2835 if (dst == SLJIT_UNUSED)
2836 dst = TMP_REG1;
2837
2838 if (emit_mov(compiler, dst, dstw, SLJIT_IMM, init_value))
2839 return NULL;
2840 #endif
2841
2842 inst = (sljit_u8*)ensure_buf(compiler, 2);
2843 PTR_FAIL_IF(!inst);
2844
2845 *inst++ = 0;
2846 *inst++ = 1;
2847
2848 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2849 if (dst & SLJIT_MEM)
2850 if (emit_mov(compiler, dst, dstw, TMP_REG1, 0))
2851 return NULL;
2852 #endif
2853
2854 return const_;
2855 }
2856
2857 SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset)
2858 {
2859 SLJIT_UNUSED_ARG(executable_offset);
2860 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
2861 sljit_unaligned_store_sw((void*)addr, new_target - (addr + 4) - (sljit_uw)executable_offset);
2862 #else
2863 sljit_unaligned_store_sw((void*)addr, (sljit_sw) new_target);
2864 #endif
2865 }
2866
2867 SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset)
2868 {
2869 SLJIT_UNUSED_ARG(executable_offset);
2870 sljit_unaligned_store_sw((void*)addr, new_constant);
2871 }
2872
2873 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_x86_is_sse2_available(void)
2874 {
2875 #if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
2876 if (cpu_has_sse2 == -1)
2877 get_cpu_features();
2878 return cpu_has_sse2;
2879 #else
2880 return 1;
2881 #endif
2882 }
2883
2884 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_x86_is_cmov_available(void)
2885 {
2886 if (cpu_has_cmov == -1)
2887 get_cpu_features();
2888 return cpu_has_cmov;
2889 }
2890
2891 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_x86_emit_cmov(struct sljit_compiler *compiler,
2892 sljit_s32 type,
2893 sljit_s32 dst_reg,
2894 sljit_s32 src, sljit_sw srcw)
2895 {
2896 sljit_u8* inst;
2897
2898 CHECK_ERROR();
2899 #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
2900 CHECK_ARGUMENT(sljit_x86_is_cmov_available());
2901 CHECK_ARGUMENT(!(type & ~(0xff | SLJIT_I32_OP)));
2902 CHECK_ARGUMENT((type & 0xff) >= SLJIT_EQUAL && (type & 0xff) <= SLJIT_ORDERED_F64);
2903 CHECK_ARGUMENT(FUNCTION_CHECK_IS_REG(dst_reg & ~SLJIT_I32_OP));
2904 FUNCTION_CHECK_SRC(src, srcw);
2905
2906 if ((type & 0xff) <= SLJIT_NOT_ZERO)
2907 CHECK_ARGUMENT(compiler->last_flags & SLJIT_SET_Z);
2908 else
2909 CHECK_ARGUMENT((type & 0xff) == (compiler->last_flags & 0xff));
2910 #endif
2911 #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
2912 if (SLJIT_UNLIKELY(!!compiler->verbose)) {
2913 fprintf(compiler->verbose, " x86_cmov%s %s%s, ",
2914 !(dst_reg & SLJIT_I32_OP) ? "" : ".i",
2915 jump_names[type & 0xff], JUMP_POSTFIX(type));
2916 sljit_verbose_reg(compiler, dst_reg & ~SLJIT_I32_OP);
2917 fprintf(compiler->verbose, ", ");
2918 sljit_verbose_param(compiler, src, srcw);
2919 fprintf(compiler->verbose, "\n");
2920 }
2921 #endif
2922
2923 ADJUST_LOCAL_OFFSET(src, srcw);
2924 CHECK_EXTRA_REGS(src, srcw, (void)0);
2925
2926 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2927 compiler->mode32 = dst_reg & SLJIT_I32_OP;
2928 #endif
2929 dst_reg &= ~SLJIT_I32_OP;
2930
2931 if (SLJIT_UNLIKELY(src & SLJIT_IMM)) {
2932 EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, srcw);
2933 src = TMP_REG1;
2934 srcw = 0;
2935 }
2936
2937 inst = emit_x86_instruction(compiler, 2, dst_reg, 0, src, srcw);
2938 FAIL_IF(!inst);
2939 *inst++ = GROUP_0F;
2940 *inst = get_jump_code(type & 0xff) - 0x40;
2941 return SLJIT_SUCCESS;
2942 }
2943