1 /* $NetBSD: sljitNativeX86_common.c,v 1.4.2.4 2017/12/03 11:38:04 jdolecek Exp $ */
2
3 /*
4 * Stack-less Just-In-Time compiler
5 *
6 * Copyright 2009-2012 Zoltan Herczeg (hzmester (at) freemail.hu). All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without modification, are
9 * permitted provided that the following conditions are met:
10 *
11 * 1. Redistributions of source code must retain the above copyright notice, this list of
12 * conditions and the following disclaimer.
13 *
14 * 2. Redistributions in binary form must reproduce the above copyright notice, this list
15 * of conditions and the following disclaimer in the documentation and/or other materials
16 * provided with the distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
19 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
21 * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
22 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
23 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
24 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
26 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29 SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void)
30 {
31 return "x86" SLJIT_CPUINFO;
32 }
33
34 /*
35 32b register indexes:
36 0 - EAX
37 1 - ECX
38 2 - EDX
39 3 - EBX
40 4 - none
41 5 - EBP
42 6 - ESI
43 7 - EDI
44 */
45
46 /*
47 64b register indexes:
48 0 - RAX
49 1 - RCX
50 2 - RDX
51 3 - RBX
52 4 - none
53 5 - RBP
54 6 - RSI
55 7 - RDI
56 8 - R8 - From now on REX prefix is required
57 9 - R9
58 10 - R10
59 11 - R11
60 12 - R12
61 13 - R13
62 14 - R14
63 15 - R15
64 */
65
66 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
67
68 /* Last register + 1. */
69 #define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2)
70
71 static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 3] = {
72 0, 0, 2, 1, 0, 0, 0, 0, 7, 6, 3, 4, 5
73 };
74
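/* On x86-32 there are not enough hardware registers for every SLJIT register:
   SLJIT_R3 - SLJIT_R6 live in stack slots, so CHECK_EXTRA_REGS rewrites a
   reference to one of them into an SLJIT_MEM1(SLJIT_SP) access at the
   corresponding offset in the locals area. */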
75 #define CHECK_EXTRA_REGS(p, w, do) \
76 if (p >= SLJIT_R3 && p <= SLJIT_R6) { \
77 w = SLJIT_LOCALS_OFFSET + ((p) - (SLJIT_R3 + 4)) * sizeof(sljit_sw); \
78 p = SLJIT_MEM1(SLJIT_SP); \
79 do; \
80 }
81
82 #else /* SLJIT_CONFIG_X86_32 */
83
84 /* Last register + 1. */
85 #define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2)
86 #define TMP_REG2 (SLJIT_NUMBER_OF_REGISTERS + 3)
87 #define TMP_REG3 (SLJIT_NUMBER_OF_REGISTERS + 4)
88
89 /* Note: r12 & 0x7 == 0b100, which is decoded as "SIB byte present"
90 Note: avoid using r12 and r13 for memory addressing;
91 therefore r12 is better for SAVED_EREG than SAVED_REG. */
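/* In the ModR/M byte, rm == 0b100 means that a SIB byte follows, and
   mod == 0b00 with rm == 0b101 selects disp32 addressing; hence r12 and r13
   always need extra encoding bytes when used as a base register. */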
92 #ifndef _WIN64
93 /* 1st argument passed in rdi, 2nd in rsi, 3rd in rdx. */
94 static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 5] = {
95 0, 0, 6, 1, 8, 11, 10, 12, 5, 13, 14, 15, 3, 4, 2, 7, 9
96 };
97 /* low-map. reg_map & 0x7. */
98 static const sljit_u8 reg_lmap[SLJIT_NUMBER_OF_REGISTERS + 5] = {
99 0, 0, 6, 1, 0, 3, 2, 4, 5, 5, 6, 7, 3, 4, 2, 7, 1
100 };
101 #else
102 /* 1st argument passed in rcx, 2nd in rdx, 3rd in r8. */
103 static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 5] = {
104 0, 0, 2, 1, 11, 12, 5, 13, 14, 15, 7, 6, 3, 4, 10, 8, 9
105 };
106 /* low-map. reg_map & 0x7. */
107 static const sljit_u8 reg_lmap[SLJIT_NUMBER_OF_REGISTERS + 5] = {
108 0, 0, 2, 1, 3, 4, 5, 5, 6, 7, 7, 6, 3, 4, 2, 0, 1
109 };
110 #endif
111
112 #define REX_W 0x48
113 #define REX_R 0x44
114 #define REX_X 0x42
115 #define REX_B 0x41
116 #define REX 0x40
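/* REX prefix layout: 0100WRXB. W selects a 64-bit operand size, R extends the
   ModR/M reg field, X extends the SIB index field and B extends the ModR/M rm
   field (or the SIB base / opcode register field). */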
117
118 #ifndef _WIN64
119 #define HALFWORD_MAX 0x7fffffffl
120 #define HALFWORD_MIN -0x80000000l
121 #else
122 #define HALFWORD_MAX 0x7fffffffll
123 #define HALFWORD_MIN -0x80000000ll
124 #endif
125
126 #define IS_HALFWORD(x) ((x) <= HALFWORD_MAX && (x) >= HALFWORD_MIN)
127 #define NOT_HALFWORD(x) ((x) > HALFWORD_MAX || (x) < HALFWORD_MIN)
128
129 #define CHECK_EXTRA_REGS(p, w, do)
130
131 #endif /* SLJIT_CONFIG_X86_32 */
132
133 #define TMP_FREG (0)
134
135 /* Size flags for emit_x86_instruction: */
136 #define EX86_BIN_INS 0x0010
137 #define EX86_SHIFT_INS 0x0020
138 #define EX86_REX 0x0040
139 #define EX86_NO_REXW 0x0080
140 #define EX86_BYTE_ARG 0x0100
141 #define EX86_HALF_ARG 0x0200
142 #define EX86_PREF_66 0x0400
143 #define EX86_PREF_F2 0x0800
144 #define EX86_PREF_F3 0x1000
145 #define EX86_SSE2_OP1 0x2000
146 #define EX86_SSE2_OP2 0x4000
147 #define EX86_SSE2 (EX86_SSE2_OP1 | EX86_SSE2_OP2)
148
149 /* --------------------------------------------------------------------- */
150 /* Instruction forms */
151 /* --------------------------------------------------------------------- */
152
153 #define ADD (/* BINARY */ 0 << 3)
154 #define ADD_EAX_i32 0x05
155 #define ADD_r_rm 0x03
156 #define ADD_rm_r 0x01
157 #define ADDSD_x_xm 0x58
158 #define ADC (/* BINARY */ 2 << 3)
159 #define ADC_EAX_i32 0x15
160 #define ADC_r_rm 0x13
161 #define ADC_rm_r 0x11
162 #define AND (/* BINARY */ 4 << 3)
163 #define AND_EAX_i32 0x25
164 #define AND_r_rm 0x23
165 #define AND_rm_r 0x21
166 #define ANDPD_x_xm 0x54
167 #define BSR_r_rm (/* GROUP_0F */ 0xbd)
168 #define CALL_i32 0xe8
169 #define CALL_rm (/* GROUP_FF */ 2 << 3)
170 #define CDQ 0x99
171 #define CMOVNE_r_rm (/* GROUP_0F */ 0x45)
172 #define CMP (/* BINARY */ 7 << 3)
173 #define CMP_EAX_i32 0x3d
174 #define CMP_r_rm 0x3b
175 #define CMP_rm_r 0x39
176 #define CVTPD2PS_x_xm 0x5a
177 #define CVTSI2SD_x_rm 0x2a
178 #define CVTTSD2SI_r_xm 0x2c
179 #define DIV (/* GROUP_F7 */ 6 << 3)
180 #define DIVSD_x_xm 0x5e
181 #define INT3 0xcc
182 #define IDIV (/* GROUP_F7 */ 7 << 3)
183 #define IMUL (/* GROUP_F7 */ 5 << 3)
184 #define IMUL_r_rm (/* GROUP_0F */ 0xaf)
185 #define IMUL_r_rm_i8 0x6b
186 #define IMUL_r_rm_i32 0x69
187 #define JE_i8 0x74
188 #define JNE_i8 0x75
189 #define JMP_i8 0xeb
190 #define JMP_i32 0xe9
191 #define JMP_rm (/* GROUP_FF */ 4 << 3)
192 #define LEA_r_m 0x8d
193 #define MOV_r_rm 0x8b
194 #define MOV_r_i32 0xb8
195 #define MOV_rm_r 0x89
196 #define MOV_rm_i32 0xc7
197 #define MOV_rm8_i8 0xc6
198 #define MOV_rm8_r8 0x88
199 #define MOVSD_x_xm 0x10
200 #define MOVSD_xm_x 0x11
201 #define MOVSXD_r_rm 0x63
202 #define MOVSX_r_rm8 (/* GROUP_0F */ 0xbe)
203 #define MOVSX_r_rm16 (/* GROUP_0F */ 0xbf)
204 #define MOVZX_r_rm8 (/* GROUP_0F */ 0xb6)
205 #define MOVZX_r_rm16 (/* GROUP_0F */ 0xb7)
206 #define MUL (/* GROUP_F7 */ 4 << 3)
207 #define MULSD_x_xm 0x59
208 #define NEG_rm (/* GROUP_F7 */ 3 << 3)
209 #define NOP 0x90
210 #define NOT_rm (/* GROUP_F7 */ 2 << 3)
211 #define OR (/* BINARY */ 1 << 3)
212 #define OR_r_rm 0x0b
213 #define OR_EAX_i32 0x0d
214 #define OR_rm_r 0x09
215 #define OR_rm8_r8 0x08
216 #define POP_r 0x58
217 #define POP_rm 0x8f
218 #define POPF 0x9d
219 #define PUSH_i32 0x68
220 #define PUSH_r 0x50
221 #define PUSH_rm (/* GROUP_FF */ 6 << 3)
222 #define PUSHF 0x9c
223 #define RET_near 0xc3
224 #define RET_i16 0xc2
225 #define SBB (/* BINARY */ 3 << 3)
226 #define SBB_EAX_i32 0x1d
227 #define SBB_r_rm 0x1b
228 #define SBB_rm_r 0x19
229 #define SAR (/* SHIFT */ 7 << 3)
230 #define SHL (/* SHIFT */ 4 << 3)
231 #define SHR (/* SHIFT */ 5 << 3)
232 #define SUB (/* BINARY */ 5 << 3)
233 #define SUB_EAX_i32 0x2d
234 #define SUB_r_rm 0x2b
235 #define SUB_rm_r 0x29
236 #define SUBSD_x_xm 0x5c
237 #define TEST_EAX_i32 0xa9
238 #define TEST_rm_r 0x85
239 #define UCOMISD_x_xm 0x2e
240 #define UNPCKLPD_x_xm 0x14
241 #define XCHG_EAX_r 0x90
242 #define XCHG_r_rm 0x87
243 #define XOR (/* BINARY */ 6 << 3)
244 #define XOR_EAX_i32 0x35
245 #define XOR_r_rm 0x33
246 #define XOR_rm_r 0x31
247 #define XORPD_x_xm 0x57
248
249 #define GROUP_0F 0x0f
250 #define GROUP_F7 0xf7
251 #define GROUP_FF 0xff
252 #define GROUP_BINARY_81 0x81
253 #define GROUP_BINARY_83 0x83
254 #define GROUP_SHIFT_1 0xd1
255 #define GROUP_SHIFT_N 0xc1
256 #define GROUP_SHIFT_CL 0xd3
257
258 #define MOD_REG 0xc0
259 #define MOD_DISP8 0x40
260
261 #define INC_SIZE(s) (*inst++ = (s), compiler->size += (s))
262
263 #define PUSH_REG(r) (*inst++ = (PUSH_r + (r)))
264 #define POP_REG(r) (*inst++ = (POP_r + (r)))
265 #define RET() (*inst++ = (RET_near))
266 #define RET_I16(n) (*inst++ = (RET_i16), *inst++ = n, *inst++ = 0)
267 /* r32, r/m32 */
268 #define MOV_RM(mod, reg, rm) (*inst++ = (MOV_r_rm), *inst++ = (mod) << 6 | (reg) << 3 | (rm))
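/* The ModR/M byte is mod (2 bits) | reg (3 bits) | rm (3 bits); for example
   MOV_RM(MOD_REG, reg_map[a], reg_map[b]) encodes a register-to-register
   `mov a, b`. */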
269
270 /* Multithreading does not affect these static variables, since they store
271 built-in CPU features. Even if several threads detect the CPU features at
272 the same time, they simply overwrite them with the same values. */
273 #if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
274 static sljit_s32 cpu_has_sse2 = -1;
275 #endif
276 static sljit_s32 cpu_has_cmov = -1;
277
278 #ifdef _WIN32_WCE
279 #include <cmnintrin.h>
280 #elif defined(_MSC_VER) && _MSC_VER >= 1400
281 #include <intrin.h>
282 #endif
283
284 static void get_cpu_features(void)
285 {
286 sljit_u32 features;
287
288 #if defined(_MSC_VER) && _MSC_VER >= 1400
289
290 int CPUInfo[4];
291 __cpuid(CPUInfo, 1);
292 features = (sljit_u32)CPUInfo[3];
293
294 #elif defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__SUNPRO_C)
295
296 /* AT&T syntax. */
297 __asm__ (
298 "movl $0x1, %%eax\n"
299 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
300 /* On x86-32, there is no red zone, so this
301 should work (no need for a local variable). */
302 "push %%ebx\n"
303 #endif
304 "cpuid\n"
305 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
306 "pop %%ebx\n"
307 #endif
308 "movl %%edx, %0\n"
309 : "=g" (features)
310 :
311 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
312 : "%eax", "%ecx", "%edx"
313 #else
314 : "%rax", "%rbx", "%rcx", "%rdx"
315 #endif
316 );
317
318 #else /* _MSC_VER && _MSC_VER >= 1400 */
319
320 /* Intel syntax. */
321 __asm {
322 mov eax, 1
323 cpuid
324 mov features, edx
325 }
326
327 #endif /* _MSC_VER && _MSC_VER >= 1400 */
328
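/* CPUID leaf 1 returns the feature flags in EDX: bit 26 indicates SSE2
   support and bit 15 indicates CMOV/FCMOV support. */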
329 #if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
330 cpu_has_sse2 = (features >> 26) & 0x1;
331 #endif
332 cpu_has_cmov = (features >> 15) & 0x1;
333 }
334
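/* Returns the second byte of the two-byte 0F 8x near Jcc encoding for the
   given condition; subtracting 0x10 yields the short (rel8) 7x form used by
   generate_near_jump_code() for short jumps. */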
335 static sljit_u8 get_jump_code(sljit_s32 type)
336 {
337 switch (type) {
338 case SLJIT_EQUAL:
339 case SLJIT_EQUAL_F64:
340 return 0x84 /* je */;
341
342 case SLJIT_NOT_EQUAL:
343 case SLJIT_NOT_EQUAL_F64:
344 return 0x85 /* jne */;
345
346 case SLJIT_LESS:
347 case SLJIT_LESS_F64:
348 return 0x82 /* jc */;
349
350 case SLJIT_GREATER_EQUAL:
351 case SLJIT_GREATER_EQUAL_F64:
352 return 0x83 /* jae */;
353
354 case SLJIT_GREATER:
355 case SLJIT_GREATER_F64:
356 return 0x87 /* jnbe */;
357
358 case SLJIT_LESS_EQUAL:
359 case SLJIT_LESS_EQUAL_F64:
360 return 0x86 /* jbe */;
361
362 case SLJIT_SIG_LESS:
363 return 0x8c /* jl */;
364
365 case SLJIT_SIG_GREATER_EQUAL:
366 return 0x8d /* jnl */;
367
368 case SLJIT_SIG_GREATER:
369 return 0x8f /* jnle */;
370
371 case SLJIT_SIG_LESS_EQUAL:
372 return 0x8e /* jle */;
373
374 case SLJIT_OVERFLOW:
375 case SLJIT_MUL_OVERFLOW:
376 return 0x80 /* jo */;
377
378 case SLJIT_NOT_OVERFLOW:
379 case SLJIT_MUL_NOT_OVERFLOW:
380 return 0x81 /* jno */;
381
382 case SLJIT_UNORDERED_F64:
383 return 0x8a /* jp */;
384
385 case SLJIT_ORDERED_F64:
386 return 0x8b /* jpo */;
387 }
388 return 0;
389 }
390
391 static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_s32 type);
392
393 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
394 static sljit_u8* generate_fixed_jump(sljit_u8 *code_ptr, sljit_sw addr, sljit_s32 type);
395 #endif
396
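/* Only the jump opcode bytes are emitted here; the rel8 / rel32 displacement
   field is left empty and patched later in sljit_generate_code() based on the
   PATCH_MB / PATCH_MW flag set below. */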
397 static sljit_u8* generate_near_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_u8 *code, sljit_s32 type)
398 {
399 sljit_s32 short_jump;
400 sljit_uw label_addr;
401
402 if (jump->flags & JUMP_LABEL)
403 label_addr = (sljit_uw)(code + jump->u.label->size);
404 else
405 label_addr = jump->u.target;
406 short_jump = (sljit_sw)(label_addr - (jump->addr + 2)) >= -128 && (sljit_sw)(label_addr - (jump->addr + 2)) <= 127;
407
408 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
409 if ((sljit_sw)(label_addr - (jump->addr + 1)) > HALFWORD_MAX || (sljit_sw)(label_addr - (jump->addr + 1)) < HALFWORD_MIN)
410 return generate_far_jump_code(jump, code_ptr, type);
411 #endif
412
413 if (type == SLJIT_JUMP) {
414 if (short_jump)
415 *code_ptr++ = JMP_i8;
416 else
417 *code_ptr++ = JMP_i32;
418 jump->addr++;
419 }
420 else if (type >= SLJIT_FAST_CALL) {
421 short_jump = 0;
422 *code_ptr++ = CALL_i32;
423 jump->addr++;
424 }
425 else if (short_jump) {
426 *code_ptr++ = get_jump_code(type) - 0x10;
427 jump->addr++;
428 }
429 else {
430 *code_ptr++ = GROUP_0F;
431 *code_ptr++ = get_jump_code(type);
432 jump->addr += 2;
433 }
434
435 if (short_jump) {
436 jump->flags |= PATCH_MB;
437 code_ptr += sizeof(sljit_s8);
438 } else {
439 jump->flags |= PATCH_MW;
440 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
441 code_ptr += sizeof(sljit_sw);
442 #else
443 code_ptr += sizeof(sljit_s32);
444 #endif
445 }
446
447 return code_ptr;
448 }
449
450 SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler)
451 {
452 struct sljit_memory_fragment *buf;
453 sljit_u8 *code;
454 sljit_u8 *code_ptr;
455 sljit_u8 *buf_ptr;
456 sljit_u8 *buf_end;
457 sljit_u8 len;
458
459 struct sljit_label *label;
460 struct sljit_jump *jump;
461 struct sljit_const *const_;
462
463 CHECK_ERROR_PTR();
464 CHECK_PTR(check_sljit_generate_code(compiler));
465 reverse_buf(compiler);
466
467 /* Second code generation pass. */
468 code = (sljit_u8*)SLJIT_MALLOC_EXEC(compiler->size);
469 PTR_FAIL_WITH_EXEC_IF(code);
470 buf = compiler->buf;
471
472 code_ptr = code;
473 label = compiler->labels;
474 jump = compiler->jumps;
475 const_ = compiler->consts;
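/* Each record in the instruction buffer starts with a length byte. A non-zero
   length is followed by that many bytes of already generated machine code. A
   zero length is followed by a marker byte: 0 for a label, 1 for a constant,
   2/3 for a fixed call/jump target, and type + 4 for a jump of that type. */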
476 do {
477 buf_ptr = buf->memory;
478 buf_end = buf_ptr + buf->used_size;
479 do {
480 len = *buf_ptr++;
481 if (len > 0) {
482 /* The code is already generated. */
483 SLJIT_MEMMOVE(code_ptr, buf_ptr, len);
484 code_ptr += len;
485 buf_ptr += len;
486 }
487 else {
488 if (*buf_ptr >= 4) {
489 jump->addr = (sljit_uw)code_ptr;
490 if (!(jump->flags & SLJIT_REWRITABLE_JUMP))
491 code_ptr = generate_near_jump_code(jump, code_ptr, code, *buf_ptr - 4);
492 else
493 code_ptr = generate_far_jump_code(jump, code_ptr, *buf_ptr - 4);
494 jump = jump->next;
495 }
496 else if (*buf_ptr == 0) {
497 label->addr = (sljit_uw)code_ptr;
498 label->size = code_ptr - code;
499 label = label->next;
500 }
501 else if (*buf_ptr == 1) {
502 const_->addr = ((sljit_uw)code_ptr) - sizeof(sljit_sw);
503 const_ = const_->next;
504 }
505 else {
506 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
507 *code_ptr++ = (*buf_ptr == 2) ? CALL_i32 : JMP_i32;
508 buf_ptr++;
509 *(sljit_sw*)code_ptr = *(sljit_sw*)buf_ptr - ((sljit_sw)code_ptr + sizeof(sljit_sw));
510 code_ptr += sizeof(sljit_sw);
511 buf_ptr += sizeof(sljit_sw) - 1;
512 #else
513 code_ptr = generate_fixed_jump(code_ptr, *(sljit_sw*)(buf_ptr + 1), *buf_ptr);
514 buf_ptr += sizeof(sljit_sw);
515 #endif
516 }
517 buf_ptr++;
518 }
519 } while (buf_ptr < buf_end);
520 SLJIT_ASSERT(buf_ptr == buf_end);
521 buf = buf->next;
522 } while (buf);
523
524 SLJIT_ASSERT(!label);
525 SLJIT_ASSERT(!jump);
526 SLJIT_ASSERT(!const_);
527
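/* Patch the reserved displacement fields: PATCH_MB entries get an 8-bit
   relative offset, PATCH_MW a machine-word (x86-32) or 32-bit (x86-64)
   relative offset, and PATCH_MD (x86-64 only) an absolute address. */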
528 jump = compiler->jumps;
529 while (jump) {
530 if (jump->flags & PATCH_MB) {
531 SLJIT_ASSERT((sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_s8))) >= -128 && (sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_s8))) <= 127);
532 *(sljit_u8*)jump->addr = (sljit_u8)(jump->u.label->addr - (jump->addr + sizeof(sljit_s8)));
533 } else if (jump->flags & PATCH_MW) {
534 if (jump->flags & JUMP_LABEL) {
535 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
536 *(sljit_sw*)jump->addr = (sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_sw)));
537 #else
538 SLJIT_ASSERT((sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_s32))) >= HALFWORD_MIN && (sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_s32))) <= HALFWORD_MAX);
539 *(sljit_s32*)jump->addr = (sljit_s32)(jump->u.label->addr - (jump->addr + sizeof(sljit_s32)));
540 #endif
541 }
542 else {
543 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
544 *(sljit_sw*)jump->addr = (sljit_sw)(jump->u.target - (jump->addr + sizeof(sljit_sw)));
545 #else
546 SLJIT_ASSERT((sljit_sw)(jump->u.target - (jump->addr + sizeof(sljit_s32))) >= HALFWORD_MIN && (sljit_sw)(jump->u.target - (jump->addr + sizeof(sljit_s32))) <= HALFWORD_MAX);
547 *(sljit_s32*)jump->addr = (sljit_s32)(jump->u.target - (jump->addr + sizeof(sljit_s32)));
548 #endif
549 }
550 }
551 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
552 else if (jump->flags & PATCH_MD)
553 *(sljit_sw*)jump->addr = jump->u.label->addr;
554 #endif
555
556 jump = jump->next;
557 }
558
559 /* We may waste some space because of short jumps. */
560 SLJIT_ASSERT(code_ptr <= code + compiler->size);
561 compiler->error = SLJIT_ERR_COMPILED;
562 compiler->executable_size = code_ptr - code;
563 return (void*)code;
564 }
565
566 /* --------------------------------------------------------------------- */
567 /* Operators */
568 /* --------------------------------------------------------------------- */
569
570 static sljit_s32 emit_cum_binary(struct sljit_compiler *compiler,
571 sljit_u8 op_rm, sljit_u8 op_mr, sljit_u8 op_imm, sljit_u8 op_eax_imm,
572 sljit_s32 dst, sljit_sw dstw,
573 sljit_s32 src1, sljit_sw src1w,
574 sljit_s32 src2, sljit_sw src2w);
575
576 static sljit_s32 emit_non_cum_binary(struct sljit_compiler *compiler,
577 sljit_u8 op_rm, sljit_u8 op_mr, sljit_u8 op_imm, sljit_u8 op_eax_imm,
578 sljit_s32 dst, sljit_sw dstw,
579 sljit_s32 src1, sljit_sw src1w,
580 sljit_s32 src2, sljit_sw src2w);
581
582 static sljit_s32 emit_mov(struct sljit_compiler *compiler,
583 sljit_s32 dst, sljit_sw dstw,
584 sljit_s32 src, sljit_sw srcw);
585
586 static SLJIT_INLINE sljit_s32 emit_save_flags(struct sljit_compiler *compiler)
587 {
588 sljit_u8 *inst;
589
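/* The lea below moves the stack pointer up by one word so that the following
   pushf leaves it at its original value, storing the flags at the previous
   stack top; emit_restore_flags() performs the inverse sequence. */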
590 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
591 inst = (sljit_u8*)ensure_buf(compiler, 1 + 5);
592 FAIL_IF(!inst);
593 INC_SIZE(5);
594 #else
595 inst = (sljit_u8*)ensure_buf(compiler, 1 + 6);
596 FAIL_IF(!inst);
597 INC_SIZE(6);
598 *inst++ = REX_W;
599 #endif
600 *inst++ = LEA_r_m; /* lea esp/rsp, [esp/rsp + sizeof(sljit_sw)] */
601 *inst++ = 0x64;
602 *inst++ = 0x24;
603 *inst++ = (sljit_u8)sizeof(sljit_sw);
604 *inst++ = PUSHF;
605 compiler->flags_saved = 1;
606 return SLJIT_SUCCESS;
607 }
608
609 static SLJIT_INLINE sljit_s32 emit_restore_flags(struct sljit_compiler *compiler, sljit_s32 keep_flags)
610 {
611 sljit_u8 *inst;
612
613 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
614 inst = (sljit_u8*)ensure_buf(compiler, 1 + 5);
615 FAIL_IF(!inst);
616 INC_SIZE(5);
617 *inst++ = POPF;
618 #else
619 inst = (sljit_u8*)ensure_buf(compiler, 1 + 6);
620 FAIL_IF(!inst);
621 INC_SIZE(6);
622 *inst++ = POPF;
623 *inst++ = REX_W;
624 #endif
625 *inst++ = LEA_r_m; /* lea esp/rsp, [esp/rsp - sizeof(sljit_sw)] */
626 *inst++ = 0x64;
627 *inst++ = 0x24;
628 *inst++ = (sljit_u8)(-(sljit_s8)sizeof(sljit_sw));
629 compiler->flags_saved = keep_flags;
630 return SLJIT_SUCCESS;
631 }
632
633 #ifdef _WIN32
634 #include <malloc.h>
635
636 static void SLJIT_CALL sljit_grow_stack(sljit_sw local_size)
637 {
638 /* Workaround for calling the internal _chkstk() function on Windows.
639 This function touches all 4k pages that belong to the requested stack space,
640 whose size is passed in local_size. This is necessary on Windows, where
641 the stack can only grow in 4k steps. The function just burns
642 CPU cycles if the stack is already large enough, but since that cannot be
643 known in advance, it must always be called. I think this is a bad design in
644 general, even if it has its reasons. */
645 *(volatile sljit_s32*)alloca(local_size) = 0;
646 }
647
648 #endif
649
650 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
651 #include "sljitNativeX86_32.c"
652 #else
653 #include "sljitNativeX86_64.c"
654 #endif
655
656 static sljit_s32 emit_mov(struct sljit_compiler *compiler,
657 sljit_s32 dst, sljit_sw dstw,
658 sljit_s32 src, sljit_sw srcw)
659 {
660 sljit_u8* inst;
661
662 if (dst == SLJIT_UNUSED) {
663 /* No destination; no need to set up flags. */
664 if (src & SLJIT_MEM) {
665 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src, srcw);
666 FAIL_IF(!inst);
667 *inst = MOV_r_rm;
668 }
669 return SLJIT_SUCCESS;
670 }
671 if (FAST_IS_REG(src)) {
672 inst = emit_x86_instruction(compiler, 1, src, 0, dst, dstw);
673 FAIL_IF(!inst);
674 *inst = MOV_rm_r;
675 return SLJIT_SUCCESS;
676 }
677 if (src & SLJIT_IMM) {
678 if (FAST_IS_REG(dst)) {
679 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
680 return emit_do_imm(compiler, MOV_r_i32 + reg_map[dst], srcw);
681 #else
682 if (!compiler->mode32) {
683 if (NOT_HALFWORD(srcw))
684 return emit_load_imm64(compiler, dst, srcw);
685 }
686 else
687 return emit_do_imm32(compiler, (reg_map[dst] >= 8) ? REX_B : 0, MOV_r_i32 + reg_lmap[dst], srcw);
688 #endif
689 }
690 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
691 if (!compiler->mode32 && NOT_HALFWORD(srcw)) {
692 FAIL_IF(emit_load_imm64(compiler, TMP_REG2, srcw));
693 inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, dst, dstw);
694 FAIL_IF(!inst);
695 *inst = MOV_rm_r;
696 return SLJIT_SUCCESS;
697 }
698 #endif
699 inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, dstw);
700 FAIL_IF(!inst);
701 *inst = MOV_rm_i32;
702 return SLJIT_SUCCESS;
703 }
704 if (FAST_IS_REG(dst)) {
705 inst = emit_x86_instruction(compiler, 1, dst, 0, src, srcw);
706 FAIL_IF(!inst);
707 *inst = MOV_r_rm;
708 return SLJIT_SUCCESS;
709 }
710
711 /* Memory to memory move. Requires two instructions. */
712 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src, srcw);
713 FAIL_IF(!inst);
714 *inst = MOV_r_rm;
715 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
716 FAIL_IF(!inst);
717 *inst = MOV_rm_r;
718 return SLJIT_SUCCESS;
719 }
720
721 #define EMIT_MOV(compiler, dst, dstw, src, srcw) \
722 FAIL_IF(emit_mov(compiler, dst, dstw, src, srcw));
723
724 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op)
725 {
726 sljit_u8 *inst;
727 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
728 sljit_s32 size;
729 #endif
730
731 CHECK_ERROR();
732 CHECK(check_sljit_emit_op0(compiler, op));
733
734 switch (GET_OPCODE(op)) {
735 case SLJIT_BREAKPOINT:
736 inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
737 FAIL_IF(!inst);
738 INC_SIZE(1);
739 *inst = INT3;
740 break;
741 case SLJIT_NOP:
742 inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
743 FAIL_IF(!inst);
744 INC_SIZE(1);
745 *inst = NOP;
746 break;
747 case SLJIT_LMUL_UW:
748 case SLJIT_LMUL_SW:
749 case SLJIT_DIVMOD_UW:
750 case SLJIT_DIVMOD_SW:
751 case SLJIT_DIV_UW:
752 case SLJIT_DIV_SW:
753 compiler->flags_saved = 0;
754 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
755 #ifdef _WIN64
756 SLJIT_COMPILE_ASSERT(
757 reg_map[SLJIT_R0] == 0
758 && reg_map[SLJIT_R1] == 2
759 && reg_map[TMP_REG1] > 7,
760 invalid_register_assignment_for_div_mul);
761 #else
762 SLJIT_COMPILE_ASSERT(
763 reg_map[SLJIT_R0] == 0
764 && reg_map[SLJIT_R1] < 7
765 && reg_map[TMP_REG1] == 2,
766 invalid_register_assignment_for_div_mul);
767 #endif
768 compiler->mode32 = op & SLJIT_I32_OP;
769 #endif
770 SLJIT_COMPILE_ASSERT((SLJIT_DIVMOD_UW & 0x2) == 0 && SLJIT_DIV_UW - 0x2 == SLJIT_DIVMOD_UW, bad_div_opcode_assignments);
771
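/* Before dividing, the high half of the dividend (EDX/RDX) must be set up:
   it is zeroed with XOR for the unsigned forms and sign-extended from EAX/RAX
   with CDQ (or REX.W CDQ, i.e. CQO) for the signed forms. */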
772 op = GET_OPCODE(op);
773 if ((op | 0x2) == SLJIT_DIV_UW) {
774 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64)
775 EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_R1, 0);
776 inst = emit_x86_instruction(compiler, 1, SLJIT_R1, 0, SLJIT_R1, 0);
777 #else
778 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, TMP_REG1, 0);
779 #endif
780 FAIL_IF(!inst);
781 *inst = XOR_r_rm;
782 }
783
784 if ((op | 0x2) == SLJIT_DIV_SW) {
785 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64)
786 EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_R1, 0);
787 #endif
788
789 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
790 inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
791 FAIL_IF(!inst);
792 INC_SIZE(1);
793 *inst = CDQ;
794 #else
795 if (compiler->mode32) {
796 inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
797 FAIL_IF(!inst);
798 INC_SIZE(1);
799 *inst = CDQ;
800 } else {
801 inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
802 FAIL_IF(!inst);
803 INC_SIZE(2);
804 *inst++ = REX_W;
805 *inst = CDQ;
806 }
807 #endif
808 }
809
810 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
811 inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
812 FAIL_IF(!inst);
813 INC_SIZE(2);
814 *inst++ = GROUP_F7;
815 *inst = MOD_REG | ((op >= SLJIT_DIVMOD_UW) ? reg_map[TMP_REG1] : reg_map[SLJIT_R1]);
816 #else
817 #ifdef _WIN64
818 size = (!compiler->mode32 || op >= SLJIT_DIVMOD_UW) ? 3 : 2;
819 #else
820 size = (!compiler->mode32) ? 3 : 2;
821 #endif
822 inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
823 FAIL_IF(!inst);
824 INC_SIZE(size);
825 #ifdef _WIN64
826 if (!compiler->mode32)
827 *inst++ = REX_W | ((op >= SLJIT_DIVMOD_UW) ? REX_B : 0);
828 else if (op >= SLJIT_DIVMOD_UW)
829 *inst++ = REX_B;
830 *inst++ = GROUP_F7;
831 *inst = MOD_REG | ((op >= SLJIT_DIVMOD_UW) ? reg_lmap[TMP_REG1] : reg_lmap[SLJIT_R1]);
832 #else
833 if (!compiler->mode32)
834 *inst++ = REX_W;
835 *inst++ = GROUP_F7;
836 *inst = MOD_REG | reg_map[SLJIT_R1];
837 #endif
838 #endif
839 switch (op) {
840 case SLJIT_LMUL_UW:
841 *inst |= MUL;
842 break;
843 case SLJIT_LMUL_SW:
844 *inst |= IMUL;
845 break;
846 case SLJIT_DIVMOD_UW:
847 case SLJIT_DIV_UW:
848 *inst |= DIV;
849 break;
850 case SLJIT_DIVMOD_SW:
851 case SLJIT_DIV_SW:
852 *inst |= IDIV;
853 break;
854 }
855 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) && !defined(_WIN64)
856 if (op <= SLJIT_DIVMOD_SW)
857 EMIT_MOV(compiler, SLJIT_R1, 0, TMP_REG1, 0);
858 #else
859 if (op >= SLJIT_DIV_UW)
860 EMIT_MOV(compiler, SLJIT_R1, 0, TMP_REG1, 0);
861 #endif
862 break;
863 }
864
865 return SLJIT_SUCCESS;
866 }
867
868 #define ENCODE_PREFIX(prefix) \
869 do { \
870 inst = (sljit_u8*)ensure_buf(compiler, 1 + 1); \
871 FAIL_IF(!inst); \
872 INC_SIZE(1); \
873 *inst = (prefix); \
874 } while (0)
875
876 static sljit_s32 emit_mov_byte(struct sljit_compiler *compiler, sljit_s32 sign,
877 sljit_s32 dst, sljit_sw dstw,
878 sljit_s32 src, sljit_sw srcw)
879 {
880 sljit_u8* inst;
881 sljit_s32 dst_r;
882 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
883 sljit_s32 work_r;
884 #endif
885
886 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
887 compiler->mode32 = 0;
888 #endif
889
890 if (dst == SLJIT_UNUSED && !(src & SLJIT_MEM))
891 return SLJIT_SUCCESS; /* Empty instruction. */
892
893 if (src & SLJIT_IMM) {
894 if (FAST_IS_REG(dst)) {
895 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
896 return emit_do_imm(compiler, MOV_r_i32 + reg_map[dst], srcw);
897 #else
898 inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, 0);
899 FAIL_IF(!inst);
900 *inst = MOV_rm_i32;
901 return SLJIT_SUCCESS;
902 #endif
903 }
904 inst = emit_x86_instruction(compiler, 1 | EX86_BYTE_ARG | EX86_NO_REXW, SLJIT_IMM, srcw, dst, dstw);
905 FAIL_IF(!inst);
906 *inst = MOV_rm8_i8;
907 return SLJIT_SUCCESS;
908 }
909
910 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
911
912 if ((dst & SLJIT_MEM) && FAST_IS_REG(src)) {
913 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
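/* Without a REX prefix only registers with encodings 0-3 (EAX, ECX, EDX, EBX)
   have low-byte forms; encodings 4-7 select AH/CH/DH/BH instead, so a source
   mapped to ESP/EBP/ESI/EDI must be copied to a usable register first. */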
914 if (reg_map[src] >= 4) {
915 SLJIT_ASSERT(dst_r == TMP_REG1);
916 EMIT_MOV(compiler, TMP_REG1, 0, src, 0);
917 } else
918 dst_r = src;
919 #else
920 dst_r = src;
921 #endif
922 }
923 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
924 else if (FAST_IS_REG(src) && reg_map[src] >= 4) {
925 /* src, dst are registers. */
926 SLJIT_ASSERT(SLOW_IS_REG(dst));
927 if (reg_map[dst] < 4) {
928 if (dst != src)
929 EMIT_MOV(compiler, dst, 0, src, 0);
930 inst = emit_x86_instruction(compiler, 2, dst, 0, dst, 0);
931 FAIL_IF(!inst);
932 *inst++ = GROUP_0F;
933 *inst = sign ? MOVSX_r_rm8 : MOVZX_r_rm8;
934 }
935 else {
936 if (dst != src)
937 EMIT_MOV(compiler, dst, 0, src, 0);
938 if (sign) {
939 /* shl reg, 24 */
940 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 24, dst, 0);
941 FAIL_IF(!inst);
942 *inst |= SHL;
943 /* sar reg, 24 */
944 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 24, dst, 0);
945 FAIL_IF(!inst);
946 *inst |= SAR;
947 }
948 else {
949 inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 0xff, dst, 0);
950 FAIL_IF(!inst);
951 *(inst + 1) |= AND;
952 }
953 }
954 return SLJIT_SUCCESS;
955 }
956 #endif
957 else {
958 /* src is either a memory address or a register with reg_map[src] < 4 on the x86_32 architecture. */
959 inst = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw);
960 FAIL_IF(!inst);
961 *inst++ = GROUP_0F;
962 *inst = sign ? MOVSX_r_rm8 : MOVZX_r_rm8;
963 }
964
965 if (dst & SLJIT_MEM) {
966 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
967 if (dst_r == TMP_REG1) {
968 /* Find an unused register whose reg_map value is < 4. */
969 if ((dst & REG_MASK) == SLJIT_R0) {
970 if ((dst & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_R1))
971 work_r = SLJIT_R2;
972 else
973 work_r = SLJIT_R1;
974 }
975 else {
976 if ((dst & OFFS_REG_MASK) != TO_OFFS_REG(SLJIT_R0))
977 work_r = SLJIT_R0;
978 else if ((dst & REG_MASK) == SLJIT_R1)
979 work_r = SLJIT_R2;
980 else
981 work_r = SLJIT_R1;
982 }
983
984 if (work_r == SLJIT_R0) {
985 ENCODE_PREFIX(XCHG_EAX_r + reg_map[TMP_REG1]);
986 }
987 else {
988 inst = emit_x86_instruction(compiler, 1, work_r, 0, dst_r, 0);
989 FAIL_IF(!inst);
990 *inst = XCHG_r_rm;
991 }
992
993 inst = emit_x86_instruction(compiler, 1, work_r, 0, dst, dstw);
994 FAIL_IF(!inst);
995 *inst = MOV_rm8_r8;
996
997 if (work_r == SLJIT_R0) {
998 ENCODE_PREFIX(XCHG_EAX_r + reg_map[TMP_REG1]);
999 }
1000 else {
1001 inst = emit_x86_instruction(compiler, 1, work_r, 0, dst_r, 0);
1002 FAIL_IF(!inst);
1003 *inst = XCHG_r_rm;
1004 }
1005 }
1006 else {
1007 inst = emit_x86_instruction(compiler, 1, dst_r, 0, dst, dstw);
1008 FAIL_IF(!inst);
1009 *inst = MOV_rm8_r8;
1010 }
1011 #else
1012 inst = emit_x86_instruction(compiler, 1 | EX86_REX | EX86_NO_REXW, dst_r, 0, dst, dstw);
1013 FAIL_IF(!inst);
1014 *inst = MOV_rm8_r8;
1015 #endif
1016 }
1017
1018 return SLJIT_SUCCESS;
1019 }
1020
1021 static sljit_s32 emit_mov_half(struct sljit_compiler *compiler, sljit_s32 sign,
1022 sljit_s32 dst, sljit_sw dstw,
1023 sljit_s32 src, sljit_sw srcw)
1024 {
1025 sljit_u8* inst;
1026 sljit_s32 dst_r;
1027
1028 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1029 compiler->mode32 = 0;
1030 #endif
1031
1032 if (dst == SLJIT_UNUSED && !(src & SLJIT_MEM))
1033 return SLJIT_SUCCESS; /* Empty instruction. */
1034
1035 if (src & SLJIT_IMM) {
1036 if (FAST_IS_REG(dst)) {
1037 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1038 return emit_do_imm(compiler, MOV_r_i32 + reg_map[dst], srcw);
1039 #else
1040 inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, 0);
1041 FAIL_IF(!inst);
1042 *inst = MOV_rm_i32;
1043 return SLJIT_SUCCESS;
1044 #endif
1045 }
1046 inst = emit_x86_instruction(compiler, 1 | EX86_HALF_ARG | EX86_NO_REXW | EX86_PREF_66, SLJIT_IMM, srcw, dst, dstw);
1047 FAIL_IF(!inst);
1048 *inst = MOV_rm_i32;
1049 return SLJIT_SUCCESS;
1050 }
1051
1052 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
1053
1054 if ((dst & SLJIT_MEM) && FAST_IS_REG(src))
1055 dst_r = src;
1056 else {
1057 inst = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw);
1058 FAIL_IF(!inst);
1059 *inst++ = GROUP_0F;
1060 *inst = sign ? MOVSX_r_rm16 : MOVZX_r_rm16;
1061 }
1062
1063 if (dst & SLJIT_MEM) {
1064 inst = emit_x86_instruction(compiler, 1 | EX86_NO_REXW | EX86_PREF_66, dst_r, 0, dst, dstw);
1065 FAIL_IF(!inst);
1066 *inst = MOV_rm_r;
1067 }
1068
1069 return SLJIT_SUCCESS;
1070 }
1071
1072 static sljit_s32 emit_unary(struct sljit_compiler *compiler, sljit_u8 opcode,
1073 sljit_s32 dst, sljit_sw dstw,
1074 sljit_s32 src, sljit_sw srcw)
1075 {
1076 sljit_u8* inst;
1077
1078 if (dst == SLJIT_UNUSED) {
1079 EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
1080 inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
1081 FAIL_IF(!inst);
1082 *inst++ = GROUP_F7;
1083 *inst |= opcode;
1084 return SLJIT_SUCCESS;
1085 }
1086 if (dst == src && dstw == srcw) {
1087 /* Same input and output */
1088 inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
1089 FAIL_IF(!inst);
1090 *inst++ = GROUP_F7;
1091 *inst |= opcode;
1092 return SLJIT_SUCCESS;
1093 }
1094 if (FAST_IS_REG(dst)) {
1095 EMIT_MOV(compiler, dst, 0, src, srcw);
1096 inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
1097 FAIL_IF(!inst);
1098 *inst++ = GROUP_F7;
1099 *inst |= opcode;
1100 return SLJIT_SUCCESS;
1101 }
1102 EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
1103 inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
1104 FAIL_IF(!inst);
1105 *inst++ = GROUP_F7;
1106 *inst |= opcode;
1107 EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
1108 return SLJIT_SUCCESS;
1109 }
1110
1111 static sljit_s32 emit_not_with_flags(struct sljit_compiler *compiler,
1112 sljit_s32 dst, sljit_sw dstw,
1113 sljit_s32 src, sljit_sw srcw)
1114 {
1115 sljit_u8* inst;
1116
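/* NOT does not update the flags, so the result is OR-ed with itself
   afterwards to set the zero and sign flags from it. */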
1117 if (dst == SLJIT_UNUSED) {
1118 EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
1119 inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
1120 FAIL_IF(!inst);
1121 *inst++ = GROUP_F7;
1122 *inst |= NOT_rm;
1123 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, TMP_REG1, 0);
1124 FAIL_IF(!inst);
1125 *inst = OR_r_rm;
1126 return SLJIT_SUCCESS;
1127 }
1128 if (FAST_IS_REG(dst)) {
1129 EMIT_MOV(compiler, dst, 0, src, srcw);
1130 inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
1131 FAIL_IF(!inst);
1132 *inst++ = GROUP_F7;
1133 *inst |= NOT_rm;
1134 inst = emit_x86_instruction(compiler, 1, dst, 0, dst, 0);
1135 FAIL_IF(!inst);
1136 *inst = OR_r_rm;
1137 return SLJIT_SUCCESS;
1138 }
1139 EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
1140 inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
1141 FAIL_IF(!inst);
1142 *inst++ = GROUP_F7;
1143 *inst |= NOT_rm;
1144 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, TMP_REG1, 0);
1145 FAIL_IF(!inst);
1146 *inst = OR_r_rm;
1147 EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
1148 return SLJIT_SUCCESS;
1149 }
1150
1151 static sljit_s32 emit_clz(struct sljit_compiler *compiler, sljit_s32 op_flags,
1152 sljit_s32 dst, sljit_sw dstw,
1153 sljit_s32 src, sljit_sw srcw)
1154 {
1155 sljit_u8* inst;
1156 sljit_s32 dst_r;
1157
1158 SLJIT_UNUSED_ARG(op_flags);
1159 if (SLJIT_UNLIKELY(dst == SLJIT_UNUSED)) {
1160 /* Just set the zero flag. */
1161 EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
1162 inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
1163 FAIL_IF(!inst);
1164 *inst++ = GROUP_F7;
1165 *inst |= NOT_rm;
1166 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1167 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 31, TMP_REG1, 0);
1168 #else
1169 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, !(op_flags & SLJIT_I32_OP) ? 63 : 31, TMP_REG1, 0);
1170 #endif
1171 FAIL_IF(!inst);
1172 *inst |= SHR;
1173 return SLJIT_SUCCESS;
1174 }
1175
1176 if (SLJIT_UNLIKELY(src & SLJIT_IMM)) {
1177 EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, srcw);
1178 src = TMP_REG1;
1179 srcw = 0;
1180 }
1181
1182 inst = emit_x86_instruction(compiler, 2, TMP_REG1, 0, src, srcw);
1183 FAIL_IF(!inst);
1184 *inst++ = GROUP_0F;
1185 *inst = BSR_r_rm;
1186
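/* CLZ is computed as BSR(src) XOR (bit_width - 1): for a non-zero source BSR
   returns the index b of the highest set bit, and b ^ 31 (or b ^ 63) equals
   31 - b (63 - b). The destination is preloaded with 32 + 31 (64 + 63), and
   the CMOVNE / conditional-jump fallback keeps that value for a zero source,
   which therefore yields 32 (64) after the final XOR. */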
1187 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1188 if (FAST_IS_REG(dst))
1189 dst_r = dst;
1190 else {
1191 /* Find an unused temporary register. */
1192 if ((dst & REG_MASK) != SLJIT_R0 && (dst & OFFS_REG_MASK) != TO_OFFS_REG(SLJIT_R0))
1193 dst_r = SLJIT_R0;
1194 else if ((dst & REG_MASK) != SLJIT_R1 && (dst & OFFS_REG_MASK) != TO_OFFS_REG(SLJIT_R1))
1195 dst_r = SLJIT_R1;
1196 else
1197 dst_r = SLJIT_R2;
1198 EMIT_MOV(compiler, dst, dstw, dst_r, 0);
1199 }
1200 EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, 32 + 31);
1201 #else
1202 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2;
1203 compiler->mode32 = 0;
1204 EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, !(op_flags & SLJIT_I32_OP) ? 64 + 63 : 32 + 31);
1205 compiler->mode32 = op_flags & SLJIT_I32_OP;
1206 #endif
1207
1208 if (cpu_has_cmov == -1)
1209 get_cpu_features();
1210
1211 if (cpu_has_cmov) {
1212 inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG1, 0);
1213 FAIL_IF(!inst);
1214 *inst++ = GROUP_0F;
1215 *inst = CMOVNE_r_rm;
1216 } else {
1217 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1218 inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
1219 FAIL_IF(!inst);
1220 INC_SIZE(4);
1221
1222 *inst++ = JE_i8;
1223 *inst++ = 2;
1224 *inst++ = MOV_r_rm;
1225 *inst++ = MOD_REG | (reg_map[dst_r] << 3) | reg_map[TMP_REG1];
1226 #else
1227 inst = (sljit_u8*)ensure_buf(compiler, 1 + 5);
1228 FAIL_IF(!inst);
1229 INC_SIZE(5);
1230
1231 *inst++ = JE_i8;
1232 *inst++ = 3;
1233 *inst++ = REX_W | (reg_map[dst_r] >= 8 ? REX_R : 0) | (reg_map[TMP_REG1] >= 8 ? REX_B : 0);
1234 *inst++ = MOV_r_rm;
1235 *inst++ = MOD_REG | (reg_lmap[dst_r] << 3) | reg_lmap[TMP_REG1];
1236 #endif
1237 }
1238
1239 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1240 inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 31, dst_r, 0);
1241 #else
1242 inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, !(op_flags & SLJIT_I32_OP) ? 63 : 31, dst_r, 0);
1243 #endif
1244 FAIL_IF(!inst);
1245 *(inst + 1) |= XOR;
1246
1247 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1248 if (dst & SLJIT_MEM) {
1249 inst = emit_x86_instruction(compiler, 1, dst_r, 0, dst, dstw);
1250 FAIL_IF(!inst);
1251 *inst = XCHG_r_rm;
1252 }
1253 #else
1254 if (dst & SLJIT_MEM)
1255 EMIT_MOV(compiler, dst, dstw, TMP_REG2, 0);
1256 #endif
1257 return SLJIT_SUCCESS;
1258 }
1259
1260 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op,
1261 sljit_s32 dst, sljit_sw dstw,
1262 sljit_s32 src, sljit_sw srcw)
1263 {
1264 sljit_u8* inst;
1265 sljit_s32 update = 0;
1266 sljit_s32 op_flags = GET_ALL_FLAGS(op);
1267 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1268 sljit_s32 dst_is_ereg = 0;
1269 sljit_s32 src_is_ereg = 0;
1270 #else
1271 # define src_is_ereg 0
1272 #endif
1273
1274 CHECK_ERROR();
1275 CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw));
1276 ADJUST_LOCAL_OFFSET(dst, dstw);
1277 ADJUST_LOCAL_OFFSET(src, srcw);
1278
1279 CHECK_EXTRA_REGS(dst, dstw, dst_is_ereg = 1);
1280 CHECK_EXTRA_REGS(src, srcw, src_is_ereg = 1);
1281 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1282 compiler->mode32 = op_flags & SLJIT_I32_OP;
1283 #endif
1284
1285 op = GET_OPCODE(op);
1286 if (op >= SLJIT_MOV && op <= SLJIT_MOVU_P) {
1287 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1288 compiler->mode32 = 0;
1289 #endif
1290
1291 if (op_flags & SLJIT_I32_OP) {
1292 if (FAST_IS_REG(src) && src == dst) {
1293 if (!TYPE_CAST_NEEDED(op))
1294 return SLJIT_SUCCESS;
1295 }
1296 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1297 if (op == SLJIT_MOV_S32 && (src & SLJIT_MEM))
1298 op = SLJIT_MOV_U32;
1299 if (op == SLJIT_MOVU_S32 && (src & SLJIT_MEM))
1300 op = SLJIT_MOVU_U32;
1301 if (op == SLJIT_MOV_U32 && (src & SLJIT_IMM))
1302 op = SLJIT_MOV_S32;
1303 if (op == SLJIT_MOVU_U32 && (src & SLJIT_IMM))
1304 op = SLJIT_MOVU_S32;
1305 #endif
1306 }
1307
1308 SLJIT_COMPILE_ASSERT(SLJIT_MOV + 8 == SLJIT_MOVU, movu_offset);
1309 if (op >= SLJIT_MOVU) {
1310 update = 1;
1311 op -= 8;
1312 }
1313
1314 if (src & SLJIT_IMM) {
1315 switch (op) {
1316 case SLJIT_MOV_U8:
1317 srcw = (sljit_u8)srcw;
1318 break;
1319 case SLJIT_MOV_S8:
1320 srcw = (sljit_s8)srcw;
1321 break;
1322 case SLJIT_MOV_U16:
1323 srcw = (sljit_u16)srcw;
1324 break;
1325 case SLJIT_MOV_S16:
1326 srcw = (sljit_s16)srcw;
1327 break;
1328 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1329 case SLJIT_MOV_U32:
1330 srcw = (sljit_u32)srcw;
1331 break;
1332 case SLJIT_MOV_S32:
1333 srcw = (sljit_s32)srcw;
1334 break;
1335 #endif
1336 }
1337 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1338 if (SLJIT_UNLIKELY(dst_is_ereg))
1339 return emit_mov(compiler, dst, dstw, src, srcw);
1340 #endif
1341 }
1342
1343 if (SLJIT_UNLIKELY(update) && (src & SLJIT_MEM) && !src_is_ereg && (src & REG_MASK) && (srcw != 0 || (src & OFFS_REG_MASK) != 0)) {
1344 inst = emit_x86_instruction(compiler, 1, src & REG_MASK, 0, src, srcw);
1345 FAIL_IF(!inst);
1346 *inst = LEA_r_m;
1347 src &= SLJIT_MEM | 0xf;
1348 srcw = 0;
1349 }
1350
1351 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1352 if (SLJIT_UNLIKELY(dst_is_ereg) && (!(op == SLJIT_MOV || op == SLJIT_MOV_U32 || op == SLJIT_MOV_S32 || op == SLJIT_MOV_P) || (src & SLJIT_MEM))) {
1353 SLJIT_ASSERT(dst == SLJIT_MEM1(SLJIT_SP));
1354 dst = TMP_REG1;
1355 }
1356 #endif
1357
1358 switch (op) {
1359 case SLJIT_MOV:
1360 case SLJIT_MOV_P:
1361 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1362 case SLJIT_MOV_U32:
1363 case SLJIT_MOV_S32:
1364 #endif
1365 FAIL_IF(emit_mov(compiler, dst, dstw, src, srcw));
1366 break;
1367 case SLJIT_MOV_U8:
1368 FAIL_IF(emit_mov_byte(compiler, 0, dst, dstw, src, srcw));
1369 break;
1370 case SLJIT_MOV_S8:
1371 FAIL_IF(emit_mov_byte(compiler, 1, dst, dstw, src, srcw));
1372 break;
1373 case SLJIT_MOV_U16:
1374 FAIL_IF(emit_mov_half(compiler, 0, dst, dstw, src, srcw));
1375 break;
1376 case SLJIT_MOV_S16:
1377 FAIL_IF(emit_mov_half(compiler, 1, dst, dstw, src, srcw));
1378 break;
1379 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1380 case SLJIT_MOV_U32:
1381 FAIL_IF(emit_mov_int(compiler, 0, dst, dstw, src, srcw));
1382 break;
1383 case SLJIT_MOV_S32:
1384 FAIL_IF(emit_mov_int(compiler, 1, dst, dstw, src, srcw));
1385 break;
1386 #endif
1387 }
1388
1389 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1390 if (SLJIT_UNLIKELY(dst_is_ereg) && dst == TMP_REG1)
1391 return emit_mov(compiler, SLJIT_MEM1(SLJIT_SP), dstw, TMP_REG1, 0);
1392 #endif
1393
1394 if (SLJIT_UNLIKELY(update) && (dst & SLJIT_MEM) && (dst & REG_MASK) && (dstw != 0 || (dst & OFFS_REG_MASK) != 0)) {
1395 inst = emit_x86_instruction(compiler, 1, dst & REG_MASK, 0, dst, dstw);
1396 FAIL_IF(!inst);
1397 *inst = LEA_r_m;
1398 }
1399 return SLJIT_SUCCESS;
1400 }
1401
1402 if (SLJIT_UNLIKELY(GET_FLAGS(op_flags)))
1403 compiler->flags_saved = 0;
1404
1405 switch (op) {
1406 case SLJIT_NOT:
1407 if (SLJIT_UNLIKELY(op_flags & SLJIT_SET_E))
1408 return emit_not_with_flags(compiler, dst, dstw, src, srcw);
1409 return emit_unary(compiler, NOT_rm, dst, dstw, src, srcw);
1410
1411 case SLJIT_NEG:
1412 if (SLJIT_UNLIKELY(op_flags & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
1413 FAIL_IF(emit_save_flags(compiler));
1414 return emit_unary(compiler, NEG_rm, dst, dstw, src, srcw);
1415
1416 case SLJIT_CLZ:
1417 if (SLJIT_UNLIKELY(op_flags & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
1418 FAIL_IF(emit_save_flags(compiler));
1419 return emit_clz(compiler, op_flags, dst, dstw, src, srcw);
1420 }
1421
1422 return SLJIT_SUCCESS;
1423
1424 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1425 # undef src_is_ereg
1426 #endif
1427 }
1428
1429 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1430
1431 #define BINARY_IMM(op_imm, op_mr, immw, arg, argw) \
1432 if (IS_HALFWORD(immw) || compiler->mode32) { \
1433 inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, immw, arg, argw); \
1434 FAIL_IF(!inst); \
1435 *(inst + 1) |= (op_imm); \
1436 } \
1437 else { \
1438 FAIL_IF(emit_load_imm64(compiler, TMP_REG2, immw)); \
1439 inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, arg, argw); \
1440 FAIL_IF(!inst); \
1441 *inst = (op_mr); \
1442 }
1443
1444 #define BINARY_EAX_IMM(op_eax_imm, immw) \
1445 FAIL_IF(emit_do_imm32(compiler, (!compiler->mode32) ? REX_W : 0, (op_eax_imm), immw))
1446
1447 #else
1448
1449 #define BINARY_IMM(op_imm, op_mr, immw, arg, argw) \
1450 inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, immw, arg, argw); \
1451 FAIL_IF(!inst); \
1452 *(inst + 1) |= (op_imm);
1453
1454 #define BINARY_EAX_IMM(op_eax_imm, immw) \
1455 FAIL_IF(emit_do_imm(compiler, (op_eax_imm), immw))
1456
1457 #endif
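/* BINARY_IMM emits the immediate form (the EX86_BIN_INS path, presumably the
   0x81/0x83 group) when the immediate fits in 32 bits; otherwise, on x86-64,
   the value is first loaded into TMP_REG2 and the register form (op_mr) is
   used instead. */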
1458
1459 static sljit_s32 emit_cum_binary(struct sljit_compiler *compiler,
1460 sljit_u8 op_rm, sljit_u8 op_mr, sljit_u8 op_imm, sljit_u8 op_eax_imm,
1461 sljit_s32 dst, sljit_sw dstw,
1462 sljit_s32 src1, sljit_sw src1w,
1463 sljit_s32 src2, sljit_sw src2w)
1464 {
1465 sljit_u8* inst;
1466
1467 if (dst == SLJIT_UNUSED) {
1468 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
1469 if (src2 & SLJIT_IMM) {
1470 BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
1471 }
1472 else {
1473 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
1474 FAIL_IF(!inst);
1475 *inst = op_rm;
1476 }
1477 return SLJIT_SUCCESS;
1478 }
1479
1480 if (dst == src1 && dstw == src1w) {
1481 if (src2 & SLJIT_IMM) {
1482 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1483 if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
1484 #else
1485 if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128)) {
1486 #endif
1487 BINARY_EAX_IMM(op_eax_imm, src2w);
1488 }
1489 else {
1490 BINARY_IMM(op_imm, op_mr, src2w, dst, dstw);
1491 }
1492 }
1493 else if (FAST_IS_REG(dst)) {
1494 inst = emit_x86_instruction(compiler, 1, dst, dstw, src2, src2w);
1495 FAIL_IF(!inst);
1496 *inst = op_rm;
1497 }
1498 else if (FAST_IS_REG(src2)) {
1499 /* Special exception for sljit_emit_op_flags. */
1500 inst = emit_x86_instruction(compiler, 1, src2, src2w, dst, dstw);
1501 FAIL_IF(!inst);
1502 *inst = op_mr;
1503 }
1504 else {
1505 EMIT_MOV(compiler, TMP_REG1, 0, src2, src2w);
1506 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
1507 FAIL_IF(!inst);
1508 *inst = op_mr;
1509 }
1510 return SLJIT_SUCCESS;
1511 }
1512
1513 /* Only for cumulative operations. */
1514 if (dst == src2 && dstw == src2w) {
1515 if (src1 & SLJIT_IMM) {
1516 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1517 if ((dst == SLJIT_R0) && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) {
1518 #else
1519 if ((dst == SLJIT_R0) && (src1w > 127 || src1w < -128)) {
1520 #endif
1521 BINARY_EAX_IMM(op_eax_imm, src1w);
1522 }
1523 else {
1524 BINARY_IMM(op_imm, op_mr, src1w, dst, dstw);
1525 }
1526 }
1527 else if (FAST_IS_REG(dst)) {
1528 inst = emit_x86_instruction(compiler, 1, dst, dstw, src1, src1w);
1529 FAIL_IF(!inst);
1530 *inst = op_rm;
1531 }
1532 else if (FAST_IS_REG(src1)) {
1533 inst = emit_x86_instruction(compiler, 1, src1, src1w, dst, dstw);
1534 FAIL_IF(!inst);
1535 *inst = op_mr;
1536 }
1537 else {
1538 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
1539 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
1540 FAIL_IF(!inst);
1541 *inst = op_mr;
1542 }
1543 return SLJIT_SUCCESS;
1544 }
1545
1546 /* General version. */
1547 if (FAST_IS_REG(dst)) {
1548 EMIT_MOV(compiler, dst, 0, src1, src1w);
1549 if (src2 & SLJIT_IMM) {
1550 BINARY_IMM(op_imm, op_mr, src2w, dst, 0);
1551 }
1552 else {
1553 inst = emit_x86_instruction(compiler, 1, dst, 0, src2, src2w);
1554 FAIL_IF(!inst);
1555 *inst = op_rm;
1556 }
1557 }
1558 else {
1559 /* This version requires fewer memory writes. */
1560 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
1561 if (src2 & SLJIT_IMM) {
1562 BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
1563 }
1564 else {
1565 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
1566 FAIL_IF(!inst);
1567 *inst = op_rm;
1568 }
1569 EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
1570 }
1571
1572 return SLJIT_SUCCESS;
1573 }
1574
1575 static sljit_s32 emit_non_cum_binary(struct sljit_compiler *compiler,
1576 sljit_u8 op_rm, sljit_u8 op_mr, sljit_u8 op_imm, sljit_u8 op_eax_imm,
1577 sljit_s32 dst, sljit_sw dstw,
1578 sljit_s32 src1, sljit_sw src1w,
1579 sljit_s32 src2, sljit_sw src2w)
1580 {
1581 sljit_u8* inst;
1582
1583 if (dst == SLJIT_UNUSED) {
1584 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
1585 if (src2 & SLJIT_IMM) {
1586 BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
1587 }
1588 else {
1589 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
1590 FAIL_IF(!inst);
1591 *inst = op_rm;
1592 }
1593 return SLJIT_SUCCESS;
1594 }
1595
1596 if (dst == src1 && dstw == src1w) {
1597 if (src2 & SLJIT_IMM) {
1598 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1599 if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
1600 #else
1601 if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128)) {
1602 #endif
1603 BINARY_EAX_IMM(op_eax_imm, src2w);
1604 }
1605 else {
1606 BINARY_IMM(op_imm, op_mr, src2w, dst, dstw);
1607 }
1608 }
1609 else if (FAST_IS_REG(dst)) {
1610 inst = emit_x86_instruction(compiler, 1, dst, dstw, src2, src2w);
1611 FAIL_IF(!inst);
1612 *inst = op_rm;
1613 }
1614 else if (FAST_IS_REG(src2)) {
1615 inst = emit_x86_instruction(compiler, 1, src2, src2w, dst, dstw);
1616 FAIL_IF(!inst);
1617 *inst = op_mr;
1618 }
1619 else {
1620 EMIT_MOV(compiler, TMP_REG1, 0, src2, src2w);
1621 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
1622 FAIL_IF(!inst);
1623 *inst = op_mr;
1624 }
1625 return SLJIT_SUCCESS;
1626 }
1627
1628 /* General version. */
1629 if (FAST_IS_REG(dst) && dst != src2) {
1630 EMIT_MOV(compiler, dst, 0, src1, src1w);
1631 if (src2 & SLJIT_IMM) {
1632 BINARY_IMM(op_imm, op_mr, src2w, dst, 0);
1633 }
1634 else {
1635 inst = emit_x86_instruction(compiler, 1, dst, 0, src2, src2w);
1636 FAIL_IF(!inst);
1637 *inst = op_rm;
1638 }
1639 }
1640 else {
1641 /* This version requires fewer memory writes. */
1642 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
1643 if (src2 & SLJIT_IMM) {
1644 BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
1645 }
1646 else {
1647 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
1648 FAIL_IF(!inst);
1649 *inst = op_rm;
1650 }
1651 EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
1652 }
1653
1654 return SLJIT_SUCCESS;
1655 }
1656
1657 static sljit_s32 emit_mul(struct sljit_compiler *compiler,
1658 sljit_s32 dst, sljit_sw dstw,
1659 sljit_s32 src1, sljit_sw src1w,
1660 sljit_s32 src2, sljit_sw src2w)
1661 {
1662 sljit_u8* inst;
1663 sljit_s32 dst_r;
1664
1665 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
1666
1667 /* Register destination. */
1668 if (dst_r == src1 && !(src2 & SLJIT_IMM)) {
1669 inst = emit_x86_instruction(compiler, 2, dst_r, 0, src2, src2w);
1670 FAIL_IF(!inst);
1671 *inst++ = GROUP_0F;
1672 *inst = IMUL_r_rm;
1673 }
1674 else if (dst_r == src2 && !(src1 & SLJIT_IMM)) {
1675 inst = emit_x86_instruction(compiler, 2, dst_r, 0, src1, src1w);
1676 FAIL_IF(!inst);
1677 *inst++ = GROUP_0F;
1678 *inst = IMUL_r_rm;
1679 }
1680 else if (src1 & SLJIT_IMM) {
1681 if (src2 & SLJIT_IMM) {
1682 EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, src2w);
1683 src2 = dst_r;
1684 src2w = 0;
1685 }
1686
1687 if (src1w <= 127 && src1w >= -128) {
1688 inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
1689 FAIL_IF(!inst);
1690 *inst = IMUL_r_rm_i8;
1691 inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
1692 FAIL_IF(!inst);
1693 INC_SIZE(1);
1694 *inst = (sljit_s8)src1w;
1695 }
1696 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1697 else {
1698 inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
1699 FAIL_IF(!inst);
1700 *inst = IMUL_r_rm_i32;
1701 inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
1702 FAIL_IF(!inst);
1703 INC_SIZE(4);
1704 *(sljit_sw*)inst = src1w;
1705 }
1706 #else
1707 else if (IS_HALFWORD(src1w)) {
1708 inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
1709 FAIL_IF(!inst);
1710 *inst = IMUL_r_rm_i32;
1711 inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
1712 FAIL_IF(!inst);
1713 INC_SIZE(4);
1714 *(sljit_s32*)inst = (sljit_s32)src1w;
1715 }
1716 else {
1717 EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_IMM, src1w);
1718 if (dst_r != src2)
1719 EMIT_MOV(compiler, dst_r, 0, src2, src2w);
1720 inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0);
1721 FAIL_IF(!inst);
1722 *inst++ = GROUP_0F;
1723 *inst = IMUL_r_rm;
1724 }
1725 #endif
1726 }
1727 else if (src2 & SLJIT_IMM) {
1728 /* Note: src1 is NOT immediate. */
1729
1730 if (src2w <= 127 && src2w >= -128) {
1731 inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
1732 FAIL_IF(!inst);
1733 *inst = IMUL_r_rm_i8;
1734 inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
1735 FAIL_IF(!inst);
1736 INC_SIZE(1);
1737 *inst = (sljit_s8)src2w;
1738 }
1739 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1740 else {
1741 inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
1742 FAIL_IF(!inst);
1743 *inst = IMUL_r_rm_i32;
1744 inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
1745 FAIL_IF(!inst);
1746 INC_SIZE(4);
1747 *(sljit_sw*)inst = src2w;
1748 }
1749 #else
1750 else if (IS_HALFWORD(src2w)) {
1751 inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
1752 FAIL_IF(!inst);
1753 *inst = IMUL_r_rm_i32;
1754 inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
1755 FAIL_IF(!inst);
1756 INC_SIZE(4);
1757 *(sljit_s32*)inst = (sljit_s32)src2w;
1758 }
1759 else {
1760 EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_IMM, src2w);
1761 if (dst_r != src1)
1762 EMIT_MOV(compiler, dst_r, 0, src1, src1w);
1763 inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0);
1764 FAIL_IF(!inst);
1765 *inst++ = GROUP_0F;
1766 *inst = IMUL_r_rm;
1767 }
1768 #endif
1769 }
1770 else {
1771 /* Neither argument is immediate. */
1772 if (ADDRESSING_DEPENDS_ON(src2, dst_r))
1773 dst_r = TMP_REG1;
1774 EMIT_MOV(compiler, dst_r, 0, src1, src1w);
1775 inst = emit_x86_instruction(compiler, 2, dst_r, 0, src2, src2w);
1776 FAIL_IF(!inst);
1777 *inst++ = GROUP_0F;
1778 *inst = IMUL_r_rm;
1779 }
1780
1781 if (dst_r == TMP_REG1)
1782 EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
1783
1784 return SLJIT_SUCCESS;
1785 }
1786
1787 static sljit_s32 emit_lea_binary(struct sljit_compiler *compiler, sljit_s32 keep_flags,
1788 sljit_s32 dst, sljit_sw dstw,
1789 sljit_s32 src1, sljit_sw src1w,
1790 sljit_s32 src2, sljit_sw src2w)
1791 {
1792 sljit_u8* inst;
1793 sljit_s32 dst_r, done = 0;
1794
1795 /* These cases are better left to be handled the normal way. */
1796 if (!keep_flags) {
1797 if (dst == src1 && dstw == src1w)
1798 return SLJIT_ERR_UNSUPPORTED;
1799 if (dst == src2 && dstw == src2w)
1800 return SLJIT_ERR_UNSUPPORTED;
1801 }
1802
1803 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
1804
1805 if (FAST_IS_REG(src1)) {
1806 if (FAST_IS_REG(src2)) {
1807 inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM2(src1, src2), 0);
1808 FAIL_IF(!inst);
1809 *inst = LEA_r_m;
1810 done = 1;
1811 }
1812 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1813 if ((src2 & SLJIT_IMM) && (compiler->mode32 || IS_HALFWORD(src2w))) {
1814 inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src1), (sljit_s32)src2w);
1815 #else
1816 if (src2 & SLJIT_IMM) {
1817 inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src1), src2w);
1818 #endif
1819 FAIL_IF(!inst);
1820 *inst = LEA_r_m;
1821 done = 1;
1822 }
1823 }
1824 else if (FAST_IS_REG(src2)) {
1825 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1826 if ((src1 & SLJIT_IMM) && (compiler->mode32 || IS_HALFWORD(src1w))) {
1827 inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src2), (sljit_s32)src1w);
1828 #else
1829 if (src1 & SLJIT_IMM) {
1830 inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src2), src1w);
1831 #endif
1832 FAIL_IF(!inst);
1833 *inst = LEA_r_m;
1834 done = 1;
1835 }
1836 }
1837
1838 if (done) {
1839 if (dst_r == TMP_REG1)
1840 return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
1841 return SLJIT_SUCCESS;
1842 }
1843 return SLJIT_ERR_UNSUPPORTED;
1844 }
1845
1846 static sljit_s32 emit_cmp_binary(struct sljit_compiler *compiler,
1847 sljit_s32 src1, sljit_sw src1w,
1848 sljit_s32 src2, sljit_sw src2w)
1849 {
1850 sljit_u8* inst;
1851
1852 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1853 if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
1854 #else
1855 if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128)) {
1856 #endif
1857 BINARY_EAX_IMM(CMP_EAX_i32, src2w);
1858 return SLJIT_SUCCESS;
1859 }
1860
1861 if (FAST_IS_REG(src1)) {
1862 if (src2 & SLJIT_IMM) {
1863 BINARY_IMM(CMP, CMP_rm_r, src2w, src1, 0);
1864 }
1865 else {
1866 inst = emit_x86_instruction(compiler, 1, src1, 0, src2, src2w);
1867 FAIL_IF(!inst);
1868 *inst = CMP_r_rm;
1869 }
1870 return SLJIT_SUCCESS;
1871 }
1872
1873 if (FAST_IS_REG(src2) && !(src1 & SLJIT_IMM)) {
1874 inst = emit_x86_instruction(compiler, 1, src2, 0, src1, src1w);
1875 FAIL_IF(!inst);
1876 *inst = CMP_rm_r;
1877 return SLJIT_SUCCESS;
1878 }
1879
1880 if (src2 & SLJIT_IMM) {
1881 if (src1 & SLJIT_IMM) {
1882 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
1883 src1 = TMP_REG1;
1884 src1w = 0;
1885 }
1886 BINARY_IMM(CMP, CMP_rm_r, src2w, src1, src1w);
1887 }
1888 else {
1889 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
1890 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
1891 FAIL_IF(!inst);
1892 *inst = CMP_r_rm;
1893 }
1894 return SLJIT_SUCCESS;
1895 }
1896
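/* TEST performs a bitwise AND purely for its effect on the flags (the
   result is discarded), so it is used when an AND has an unused
   destination. */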
1897 static sljit_s32 emit_test_binary(struct sljit_compiler *compiler,
1898 sljit_s32 src1, sljit_sw src1w,
1899 sljit_s32 src2, sljit_sw src2w)
1900 {
1901 sljit_u8* inst;
1902
1903 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1904 if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
1905 #else
1906 if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128)) {
1907 #endif
1908 BINARY_EAX_IMM(TEST_EAX_i32, src2w);
1909 return SLJIT_SUCCESS;
1910 }
1911
1912 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1913 if (src2 == SLJIT_R0 && (src1 & SLJIT_IMM) && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) {
1914 #else
1915 if (src2 == SLJIT_R0 && (src1 & SLJIT_IMM) && (src1w > 127 || src1w < -128)) {
1916 #endif
1917 BINARY_EAX_IMM(TEST_EAX_i32, src1w);
1918 return SLJIT_SUCCESS;
1919 }
1920
1921 if (!(src1 & SLJIT_IMM)) {
1922 if (src2 & SLJIT_IMM) {
1923 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1924 if (IS_HALFWORD(src2w) || compiler->mode32) {
1925 inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, src1w);
1926 FAIL_IF(!inst);
1927 *inst = GROUP_F7;
1928 }
1929 else {
1930 FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src2w));
1931 inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, src1, src1w);
1932 FAIL_IF(!inst);
1933 *inst = TEST_rm_r;
1934 }
1935 #else
1936 inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, src1w);
1937 FAIL_IF(!inst);
1938 *inst = GROUP_F7;
1939 #endif
1940 return SLJIT_SUCCESS;
1941 }
1942 else if (FAST_IS_REG(src1)) {
1943 inst = emit_x86_instruction(compiler, 1, src1, 0, src2, src2w);
1944 FAIL_IF(!inst);
1945 *inst = TEST_rm_r;
1946 return SLJIT_SUCCESS;
1947 }
1948 }
1949
1950 if (!(src2 & SLJIT_IMM)) {
1951 if (src1 & SLJIT_IMM) {
1952 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1953 if (IS_HALFWORD(src1w) || compiler->mode32) {
1954 inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src1w, src2, src2w);
1955 FAIL_IF(!inst);
1956 *inst = GROUP_F7;
1957 }
1958 else {
1959 FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src1w));
1960 inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, src2, src2w);
1961 FAIL_IF(!inst);
1962 *inst = TEST_rm_r;
1963 }
1964 #else
1965 inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src1w, src2, src2w);
1966 FAIL_IF(!inst);
1967 *inst = GROUP_F7;
1968 #endif
1969 return SLJIT_SUCCESS;
1970 }
1971 else if (FAST_IS_REG(src2)) {
1972 inst = emit_x86_instruction(compiler, 1, src2, 0, src1, src1w);
1973 FAIL_IF(!inst);
1974 *inst = TEST_rm_r;
1975 return SLJIT_SUCCESS;
1976 }
1977 }
1978
1979 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
1980 if (src2 & SLJIT_IMM) {
1981 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1982 if (IS_HALFWORD(src2w) || compiler->mode32) {
1983 inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, TMP_REG1, 0);
1984 FAIL_IF(!inst);
1985 *inst = GROUP_F7;
1986 }
1987 else {
1988 FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src2w));
1989 inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, TMP_REG1, 0);
1990 FAIL_IF(!inst);
1991 *inst = TEST_rm_r;
1992 }
1993 #else
1994 inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, TMP_REG1, 0);
1995 FAIL_IF(!inst);
1996 *inst = GROUP_F7;
1997 #endif
1998 }
1999 else {
2000 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
2001 FAIL_IF(!inst);
2002 *inst = TEST_rm_r;
2003 }
2004 return SLJIT_SUCCESS;
2005 }
2006
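/* Variable shift counts must be in cl on x86 (SLJIT_PREF_SHIFT_REG), so
   this helper may have to juggle ecx with the temporary registers before
   the shift instruction itself can be emitted. */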
2007 static sljit_s32 emit_shift(struct sljit_compiler *compiler,
2008 sljit_u8 mode,
2009 sljit_s32 dst, sljit_sw dstw,
2010 sljit_s32 src1, sljit_sw src1w,
2011 sljit_s32 src2, sljit_sw src2w)
2012 {
2013 sljit_u8* inst;
2014
2015 if ((src2 & SLJIT_IMM) || (src2 == SLJIT_PREF_SHIFT_REG)) {
2016 if (dst == src1 && dstw == src1w) {
2017 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, dst, dstw);
2018 FAIL_IF(!inst);
2019 *inst |= mode;
2020 return SLJIT_SUCCESS;
2021 }
2022 if (dst == SLJIT_UNUSED) {
2023 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
2024 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, TMP_REG1, 0);
2025 FAIL_IF(!inst);
2026 *inst |= mode;
2027 return SLJIT_SUCCESS;
2028 }
2029 if (dst == SLJIT_PREF_SHIFT_REG && src2 == SLJIT_PREF_SHIFT_REG) {
2030 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
2031 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
2032 FAIL_IF(!inst);
2033 *inst |= mode;
2034 EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
2035 return SLJIT_SUCCESS;
2036 }
2037 if (FAST_IS_REG(dst)) {
2038 EMIT_MOV(compiler, dst, 0, src1, src1w);
2039 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, dst, 0);
2040 FAIL_IF(!inst);
2041 *inst |= mode;
2042 return SLJIT_SUCCESS;
2043 }
2044
2045 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
2046 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, TMP_REG1, 0);
2047 FAIL_IF(!inst);
2048 *inst |= mode;
2049 EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
2050 return SLJIT_SUCCESS;
2051 }
2052
2053 if (dst == SLJIT_PREF_SHIFT_REG) {
2054 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
2055 EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
2056 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
2057 FAIL_IF(!inst);
2058 *inst |= mode;
2059 EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
2060 }
2061 else if (FAST_IS_REG(dst) && dst != src2 && !ADDRESSING_DEPENDS_ON(src2, dst)) {
2062 if (src1 != dst)
2063 EMIT_MOV(compiler, dst, 0, src1, src1w);
2064 EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_PREF_SHIFT_REG, 0);
2065 EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
2066 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, dst, 0);
2067 FAIL_IF(!inst);
2068 *inst |= mode;
2069 EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
2070 }
2071 else {
2072 /* This case is really difficult, since ecx itself may be used for
2073 addressing, and we must make sure the code works even in that case. */
2074 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
2075 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2076 EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_PREF_SHIFT_REG, 0);
2077 #else
2078 /* [esp+0] contains the flags. */
2079 EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), sizeof(sljit_sw), SLJIT_PREF_SHIFT_REG, 0);
2080 #endif
2081 EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
2082 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
2083 FAIL_IF(!inst);
2084 *inst |= mode;
2085 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2086 EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG2, 0);
2087 #else
2088 EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, SLJIT_MEM1(SLJIT_SP), sizeof(sljit_sw));
2089 #endif
2090 EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
2091 }
2092
2093 return SLJIT_SUCCESS;
2094 }
2095
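/* Shift instructions leave the flags untouched when the count is zero, so
   when flag results are requested this wrapper either proves that the
   count is a non-zero constant or re-creates the flags with an explicit
   compare around the shift. */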
2096 static sljit_s32 emit_shift_with_flags(struct sljit_compiler *compiler,
2097 sljit_u8 mode, sljit_s32 set_flags,
2098 sljit_s32 dst, sljit_sw dstw,
2099 sljit_s32 src1, sljit_sw src1w,
2100 sljit_s32 src2, sljit_sw src2w)
2101 {
2102 /* The CPU does not set flags if the shift count is 0. */
2103 if (src2 & SLJIT_IMM) {
2104 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2105 if ((src2w & 0x3f) != 0 || (compiler->mode32 && (src2w & 0x1f) != 0))
2106 return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);
2107 #else
2108 if ((src2w & 0x1f) != 0)
2109 return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);
2110 #endif
2111 if (!set_flags)
2112 return emit_mov(compiler, dst, dstw, src1, src1w);
2113 /* OR dst, src, 0 */
2114 return emit_cum_binary(compiler, OR_r_rm, OR_rm_r, OR, OR_EAX_i32,
2115 dst, dstw, src1, src1w, SLJIT_IMM, 0);
2116 }
2117
2118 if (!set_flags)
2119 return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);
2120
2121 if (!FAST_IS_REG(dst))
2122 FAIL_IF(emit_cmp_binary(compiler, src1, src1w, SLJIT_IMM, 0));
2123
2124 FAIL_IF(emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w));
2125
2126 if (FAST_IS_REG(dst))
2127 return emit_cmp_binary(compiler, dst, dstw, SLJIT_IMM, 0);
2128 return SLJIT_SUCCESS;
2129 }
2130
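/* Most x86 ALU instructions clobber the flags, so SLJIT_KEEP_FLAGS has to
   be emulated by saving the flags before the operation and restoring them
   later (see emit_save_flags / emit_restore_flags). */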
2131 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op,
2132 sljit_s32 dst, sljit_sw dstw,
2133 sljit_s32 src1, sljit_sw src1w,
2134 sljit_s32 src2, sljit_sw src2w)
2135 {
2136 CHECK_ERROR();
2137 CHECK(check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
2138 ADJUST_LOCAL_OFFSET(dst, dstw);
2139 ADJUST_LOCAL_OFFSET(src1, src1w);
2140 ADJUST_LOCAL_OFFSET(src2, src2w);
2141
2142 CHECK_EXTRA_REGS(dst, dstw, (void)0);
2143 CHECK_EXTRA_REGS(src1, src1w, (void)0);
2144 CHECK_EXTRA_REGS(src2, src2w, (void)0);
2145 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2146 compiler->mode32 = op & SLJIT_I32_OP;
2147 #endif
2148
2149 if (GET_OPCODE(op) >= SLJIT_MUL) {
2150 if (SLJIT_UNLIKELY(GET_FLAGS(op)))
2151 compiler->flags_saved = 0;
2152 else if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
2153 FAIL_IF(emit_save_flags(compiler));
2154 }
2155
2156 switch (GET_OPCODE(op)) {
2157 case SLJIT_ADD:
2158 if (!GET_FLAGS(op)) {
2159 if (emit_lea_binary(compiler, op & SLJIT_KEEP_FLAGS, dst, dstw, src1, src1w, src2, src2w) != SLJIT_ERR_UNSUPPORTED)
2160 return compiler->error;
2161 }
2162 else
2163 compiler->flags_saved = 0;
2164 if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
2165 FAIL_IF(emit_save_flags(compiler));
2166 return emit_cum_binary(compiler, ADD_r_rm, ADD_rm_r, ADD, ADD_EAX_i32,
2167 dst, dstw, src1, src1w, src2, src2w);
2168 case SLJIT_ADDC:
2169 if (SLJIT_UNLIKELY(compiler->flags_saved)) /* C flag must be restored. */
2170 FAIL_IF(emit_restore_flags(compiler, 1));
2171 else if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS))
2172 FAIL_IF(emit_save_flags(compiler));
2173 if (SLJIT_UNLIKELY(GET_FLAGS(op)))
2174 compiler->flags_saved = 0;
2175 return emit_cum_binary(compiler, ADC_r_rm, ADC_rm_r, ADC, ADC_EAX_i32,
2176 dst, dstw, src1, src1w, src2, src2w);
2177 case SLJIT_SUB:
2178 if (!GET_FLAGS(op)) {
2179 if ((src2 & SLJIT_IMM) && emit_lea_binary(compiler, op & SLJIT_KEEP_FLAGS, dst, dstw, src1, src1w, SLJIT_IMM, -src2w) != SLJIT_ERR_UNSUPPORTED)
2180 return compiler->error;
2181 }
2182 else
2183 compiler->flags_saved = 0;
2184 if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
2185 FAIL_IF(emit_save_flags(compiler));
2186 if (dst == SLJIT_UNUSED)
2187 return emit_cmp_binary(compiler, src1, src1w, src2, src2w);
2188 return emit_non_cum_binary(compiler, SUB_r_rm, SUB_rm_r, SUB, SUB_EAX_i32,
2189 dst, dstw, src1, src1w, src2, src2w);
2190 case SLJIT_SUBC:
2191 if (SLJIT_UNLIKELY(compiler->flags_saved)) /* C flag must be restored. */
2192 FAIL_IF(emit_restore_flags(compiler, 1));
2193 else if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS))
2194 FAIL_IF(emit_save_flags(compiler));
2195 if (SLJIT_UNLIKELY(GET_FLAGS(op)))
2196 compiler->flags_saved = 0;
2197 return emit_non_cum_binary(compiler, SBB_r_rm, SBB_rm_r, SBB, SBB_EAX_i32,
2198 dst, dstw, src1, src1w, src2, src2w);
2199 case SLJIT_MUL:
2200 return emit_mul(compiler, dst, dstw, src1, src1w, src2, src2w);
2201 case SLJIT_AND:
2202 if (dst == SLJIT_UNUSED)
2203 return emit_test_binary(compiler, src1, src1w, src2, src2w);
2204 return emit_cum_binary(compiler, AND_r_rm, AND_rm_r, AND, AND_EAX_i32,
2205 dst, dstw, src1, src1w, src2, src2w);
2206 case SLJIT_OR:
2207 return emit_cum_binary(compiler, OR_r_rm, OR_rm_r, OR, OR_EAX_i32,
2208 dst, dstw, src1, src1w, src2, src2w);
2209 case SLJIT_XOR:
2210 return emit_cum_binary(compiler, XOR_r_rm, XOR_rm_r, XOR, XOR_EAX_i32,
2211 dst, dstw, src1, src1w, src2, src2w);
2212 case SLJIT_SHL:
2213 return emit_shift_with_flags(compiler, SHL, GET_FLAGS(op),
2214 dst, dstw, src1, src1w, src2, src2w);
2215 case SLJIT_LSHR:
2216 return emit_shift_with_flags(compiler, SHR, GET_FLAGS(op),
2217 dst, dstw, src1, src1w, src2, src2w);
2218 case SLJIT_ASHR:
2219 return emit_shift_with_flags(compiler, SAR, GET_FLAGS(op),
2220 dst, dstw, src1, src1w, src2, src2w);
2221 }
2222
2223 return SLJIT_SUCCESS;
2224 }
2225
2226 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 reg)
2227 {
2228 CHECK_REG_INDEX(check_sljit_get_register_index(reg));
2229 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
2230 if (reg >= SLJIT_R3 && reg <= SLJIT_R6)
2231 return -1;
2232 #endif
2233 return reg_map[reg];
2234 }
2235
2236 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_float_register_index(sljit_s32 reg)
2237 {
2238 CHECK_REG_INDEX(check_sljit_get_float_register_index(reg));
2239 return reg;
2240 }
2241
2242 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler,
2243 void *instruction, sljit_s32 size)
2244 {
2245 sljit_u8 *inst;
2246
2247 CHECK_ERROR();
2248 CHECK(check_sljit_emit_op_custom(compiler, instruction, size));
2249
2250 inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
2251 FAIL_IF(!inst);
2252 INC_SIZE(size);
2253 SLJIT_MEMMOVE(inst, instruction, size);
2254 return SLJIT_SUCCESS;
2255 }
2256
2257 /* --------------------------------------------------------------------- */
2258 /* Floating point operators */
2259 /* --------------------------------------------------------------------- */
2260
2261 /* Alignment + 2 * 16 bytes. */
2262 static sljit_s32 sse2_data[3 + (4 + 4) * 2];
2263 static sljit_s32 *sse2_buffer;
2264
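/* The buffer holds the sign-bit and absolute-value masks used by
   SLJIT_NEG_F64 / SLJIT_ABS_F64 below, for both single and double
   precision, each in its own 16 byte slot so XORPD/ANDPD get an aligned
   memory operand. */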
2265 static void init_compiler(void)
2266 {
2267 sse2_buffer = (sljit_s32*)(((sljit_uw)sse2_data + 15) & ~0xf);
2268 /* Single precision constants. */
2269 sse2_buffer[0] = 0x80000000;
2270 sse2_buffer[4] = 0x7fffffff;
2271 /* Double precision constants. */
2272 sse2_buffer[8] = 0;
2273 sse2_buffer[9] = 0x80000000;
2274 sse2_buffer[12] = 0xffffffff;
2275 sse2_buffer[13] = 0x7fffffff;
2276 }
2277
2278 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_is_fpu_available(void)
2279 {
2280 #ifdef SLJIT_IS_FPU_AVAILABLE
2281 return SLJIT_IS_FPU_AVAILABLE;
2282 #elif (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
2283 if (cpu_has_sse2 == -1)
2284 get_cpu_features();
2285 return cpu_has_sse2;
2286 #else /* SLJIT_DETECT_SSE2 */
2287 return 1;
2288 #endif /* SLJIT_DETECT_SSE2 */
2289 }
2290
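/* Scalar SSE2 arithmetic is encoded as F3 0F <op> (single precision) or
   F2 0F <op> (double precision); emit_sse2 picks between these two
   prefixes, while emit_sse2_logic picks between the 66 prefix and no
   prefix for the packed/logic forms. */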
2291 static sljit_s32 emit_sse2(struct sljit_compiler *compiler, sljit_u8 opcode,
2292 sljit_s32 single, sljit_s32 xmm1, sljit_s32 xmm2, sljit_sw xmm2w)
2293 {
2294 sljit_u8 *inst;
2295
2296 inst = emit_x86_instruction(compiler, 2 | (single ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2, xmm1, 0, xmm2, xmm2w);
2297 FAIL_IF(!inst);
2298 *inst++ = GROUP_0F;
2299 *inst = opcode;
2300 return SLJIT_SUCCESS;
2301 }
2302
2303 static sljit_s32 emit_sse2_logic(struct sljit_compiler *compiler, sljit_u8 opcode,
2304 sljit_s32 pref66, sljit_s32 xmm1, sljit_s32 xmm2, sljit_sw xmm2w)
2305 {
2306 sljit_u8 *inst;
2307
2308 inst = emit_x86_instruction(compiler, 2 | (pref66 ? EX86_PREF_66 : 0) | EX86_SSE2, xmm1, 0, xmm2, xmm2w);
2309 FAIL_IF(!inst);
2310 *inst++ = GROUP_0F;
2311 *inst = opcode;
2312 return SLJIT_SUCCESS;
2313 }
2314
2315 static SLJIT_INLINE sljit_s32 emit_sse2_load(struct sljit_compiler *compiler,
2316 sljit_s32 single, sljit_s32 dst, sljit_s32 src, sljit_sw srcw)
2317 {
2318 return emit_sse2(compiler, MOVSD_x_xm, single, dst, src, srcw);
2319 }
2320
2321 static SLJIT_INLINE sljit_s32 emit_sse2_store(struct sljit_compiler *compiler,
2322 sljit_s32 single, sljit_s32 dst, sljit_sw dstw, sljit_s32 src)
2323 {
2324 return emit_sse2(compiler, MOVSD_xm_x, single, src, dst, dstw);
2325 }
2326
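/* Float to integer conversion uses CVTTSD2SI / CVTTSS2SI, which truncate
   towards zero; clearing mode32 selects the 64 bit destination form. */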
2327 static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler *compiler, sljit_s32 op,
2328 sljit_s32 dst, sljit_sw dstw,
2329 sljit_s32 src, sljit_sw srcw)
2330 {
2331 sljit_s32 dst_r = SLOW_IS_REG(dst) ? dst : TMP_REG1;
2332 sljit_u8 *inst;
2333
2334 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2335 if (GET_OPCODE(op) == SLJIT_CONV_SW_FROM_F64)
2336 compiler->mode32 = 0;
2337 #endif
2338
2339 inst = emit_x86_instruction(compiler, 2 | ((op & SLJIT_F32_OP) ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2_OP2, dst_r, 0, src, srcw);
2340 FAIL_IF(!inst);
2341 *inst++ = GROUP_0F;
2342 *inst = CVTTSD2SI_r_xm;
2343
2344 if (dst_r == TMP_REG1 && dst != SLJIT_UNUSED)
2345 return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
2346 return SLJIT_SUCCESS;
2347 }
2348
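/* Integer to float conversion uses CVTSI2SD / CVTSI2SS; immediates are
   first materialized in TMP_REG1 since the instruction only accepts a
   register or memory source. */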
2349 static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op,
2350 sljit_s32 dst, sljit_sw dstw,
2351 sljit_s32 src, sljit_sw srcw)
2352 {
2353 sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG;
2354 sljit_u8 *inst;
2355
2356 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2357 if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_SW)
2358 compiler->mode32 = 0;
2359 #endif
2360
2361 if (src & SLJIT_IMM) {
2362 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2363 if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32)
2364 srcw = (sljit_s32)srcw;
2365 #endif
2366 EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
2367 src = TMP_REG1;
2368 srcw = 0;
2369 }
2370
2371 inst = emit_x86_instruction(compiler, 2 | ((op & SLJIT_F32_OP) ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2_OP1, dst_r, 0, src, srcw);
2372 FAIL_IF(!inst);
2373 *inst++ = GROUP_0F;
2374 *inst = CVTSI2SD_x_rm;
2375
2376 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2377 compiler->mode32 = 1;
2378 #endif
2379 if (dst_r == TMP_FREG)
2380 return emit_sse2_store(compiler, op & SLJIT_F32_OP, dst, dstw, TMP_FREG);
2381 return SLJIT_SUCCESS;
2382 }
2383
2384 static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_s32 op,
2385 sljit_s32 src1, sljit_sw src1w,
2386 sljit_s32 src2, sljit_sw src2w)
2387 {
2388 compiler->flags_saved = 0;
2389 if (!FAST_IS_REG(src1)) {
2390 FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, TMP_FREG, src1, src1w));
2391 src1 = TMP_FREG;
2392 }
2393 return emit_sse2_logic(compiler, UCOMISD_x_xm, !(op & SLJIT_F32_OP), src1, src2, src2w);
2394 }
2395
2396 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op,
2397 sljit_s32 dst, sljit_sw dstw,
2398 sljit_s32 src, sljit_sw srcw)
2399 {
2400 sljit_s32 dst_r;
2401
2402 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2403 compiler->mode32 = 1;
2404 #endif
2405
2406 CHECK_ERROR();
2407 SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw);
2408
2409 if (GET_OPCODE(op) == SLJIT_MOV_F64) {
2410 if (FAST_IS_REG(dst))
2411 return emit_sse2_load(compiler, op & SLJIT_F32_OP, dst, src, srcw);
2412 if (FAST_IS_REG(src))
2413 return emit_sse2_store(compiler, op & SLJIT_F32_OP, dst, dstw, src);
2414 FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, TMP_FREG, src, srcw));
2415 return emit_sse2_store(compiler, op & SLJIT_F32_OP, dst, dstw, TMP_FREG);
2416 }
2417
2418 if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_F32) {
2419 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG;
2420 if (FAST_IS_REG(src)) {
2421 /* We overwrite the high bits of the source. From SLJIT's point of view,
2422 this is not an issue.
2423 Note: In SSE3, we could also use MOVDDUP and MOVSLDUP. */
2424 FAIL_IF(emit_sse2_logic(compiler, UNPCKLPD_x_xm, op & SLJIT_F32_OP, src, src, 0));
2425 }
2426 else {
2427 FAIL_IF(emit_sse2_load(compiler, !(op & SLJIT_F32_OP), TMP_FREG, src, srcw));
2428 src = TMP_FREG;
2429 }
2430
2431 FAIL_IF(emit_sse2_logic(compiler, CVTPD2PS_x_xm, op & SLJIT_F32_OP, dst_r, src, 0));
2432 if (dst_r == TMP_FREG)
2433 return emit_sse2_store(compiler, op & SLJIT_F32_OP, dst, dstw, TMP_FREG);
2434 return SLJIT_SUCCESS;
2435 }
2436
2437 if (SLOW_IS_REG(dst)) {
2438 dst_r = dst;
2439 if (dst != src)
2440 FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, dst_r, src, srcw));
2441 }
2442 else {
2443 dst_r = TMP_FREG;
2444 FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, dst_r, src, srcw));
2445 }
2446
2447 switch (GET_OPCODE(op)) {
2448 case SLJIT_NEG_F64:
2449 FAIL_IF(emit_sse2_logic(compiler, XORPD_x_xm, 1, dst_r, SLJIT_MEM0(), (sljit_sw)(op & SLJIT_F32_OP ? sse2_buffer : sse2_buffer + 8)));
2450 break;
2451
2452 case SLJIT_ABS_F64:
2453 FAIL_IF(emit_sse2_logic(compiler, ANDPD_x_xm, 1, dst_r, SLJIT_MEM0(), (sljit_sw)(op & SLJIT_F32_OP ? sse2_buffer + 4 : sse2_buffer + 12)));
2454 break;
2455 }
2456
2457 if (dst_r == TMP_FREG)
2458 return emit_sse2_store(compiler, op & SLJIT_F32_OP, dst, dstw, TMP_FREG);
2459 return SLJIT_SUCCESS;
2460 }
2461
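/* The scalar SSE2 instructions overwrite their first operand, so when dst
   aliases src2 the operands are either swapped (for the commutative ADD
   and MUL) or the result is computed in TMP_FREG and stored afterwards. */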
2462 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op,
2463 sljit_s32 dst, sljit_sw dstw,
2464 sljit_s32 src1, sljit_sw src1w,
2465 sljit_s32 src2, sljit_sw src2w)
2466 {
2467 sljit_s32 dst_r;
2468
2469 CHECK_ERROR();
2470 CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
2471 ADJUST_LOCAL_OFFSET(dst, dstw);
2472 ADJUST_LOCAL_OFFSET(src1, src1w);
2473 ADJUST_LOCAL_OFFSET(src2, src2w);
2474
2475 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2476 compiler->mode32 = 1;
2477 #endif
2478
2479 if (FAST_IS_REG(dst)) {
2480 dst_r = dst;
2481 if (dst == src1)
2482 ; /* Do nothing here. */
2483 else if (dst == src2 && (op == SLJIT_ADD_F64 || op == SLJIT_MUL_F64)) {
2484 /* Swap arguments. */
2485 src2 = src1;
2486 src2w = src1w;
2487 }
2488 else if (dst != src2)
2489 FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, dst_r, src1, src1w));
2490 else {
2491 dst_r = TMP_FREG;
2492 FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, TMP_FREG, src1, src1w));
2493 }
2494 }
2495 else {
2496 dst_r = TMP_FREG;
2497 FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, TMP_FREG, src1, src1w));
2498 }
2499
2500 switch (GET_OPCODE(op)) {
2501 case SLJIT_ADD_F64:
2502 FAIL_IF(emit_sse2(compiler, ADDSD_x_xm, op & SLJIT_F32_OP, dst_r, src2, src2w));
2503 break;
2504
2505 case SLJIT_SUB_F64:
2506 FAIL_IF(emit_sse2(compiler, SUBSD_x_xm, op & SLJIT_F32_OP, dst_r, src2, src2w));
2507 break;
2508
2509 case SLJIT_MUL_F64:
2510 FAIL_IF(emit_sse2(compiler, MULSD_x_xm, op & SLJIT_F32_OP, dst_r, src2, src2w));
2511 break;
2512
2513 case SLJIT_DIV_F64:
2514 FAIL_IF(emit_sse2(compiler, DIVSD_x_xm, op & SLJIT_F32_OP, dst_r, src2, src2w));
2515 break;
2516 }
2517
2518 if (dst_r == TMP_FREG)
2519 return emit_sse2_store(compiler, op & SLJIT_F32_OP, dst, dstw, TMP_FREG);
2520 return SLJIT_SUCCESS;
2521 }
2522
2523 /* --------------------------------------------------------------------- */
2524 /* Conditional instructions */
2525 /* --------------------------------------------------------------------- */
2526
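/* A label emits no machine code by itself: the two byte (0, 0) entry below
   only marks the label in the instruction buffer (jumps and constants use
   the same scheme with a different second byte); the real address is
   computed when the code is generated. */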
2527 SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler)
2528 {
2529 sljit_u8 *inst;
2530 struct sljit_label *label;
2531
2532 CHECK_ERROR_PTR();
2533 CHECK_PTR(check_sljit_emit_label(compiler));
2534
2535 /* We should restore the flags before the label,
2536 since other taken jumps have their own flags as well. */
2537 if (SLJIT_UNLIKELY(compiler->flags_saved))
2538 PTR_FAIL_IF(emit_restore_flags(compiler, 0));
2539
2540 if (compiler->last_label && compiler->last_label->size == compiler->size)
2541 return compiler->last_label;
2542
2543 label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label));
2544 PTR_FAIL_IF(!label);
2545 set_label(label, compiler);
2546
2547 inst = (sljit_u8*)ensure_buf(compiler, 2);
2548 PTR_FAIL_IF(!inst);
2549
2550 *inst++ = 0;
2551 *inst++ = 0;
2552
2553 return label;
2554 }
2555
2556 SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type)
2557 {
2558 sljit_u8 *inst;
2559 struct sljit_jump *jump;
2560
2561 CHECK_ERROR_PTR();
2562 CHECK_PTR(check_sljit_emit_jump(compiler, type));
2563
2564 if (SLJIT_UNLIKELY(compiler->flags_saved)) {
2565 if ((type & 0xff) <= SLJIT_JUMP)
2566 PTR_FAIL_IF(emit_restore_flags(compiler, 0));
2567 compiler->flags_saved = 0;
2568 }
2569
2570 jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
2571 PTR_FAIL_IF_NULL(jump);
2572 set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
2573 type &= 0xff;
2574
2575 if (type >= SLJIT_CALL1)
2576 PTR_FAIL_IF(call_with_args(compiler, type));
2577
2578 /* Worst case size. */
2579 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
2580 compiler->size += (type >= SLJIT_JUMP) ? 5 : 6;
2581 #else
2582 compiler->size += (type >= SLJIT_JUMP) ? (10 + 3) : (2 + 10 + 3);
2583 #endif
2584
2585 inst = (sljit_u8*)ensure_buf(compiler, 2);
2586 PTR_FAIL_IF_NULL(inst);
2587
2588 *inst++ = 0;
2589 *inst++ = type + 4;
2590 return jump;
2591 }
2592
2593 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw)
2594 {
2595 sljit_u8 *inst;
2596 struct sljit_jump *jump;
2597
2598 CHECK_ERROR();
2599 CHECK(check_sljit_emit_ijump(compiler, type, src, srcw));
2600 ADJUST_LOCAL_OFFSET(src, srcw);
2601
2602 CHECK_EXTRA_REGS(src, srcw, (void)0);
2603
2604 if (SLJIT_UNLIKELY(compiler->flags_saved)) {
2605 if (type <= SLJIT_JUMP)
2606 FAIL_IF(emit_restore_flags(compiler, 0));
2607 compiler->flags_saved = 0;
2608 }
2609
2610 if (type >= SLJIT_CALL1) {
2611 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
2612 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
2613 if (src == SLJIT_R2) {
2614 EMIT_MOV(compiler, TMP_REG1, 0, src, 0);
2615 src = TMP_REG1;
2616 }
2617 if (src == SLJIT_MEM1(SLJIT_SP) && type >= SLJIT_CALL3)
2618 srcw += sizeof(sljit_sw);
2619 #endif
2620 #endif
2621 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) && defined(_WIN64)
2622 if (src == SLJIT_R2) {
2623 EMIT_MOV(compiler, TMP_REG1, 0, src, 0);
2624 src = TMP_REG1;
2625 }
2626 #endif
2627 FAIL_IF(call_with_args(compiler, type));
2628 }
2629
2630 if (src == SLJIT_IMM) {
2631 jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
2632 FAIL_IF_NULL(jump);
2633 set_jump(jump, compiler, JUMP_ADDR);
2634 jump->u.target = srcw;
2635
2636 /* Worst case size. */
2637 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
2638 compiler->size += 5;
2639 #else
2640 compiler->size += 10 + 3;
2641 #endif
2642
2643 inst = (sljit_u8*)ensure_buf(compiler, 2);
2644 FAIL_IF_NULL(inst);
2645
2646 *inst++ = 0;
2647 *inst++ = type + 4;
2648 }
2649 else {
2650 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2651 /* REX_W is not necessary (src is not immediate). */
2652 compiler->mode32 = 1;
2653 #endif
2654 inst = emit_x86_instruction(compiler, 1, 0, 0, src, srcw);
2655 FAIL_IF(!inst);
2656 *inst++ = GROUP_FF;
2657 *inst |= (type >= SLJIT_FAST_CALL) ? CALL_rm : JMP_rm;
2658 }
2659 return SLJIT_SUCCESS;
2660 }
2661
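/* SETcc can only write an 8 bit register. On x86-64 every register has a
   byte form once a REX prefix is emitted, but on 32 bit x86 only
   eax/ecx/edx/ebx do, which is why the 32 bit path below falls back to
   CMOV or to temporarily exchanging the destination with eax. */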
2662 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op,
2663 sljit_s32 dst, sljit_sw dstw,
2664 sljit_s32 src, sljit_sw srcw,
2665 sljit_s32 type)
2666 {
2667 sljit_u8 *inst;
2668 sljit_u8 cond_set = 0;
2669 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2670 sljit_s32 reg;
2671 #else
2672 /* CHECK_EXTRA_REGS might overwrite these values. */
2673 sljit_s32 dst_save = dst;
2674 sljit_sw dstw_save = dstw;
2675 #endif
2676
2677 CHECK_ERROR();
2678 CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, src, srcw, type));
2679 SLJIT_UNUSED_ARG(srcw);
2680
2681 if (dst == SLJIT_UNUSED)
2682 return SLJIT_SUCCESS;
2683
2684 ADJUST_LOCAL_OFFSET(dst, dstw);
2685 CHECK_EXTRA_REGS(dst, dstw, (void)0);
2686 if (SLJIT_UNLIKELY(compiler->flags_saved))
2687 FAIL_IF(emit_restore_flags(compiler, op & SLJIT_KEEP_FLAGS));
2688
2689 type &= 0xff;
2690 /* setcc = jcc + 0x10. */
2691 cond_set = get_jump_code(type) + 0x10;
2692
2693 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2694 if (GET_OPCODE(op) == SLJIT_OR && !GET_ALL_FLAGS(op) && FAST_IS_REG(dst) && dst == src) {
2695 inst = (sljit_u8*)ensure_buf(compiler, 1 + 4 + 3);
2696 FAIL_IF(!inst);
2697 INC_SIZE(4 + 3);
2698 /* Set low register to conditional flag. */
2699 *inst++ = (reg_map[TMP_REG1] <= 7) ? REX : REX_B;
2700 *inst++ = GROUP_0F;
2701 *inst++ = cond_set;
2702 *inst++ = MOD_REG | reg_lmap[TMP_REG1];
2703 *inst++ = REX | (reg_map[TMP_REG1] <= 7 ? 0 : REX_R) | (reg_map[dst] <= 7 ? 0 : REX_B);
2704 *inst++ = OR_rm8_r8;
2705 *inst++ = MOD_REG | (reg_lmap[TMP_REG1] << 3) | reg_lmap[dst];
2706 return SLJIT_SUCCESS;
2707 }
2708
2709 reg = (op == SLJIT_MOV && FAST_IS_REG(dst)) ? dst : TMP_REG1;
2710
2711 inst = (sljit_u8*)ensure_buf(compiler, 1 + 4 + 4);
2712 FAIL_IF(!inst);
2713 INC_SIZE(4 + 4);
2714 /* Set low register to conditional flag. */
2715 *inst++ = (reg_map[reg] <= 7) ? REX : REX_B;
2716 *inst++ = GROUP_0F;
2717 *inst++ = cond_set;
2718 *inst++ = MOD_REG | reg_lmap[reg];
2719 *inst++ = REX_W | (reg_map[reg] <= 7 ? 0 : (REX_B | REX_R));
2720 *inst++ = GROUP_0F;
2721 *inst++ = MOVZX_r_rm8;
2722 *inst = MOD_REG | (reg_lmap[reg] << 3) | reg_lmap[reg];
2723
2724 if (reg != TMP_REG1)
2725 return SLJIT_SUCCESS;
2726
2727 if (GET_OPCODE(op) < SLJIT_ADD) {
2728 compiler->mode32 = GET_OPCODE(op) != SLJIT_MOV;
2729 return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
2730 }
2731 #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
2732 || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
2733 compiler->skip_checks = 1;
2734 #endif
2735 return sljit_emit_op2(compiler, op, dst, dstw, dst, dstw, TMP_REG1, 0);
2736 #else /* SLJIT_CONFIG_X86_64 */
2737 if (GET_OPCODE(op) < SLJIT_ADD && FAST_IS_REG(dst)) {
2738 if (reg_map[dst] <= 4) {
2739 /* Low byte is accessible. */
2740 inst = (sljit_u8*)ensure_buf(compiler, 1 + 3 + 3);
2741 FAIL_IF(!inst);
2742 INC_SIZE(3 + 3);
2743 /* Set low byte to conditional flag. */
2744 *inst++ = GROUP_0F;
2745 *inst++ = cond_set;
2746 *inst++ = MOD_REG | reg_map[dst];
2747
2748 *inst++ = GROUP_0F;
2749 *inst++ = MOVZX_r_rm8;
2750 *inst = MOD_REG | (reg_map[dst] << 3) | reg_map[dst];
2751 return SLJIT_SUCCESS;
2752 }
2753
2754 /* Low byte is not accessible. */
2755 if (cpu_has_cmov == -1)
2756 get_cpu_features();
2757
2758 if (cpu_has_cmov) {
2759 EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, 1);
2760 /* An xor reg, reg operation would overwrite the flags. */
2761 EMIT_MOV(compiler, dst, 0, SLJIT_IMM, 0);
2762
2763 inst = (sljit_u8*)ensure_buf(compiler, 1 + 3);
2764 FAIL_IF(!inst);
2765 INC_SIZE(3);
2766
2767 *inst++ = GROUP_0F;
2768 /* cmovcc = setcc - 0x50. */
2769 *inst++ = cond_set - 0x50;
2770 *inst++ = MOD_REG | (reg_map[dst] << 3) | reg_map[TMP_REG1];
2771 return SLJIT_SUCCESS;
2772 }
2773
2774 inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + 3 + 3 + 1);
2775 FAIL_IF(!inst);
2776 INC_SIZE(1 + 3 + 3 + 1);
2777 *inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
2778 /* Set al to conditional flag. */
2779 *inst++ = GROUP_0F;
2780 *inst++ = cond_set;
2781 *inst++ = MOD_REG | 0 /* eax */;
2782
2783 *inst++ = GROUP_0F;
2784 *inst++ = MOVZX_r_rm8;
2785 *inst++ = MOD_REG | (reg_map[dst] << 3) | 0 /* eax */;
2786 *inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
2787 return SLJIT_SUCCESS;
2788 }
2789
2790 if (GET_OPCODE(op) == SLJIT_OR && !GET_ALL_FLAGS(op) && FAST_IS_REG(dst) && dst == src && reg_map[dst] <= 4) {
2791 SLJIT_COMPILE_ASSERT(reg_map[SLJIT_R0] == 0, scratch_reg1_must_be_eax);
2792 if (dst != SLJIT_R0) {
2793 inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + 3 + 2 + 1);
2794 FAIL_IF(!inst);
2795 INC_SIZE(1 + 3 + 2 + 1);
2796 /* Set low register to conditional flag. */
2797 *inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
2798 *inst++ = GROUP_0F;
2799 *inst++ = cond_set;
2800 *inst++ = MOD_REG | 0 /* eax */;
2801 *inst++ = OR_rm8_r8;
2802 *inst++ = MOD_REG | (0 /* eax */ << 3) | reg_map[dst];
2803 *inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
2804 }
2805 else {
2806 inst = (sljit_u8*)ensure_buf(compiler, 1 + 2 + 3 + 2 + 2);
2807 FAIL_IF(!inst);
2808 INC_SIZE(2 + 3 + 2 + 2);
2809 /* Set low register to conditional flag. */
2810 *inst++ = XCHG_r_rm;
2811 *inst++ = MOD_REG | (1 /* ecx */ << 3) | reg_map[TMP_REG1];
2812 *inst++ = GROUP_0F;
2813 *inst++ = cond_set;
2814 *inst++ = MOD_REG | 1 /* ecx */;
2815 *inst++ = OR_rm8_r8;
2816 *inst++ = MOD_REG | (1 /* ecx */ << 3) | 0 /* eax */;
2817 *inst++ = XCHG_r_rm;
2818 *inst++ = MOD_REG | (1 /* ecx */ << 3) | reg_map[TMP_REG1];
2819 }
2820 return SLJIT_SUCCESS;
2821 }
2822
2823 /* Set TMP_REG1 to the bit. */
2824 inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + 3 + 3 + 1);
2825 FAIL_IF(!inst);
2826 INC_SIZE(1 + 3 + 3 + 1);
2827 *inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
2828 /* Set al to conditional flag. */
2829 *inst++ = GROUP_0F;
2830 *inst++ = cond_set;
2831 *inst++ = MOD_REG | 0 /* eax */;
2832
2833 *inst++ = GROUP_0F;
2834 *inst++ = MOVZX_r_rm8;
2835 *inst++ = MOD_REG | (0 << 3) /* eax */ | 0 /* eax */;
2836
2837 *inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
2838
2839 if (GET_OPCODE(op) < SLJIT_ADD)
2840 return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
2841
2842 #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
2843 || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
2844 compiler->skip_checks = 1;
2845 #endif
2846 return sljit_emit_op2(compiler, op, dst_save, dstw_save, dst_save, dstw_save, TMP_REG1, 0);
2847 #endif /* SLJIT_CONFIG_X86_64 */
2848 }
2849
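/* Computes the address of a stack local (SP + offset), preferably with a
   single LEA so the flags are preserved. */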
2850 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_local_base(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw offset)
2851 {
2852 CHECK_ERROR();
2853 CHECK(check_sljit_get_local_base(compiler, dst, dstw, offset));
2854 ADJUST_LOCAL_OFFSET(dst, dstw);
2855
2856 CHECK_EXTRA_REGS(dst, dstw, (void)0);
2857
2858 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2859 compiler->mode32 = 0;
2860 #endif
2861
2862 ADJUST_LOCAL_OFFSET(SLJIT_MEM1(SLJIT_SP), offset);
2863
2864 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2865 if (NOT_HALFWORD(offset)) {
2866 FAIL_IF(emit_load_imm64(compiler, TMP_REG1, offset));
2867 #if (defined SLJIT_DEBUG && SLJIT_DEBUG)
2868 SLJIT_ASSERT(emit_lea_binary(compiler, SLJIT_KEEP_FLAGS, dst, dstw, SLJIT_SP, 0, TMP_REG1, 0) != SLJIT_ERR_UNSUPPORTED);
2869 return compiler->error;
2870 #else
2871 return emit_lea_binary(compiler, SLJIT_KEEP_FLAGS, dst, dstw, SLJIT_SP, 0, TMP_REG1, 0);
2872 #endif
2873 }
2874 #endif
2875
2876 if (offset != 0)
2877 return emit_lea_binary(compiler, SLJIT_KEEP_FLAGS, dst, dstw, SLJIT_SP, 0, SLJIT_IMM, offset);
2878 return emit_mov(compiler, dst, dstw, SLJIT_SP, 0);
2879 }
2880
2881 SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value)
2882 {
2883 sljit_u8 *inst;
2884 struct sljit_const *const_;
2885 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2886 sljit_s32 reg;
2887 #endif
2888
2889 CHECK_ERROR_PTR();
2890 CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value));
2891 ADJUST_LOCAL_OFFSET(dst, dstw);
2892
2893 CHECK_EXTRA_REGS(dst, dstw, (void)0);
2894
2895 const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const));
2896 PTR_FAIL_IF(!const_);
2897 set_const(const_, compiler);
2898
2899 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2900 compiler->mode32 = 0;
2901 reg = SLOW_IS_REG(dst) ? dst : TMP_REG1;
2902
2903 if (emit_load_imm64(compiler, reg, init_value))
2904 return NULL;
2905 #else
2906 if (dst == SLJIT_UNUSED)
2907 dst = TMP_REG1;
2908
2909 if (emit_mov(compiler, dst, dstw, SLJIT_IMM, init_value))
2910 return NULL;
2911 #endif
2912
2913 inst = (sljit_u8*)ensure_buf(compiler, 2);
2914 PTR_FAIL_IF(!inst);
2915
2916 *inst++ = 0;
2917 *inst++ = 1;
2918
2919 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2920 if (dst & SLJIT_MEM)
2921 if (emit_mov(compiler, dst, dstw, TMP_REG1, 0))
2922 return NULL;
2923 #endif
2924
2925 return const_;
2926 }
2927
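/* Patches the target of a rewritable jump: on x86-32 the stored value is a
   rel32 displacement relative to the end of the 4 byte field, while on
   x86-64 an absolute 64 bit address is stored. */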
2928 SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_addr)
2929 {
2930 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
2931 *(sljit_sw*)addr = new_addr - (addr + 4);
2932 #else
2933 *(sljit_uw*)addr = new_addr;
2934 #endif
2935 }
2936
2937 SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant)
2938 {
2939 *(sljit_sw*)addr = new_constant;
2940 }
2941
2942 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_x86_is_sse2_available(void)
2943 {
2944 #if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
2945 if (cpu_has_sse2 == -1)
2946 get_cpu_features();
2947 return cpu_has_sse2;
2948 #else
2949 return 1;
2950 #endif
2951 }
2952
2953 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_x86_is_cmov_available(void)
2954 {
2955 if (cpu_has_cmov == -1)
2956 get_cpu_features();
2957 return cpu_has_cmov;
2958 }
2959
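/* CMOVcc shares its condition encoding with Jcc: the near jump opcode is
   0F 8x and the corresponding conditional move is 0F 4x, hence the
   get_jump_code(type) - 0x40 below. */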
2960 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_x86_emit_cmov(struct sljit_compiler *compiler,
2961 sljit_s32 type,
2962 sljit_s32 dst_reg,
2963 sljit_s32 src, sljit_sw srcw)
2964 {
2965 sljit_u8* inst;
2966
2967 CHECK_ERROR();
2968 #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
2969 CHECK_ARGUMENT(sljit_x86_is_cmov_available());
2970 CHECK_ARGUMENT(!(type & ~(0xff | SLJIT_I32_OP)));
2971 CHECK_ARGUMENT((type & 0xff) >= SLJIT_EQUAL && (type & 0xff) <= SLJIT_ORDERED_F64);
2972 CHECK_ARGUMENT(FUNCTION_CHECK_IS_REG(dst_reg & ~SLJIT_I32_OP));
2973 FUNCTION_CHECK_SRC(src, srcw);
2974 #endif
2975 #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
2976 if (SLJIT_UNLIKELY(!!compiler->verbose)) {
2977 fprintf(compiler->verbose, " x86_cmov%s %s%s, ",
2978 !(dst_reg & SLJIT_I32_OP) ? "" : ".i",
2979 jump_names[type & 0xff], JUMP_POSTFIX(type));
2980 sljit_verbose_reg(compiler, dst_reg & ~SLJIT_I32_OP);
2981 fprintf(compiler->verbose, ", ");
2982 sljit_verbose_param(compiler, src, srcw);
2983 fprintf(compiler->verbose, "\n");
2984 }
2985 #endif
2986
2987 ADJUST_LOCAL_OFFSET(src, srcw);
2988 CHECK_EXTRA_REGS(src, srcw, (void)0);
2989
2990 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2991 compiler->mode32 = dst_reg & SLJIT_I32_OP;
2992 #endif
2993 dst_reg &= ~SLJIT_I32_OP;
2994
2995 if (SLJIT_UNLIKELY(src & SLJIT_IMM)) {
2996 EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, srcw);
2997 src = TMP_REG1;
2998 srcw = 0;
2999 }
3000
3001 inst = emit_x86_instruction(compiler, 2, dst_reg, 0, src, srcw);
3002 FAIL_IF(!inst);
3003 *inst++ = GROUP_0F;
3004 *inst = get_jump_code(type & 0xff) - 0x40;
3005 return SLJIT_SUCCESS;
3006 }
3007