/*
 * Stack-less Just-In-Time compiler
 *
 * Copyright 2009-2012 Zoltan Herczeg (hzmester (at) freemail.hu). All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification, are
 * permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice, this list of
 *    conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice, this list
 *    of conditions and the following disclaimer in the documentation and/or other materials
 *    provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
 * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

SLJIT_API_FUNC_ATTRIBUTE SLJIT_CONST char* sljit_get_platform_name(void)
{
	return "x86" SLJIT_CPUINFO;
}

/*
   32b register indexes:
     0 - EAX
     1 - ECX
     2 - EDX
     3 - EBX
     4 - none
     5 - EBP
     6 - ESI
     7 - EDI
*/

/*
   64b register indexes:
     0 - RAX
     1 - RCX
     2 - RDX
     3 - RBX
     4 - none
     5 - RBP
     6 - RSI
     7 - RDI
     8 - R8   - From now on REX prefix is required
     9 - R9
    10 - R10
    11 - R11
    12 - R12
    13 - R13
    14 - R14
    15 - R15
*/

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)

/* Last register + 1. */
#define TMP_REG1	(SLJIT_NO_REGISTERS + 1)

static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 2] = {
	0, 0, 2, 1, 0, 0, 3, 6, 7, 0, 0, 4, 5
};

#define CHECK_EXTRA_REGS(p, w, do) \
	if (p >= SLJIT_TEMPORARY_EREG1 && p <= SLJIT_TEMPORARY_EREG2) { \
		w = compiler->scratches_start + (p - SLJIT_TEMPORARY_EREG1) * sizeof(sljit_sw); \
		p = SLJIT_MEM1(SLJIT_LOCALS_REG); \
		do; \
	} \
	else if (p >= SLJIT_SAVED_EREG1 && p <= SLJIT_SAVED_EREG2) { \
		w = compiler->saveds_start + (p - SLJIT_SAVED_EREG1) * sizeof(sljit_sw); \
		p = SLJIT_MEM1(SLJIT_LOCALS_REG); \
		do; \
	}
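
/* Illustrative note: on x86-32 the extra virtual registers are not mapped to
   machine registers at all. For p == SLJIT_TEMPORARY_EREG1 the macro above
   rewrites the operand into SLJIT_MEM1(SLJIT_LOCALS_REG) with
   w = compiler->scratches_start, i.e. the extra registers live in stack
   slots addressed relative to the locals register. */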

#else /* SLJIT_CONFIG_X86_32 */

/* Last register + 1. */
#define TMP_REG1	(SLJIT_NO_REGISTERS + 1)
#define TMP_REG2	(SLJIT_NO_REGISTERS + 2)
#define TMP_REG3	(SLJIT_NO_REGISTERS + 3)

/* Note: r12 & 0x7 == 0b100, which is decoded as a SIB byte being present.
   Note: avoid using r12 and r13 for memory addressing,
   therefore r12 is better suited for SAVED_EREG than SAVED_REG. */
#ifndef _WIN64
/* 1st argument passed in rdi, 2nd in rsi, 3rd in rdx. */
static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 4] = {
	0, 0, 6, 1, 8, 11, 3, 15, 14, 13, 12, 4, 2, 7, 9
};
/* low-map. reg_map & 0x7. */
static SLJIT_CONST sljit_ub reg_lmap[SLJIT_NO_REGISTERS + 4] = {
	0, 0, 6, 1, 0, 3, 3, 7, 6, 5, 4, 4, 2, 7, 1
};
#else
/* 1st argument passed in rcx, 2nd in rdx, 3rd in r8. */
static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 4] = {
	0, 0, 2, 1, 11, 13, 3, 6, 7, 14, 15, 4, 10, 8, 9
};
/* low-map. reg_map & 0x7. */
static SLJIT_CONST sljit_ub reg_lmap[SLJIT_NO_REGISTERS + 4] = {
	0, 0, 2, 1, 3, 5, 3, 6, 7, 6, 7, 4, 2, 0, 1
};
#endif

#define REX_W	0x48
#define REX_R	0x44
#define REX_X	0x42
#define REX_B	0x41
#define REX	0x40
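
/* A minimal sketch of how these prefixes combine (illustrative only; the
   real encoding is done by emit_x86_instruction in the per-arch files).
   For "mov r8, rax", REX_W selects the 64 bit operand size and REX_B
   extends the ModRM rm field so that it can reach r8:

     REX_W | REX_B, MOV_rm_r, MOD_REG | (0 << 3) | 0

   i.e. 0x49 0x89 0xc0, where both low 3-bit register fields are 0
   (rax and r8 & 0x7). MOV_rm_r and MOD_REG are defined below. */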

#ifndef _WIN64
#define HALFWORD_MAX	0x7fffffffl
#define HALFWORD_MIN	-0x80000000l
#else
#define HALFWORD_MAX	0x7fffffffll
#define HALFWORD_MIN	-0x80000000ll
#endif

#define IS_HALFWORD(x)	((x) <= HALFWORD_MAX && (x) >= HALFWORD_MIN)
#define NOT_HALFWORD(x)	((x) > HALFWORD_MAX || (x) < HALFWORD_MIN)

#define CHECK_EXTRA_REGS(p, w, do)

#endif /* SLJIT_CONFIG_X86_32 */

#if (defined SLJIT_SSE2 && SLJIT_SSE2)
#define TMP_FREG	(0)
#endif

/* Size flags for emit_x86_instruction: */
#define EX86_BIN_INS	0x0010
#define EX86_SHIFT_INS	0x0020
#define EX86_REX	0x0040
#define EX86_NO_REXW	0x0080
#define EX86_BYTE_ARG	0x0100
#define EX86_HALF_ARG	0x0200
#define EX86_PREF_66	0x0400

#if (defined SLJIT_SSE2 && SLJIT_SSE2)
#define EX86_SSE2	0x0800
#define EX86_PREF_F2	0x1000
#define EX86_PREF_F3	0x2000
#endif

/* --------------------------------------------------------------------- */
/*  Instruction forms                                                      */
/* --------------------------------------------------------------------- */

#define ADD		(/* BINARY */ 0 << 3)
#define ADD_EAX_i32	0x05
#define ADD_r_rm	0x03
#define ADD_rm_r	0x01
#define ADDSD_x_xm	0x58
#define ADC		(/* BINARY */ 2 << 3)
#define ADC_EAX_i32	0x15
#define ADC_r_rm	0x13
#define ADC_rm_r	0x11
#define AND		(/* BINARY */ 4 << 3)
#define AND_EAX_i32	0x25
#define AND_r_rm	0x23
#define AND_rm_r	0x21
#define ANDPD_x_xm	0x54
#define BSR_r_rm	(/* GROUP_0F */ 0xbd)
#define CALL_i32	0xe8
#define CALL_rm		(/* GROUP_FF */ 2 << 3)
#define CDQ		0x99
#define CMOVNE_r_rm	(/* GROUP_0F */ 0x45)
#define CMP		(/* BINARY */ 7 << 3)
#define CMP_EAX_i32	0x3d
#define CMP_r_rm	0x3b
#define CMP_rm_r	0x39
#define DIV		(/* GROUP_F7 */ 6 << 3)
#define DIVSD_x_xm	0x5e
#define INT3		0xcc
#define IDIV		(/* GROUP_F7 */ 7 << 3)
#define IMUL		(/* GROUP_F7 */ 5 << 3)
#define IMUL_r_rm	(/* GROUP_0F */ 0xaf)
#define IMUL_r_rm_i8	0x6b
#define IMUL_r_rm_i32	0x69
#define JE_i8		0x74
#define JMP_i8		0xeb
#define JMP_i32		0xe9
#define JMP_rm		(/* GROUP_FF */ 4 << 3)
#define LEA_r_m		0x8d
#define MOV_r_rm	0x8b
#define MOV_r_i32	0xb8
#define MOV_rm_r	0x89
#define MOV_rm_i32	0xc7
#define MOV_rm8_i8	0xc6
#define MOV_rm8_r8	0x88
#define MOVSD_x_xm	0x10
#define MOVSD_xm_x	0x11
#define MOVSXD_r_rm	0x63
#define MOVSX_r_rm8	(/* GROUP_0F */ 0xbe)
#define MOVSX_r_rm16	(/* GROUP_0F */ 0xbf)
#define MOVZX_r_rm8	(/* GROUP_0F */ 0xb6)
#define MOVZX_r_rm16	(/* GROUP_0F */ 0xb7)
#define MUL		(/* GROUP_F7 */ 4 << 3)
#define MULSD_x_xm	0x59
#define NEG_rm		(/* GROUP_F7 */ 3 << 3)
#define NOP		0x90
#define NOT_rm		(/* GROUP_F7 */ 2 << 3)
#define OR		(/* BINARY */ 1 << 3)
#define OR_r_rm		0x0b
#define OR_EAX_i32	0x0d
#define OR_rm_r		0x09
#define OR_rm8_r8	0x08
#define POP_r		0x58
#define POP_rm		0x8f
#define POPF		0x9d
#define PUSH_i32	0x68
#define PUSH_r		0x50
#define PUSH_rm		(/* GROUP_FF */ 6 << 3)
#define PUSHF		0x9c
#define RET_near	0xc3
#define RET_i16		0xc2
#define SBB		(/* BINARY */ 3 << 3)
#define SBB_EAX_i32	0x1d
#define SBB_r_rm	0x1b
#define SBB_rm_r	0x19
#define SAR		(/* SHIFT */ 7 << 3)
#define SHL		(/* SHIFT */ 4 << 3)
#define SHR		(/* SHIFT */ 5 << 3)
#define SUB		(/* BINARY */ 5 << 3)
#define SUB_EAX_i32	0x2d
#define SUB_r_rm	0x2b
#define SUB_rm_r	0x29
#define SUBSD_x_xm	0x5c
#define TEST_EAX_i32	0xa9
#define TEST_rm_r	0x85
#define UCOMISD_x_xm	0x2e
#define XCHG_EAX_r	0x90
#define XCHG_r_rm	0x87
#define XOR		(/* BINARY */ 6 << 3)
#define XOR_EAX_i32	0x35
#define XOR_r_rm	0x33
#define XOR_rm_r	0x31
#define XORPD_x_xm	0x57

#define GROUP_0F	0x0f
#define GROUP_F7	0xf7
#define GROUP_FF	0xff
#define GROUP_BINARY_81	0x81
#define GROUP_BINARY_83	0x83
#define GROUP_SHIFT_1	0xd1
#define GROUP_SHIFT_N	0xc1
#define GROUP_SHIFT_CL	0xd3

#define MOD_REG		0xc0
#define MOD_DISP8	0x40

#define INC_SIZE(s)	(*inst++ = (s), compiler->size += (s))

#define PUSH_REG(r)	(*inst++ = (PUSH_r + (r)))
#define POP_REG(r)	(*inst++ = (POP_r + (r)))
#define RET()		(*inst++ = (RET_near))
#define RET_I16(n)	(*inst++ = (RET_i16), *inst++ = n, *inst++ = 0)
/* r32, r/m32 */
#define MOV_RM(mod, reg, rm)	(*inst++ = (MOV_r_rm), *inst++ = (mod) << 6 | (reg) << 3 | (rm))
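
/* For example (illustrative operands), MOV_RM(0x3, 0, 2) emits the bytes
   0x8b 0xc2, i.e. "mov eax, edx": the mod value 0x3 selects the
   register-to-register form (matching MOD_REG >> 6), and 0 and 2 are the
   reg_map values of eax and edx. */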

/* Multithreading does not affect these static variables, since they store
   built-in CPU features. Therefore they can safely be overwritten by
   different threads that detect the CPU features at the same time. */
#if (defined SLJIT_SSE2 && SLJIT_SSE2) && (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
static sljit_si cpu_has_sse2 = -1;
#endif
static sljit_si cpu_has_cmov = -1;

#if defined(_MSC_VER) && _MSC_VER >= 1400
#include <intrin.h>
#endif

static void get_cpu_features(void)
{
	sljit_ui features;

#if defined(_MSC_VER) && _MSC_VER >= 1400

	int CPUInfo[4];
	__cpuid(CPUInfo, 1);
	features = (sljit_ui)CPUInfo[3];

#elif defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__SUNPRO_C)

	/* AT&T syntax. */
	__asm__ (
		"movl $0x1, %%eax\n"
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		/* On x86-32, there is no red zone, so this
		   should work (no need for a local variable). */
		"push %%ebx\n"
#endif
		"cpuid\n"
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		"pop %%ebx\n"
#endif
		"movl %%edx, %0\n"
		: "=g" (features)
		:
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		: "%eax", "%ecx", "%edx"
#else
		: "%rax", "%rbx", "%rcx", "%rdx"
#endif
	);

#else /* _MSC_VER && _MSC_VER >= 1400 */

	/* Intel syntax. */
	__asm {
		mov eax, 1
		cpuid
		mov features, edx
	}

#endif /* _MSC_VER && _MSC_VER >= 1400 */

#if (defined SLJIT_SSE2 && SLJIT_SSE2) && (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
	cpu_has_sse2 = (features >> 26) & 0x1;
#endif
	cpu_has_cmov = (features >> 15) & 0x1;
}
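
/* The flags above are filled in lazily; the emitters below use the pattern

	if (cpu_has_cmov == -1)
		get_cpu_features();
	if (cpu_has_cmov) {
		... emit a cmov based sequence ...
	}

   so detection runs at most once per flag (a benign race, as the comment
   above the variables explains). */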

static sljit_ub get_jump_code(sljit_si type)
{
	switch (type) {
	case SLJIT_C_EQUAL:
	case SLJIT_C_FLOAT_EQUAL:
		return 0x84 /* je */;

	case SLJIT_C_NOT_EQUAL:
	case SLJIT_C_FLOAT_NOT_EQUAL:
		return 0x85 /* jne */;

	case SLJIT_C_LESS:
	case SLJIT_C_FLOAT_LESS:
		return 0x82 /* jc */;

	case SLJIT_C_GREATER_EQUAL:
	case SLJIT_C_FLOAT_GREATER_EQUAL:
		return 0x83 /* jae */;

	case SLJIT_C_GREATER:
	case SLJIT_C_FLOAT_GREATER:
		return 0x87 /* jnbe */;

	case SLJIT_C_LESS_EQUAL:
	case SLJIT_C_FLOAT_LESS_EQUAL:
		return 0x86 /* jbe */;

	case SLJIT_C_SIG_LESS:
		return 0x8c /* jl */;

	case SLJIT_C_SIG_GREATER_EQUAL:
		return 0x8d /* jnl */;

	case SLJIT_C_SIG_GREATER:
		return 0x8f /* jnle */;

	case SLJIT_C_SIG_LESS_EQUAL:
		return 0x8e /* jle */;

	case SLJIT_C_OVERFLOW:
	case SLJIT_C_MUL_OVERFLOW:
		return 0x80 /* jo */;

	case SLJIT_C_NOT_OVERFLOW:
	case SLJIT_C_MUL_NOT_OVERFLOW:
		return 0x81 /* jno */;

	case SLJIT_C_FLOAT_UNORDERED:
		return 0x8a /* jp */;

	case SLJIT_C_FLOAT_ORDERED:
		return 0x8b /* jpo */;
	}
	return 0;
}
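
/* The values returned above are the second byte of the long (rel32) form,
   0x0f 0x8?. The short (rel8) form of the same condition is that byte
   minus 0x10 (e.g. je: 0x0f 0x84 long, 0x74 short), which is what
   generate_near_jump_code() exploits below. */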

static sljit_ub* generate_far_jump_code(struct sljit_jump *jump, sljit_ub *code_ptr, sljit_si type);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
static sljit_ub* generate_fixed_jump(sljit_ub *code_ptr, sljit_sw addr, sljit_si type);
#endif

static sljit_ub* generate_near_jump_code(struct sljit_jump *jump, sljit_ub *code_ptr, sljit_ub *code, sljit_si type)
{
	sljit_si short_jump;
	sljit_uw label_addr;

	if (jump->flags & JUMP_LABEL)
		label_addr = (sljit_uw)(code + jump->u.label->size);
	else
		label_addr = jump->u.target;
	short_jump = (sljit_sw)(label_addr - (jump->addr + 2)) >= -128 && (sljit_sw)(label_addr - (jump->addr + 2)) <= 127;
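	/* The +2 bias accounts for the size of a short jump (opcode + rel8):
	   the displacement is measured from the end of the instruction. */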

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	if ((sljit_sw)(label_addr - (jump->addr + 1)) > HALFWORD_MAX || (sljit_sw)(label_addr - (jump->addr + 1)) < HALFWORD_MIN)
		return generate_far_jump_code(jump, code_ptr, type);
#endif

	if (type == SLJIT_JUMP) {
		if (short_jump)
			*code_ptr++ = JMP_i8;
		else
			*code_ptr++ = JMP_i32;
		jump->addr++;
	}
	else if (type >= SLJIT_FAST_CALL) {
		short_jump = 0;
		*code_ptr++ = CALL_i32;
		jump->addr++;
	}
	else if (short_jump) {
		*code_ptr++ = get_jump_code(type) - 0x10;
		jump->addr++;
	}
	else {
		*code_ptr++ = GROUP_0F;
		*code_ptr++ = get_jump_code(type);
		jump->addr += 2;
	}

	if (short_jump) {
		jump->flags |= PATCH_MB;
		code_ptr += sizeof(sljit_sb);
	} else {
		jump->flags |= PATCH_MW;
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		code_ptr += sizeof(sljit_sw);
#else
		code_ptr += sizeof(sljit_si);
#endif
	}

	return code_ptr;
}

SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler)
{
	struct sljit_memory_fragment *buf;
	sljit_ub *code;
	sljit_ub *code_ptr;
	sljit_ub *buf_ptr;
	sljit_ub *buf_end;
	sljit_ub len;

	struct sljit_label *label;
	struct sljit_jump *jump;
	struct sljit_const *const_;

	CHECK_ERROR_PTR();
	check_sljit_generate_code(compiler);
	reverse_buf(compiler);

	/* Second code generation pass. */
	code = (sljit_ub*)SLJIT_MALLOC_EXEC(compiler->size);
	PTR_FAIL_WITH_EXEC_IF(code);
	buf = compiler->buf;

	code_ptr = code;
	label = compiler->labels;
	jump = compiler->jumps;
	const_ = compiler->consts;
	do {
		buf_ptr = buf->memory;
		buf_end = buf_ptr + buf->used_size;
		do {
			len = *buf_ptr++;
			if (len > 0) {
				/* The code is already generated. */
				SLJIT_MEMMOVE(code_ptr, buf_ptr, len);
				code_ptr += len;
				buf_ptr += len;
			}
			else {
				if (*buf_ptr >= 4) {
					jump->addr = (sljit_uw)code_ptr;
					if (!(jump->flags & SLJIT_REWRITABLE_JUMP))
						code_ptr = generate_near_jump_code(jump, code_ptr, code, *buf_ptr - 4);
					else
						code_ptr = generate_far_jump_code(jump, code_ptr, *buf_ptr - 4);
					jump = jump->next;
				}
				else if (*buf_ptr == 0) {
					label->addr = (sljit_uw)code_ptr;
					label->size = code_ptr - code;
					label = label->next;
				}
				else if (*buf_ptr == 1) {
					const_->addr = ((sljit_uw)code_ptr) - sizeof(sljit_sw);
					const_ = const_->next;
				}
				else {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
					*code_ptr++ = (*buf_ptr == 2) ? CALL_i32 : JMP_i32;
					buf_ptr++;
					*(sljit_sw*)code_ptr = *(sljit_sw*)buf_ptr - ((sljit_sw)code_ptr + sizeof(sljit_sw));
					code_ptr += sizeof(sljit_sw);
					buf_ptr += sizeof(sljit_sw) - 1;
#else
					code_ptr = generate_fixed_jump(code_ptr, *(sljit_sw*)(buf_ptr + 1), *buf_ptr);
					buf_ptr += sizeof(sljit_sw);
#endif
				}
				buf_ptr++;
			}
		} while (buf_ptr < buf_end);
		SLJIT_ASSERT(buf_ptr == buf_end);
		buf = buf->next;
	} while (buf);

	SLJIT_ASSERT(!label);
	SLJIT_ASSERT(!jump);
	SLJIT_ASSERT(!const_);

	jump = compiler->jumps;
	while (jump) {
		if (jump->flags & PATCH_MB) {
			SLJIT_ASSERT((sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_sb))) >= -128 && (sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_sb))) <= 127);
			*(sljit_ub*)jump->addr = (sljit_ub)(jump->u.label->addr - (jump->addr + sizeof(sljit_sb)));
		} else if (jump->flags & PATCH_MW) {
			if (jump->flags & JUMP_LABEL) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
				*(sljit_sw*)jump->addr = (sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_sw)));
#else
				SLJIT_ASSERT((sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_si))) >= HALFWORD_MIN && (sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_si))) <= HALFWORD_MAX);
				*(sljit_si*)jump->addr = (sljit_si)(jump->u.label->addr - (jump->addr + sizeof(sljit_si)));
#endif
			}
			else {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
				*(sljit_sw*)jump->addr = (sljit_sw)(jump->u.target - (jump->addr + sizeof(sljit_sw)));
#else
				SLJIT_ASSERT((sljit_sw)(jump->u.target - (jump->addr + sizeof(sljit_si))) >= HALFWORD_MIN && (sljit_sw)(jump->u.target - (jump->addr + sizeof(sljit_si))) <= HALFWORD_MAX);
				*(sljit_si*)jump->addr = (sljit_si)(jump->u.target - (jump->addr + sizeof(sljit_si)));
#endif
			}
		}
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		else if (jump->flags & PATCH_MD)
			*(sljit_sw*)jump->addr = jump->u.label->addr;
#endif

		jump = jump->next;
	}

	/* Maybe we waste some space because of short jumps. */
	SLJIT_ASSERT(code_ptr <= code + compiler->size);
	compiler->error = SLJIT_ERR_COMPILED;
	compiler->executable_size = code_ptr - code;
	return (void*)code;
}

/* --------------------------------------------------------------------- */
/*  Operators                                                             */
/* --------------------------------------------------------------------- */

static sljit_si emit_cum_binary(struct sljit_compiler *compiler,
	sljit_ub op_rm, sljit_ub op_mr, sljit_ub op_imm, sljit_ub op_eax_imm,
	sljit_si dst, sljit_sw dstw,
	sljit_si src1, sljit_sw src1w,
	sljit_si src2, sljit_sw src2w);

static sljit_si emit_non_cum_binary(struct sljit_compiler *compiler,
	sljit_ub op_rm, sljit_ub op_mr, sljit_ub op_imm, sljit_ub op_eax_imm,
	sljit_si dst, sljit_sw dstw,
	sljit_si src1, sljit_sw src1w,
	sljit_si src2, sljit_sw src2w);

static sljit_si emit_mov(struct sljit_compiler *compiler,
	sljit_si dst, sljit_sw dstw,
	sljit_si src, sljit_sw srcw);

static SLJIT_INLINE sljit_si emit_save_flags(struct sljit_compiler *compiler)
{
	sljit_ub *inst;

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	inst = (sljit_ub*)ensure_buf(compiler, 1 + 5);
	FAIL_IF(!inst);
	INC_SIZE(5);
#else
	inst = (sljit_ub*)ensure_buf(compiler, 1 + 6);
	FAIL_IF(!inst);
	INC_SIZE(6);
	*inst++ = REX_W;
#endif
	*inst++ = LEA_r_m; /* lea esp/rsp, [esp/rsp + sizeof(sljit_sw)] */
	*inst++ = 0x64;
	*inst++ = 0x24;
	*inst++ = (sljit_ub)sizeof(sljit_sw);
	*inst++ = PUSHF;
	compiler->flags_saved = 1;
	return SLJIT_SUCCESS;
}
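
/* On x86-32 the sequence above is 8d 64 24 04 9c, i.e.

	lea esp, [esp + 4]
	pushfd

   The lea raises the stack pointer by one word and the pushf lowers it
   again, so the flags end up in the word at the original [esp] while the
   stack pointer itself is unchanged. emit_restore_flags() below emits the
   mirror image of this sequence. */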

static SLJIT_INLINE sljit_si emit_restore_flags(struct sljit_compiler *compiler, sljit_si keep_flags)
{
	sljit_ub *inst;

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	inst = (sljit_ub*)ensure_buf(compiler, 1 + 5);
	FAIL_IF(!inst);
	INC_SIZE(5);
	*inst++ = POPF;
#else
	inst = (sljit_ub*)ensure_buf(compiler, 1 + 6);
	FAIL_IF(!inst);
	INC_SIZE(6);
	*inst++ = POPF;
	*inst++ = REX_W;
#endif
	*inst++ = LEA_r_m; /* lea esp/rsp, [esp/rsp - sizeof(sljit_sw)] */
	*inst++ = 0x64;
	*inst++ = 0x24;
	*inst++ = (sljit_ub)-(sljit_sb)sizeof(sljit_sw);
	compiler->flags_saved = keep_flags;
	return SLJIT_SUCCESS;
}

#ifdef _WIN32
#include <malloc.h>

static void SLJIT_CALL sljit_grow_stack(sljit_sw local_size)
{
	/* Workaround for calling the internal _chkstk() function on Windows.
	   This function touches all 4k pages belonging to the requested stack
	   space, whose size is passed in local_size. This is necessary on
	   Windows, where the stack can only grow in 4k steps. However, this
	   function just burns CPU cycles if the stack is large enough. You
	   cannot know that in advance, though, so it must always be called.
	   I think this is a bad design in general, even if it has its reasons. */
	*(volatile sljit_si*)alloca(local_size) = 0;
}

#endif

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
#include "sljitNativeX86_32.c"
#else
#include "sljitNativeX86_64.c"
#endif

static sljit_si emit_mov(struct sljit_compiler *compiler,
	sljit_si dst, sljit_sw dstw,
	sljit_si src, sljit_sw srcw)
{
	sljit_ub* inst;

	if (dst == SLJIT_UNUSED) {
		/* No destination: no need to set up flags. */
		if (src & SLJIT_MEM) {
			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src, srcw);
			FAIL_IF(!inst);
			*inst = MOV_r_rm;
		}
		return SLJIT_SUCCESS;
	}
	if (FAST_IS_REG(src)) {
		inst = emit_x86_instruction(compiler, 1, src, 0, dst, dstw);
		FAIL_IF(!inst);
		*inst = MOV_rm_r;
		return SLJIT_SUCCESS;
	}
	if (src & SLJIT_IMM) {
		if (FAST_IS_REG(dst)) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
			return emit_do_imm(compiler, MOV_r_i32 + reg_map[dst], srcw);
#else
			if (!compiler->mode32) {
				if (NOT_HALFWORD(srcw))
					return emit_load_imm64(compiler, dst, srcw);
			}
			else
				return emit_do_imm32(compiler, (reg_map[dst] >= 8) ? REX_B : 0, MOV_r_i32 + reg_lmap[dst], srcw);
#endif
		}
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		if (!compiler->mode32 && NOT_HALFWORD(srcw)) {
			FAIL_IF(emit_load_imm64(compiler, TMP_REG2, srcw));
			inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, dst, dstw);
			FAIL_IF(!inst);
			*inst = MOV_rm_r;
			return SLJIT_SUCCESS;
		}
#endif
		inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, dstw);
		FAIL_IF(!inst);
		*inst = MOV_rm_i32;
		return SLJIT_SUCCESS;
	}
	if (FAST_IS_REG(dst)) {
		inst = emit_x86_instruction(compiler, 1, dst, 0, src, srcw);
		FAIL_IF(!inst);
		*inst = MOV_r_rm;
		return SLJIT_SUCCESS;
	}

	/* Memory to memory move. Requires two instructions. */
	inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src, srcw);
	FAIL_IF(!inst);
	*inst = MOV_r_rm;
	inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
	FAIL_IF(!inst);
	*inst = MOV_rm_r;
	return SLJIT_SUCCESS;
}
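
/* For example (illustrative operands), a memory to memory move such as
   emit_mov(compiler, SLJIT_MEM1(reg_a), 0, SLJIT_MEM1(reg_b), 8) is
   handled by the two-instruction tail above and becomes

	mov tmp, [reg_b + 8]
	mov [reg_a], tmp

   with TMP_REG1 serving as the intermediate register. */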

#define EMIT_MOV(compiler, dst, dstw, src, srcw) \
	FAIL_IF(emit_mov(compiler, dst, dstw, src, srcw));

SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler, sljit_si op)
{
	sljit_ub *inst;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	sljit_si size;
#endif

	CHECK_ERROR();
	check_sljit_emit_op0(compiler, op);

	switch (GET_OPCODE(op)) {
	case SLJIT_BREAKPOINT:
		inst = (sljit_ub*)ensure_buf(compiler, 1 + 1);
		FAIL_IF(!inst);
		INC_SIZE(1);
		*inst = INT3;
		break;
	case SLJIT_NOP:
		inst = (sljit_ub*)ensure_buf(compiler, 1 + 1);
		FAIL_IF(!inst);
		INC_SIZE(1);
		*inst = NOP;
		break;
	case SLJIT_UMUL:
	case SLJIT_SMUL:
	case SLJIT_UDIV:
	case SLJIT_SDIV:
		compiler->flags_saved = 0;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
#ifdef _WIN64
		SLJIT_COMPILE_ASSERT(
			reg_map[SLJIT_SCRATCH_REG1] == 0
			&& reg_map[SLJIT_SCRATCH_REG2] == 2
			&& reg_map[TMP_REG1] > 7,
			invalid_register_assignment_for_div_mul);
#else
		SLJIT_COMPILE_ASSERT(
			reg_map[SLJIT_SCRATCH_REG1] == 0
			&& reg_map[SLJIT_SCRATCH_REG2] < 7
			&& reg_map[TMP_REG1] == 2,
			invalid_register_assignment_for_div_mul);
#endif
		compiler->mode32 = op & SLJIT_INT_OP;
#endif

		op = GET_OPCODE(op);
		if (op == SLJIT_UDIV) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64)
			EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_SCRATCH_REG2, 0);
			inst = emit_x86_instruction(compiler, 1, SLJIT_SCRATCH_REG2, 0, SLJIT_SCRATCH_REG2, 0);
#else
			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, TMP_REG1, 0);
#endif
			FAIL_IF(!inst);
			*inst = XOR_r_rm;
		}

		if (op == SLJIT_SDIV) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64)
			EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_SCRATCH_REG2, 0);
#endif

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
			inst = (sljit_ub*)ensure_buf(compiler, 1 + 1);
			FAIL_IF(!inst);
			INC_SIZE(1);
			*inst = CDQ;
#else
			if (compiler->mode32) {
				inst = (sljit_ub*)ensure_buf(compiler, 1 + 1);
				FAIL_IF(!inst);
				INC_SIZE(1);
				*inst = CDQ;
			} else {
				inst = (sljit_ub*)ensure_buf(compiler, 1 + 2);
				FAIL_IF(!inst);
				INC_SIZE(2);
				*inst++ = REX_W;
				*inst = CDQ;
			}
#endif
		}

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		inst = (sljit_ub*)ensure_buf(compiler, 1 + 2);
		FAIL_IF(!inst);
		INC_SIZE(2);
		*inst++ = GROUP_F7;
		*inst = MOD_REG | ((op >= SLJIT_UDIV) ? reg_map[TMP_REG1] : reg_map[SLJIT_SCRATCH_REG2]);
#else
#ifdef _WIN64
		size = (!compiler->mode32 || op >= SLJIT_UDIV) ? 3 : 2;
#else
		size = (!compiler->mode32) ? 3 : 2;
#endif
		inst = (sljit_ub*)ensure_buf(compiler, 1 + size);
		FAIL_IF(!inst);
		INC_SIZE(size);
#ifdef _WIN64
		if (!compiler->mode32)
			*inst++ = REX_W | ((op >= SLJIT_UDIV) ? REX_B : 0);
		else if (op >= SLJIT_UDIV)
			*inst++ = REX_B;
		*inst++ = GROUP_F7;
		*inst = MOD_REG | ((op >= SLJIT_UDIV) ? reg_lmap[TMP_REG1] : reg_lmap[SLJIT_SCRATCH_REG2]);
#else
		if (!compiler->mode32)
			*inst++ = REX_W;
		*inst++ = GROUP_F7;
		*inst = MOD_REG | reg_map[SLJIT_SCRATCH_REG2];
#endif
#endif
		switch (op) {
		case SLJIT_UMUL:
			*inst |= MUL;
			break;
		case SLJIT_SMUL:
			*inst |= IMUL;
			break;
		case SLJIT_UDIV:
			*inst |= DIV;
			break;
		case SLJIT_SDIV:
			*inst |= IDIV;
			break;
		}
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) && !defined(_WIN64)
		EMIT_MOV(compiler, SLJIT_SCRATCH_REG2, 0, TMP_REG1, 0);
#endif
		break;
	}

	return SLJIT_SUCCESS;
}

#define ENCODE_PREFIX(prefix) \
	do { \
		inst = (sljit_ub*)ensure_buf(compiler, 1 + 1); \
		FAIL_IF(!inst); \
		INC_SIZE(1); \
		*inst = (prefix); \
	} while (0)

static sljit_si emit_mov_byte(struct sljit_compiler *compiler, sljit_si sign,
	sljit_si dst, sljit_sw dstw,
	sljit_si src, sljit_sw srcw)
{
	sljit_ub* inst;
	sljit_si dst_r;
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	sljit_si work_r;
#endif

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 0;
#endif

	if (dst == SLJIT_UNUSED && !(src & SLJIT_MEM))
		return SLJIT_SUCCESS; /* Empty instruction. */

	if (src & SLJIT_IMM) {
		if (FAST_IS_REG(dst)) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
			return emit_do_imm(compiler, MOV_r_i32 + reg_map[dst], srcw);
#else
			inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, 0);
			FAIL_IF(!inst);
			*inst = MOV_rm_i32;
			return SLJIT_SUCCESS;
#endif
		}
		inst = emit_x86_instruction(compiler, 1 | EX86_BYTE_ARG | EX86_NO_REXW, SLJIT_IMM, srcw, dst, dstw);
		FAIL_IF(!inst);
		*inst = MOV_rm8_i8;
		return SLJIT_SUCCESS;
	}

	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;

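	/* Only EAX, ECX, EDX and EBX (reg_map value below 4) have byte
	   addressable low registers on x86-32; x86-64 can byte address any
	   register with a REX prefix, hence the reg_map[...] >= 4 special
	   cases below are 32 bit only. */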
	if ((dst & SLJIT_MEM) && FAST_IS_REG(src)) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		if (reg_map[src] >= 4) {
			SLJIT_ASSERT(dst_r == TMP_REG1);
			EMIT_MOV(compiler, TMP_REG1, 0, src, 0);
		} else
			dst_r = src;
#else
		dst_r = src;
#endif
	}
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	else if (FAST_IS_REG(src) && reg_map[src] >= 4) {
		/* src, dst are registers. */
		SLJIT_ASSERT(SLOW_IS_REG(dst));
		if (reg_map[dst] < 4) {
			if (dst != src)
				EMIT_MOV(compiler, dst, 0, src, 0);
			inst = emit_x86_instruction(compiler, 2, dst, 0, dst, 0);
			FAIL_IF(!inst);
			*inst++ = GROUP_0F;
			*inst = sign ? MOVSX_r_rm8 : MOVZX_r_rm8;
		}
		else {
			if (dst != src)
				EMIT_MOV(compiler, dst, 0, src, 0);
			if (sign) {
				/* shl reg, 24 */
				inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 24, dst, 0);
				FAIL_IF(!inst);
				*inst |= SHL;
				/* sar reg, 24 */
				inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 24, dst, 0);
				FAIL_IF(!inst);
				*inst |= SAR;
			}
			else {
				inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 0xff, dst, 0);
				FAIL_IF(!inst);
				*(inst + 1) |= AND;
			}
		}
		return SLJIT_SUCCESS;
	}
#endif
	else {
		/* src is either a memory address or, on x86-32, a register with
		   reg_map[src] < 4. */
		inst = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw);
		FAIL_IF(!inst);
		*inst++ = GROUP_0F;
		*inst = sign ? MOVSX_r_rm8 : MOVZX_r_rm8;
	}

	if (dst & SLJIT_MEM) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		if (dst_r == TMP_REG1) {
			/* Find an unused register whose reg_map value is below 4. */
			if ((dst & REG_MASK) == SLJIT_SCRATCH_REG1) {
				if ((dst & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_SCRATCH_REG2))
					work_r = SLJIT_SCRATCH_REG3;
				else
					work_r = SLJIT_SCRATCH_REG2;
			}
			else {
				if ((dst & OFFS_REG_MASK) != TO_OFFS_REG(SLJIT_SCRATCH_REG1))
					work_r = SLJIT_SCRATCH_REG1;
				else if ((dst & REG_MASK) == SLJIT_SCRATCH_REG2)
					work_r = SLJIT_SCRATCH_REG3;
				else
					work_r = SLJIT_SCRATCH_REG2;
			}

			if (work_r == SLJIT_SCRATCH_REG1) {
				ENCODE_PREFIX(XCHG_EAX_r + reg_map[TMP_REG1]);
			}
			else {
				inst = emit_x86_instruction(compiler, 1, work_r, 0, dst_r, 0);
				FAIL_IF(!inst);
				*inst = XCHG_r_rm;
			}

			inst = emit_x86_instruction(compiler, 1, work_r, 0, dst, dstw);
			FAIL_IF(!inst);
			*inst = MOV_rm8_r8;

			if (work_r == SLJIT_SCRATCH_REG1) {
				ENCODE_PREFIX(XCHG_EAX_r + reg_map[TMP_REG1]);
			}
			else {
				inst = emit_x86_instruction(compiler, 1, work_r, 0, dst_r, 0);
				FAIL_IF(!inst);
				*inst = XCHG_r_rm;
			}
		}
		else {
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, dst, dstw);
			FAIL_IF(!inst);
			*inst = MOV_rm8_r8;
		}
#else
		inst = emit_x86_instruction(compiler, 1 | EX86_REX | EX86_NO_REXW, dst_r, 0, dst, dstw);
		FAIL_IF(!inst);
		*inst = MOV_rm8_r8;
#endif
	}

	return SLJIT_SUCCESS;
}

static sljit_si emit_mov_half(struct sljit_compiler *compiler, sljit_si sign,
	sljit_si dst, sljit_sw dstw,
	sljit_si src, sljit_sw srcw)
{
	sljit_ub* inst;
	sljit_si dst_r;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 0;
#endif

	if (dst == SLJIT_UNUSED && !(src & SLJIT_MEM))
		return SLJIT_SUCCESS; /* Empty instruction. */

	if (src & SLJIT_IMM) {
		if (FAST_IS_REG(dst)) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
			return emit_do_imm(compiler, MOV_r_i32 + reg_map[dst], srcw);
#else
			inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, 0);
			FAIL_IF(!inst);
			*inst = MOV_rm_i32;
			return SLJIT_SUCCESS;
#endif
		}
		inst = emit_x86_instruction(compiler, 1 | EX86_HALF_ARG | EX86_NO_REXW | EX86_PREF_66, SLJIT_IMM, srcw, dst, dstw);
		FAIL_IF(!inst);
		*inst = MOV_rm_i32;
		return SLJIT_SUCCESS;
	}

	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;

	if ((dst & SLJIT_MEM) && FAST_IS_REG(src))
		dst_r = src;
	else {
		inst = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw);
		FAIL_IF(!inst);
		*inst++ = GROUP_0F;
		*inst = sign ? MOVSX_r_rm16 : MOVZX_r_rm16;
	}

	if (dst & SLJIT_MEM) {
		inst = emit_x86_instruction(compiler, 1 | EX86_NO_REXW | EX86_PREF_66, dst_r, 0, dst, dstw);
		FAIL_IF(!inst);
		*inst = MOV_rm_r;
	}

	return SLJIT_SUCCESS;
}

static sljit_si emit_unary(struct sljit_compiler *compiler, sljit_ub opcode,
	sljit_si dst, sljit_sw dstw,
	sljit_si src, sljit_sw srcw)
{
	sljit_ub* inst;

	if (dst == SLJIT_UNUSED) {
		EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
		inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
		FAIL_IF(!inst);
		*inst++ = GROUP_F7;
		*inst |= opcode;
		return SLJIT_SUCCESS;
	}
	if (dst == src && dstw == srcw) {
		/* Same input and output */
		inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
		FAIL_IF(!inst);
		*inst++ = GROUP_F7;
		*inst |= opcode;
		return SLJIT_SUCCESS;
	}
	if (FAST_IS_REG(dst)) {
		EMIT_MOV(compiler, dst, 0, src, srcw);
		inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
		FAIL_IF(!inst);
		*inst++ = GROUP_F7;
		*inst |= opcode;
		return SLJIT_SUCCESS;
	}
	EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
	inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
	FAIL_IF(!inst);
	*inst++ = GROUP_F7;
	*inst |= opcode;
	EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
	return SLJIT_SUCCESS;
}

static sljit_si emit_not_with_flags(struct sljit_compiler *compiler,
	sljit_si dst, sljit_sw dstw,
	sljit_si src, sljit_sw srcw)
{
	sljit_ub* inst;

	if (dst == SLJIT_UNUSED) {
		EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
		inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
		FAIL_IF(!inst);
		*inst++ = GROUP_F7;
		*inst |= NOT_rm;
		inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, TMP_REG1, 0);
		FAIL_IF(!inst);
		*inst = OR_r_rm;
		return SLJIT_SUCCESS;
	}
	if (FAST_IS_REG(dst)) {
		EMIT_MOV(compiler, dst, 0, src, srcw);
		inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
		FAIL_IF(!inst);
		*inst++ = GROUP_F7;
		*inst |= NOT_rm;
		inst = emit_x86_instruction(compiler, 1, dst, 0, dst, 0);
		FAIL_IF(!inst);
		*inst = OR_r_rm;
		return SLJIT_SUCCESS;
	}
	EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
	inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
	FAIL_IF(!inst);
	*inst++ = GROUP_F7;
	*inst |= NOT_rm;
	inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, TMP_REG1, 0);
	FAIL_IF(!inst);
	*inst = OR_r_rm;
	EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
	return SLJIT_SUCCESS;
}

static sljit_si emit_clz(struct sljit_compiler *compiler, sljit_si op_flags,
	sljit_si dst, sljit_sw dstw,
	sljit_si src, sljit_sw srcw)
{
	sljit_ub* inst;
	sljit_si dst_r;

	SLJIT_UNUSED_ARG(op_flags);
	if (SLJIT_UNLIKELY(dst == SLJIT_UNUSED)) {
		/* Just set the zero flag. */
		EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
		inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
		FAIL_IF(!inst);
		*inst++ = GROUP_F7;
		*inst |= NOT_rm;
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 31, TMP_REG1, 0);
#else
		inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, !(op_flags & SLJIT_INT_OP) ? 63 : 31, TMP_REG1, 0);
#endif
		FAIL_IF(!inst);
		*inst |= SHR;
		return SLJIT_SUCCESS;
	}

	if (SLJIT_UNLIKELY(src & SLJIT_IMM)) {
		EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, srcw);
		src = TMP_REG1;
		srcw = 0;
	}

	inst = emit_x86_instruction(compiler, 2, TMP_REG1, 0, src, srcw);
	FAIL_IF(!inst);
	*inst++ = GROUP_0F;
	*inst = BSR_r_rm;

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	if (FAST_IS_REG(dst))
		dst_r = dst;
	else {
		/* Find an unused temporary register. */
		if ((dst & REG_MASK) != SLJIT_SCRATCH_REG1 && (dst & OFFS_REG_MASK) != TO_OFFS_REG(SLJIT_SCRATCH_REG1))
			dst_r = SLJIT_SCRATCH_REG1;
		else if ((dst & REG_MASK) != SLJIT_SCRATCH_REG2 && (dst & OFFS_REG_MASK) != TO_OFFS_REG(SLJIT_SCRATCH_REG2))
			dst_r = SLJIT_SCRATCH_REG2;
		else
			dst_r = SLJIT_SCRATCH_REG3;
		EMIT_MOV(compiler, dst, dstw, dst_r, 0);
	}
	EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, 32 + 31);
#else
	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2;
	compiler->mode32 = 0;
	EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, !(op_flags & SLJIT_INT_OP) ? 64 + 63 : 32 + 31);
	compiler->mode32 = op_flags & SLJIT_INT_OP;
#endif

	if (cpu_has_cmov == -1)
		get_cpu_features();

	if (cpu_has_cmov) {
		inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG1, 0);
		FAIL_IF(!inst);
		*inst++ = GROUP_0F;
		*inst = CMOVNE_r_rm;
	} else {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		inst = (sljit_ub*)ensure_buf(compiler, 1 + 4);
		FAIL_IF(!inst);
		INC_SIZE(4);

		*inst++ = JE_i8;
		*inst++ = 2;
		*inst++ = MOV_r_rm;
		*inst++ = MOD_REG | (reg_map[dst_r] << 3) | reg_map[TMP_REG1];
#else
		inst = (sljit_ub*)ensure_buf(compiler, 1 + 5);
		FAIL_IF(!inst);
		INC_SIZE(5);

		*inst++ = JE_i8;
		*inst++ = 3;
		*inst++ = REX_W | (reg_map[dst_r] >= 8 ? REX_R : 0) | (reg_map[TMP_REG1] >= 8 ? REX_B : 0);
		*inst++ = MOV_r_rm;
		*inst++ = MOD_REG | (reg_lmap[dst_r] << 3) | reg_lmap[TMP_REG1];
#endif
	}

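	/* BSR left the index of the highest set bit in TMP_REG1 (it leaves the
	   destination untouched for a zero input, in which case the conditional
	   move above is skipped). XOR-ing a value in 0..31 with 31 (or 0..63
	   with 63) equals subtracting it from 31 (or 63), which turns the bit
	   index into the leading zero count; the 32 + 31 (or 64 + 63) constant
	   loaded earlier becomes 32 (or 64) for a zero input. */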
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 31, dst_r, 0);
#else
	inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, !(op_flags & SLJIT_INT_OP) ? 63 : 31, dst_r, 0);
#endif
	FAIL_IF(!inst);
	*(inst + 1) |= XOR;

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	if (dst & SLJIT_MEM) {
		inst = emit_x86_instruction(compiler, 1, dst_r, 0, dst, dstw);
		FAIL_IF(!inst);
		*inst = XCHG_r_rm;
	}
#else
	if (dst & SLJIT_MEM)
		EMIT_MOV(compiler, dst, dstw, TMP_REG2, 0);
#endif
	return SLJIT_SUCCESS;
}

SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op1(struct sljit_compiler *compiler, sljit_si op,
	sljit_si dst, sljit_sw dstw,
	sljit_si src, sljit_sw srcw)
{
	sljit_ub* inst;
	sljit_si update = 0;
	sljit_si op_flags = GET_ALL_FLAGS(op);
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	sljit_si dst_is_ereg = 0;
	sljit_si src_is_ereg = 0;
#else
# define src_is_ereg 0
#endif

	CHECK_ERROR();
	check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw);
	ADJUST_LOCAL_OFFSET(dst, dstw);
	ADJUST_LOCAL_OFFSET(src, srcw);

	CHECK_EXTRA_REGS(dst, dstw, dst_is_ereg = 1);
	CHECK_EXTRA_REGS(src, srcw, src_is_ereg = 1);
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = op_flags & SLJIT_INT_OP;
#endif

	op = GET_OPCODE(op);
	if (op >= SLJIT_MOV && op <= SLJIT_MOVU_P) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		compiler->mode32 = 0;
#endif

		if (op_flags & SLJIT_INT_OP) {
			if (FAST_IS_REG(src) && src == dst) {
				if (!TYPE_CAST_NEEDED(op))
					return SLJIT_SUCCESS;
			}
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
			if (op == SLJIT_MOV_SI && (src & SLJIT_MEM))
				op = SLJIT_MOV_UI;
			if (op == SLJIT_MOVU_SI && (src & SLJIT_MEM))
				op = SLJIT_MOVU_UI;
			if (op == SLJIT_MOV_UI && (src & SLJIT_IMM))
				op = SLJIT_MOV_SI;
			if (op == SLJIT_MOVU_UI && (src & SLJIT_IMM))
				op = SLJIT_MOVU_SI;
#endif
		}

		SLJIT_COMPILE_ASSERT(SLJIT_MOV + 8 == SLJIT_MOVU, movu_offset);
		if (op >= SLJIT_MOVU) {
			update = 1;
			op -= 8;
		}

		if (src & SLJIT_IMM) {
			switch (op) {
			case SLJIT_MOV_UB:
				srcw = (sljit_ub)srcw;
				break;
			case SLJIT_MOV_SB:
				srcw = (sljit_sb)srcw;
				break;
			case SLJIT_MOV_UH:
				srcw = (sljit_uh)srcw;
				break;
			case SLJIT_MOV_SH:
				srcw = (sljit_sh)srcw;
				break;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
			case SLJIT_MOV_UI:
				srcw = (sljit_ui)srcw;
				break;
			case SLJIT_MOV_SI:
				srcw = (sljit_si)srcw;
				break;
#endif
			}
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
			if (SLJIT_UNLIKELY(dst_is_ereg))
				return emit_mov(compiler, dst, dstw, src, srcw);
#endif
		}

		if (SLJIT_UNLIKELY(update) && (src & SLJIT_MEM) && !src_is_ereg && (src & REG_MASK) && (srcw != 0 || (src & OFFS_REG_MASK) != 0)) {
			inst = emit_x86_instruction(compiler, 1, src & REG_MASK, 0, src, srcw);
			FAIL_IF(!inst);
			*inst = LEA_r_m;
			src &= SLJIT_MEM | 0xf;
			srcw = 0;
		}

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		if (SLJIT_UNLIKELY(dst_is_ereg) && (!(op == SLJIT_MOV || op == SLJIT_MOV_UI || op == SLJIT_MOV_SI || op == SLJIT_MOV_P) || (src & SLJIT_MEM))) {
			SLJIT_ASSERT(dst == SLJIT_MEM1(SLJIT_LOCALS_REG));
			dst = TMP_REG1;
		}
#endif

		switch (op) {
		case SLJIT_MOV:
		case SLJIT_MOV_P:
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		case SLJIT_MOV_UI:
		case SLJIT_MOV_SI:
#endif
			FAIL_IF(emit_mov(compiler, dst, dstw, src, srcw));
			break;
		case SLJIT_MOV_UB:
			FAIL_IF(emit_mov_byte(compiler, 0, dst, dstw, src, srcw));
			break;
		case SLJIT_MOV_SB:
			FAIL_IF(emit_mov_byte(compiler, 1, dst, dstw, src, srcw));
			break;
		case SLJIT_MOV_UH:
			FAIL_IF(emit_mov_half(compiler, 0, dst, dstw, src, srcw));
			break;
		case SLJIT_MOV_SH:
			FAIL_IF(emit_mov_half(compiler, 1, dst, dstw, src, srcw));
			break;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		case SLJIT_MOV_UI:
			FAIL_IF(emit_mov_int(compiler, 0, dst, dstw, src, srcw));
			break;
		case SLJIT_MOV_SI:
			FAIL_IF(emit_mov_int(compiler, 1, dst, dstw, src, srcw));
			break;
#endif
		}

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		if (SLJIT_UNLIKELY(dst_is_ereg) && dst == TMP_REG1)
			return emit_mov(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), dstw, TMP_REG1, 0);
#endif

		if (SLJIT_UNLIKELY(update) && (dst & SLJIT_MEM) && (dst & REG_MASK) && (dstw != 0 || (dst & OFFS_REG_MASK) != 0)) {
			inst = emit_x86_instruction(compiler, 1, dst & REG_MASK, 0, dst, dstw);
			FAIL_IF(!inst);
			*inst = LEA_r_m;
		}
		return SLJIT_SUCCESS;
	}

	if (SLJIT_UNLIKELY(GET_FLAGS(op_flags)))
		compiler->flags_saved = 0;

	switch (op) {
	case SLJIT_NOT:
		if (SLJIT_UNLIKELY(op_flags & SLJIT_SET_E))
			return emit_not_with_flags(compiler, dst, dstw, src, srcw);
		return emit_unary(compiler, NOT_rm, dst, dstw, src, srcw);

	case SLJIT_NEG:
		if (SLJIT_UNLIKELY(op_flags & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
			FAIL_IF(emit_save_flags(compiler));
		return emit_unary(compiler, NEG_rm, dst, dstw, src, srcw);

	case SLJIT_CLZ:
		if (SLJIT_UNLIKELY(op_flags & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
			FAIL_IF(emit_save_flags(compiler));
		return emit_clz(compiler, op_flags, dst, dstw, src, srcw);
	}

	return SLJIT_SUCCESS;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
# undef src_is_ereg
#endif
}

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)

#define BINARY_IMM(op_imm, op_mr, immw, arg, argw) \
	if (IS_HALFWORD(immw) || compiler->mode32) { \
		inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, immw, arg, argw); \
		FAIL_IF(!inst); \
		*(inst + 1) |= (op_imm); \
	} \
	else { \
		FAIL_IF(emit_load_imm64(compiler, TMP_REG2, immw)); \
		inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, arg, argw); \
		FAIL_IF(!inst); \
		*inst = (op_mr); \
	}

#define BINARY_EAX_IMM(op_eax_imm, immw) \
	FAIL_IF(emit_do_imm32(compiler, (!compiler->mode32) ? REX_W : 0, (op_eax_imm), immw))

#else

#define BINARY_IMM(op_imm, op_mr, immw, arg, argw) \
	inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, immw, arg, argw); \
	FAIL_IF(!inst); \
	*(inst + 1) |= (op_imm);

#define BINARY_EAX_IMM(op_eax_imm, immw) \
	FAIL_IF(emit_do_imm(compiler, (op_eax_imm), immw))

#endif
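
/* A sketch of how the helpers defined below are typically invoked (the
   actual callers live in the op2 emitter, which is not part of this
   fragment): an add with all four encodings would be

	emit_cum_binary(compiler, ADD_r_rm, ADD_rm_r, ADD, ADD_EAX_i32,
		dst, dstw, src1, src1w, src2, src2w);

   where the opcodes select the reg <- r/m, r/m <- reg, r/m <- imm and
   eax <- imm forms respectively. */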

static sljit_si emit_cum_binary(struct sljit_compiler *compiler,
	sljit_ub op_rm, sljit_ub op_mr, sljit_ub op_imm, sljit_ub op_eax_imm,
	sljit_si dst, sljit_sw dstw,
	sljit_si src1, sljit_sw src1w,
	sljit_si src2, sljit_sw src2w)
{
	sljit_ub* inst;

	if (dst == SLJIT_UNUSED) {
		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
		if (src2 & SLJIT_IMM) {
			BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
		}
		else {
			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = op_rm;
		}
		return SLJIT_SUCCESS;
	}

	if (dst == src1 && dstw == src1w) {
		if (src2 & SLJIT_IMM) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
			if ((dst == SLJIT_SCRATCH_REG1) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
#else
			if ((dst == SLJIT_SCRATCH_REG1) && (src2w > 127 || src2w < -128)) {
#endif
				BINARY_EAX_IMM(op_eax_imm, src2w);
			}
			else {
				BINARY_IMM(op_imm, op_mr, src2w, dst, dstw);
			}
		}
		else if (FAST_IS_REG(dst)) {
			inst = emit_x86_instruction(compiler, 1, dst, dstw, src2, src2w);
			FAIL_IF(!inst);
			*inst = op_rm;
		}
		else if (FAST_IS_REG(src2)) {
			/* Special exception for sljit_emit_op_flags. */
			inst = emit_x86_instruction(compiler, 1, src2, src2w, dst, dstw);
			FAIL_IF(!inst);
			*inst = op_mr;
		}
		else {
			EMIT_MOV(compiler, TMP_REG1, 0, src2, src2w);
			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
			FAIL_IF(!inst);
			*inst = op_mr;
		}
		return SLJIT_SUCCESS;
	}

	/* Only for cumulative operations. */
	if (dst == src2 && dstw == src2w) {
		if (src1 & SLJIT_IMM) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
			if ((dst == SLJIT_SCRATCH_REG1) && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) {
#else
			if ((dst == SLJIT_SCRATCH_REG1) && (src1w > 127 || src1w < -128)) {
#endif
				BINARY_EAX_IMM(op_eax_imm, src1w);
			}
			else {
				BINARY_IMM(op_imm, op_mr, src1w, dst, dstw);
			}
		}
		else if (FAST_IS_REG(dst)) {
			inst = emit_x86_instruction(compiler, 1, dst, dstw, src1, src1w);
			FAIL_IF(!inst);
			*inst = op_rm;
		}
		else if (FAST_IS_REG(src1)) {
			inst = emit_x86_instruction(compiler, 1, src1, src1w, dst, dstw);
			FAIL_IF(!inst);
			*inst = op_mr;
		}
		else {
			EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
			FAIL_IF(!inst);
			*inst = op_mr;
		}
		return SLJIT_SUCCESS;
	}

	/* General version. */
	if (FAST_IS_REG(dst)) {
		EMIT_MOV(compiler, dst, 0, src1, src1w);
		if (src2 & SLJIT_IMM) {
			BINARY_IMM(op_imm, op_mr, src2w, dst, 0);
		}
		else {
			inst = emit_x86_instruction(compiler, 1, dst, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = op_rm;
		}
	}
	else {
		/* This version requires less memory writing. */
		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
		if (src2 & SLJIT_IMM) {
			BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
		}
		else {
			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = op_rm;
		}
		EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
	}

	return SLJIT_SUCCESS;
}

static sljit_si emit_non_cum_binary(struct sljit_compiler *compiler,
	sljit_ub op_rm, sljit_ub op_mr, sljit_ub op_imm, sljit_ub op_eax_imm,
	sljit_si dst, sljit_sw dstw,
	sljit_si src1, sljit_sw src1w,
	sljit_si src2, sljit_sw src2w)
{
	sljit_ub* inst;

	if (dst == SLJIT_UNUSED) {
		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
		if (src2 & SLJIT_IMM) {
			BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
		}
		else {
			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = op_rm;
		}
		return SLJIT_SUCCESS;
	}

	if (dst == src1 && dstw == src1w) {
		if (src2 & SLJIT_IMM) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
			if ((dst == SLJIT_SCRATCH_REG1) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
#else
			if ((dst == SLJIT_SCRATCH_REG1) && (src2w > 127 || src2w < -128)) {
#endif
				BINARY_EAX_IMM(op_eax_imm, src2w);
			}
			else {
				BINARY_IMM(op_imm, op_mr, src2w, dst, dstw);
			}
		}
		else if (FAST_IS_REG(dst)) {
			inst = emit_x86_instruction(compiler, 1, dst, dstw, src2, src2w);
			FAIL_IF(!inst);
			*inst = op_rm;
		}
		else if (FAST_IS_REG(src2)) {
			inst = emit_x86_instruction(compiler, 1, src2, src2w, dst, dstw);
			FAIL_IF(!inst);
			*inst = op_mr;
		}
		else {
			EMIT_MOV(compiler, TMP_REG1, 0, src2, src2w);
			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
			FAIL_IF(!inst);
			*inst = op_mr;
		}
		return SLJIT_SUCCESS;
	}

	/* General version. */
	if (FAST_IS_REG(dst) && dst != src2) {
		EMIT_MOV(compiler, dst, 0, src1, src1w);
		if (src2 & SLJIT_IMM) {
			BINARY_IMM(op_imm, op_mr, src2w, dst, 0);
		}
		else {
			inst = emit_x86_instruction(compiler, 1, dst, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = op_rm;
		}
	}
	else {
		/* This version requires less memory writing. */
		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
		if (src2 & SLJIT_IMM) {
			BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
		}
		else {
			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = op_rm;
		}
		EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
	}

	return SLJIT_SUCCESS;
}

static sljit_si emit_mul(struct sljit_compiler *compiler,
	sljit_si dst, sljit_sw dstw,
	sljit_si src1, sljit_sw src1w,
	sljit_si src2, sljit_sw src2w)
{
	sljit_ub* inst;
	sljit_si dst_r;

	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;

	/* Register destination. */
	if (dst_r == src1 && !(src2 & SLJIT_IMM)) {
		inst = emit_x86_instruction(compiler, 2, dst_r, 0, src2, src2w);
		FAIL_IF(!inst);
		*inst++ = GROUP_0F;
		*inst = IMUL_r_rm;
	}
	else if (dst_r == src2 && !(src1 & SLJIT_IMM)) {
		inst = emit_x86_instruction(compiler, 2, dst_r, 0, src1, src1w);
		FAIL_IF(!inst);
		*inst++ = GROUP_0F;
		*inst = IMUL_r_rm;
	}
	else if (src1 & SLJIT_IMM) {
		if (src2 & SLJIT_IMM) {
			EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, src2w);
			src2 = dst_r;
			src2w = 0;
		}

		if (src1w <= 127 && src1w >= -128) {
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = IMUL_r_rm_i8;
			inst = (sljit_ub*)ensure_buf(compiler, 1 + 1);
			FAIL_IF(!inst);
			INC_SIZE(1);
			*inst = (sljit_sb)src1w;
		}
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		else {
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = IMUL_r_rm_i32;
			inst = (sljit_ub*)ensure_buf(compiler, 1 + 4);
			FAIL_IF(!inst);
			INC_SIZE(4);
			*(sljit_sw*)inst = src1w;
		}
#else
		else if (IS_HALFWORD(src1w)) {
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = IMUL_r_rm_i32;
			inst = (sljit_ub*)ensure_buf(compiler, 1 + 4);
			FAIL_IF(!inst);
			INC_SIZE(4);
			*(sljit_si*)inst = (sljit_si)src1w;
		}
		else {
			EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_IMM, src1w);
			if (dst_r != src2)
				EMIT_MOV(compiler, dst_r, 0, src2, src2w);
			inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0);
			FAIL_IF(!inst);
			*inst++ = GROUP_0F;
			*inst = IMUL_r_rm;
		}
#endif
	}
	else if (src2 & SLJIT_IMM) {
		/* Note: src1 is NOT immediate. */

		if (src2w <= 127 && src2w >= -128) {
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
			FAIL_IF(!inst);
			*inst = IMUL_r_rm_i8;
			inst = (sljit_ub*)ensure_buf(compiler, 1 + 1);
			FAIL_IF(!inst);
			INC_SIZE(1);
			*inst = (sljit_sb)src2w;
		}
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		else {
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
			FAIL_IF(!inst);
			*inst = IMUL_r_rm_i32;
			inst = (sljit_ub*)ensure_buf(compiler, 1 + 4);
			FAIL_IF(!inst);
			INC_SIZE(4);
			*(sljit_sw*)inst = src2w;
		}
#else
		else if (IS_HALFWORD(src2w)) {
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
			FAIL_IF(!inst);
			*inst = IMUL_r_rm_i32;
			inst = (sljit_ub*)ensure_buf(compiler, 1 + 4);
			FAIL_IF(!inst);
			INC_SIZE(4);
			*(sljit_si*)inst = (sljit_si)src2w;
		}
		else {
			EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_IMM, src2w);
			if (dst_r != src1)
				EMIT_MOV(compiler, dst_r, 0, src1, src1w);
			inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0);
			FAIL_IF(!inst);
			*inst++ = GROUP_0F;
			*inst = IMUL_r_rm;
		}
#endif
	}
	else {
		/* Neither argument is immediate. */
		if (ADDRESSING_DEPENDS_ON(src2, dst_r))
			dst_r = TMP_REG1;
		EMIT_MOV(compiler, dst_r, 0, src1, src1w);
		inst = emit_x86_instruction(compiler, 2, dst_r, 0, src2, src2w);
		FAIL_IF(!inst);
		*inst++ = GROUP_0F;
		*inst = IMUL_r_rm;
	}

	if (dst_r == TMP_REG1)
		EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);

	return SLJIT_SUCCESS;
}
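
/* emit_mul() above picks among the three imul encodings defined earlier:
   0x0f 0xaf (reg <- reg * r/m), 0x6b (reg <- r/m * imm8) and 0x69
   (reg <- r/m * imm32), preferring the shortest immediate form that can
   represent the constant. */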

static sljit_si emit_lea_binary(struct sljit_compiler *compiler, sljit_si keep_flags,
	sljit_si dst, sljit_sw dstw,
	sljit_si src1, sljit_sw src1w,
	sljit_si src2, sljit_sw src2w)
{
	sljit_ub* inst;
	sljit_si dst_r, done = 0;

	/* These cases are better left to be handled the normal way. */
	if (!keep_flags) {
		if (dst == src1 && dstw == src1w)
			return SLJIT_ERR_UNSUPPORTED;
		if (dst == src2 && dstw == src2w)
			return SLJIT_ERR_UNSUPPORTED;
	}

	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;

	if (FAST_IS_REG(src1)) {
		if (FAST_IS_REG(src2)) {
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM2(src1, src2), 0);
			FAIL_IF(!inst);
			*inst = LEA_r_m;
			done = 1;
		}
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		if ((src2 & SLJIT_IMM) && (compiler->mode32 || IS_HALFWORD(src2w))) {
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src1), (sljit_si)src2w);
#else
		if (src2 & SLJIT_IMM) {
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src1), src2w);
#endif
			FAIL_IF(!inst);
			*inst = LEA_r_m;
			done = 1;
		}
	}
	else if (FAST_IS_REG(src2)) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		if ((src1 & SLJIT_IMM) && (compiler->mode32 || IS_HALFWORD(src1w))) {
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src2), (sljit_si)src1w);
#else
		if (src1 & SLJIT_IMM) {
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src2), src1w);
#endif
			FAIL_IF(!inst);
			*inst = LEA_r_m;
			done = 1;
		}
	}

	if (done) {
		if (dst_r == TMP_REG1)
			return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
		return SLJIT_SUCCESS;
	}
	return SLJIT_ERR_UNSUPPORTED;
}
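
/* lea computes the sum without modifying any status flags, which is why
   the keep_flags path above prefers it; when the operand combination
   cannot be encoded, SLJIT_ERR_UNSUPPORTED is returned so the caller can
   fall back to the ordinary binary emitters. */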
1835
static sljit_si emit_cmp_binary(struct sljit_compiler *compiler,
	sljit_si src1, sljit_sw src1w,
	sljit_si src2, sljit_sw src2w)
{
	sljit_ub* inst;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	if (src1 == SLJIT_SCRATCH_REG1 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
#else
	if (src1 == SLJIT_SCRATCH_REG1 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128)) {
#endif
		BINARY_EAX_IMM(CMP_EAX_i32, src2w);
		return SLJIT_SUCCESS;
	}

	if (FAST_IS_REG(src1)) {
		if (src2 & SLJIT_IMM) {
			BINARY_IMM(CMP, CMP_rm_r, src2w, src1, 0);
		}
		else {
			inst = emit_x86_instruction(compiler, 1, src1, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = CMP_r_rm;
		}
		return SLJIT_SUCCESS;
	}

	if (FAST_IS_REG(src2) && !(src1 & SLJIT_IMM)) {
		inst = emit_x86_instruction(compiler, 1, src2, 0, src1, src1w);
		FAIL_IF(!inst);
		*inst = CMP_rm_r;
		return SLJIT_SUCCESS;
	}

	if (src2 & SLJIT_IMM) {
		if (src1 & SLJIT_IMM) {
			EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
			src1 = TMP_REG1;
			src1w = 0;
		}
		BINARY_IMM(CMP, CMP_rm_r, src2w, src1, src1w);
	}
	else {
		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
		inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
		FAIL_IF(!inst);
		*inst = CMP_r_rm;
	}
	return SLJIT_SUCCESS;
}

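/* TEST computes the bitwise AND of its operands and discards the
   result, updating only the flags; a flag-only SLJIT_AND is routed
   here. The immediate form uses the GROUP_F7 encoding (TEST r/m, imm32),
   which has no sign-extended 8-bit variant, hence the separate 64-bit
   paths that first materialize a non-halfword immediate in TMP_REG2. */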
static sljit_si emit_test_binary(struct sljit_compiler *compiler,
	sljit_si src1, sljit_sw src1w,
	sljit_si src2, sljit_sw src2w)
{
	sljit_ub* inst;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	if (src1 == SLJIT_SCRATCH_REG1 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
#else
	if (src1 == SLJIT_SCRATCH_REG1 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128)) {
#endif
		BINARY_EAX_IMM(TEST_EAX_i32, src2w);
		return SLJIT_SUCCESS;
	}

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	if (src2 == SLJIT_SCRATCH_REG1 && (src1 & SLJIT_IMM) && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) {
#else
	if (src2 == SLJIT_SCRATCH_REG1 && (src1 & SLJIT_IMM) && (src1w > 127 || src1w < -128)) {
#endif
		BINARY_EAX_IMM(TEST_EAX_i32, src1w);
		return SLJIT_SUCCESS;
	}

	if (FAST_IS_REG(src1)) {
		if (src2 & SLJIT_IMM) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
			if (IS_HALFWORD(src2w) || compiler->mode32) {
				inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, 0);
				FAIL_IF(!inst);
				*inst = GROUP_F7;
			}
			else {
				FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src2w));
				inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, src1, 0);
				FAIL_IF(!inst);
				*inst = TEST_rm_r;
			}
#else
			inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, 0);
			FAIL_IF(!inst);
			*inst = GROUP_F7;
#endif
		}
		else {
			inst = emit_x86_instruction(compiler, 1, src1, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = TEST_rm_r;
		}
		return SLJIT_SUCCESS;
	}

	if (FAST_IS_REG(src2)) {
		if (src1 & SLJIT_IMM) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
			if (IS_HALFWORD(src1w) || compiler->mode32) {
				inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src1w, src2, 0);
				FAIL_IF(!inst);
				*inst = GROUP_F7;
			}
			else {
				FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src1w));
				inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, src2, 0);
				FAIL_IF(!inst);
				*inst = TEST_rm_r;
			}
#else
			inst = emit_x86_instruction(compiler, 1, src1, src1w, src2, 0);
			FAIL_IF(!inst);
			*inst = GROUP_F7;
#endif
		}
		else {
			inst = emit_x86_instruction(compiler, 1, src2, 0, src1, src1w);
			FAIL_IF(!inst);
			*inst = TEST_rm_r;
		}
		return SLJIT_SUCCESS;
	}

	EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
	if (src2 & SLJIT_IMM) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		if (IS_HALFWORD(src2w) || compiler->mode32) {
			inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, TMP_REG1, 0);
			FAIL_IF(!inst);
			*inst = GROUP_F7;
		}
		else {
			FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src2w));
			inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, TMP_REG1, 0);
			FAIL_IF(!inst);
			*inst = TEST_rm_r;
		}
#else
		inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, TMP_REG1, 0);
		FAIL_IF(!inst);
		*inst = GROUP_F7;
#endif
	}
	else {
		inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
		FAIL_IF(!inst);
		*inst = TEST_rm_r;
	}
	return SLJIT_SUCCESS;
}

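/* On x86 a variable shift count must live in CL, so every path below
   that shifts by a non-immediate, non-ECX amount first moves the count
   into SLJIT_PREF_SHIFT_REG (ECX/RCX), preserving the previous value
   when it is live. The generic case boils down to (schematic assembly,
   register names illustrative):

       mov  tmp1, src1        ; load the value to be shifted
       mov  saved, ecx        ; preserve the old count register
       mov  ecx, src2         ; the count must be in CL
       shl  tmp1, cl
       mov  ecx, saved        ; restore ECX
       mov  dst, tmp1 */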
static sljit_si emit_shift(struct sljit_compiler *compiler,
	sljit_ub mode,
	sljit_si dst, sljit_sw dstw,
	sljit_si src1, sljit_sw src1w,
	sljit_si src2, sljit_sw src2w)
{
	sljit_ub* inst;

	if ((src2 & SLJIT_IMM) || (src2 == SLJIT_PREF_SHIFT_REG)) {
		if (dst == src1 && dstw == src1w) {
			inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, dst, dstw);
			FAIL_IF(!inst);
			*inst |= mode;
			return SLJIT_SUCCESS;
		}
		if (dst == SLJIT_UNUSED) {
			EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
			inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, TMP_REG1, 0);
			FAIL_IF(!inst);
			*inst |= mode;
			return SLJIT_SUCCESS;
		}
		if (dst == SLJIT_PREF_SHIFT_REG && src2 == SLJIT_PREF_SHIFT_REG) {
			EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
			inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
			FAIL_IF(!inst);
			*inst |= mode;
			EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
			return SLJIT_SUCCESS;
		}
		if (FAST_IS_REG(dst)) {
			EMIT_MOV(compiler, dst, 0, src1, src1w);
			inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, dst, 0);
			FAIL_IF(!inst);
			*inst |= mode;
			return SLJIT_SUCCESS;
		}

		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
		inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, TMP_REG1, 0);
		FAIL_IF(!inst);
		*inst |= mode;
		EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
		return SLJIT_SUCCESS;
	}

	if (dst == SLJIT_PREF_SHIFT_REG) {
		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
		inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
		FAIL_IF(!inst);
		*inst |= mode;
		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
	}
	else if (FAST_IS_REG(dst) && dst != src2 && !ADDRESSING_DEPENDS_ON(src2, dst)) {
		if (src1 != dst)
			EMIT_MOV(compiler, dst, 0, src1, src1w);
		EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_PREF_SHIFT_REG, 0);
		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
		inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, dst, 0);
		FAIL_IF(!inst);
		*inst |= mode;
		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
	}
	else {
		/* This case is really difficult, since ecx itself may be used
		   for addressing, and we must ensure it works even in that case. */
		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_PREF_SHIFT_REG, 0);
#else
		/* [esp+0] contains the flags. */
		EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), sizeof(sljit_sw), SLJIT_PREF_SHIFT_REG, 0);
#endif
		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
		inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
		FAIL_IF(!inst);
		*inst |= mode;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG2, 0);
#else
		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), sizeof(sljit_sw));
#endif
		EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
	}

	return SLJIT_SUCCESS;
}

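/* A variable shift leaves the flags unchanged when the runtime count is
   zero. To still produce well-defined flags, the helper below either
   compares the result with zero after the shift (register destination),
   or seeds the flags by comparing src1 with zero beforehand (memory
   destination, where the result equals src1 exactly when the count is
   zero, so the pre-computed flags remain consistent). */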
static sljit_si emit_shift_with_flags(struct sljit_compiler *compiler,
	sljit_ub mode, sljit_si set_flags,
	sljit_si dst, sljit_sw dstw,
	sljit_si src1, sljit_sw src1w,
	sljit_si src2, sljit_sw src2w)
{
	/* The CPU does not set flags if the shift count is 0. */
	if (src2 & SLJIT_IMM) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		if ((src2w & 0x3f) != 0 || (compiler->mode32 && (src2w & 0x1f) != 0))
			return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);
#else
		if ((src2w & 0x1f) != 0)
			return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);
#endif
		if (!set_flags)
			return emit_mov(compiler, dst, dstw, src1, src1w);
		/* OR dst, src, 0 */
		return emit_cum_binary(compiler, OR_r_rm, OR_rm_r, OR, OR_EAX_i32,
			dst, dstw, src1, src1w, SLJIT_IMM, 0);
	}

	if (!set_flags)
		return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);

	if (!FAST_IS_REG(dst))
		FAIL_IF(emit_cmp_binary(compiler, src1, src1w, SLJIT_IMM, 0));

	FAIL_IF(emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w));

	if (FAST_IS_REG(dst))
		return emit_cmp_binary(compiler, dst, dstw, SLJIT_IMM, 0);
	return SLJIT_SUCCESS;
}

SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op2(struct sljit_compiler *compiler, sljit_si op,
	sljit_si dst, sljit_sw dstw,
	sljit_si src1, sljit_sw src1w,
	sljit_si src2, sljit_sw src2w)
{
	CHECK_ERROR();
	check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w);
	ADJUST_LOCAL_OFFSET(dst, dstw);
	ADJUST_LOCAL_OFFSET(src1, src1w);
	ADJUST_LOCAL_OFFSET(src2, src2w);

	CHECK_EXTRA_REGS(dst, dstw, (void)0);
	CHECK_EXTRA_REGS(src1, src1w, (void)0);
	CHECK_EXTRA_REGS(src2, src2w, (void)0);
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = op & SLJIT_INT_OP;
#endif

	if (GET_OPCODE(op) >= SLJIT_MUL) {
		if (SLJIT_UNLIKELY(GET_FLAGS(op)))
			compiler->flags_saved = 0;
		else if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
			FAIL_IF(emit_save_flags(compiler));
	}

	switch (GET_OPCODE(op)) {
	case SLJIT_ADD:
		if (!GET_FLAGS(op)) {
			if (emit_lea_binary(compiler, op & SLJIT_KEEP_FLAGS, dst, dstw, src1, src1w, src2, src2w) != SLJIT_ERR_UNSUPPORTED)
				return compiler->error;
		}
		else
			compiler->flags_saved = 0;
		if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
			FAIL_IF(emit_save_flags(compiler));
		return emit_cum_binary(compiler, ADD_r_rm, ADD_rm_r, ADD, ADD_EAX_i32,
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_ADDC:
		if (SLJIT_UNLIKELY(compiler->flags_saved)) /* C flag must be restored. */
			FAIL_IF(emit_restore_flags(compiler, 1));
		else if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS))
			FAIL_IF(emit_save_flags(compiler));
		if (SLJIT_UNLIKELY(GET_FLAGS(op)))
			compiler->flags_saved = 0;
		return emit_cum_binary(compiler, ADC_r_rm, ADC_rm_r, ADC, ADC_EAX_i32,
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_SUB:
		if (!GET_FLAGS(op)) {
			if ((src2 & SLJIT_IMM) && emit_lea_binary(compiler, op & SLJIT_KEEP_FLAGS, dst, dstw, src1, src1w, SLJIT_IMM, -src2w) != SLJIT_ERR_UNSUPPORTED)
				return compiler->error;
		}
		else
			compiler->flags_saved = 0;
		if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
			FAIL_IF(emit_save_flags(compiler));
		if (dst == SLJIT_UNUSED)
			return emit_cmp_binary(compiler, src1, src1w, src2, src2w);
		return emit_non_cum_binary(compiler, SUB_r_rm, SUB_rm_r, SUB, SUB_EAX_i32,
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_SUBC:
		if (SLJIT_UNLIKELY(compiler->flags_saved)) /* C flag must be restored. */
			FAIL_IF(emit_restore_flags(compiler, 1));
		else if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS))
			FAIL_IF(emit_save_flags(compiler));
		if (SLJIT_UNLIKELY(GET_FLAGS(op)))
			compiler->flags_saved = 0;
		return emit_non_cum_binary(compiler, SBB_r_rm, SBB_rm_r, SBB, SBB_EAX_i32,
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_MUL:
		return emit_mul(compiler, dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_AND:
		if (dst == SLJIT_UNUSED)
			return emit_test_binary(compiler, src1, src1w, src2, src2w);
		return emit_cum_binary(compiler, AND_r_rm, AND_rm_r, AND, AND_EAX_i32,
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_OR:
		return emit_cum_binary(compiler, OR_r_rm, OR_rm_r, OR, OR_EAX_i32,
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_XOR:
		return emit_cum_binary(compiler, XOR_r_rm, XOR_rm_r, XOR, XOR_EAX_i32,
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_SHL:
		return emit_shift_with_flags(compiler, SHL, GET_FLAGS(op),
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_LSHR:
		return emit_shift_with_flags(compiler, SHR, GET_FLAGS(op),
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_ASHR:
		return emit_shift_with_flags(compiler, SAR, GET_FLAGS(op),
			dst, dstw, src1, src1w, src2, src2w);
	}

	return SLJIT_SUCCESS;
}

SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_register_index(sljit_si reg)
{
	check_sljit_get_register_index(reg);
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	if (reg == SLJIT_TEMPORARY_EREG1 || reg == SLJIT_TEMPORARY_EREG2
			|| reg == SLJIT_SAVED_EREG1 || reg == SLJIT_SAVED_EREG2)
		return -1;
#endif
	return reg_map[reg];
}

SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_float_register_index(sljit_si reg)
{
	check_sljit_get_float_register_index(reg);
	return reg;
}

SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_custom(struct sljit_compiler *compiler,
	void *instruction, sljit_si size)
{
	sljit_ub *inst;

	CHECK_ERROR();
	check_sljit_emit_op_custom(compiler, instruction, size);
	SLJIT_ASSERT(size > 0 && size < 16);

	inst = (sljit_ub*)ensure_buf(compiler, 1 + size);
	FAIL_IF(!inst);
	INC_SIZE(size);
	SLJIT_MEMMOVE(inst, instruction, size);
	return SLJIT_SUCCESS;
}

/* --------------------------------------------------------------------- */
/*  Floating point operators                                              */
/* --------------------------------------------------------------------- */

#if (defined SLJIT_SSE2 && SLJIT_SSE2)

/* Up to 12 bytes of alignment slack + four 16-byte aligned masks. */
static sljit_si sse2_data[3 + (4 + 4) * 2];
static sljit_si *sse2_buffer;

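/* The four 16-byte masks initialized below implement SLJIT_NEGD and
   SLJIT_ABSD: XORPD with the sign-bit mask flips the sign, ANDPD with
   its complement clears it (pure bit operations, so the packed-double
   forms work for single precision values as well). Layout, in sljit_si
   units within the aligned buffer:
     sse2_buffer + 0  : 0x80000000          (single-precision sign bit)
     sse2_buffer + 4  : 0x7fffffff          (single-precision abs mask)
     sse2_buffer + 8  : 0x8000000000000000  (double-precision sign bit)
     sse2_buffer + 12 : 0x7fffffffffffffff  (double-precision abs mask) */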
static void init_compiler(void)
{
	sse2_buffer = (sljit_si*)(((sljit_uw)sse2_data + 15) & ~0xf);
	/* Single precision constants. */
	sse2_buffer[0] = 0x80000000;
	sse2_buffer[4] = 0x7fffffff;
	/* Double precision constants. */
	sse2_buffer[8] = 0;
	sse2_buffer[9] = 0x80000000;
	sse2_buffer[12] = 0xffffffff;
	sse2_buffer[13] = 0x7fffffff;
}

#endif

SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_is_fpu_available(void)
{
#ifdef SLJIT_IS_FPU_AVAILABLE
	return SLJIT_IS_FPU_AVAILABLE;
#elif (defined SLJIT_SSE2 && SLJIT_SSE2)
#if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
	if (cpu_has_sse2 == -1)
		get_cpu_features();
	return cpu_has_sse2;
#else /* SLJIT_DETECT_SSE2 */
	return 1;
#endif /* SLJIT_DETECT_SSE2 */
#else /* SLJIT_SSE2 */
	return 0;
#endif
}

#if (defined SLJIT_SSE2 && SLJIT_SSE2)
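
/* Scalar SSE2 arithmetic is encoded as F3 0F xx for single precision
   and F2 0F xx for double precision, while the packed/logic forms used
   for NEG/ABS/compare take an optional 66 prefix instead; the two
   helpers below differ only in which mandatory prefix they emit. */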

static sljit_si emit_sse2(struct sljit_compiler *compiler, sljit_ub opcode,
	sljit_si single, sljit_si xmm1, sljit_si xmm2, sljit_sw xmm2w)
{
	sljit_ub *inst;

	inst = emit_x86_instruction(compiler, 2 | (single ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2, xmm1, 0, xmm2, xmm2w);
	FAIL_IF(!inst);
	*inst++ = GROUP_0F;
	*inst = opcode;
	return SLJIT_SUCCESS;
}

static sljit_si emit_sse2_logic(struct sljit_compiler *compiler, sljit_ub opcode,
	sljit_si pref66, sljit_si xmm1, sljit_si xmm2, sljit_sw xmm2w)
{
	sljit_ub *inst;

	inst = emit_x86_instruction(compiler, 2 | (pref66 ? EX86_PREF_66 : 0) | EX86_SSE2, xmm1, 0, xmm2, xmm2w);
	FAIL_IF(!inst);
	*inst++ = GROUP_0F;
	*inst = opcode;
	return SLJIT_SUCCESS;
}

static SLJIT_INLINE sljit_si emit_sse2_load(struct sljit_compiler *compiler,
	sljit_si single, sljit_si dst, sljit_si src, sljit_sw srcw)
{
	return emit_sse2(compiler, MOVSD_x_xm, single, dst, src, srcw);
}

static SLJIT_INLINE sljit_si emit_sse2_store(struct sljit_compiler *compiler,
	sljit_si single, sljit_si dst, sljit_sw dstw, sljit_si src)
{
	return emit_sse2(compiler, MOVSD_xm_x, single, src, dst, dstw);
}

SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop1(struct sljit_compiler *compiler, sljit_si op,
	sljit_si dst, sljit_sw dstw,
	sljit_si src, sljit_sw srcw)
{
	sljit_si dst_r;

	CHECK_ERROR();
	check_sljit_emit_fop1(compiler, op, dst, dstw, src, srcw);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 1;
#endif

	if (GET_OPCODE(op) == SLJIT_CMPD) {
		compiler->flags_saved = 0;
		if (FAST_IS_REG(dst))
			dst_r = dst;
		else {
			dst_r = TMP_FREG;
			FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, dst_r, dst, dstw));
		}
		return emit_sse2_logic(compiler, UCOMISD_x_xm, !(op & SLJIT_SINGLE_OP), dst_r, src, srcw);
	}

	if (op == SLJIT_MOVD) {
		if (FAST_IS_REG(dst))
			return emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, dst, src, srcw);
		if (FAST_IS_REG(src))
			return emit_sse2_store(compiler, op & SLJIT_SINGLE_OP, dst, dstw, src);
		FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, TMP_FREG, src, srcw));
		return emit_sse2_store(compiler, op & SLJIT_SINGLE_OP, dst, dstw, TMP_FREG);
	}

	if (SLOW_IS_REG(dst)) {
		dst_r = dst;
		if (dst != src)
			FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, dst_r, src, srcw));
	}
	else {
		dst_r = TMP_FREG;
		FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, dst_r, src, srcw));
	}

	switch (GET_OPCODE(op)) {
	case SLJIT_NEGD:
		FAIL_IF(emit_sse2_logic(compiler, XORPD_x_xm, 1, dst_r, SLJIT_MEM0(), (sljit_sw)(op & SLJIT_SINGLE_OP ? sse2_buffer : sse2_buffer + 8)));
		break;

	case SLJIT_ABSD:
		FAIL_IF(emit_sse2_logic(compiler, ANDPD_x_xm, 1, dst_r, SLJIT_MEM0(), (sljit_sw)(op & SLJIT_SINGLE_OP ? sse2_buffer + 4 : sse2_buffer + 12)));
		break;
	}

	if (dst_r == TMP_FREG)
		return emit_sse2_store(compiler, op & SLJIT_SINGLE_OP, dst, dstw, TMP_FREG);
	return SLJIT_SUCCESS;
}

SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compiler, sljit_si op,
	sljit_si dst, sljit_sw dstw,
	sljit_si src1, sljit_sw src1w,
	sljit_si src2, sljit_sw src2w)
{
	sljit_si dst_r;

	CHECK_ERROR();
	check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 1;
#endif

	if (FAST_IS_REG(dst)) {
		dst_r = dst;
		if (dst == src1)
			; /* Do nothing here. */
		else if (dst == src2 && (op == SLJIT_ADDD || op == SLJIT_MULD)) {
			/* Swap arguments. */
			src2 = src1;
			src2w = src1w;
		}
		else if (dst != src2)
			FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, dst_r, src1, src1w));
		else {
			dst_r = TMP_FREG;
			FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, TMP_FREG, src1, src1w));
		}
	}
	else {
		dst_r = TMP_FREG;
		FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, TMP_FREG, src1, src1w));
	}

	switch (GET_OPCODE(op)) {
	case SLJIT_ADDD:
		FAIL_IF(emit_sse2(compiler, ADDSD_x_xm, op & SLJIT_SINGLE_OP, dst_r, src2, src2w));
		break;

	case SLJIT_SUBD:
		FAIL_IF(emit_sse2(compiler, SUBSD_x_xm, op & SLJIT_SINGLE_OP, dst_r, src2, src2w));
		break;

	case SLJIT_MULD:
		FAIL_IF(emit_sse2(compiler, MULSD_x_xm, op & SLJIT_SINGLE_OP, dst_r, src2, src2w));
		break;

	case SLJIT_DIVD:
		FAIL_IF(emit_sse2(compiler, DIVSD_x_xm, op & SLJIT_SINGLE_OP, dst_r, src2, src2w));
		break;
	}

	if (dst_r == TMP_FREG)
		return emit_sse2_store(compiler, op & SLJIT_SINGLE_OP, dst, dstw, TMP_FREG);
	return SLJIT_SUCCESS;
}

#else

SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop1(struct sljit_compiler *compiler, sljit_si op,
	sljit_si dst, sljit_sw dstw,
	sljit_si src, sljit_sw srcw)
{
	CHECK_ERROR();
	/* Should cause an assertion fail. */
	check_sljit_emit_fop1(compiler, op, dst, dstw, src, srcw);
	compiler->error = SLJIT_ERR_UNSUPPORTED;
	return SLJIT_ERR_UNSUPPORTED;
}

SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compiler, sljit_si op,
	sljit_si dst, sljit_sw dstw,
	sljit_si src1, sljit_sw src1w,
	sljit_si src2, sljit_sw src2w)
{
	CHECK_ERROR();
	/* Should cause an assertion fail. */
	check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w);
	compiler->error = SLJIT_ERR_UNSUPPORTED;
	return SLJIT_ERR_UNSUPPORTED;
}

#endif

/* --------------------------------------------------------------------- */
/*  Conditional instructions                                              */
/* --------------------------------------------------------------------- */

SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler)
{
	sljit_ub *inst;
	struct sljit_label *label;

	CHECK_ERROR_PTR();
	check_sljit_emit_label(compiler);

	/* We should restore the flags before the label,
	   since jumps targeting it may arrive with their own flag state. */
	if (SLJIT_UNLIKELY(compiler->flags_saved))
		PTR_FAIL_IF(emit_restore_flags(compiler, 0));

	if (compiler->last_label && compiler->last_label->size == compiler->size)
		return compiler->last_label;

	label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label));
	PTR_FAIL_IF(!label);
	set_label(label, compiler);

	inst = (sljit_ub*)ensure_buf(compiler, 2);
	PTR_FAIL_IF(!inst);

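	/* A zero byte in the instruction stream opens a record that is
	   resolved later during code generation; the following byte appears
	   to select the record type (0 for a label here, 1 for a constant
	   in sljit_emit_const, and type + 4 for jumps). */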
	*inst++ = 0;
	*inst++ = 0;

	return label;
}

SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_si type)
{
	sljit_ub *inst;
	struct sljit_jump *jump;

	CHECK_ERROR_PTR();
	check_sljit_emit_jump(compiler, type);

	if (SLJIT_UNLIKELY(compiler->flags_saved)) {
		if ((type & 0xff) <= SLJIT_JUMP)
			PTR_FAIL_IF(emit_restore_flags(compiler, 0));
		compiler->flags_saved = 0;
	}

	jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
	PTR_FAIL_IF_NULL(jump);
	set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
	type &= 0xff;

	if (type >= SLJIT_CALL1)
		PTR_FAIL_IF(call_with_args(compiler, type));

	/* Worst case size. */
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	compiler->size += (type >= SLJIT_JUMP) ? 5 : 6;
#else
	compiler->size += (type >= SLJIT_JUMP) ? (10 + 3) : (2 + 10 + 3);
#endif

	inst = (sljit_ub*)ensure_buf(compiler, 2);
	PTR_FAIL_IF_NULL(inst);

	*inst++ = 0;
	*inst++ = type + 4;
	return jump;
}

SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_ijump(struct sljit_compiler *compiler, sljit_si type, sljit_si src, sljit_sw srcw)
{
	sljit_ub *inst;
	struct sljit_jump *jump;

	CHECK_ERROR();
	check_sljit_emit_ijump(compiler, type, src, srcw);
	ADJUST_LOCAL_OFFSET(src, srcw);

	CHECK_EXTRA_REGS(src, srcw, (void)0);

	if (SLJIT_UNLIKELY(compiler->flags_saved)) {
		if (type <= SLJIT_JUMP)
			FAIL_IF(emit_restore_flags(compiler, 0));
		compiler->flags_saved = 0;
	}

	if (type >= SLJIT_CALL1) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
		if (src == SLJIT_SCRATCH_REG3) {
			EMIT_MOV(compiler, TMP_REG1, 0, src, 0);
			src = TMP_REG1;
		}
		if (src == SLJIT_MEM1(SLJIT_LOCALS_REG) && type >= SLJIT_CALL3)
			srcw += sizeof(sljit_sw);
#endif
#endif
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) && defined(_WIN64)
		if (src == SLJIT_SCRATCH_REG3) {
			EMIT_MOV(compiler, TMP_REG1, 0, src, 0);
			src = TMP_REG1;
		}
#endif
		FAIL_IF(call_with_args(compiler, type));
	}

	if (src == SLJIT_IMM) {
		jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
		FAIL_IF_NULL(jump);
		set_jump(jump, compiler, JUMP_ADDR);
		jump->u.target = srcw;

		/* Worst case size. */
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		compiler->size += 5;
#else
		compiler->size += 10 + 3;
#endif

		inst = (sljit_ub*)ensure_buf(compiler, 2);
		FAIL_IF_NULL(inst);

		*inst++ = 0;
		*inst++ = type + 4;
	}
	else {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		/* REX_W is not necessary (src is not immediate). */
		compiler->mode32 = 1;
#endif
		inst = emit_x86_instruction(compiler, 1, 0, 0, src, srcw);
		FAIL_IF(!inst);
		*inst++ = GROUP_FF;
		*inst |= (type >= SLJIT_FAST_CALL) ? CALL_rm : JMP_rm;
	}
	return SLJIT_SUCCESS;
}

SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_si op,
	sljit_si dst, sljit_sw dstw,
	sljit_si src, sljit_sw srcw,
	sljit_si type)
{
	sljit_ub *inst;
	sljit_ub cond_set = 0;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	sljit_si reg;
#else
	/* CHECK_EXTRA_REGS might overwrite these values. */
	sljit_si dst_save = dst;
	sljit_sw dstw_save = dstw;
#endif

	CHECK_ERROR();
	check_sljit_emit_op_flags(compiler, op, dst, dstw, src, srcw, type);

	if (dst == SLJIT_UNUSED)
		return SLJIT_SUCCESS;

	ADJUST_LOCAL_OFFSET(dst, dstw);
	CHECK_EXTRA_REGS(dst, dstw, (void)0);
	if (SLJIT_UNLIKELY(compiler->flags_saved))
		FAIL_IF(emit_restore_flags(compiler, op & SLJIT_KEEP_FLAGS));

	/* setcc = jcc + 0x10. */
	cond_set = get_jump_code(type) + 0x10;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	if (GET_OPCODE(op) == SLJIT_OR && !GET_ALL_FLAGS(op) && FAST_IS_REG(dst) && dst == src) {
		inst = (sljit_ub*)ensure_buf(compiler, 1 + 4 + 3);
		FAIL_IF(!inst);
		INC_SIZE(4 + 3);
		/* Set low register to conditional flag. */
		*inst++ = (reg_map[TMP_REG1] <= 7) ? REX : REX_B;
		*inst++ = GROUP_0F;
		*inst++ = cond_set;
		*inst++ = MOD_REG | reg_lmap[TMP_REG1];
		*inst++ = REX | (reg_map[TMP_REG1] <= 7 ? 0 : REX_R) | (reg_map[dst] <= 7 ? 0 : REX_B);
		*inst++ = OR_rm8_r8;
		*inst++ = MOD_REG | (reg_lmap[TMP_REG1] << 3) | reg_lmap[dst];
		return SLJIT_SUCCESS;
	}

	reg = (op == SLJIT_MOV && FAST_IS_REG(dst)) ? dst : TMP_REG1;

	inst = (sljit_ub*)ensure_buf(compiler, 1 + 4 + 4);
	FAIL_IF(!inst);
	INC_SIZE(4 + 4);
	/* Set low register to conditional flag. */
	*inst++ = (reg_map[reg] <= 7) ? REX : REX_B;
	*inst++ = GROUP_0F;
	*inst++ = cond_set;
	*inst++ = MOD_REG | reg_lmap[reg];
	*inst++ = REX_W | (reg_map[reg] <= 7 ? 0 : (REX_B | REX_R));
	*inst++ = GROUP_0F;
	*inst++ = MOVZX_r_rm8;
	*inst = MOD_REG | (reg_lmap[reg] << 3) | reg_lmap[reg];

	if (reg != TMP_REG1)
		return SLJIT_SUCCESS;

	if (GET_OPCODE(op) < SLJIT_ADD) {
		compiler->mode32 = GET_OPCODE(op) != SLJIT_MOV;
		return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
	}
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) || (defined SLJIT_DEBUG && SLJIT_DEBUG)
	compiler->skip_checks = 1;
#endif
	return sljit_emit_op2(compiler, op, dst, dstw, dst, dstw, TMP_REG1, 0);
#else /* SLJIT_CONFIG_X86_64 */
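	/* On IA-32 only registers whose encoding permits a low byte form
	   (AL..BL) can be the target of SETcc directly; otherwise the value
	   is built in AL after an XCHG with EAX, or composed with CMOV when
	   the CPU supports it, and then moved into place. */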
	if (GET_OPCODE(op) < SLJIT_ADD && FAST_IS_REG(dst)) {
		if (reg_map[dst] <= 4) {
			/* Low byte is accessible. */
			inst = (sljit_ub*)ensure_buf(compiler, 1 + 3 + 3);
			FAIL_IF(!inst);
			INC_SIZE(3 + 3);
			/* Set low byte to conditional flag. */
			*inst++ = GROUP_0F;
			*inst++ = cond_set;
			*inst++ = MOD_REG | reg_map[dst];

			*inst++ = GROUP_0F;
			*inst++ = MOVZX_r_rm8;
			*inst = MOD_REG | (reg_map[dst] << 3) | reg_map[dst];
			return SLJIT_SUCCESS;
		}

		/* Low byte is not accessible. */
		if (cpu_has_cmov == -1)
			get_cpu_features();

		if (cpu_has_cmov) {
			EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, 1);
			/* a xor reg, reg operation would overwrite the flags. */
			EMIT_MOV(compiler, dst, 0, SLJIT_IMM, 0);

			inst = (sljit_ub*)ensure_buf(compiler, 1 + 3);
			FAIL_IF(!inst);
			INC_SIZE(3);

			*inst++ = GROUP_0F;
			/* cmovcc = setcc - 0x50. */
			*inst++ = cond_set - 0x50;
			*inst++ = MOD_REG | (reg_map[dst] << 3) | reg_map[TMP_REG1];
			return SLJIT_SUCCESS;
		}

		inst = (sljit_ub*)ensure_buf(compiler, 1 + 1 + 3 + 3 + 1);
		FAIL_IF(!inst);
		INC_SIZE(1 + 3 + 3 + 1);
		*inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
		/* Set al to conditional flag. */
		*inst++ = GROUP_0F;
		*inst++ = cond_set;
		*inst++ = MOD_REG | 0 /* eax */;

		*inst++ = GROUP_0F;
		*inst++ = MOVZX_r_rm8;
		*inst++ = MOD_REG | (reg_map[dst] << 3) | 0 /* eax */;
		*inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
		return SLJIT_SUCCESS;
	}

	if (GET_OPCODE(op) == SLJIT_OR && !GET_ALL_FLAGS(op) && FAST_IS_REG(dst) && dst == src && reg_map[dst] <= 4) {
		SLJIT_COMPILE_ASSERT(reg_map[SLJIT_SCRATCH_REG1] == 0, scratch_reg1_must_be_eax);
		if (dst != SLJIT_SCRATCH_REG1) {
			inst = (sljit_ub*)ensure_buf(compiler, 1 + 1 + 3 + 2 + 1);
			FAIL_IF(!inst);
			INC_SIZE(1 + 3 + 2 + 1);
			/* Set low register to conditional flag. */
			*inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
			*inst++ = GROUP_0F;
			*inst++ = cond_set;
			*inst++ = MOD_REG | 0 /* eax */;
			*inst++ = OR_rm8_r8;
			*inst++ = MOD_REG | (0 /* eax */ << 3) | reg_map[dst];
			*inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
		}
		else {
			inst = (sljit_ub*)ensure_buf(compiler, 1 + 2 + 3 + 2 + 2);
			FAIL_IF(!inst);
			INC_SIZE(2 + 3 + 2 + 2);
			/* Set low register to conditional flag. */
			*inst++ = XCHG_r_rm;
			*inst++ = MOD_REG | (1 /* ecx */ << 3) | reg_map[TMP_REG1];
			*inst++ = GROUP_0F;
			*inst++ = cond_set;
			*inst++ = MOD_REG | 1 /* ecx */;
			*inst++ = OR_rm8_r8;
			*inst++ = MOD_REG | (1 /* ecx */ << 3) | 0 /* eax */;
			*inst++ = XCHG_r_rm;
			*inst++ = MOD_REG | (1 /* ecx */ << 3) | reg_map[TMP_REG1];
		}
		return SLJIT_SUCCESS;
	}

	/* Set TMP_REG1 to the bit. */
	inst = (sljit_ub*)ensure_buf(compiler, 1 + 1 + 3 + 3 + 1);
	FAIL_IF(!inst);
	INC_SIZE(1 + 3 + 3 + 1);
	*inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
	/* Set al to conditional flag. */
	*inst++ = GROUP_0F;
	*inst++ = cond_set;
	*inst++ = MOD_REG | 0 /* eax */;

	*inst++ = GROUP_0F;
	*inst++ = MOVZX_r_rm8;
	*inst++ = MOD_REG | (0 << 3) /* eax */ | 0 /* eax */;

	*inst++ = XCHG_EAX_r + reg_map[TMP_REG1];

	if (GET_OPCODE(op) < SLJIT_ADD)
		return emit_mov(compiler, dst, dstw, TMP_REG1, 0);

#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) || (defined SLJIT_DEBUG && SLJIT_DEBUG)
	compiler->skip_checks = 1;
#endif
	return sljit_emit_op2(compiler, op, dst_save, dstw_save, dst_save, dstw_save, TMP_REG1, 0);
#endif /* SLJIT_CONFIG_X86_64 */
}

SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_local_base(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw, sljit_sw offset)
{
	CHECK_ERROR();
	check_sljit_get_local_base(compiler, dst, dstw, offset);
	ADJUST_LOCAL_OFFSET(dst, dstw);

	CHECK_EXTRA_REGS(dst, dstw, (void)0);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 0;
#endif

	ADJUST_LOCAL_OFFSET(SLJIT_MEM1(SLJIT_LOCALS_REG), offset);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	if (NOT_HALFWORD(offset)) {
		FAIL_IF(emit_load_imm64(compiler, TMP_REG1, offset));
#if (defined SLJIT_DEBUG && SLJIT_DEBUG)
		SLJIT_ASSERT(emit_lea_binary(compiler, SLJIT_KEEP_FLAGS, dst, dstw, SLJIT_LOCALS_REG, 0, TMP_REG1, 0) != SLJIT_ERR_UNSUPPORTED);
		return compiler->error;
#else
		return emit_lea_binary(compiler, SLJIT_KEEP_FLAGS, dst, dstw, SLJIT_LOCALS_REG, 0, TMP_REG1, 0);
#endif
	}
#endif

	if (offset != 0)
		return emit_lea_binary(compiler, SLJIT_KEEP_FLAGS, dst, dstw, SLJIT_LOCALS_REG, 0, SLJIT_IMM, offset);
	return emit_mov(compiler, dst, dstw, SLJIT_LOCALS_REG, 0);
}

SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw, sljit_sw init_value)
{
	sljit_ub *inst;
	struct sljit_const *const_;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	sljit_si reg;
#endif

	CHECK_ERROR_PTR();
	check_sljit_emit_const(compiler, dst, dstw, init_value);
	ADJUST_LOCAL_OFFSET(dst, dstw);

	CHECK_EXTRA_REGS(dst, dstw, (void)0);

	const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const));
	PTR_FAIL_IF(!const_);
	set_const(const_, compiler);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 0;
	reg = SLOW_IS_REG(dst) ? dst : TMP_REG1;

	if (emit_load_imm64(compiler, reg, init_value))
		return NULL;
#else
	if (dst == SLJIT_UNUSED)
		dst = TMP_REG1;

	if (emit_mov(compiler, dst, dstw, SLJIT_IMM, init_value))
		return NULL;
#endif

	inst = (sljit_ub*)ensure_buf(compiler, 2);
	PTR_FAIL_IF(!inst);

	*inst++ = 0;
	*inst++ = 1;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	if (dst & SLJIT_MEM)
		if (emit_mov(compiler, dst, dstw, TMP_REG1, 0))
			return NULL;
#endif

	return const_;
}

SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_addr)
{
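	/* On x86-32 the patched field holds a rel32 displacement measured
	   from the end of the 4-byte immediate; on x86-64 jump targets are
	   materialized as absolute 64-bit addresses, so the full address is
	   stored. */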
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	*(sljit_sw*)addr = new_addr - (addr + 4);
#else
	*(sljit_uw*)addr = new_addr;
#endif
}

SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant)
{
	*(sljit_sw*)addr = new_constant;
}