1 1.10 christos /* $NetBSD: sljitNativeX86_common.c,v 1.10 2021/11/30 12:32:09 christos Exp $ */ 2 1.6 alnsn 3 1.1 alnsn /* 4 1.1 alnsn * Stack-less Just-In-Time compiler 5 1.1 alnsn * 6 1.9 alnsn * Copyright Zoltan Herczeg (hzmester (at) freemail.hu). All rights reserved. 7 1.1 alnsn * 8 1.1 alnsn * Redistribution and use in source and binary forms, with or without modification, are 9 1.1 alnsn * permitted provided that the following conditions are met: 10 1.1 alnsn * 11 1.1 alnsn * 1. Redistributions of source code must retain the above copyright notice, this list of 12 1.1 alnsn * conditions and the following disclaimer. 13 1.1 alnsn * 14 1.1 alnsn * 2. Redistributions in binary form must reproduce the above copyright notice, this list 15 1.1 alnsn * of conditions and the following disclaimer in the documentation and/or other materials 16 1.1 alnsn * provided with the distribution. 17 1.1 alnsn * 18 1.1 alnsn * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY 19 1.1 alnsn * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 20 1.1 alnsn * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT 21 1.1 alnsn * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 22 1.1 alnsn * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED 23 1.1 alnsn * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR 24 1.1 alnsn * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 25 1.1 alnsn * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 26 1.1 alnsn * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
27 1.1 alnsn */ 28 1.1 alnsn 29 1.8 alnsn SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void) 30 1.1 alnsn { 31 1.1 alnsn return "x86" SLJIT_CPUINFO; 32 1.1 alnsn } 33 1.1 alnsn 34 1.1 alnsn /* 35 1.1 alnsn 32b register indexes: 36 1.1 alnsn 0 - EAX 37 1.1 alnsn 1 - ECX 38 1.1 alnsn 2 - EDX 39 1.1 alnsn 3 - EBX 40 1.1 alnsn 4 - none 41 1.1 alnsn 5 - EBP 42 1.1 alnsn 6 - ESI 43 1.1 alnsn 7 - EDI 44 1.1 alnsn */ 45 1.1 alnsn 46 1.1 alnsn /* 47 1.1 alnsn 64b register indexes: 48 1.1 alnsn 0 - RAX 49 1.1 alnsn 1 - RCX 50 1.1 alnsn 2 - RDX 51 1.1 alnsn 3 - RBX 52 1.1 alnsn 4 - none 53 1.1 alnsn 5 - RBP 54 1.1 alnsn 6 - RSI 55 1.1 alnsn 7 - RDI 56 1.1 alnsn 8 - R8 - From now on REX prefix is required 57 1.1 alnsn 9 - R9 58 1.1 alnsn 10 - R10 59 1.1 alnsn 11 - R11 60 1.1 alnsn 12 - R12 61 1.1 alnsn 13 - R13 62 1.1 alnsn 14 - R14 63 1.1 alnsn 15 - R15 64 1.1 alnsn */ 65 1.1 alnsn 66 1.1 alnsn #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) 67 1.1 alnsn 68 1.1 alnsn /* Last register + 1. */ 69 1.8 alnsn #define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2) 70 1.1 alnsn 71 1.8 alnsn static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 3] = { 72 1.9 alnsn 0, 0, 2, 1, 0, 0, 0, 0, 0, 0, 7, 6, 3, 4, 5 73 1.1 alnsn }; 74 1.1 alnsn 75 1.1 alnsn #define CHECK_EXTRA_REGS(p, w, do) \ 76 1.9 alnsn if (p >= SLJIT_R3 && p <= SLJIT_S3) { \ 77 1.9 alnsn if (p <= compiler->scratches) \ 78 1.9 alnsn w = compiler->saveds_offset - ((p) - SLJIT_R2) * (sljit_sw)sizeof(sljit_sw); \ 79 1.9 alnsn else \ 80 1.9 alnsn w = compiler->locals_offset + ((p) - SLJIT_S2) * (sljit_sw)sizeof(sljit_sw); \ 81 1.8 alnsn p = SLJIT_MEM1(SLJIT_SP); \ 82 1.1 alnsn do; \ 83 1.1 alnsn } 84 1.1 alnsn 85 1.1 alnsn #else /* SLJIT_CONFIG_X86_32 */ 86 1.1 alnsn 87 1.1 alnsn /* Last register + 1. 
*/ 88 1.8 alnsn #define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2) 89 1.8 alnsn #define TMP_REG2 (SLJIT_NUMBER_OF_REGISTERS + 3) 90 1.8 alnsn #define TMP_REG3 (SLJIT_NUMBER_OF_REGISTERS + 4) 91 1.1 alnsn 92 1.1 alnsn /* Note: r12 & 0x7 == 0b100, which decoded as SIB byte present 93 1.1 alnsn Note: avoid to use r12 and r13 for memory addessing 94 1.1 alnsn therefore r12 is better for SAVED_EREG than SAVED_REG. */ 95 1.1 alnsn #ifndef _WIN64 96 1.1 alnsn /* 1st passed in rdi, 2nd argument passed in rsi, 3rd in rdx. */ 97 1.8 alnsn static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 5] = { 98 1.8 alnsn 0, 0, 6, 1, 8, 11, 10, 12, 5, 13, 14, 15, 3, 4, 2, 7, 9 99 1.1 alnsn }; 100 1.1 alnsn /* low-map. reg_map & 0x7. */ 101 1.8 alnsn static const sljit_u8 reg_lmap[SLJIT_NUMBER_OF_REGISTERS + 5] = { 102 1.8 alnsn 0, 0, 6, 1, 0, 3, 2, 4, 5, 5, 6, 7, 3, 4, 2, 7, 1 103 1.1 alnsn }; 104 1.1 alnsn #else 105 1.1 alnsn /* 1st passed in rcx, 2nd argument passed in rdx, 3rd in r8. */ 106 1.8 alnsn static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 5] = { 107 1.8 alnsn 0, 0, 2, 1, 11, 12, 5, 13, 14, 15, 7, 6, 3, 4, 10, 8, 9 108 1.1 alnsn }; 109 1.1 alnsn /* low-map. reg_map & 0x7. 
*/ 110 1.8 alnsn static const sljit_u8 reg_lmap[SLJIT_NUMBER_OF_REGISTERS + 5] = { 111 1.8 alnsn 0, 0, 2, 1, 3, 4, 5, 5, 6, 7, 7, 6, 3, 4, 2, 0, 1 112 1.1 alnsn }; 113 1.1 alnsn #endif 114 1.1 alnsn 115 1.1 alnsn #define REX_W 0x48 116 1.1 alnsn #define REX_R 0x44 117 1.1 alnsn #define REX_X 0x42 118 1.1 alnsn #define REX_B 0x41 119 1.1 alnsn #define REX 0x40 120 1.1 alnsn 121 1.5 alnsn #ifndef _WIN64 122 1.5 alnsn #define HALFWORD_MAX 0x7fffffffl 123 1.5 alnsn #define HALFWORD_MIN -0x80000000l 124 1.5 alnsn #else 125 1.5 alnsn #define HALFWORD_MAX 0x7fffffffll 126 1.5 alnsn #define HALFWORD_MIN -0x80000000ll 127 1.5 alnsn #endif 128 1.1 alnsn 129 1.5 alnsn #define IS_HALFWORD(x) ((x) <= HALFWORD_MAX && (x) >= HALFWORD_MIN) 130 1.5 alnsn #define NOT_HALFWORD(x) ((x) > HALFWORD_MAX || (x) < HALFWORD_MIN) 131 1.1 alnsn 132 1.1 alnsn #define CHECK_EXTRA_REGS(p, w, do) 133 1.1 alnsn 134 1.1 alnsn #endif /* SLJIT_CONFIG_X86_32 */ 135 1.1 alnsn 136 1.5 alnsn #define TMP_FREG (0) 137 1.1 alnsn 138 1.1 alnsn /* Size flags for emit_x86_instruction: */ 139 1.1 alnsn #define EX86_BIN_INS 0x0010 140 1.1 alnsn #define EX86_SHIFT_INS 0x0020 141 1.1 alnsn #define EX86_REX 0x0040 142 1.1 alnsn #define EX86_NO_REXW 0x0080 143 1.1 alnsn #define EX86_BYTE_ARG 0x0100 144 1.1 alnsn #define EX86_HALF_ARG 0x0200 145 1.1 alnsn #define EX86_PREF_66 0x0400 146 1.8 alnsn #define EX86_PREF_F2 0x0800 147 1.8 alnsn #define EX86_PREF_F3 0x1000 148 1.8 alnsn #define EX86_SSE2_OP1 0x2000 149 1.8 alnsn #define EX86_SSE2_OP2 0x4000 150 1.8 alnsn #define EX86_SSE2 (EX86_SSE2_OP1 | EX86_SSE2_OP2) 151 1.1 alnsn 152 1.5 alnsn /* --------------------------------------------------------------------- */ 153 1.5 alnsn /* Instrucion forms */ 154 1.5 alnsn /* --------------------------------------------------------------------- */ 155 1.1 alnsn 156 1.5 alnsn #define ADD (/* BINARY */ 0 << 3) 157 1.5 alnsn #define ADD_EAX_i32 0x05 158 1.5 alnsn #define ADD_r_rm 0x03 159 1.5 alnsn #define ADD_rm_r 0x01 160 1.5 
alnsn #define ADDSD_x_xm 0x58 161 1.5 alnsn #define ADC (/* BINARY */ 2 << 3) 162 1.5 alnsn #define ADC_EAX_i32 0x15 163 1.5 alnsn #define ADC_r_rm 0x13 164 1.5 alnsn #define ADC_rm_r 0x11 165 1.5 alnsn #define AND (/* BINARY */ 4 << 3) 166 1.5 alnsn #define AND_EAX_i32 0x25 167 1.5 alnsn #define AND_r_rm 0x23 168 1.5 alnsn #define AND_rm_r 0x21 169 1.5 alnsn #define ANDPD_x_xm 0x54 170 1.5 alnsn #define BSR_r_rm (/* GROUP_0F */ 0xbd) 171 1.5 alnsn #define CALL_i32 0xe8 172 1.5 alnsn #define CALL_rm (/* GROUP_FF */ 2 << 3) 173 1.5 alnsn #define CDQ 0x99 174 1.5 alnsn #define CMOVNE_r_rm (/* GROUP_0F */ 0x45) 175 1.5 alnsn #define CMP (/* BINARY */ 7 << 3) 176 1.5 alnsn #define CMP_EAX_i32 0x3d 177 1.5 alnsn #define CMP_r_rm 0x3b 178 1.5 alnsn #define CMP_rm_r 0x39 179 1.8 alnsn #define CVTPD2PS_x_xm 0x5a 180 1.8 alnsn #define CVTSI2SD_x_rm 0x2a 181 1.8 alnsn #define CVTTSD2SI_r_xm 0x2c 182 1.5 alnsn #define DIV (/* GROUP_F7 */ 6 << 3) 183 1.5 alnsn #define DIVSD_x_xm 0x5e 184 1.5 alnsn #define INT3 0xcc 185 1.5 alnsn #define IDIV (/* GROUP_F7 */ 7 << 3) 186 1.5 alnsn #define IMUL (/* GROUP_F7 */ 5 << 3) 187 1.5 alnsn #define IMUL_r_rm (/* GROUP_0F */ 0xaf) 188 1.5 alnsn #define IMUL_r_rm_i8 0x6b 189 1.5 alnsn #define IMUL_r_rm_i32 0x69 190 1.5 alnsn #define JE_i8 0x74 191 1.8 alnsn #define JNE_i8 0x75 192 1.5 alnsn #define JMP_i8 0xeb 193 1.5 alnsn #define JMP_i32 0xe9 194 1.5 alnsn #define JMP_rm (/* GROUP_FF */ 4 << 3) 195 1.5 alnsn #define LEA_r_m 0x8d 196 1.5 alnsn #define MOV_r_rm 0x8b 197 1.5 alnsn #define MOV_r_i32 0xb8 198 1.5 alnsn #define MOV_rm_r 0x89 199 1.5 alnsn #define MOV_rm_i32 0xc7 200 1.5 alnsn #define MOV_rm8_i8 0xc6 201 1.5 alnsn #define MOV_rm8_r8 0x88 202 1.5 alnsn #define MOVSD_x_xm 0x10 203 1.5 alnsn #define MOVSD_xm_x 0x11 204 1.5 alnsn #define MOVSXD_r_rm 0x63 205 1.5 alnsn #define MOVSX_r_rm8 (/* GROUP_0F */ 0xbe) 206 1.5 alnsn #define MOVSX_r_rm16 (/* GROUP_0F */ 0xbf) 207 1.5 alnsn #define MOVZX_r_rm8 (/* GROUP_0F */ 0xb6) 208 1.5 
alnsn #define MOVZX_r_rm16 (/* GROUP_0F */ 0xb7) 209 1.5 alnsn #define MUL (/* GROUP_F7 */ 4 << 3) 210 1.5 alnsn #define MULSD_x_xm 0x59 211 1.5 alnsn #define NEG_rm (/* GROUP_F7 */ 3 << 3) 212 1.5 alnsn #define NOP 0x90 213 1.5 alnsn #define NOT_rm (/* GROUP_F7 */ 2 << 3) 214 1.5 alnsn #define OR (/* BINARY */ 1 << 3) 215 1.5 alnsn #define OR_r_rm 0x0b 216 1.5 alnsn #define OR_EAX_i32 0x0d 217 1.5 alnsn #define OR_rm_r 0x09 218 1.5 alnsn #define OR_rm8_r8 0x08 219 1.5 alnsn #define POP_r 0x58 220 1.5 alnsn #define POP_rm 0x8f 221 1.5 alnsn #define POPF 0x9d 222 1.5 alnsn #define PUSH_i32 0x68 223 1.5 alnsn #define PUSH_r 0x50 224 1.5 alnsn #define PUSH_rm (/* GROUP_FF */ 6 << 3) 225 1.5 alnsn #define PUSHF 0x9c 226 1.5 alnsn #define RET_near 0xc3 227 1.5 alnsn #define RET_i16 0xc2 228 1.5 alnsn #define SBB (/* BINARY */ 3 << 3) 229 1.5 alnsn #define SBB_EAX_i32 0x1d 230 1.5 alnsn #define SBB_r_rm 0x1b 231 1.5 alnsn #define SBB_rm_r 0x19 232 1.5 alnsn #define SAR (/* SHIFT */ 7 << 3) 233 1.5 alnsn #define SHL (/* SHIFT */ 4 << 3) 234 1.5 alnsn #define SHR (/* SHIFT */ 5 << 3) 235 1.5 alnsn #define SUB (/* BINARY */ 5 << 3) 236 1.5 alnsn #define SUB_EAX_i32 0x2d 237 1.5 alnsn #define SUB_r_rm 0x2b 238 1.5 alnsn #define SUB_rm_r 0x29 239 1.5 alnsn #define SUBSD_x_xm 0x5c 240 1.5 alnsn #define TEST_EAX_i32 0xa9 241 1.5 alnsn #define TEST_rm_r 0x85 242 1.5 alnsn #define UCOMISD_x_xm 0x2e 243 1.8 alnsn #define UNPCKLPD_x_xm 0x14 244 1.5 alnsn #define XCHG_EAX_r 0x90 245 1.5 alnsn #define XCHG_r_rm 0x87 246 1.5 alnsn #define XOR (/* BINARY */ 6 << 3) 247 1.5 alnsn #define XOR_EAX_i32 0x35 248 1.5 alnsn #define XOR_r_rm 0x33 249 1.5 alnsn #define XOR_rm_r 0x31 250 1.5 alnsn #define XORPD_x_xm 0x57 251 1.5 alnsn 252 1.5 alnsn #define GROUP_0F 0x0f 253 1.5 alnsn #define GROUP_F7 0xf7 254 1.5 alnsn #define GROUP_FF 0xff 255 1.5 alnsn #define GROUP_BINARY_81 0x81 256 1.5 alnsn #define GROUP_BINARY_83 0x83 257 1.5 alnsn #define GROUP_SHIFT_1 0xd1 258 1.5 alnsn #define 
GROUP_SHIFT_N 0xc1 259 1.5 alnsn #define GROUP_SHIFT_CL 0xd3 260 1.5 alnsn 261 1.5 alnsn #define MOD_REG 0xc0 262 1.5 alnsn #define MOD_DISP8 0x40 263 1.5 alnsn 264 1.5 alnsn #define INC_SIZE(s) (*inst++ = (s), compiler->size += (s)) 265 1.5 alnsn 266 1.5 alnsn #define PUSH_REG(r) (*inst++ = (PUSH_r + (r))) 267 1.5 alnsn #define POP_REG(r) (*inst++ = (POP_r + (r))) 268 1.5 alnsn #define RET() (*inst++ = (RET_near)) 269 1.5 alnsn #define RET_I16(n) (*inst++ = (RET_i16), *inst++ = n, *inst++ = 0) 270 1.1 alnsn /* r32, r/m32 */ 271 1.5 alnsn #define MOV_RM(mod, reg, rm) (*inst++ = (MOV_r_rm), *inst++ = (mod) << 6 | (reg) << 3 | (rm)) 272 1.5 alnsn 273 1.5 alnsn /* Multithreading does not affect these static variables, since they store 274 1.5 alnsn built-in CPU features. Therefore they can be overwritten by different threads 275 1.5 alnsn if they detect the CPU features in the same time. */ 276 1.8 alnsn #if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2) 277 1.8 alnsn static sljit_s32 cpu_has_sse2 = -1; 278 1.5 alnsn #endif 279 1.8 alnsn static sljit_s32 cpu_has_cmov = -1; 280 1.5 alnsn 281 1.8 alnsn #ifdef _WIN32_WCE 282 1.8 alnsn #include <cmnintrin.h> 283 1.8 alnsn #elif defined(_MSC_VER) && _MSC_VER >= 1400 284 1.5 alnsn #include <intrin.h> 285 1.5 alnsn #endif 286 1.1 alnsn 287 1.9 alnsn /******************************************************/ 288 1.9 alnsn /* Unaligned-store functions */ 289 1.9 alnsn /******************************************************/ 290 1.9 alnsn 291 1.9 alnsn static SLJIT_INLINE void sljit_unaligned_store_s16(void *addr, sljit_s16 value) 292 1.9 alnsn { 293 1.9 alnsn SLJIT_MEMCPY(addr, &value, sizeof(value)); 294 1.9 alnsn } 295 1.9 alnsn 296 1.9 alnsn static SLJIT_INLINE void sljit_unaligned_store_s32(void *addr, sljit_s32 value) 297 1.9 alnsn { 298 1.9 alnsn SLJIT_MEMCPY(addr, &value, sizeof(value)); 299 1.9 alnsn } 300 1.9 alnsn 301 1.9 alnsn static SLJIT_INLINE void sljit_unaligned_store_sw(void *addr, sljit_sw value) 302 1.9 
alnsn { 303 1.9 alnsn SLJIT_MEMCPY(addr, &value, sizeof(value)); 304 1.9 alnsn } 305 1.9 alnsn 306 1.9 alnsn /******************************************************/ 307 1.9 alnsn /* Utility functions */ 308 1.9 alnsn /******************************************************/ 309 1.9 alnsn 310 1.5 alnsn static void get_cpu_features(void) 311 1.5 alnsn { 312 1.8 alnsn sljit_u32 features; 313 1.5 alnsn 314 1.5 alnsn #if defined(_MSC_VER) && _MSC_VER >= 1400 315 1.5 alnsn 316 1.5 alnsn int CPUInfo[4]; 317 1.5 alnsn __cpuid(CPUInfo, 1); 318 1.8 alnsn features = (sljit_u32)CPUInfo[3]; 319 1.5 alnsn 320 1.10 christos #elif defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__SUNPRO_C) || defined(__lint__) 321 1.5 alnsn 322 1.5 alnsn /* AT&T syntax. */ 323 1.5 alnsn __asm__ ( 324 1.5 alnsn "movl $0x1, %%eax\n" 325 1.5 alnsn #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) 326 1.5 alnsn /* On x86-32, there is no red zone, so this 327 1.5 alnsn should work (no need for a local variable). */ 328 1.5 alnsn "push %%ebx\n" 329 1.5 alnsn #endif 330 1.5 alnsn "cpuid\n" 331 1.5 alnsn #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) 332 1.5 alnsn "pop %%ebx\n" 333 1.5 alnsn #endif 334 1.5 alnsn "movl %%edx, %0\n" 335 1.5 alnsn : "=g" (features) 336 1.5 alnsn : 337 1.5 alnsn #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) 338 1.5 alnsn : "%eax", "%ecx", "%edx" 339 1.5 alnsn #else 340 1.5 alnsn : "%rax", "%rbx", "%rcx", "%rdx" 341 1.5 alnsn #endif 342 1.5 alnsn ); 343 1.5 alnsn 344 1.5 alnsn #else /* _MSC_VER && _MSC_VER >= 1400 */ 345 1.5 alnsn 346 1.5 alnsn /* Intel syntax. 
*/ 347 1.5 alnsn __asm { 348 1.5 alnsn mov eax, 1 349 1.5 alnsn cpuid 350 1.5 alnsn mov features, edx 351 1.5 alnsn } 352 1.5 alnsn 353 1.5 alnsn #endif /* _MSC_VER && _MSC_VER >= 1400 */ 354 1.5 alnsn 355 1.8 alnsn #if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2) 356 1.5 alnsn cpu_has_sse2 = (features >> 26) & 0x1; 357 1.5 alnsn #endif 358 1.5 alnsn cpu_has_cmov = (features >> 15) & 0x1; 359 1.5 alnsn } 360 1.5 alnsn 361 1.8 alnsn static sljit_u8 get_jump_code(sljit_s32 type) 362 1.1 alnsn { 363 1.1 alnsn switch (type) { 364 1.8 alnsn case SLJIT_EQUAL: 365 1.8 alnsn case SLJIT_EQUAL_F64: 366 1.5 alnsn return 0x84 /* je */; 367 1.1 alnsn 368 1.8 alnsn case SLJIT_NOT_EQUAL: 369 1.8 alnsn case SLJIT_NOT_EQUAL_F64: 370 1.5 alnsn return 0x85 /* jne */; 371 1.1 alnsn 372 1.8 alnsn case SLJIT_LESS: 373 1.8 alnsn case SLJIT_LESS_F64: 374 1.5 alnsn return 0x82 /* jc */; 375 1.1 alnsn 376 1.8 alnsn case SLJIT_GREATER_EQUAL: 377 1.8 alnsn case SLJIT_GREATER_EQUAL_F64: 378 1.5 alnsn return 0x83 /* jae */; 379 1.1 alnsn 380 1.8 alnsn case SLJIT_GREATER: 381 1.8 alnsn case SLJIT_GREATER_F64: 382 1.5 alnsn return 0x87 /* jnbe */; 383 1.1 alnsn 384 1.8 alnsn case SLJIT_LESS_EQUAL: 385 1.8 alnsn case SLJIT_LESS_EQUAL_F64: 386 1.5 alnsn return 0x86 /* jbe */; 387 1.1 alnsn 388 1.8 alnsn case SLJIT_SIG_LESS: 389 1.5 alnsn return 0x8c /* jl */; 390 1.1 alnsn 391 1.8 alnsn case SLJIT_SIG_GREATER_EQUAL: 392 1.5 alnsn return 0x8d /* jnl */; 393 1.1 alnsn 394 1.8 alnsn case SLJIT_SIG_GREATER: 395 1.5 alnsn return 0x8f /* jnle */; 396 1.1 alnsn 397 1.8 alnsn case SLJIT_SIG_LESS_EQUAL: 398 1.5 alnsn return 0x8e /* jle */; 399 1.1 alnsn 400 1.8 alnsn case SLJIT_OVERFLOW: 401 1.8 alnsn case SLJIT_MUL_OVERFLOW: 402 1.5 alnsn return 0x80 /* jo */; 403 1.1 alnsn 404 1.8 alnsn case SLJIT_NOT_OVERFLOW: 405 1.8 alnsn case SLJIT_MUL_NOT_OVERFLOW: 406 1.5 alnsn return 0x81 /* jno */; 407 1.1 alnsn 408 1.8 alnsn case SLJIT_UNORDERED_F64: 409 1.5 alnsn return 0x8a /* jp */; 410 1.1 alnsn 411 1.8 
alnsn case SLJIT_ORDERED_F64: 412 1.5 alnsn return 0x8b /* jpo */; 413 1.1 alnsn } 414 1.1 alnsn return 0; 415 1.1 alnsn } 416 1.1 alnsn 417 1.9 alnsn #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) 418 1.9 alnsn static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_s32 type, sljit_sw executable_offset); 419 1.9 alnsn #else 420 1.8 alnsn static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_s32 type); 421 1.1 alnsn #endif 422 1.1 alnsn 423 1.9 alnsn static sljit_u8* generate_near_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_u8 *code, sljit_s32 type, sljit_sw executable_offset) 424 1.1 alnsn { 425 1.8 alnsn sljit_s32 short_jump; 426 1.1 alnsn sljit_uw label_addr; 427 1.1 alnsn 428 1.1 alnsn if (jump->flags & JUMP_LABEL) 429 1.1 alnsn label_addr = (sljit_uw)(code + jump->u.label->size); 430 1.1 alnsn else 431 1.9 alnsn label_addr = jump->u.target - executable_offset; 432 1.9 alnsn 433 1.5 alnsn short_jump = (sljit_sw)(label_addr - (jump->addr + 2)) >= -128 && (sljit_sw)(label_addr - (jump->addr + 2)) <= 127; 434 1.1 alnsn 435 1.1 alnsn #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) 436 1.5 alnsn if ((sljit_sw)(label_addr - (jump->addr + 1)) > HALFWORD_MAX || (sljit_sw)(label_addr - (jump->addr + 1)) < HALFWORD_MIN) 437 1.1 alnsn return generate_far_jump_code(jump, code_ptr, type); 438 1.1 alnsn #endif 439 1.1 alnsn 440 1.1 alnsn if (type == SLJIT_JUMP) { 441 1.1 alnsn if (short_jump) 442 1.5 alnsn *code_ptr++ = JMP_i8; 443 1.1 alnsn else 444 1.5 alnsn *code_ptr++ = JMP_i32; 445 1.1 alnsn jump->addr++; 446 1.1 alnsn } 447 1.1 alnsn else if (type >= SLJIT_FAST_CALL) { 448 1.1 alnsn short_jump = 0; 449 1.5 alnsn *code_ptr++ = CALL_i32; 450 1.1 alnsn jump->addr++; 451 1.1 alnsn } 452 1.1 alnsn else if (short_jump) { 453 1.1 alnsn *code_ptr++ = get_jump_code(type) - 0x10; 454 1.1 alnsn jump->addr++; 455 1.1 alnsn } 456 1.1 alnsn else { 457 1.5 alnsn *code_ptr++ = 
GROUP_0F; 458 1.1 alnsn *code_ptr++ = get_jump_code(type); 459 1.1 alnsn jump->addr += 2; 460 1.1 alnsn } 461 1.1 alnsn 462 1.1 alnsn if (short_jump) { 463 1.1 alnsn jump->flags |= PATCH_MB; 464 1.8 alnsn code_ptr += sizeof(sljit_s8); 465 1.1 alnsn } else { 466 1.1 alnsn jump->flags |= PATCH_MW; 467 1.1 alnsn #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) 468 1.5 alnsn code_ptr += sizeof(sljit_sw); 469 1.1 alnsn #else 470 1.8 alnsn code_ptr += sizeof(sljit_s32); 471 1.1 alnsn #endif 472 1.1 alnsn } 473 1.1 alnsn 474 1.1 alnsn return code_ptr; 475 1.1 alnsn } 476 1.1 alnsn 477 1.1 alnsn SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler) 478 1.1 alnsn { 479 1.1 alnsn struct sljit_memory_fragment *buf; 480 1.8 alnsn sljit_u8 *code; 481 1.8 alnsn sljit_u8 *code_ptr; 482 1.8 alnsn sljit_u8 *buf_ptr; 483 1.8 alnsn sljit_u8 *buf_end; 484 1.8 alnsn sljit_u8 len; 485 1.9 alnsn sljit_sw executable_offset; 486 1.9 alnsn sljit_sw jump_addr; 487 1.1 alnsn 488 1.1 alnsn struct sljit_label *label; 489 1.1 alnsn struct sljit_jump *jump; 490 1.1 alnsn struct sljit_const *const_; 491 1.1 alnsn 492 1.1 alnsn CHECK_ERROR_PTR(); 493 1.8 alnsn CHECK_PTR(check_sljit_generate_code(compiler)); 494 1.1 alnsn reverse_buf(compiler); 495 1.1 alnsn 496 1.1 alnsn /* Second code generation pass. */ 497 1.8 alnsn code = (sljit_u8*)SLJIT_MALLOC_EXEC(compiler->size); 498 1.1 alnsn PTR_FAIL_WITH_EXEC_IF(code); 499 1.1 alnsn buf = compiler->buf; 500 1.1 alnsn 501 1.1 alnsn code_ptr = code; 502 1.1 alnsn label = compiler->labels; 503 1.1 alnsn jump = compiler->jumps; 504 1.1 alnsn const_ = compiler->consts; 505 1.9 alnsn executable_offset = SLJIT_EXEC_OFFSET(code); 506 1.9 alnsn 507 1.1 alnsn do { 508 1.1 alnsn buf_ptr = buf->memory; 509 1.1 alnsn buf_end = buf_ptr + buf->used_size; 510 1.1 alnsn do { 511 1.1 alnsn len = *buf_ptr++; 512 1.1 alnsn if (len > 0) { 513 1.1 alnsn /* The code is already generated. 
*/ 514 1.9 alnsn SLJIT_MEMCPY(code_ptr, buf_ptr, len); 515 1.1 alnsn code_ptr += len; 516 1.1 alnsn buf_ptr += len; 517 1.1 alnsn } 518 1.1 alnsn else { 519 1.9 alnsn if (*buf_ptr >= 2) { 520 1.1 alnsn jump->addr = (sljit_uw)code_ptr; 521 1.1 alnsn if (!(jump->flags & SLJIT_REWRITABLE_JUMP)) 522 1.9 alnsn code_ptr = generate_near_jump_code(jump, code_ptr, code, *buf_ptr - 2, executable_offset); 523 1.9 alnsn else { 524 1.9 alnsn #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) 525 1.9 alnsn code_ptr = generate_far_jump_code(jump, code_ptr, *buf_ptr - 2, executable_offset); 526 1.9 alnsn #else 527 1.9 alnsn code_ptr = generate_far_jump_code(jump, code_ptr, *buf_ptr - 2); 528 1.9 alnsn #endif 529 1.9 alnsn } 530 1.1 alnsn jump = jump->next; 531 1.1 alnsn } 532 1.1 alnsn else if (*buf_ptr == 0) { 533 1.9 alnsn label->addr = ((sljit_uw)code_ptr) + executable_offset; 534 1.1 alnsn label->size = code_ptr - code; 535 1.1 alnsn label = label->next; 536 1.1 alnsn } 537 1.9 alnsn else { /* *buf_ptr is 1 */ 538 1.5 alnsn const_->addr = ((sljit_uw)code_ptr) - sizeof(sljit_sw); 539 1.1 alnsn const_ = const_->next; 540 1.1 alnsn } 541 1.1 alnsn buf_ptr++; 542 1.1 alnsn } 543 1.1 alnsn } while (buf_ptr < buf_end); 544 1.1 alnsn SLJIT_ASSERT(buf_ptr == buf_end); 545 1.1 alnsn buf = buf->next; 546 1.1 alnsn } while (buf); 547 1.1 alnsn 548 1.1 alnsn SLJIT_ASSERT(!label); 549 1.1 alnsn SLJIT_ASSERT(!jump); 550 1.1 alnsn SLJIT_ASSERT(!const_); 551 1.1 alnsn 552 1.1 alnsn jump = compiler->jumps; 553 1.1 alnsn while (jump) { 554 1.9 alnsn jump_addr = jump->addr + executable_offset; 555 1.9 alnsn 556 1.1 alnsn if (jump->flags & PATCH_MB) { 557 1.9 alnsn SLJIT_ASSERT((sljit_sw)(jump->u.label->addr - (jump_addr + sizeof(sljit_s8))) >= -128 && (sljit_sw)(jump->u.label->addr - (jump_addr + sizeof(sljit_s8))) <= 127); 558 1.9 alnsn *(sljit_u8*)jump->addr = (sljit_u8)(jump->u.label->addr - (jump_addr + sizeof(sljit_s8))); 559 1.1 alnsn } else if (jump->flags & PATCH_MW) { 560 1.1 
alnsn if (jump->flags & JUMP_LABEL) { 561 1.1 alnsn #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) 562 1.9 alnsn sljit_unaligned_store_sw((void*)jump->addr, (sljit_sw)(jump->u.label->addr - (jump_addr + sizeof(sljit_sw)))); 563 1.1 alnsn #else 564 1.9 alnsn SLJIT_ASSERT((sljit_sw)(jump->u.label->addr - (jump_addr + sizeof(sljit_s32))) >= HALFWORD_MIN && (sljit_sw)(jump->u.label->addr - (jump_addr + sizeof(sljit_s32))) <= HALFWORD_MAX); 565 1.9 alnsn sljit_unaligned_store_s32((void*)jump->addr, (sljit_s32)(jump->u.label->addr - (jump_addr + sizeof(sljit_s32)))); 566 1.1 alnsn #endif 567 1.1 alnsn } 568 1.1 alnsn else { 569 1.1 alnsn #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) 570 1.9 alnsn sljit_unaligned_store_sw((void*)jump->addr, (sljit_sw)(jump->u.target - (jump_addr + sizeof(sljit_sw)))); 571 1.1 alnsn #else 572 1.9 alnsn SLJIT_ASSERT((sljit_sw)(jump->u.target - (jump_addr + sizeof(sljit_s32))) >= HALFWORD_MIN && (sljit_sw)(jump->u.target - (jump_addr + sizeof(sljit_s32))) <= HALFWORD_MAX); 573 1.9 alnsn sljit_unaligned_store_s32((void*)jump->addr, (sljit_s32)(jump->u.target - (jump_addr + sizeof(sljit_s32)))); 574 1.1 alnsn #endif 575 1.1 alnsn } 576 1.1 alnsn } 577 1.1 alnsn #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) 578 1.1 alnsn else if (jump->flags & PATCH_MD) 579 1.9 alnsn sljit_unaligned_store_sw((void*)jump->addr, jump->u.label->addr); 580 1.1 alnsn #endif 581 1.1 alnsn 582 1.1 alnsn jump = jump->next; 583 1.1 alnsn } 584 1.1 alnsn 585 1.9 alnsn /* Some space may be wasted because of short jumps. 
*/ 586 1.1 alnsn SLJIT_ASSERT(code_ptr <= code + compiler->size); 587 1.1 alnsn compiler->error = SLJIT_ERR_COMPILED; 588 1.9 alnsn compiler->executable_offset = executable_offset; 589 1.5 alnsn compiler->executable_size = code_ptr - code; 590 1.9 alnsn return (void*)(code + executable_offset); 591 1.1 alnsn } 592 1.1 alnsn 593 1.1 alnsn /* --------------------------------------------------------------------- */ 594 1.1 alnsn /* Operators */ 595 1.1 alnsn /* --------------------------------------------------------------------- */ 596 1.1 alnsn 597 1.8 alnsn static sljit_s32 emit_cum_binary(struct sljit_compiler *compiler, 598 1.8 alnsn sljit_u8 op_rm, sljit_u8 op_mr, sljit_u8 op_imm, sljit_u8 op_eax_imm, 599 1.8 alnsn sljit_s32 dst, sljit_sw dstw, 600 1.8 alnsn sljit_s32 src1, sljit_sw src1w, 601 1.8 alnsn sljit_s32 src2, sljit_sw src2w); 602 1.1 alnsn 603 1.8 alnsn static sljit_s32 emit_non_cum_binary(struct sljit_compiler *compiler, 604 1.8 alnsn sljit_u8 op_rm, sljit_u8 op_mr, sljit_u8 op_imm, sljit_u8 op_eax_imm, 605 1.8 alnsn sljit_s32 dst, sljit_sw dstw, 606 1.8 alnsn sljit_s32 src1, sljit_sw src1w, 607 1.8 alnsn sljit_s32 src2, sljit_sw src2w); 608 1.1 alnsn 609 1.8 alnsn static sljit_s32 emit_mov(struct sljit_compiler *compiler, 610 1.8 alnsn sljit_s32 dst, sljit_sw dstw, 611 1.8 alnsn sljit_s32 src, sljit_sw srcw); 612 1.1 alnsn 613 1.9 alnsn #define EMIT_MOV(compiler, dst, dstw, src, srcw) \ 614 1.9 alnsn FAIL_IF(emit_mov(compiler, dst, dstw, src, srcw)); 615 1.1 alnsn 616 1.1 alnsn #ifdef _WIN32 617 1.1 alnsn #include <malloc.h> 618 1.1 alnsn 619 1.5 alnsn static void SLJIT_CALL sljit_grow_stack(sljit_sw local_size) 620 1.1 alnsn { 621 1.1 alnsn /* Workaround for calling the internal _chkstk() function on Windows. 622 1.1 alnsn This function touches all 4k pages belongs to the requested stack space, 623 1.1 alnsn which size is passed in local_size. This is necessary on Windows where 624 1.1 alnsn the stack can only grow in 4k steps. 
However, this function just burn 625 1.5 alnsn CPU cycles if the stack is large enough. However, you don't know it in 626 1.5 alnsn advance, so it must always be called. I think this is a bad design in 627 1.5 alnsn general even if it has some reasons. */ 628 1.8 alnsn *(volatile sljit_s32*)alloca(local_size) = 0; 629 1.1 alnsn } 630 1.1 alnsn 631 1.1 alnsn #endif 632 1.1 alnsn 633 1.1 alnsn #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) 634 1.1 alnsn #include "sljitNativeX86_32.c" 635 1.1 alnsn #else 636 1.1 alnsn #include "sljitNativeX86_64.c" 637 1.1 alnsn #endif 638 1.1 alnsn 639 1.8 alnsn static sljit_s32 emit_mov(struct sljit_compiler *compiler, 640 1.8 alnsn sljit_s32 dst, sljit_sw dstw, 641 1.8 alnsn sljit_s32 src, sljit_sw srcw) 642 1.1 alnsn { 643 1.8 alnsn sljit_u8* inst; 644 1.1 alnsn 645 1.1 alnsn if (dst == SLJIT_UNUSED) { 646 1.1 alnsn /* No destination, doesn't need to setup flags. */ 647 1.1 alnsn if (src & SLJIT_MEM) { 648 1.5 alnsn inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src, srcw); 649 1.5 alnsn FAIL_IF(!inst); 650 1.5 alnsn *inst = MOV_r_rm; 651 1.1 alnsn } 652 1.1 alnsn return SLJIT_SUCCESS; 653 1.1 alnsn } 654 1.5 alnsn if (FAST_IS_REG(src)) { 655 1.5 alnsn inst = emit_x86_instruction(compiler, 1, src, 0, dst, dstw); 656 1.5 alnsn FAIL_IF(!inst); 657 1.5 alnsn *inst = MOV_rm_r; 658 1.1 alnsn return SLJIT_SUCCESS; 659 1.1 alnsn } 660 1.1 alnsn if (src & SLJIT_IMM) { 661 1.5 alnsn if (FAST_IS_REG(dst)) { 662 1.1 alnsn #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) 663 1.5 alnsn return emit_do_imm(compiler, MOV_r_i32 + reg_map[dst], srcw); 664 1.1 alnsn #else 665 1.1 alnsn if (!compiler->mode32) { 666 1.1 alnsn if (NOT_HALFWORD(srcw)) 667 1.1 alnsn return emit_load_imm64(compiler, dst, srcw); 668 1.1 alnsn } 669 1.1 alnsn else 670 1.5 alnsn return emit_do_imm32(compiler, (reg_map[dst] >= 8) ? 
REX_B : 0, MOV_r_i32 + reg_lmap[dst], srcw); 671 1.1 alnsn #endif 672 1.1 alnsn } 673 1.1 alnsn #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) 674 1.1 alnsn if (!compiler->mode32 && NOT_HALFWORD(srcw)) { 675 1.1 alnsn FAIL_IF(emit_load_imm64(compiler, TMP_REG2, srcw)); 676 1.5 alnsn inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, dst, dstw); 677 1.5 alnsn FAIL_IF(!inst); 678 1.5 alnsn *inst = MOV_rm_r; 679 1.1 alnsn return SLJIT_SUCCESS; 680 1.1 alnsn } 681 1.1 alnsn #endif 682 1.5 alnsn inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, dstw); 683 1.5 alnsn FAIL_IF(!inst); 684 1.5 alnsn *inst = MOV_rm_i32; 685 1.1 alnsn return SLJIT_SUCCESS; 686 1.1 alnsn } 687 1.5 alnsn if (FAST_IS_REG(dst)) { 688 1.5 alnsn inst = emit_x86_instruction(compiler, 1, dst, 0, src, srcw); 689 1.5 alnsn FAIL_IF(!inst); 690 1.5 alnsn *inst = MOV_r_rm; 691 1.1 alnsn return SLJIT_SUCCESS; 692 1.1 alnsn } 693 1.1 alnsn 694 1.1 alnsn /* Memory to memory move. Requires two instruction. 
	inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src, srcw);
	FAIL_IF(!inst);
	*inst = MOV_r_rm;
	inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
	FAIL_IF(!inst);
	*inst = MOV_rm_r;
	return SLJIT_SUCCESS;
}

/* Emits a zero-operand operation (breakpoint, nop, or one of the
   LMUL/DIVMOD/DIV family).  The multiply/divide opcodes implicitly use the
   EAX:EDX (RAX:RDX) register pair; the SLJIT_ASSERTs below check that the
   SLJIT register mapping matches that hardware constraint. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op)
{
	sljit_u8 *inst;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	sljit_s32 size;
#endif

	CHECK_ERROR();
	CHECK(check_sljit_emit_op0(compiler, op));

	switch (GET_OPCODE(op)) {
	case SLJIT_BREAKPOINT:
		/* Single-byte INT3 trap. */
		inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
		FAIL_IF(!inst);
		INC_SIZE(1);
		*inst = INT3;
		break;
	case SLJIT_NOP:
		inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
		FAIL_IF(!inst);
		INC_SIZE(1);
		*inst = NOP;
		break;
	case SLJIT_LMUL_UW:
	case SLJIT_LMUL_SW:
	case SLJIT_DIVMOD_UW:
	case SLJIT_DIVMOD_SW:
	case SLJIT_DIV_UW:
	case SLJIT_DIV_SW:
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
#ifdef _WIN64
		SLJIT_ASSERT(
			reg_map[SLJIT_R0] == 0
			&& reg_map[SLJIT_R1] == 2
			&& reg_map[TMP_REG1] > 7);
#else
		SLJIT_ASSERT(
			reg_map[SLJIT_R0] == 0
			&& reg_map[SLJIT_R1] < 7
			&& reg_map[TMP_REG1] == 2);
#endif
		compiler->mode32 = op & SLJIT_I32_OP;
#endif
		/* The DIV opcodes are expected to be DIVMOD + 0x2 so the
		   (op | 0x2) tests below can select both variants at once. */
		SLJIT_COMPILE_ASSERT((SLJIT_DIVMOD_UW & 0x2) == 0 && SLJIT_DIV_UW - 0x2 == SLJIT_DIVMOD_UW, bad_div_opcode_assignments);

		op = GET_OPCODE(op);
		if ((op | 0x2) == SLJIT_DIV_UW) {
			/* Unsigned division: clear the high half (EDX/RDX). */
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64)
			EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_R1, 0);
			inst = emit_x86_instruction(compiler, 1, SLJIT_R1, 0, SLJIT_R1, 0);
#else
			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, TMP_REG1, 0);
#endif
			FAIL_IF(!inst);
			*inst = XOR_r_rm;
		}

		if ((op | 0x2) == SLJIT_DIV_SW) {
			/* Signed division: sign-extend EAX into EDX (CDQ/CQO). */
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64)
			EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_R1, 0);
#endif

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
			inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
			FAIL_IF(!inst);
			INC_SIZE(1);
			*inst = CDQ;
#else
			if (compiler->mode32) {
				inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
				FAIL_IF(!inst);
				INC_SIZE(1);
				*inst = CDQ;
			} else {
				/* REX.W prefix turns CDQ into CQO (64-bit). */
				inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
				FAIL_IF(!inst);
				INC_SIZE(2);
				*inst++ = REX_W;
				*inst = CDQ;
			}
#endif
		}

		/* Emit the F7 /reg group instruction (MUL/IMUL/DIV/IDIV);
		   the exact sub-opcode is OR-ed into the ModRM byte below. */
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
		FAIL_IF(!inst);
		INC_SIZE(2);
		*inst++ = GROUP_F7;
		*inst = MOD_REG | ((op >= SLJIT_DIVMOD_UW) ? reg_map[TMP_REG1] : reg_map[SLJIT_R1]);
#else
#ifdef _WIN64
		size = (!compiler->mode32 || op >= SLJIT_DIVMOD_UW) ? 3 : 2;
#else
		size = (!compiler->mode32) ? 3 : 2;
#endif
		inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
		FAIL_IF(!inst);
		INC_SIZE(size);
#ifdef _WIN64
		if (!compiler->mode32)
			*inst++ = REX_W | ((op >= SLJIT_DIVMOD_UW) ? REX_B : 0);
		else if (op >= SLJIT_DIVMOD_UW)
			*inst++ = REX_B;
		*inst++ = GROUP_F7;
		*inst = MOD_REG | ((op >= SLJIT_DIVMOD_UW) ? reg_lmap[TMP_REG1] : reg_lmap[SLJIT_R1]);
#else
		if (!compiler->mode32)
			*inst++ = REX_W;
		*inst++ = GROUP_F7;
		*inst = MOD_REG | reg_map[SLJIT_R1];
#endif
#endif
		switch (op) {
		case SLJIT_LMUL_UW:
			*inst |= MUL;
			break;
		case SLJIT_LMUL_SW:
			*inst |= IMUL;
			break;
		case SLJIT_DIVMOD_UW:
		case SLJIT_DIV_UW:
			*inst |= DIV;
			break;
		case SLJIT_DIVMOD_SW:
		case SLJIT_DIV_SW:
			*inst |= IDIV;
			break;
		}
		/* Restore/copy the second result register where required. */
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) && !defined(_WIN64)
		if (op <= SLJIT_DIVMOD_SW)
			EMIT_MOV(compiler, SLJIT_R1, 0, TMP_REG1, 0);
#else
		if (op >= SLJIT_DIV_UW)
			EMIT_MOV(compiler, SLJIT_R1, 0, TMP_REG1, 0);
#endif
		break;
	}

	return SLJIT_SUCCESS;
}

/* Emits a single one-byte instruction prefix into the code buffer. */
#define ENCODE_PREFIX(prefix) \
	do { \
		inst = (sljit_u8*)ensure_buf(compiler, 1 + 1); \
		FAIL_IF(!inst); \
		INC_SIZE(1); \
		*inst = (prefix); \
	} while (0)

static sljit_s32 emit_mov_byte(struct
	sljit_compiler *compiler, sljit_s32 sign,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	/* 8-bit move with zero extension (sign == 0) or sign extension
	   (sign != 0).  dst/src may be a register, memory operand or (src
	   only) an immediate.  On x86-32 only AL..BL (reg_map < 4) have
	   byte-addressable forms, hence the register juggling below. */
	sljit_u8* inst;
	sljit_s32 dst_r;
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	sljit_s32 work_r;
#endif

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 0;
#endif

	if (dst == SLJIT_UNUSED && !(src & SLJIT_MEM))
		return SLJIT_SUCCESS; /* Empty instruction. */

	if (src & SLJIT_IMM) {
		if (FAST_IS_REG(dst)) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
			return emit_do_imm(compiler, MOV_r_i32 + reg_map[dst], srcw);
#else
			inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, 0);
			FAIL_IF(!inst);
			*inst = MOV_rm_i32;
			return SLJIT_SUCCESS;
#endif
		}
		inst = emit_x86_instruction(compiler, 1 | EX86_BYTE_ARG | EX86_NO_REXW, SLJIT_IMM, srcw, dst, dstw);
		FAIL_IF(!inst);
		*inst = MOV_rm8_i8;
		return SLJIT_SUCCESS;
	}

	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;

	if ((dst & SLJIT_MEM) && FAST_IS_REG(src)) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		if (reg_map[src] >= 4) {
			/* src has no byte form; copy it into TMP_REG1 first. */
			SLJIT_ASSERT(dst_r == TMP_REG1);
			EMIT_MOV(compiler, TMP_REG1, 0, src, 0);
		} else
			dst_r = src;
#else
		dst_r = src;
#endif
	}
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	else if (FAST_IS_REG(src) && reg_map[src] >= 4) {
		/* src, dst are registers. */
		SLJIT_ASSERT(SLOW_IS_REG(dst));
		if (reg_map[dst] < 4) {
			if (dst != src)
				EMIT_MOV(compiler, dst, 0, src, 0);
			inst = emit_x86_instruction(compiler, 2, dst, 0, dst, 0);
			FAIL_IF(!inst);
			*inst++ = GROUP_0F;
			*inst = sign ? MOVSX_r_rm8 : MOVZX_r_rm8;
		}
		else {
			if (dst != src)
				EMIT_MOV(compiler, dst, 0, src, 0);
			if (sign) {
				/* Emulate MOVSX on a non-byte register:
				   shl reg, 24 */
				inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 24, dst, 0);
				FAIL_IF(!inst);
				*inst |= SHL;
				/* sar reg, 24 */
				inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 24, dst, 0);
				FAIL_IF(!inst);
				*inst |= SAR;
			}
			else {
				/* Zero extension: mask to the low byte. */
				inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 0xff, dst, 0);
				FAIL_IF(!inst);
				*(inst + 1) |= AND;
			}
		}
		return SLJIT_SUCCESS;
	}
#endif
	else {
		/* src can be memory addr or reg_map[src] < 4 on x86_32 architectures. */
		inst = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw);
		FAIL_IF(!inst);
		*inst++ = GROUP_0F;
		*inst = sign ? MOVSX_r_rm8 : MOVZX_r_rm8;
	}

	if (dst & SLJIT_MEM) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		if (dst_r == TMP_REG1) {
			/* Find a non-used register, whose reg_map[src] < 4. */
			if ((dst & REG_MASK) == SLJIT_R0) {
				if ((dst & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_R1))
					work_r = SLJIT_R2;
				else
					work_r = SLJIT_R1;
			}
			else {
				if ((dst & OFFS_REG_MASK) != TO_OFFS_REG(SLJIT_R0))
					work_r = SLJIT_R0;
				else if ((dst & REG_MASK) == SLJIT_R1)
					work_r = SLJIT_R2;
				else
					work_r = SLJIT_R1;
			}

			/* Swap work_r with TMP_REG1, store the byte through
			   work_r, then swap back (XCHG with EAX has a short
			   one-byte encoding). */
			if (work_r == SLJIT_R0) {
				ENCODE_PREFIX(XCHG_EAX_r + reg_map[TMP_REG1]);
			}
			else {
				inst = emit_x86_instruction(compiler, 1, work_r, 0, dst_r, 0);
				FAIL_IF(!inst);
				*inst = XCHG_r_rm;
			}

			inst = emit_x86_instruction(compiler, 1, work_r, 0, dst, dstw);
			FAIL_IF(!inst);
			*inst = MOV_rm8_r8;

			if (work_r == SLJIT_R0) {
				ENCODE_PREFIX(XCHG_EAX_r + reg_map[TMP_REG1]);
			}
			else {
				inst = emit_x86_instruction(compiler, 1, work_r, 0, dst_r, 0);
				FAIL_IF(!inst);
				*inst = XCHG_r_rm;
			}
		}
		else {
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, dst, dstw);
			FAIL_IF(!inst);
			*inst = MOV_rm8_r8;
		}
#else
		inst = emit_x86_instruction(compiler, 1 | EX86_REX | EX86_NO_REXW, dst_r, 0, dst, dstw);
		FAIL_IF(!inst);
		*inst = MOV_rm8_r8;
#endif
	}

	return SLJIT_SUCCESS;
}

/* 16-bit move with zero extension (sign == 0) or sign extension
   (sign != 0); half-word stores use the 0x66 operand-size prefix. */
static sljit_s32 emit_mov_half(struct sljit_compiler *compiler, sljit_s32 sign,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_u8* inst;
	sljit_s32 dst_r;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 0;
#endif

	if (dst == SLJIT_UNUSED && !(src & SLJIT_MEM))
		return SLJIT_SUCCESS; /* Empty instruction. */

	if (src & SLJIT_IMM) {
		if (FAST_IS_REG(dst)) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
			return emit_do_imm(compiler, MOV_r_i32 + reg_map[dst], srcw);
#else
			inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, 0);
			FAIL_IF(!inst);
			*inst = MOV_rm_i32;
			return SLJIT_SUCCESS;
#endif
		}
		inst = emit_x86_instruction(compiler, 1 | EX86_HALF_ARG | EX86_NO_REXW | EX86_PREF_66, SLJIT_IMM, srcw, dst, dstw);
		FAIL_IF(!inst);
		*inst = MOV_rm_i32;
		return SLJIT_SUCCESS;
	}

	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;

	if ((dst & SLJIT_MEM) && FAST_IS_REG(src))
		dst_r = src;
	else {
		/* MOVSX/MOVZX r, r/m16 into the destination (or TMP_REG1). */
		inst = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw);
		FAIL_IF(!inst);
		*inst++ = GROUP_0F;
		*inst = sign ? MOVSX_r_rm16 : MOVZX_r_rm16;
	}

	if (dst & SLJIT_MEM) {
		/* 16-bit store (0x66 prefix). */
		inst = emit_x86_instruction(compiler, 1 | EX86_NO_REXW | EX86_PREF_66, dst_r, 0, dst, dstw);
		FAIL_IF(!inst);
		*inst = MOV_rm_r;
	}

	return SLJIT_SUCCESS;
}

/* Emits a unary F7-group operation (opcode is the /digit sub-opcode,
   e.g. NOT_rm or NEG_rm) on dst, reading from src; goes through
   TMP_REG1 when dst is not directly addressable. */
static sljit_s32 emit_unary(struct sljit_compiler *compiler, sljit_u8 opcode,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_u8* inst;

	if (dst == SLJIT_UNUSED) {
		EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
		inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
		FAIL_IF(!inst);
		*inst++ = GROUP_F7;
		*inst |= opcode;
		return SLJIT_SUCCESS;
	}
	if (dst == src && dstw == srcw) {
		/* Same input and output */
		inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
		FAIL_IF(!inst);
		*inst++ = GROUP_F7;
		*inst |= opcode;
		return SLJIT_SUCCESS;
	}
	if (FAST_IS_REG(dst)) {
		EMIT_MOV(compiler, dst, 0, src, srcw);
		inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
		FAIL_IF(!inst);
		*inst++ = GROUP_F7;
		*inst |= opcode;
		return SLJIT_SUCCESS;
	}
	/* Memory destination: operate in TMP_REG1 and store the result. */
	EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
	inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
	FAIL_IF(!inst);
	*inst++ = GROUP_F7;
	*inst |= opcode;
	EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
	return SLJIT_SUCCESS;
}

static sljit_s32
emit_not_with_flags(struct sljit_compiler *compiler,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	/* NOT followed by OR reg, reg: NOT itself does not set any flags,
	   so the extra OR with itself sets the zero flag from the result. */
	sljit_u8* inst;

	if (dst == SLJIT_UNUSED) {
		EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
		inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
		FAIL_IF(!inst);
		*inst++ = GROUP_F7;
		*inst |= NOT_rm;
		inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, TMP_REG1, 0);
		FAIL_IF(!inst);
		*inst = OR_r_rm;
		return SLJIT_SUCCESS;
	}
	if (FAST_IS_REG(dst)) {
		EMIT_MOV(compiler, dst, 0, src, srcw);
		inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
		FAIL_IF(!inst);
		*inst++ = GROUP_F7;
		*inst |= NOT_rm;
		inst = emit_x86_instruction(compiler, 1, dst, 0, dst, 0);
		FAIL_IF(!inst);
		*inst = OR_r_rm;
		return SLJIT_SUCCESS;
	}
	EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
	inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
	FAIL_IF(!inst);
	*inst++ = GROUP_F7;
	*inst |= NOT_rm;
	inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, TMP_REG1, 0);
	FAIL_IF(!inst);
	*inst = OR_r_rm;
	EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
	return SLJIT_SUCCESS;
}

/* Count leading zeros via BSR: clz(x) = (bits - 1) - bsr(x), computed as
   bsr(x) XOR (bits - 1).  BSR leaves the destination undefined when the
   source is zero, so the result register is preloaded with a value that
   XORs to the bit width, kept via CMOVNE (or an emulated conditional
   move on CPUs without CMOV). */
static sljit_s32 emit_clz(struct sljit_compiler *compiler, sljit_s32 op_flags,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_u8* inst;
	sljit_s32 dst_r;

	SLJIT_UNUSED_ARG(op_flags);
	if (SLJIT_UNLIKELY(dst == SLJIT_UNUSED)) {
		/* Just set the zero flag. */
		EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
		inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
		FAIL_IF(!inst);
		*inst++ = GROUP_F7;
		*inst |= NOT_rm;
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 31, TMP_REG1, 0);
#else
		inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, !(op_flags & SLJIT_I32_OP) ? 63 : 31, TMP_REG1, 0);
#endif
		FAIL_IF(!inst);
		*inst |= SHR;
		return SLJIT_SUCCESS;
	}

	if (SLJIT_UNLIKELY(src & SLJIT_IMM)) {
		EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, srcw);
		src = TMP_REG1;
		srcw = 0;
	}

	inst = emit_x86_instruction(compiler, 2, TMP_REG1, 0, src, srcw);
	FAIL_IF(!inst);
	*inst++ = GROUP_0F;
	*inst = BSR_r_rm;

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	if (FAST_IS_REG(dst))
		dst_r = dst;
	else {
		/* Find an unused temporary register. */
		if ((dst & REG_MASK) != SLJIT_R0 && (dst & OFFS_REG_MASK) != TO_OFFS_REG(SLJIT_R0))
			dst_r = SLJIT_R0;
		else if ((dst & REG_MASK) != SLJIT_R1 && (dst & OFFS_REG_MASK) != TO_OFFS_REG(SLJIT_R1))
			dst_r = SLJIT_R1;
		else
			dst_r = SLJIT_R2;
		EMIT_MOV(compiler, dst, dstw, dst_r, 0);
	}
	/* 32 + 31: XORs to 31 below when the input was zero. */
	EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, 32 + 31);
#else
	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2;
	compiler->mode32 = 0;
	EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, !(op_flags & SLJIT_I32_OP) ? 64 + 63 : 32 + 31);
	compiler->mode32 = op_flags & SLJIT_I32_OP;
#endif

	if (cpu_has_cmov == -1)
		get_cpu_features();

	if (cpu_has_cmov) {
		inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG1, 0);
		FAIL_IF(!inst);
		*inst++ = GROUP_0F;
		*inst = CMOVNE_r_rm;
	} else {
		/* No CMOV: skip the MOV with a short conditional jump. */
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
		FAIL_IF(!inst);
		INC_SIZE(4);

		*inst++ = JE_i8;
		*inst++ = 2;
		*inst++ = MOV_r_rm;
		*inst++ = MOD_REG | (reg_map[dst_r] << 3) | reg_map[TMP_REG1];
#else
		inst = (sljit_u8*)ensure_buf(compiler, 1 + 5);
		FAIL_IF(!inst);
		INC_SIZE(5);

		*inst++ = JE_i8;
		*inst++ = 3;
		*inst++ = REX_W | (reg_map[dst_r] >= 8 ? REX_R : 0) | (reg_map[TMP_REG1] >= 8 ? REX_B : 0);
		*inst++ = MOV_r_rm;
		*inst++ = MOD_REG | (reg_lmap[dst_r] << 3) | reg_lmap[TMP_REG1];
#endif
	}

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 31, dst_r, 0);
#else
	inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, !(op_flags & SLJIT_I32_OP) ?
63 : 31, dst_r, 0); 1220 1.1 alnsn #endif 1221 1.5 alnsn FAIL_IF(!inst); 1222 1.5 alnsn *(inst + 1) |= XOR; 1223 1.1 alnsn 1224 1.1 alnsn #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) 1225 1.1 alnsn if (dst & SLJIT_MEM) { 1226 1.5 alnsn inst = emit_x86_instruction(compiler, 1, dst_r, 0, dst, dstw); 1227 1.5 alnsn FAIL_IF(!inst); 1228 1.5 alnsn *inst = XCHG_r_rm; 1229 1.1 alnsn } 1230 1.1 alnsn #else 1231 1.1 alnsn if (dst & SLJIT_MEM) 1232 1.1 alnsn EMIT_MOV(compiler, dst, dstw, TMP_REG2, 0); 1233 1.1 alnsn #endif 1234 1.1 alnsn return SLJIT_SUCCESS; 1235 1.1 alnsn } 1236 1.1 alnsn 1237 1.8 alnsn SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op, 1238 1.8 alnsn sljit_s32 dst, sljit_sw dstw, 1239 1.8 alnsn sljit_s32 src, sljit_sw srcw) 1240 1.8 alnsn { 1241 1.8 alnsn sljit_s32 update = 0; 1242 1.8 alnsn sljit_s32 op_flags = GET_ALL_FLAGS(op); 1243 1.1 alnsn #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) 1244 1.8 alnsn sljit_s32 dst_is_ereg = 0; 1245 1.8 alnsn sljit_s32 src_is_ereg = 0; 1246 1.1 alnsn #else 1247 1.5 alnsn # define src_is_ereg 0 1248 1.1 alnsn #endif 1249 1.1 alnsn 1250 1.1 alnsn CHECK_ERROR(); 1251 1.8 alnsn CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw)); 1252 1.1 alnsn ADJUST_LOCAL_OFFSET(dst, dstw); 1253 1.1 alnsn ADJUST_LOCAL_OFFSET(src, srcw); 1254 1.1 alnsn 1255 1.1 alnsn CHECK_EXTRA_REGS(dst, dstw, dst_is_ereg = 1); 1256 1.1 alnsn CHECK_EXTRA_REGS(src, srcw, src_is_ereg = 1); 1257 1.1 alnsn #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) 1258 1.8 alnsn compiler->mode32 = op_flags & SLJIT_I32_OP; 1259 1.1 alnsn #endif 1260 1.1 alnsn 1261 1.5 alnsn op = GET_OPCODE(op); 1262 1.5 alnsn if (op >= SLJIT_MOV && op <= SLJIT_MOVU_P) { 1263 1.1 alnsn #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) 1264 1.1 alnsn compiler->mode32 = 0; 1265 1.1 alnsn #endif 1266 1.1 alnsn 1267 1.8 alnsn if (op_flags & SLJIT_I32_OP) { 1268 1.5 alnsn if (FAST_IS_REG(src) && 
src == dst) { 1269 1.5 alnsn if (!TYPE_CAST_NEEDED(op)) 1270 1.5 alnsn return SLJIT_SUCCESS; 1271 1.5 alnsn } 1272 1.5 alnsn #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) 1273 1.8 alnsn if (op == SLJIT_MOV_S32 && (src & SLJIT_MEM)) 1274 1.8 alnsn op = SLJIT_MOV_U32; 1275 1.8 alnsn if (op == SLJIT_MOVU_S32 && (src & SLJIT_MEM)) 1276 1.8 alnsn op = SLJIT_MOVU_U32; 1277 1.8 alnsn if (op == SLJIT_MOV_U32 && (src & SLJIT_IMM)) 1278 1.8 alnsn op = SLJIT_MOV_S32; 1279 1.8 alnsn if (op == SLJIT_MOVU_U32 && (src & SLJIT_IMM)) 1280 1.8 alnsn op = SLJIT_MOVU_S32; 1281 1.5 alnsn #endif 1282 1.5 alnsn } 1283 1.5 alnsn 1284 1.5 alnsn SLJIT_COMPILE_ASSERT(SLJIT_MOV + 8 == SLJIT_MOVU, movu_offset); 1285 1.1 alnsn if (op >= SLJIT_MOVU) { 1286 1.1 alnsn update = 1; 1287 1.5 alnsn op -= 8; 1288 1.1 alnsn } 1289 1.1 alnsn 1290 1.1 alnsn if (src & SLJIT_IMM) { 1291 1.1 alnsn switch (op) { 1292 1.8 alnsn case SLJIT_MOV_U8: 1293 1.8 alnsn srcw = (sljit_u8)srcw; 1294 1.1 alnsn break; 1295 1.8 alnsn case SLJIT_MOV_S8: 1296 1.8 alnsn srcw = (sljit_s8)srcw; 1297 1.1 alnsn break; 1298 1.8 alnsn case SLJIT_MOV_U16: 1299 1.8 alnsn srcw = (sljit_u16)srcw; 1300 1.1 alnsn break; 1301 1.8 alnsn case SLJIT_MOV_S16: 1302 1.8 alnsn srcw = (sljit_s16)srcw; 1303 1.1 alnsn break; 1304 1.1 alnsn #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) 1305 1.8 alnsn case SLJIT_MOV_U32: 1306 1.8 alnsn srcw = (sljit_u32)srcw; 1307 1.1 alnsn break; 1308 1.8 alnsn case SLJIT_MOV_S32: 1309 1.8 alnsn srcw = (sljit_s32)srcw; 1310 1.1 alnsn break; 1311 1.1 alnsn #endif 1312 1.1 alnsn } 1313 1.1 alnsn #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) 1314 1.1 alnsn if (SLJIT_UNLIKELY(dst_is_ereg)) 1315 1.1 alnsn return emit_mov(compiler, dst, dstw, src, srcw); 1316 1.1 alnsn #endif 1317 1.1 alnsn } 1318 1.1 alnsn 1319 1.1 alnsn #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) 1320 1.8 alnsn if (SLJIT_UNLIKELY(dst_is_ereg) && (!(op == SLJIT_MOV || op == SLJIT_MOV_U32 || op == SLJIT_MOV_S32 
|| op == SLJIT_MOV_P) || (src & SLJIT_MEM))) { 1321 1.8 alnsn SLJIT_ASSERT(dst == SLJIT_MEM1(SLJIT_SP)); 1322 1.5 alnsn dst = TMP_REG1; 1323 1.1 alnsn } 1324 1.1 alnsn #endif 1325 1.1 alnsn 1326 1.1 alnsn switch (op) { 1327 1.1 alnsn case SLJIT_MOV: 1328 1.5 alnsn case SLJIT_MOV_P: 1329 1.1 alnsn #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) 1330 1.8 alnsn case SLJIT_MOV_U32: 1331 1.8 alnsn case SLJIT_MOV_S32: 1332 1.1 alnsn #endif 1333 1.1 alnsn FAIL_IF(emit_mov(compiler, dst, dstw, src, srcw)); 1334 1.1 alnsn break; 1335 1.8 alnsn case SLJIT_MOV_U8: 1336 1.5 alnsn FAIL_IF(emit_mov_byte(compiler, 0, dst, dstw, src, srcw)); 1337 1.1 alnsn break; 1338 1.8 alnsn case SLJIT_MOV_S8: 1339 1.5 alnsn FAIL_IF(emit_mov_byte(compiler, 1, dst, dstw, src, srcw)); 1340 1.1 alnsn break; 1341 1.8 alnsn case SLJIT_MOV_U16: 1342 1.5 alnsn FAIL_IF(emit_mov_half(compiler, 0, dst, dstw, src, srcw)); 1343 1.1 alnsn break; 1344 1.8 alnsn case SLJIT_MOV_S16: 1345 1.5 alnsn FAIL_IF(emit_mov_half(compiler, 1, dst, dstw, src, srcw)); 1346 1.1 alnsn break; 1347 1.1 alnsn #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) 1348 1.8 alnsn case SLJIT_MOV_U32: 1349 1.5 alnsn FAIL_IF(emit_mov_int(compiler, 0, dst, dstw, src, srcw)); 1350 1.1 alnsn break; 1351 1.8 alnsn case SLJIT_MOV_S32: 1352 1.5 alnsn FAIL_IF(emit_mov_int(compiler, 1, dst, dstw, src, srcw)); 1353 1.1 alnsn break; 1354 1.1 alnsn #endif 1355 1.1 alnsn } 1356 1.1 alnsn 1357 1.1 alnsn #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) 1358 1.5 alnsn if (SLJIT_UNLIKELY(dst_is_ereg) && dst == TMP_REG1) 1359 1.8 alnsn return emit_mov(compiler, SLJIT_MEM1(SLJIT_SP), dstw, TMP_REG1, 0); 1360 1.1 alnsn #endif 1361 1.1 alnsn 1362 1.9 alnsn if (SLJIT_UNLIKELY(update) && (src & SLJIT_MEM) && !src_is_ereg && (src & REG_MASK)) { 1363 1.9 alnsn if ((src & OFFS_REG_MASK) != 0) { 1364 1.9 alnsn FAIL_IF(emit_cum_binary(compiler, ADD_r_rm, ADD_rm_r, ADD, ADD_EAX_i32, 1365 1.9 alnsn (src & REG_MASK), 0, (src & REG_MASK), 0, 
OFFS_REG(dst), 0)); 1366 1.9 alnsn } 1367 1.9 alnsn else if (srcw != 0) { 1368 1.9 alnsn FAIL_IF(emit_cum_binary(compiler, ADD_r_rm, ADD_rm_r, ADD, ADD_EAX_i32, 1369 1.9 alnsn (src & REG_MASK), 0, (src & REG_MASK), 0, SLJIT_IMM, srcw)); 1370 1.9 alnsn } 1371 1.9 alnsn } 1372 1.9 alnsn 1373 1.9 alnsn if (SLJIT_UNLIKELY(update) && (dst & SLJIT_MEM) && (dst & REG_MASK)) { 1374 1.9 alnsn if ((dst & OFFS_REG_MASK) != 0) { 1375 1.9 alnsn FAIL_IF(emit_cum_binary(compiler, ADD_r_rm, ADD_rm_r, ADD, ADD_EAX_i32, 1376 1.9 alnsn (dst & REG_MASK), 0, (dst & REG_MASK), 0, OFFS_REG(dst), 0)); 1377 1.9 alnsn } 1378 1.9 alnsn else if (dstw != 0) { 1379 1.9 alnsn FAIL_IF(emit_cum_binary(compiler, ADD_r_rm, ADD_rm_r, ADD, ADD_EAX_i32, 1380 1.9 alnsn (dst & REG_MASK), 0, (dst & REG_MASK), 0, SLJIT_IMM, dstw)); 1381 1.9 alnsn } 1382 1.1 alnsn } 1383 1.1 alnsn return SLJIT_SUCCESS; 1384 1.1 alnsn } 1385 1.1 alnsn 1386 1.5 alnsn switch (op) { 1387 1.1 alnsn case SLJIT_NOT: 1388 1.9 alnsn if (SLJIT_UNLIKELY(op_flags & SLJIT_SET_Z)) 1389 1.1 alnsn return emit_not_with_flags(compiler, dst, dstw, src, srcw); 1390 1.5 alnsn return emit_unary(compiler, NOT_rm, dst, dstw, src, srcw); 1391 1.1 alnsn 1392 1.1 alnsn case SLJIT_NEG: 1393 1.5 alnsn return emit_unary(compiler, NEG_rm, dst, dstw, src, srcw); 1394 1.1 alnsn 1395 1.1 alnsn case SLJIT_CLZ: 1396 1.5 alnsn return emit_clz(compiler, op_flags, dst, dstw, src, srcw); 1397 1.1 alnsn } 1398 1.1 alnsn 1399 1.1 alnsn return SLJIT_SUCCESS; 1400 1.1 alnsn 1401 1.1 alnsn #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) 1402 1.5 alnsn # undef src_is_ereg 1403 1.1 alnsn #endif 1404 1.1 alnsn } 1405 1.1 alnsn 1406 1.1 alnsn #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) 1407 1.1 alnsn 1408 1.5 alnsn #define BINARY_IMM(op_imm, op_mr, immw, arg, argw) \ 1409 1.1 alnsn if (IS_HALFWORD(immw) || compiler->mode32) { \ 1410 1.5 alnsn inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, immw, arg, argw); \ 1411 1.5 alnsn 
		FAIL_IF(!inst); \
		*(inst + 1) |= (op_imm); \
	} \
	else { \
		FAIL_IF(emit_load_imm64(compiler, TMP_REG2, immw)); \
		inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, arg, argw); \
		FAIL_IF(!inst); \
		*inst = (op_mr); \
	}

/* Short-form "op eax, imm32" encoding (one byte shorter). */
#define BINARY_EAX_IMM(op_eax_imm, immw) \
	FAIL_IF(emit_do_imm32(compiler, (!compiler->mode32) ? REX_W : 0, (op_eax_imm), immw))

#else

#define BINARY_IMM(op_imm, op_mr, immw, arg, argw) \
	inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, immw, arg, argw); \
	FAIL_IF(!inst); \
	*(inst + 1) |= (op_imm);

#define BINARY_EAX_IMM(op_eax_imm, immw) \
	FAIL_IF(emit_do_imm(compiler, (op_eax_imm), immw))

#endif

/* Emits a commutative two-operand operation (ADD, AND, OR, XOR, ...):
   op_rm/op_mr are the r,r/m and r/m,r opcode bytes, op_imm the 0x81
   group sub-opcode and op_eax_imm the short EAX-immediate form.
   Commutativity lets the dst == src2 case reuse dst in place. */
static sljit_s32 emit_cum_binary(struct sljit_compiler *compiler,
	sljit_u8 op_rm, sljit_u8 op_mr, sljit_u8 op_imm, sljit_u8 op_eax_imm,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	sljit_u8* inst;

	if (dst == SLJIT_UNUSED) {
		/* Result discarded: compute in TMP_REG1 just for the flags. */
		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
		if (src2 & SLJIT_IMM) {
			BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
		}
		else {
			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = op_rm;
		}
		return SLJIT_SUCCESS;
	}

	if (dst == src1 && dstw == src1w) {
		if (src2 & SLJIT_IMM) {
			/* Prefer the short EAX-immediate form when the
			   immediate needs more than a signed byte. */
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
			if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
#else
			if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128)) {
#endif
				BINARY_EAX_IMM(op_eax_imm, src2w);
			}
			else {
				BINARY_IMM(op_imm, op_mr, src2w, dst, dstw);
			}
		}
		else if (FAST_IS_REG(dst)) {
			inst = emit_x86_instruction(compiler, 1, dst, dstw, src2, src2w);
			FAIL_IF(!inst);
			*inst = op_rm;
		}
		else if (FAST_IS_REG(src2)) {
			/* Special exception for sljit_emit_op_flags. */
			inst = emit_x86_instruction(compiler, 1, src2, src2w, dst, dstw);
			FAIL_IF(!inst);
			*inst = op_mr;
		}
		else {
			EMIT_MOV(compiler, TMP_REG1, 0, src2, src2w);
			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
			FAIL_IF(!inst);
			*inst = op_mr;
		}
		return SLJIT_SUCCESS;
	}

	/* Only for cumulative operations. */
	if (dst == src2 && dstw == src2w) {
		if (src1 & SLJIT_IMM) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
			if ((dst == SLJIT_R0) && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) {
#else
			if ((dst == SLJIT_R0) && (src1w > 127 || src1w < -128)) {
#endif
				BINARY_EAX_IMM(op_eax_imm, src1w);
			}
			else {
				BINARY_IMM(op_imm, op_mr, src1w, dst, dstw);
			}
		}
		else if (FAST_IS_REG(dst)) {
			inst = emit_x86_instruction(compiler, 1, dst, dstw, src1, src1w);
			FAIL_IF(!inst);
			*inst = op_rm;
		}
		else if (FAST_IS_REG(src1)) {
			inst = emit_x86_instruction(compiler, 1, src1, src1w, dst, dstw);
			FAIL_IF(!inst);
			*inst = op_mr;
		}
		else {
			EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
			FAIL_IF(!inst);
			*inst = op_mr;
		}
		return SLJIT_SUCCESS;
	}

	/* General version. */
	if (FAST_IS_REG(dst)) {
		EMIT_MOV(compiler, dst, 0, src1, src1w);
		if (src2 & SLJIT_IMM) {
			BINARY_IMM(op_imm, op_mr, src2w, dst, 0);
		}
		else {
			inst = emit_x86_instruction(compiler, 1, dst, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = op_rm;
		}
	}
	else {
		/* This version requires less memory writing.
 */
		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
		if (src2 & SLJIT_IMM) {
			BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
		}
		else {
			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = op_rm;
		}
		EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
	}

	return SLJIT_SUCCESS;
}

/* Emits a non-commutative two-operand operation (e.g. SUB): same opcode
   parameters as emit_cum_binary, but dst may not alias src2 in place,
   so the dst == src2 shortcut of the cumulative version is absent. */
static sljit_s32 emit_non_cum_binary(struct sljit_compiler *compiler,
	sljit_u8 op_rm, sljit_u8 op_mr, sljit_u8 op_imm, sljit_u8 op_eax_imm,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	sljit_u8* inst;

	if (dst == SLJIT_UNUSED) {
		/* Result discarded: compute in TMP_REG1 just for the flags. */
		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
		if (src2 & SLJIT_IMM) {
			BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
		}
		else {
			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = op_rm;
		}
		return SLJIT_SUCCESS;
	}

	if (dst == src1 && dstw == src1w) {
		if (src2 & SLJIT_IMM) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
			if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
#else
			if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128)) {
#endif
				BINARY_EAX_IMM(op_eax_imm, src2w);
			}
			else {
				BINARY_IMM(op_imm, op_mr, src2w, dst, dstw);
			}
		}
		else if (FAST_IS_REG(dst)) {
			inst = emit_x86_instruction(compiler, 1, dst, dstw, src2, src2w);
			FAIL_IF(!inst);
			*inst = op_rm;
		}
		else if (FAST_IS_REG(src2)) {
			inst = emit_x86_instruction(compiler, 1, src2, src2w, dst, dstw);
			FAIL_IF(!inst);
			*inst = op_mr;
		}
		else {
			EMIT_MOV(compiler, TMP_REG1, 0, src2, src2w);
			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
			FAIL_IF(!inst);
			*inst = op_mr;
		}
		return SLJIT_SUCCESS;
	}

	/* General version. */
	if (FAST_IS_REG(dst) && dst != src2) {
		EMIT_MOV(compiler, dst, 0, src1, src1w);
		if (src2 & SLJIT_IMM) {
			BINARY_IMM(op_imm, op_mr, src2w, dst, 0);
		}
		else {
			inst = emit_x86_instruction(compiler, 1, dst, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = op_rm;
		}
	}
	else {
		/* This version requires less memory writing. */
		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
		if (src2 & SLJIT_IMM) {
			BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
		}
		else {
			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = op_rm;
		}
		EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
	}

	return SLJIT_SUCCESS;
}

/* Emits a signed multiply, using two-operand IMUL when an operand
   already lives in the destination register, or the IMUL r, r/m, imm
   forms when one source is an immediate. */
static sljit_s32 emit_mul(struct sljit_compiler *compiler,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	sljit_u8* inst;
	sljit_s32 dst_r;

	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;

	/* Register destination. */
	if (dst_r == src1 && !(src2 & SLJIT_IMM)) {
		inst = emit_x86_instruction(compiler, 2, dst_r, 0, src2, src2w);
		FAIL_IF(!inst);
		*inst++ = GROUP_0F;
		*inst = IMUL_r_rm;
	}
	else if (dst_r == src2 && !(src1 & SLJIT_IMM)) {
		inst = emit_x86_instruction(compiler, 2, dst_r, 0, src1, src1w);
		FAIL_IF(!inst);
		*inst++ = GROUP_0F;
		*inst = IMUL_r_rm;
	}
	else if (src1 & SLJIT_IMM) {
		if (src2 & SLJIT_IMM) {
			/* Both immediate: materialize src2 first. */
			EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, src2w);
			src2 = dst_r;
			src2w = 0;
		}

		if (src1w <= 127 && src1w >= -128) {
			/* IMUL r, r/m, imm8. */
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = IMUL_r_rm_i8;
			inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
			FAIL_IF(!inst);
			INC_SIZE(1);
			*inst = (sljit_s8)src1w;
		}
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		else {
			/* IMUL r, r/m, imm32. */
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = IMUL_r_rm_i32;
			inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
			FAIL_IF(!inst);
			INC_SIZE(4);
			sljit_unaligned_store_sw(inst, src1w);
		}
#else
		else if (IS_HALFWORD(src1w)) {
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = IMUL_r_rm_i32;
			inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
			FAIL_IF(!inst);
			INC_SIZE(4);
			sljit_unaligned_store_s32(inst, (sljit_s32)src1w);
		}
		else {
			/* 64-bit immediate: load into TMP_REG2 first. */
			EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_IMM, src1w);
			if (dst_r != src2)
				EMIT_MOV(compiler, dst_r, 0, src2, src2w);
			inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0);
			FAIL_IF(!inst);
			*inst++ = GROUP_0F;
			*inst = IMUL_r_rm;
		}
#endif
	}
	else if (src2 & SLJIT_IMM) {
		/* Note: src1 is NOT immediate
*/ 1706 1.1 alnsn 1707 1.1 alnsn if (src2w <= 127 && src2w >= -128) { 1708 1.5 alnsn inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w); 1709 1.5 alnsn FAIL_IF(!inst); 1710 1.5 alnsn *inst = IMUL_r_rm_i8; 1711 1.8 alnsn inst = (sljit_u8*)ensure_buf(compiler, 1 + 1); 1712 1.5 alnsn FAIL_IF(!inst); 1713 1.5 alnsn INC_SIZE(1); 1714 1.8 alnsn *inst = (sljit_s8)src2w; 1715 1.1 alnsn } 1716 1.1 alnsn #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) 1717 1.1 alnsn else { 1718 1.5 alnsn inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w); 1719 1.5 alnsn FAIL_IF(!inst); 1720 1.5 alnsn *inst = IMUL_r_rm_i32; 1721 1.8 alnsn inst = (sljit_u8*)ensure_buf(compiler, 1 + 4); 1722 1.5 alnsn FAIL_IF(!inst); 1723 1.5 alnsn INC_SIZE(4); 1724 1.9 alnsn sljit_unaligned_store_sw(inst, src2w); 1725 1.1 alnsn } 1726 1.1 alnsn #else 1727 1.1 alnsn else if (IS_HALFWORD(src2w)) { 1728 1.5 alnsn inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w); 1729 1.5 alnsn FAIL_IF(!inst); 1730 1.5 alnsn *inst = IMUL_r_rm_i32; 1731 1.8 alnsn inst = (sljit_u8*)ensure_buf(compiler, 1 + 4); 1732 1.5 alnsn FAIL_IF(!inst); 1733 1.5 alnsn INC_SIZE(4); 1734 1.9 alnsn sljit_unaligned_store_s32(inst, (sljit_s32)src2w); 1735 1.1 alnsn } 1736 1.1 alnsn else { 1737 1.7 alnsn EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_IMM, src2w); 1738 1.1 alnsn if (dst_r != src1) 1739 1.1 alnsn EMIT_MOV(compiler, dst_r, 0, src1, src1w); 1740 1.5 alnsn inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0); 1741 1.5 alnsn FAIL_IF(!inst); 1742 1.5 alnsn *inst++ = GROUP_0F; 1743 1.5 alnsn *inst = IMUL_r_rm; 1744 1.1 alnsn } 1745 1.1 alnsn #endif 1746 1.1 alnsn } 1747 1.1 alnsn else { 1748 1.1 alnsn /* Neither argument is immediate. 
*/ 1749 1.1 alnsn if (ADDRESSING_DEPENDS_ON(src2, dst_r)) 1750 1.5 alnsn dst_r = TMP_REG1; 1751 1.1 alnsn EMIT_MOV(compiler, dst_r, 0, src1, src1w); 1752 1.5 alnsn inst = emit_x86_instruction(compiler, 2, dst_r, 0, src2, src2w); 1753 1.5 alnsn FAIL_IF(!inst); 1754 1.5 alnsn *inst++ = GROUP_0F; 1755 1.5 alnsn *inst = IMUL_r_rm; 1756 1.1 alnsn } 1757 1.1 alnsn 1758 1.5 alnsn if (dst_r == TMP_REG1) 1759 1.5 alnsn EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0); 1760 1.1 alnsn 1761 1.1 alnsn return SLJIT_SUCCESS; 1762 1.1 alnsn } 1763 1.1 alnsn 1764 1.9 alnsn static sljit_s32 emit_lea_binary(struct sljit_compiler *compiler, 1765 1.8 alnsn sljit_s32 dst, sljit_sw dstw, 1766 1.8 alnsn sljit_s32 src1, sljit_sw src1w, 1767 1.8 alnsn sljit_s32 src2, sljit_sw src2w) 1768 1.1 alnsn { 1769 1.8 alnsn sljit_u8* inst; 1770 1.8 alnsn sljit_s32 dst_r, done = 0; 1771 1.1 alnsn 1772 1.1 alnsn /* These cases better be left to handled by normal way. */ 1773 1.9 alnsn if (dst == src1 && dstw == src1w) 1774 1.9 alnsn return SLJIT_ERR_UNSUPPORTED; 1775 1.9 alnsn if (dst == src2 && dstw == src2w) 1776 1.9 alnsn return SLJIT_ERR_UNSUPPORTED; 1777 1.5 alnsn 1778 1.5 alnsn dst_r = FAST_IS_REG(dst) ? 
dst : TMP_REG1; 1779 1.5 alnsn 1780 1.5 alnsn if (FAST_IS_REG(src1)) { 1781 1.5 alnsn if (FAST_IS_REG(src2)) { 1782 1.5 alnsn inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM2(src1, src2), 0); 1783 1.5 alnsn FAIL_IF(!inst); 1784 1.5 alnsn *inst = LEA_r_m; 1785 1.1 alnsn done = 1; 1786 1.1 alnsn } 1787 1.1 alnsn #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) 1788 1.1 alnsn if ((src2 & SLJIT_IMM) && (compiler->mode32 || IS_HALFWORD(src2w))) { 1789 1.8 alnsn inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src1), (sljit_s32)src2w); 1790 1.1 alnsn #else 1791 1.1 alnsn if (src2 & SLJIT_IMM) { 1792 1.5 alnsn inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src1), src2w); 1793 1.1 alnsn #endif 1794 1.5 alnsn FAIL_IF(!inst); 1795 1.5 alnsn *inst = LEA_r_m; 1796 1.1 alnsn done = 1; 1797 1.1 alnsn } 1798 1.1 alnsn } 1799 1.5 alnsn else if (FAST_IS_REG(src2)) { 1800 1.1 alnsn #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) 1801 1.1 alnsn if ((src1 & SLJIT_IMM) && (compiler->mode32 || IS_HALFWORD(src1w))) { 1802 1.8 alnsn inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src2), (sljit_s32)src1w); 1803 1.1 alnsn #else 1804 1.1 alnsn if (src1 & SLJIT_IMM) { 1805 1.5 alnsn inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src2), src1w); 1806 1.1 alnsn #endif 1807 1.5 alnsn FAIL_IF(!inst); 1808 1.5 alnsn *inst = LEA_r_m; 1809 1.1 alnsn done = 1; 1810 1.1 alnsn } 1811 1.1 alnsn } 1812 1.1 alnsn 1813 1.1 alnsn if (done) { 1814 1.5 alnsn if (dst_r == TMP_REG1) 1815 1.5 alnsn return emit_mov(compiler, dst, dstw, TMP_REG1, 0); 1816 1.1 alnsn return SLJIT_SUCCESS; 1817 1.1 alnsn } 1818 1.1 alnsn return SLJIT_ERR_UNSUPPORTED; 1819 1.1 alnsn } 1820 1.1 alnsn 1821 1.8 alnsn static sljit_s32 emit_cmp_binary(struct sljit_compiler *compiler, 1822 1.8 alnsn sljit_s32 src1, sljit_sw src1w, 1823 1.8 alnsn sljit_s32 src2, sljit_sw src2w) 1824 1.1 alnsn { 1825 1.8 alnsn sljit_u8* inst; 1826 1.1 alnsn 1827 1.1 alnsn 
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) 1828 1.8 alnsn if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) { 1829 1.1 alnsn #else 1830 1.8 alnsn if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128)) { 1831 1.1 alnsn #endif 1832 1.5 alnsn BINARY_EAX_IMM(CMP_EAX_i32, src2w); 1833 1.1 alnsn return SLJIT_SUCCESS; 1834 1.1 alnsn } 1835 1.1 alnsn 1836 1.5 alnsn if (FAST_IS_REG(src1)) { 1837 1.1 alnsn if (src2 & SLJIT_IMM) { 1838 1.5 alnsn BINARY_IMM(CMP, CMP_rm_r, src2w, src1, 0); 1839 1.1 alnsn } 1840 1.1 alnsn else { 1841 1.5 alnsn inst = emit_x86_instruction(compiler, 1, src1, 0, src2, src2w); 1842 1.5 alnsn FAIL_IF(!inst); 1843 1.5 alnsn *inst = CMP_r_rm; 1844 1.1 alnsn } 1845 1.1 alnsn return SLJIT_SUCCESS; 1846 1.1 alnsn } 1847 1.1 alnsn 1848 1.5 alnsn if (FAST_IS_REG(src2) && !(src1 & SLJIT_IMM)) { 1849 1.5 alnsn inst = emit_x86_instruction(compiler, 1, src2, 0, src1, src1w); 1850 1.5 alnsn FAIL_IF(!inst); 1851 1.5 alnsn *inst = CMP_rm_r; 1852 1.1 alnsn return SLJIT_SUCCESS; 1853 1.1 alnsn } 1854 1.1 alnsn 1855 1.1 alnsn if (src2 & SLJIT_IMM) { 1856 1.1 alnsn if (src1 & SLJIT_IMM) { 1857 1.5 alnsn EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w); 1858 1.5 alnsn src1 = TMP_REG1; 1859 1.1 alnsn src1w = 0; 1860 1.1 alnsn } 1861 1.5 alnsn BINARY_IMM(CMP, CMP_rm_r, src2w, src1, src1w); 1862 1.1 alnsn } 1863 1.1 alnsn else { 1864 1.5 alnsn EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w); 1865 1.5 alnsn inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w); 1866 1.5 alnsn FAIL_IF(!inst); 1867 1.5 alnsn *inst = CMP_r_rm; 1868 1.1 alnsn } 1869 1.1 alnsn return SLJIT_SUCCESS; 1870 1.1 alnsn } 1871 1.1 alnsn 1872 1.8 alnsn static sljit_s32 emit_test_binary(struct sljit_compiler *compiler, 1873 1.8 alnsn sljit_s32 src1, sljit_sw src1w, 1874 1.8 alnsn sljit_s32 src2, sljit_sw src2w) 1875 1.1 alnsn { 1876 1.8 alnsn sljit_u8* inst; 1877 1.1 alnsn 1878 1.1 alnsn #if 
(defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) 1879 1.8 alnsn if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) { 1880 1.1 alnsn #else 1881 1.8 alnsn if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128)) { 1882 1.1 alnsn #endif 1883 1.5 alnsn BINARY_EAX_IMM(TEST_EAX_i32, src2w); 1884 1.1 alnsn return SLJIT_SUCCESS; 1885 1.1 alnsn } 1886 1.1 alnsn 1887 1.1 alnsn #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) 1888 1.9 alnsn if (src2 == SLJIT_R0 && (src1 & SLJIT_IMM) && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) { 1889 1.1 alnsn #else 1890 1.8 alnsn if (src2 == SLJIT_R0 && (src1 & SLJIT_IMM) && (src1w > 127 || src1w < -128)) { 1891 1.1 alnsn #endif 1892 1.5 alnsn BINARY_EAX_IMM(TEST_EAX_i32, src1w); 1893 1.1 alnsn return SLJIT_SUCCESS; 1894 1.1 alnsn } 1895 1.1 alnsn 1896 1.8 alnsn if (!(src1 & SLJIT_IMM)) { 1897 1.1 alnsn if (src2 & SLJIT_IMM) { 1898 1.1 alnsn #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) 1899 1.1 alnsn if (IS_HALFWORD(src2w) || compiler->mode32) { 1900 1.8 alnsn inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, src1w); 1901 1.5 alnsn FAIL_IF(!inst); 1902 1.5 alnsn *inst = GROUP_F7; 1903 1.1 alnsn } 1904 1.1 alnsn else { 1905 1.1 alnsn FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src2w)); 1906 1.8 alnsn inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, src1, src1w); 1907 1.5 alnsn FAIL_IF(!inst); 1908 1.5 alnsn *inst = TEST_rm_r; 1909 1.1 alnsn } 1910 1.1 alnsn #else 1911 1.8 alnsn inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, src1w); 1912 1.5 alnsn FAIL_IF(!inst); 1913 1.5 alnsn *inst = GROUP_F7; 1914 1.1 alnsn #endif 1915 1.8 alnsn return SLJIT_SUCCESS; 1916 1.1 alnsn } 1917 1.8 alnsn else if (FAST_IS_REG(src1)) { 1918 1.5 alnsn inst = emit_x86_instruction(compiler, 1, src1, 0, src2, src2w); 1919 1.5 alnsn FAIL_IF(!inst); 1920 1.5 alnsn *inst = TEST_rm_r; 1921 
1.8 alnsn return SLJIT_SUCCESS; 1922 1.1 alnsn } 1923 1.1 alnsn } 1924 1.1 alnsn 1925 1.8 alnsn if (!(src2 & SLJIT_IMM)) { 1926 1.1 alnsn if (src1 & SLJIT_IMM) { 1927 1.1 alnsn #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) 1928 1.1 alnsn if (IS_HALFWORD(src1w) || compiler->mode32) { 1929 1.8 alnsn inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src1w, src2, src2w); 1930 1.5 alnsn FAIL_IF(!inst); 1931 1.5 alnsn *inst = GROUP_F7; 1932 1.1 alnsn } 1933 1.1 alnsn else { 1934 1.1 alnsn FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src1w)); 1935 1.8 alnsn inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, src2, src2w); 1936 1.5 alnsn FAIL_IF(!inst); 1937 1.5 alnsn *inst = TEST_rm_r; 1938 1.1 alnsn } 1939 1.1 alnsn #else 1940 1.8 alnsn inst = emit_x86_instruction(compiler, 1, src1, src1w, src2, src2w); 1941 1.5 alnsn FAIL_IF(!inst); 1942 1.5 alnsn *inst = GROUP_F7; 1943 1.1 alnsn #endif 1944 1.8 alnsn return SLJIT_SUCCESS; 1945 1.1 alnsn } 1946 1.8 alnsn else if (FAST_IS_REG(src2)) { 1947 1.5 alnsn inst = emit_x86_instruction(compiler, 1, src2, 0, src1, src1w); 1948 1.5 alnsn FAIL_IF(!inst); 1949 1.5 alnsn *inst = TEST_rm_r; 1950 1.8 alnsn return SLJIT_SUCCESS; 1951 1.1 alnsn } 1952 1.1 alnsn } 1953 1.1 alnsn 1954 1.5 alnsn EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w); 1955 1.1 alnsn if (src2 & SLJIT_IMM) { 1956 1.1 alnsn #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) 1957 1.1 alnsn if (IS_HALFWORD(src2w) || compiler->mode32) { 1958 1.5 alnsn inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, TMP_REG1, 0); 1959 1.5 alnsn FAIL_IF(!inst); 1960 1.5 alnsn *inst = GROUP_F7; 1961 1.1 alnsn } 1962 1.1 alnsn else { 1963 1.1 alnsn FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src2w)); 1964 1.5 alnsn inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, TMP_REG1, 0); 1965 1.5 alnsn FAIL_IF(!inst); 1966 1.5 alnsn *inst = TEST_rm_r; 1967 1.1 alnsn } 1968 1.1 alnsn #else 1969 1.5 alnsn inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, TMP_REG1, 
0); 1970 1.5 alnsn FAIL_IF(!inst); 1971 1.5 alnsn *inst = GROUP_F7; 1972 1.1 alnsn #endif 1973 1.1 alnsn } 1974 1.1 alnsn else { 1975 1.5 alnsn inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w); 1976 1.5 alnsn FAIL_IF(!inst); 1977 1.5 alnsn *inst = TEST_rm_r; 1978 1.1 alnsn } 1979 1.1 alnsn return SLJIT_SUCCESS; 1980 1.1 alnsn } 1981 1.1 alnsn 1982 1.8 alnsn static sljit_s32 emit_shift(struct sljit_compiler *compiler, 1983 1.8 alnsn sljit_u8 mode, 1984 1.8 alnsn sljit_s32 dst, sljit_sw dstw, 1985 1.8 alnsn sljit_s32 src1, sljit_sw src1w, 1986 1.8 alnsn sljit_s32 src2, sljit_sw src2w) 1987 1.1 alnsn { 1988 1.8 alnsn sljit_u8* inst; 1989 1.1 alnsn 1990 1.1 alnsn if ((src2 & SLJIT_IMM) || (src2 == SLJIT_PREF_SHIFT_REG)) { 1991 1.1 alnsn if (dst == src1 && dstw == src1w) { 1992 1.5 alnsn inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, dst, dstw); 1993 1.5 alnsn FAIL_IF(!inst); 1994 1.5 alnsn *inst |= mode; 1995 1.1 alnsn return SLJIT_SUCCESS; 1996 1.1 alnsn } 1997 1.1 alnsn if (dst == SLJIT_UNUSED) { 1998 1.5 alnsn EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w); 1999 1.5 alnsn inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, TMP_REG1, 0); 2000 1.5 alnsn FAIL_IF(!inst); 2001 1.5 alnsn *inst |= mode; 2002 1.1 alnsn return SLJIT_SUCCESS; 2003 1.1 alnsn } 2004 1.1 alnsn if (dst == SLJIT_PREF_SHIFT_REG && src2 == SLJIT_PREF_SHIFT_REG) { 2005 1.5 alnsn EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w); 2006 1.5 alnsn inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0); 2007 1.5 alnsn FAIL_IF(!inst); 2008 1.5 alnsn *inst |= mode; 2009 1.5 alnsn EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0); 2010 1.1 alnsn return SLJIT_SUCCESS; 2011 1.1 alnsn } 2012 1.5 alnsn if (FAST_IS_REG(dst)) { 2013 1.1 alnsn EMIT_MOV(compiler, dst, 0, src1, src1w); 2014 1.5 alnsn inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, dst, 0); 2015 1.5 alnsn FAIL_IF(!inst); 2016 
1.5 alnsn *inst |= mode; 2017 1.1 alnsn return SLJIT_SUCCESS; 2018 1.1 alnsn } 2019 1.1 alnsn 2020 1.5 alnsn EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w); 2021 1.5 alnsn inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, TMP_REG1, 0); 2022 1.5 alnsn FAIL_IF(!inst); 2023 1.5 alnsn *inst |= mode; 2024 1.5 alnsn EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0); 2025 1.1 alnsn return SLJIT_SUCCESS; 2026 1.1 alnsn } 2027 1.1 alnsn 2028 1.1 alnsn if (dst == SLJIT_PREF_SHIFT_REG) { 2029 1.5 alnsn EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w); 2030 1.1 alnsn EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w); 2031 1.5 alnsn inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0); 2032 1.5 alnsn FAIL_IF(!inst); 2033 1.5 alnsn *inst |= mode; 2034 1.5 alnsn EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0); 2035 1.1 alnsn } 2036 1.5 alnsn else if (FAST_IS_REG(dst) && dst != src2 && !ADDRESSING_DEPENDS_ON(src2, dst)) { 2037 1.1 alnsn if (src1 != dst) 2038 1.1 alnsn EMIT_MOV(compiler, dst, 0, src1, src1w); 2039 1.5 alnsn EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_PREF_SHIFT_REG, 0); 2040 1.1 alnsn EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w); 2041 1.5 alnsn inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, dst, 0); 2042 1.5 alnsn FAIL_IF(!inst); 2043 1.5 alnsn *inst |= mode; 2044 1.5 alnsn EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0); 2045 1.1 alnsn } 2046 1.1 alnsn else { 2047 1.9 alnsn /* This case is complex since ecx itself may be used for 2048 1.9 alnsn addressing, and this case must be supported as well. 
*/ 2049 1.5 alnsn EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w); 2050 1.1 alnsn #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) 2051 1.1 alnsn EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_PREF_SHIFT_REG, 0); 2052 1.1 alnsn #else 2053 1.9 alnsn EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), 0, SLJIT_PREF_SHIFT_REG, 0); 2054 1.1 alnsn #endif 2055 1.1 alnsn EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w); 2056 1.5 alnsn inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0); 2057 1.5 alnsn FAIL_IF(!inst); 2058 1.5 alnsn *inst |= mode; 2059 1.1 alnsn #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) 2060 1.1 alnsn EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG2, 0); 2061 1.1 alnsn #else 2062 1.9 alnsn EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, SLJIT_MEM1(SLJIT_SP), 0); 2063 1.1 alnsn #endif 2064 1.5 alnsn EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0); 2065 1.1 alnsn } 2066 1.1 alnsn 2067 1.1 alnsn return SLJIT_SUCCESS; 2068 1.1 alnsn } 2069 1.1 alnsn 2070 1.8 alnsn static sljit_s32 emit_shift_with_flags(struct sljit_compiler *compiler, 2071 1.8 alnsn sljit_u8 mode, sljit_s32 set_flags, 2072 1.8 alnsn sljit_s32 dst, sljit_sw dstw, 2073 1.8 alnsn sljit_s32 src1, sljit_sw src1w, 2074 1.8 alnsn sljit_s32 src2, sljit_sw src2w) 2075 1.1 alnsn { 2076 1.1 alnsn /* The CPU does not set flags if the shift count is 0. 
*/ 2077 1.1 alnsn if (src2 & SLJIT_IMM) { 2078 1.1 alnsn #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) 2079 1.1 alnsn if ((src2w & 0x3f) != 0 || (compiler->mode32 && (src2w & 0x1f) != 0)) 2080 1.1 alnsn return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w); 2081 1.1 alnsn #else 2082 1.1 alnsn if ((src2w & 0x1f) != 0) 2083 1.1 alnsn return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w); 2084 1.1 alnsn #endif 2085 1.1 alnsn if (!set_flags) 2086 1.1 alnsn return emit_mov(compiler, dst, dstw, src1, src1w); 2087 1.1 alnsn /* OR dst, src, 0 */ 2088 1.5 alnsn return emit_cum_binary(compiler, OR_r_rm, OR_rm_r, OR, OR_EAX_i32, 2089 1.1 alnsn dst, dstw, src1, src1w, SLJIT_IMM, 0); 2090 1.1 alnsn } 2091 1.1 alnsn 2092 1.1 alnsn if (!set_flags) 2093 1.1 alnsn return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w); 2094 1.1 alnsn 2095 1.5 alnsn if (!FAST_IS_REG(dst)) 2096 1.1 alnsn FAIL_IF(emit_cmp_binary(compiler, src1, src1w, SLJIT_IMM, 0)); 2097 1.1 alnsn 2098 1.1 alnsn FAIL_IF(emit_shift(compiler,mode, dst, dstw, src1, src1w, src2, src2w)); 2099 1.1 alnsn 2100 1.5 alnsn if (FAST_IS_REG(dst)) 2101 1.1 alnsn return emit_cmp_binary(compiler, dst, dstw, SLJIT_IMM, 0); 2102 1.1 alnsn return SLJIT_SUCCESS; 2103 1.1 alnsn } 2104 1.1 alnsn 2105 1.8 alnsn SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op, 2106 1.8 alnsn sljit_s32 dst, sljit_sw dstw, 2107 1.8 alnsn sljit_s32 src1, sljit_sw src1w, 2108 1.8 alnsn sljit_s32 src2, sljit_sw src2w) 2109 1.1 alnsn { 2110 1.1 alnsn CHECK_ERROR(); 2111 1.8 alnsn CHECK(check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w)); 2112 1.1 alnsn ADJUST_LOCAL_OFFSET(dst, dstw); 2113 1.1 alnsn ADJUST_LOCAL_OFFSET(src1, src1w); 2114 1.1 alnsn ADJUST_LOCAL_OFFSET(src2, src2w); 2115 1.1 alnsn 2116 1.1 alnsn CHECK_EXTRA_REGS(dst, dstw, (void)0); 2117 1.1 alnsn CHECK_EXTRA_REGS(src1, src1w, (void)0); 2118 1.1 alnsn 
CHECK_EXTRA_REGS(src2, src2w, (void)0); 2119 1.1 alnsn #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) 2120 1.8 alnsn compiler->mode32 = op & SLJIT_I32_OP; 2121 1.1 alnsn #endif 2122 1.1 alnsn 2123 1.1 alnsn switch (GET_OPCODE(op)) { 2124 1.1 alnsn case SLJIT_ADD: 2125 1.9 alnsn if (!HAS_FLAGS(op)) { 2126 1.9 alnsn if (emit_lea_binary(compiler, dst, dstw, src1, src1w, src2, src2w) != SLJIT_ERR_UNSUPPORTED) 2127 1.1 alnsn return compiler->error; 2128 1.1 alnsn } 2129 1.5 alnsn return emit_cum_binary(compiler, ADD_r_rm, ADD_rm_r, ADD, ADD_EAX_i32, 2130 1.1 alnsn dst, dstw, src1, src1w, src2, src2w); 2131 1.1 alnsn case SLJIT_ADDC: 2132 1.5 alnsn return emit_cum_binary(compiler, ADC_r_rm, ADC_rm_r, ADC, ADC_EAX_i32, 2133 1.1 alnsn dst, dstw, src1, src1w, src2, src2w); 2134 1.1 alnsn case SLJIT_SUB: 2135 1.9 alnsn if (!HAS_FLAGS(op)) { 2136 1.9 alnsn if ((src2 & SLJIT_IMM) && emit_lea_binary(compiler, dst, dstw, src1, src1w, SLJIT_IMM, -src2w) != SLJIT_ERR_UNSUPPORTED) 2137 1.1 alnsn return compiler->error; 2138 1.1 alnsn } 2139 1.9 alnsn 2140 1.1 alnsn if (dst == SLJIT_UNUSED) 2141 1.1 alnsn return emit_cmp_binary(compiler, src1, src1w, src2, src2w); 2142 1.5 alnsn return emit_non_cum_binary(compiler, SUB_r_rm, SUB_rm_r, SUB, SUB_EAX_i32, 2143 1.1 alnsn dst, dstw, src1, src1w, src2, src2w); 2144 1.1 alnsn case SLJIT_SUBC: 2145 1.5 alnsn return emit_non_cum_binary(compiler, SBB_r_rm, SBB_rm_r, SBB, SBB_EAX_i32, 2146 1.1 alnsn dst, dstw, src1, src1w, src2, src2w); 2147 1.1 alnsn case SLJIT_MUL: 2148 1.1 alnsn return emit_mul(compiler, dst, dstw, src1, src1w, src2, src2w); 2149 1.1 alnsn case SLJIT_AND: 2150 1.1 alnsn if (dst == SLJIT_UNUSED) 2151 1.1 alnsn return emit_test_binary(compiler, src1, src1w, src2, src2w); 2152 1.5 alnsn return emit_cum_binary(compiler, AND_r_rm, AND_rm_r, AND, AND_EAX_i32, 2153 1.1 alnsn dst, dstw, src1, src1w, src2, src2w); 2154 1.1 alnsn case SLJIT_OR: 2155 1.5 alnsn return emit_cum_binary(compiler, OR_r_rm, OR_rm_r, OR, 
OR_EAX_i32, 2156 1.1 alnsn dst, dstw, src1, src1w, src2, src2w); 2157 1.1 alnsn case SLJIT_XOR: 2158 1.5 alnsn return emit_cum_binary(compiler, XOR_r_rm, XOR_rm_r, XOR, XOR_EAX_i32, 2159 1.1 alnsn dst, dstw, src1, src1w, src2, src2w); 2160 1.1 alnsn case SLJIT_SHL: 2161 1.9 alnsn return emit_shift_with_flags(compiler, SHL, HAS_FLAGS(op), 2162 1.1 alnsn dst, dstw, src1, src1w, src2, src2w); 2163 1.1 alnsn case SLJIT_LSHR: 2164 1.9 alnsn return emit_shift_with_flags(compiler, SHR, HAS_FLAGS(op), 2165 1.1 alnsn dst, dstw, src1, src1w, src2, src2w); 2166 1.1 alnsn case SLJIT_ASHR: 2167 1.9 alnsn return emit_shift_with_flags(compiler, SAR, HAS_FLAGS(op), 2168 1.1 alnsn dst, dstw, src1, src1w, src2, src2w); 2169 1.1 alnsn } 2170 1.1 alnsn 2171 1.1 alnsn return SLJIT_SUCCESS; 2172 1.1 alnsn } 2173 1.1 alnsn 2174 1.8 alnsn SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 reg) 2175 1.1 alnsn { 2176 1.8 alnsn CHECK_REG_INDEX(check_sljit_get_register_index(reg)); 2177 1.1 alnsn #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) 2178 1.9 alnsn if (reg >= SLJIT_R3 && reg <= SLJIT_R8) 2179 1.1 alnsn return -1; 2180 1.1 alnsn #endif 2181 1.1 alnsn return reg_map[reg]; 2182 1.1 alnsn } 2183 1.1 alnsn 2184 1.8 alnsn SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_float_register_index(sljit_s32 reg) 2185 1.5 alnsn { 2186 1.8 alnsn CHECK_REG_INDEX(check_sljit_get_float_register_index(reg)); 2187 1.5 alnsn return reg; 2188 1.5 alnsn } 2189 1.5 alnsn 2190 1.8 alnsn SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler, 2191 1.8 alnsn void *instruction, sljit_s32 size) 2192 1.1 alnsn { 2193 1.8 alnsn sljit_u8 *inst; 2194 1.1 alnsn 2195 1.1 alnsn CHECK_ERROR(); 2196 1.8 alnsn CHECK(check_sljit_emit_op_custom(compiler, instruction, size)); 2197 1.1 alnsn 2198 1.8 alnsn inst = (sljit_u8*)ensure_buf(compiler, 1 + size); 2199 1.5 alnsn FAIL_IF(!inst); 2200 1.1 alnsn INC_SIZE(size); 2201 1.9 alnsn SLJIT_MEMCPY(inst, instruction, size); 
2202 1.1 alnsn return SLJIT_SUCCESS; 2203 1.1 alnsn } 2204 1.1 alnsn 2205 1.1 alnsn /* --------------------------------------------------------------------- */ 2206 1.1 alnsn /* Floating point operators */ 2207 1.1 alnsn /* --------------------------------------------------------------------- */ 2208 1.1 alnsn 2209 1.1 alnsn /* Alignment + 2 * 16 bytes. */ 2210 1.8 alnsn static sljit_s32 sse2_data[3 + (4 + 4) * 2]; 2211 1.8 alnsn static sljit_s32 *sse2_buffer; 2212 1.1 alnsn 2213 1.2 alnsn static void init_compiler(void) 2214 1.1 alnsn { 2215 1.8 alnsn sse2_buffer = (sljit_s32*)(((sljit_uw)sse2_data + 15) & ~0xf); 2216 1.5 alnsn /* Single precision constants. */ 2217 1.5 alnsn sse2_buffer[0] = 0x80000000; 2218 1.5 alnsn sse2_buffer[4] = 0x7fffffff; 2219 1.5 alnsn /* Double precision constants. */ 2220 1.5 alnsn sse2_buffer[8] = 0; 2221 1.5 alnsn sse2_buffer[9] = 0x80000000; 2222 1.5 alnsn sse2_buffer[12] = 0xffffffff; 2223 1.5 alnsn sse2_buffer[13] = 0x7fffffff; 2224 1.1 alnsn } 2225 1.1 alnsn 2226 1.8 alnsn SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_is_fpu_available(void) 2227 1.1 alnsn { 2228 1.5 alnsn #ifdef SLJIT_IS_FPU_AVAILABLE 2229 1.5 alnsn return SLJIT_IS_FPU_AVAILABLE; 2230 1.8 alnsn #elif (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2) 2231 1.5 alnsn if (cpu_has_sse2 == -1) 2232 1.5 alnsn get_cpu_features(); 2233 1.5 alnsn return cpu_has_sse2; 2234 1.5 alnsn #else /* SLJIT_DETECT_SSE2 */ 2235 1.1 alnsn return 1; 2236 1.5 alnsn #endif /* SLJIT_DETECT_SSE2 */ 2237 1.1 alnsn } 2238 1.1 alnsn 2239 1.8 alnsn static sljit_s32 emit_sse2(struct sljit_compiler *compiler, sljit_u8 opcode, 2240 1.8 alnsn sljit_s32 single, sljit_s32 xmm1, sljit_s32 xmm2, sljit_sw xmm2w) 2241 1.1 alnsn { 2242 1.8 alnsn sljit_u8 *inst; 2243 1.1 alnsn 2244 1.5 alnsn inst = emit_x86_instruction(compiler, 2 | (single ? 
EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2, xmm1, 0, xmm2, xmm2w); 2245 1.5 alnsn FAIL_IF(!inst); 2246 1.5 alnsn *inst++ = GROUP_0F; 2247 1.5 alnsn *inst = opcode; 2248 1.1 alnsn return SLJIT_SUCCESS; 2249 1.1 alnsn } 2250 1.1 alnsn 2251 1.8 alnsn static sljit_s32 emit_sse2_logic(struct sljit_compiler *compiler, sljit_u8 opcode, 2252 1.8 alnsn sljit_s32 pref66, sljit_s32 xmm1, sljit_s32 xmm2, sljit_sw xmm2w) 2253 1.1 alnsn { 2254 1.8 alnsn sljit_u8 *inst; 2255 1.1 alnsn 2256 1.5 alnsn inst = emit_x86_instruction(compiler, 2 | (pref66 ? EX86_PREF_66 : 0) | EX86_SSE2, xmm1, 0, xmm2, xmm2w); 2257 1.5 alnsn FAIL_IF(!inst); 2258 1.5 alnsn *inst++ = GROUP_0F; 2259 1.5 alnsn *inst = opcode; 2260 1.1 alnsn return SLJIT_SUCCESS; 2261 1.1 alnsn } 2262 1.1 alnsn 2263 1.8 alnsn static SLJIT_INLINE sljit_s32 emit_sse2_load(struct sljit_compiler *compiler, 2264 1.8 alnsn sljit_s32 single, sljit_s32 dst, sljit_s32 src, sljit_sw srcw) 2265 1.1 alnsn { 2266 1.5 alnsn return emit_sse2(compiler, MOVSD_x_xm, single, dst, src, srcw); 2267 1.1 alnsn } 2268 1.1 alnsn 2269 1.8 alnsn static SLJIT_INLINE sljit_s32 emit_sse2_store(struct sljit_compiler *compiler, 2270 1.8 alnsn sljit_s32 single, sljit_s32 dst, sljit_sw dstw, sljit_s32 src) 2271 1.1 alnsn { 2272 1.5 alnsn return emit_sse2(compiler, MOVSD_xm_x, single, src, dst, dstw); 2273 1.1 alnsn } 2274 1.1 alnsn 2275 1.8 alnsn static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler *compiler, sljit_s32 op, 2276 1.8 alnsn sljit_s32 dst, sljit_sw dstw, 2277 1.8 alnsn sljit_s32 src, sljit_sw srcw) 2278 1.8 alnsn { 2279 1.8 alnsn sljit_s32 dst_r = SLOW_IS_REG(dst) ? dst : TMP_REG1; 2280 1.8 alnsn sljit_u8 *inst; 2281 1.8 alnsn 2282 1.8 alnsn #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) 2283 1.8 alnsn if (GET_OPCODE(op) == SLJIT_CONV_SW_FROM_F64) 2284 1.8 alnsn compiler->mode32 = 0; 2285 1.8 alnsn #endif 2286 1.8 alnsn 2287 1.8 alnsn inst = emit_x86_instruction(compiler, 2 | ((op & SLJIT_F32_OP) ? 
EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2_OP2, dst_r, 0, src, srcw); 2288 1.8 alnsn FAIL_IF(!inst); 2289 1.8 alnsn *inst++ = GROUP_0F; 2290 1.8 alnsn *inst = CVTTSD2SI_r_xm; 2291 1.8 alnsn 2292 1.8 alnsn if (dst_r == TMP_REG1 && dst != SLJIT_UNUSED) 2293 1.8 alnsn return emit_mov(compiler, dst, dstw, TMP_REG1, 0); 2294 1.8 alnsn return SLJIT_SUCCESS; 2295 1.8 alnsn } 2296 1.8 alnsn 2297 1.8 alnsn static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op, 2298 1.8 alnsn sljit_s32 dst, sljit_sw dstw, 2299 1.8 alnsn sljit_s32 src, sljit_sw srcw) 2300 1.8 alnsn { 2301 1.8 alnsn sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG; 2302 1.8 alnsn sljit_u8 *inst; 2303 1.8 alnsn 2304 1.8 alnsn #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) 2305 1.8 alnsn if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_SW) 2306 1.8 alnsn compiler->mode32 = 0; 2307 1.8 alnsn #endif 2308 1.8 alnsn 2309 1.8 alnsn if (src & SLJIT_IMM) { 2310 1.8 alnsn #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) 2311 1.8 alnsn if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32) 2312 1.8 alnsn srcw = (sljit_s32)srcw; 2313 1.8 alnsn #endif 2314 1.8 alnsn EMIT_MOV(compiler, TMP_REG1, 0, src, srcw); 2315 1.8 alnsn src = TMP_REG1; 2316 1.8 alnsn srcw = 0; 2317 1.8 alnsn } 2318 1.8 alnsn 2319 1.8 alnsn inst = emit_x86_instruction(compiler, 2 | ((op & SLJIT_F32_OP) ? 
/* NOTE(review): this chunk opens inside sljit_emit_fop1_conv_f64_from_sw;
   the lines below complete the integer -> float/double conversion
   (CVTSI2SD/CVTSI2SS) emission started above this view. */
		EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2_OP1, dst_r, 0, src, srcw);
	FAIL_IF(!inst);
	*inst++ = GROUP_0F;
	*inst = CVTSI2SD_x_rm;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	/* Restore 32 bit operand mode; the conversion above may have selected
	   64 bit mode before this view - confirm at the function head. */
	compiler->mode32 = 1;
#endif
	/* If the result was built in the scratch FP register, store it out to
	   the real destination. */
	if (dst_r == TMP_FREG)
		return emit_sse2_store(compiler, op & SLJIT_F32_OP, dst, dstw, TMP_FREG);
	return SLJIT_SUCCESS;
}

/* Emit a floating point compare that sets EFLAGS (UCOMISD, or its single
   precision form when SLJIT_F32_OP is set; the inverted selector passed to
   emit_sse2_logic presumably controls the operand size prefix - confirm
   against emit_sse2_logic). The instruction needs src1 in a register, so a
   non-register src1 is first loaded into TMP_FREG. */
static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	if (!FAST_IS_REG(src1)) {
		FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, TMP_FREG, src1, src1w));
		src1 = TMP_FREG;
	}
	return emit_sse2_logic(compiler, UCOMISD_x_xm, !(op & SLJIT_F32_OP), src1, src2, src2w);
}

/* Single operand floating point operation: MOV_F64, CONV_F64_FROM_F32,
   NEG_F64 and ABS_F64. NEG and ABS are implemented as XORPD/ANDPD with a
   constant sign mask taken from sse2_buffer. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 dst_r;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 1;
#endif

	CHECK_ERROR();
	SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw);

	if (GET_OPCODE(op) == SLJIT_MOV_F64) {
		/* Pure move: register <-> register/memory directly; only a
		   memory to memory move needs the scratch FP register. */
		if (FAST_IS_REG(dst))
			return emit_sse2_load(compiler, op & SLJIT_F32_OP, dst, src, srcw);
		if (FAST_IS_REG(src))
			return emit_sse2_store(compiler, op & SLJIT_F32_OP, dst, dstw, src);
		FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, TMP_FREG, src, srcw));
		return emit_sse2_store(compiler, op & SLJIT_F32_OP, dst, dstw, TMP_FREG);
	}

	if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_F32) {
		dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG;
		if (FAST_IS_REG(src)) {
			/* We overwrite the high bits of source. From SLJIT point of view,
			   this is not an issue.
			   Note: In SSE3, we could also use MOVDDUP and MOVSLDUP. */
			FAIL_IF(emit_sse2_logic(compiler, UNPCKLPD_x_xm, op & SLJIT_F32_OP, src, src, 0));
		}
		else {
			/* Note the inverted selector: the load uses the precision of
			   the conversion SOURCE, not the destination. */
			FAIL_IF(emit_sse2_load(compiler, !(op & SLJIT_F32_OP), TMP_FREG, src, srcw));
			src = TMP_FREG;
		}

		FAIL_IF(emit_sse2_logic(compiler, CVTPD2PS_x_xm, op & SLJIT_F32_OP, dst_r, src, 0));
		if (dst_r == TMP_FREG)
			return emit_sse2_store(compiler, op & SLJIT_F32_OP, dst, dstw, TMP_FREG);
		return SLJIT_SUCCESS;
	}

	/* Remaining opcodes (NEG/ABS): get the value into a writable
	   register first. */
	if (SLOW_IS_REG(dst)) {
		dst_r = dst;
		if (dst != src)
			FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, dst_r, src, srcw));
	}
	else {
		dst_r = TMP_FREG;
		FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, dst_r, src, srcw));
	}

	switch (GET_OPCODE(op)) {
	case SLJIT_NEG_F64:
		/* Flip the sign bit(s) with a constant mask. */
		FAIL_IF(emit_sse2_logic(compiler, XORPD_x_xm, 1, dst_r, SLJIT_MEM0(), (sljit_sw)(op & SLJIT_F32_OP ? sse2_buffer : sse2_buffer + 8)));
		break;

	case SLJIT_ABS_F64:
		/* Clear the sign bit(s) with a constant mask. */
		FAIL_IF(emit_sse2_logic(compiler, ANDPD_x_xm, 1, dst_r, SLJIT_MEM0(), (sljit_sw)(op & SLJIT_F32_OP ? sse2_buffer + 4 : sse2_buffer + 12)));
		break;
	}

	if (dst_r == TMP_FREG)
		return emit_sse2_store(compiler, op & SLJIT_F32_OP, dst, dstw, TMP_FREG);
	return SLJIT_SUCCESS;
}

/* Two operand floating point operation: ADD_F64, SUB_F64, MUL_F64 and
   DIV_F64 (single precision variants when SLJIT_F32_OP is set). */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	sljit_s32 dst_r;

	CHECK_ERROR();
	CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
	ADJUST_LOCAL_OFFSET(dst, dstw);
	ADJUST_LOCAL_OFFSET(src1, src1w);
	ADJUST_LOCAL_OFFSET(src2, src2w);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 1;
#endif

	if (FAST_IS_REG(dst)) {
		dst_r = dst;
		if (dst == src1)
			; /* Do nothing here. */
		else if (dst == src2 && (op == SLJIT_ADD_F64 || op == SLJIT_MUL_F64)) {
			/* Swap arguments.
			   Safe because ADD and MUL are commutative. */
			src2 = src1;
			src2w = src1w;
		}
		else if (dst != src2)
			FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, dst_r, src1, src1w));
		else {
			/* dst == src2 for a non-commutative op: build the result in
			   the scratch FP register instead. */
			dst_r = TMP_FREG;
			FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, TMP_FREG, src1, src1w));
		}
	}
	else {
		dst_r = TMP_FREG;
		FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, TMP_FREG, src1, src1w));
	}

	switch (GET_OPCODE(op)) {
	case SLJIT_ADD_F64:
		FAIL_IF(emit_sse2(compiler, ADDSD_x_xm, op & SLJIT_F32_OP, dst_r, src2, src2w));
		break;

	case SLJIT_SUB_F64:
		FAIL_IF(emit_sse2(compiler, SUBSD_x_xm, op & SLJIT_F32_OP, dst_r, src2, src2w));
		break;

	case SLJIT_MUL_F64:
		FAIL_IF(emit_sse2(compiler, MULSD_x_xm, op & SLJIT_F32_OP, dst_r, src2, src2w));
		break;

	case SLJIT_DIV_F64:
		FAIL_IF(emit_sse2(compiler, DIVSD_x_xm, op & SLJIT_F32_OP, dst_r, src2, src2w));
		break;
	}

	if (dst_r == TMP_FREG)
		return emit_sse2_store(compiler, op & SLJIT_F32_OP, dst, dstw, TMP_FREG);
	return SLJIT_SUCCESS;
}

/* --------------------------------------------------------------------- */
/*  Conditional instructions                                              */
/* --------------------------------------------------------------------- */

/* Create (or reuse) a label at the current position. A two byte record
   (leading zero byte, then 0) is inserted into the instruction stream;
   the leading zero starts a special record and the second byte selects
   the record kind (0 = label). */
SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler)
{
	sljit_u8 *inst;
	struct sljit_label *label;

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_label(compiler));

	/* Reuse the previous label if no code has been emitted since. */
	if (compiler->last_label && compiler->last_label->size == compiler->size)
		return compiler->last_label;

	label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label));
	PTR_FAIL_IF(!label);
	set_label(label, compiler);

	inst = (sljit_u8*)ensure_buf(compiler, 2);
	PTR_FAIL_IF(!inst);

	*inst++ = 0;
	*inst++ = 0;

	return label;
}

/* Emit a (conditional) jump or call with a yet-unknown target. Only a
   record (zero byte, then type + 2) is placed in the stream; the actual
   instruction bytes are produced during code generation, so only the
   worst case size is accounted for here. */
SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type)
{
	sljit_u8 *inst;
	struct sljit_jump *jump;

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_jump(compiler, type));

	jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
	PTR_FAIL_IF_NULL(jump);
	set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
	type &= 0xff;

	/* Calls with arguments need the argument setup code first. */
	if (type >= SLJIT_CALL1)
		PTR_FAIL_IF(call_with_args(compiler, type));

	/* Worst case size. */
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	/* 5 bytes for an unconditional, 6 for a conditional jump. */
	compiler->size += (type >= SLJIT_JUMP) ? 5 : 6;
#else
	/* 10 + 3: presumably a 64 bit immediate load plus the jump itself,
	   with 2 extra bytes for a conditional branch around it. */
	compiler->size += (type >= SLJIT_JUMP) ? (10 + 3) : (2 + 10 + 3);
#endif

	inst = (sljit_u8*)ensure_buf(compiler, 2);
	PTR_FAIL_IF_NULL(inst);

	*inst++ = 0;
	*inst++ = type + 2;
	return jump;
}

/* Emit an indirect jump or call: through a register/memory operand, or to
   a fixed address when src is an immediate. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw)
{
	sljit_u8 *inst;
	struct sljit_jump *jump;

	CHECK_ERROR();
	CHECK(check_sljit_emit_ijump(compiler, type, src, srcw));
	ADJUST_LOCAL_OFFSET(src, srcw);

	/* Remap virtual registers that live in the stack frame (x86-32). */
	CHECK_EXTRA_REGS(src, srcw, (void)0);

	if (type >= SLJIT_CALL1) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
		/* Keep the target safe in TMP_REG1 - presumably because the
		   argument setup clobbers SLJIT_R2 (confirm in call_with_args). */
		if (src == SLJIT_R2) {
			EMIT_MOV(compiler, TMP_REG1, 0, src, 0);
			src = TMP_REG1;
		}
		/* NOTE(review): looks like call_with_args pushes one word for
		   CALL3+, shifting stack-relative operands - confirm. */
		if (src == SLJIT_MEM1(SLJIT_SP) && type >= SLJIT_CALL3)
			srcw += sizeof(sljit_sw);
#endif
#endif
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) && defined(_WIN64)
		if (src == SLJIT_R2) {
			EMIT_MOV(compiler, TMP_REG1, 0, src, 0);
			src = TMP_REG1;
		}
#endif
		FAIL_IF(call_with_args(compiler, type));
	}

	if (src == SLJIT_IMM) {
		/* Fixed target: record a jump with a known address; the actual
		   bytes are produced during code generation. */
		jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
		FAIL_IF_NULL(jump);
		set_jump(jump, compiler, JUMP_ADDR);
		jump->u.target = srcw;

		/* Worst case size.
 */
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		compiler->size += 5;
#else
		compiler->size += 10 + 3;
#endif

		/* Jump record: zero byte, then jump type + 2. */
		inst = (sljit_u8*)ensure_buf(compiler, 2);
		FAIL_IF_NULL(inst);

		*inst++ = 0;
		*inst++ = type + 2;
	}
	else {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		/* REX_W is not necessary (src is not immediate). */
		compiler->mode32 = 1;
#endif
		/* Indirect form: group FF opcode; CALL_rm/JMP_rm select the
		   ModRM reg field. */
		inst = emit_x86_instruction(compiler, 1, 0, 0, src, srcw);
		FAIL_IF(!inst);
		*inst++ = GROUP_FF;
		*inst |= (type >= SLJIT_FAST_CALL) ? CALL_rm : JMP_rm;
	}
	return SLJIT_SUCCESS;
}

/* Materialize the current condition flags as a 0/1 value: emit SETcc into
   an 8 bit register, zero extend it with MOVZX, then either move it to
   dst (op < SLJIT_ADD) or combine it with dst through sljit_emit_op2.
   On x86-32 only registers with reg_map <= 4 have an addressable low
   byte; other destinations are staged through EAX (XCHG) or built with
   CMOV when available. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw,
	sljit_s32 type)
{
	sljit_u8 *inst;
	sljit_u8 cond_set = 0;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	sljit_s32 reg;
#endif
	/* ADJUST_LOCAL_OFFSET and CHECK_EXTRA_REGS might overwrite these values. */
	sljit_s32 dst_save = dst;
	sljit_sw dstw_save = dstw;

	CHECK_ERROR();
	CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, src, srcw, type));
	SLJIT_UNUSED_ARG(srcw);

	if (dst == SLJIT_UNUSED)
		return SLJIT_SUCCESS;

	ADJUST_LOCAL_OFFSET(dst, dstw);
	CHECK_EXTRA_REGS(dst, dstw, (void)0);

	type &= 0xff;
	/* setcc = jcc + 0x10. */
	cond_set = get_jump_code(type) + 0x10;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	/* Fast path: OR the flag byte directly into dst when dst == src. */
	if (GET_OPCODE(op) == SLJIT_OR && !GET_ALL_FLAGS(op) && FAST_IS_REG(dst) && dst == src) {
		inst = (sljit_u8*)ensure_buf(compiler, 1 + 4 + 3);
		FAIL_IF(!inst);
		INC_SIZE(4 + 3);
		/* Set low register to conditional flag. */
		*inst++ = (reg_map[TMP_REG1] <= 7) ? REX : REX_B;
		*inst++ = GROUP_0F;
		*inst++ = cond_set;
		*inst++ = MOD_REG | reg_lmap[TMP_REG1];
		*inst++ = REX | (reg_map[TMP_REG1] <= 7 ? 0 : REX_R) | (reg_map[dst] <= 7 ? 0 : REX_B);
		*inst++ = OR_rm8_r8;
		*inst++ = MOD_REG | (reg_lmap[TMP_REG1] << 3) | reg_lmap[dst];
		return SLJIT_SUCCESS;
	}

	/* A plain MOV can target dst directly; everything else builds the
	   value in TMP_REG1 first. */
	reg = (op == SLJIT_MOV && FAST_IS_REG(dst)) ? dst : TMP_REG1;

	inst = (sljit_u8*)ensure_buf(compiler, 1 + 4 + 4);
	FAIL_IF(!inst);
	INC_SIZE(4 + 4);
	/* Set low register to conditional flag. */
	*inst++ = (reg_map[reg] <= 7) ? REX : REX_B;
	*inst++ = GROUP_0F;
	*inst++ = cond_set;
	*inst++ = MOD_REG | reg_lmap[reg];
	*inst++ = REX_W | (reg_map[reg] <= 7 ? 0 : (REX_B | REX_R));
	/* The movzx instruction does not affect flags. */
	*inst++ = GROUP_0F;
	*inst++ = MOVZX_r_rm8;
	*inst = MOD_REG | (reg_lmap[reg] << 3) | reg_lmap[reg];

	if (reg != TMP_REG1)
		return SLJIT_SUCCESS;

	if (GET_OPCODE(op) < SLJIT_ADD) {
		compiler->mode32 = GET_OPCODE(op) != SLJIT_MOV;
		return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
	}
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
		|| (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
	compiler->skip_checks = 1;
#endif
	/* Combine the materialized flag with the saved destination. */
	return sljit_emit_op2(compiler, op, dst_save, dstw_save, dst_save, dstw_save, TMP_REG1, 0);

#else
	/* The SLJIT_CONFIG_X86_32 code path starts here. */
	if (GET_OPCODE(op) < SLJIT_ADD && FAST_IS_REG(dst)) {
		if (reg_map[dst] <= 4) {
			/* Low byte is accessible. */
			inst = (sljit_u8*)ensure_buf(compiler, 1 + 3 + 3);
			FAIL_IF(!inst);
			INC_SIZE(3 + 3);
			/* Set low byte to conditional flag. */
			*inst++ = GROUP_0F;
			*inst++ = cond_set;
			*inst++ = MOD_REG | reg_map[dst];

			*inst++ = GROUP_0F;
			*inst++ = MOVZX_r_rm8;
			*inst = MOD_REG | (reg_map[dst] << 3) | reg_map[dst];
			return SLJIT_SUCCESS;
		}

		/* Low byte is not accessible. */
		if (cpu_has_cmov == -1)
			get_cpu_features();

		if (cpu_has_cmov) {
			EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, 1);
			/* a xor reg, reg operation would overwrite the flags. */
			EMIT_MOV(compiler, dst, 0, SLJIT_IMM, 0);

			inst = (sljit_u8*)ensure_buf(compiler, 1 + 3);
			FAIL_IF(!inst);
			INC_SIZE(3);

			*inst++ = GROUP_0F;
			/* cmovcc = setcc - 0x50. */
			*inst++ = cond_set - 0x50;
			*inst++ = MOD_REG | (reg_map[dst] << 3) | reg_map[TMP_REG1];
			return SLJIT_SUCCESS;
		}

		/* No CMOV: stage the SETcc result in AL, swapping EAX in and out
		   around the sequence. */
		inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + 3 + 3 + 1);
		FAIL_IF(!inst);
		INC_SIZE(1 + 3 + 3 + 1);
		*inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
		/* Set al to conditional flag. */
		*inst++ = GROUP_0F;
		*inst++ = cond_set;
		*inst++ = MOD_REG | 0 /* eax */;

		*inst++ = GROUP_0F;
		*inst++ = MOVZX_r_rm8;
		*inst++ = MOD_REG | (reg_map[dst] << 3) | 0 /* eax */;
		*inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
		return SLJIT_SUCCESS;
	}

	/* OR fast path (dst == src, low byte accessible). */
	if (GET_OPCODE(op) == SLJIT_OR && !GET_ALL_FLAGS(op) && FAST_IS_REG(dst) && dst == src && reg_map[dst] <= 4) {
		SLJIT_ASSERT(reg_map[SLJIT_R0] == 0);

		if (dst != SLJIT_R0) {
			inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + 3 + 2 + 1);
			FAIL_IF(!inst);
			INC_SIZE(1 + 3 + 2 + 1);
			/* Set low register to conditional flag. */
			*inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
			*inst++ = GROUP_0F;
			*inst++ = cond_set;
			*inst++ = MOD_REG | 0 /* eax */;
			*inst++ = OR_rm8_r8;
			*inst++ = MOD_REG | (0 /* eax */ << 3) | reg_map[dst];
			*inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
		}
		else {
			/* dst is EAX itself: use ECX as the scratch instead. */
			inst = (sljit_u8*)ensure_buf(compiler, 1 + 2 + 3 + 2 + 2);
			FAIL_IF(!inst);
			INC_SIZE(2 + 3 + 2 + 2);
			/* Set low register to conditional flag. */
			*inst++ = XCHG_r_rm;
			*inst++ = MOD_REG | (1 /* ecx */ << 3) | reg_map[TMP_REG1];
			*inst++ = GROUP_0F;
			*inst++ = cond_set;
			*inst++ = MOD_REG | 1 /* ecx */;
			*inst++ = OR_rm8_r8;
			*inst++ = MOD_REG | (1 /* ecx */ << 3) | 0 /* eax */;
			*inst++ = XCHG_r_rm;
			*inst++ = MOD_REG | (1 /* ecx */ << 3) | reg_map[TMP_REG1];
		}
		return SLJIT_SUCCESS;
	}

	/* Set TMP_REG1 to the bit. */
	inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + 3 + 3 + 1);
	FAIL_IF(!inst);
	INC_SIZE(1 + 3 + 3 + 1);
	*inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
	/* Set al to conditional flag.
	   AL holds the flag; zero extend it and swap EAX back. */
	*inst++ = GROUP_0F;
	*inst++ = cond_set;
	*inst++ = MOD_REG | 0 /* eax */;

	*inst++ = GROUP_0F;
	*inst++ = MOVZX_r_rm8;
	*inst++ = MOD_REG | (0 << 3) /* eax */ | 0 /* eax */;

	*inst++ = XCHG_EAX_r + reg_map[TMP_REG1];

	if (GET_OPCODE(op) < SLJIT_ADD)
		return emit_mov(compiler, dst, dstw, TMP_REG1, 0);

#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
		|| (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
	compiler->skip_checks = 1;
#endif
	/* Combine the materialized flag with the saved destination. */
	return sljit_emit_op2(compiler, op, dst_save, dstw_save, dst_save, dstw_save, TMP_REG1, 0);
#endif /* SLJIT_CONFIG_X86_64 */
}

/* Compute the address SLJIT_SP + offset (the address of a local variable)
   into dst, using LEA where a non-zero offset is needed. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_local_base(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw offset)
{
	CHECK_ERROR();
	CHECK(check_sljit_get_local_base(compiler, dst, dstw, offset));
	ADJUST_LOCAL_OFFSET(dst, dstw);

	CHECK_EXTRA_REGS(dst, dstw, (void)0);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	/* Address arithmetic needs full width operands. */
	compiler->mode32 = 0;
#endif

	/* Translate the virtual local offset into the real stack offset. */
	ADJUST_LOCAL_OFFSET(SLJIT_MEM1(SLJIT_SP), offset);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	if (NOT_HALFWORD(offset)) {
		/* The displacement does not fit in 32 bits: load it into
		   TMP_REG1 and add with LEA. */
		FAIL_IF(emit_load_imm64(compiler, TMP_REG1, offset));
#if (defined SLJIT_DEBUG && SLJIT_DEBUG)
		SLJIT_ASSERT(emit_lea_binary(compiler, dst, dstw, SLJIT_SP, 0, TMP_REG1, 0) != SLJIT_ERR_UNSUPPORTED);
		return compiler->error;
#else
		return emit_lea_binary(compiler, dst, dstw, SLJIT_SP, 0, TMP_REG1, 0);
#endif
	}
#endif

	if (offset != 0)
		return emit_lea_binary(compiler, dst, dstw, SLJIT_SP, 0, SLJIT_IMM, offset);
	return emit_mov(compiler, dst, dstw, SLJIT_SP, 0);
}

/* Emit a patchable constant load (see sljit_set_const). The immediate is
   loaded and a const record (zero byte, then 1) is inserted into the
   instruction stream for the code generator (the generator itself is
   outside this chunk). */
SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value)
{
	sljit_u8 *inst;
	struct sljit_const *const_;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	sljit_s32 reg;
#endif

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value));
	ADJUST_LOCAL_OFFSET(dst, dstw);

	CHECK_EXTRA_REGS(dst, dstw, (void)0);

	const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const));
	PTR_FAIL_IF(!const_);
	set_const(const_, compiler);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	/* A 64 bit immediate always goes through a register. */
	compiler->mode32 = 0;
	reg = SLOW_IS_REG(dst) ? dst : TMP_REG1;

	if (emit_load_imm64(compiler, reg, init_value))
		return NULL;
#else
	if (dst == SLJIT_UNUSED)
		dst = TMP_REG1;

	if (emit_mov(compiler, dst, dstw, SLJIT_IMM, init_value))
		return NULL;
#endif

	/* Const record: zero byte, then 1. */
	inst = (sljit_u8*)ensure_buf(compiler, 2);
	PTR_FAIL_IF(!inst);

	*inst++ = 0;
	*inst++ = 1;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	/* Memory destination: spill the loaded value from TMP_REG1. */
	if (dst & SLJIT_MEM)
		if (emit_mov(compiler, dst, dstw, TMP_REG1, 0))
			return NULL;
#endif

	return const_;
}

/* Patch the target of a generated jump. On x86-32 a rel32 displacement is
   stored (measured from the end of the 4 byte field, adjusted by the
   executable offset); on x86-64 the absolute 64 bit address is stored. */
SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset)
{
	SLJIT_UNUSED_ARG(executable_offset);
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	sljit_unaligned_store_sw((void*)addr, new_target - (addr + 4) - (sljit_uw)executable_offset);
#else
	sljit_unaligned_store_sw((void*)addr, (sljit_sw) new_target);
#endif
}

/* Patch the immediate of a constant created by sljit_emit_const. */
SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset)
{
	SLJIT_UNUSED_ARG(executable_offset);
	sljit_unaligned_store_sw((void*)addr, new_constant);
}

/* Return non-zero when SSE2 is usable: detected lazily (and cached in
   cpu_has_sse2) when SLJIT_DETECT_SSE2 is enabled, otherwise assumed. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_x86_is_sse2_available(void)
{
#if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
	if (cpu_has_sse2 == -1)
		get_cpu_features();
	return cpu_has_sse2;
#else
	return 1;
#endif
}

/* Return non-zero when the CPU supports CMOVcc; detected once and cached
   in cpu_has_cmov. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_x86_is_cmov_available(void)
{
	if (cpu_has_cmov == -1)
		get_cpu_features();
	return cpu_has_cmov;
}

/* x86 specific extension: emit CMOVcc dst_reg, src using the current
   condition flags. src may be an immediate (loaded into TMP_REG1 first,
   since CMOV has no immediate form). dst_reg may carry SLJIT_I32_OP for
   a 32 bit conditional move. Callers must check
   sljit_x86_is_cmov_available() first. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_x86_emit_cmov(struct sljit_compiler *compiler,
	sljit_s32 type,
	sljit_s32 dst_reg,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_u8* inst;

	CHECK_ERROR();
#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
	CHECK_ARGUMENT(sljit_x86_is_cmov_available());
	CHECK_ARGUMENT(!(type & ~(0xff | SLJIT_I32_OP)));
	CHECK_ARGUMENT((type & 0xff) >= SLJIT_EQUAL && (type & 0xff) <= SLJIT_ORDERED_F64);
	CHECK_ARGUMENT(FUNCTION_CHECK_IS_REG(dst_reg & ~SLJIT_I32_OP));
	FUNCTION_CHECK_SRC(src, srcw);

	/* The condition must match the flags set by the most recent flag
	   setting operation. */
	if ((type & 0xff) <= SLJIT_NOT_ZERO)
		CHECK_ARGUMENT(compiler->last_flags & SLJIT_SET_Z);
	else
		CHECK_ARGUMENT((type & 0xff) == (compiler->last_flags & 0xff));
#endif
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
	if (SLJIT_UNLIKELY(!!compiler->verbose)) {
		fprintf(compiler->verbose, "  x86_cmov%s %s%s, ",
			!(dst_reg & SLJIT_I32_OP) ? "" : ".i",
			jump_names[type & 0xff], JUMP_POSTFIX(type));
		sljit_verbose_reg(compiler, dst_reg & ~SLJIT_I32_OP);
		fprintf(compiler->verbose, ", ");
		sljit_verbose_param(compiler, src, srcw);
		fprintf(compiler->verbose, "\n");
	}
#endif

	ADJUST_LOCAL_OFFSET(src, srcw);
	CHECK_EXTRA_REGS(src, srcw, (void)0);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	/* Operand size follows the SLJIT_I32_OP bit of the destination. */
	compiler->mode32 = dst_reg & SLJIT_I32_OP;
#endif
	dst_reg &= ~SLJIT_I32_OP;

	if (SLJIT_UNLIKELY(src & SLJIT_IMM)) {
		/* CMOV has no immediate form. */
		EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, srcw);
		src = TMP_REG1;
		srcw = 0;
	}

	/* cmovcc opcode = jcc opcode - 0x40 (0F 4x). */
	inst = emit_x86_instruction(compiler, 2, dst_reg, 0, src, srcw);
	FAIL_IF(!inst);
	*inst++ = GROUP_0F;
	*inst = get_jump_code(type & 0xff) - 0x40;
	return SLJIT_SUCCESS;
}