1 1.5 riastrad /* $NetBSD: sljitNativeARM_64.c,v 1.5 2024/04/02 22:29:57 riastradh Exp $ */ 2 1.2 alnsn 3 1.1 alnsn /* 4 1.1 alnsn * Stack-less Just-In-Time compiler 5 1.1 alnsn * 6 1.4 alnsn * Copyright Zoltan Herczeg (hzmester (at) freemail.hu). All rights reserved. 7 1.1 alnsn * 8 1.1 alnsn * Redistribution and use in source and binary forms, with or without modification, are 9 1.1 alnsn * permitted provided that the following conditions are met: 10 1.1 alnsn * 11 1.1 alnsn * 1. Redistributions of source code must retain the above copyright notice, this list of 12 1.1 alnsn * conditions and the following disclaimer. 13 1.1 alnsn * 14 1.1 alnsn * 2. Redistributions in binary form must reproduce the above copyright notice, this list 15 1.1 alnsn * of conditions and the following disclaimer in the documentation and/or other materials 16 1.1 alnsn * provided with the distribution. 17 1.1 alnsn * 18 1.1 alnsn * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY 19 1.1 alnsn * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 20 1.1 alnsn * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT 21 1.1 alnsn * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 22 1.1 alnsn * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED 23 1.1 alnsn * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR 24 1.1 alnsn * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 25 1.1 alnsn * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 26 1.1 alnsn * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 1.1 alnsn */ 28 1.1 alnsn 29 1.3 alnsn SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void) 30 1.1 alnsn { 31 1.1 alnsn return "ARM-64" SLJIT_CPUINFO; 32 1.1 alnsn } 33 1.1 alnsn 34 1.1 alnsn /* Length of an instruction word */ 35 1.3 alnsn typedef sljit_u32 sljit_ins; 36 1.1 alnsn 37 1.3 alnsn #define TMP_ZERO (0) 38 1.1 alnsn 39 1.3 alnsn #define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2) 40 1.3 alnsn #define TMP_REG2 (SLJIT_NUMBER_OF_REGISTERS + 3) 41 1.3 alnsn #define TMP_REG3 (SLJIT_NUMBER_OF_REGISTERS + 4) 42 1.3 alnsn #define TMP_LR (SLJIT_NUMBER_OF_REGISTERS + 5) 43 1.3 alnsn #define TMP_SP (SLJIT_NUMBER_OF_REGISTERS + 6) 44 1.1 alnsn 45 1.1 alnsn #define TMP_FREG1 (0) 46 1.3 alnsn #define TMP_FREG2 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1) 47 1.1 alnsn 48 1.3 alnsn static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 8] = { 49 1.3 alnsn 31, 0, 1, 2, 3, 4, 5, 6, 7, 12, 13, 14, 15, 16, 17, 8, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 29, 9, 10, 11, 30, 31 50 1.1 alnsn }; 51 1.1 alnsn 52 1.1 alnsn #define W_OP (1 << 31) 53 1.1 alnsn #define RD(rd) (reg_map[rd]) 54 1.1 alnsn #define RT(rt) (reg_map[rt]) 55 1.1 alnsn #define RN(rn) (reg_map[rn] << 5) 56 1.1 alnsn #define RT2(rt2) (reg_map[rt2] << 10) 57 1.1 alnsn #define RM(rm) (reg_map[rm] << 16) 58 1.1 alnsn #define VD(vd) (vd) 59 1.1 alnsn #define VT(vt) (vt) 60 1.1 alnsn #define VN(vn) ((vn) << 5) 61 1.1 alnsn #define VM(vm) ((vm) << 16) 62 1.1 alnsn 63 1.1 alnsn /* --------------------------------------------------------------------- */ 64 1.1 alnsn /* Instrucion forms */ 65 1.1 alnsn /* --------------------------------------------------------------------- */ 66 1.1 alnsn 67 1.1 alnsn #define ADC 0x9a000000 68 1.1 alnsn #define ADD 0x8b000000 69 1.1 alnsn #define ADDI 0x91000000 70 1.1 alnsn #define AND 0x8a000000 71 1.1 alnsn #define ANDI 0x92000000 72 1.1 alnsn #define ASRV 0x9ac02800 73 1.1 alnsn #define B 0x14000000 74 1.1 alnsn #define B_CC 0x54000000 75 1.1 alnsn #define BL 0x94000000 76 1.1 alnsn #define BLR 0xd63f0000 77 1.1 alnsn #define BR 0xd61f0000 78 1.1 alnsn #define BRK 0xd4200000 79 1.1 alnsn #define CBZ 0xb4000000 80 1.1 alnsn #define CLZ 0xdac01000 81 1.1 alnsn #define CSINC 0x9a800400 82 1.1 alnsn #define EOR 0xca000000 83 1.1 alnsn #define EORI 0xd2000000 84 1.1 alnsn #define FABS 0x1e60c000 85 1.1 alnsn #define FADD 0x1e602800 86 1.1 alnsn #define FCMP 0x1e602000 87 1.3 alnsn #define FCVT 0x1e224000 88 1.3 alnsn #define FCVTZS 0x9e780000 89 1.1 alnsn #define FDIV 0x1e601800 90 1.1 alnsn #define FMOV 0x1e604000 91 1.1 alnsn #define FMUL 0x1e600800 92 1.1 alnsn #define FNEG 0x1e614000 93 1.1 alnsn #define FSUB 0x1e603800 94 1.1 alnsn #define LDRI 0xf9400000 95 1.1 alnsn #define LDP 0xa9400000 96 1.1 alnsn #define LDP_PST 0xa8c00000 97 1.1 alnsn #define LSLV 0x9ac02000 98 1.1 alnsn #define LSRV 0x9ac02400 99 1.1 alnsn #define MADD 0x9b000000 100 1.1 alnsn #define MOVK 0xf2800000 101 1.1 alnsn #define MOVN 0x92800000 102 1.1 alnsn #define MOVZ 0xd2800000 103 1.1 alnsn #define NOP 0xd503201f 104 1.1 alnsn #define ORN 0xaa200000 105 1.1 alnsn #define ORR 0xaa000000 106 1.1 alnsn #define ORRI 0xb2000000 107 1.1 alnsn #define RET 0xd65f0000 108 1.1 alnsn #define SBC 0xda000000 109 1.1 alnsn #define SBFM 0x93000000 110 1.3 alnsn #define SCVTF 0x9e620000 111 1.1 alnsn #define SDIV 0x9ac00c00 112 1.1 alnsn #define SMADDL 0x9b200000 113 1.1 alnsn #define SMULH 0x9b403c00 114 1.1 alnsn #define STP 0xa9000000 115 1.1 alnsn #define STP_PRE 0xa9800000 116 1.1 alnsn #define STRI 0xf9000000 117 1.1 alnsn #define STR_FI 0x3d000000 118 1.1 alnsn #define STR_FR 0x3c206800 119 1.1 alnsn #define STUR_FI 0x3c000000 120 1.1 alnsn #define SUB 0xcb000000 121 1.1 alnsn #define SUBI 0xd1000000 122 1.1 alnsn #define SUBS 0xeb000000 123 1.1 alnsn #define UBFM 0xd3000000 124 1.1 alnsn #define UDIV 0x9ac00800 125 1.1 alnsn #define UMULH 0x9bc03c00 126 1.1 alnsn 127 1.1 alnsn /* dest_reg is the absolute name of the register 128 1.1 alnsn Useful for reordering instructions in the delay slot. */ 129 1.3 alnsn static sljit_s32 push_inst(struct sljit_compiler *compiler, sljit_ins ins) 130 1.1 alnsn { 131 1.1 alnsn sljit_ins *ptr = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins)); 132 1.1 alnsn FAIL_IF(!ptr); 133 1.1 alnsn *ptr = ins; 134 1.1 alnsn compiler->size++; 135 1.1 alnsn return SLJIT_SUCCESS; 136 1.1 alnsn } 137 1.1 alnsn 138 1.3 alnsn static SLJIT_INLINE sljit_s32 emit_imm64_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_uw imm) 139 1.1 alnsn { 140 1.1 alnsn FAIL_IF(push_inst(compiler, MOVZ | RD(dst) | ((imm & 0xffff) << 5))); 141 1.1 alnsn FAIL_IF(push_inst(compiler, MOVK | RD(dst) | (((imm >> 16) & 0xffff) << 5) | (1 << 21))); 142 1.1 alnsn FAIL_IF(push_inst(compiler, MOVK | RD(dst) | (((imm >> 32) & 0xffff) << 5) | (2 << 21))); 143 1.1 alnsn return push_inst(compiler, MOVK | RD(dst) | ((imm >> 48) << 5) | (3 << 21)); 144 1.1 alnsn } 145 1.1 alnsn 146 1.1 alnsn static SLJIT_INLINE void modify_imm64_const(sljit_ins* inst, sljit_uw new_imm) 147 1.1 alnsn { 148 1.3 alnsn sljit_s32 dst = inst[0] & 0x1f; 149 1.1 alnsn SLJIT_ASSERT((inst[0] & 0xffe00000) == MOVZ && (inst[1] & 0xffe00000) == (MOVK | (1 << 21))); 150 1.1 alnsn inst[0] = MOVZ | dst | ((new_imm & 0xffff) << 5); 151 1.1 alnsn inst[1] = MOVK | dst | (((new_imm >> 16) & 0xffff) << 5) | (1 << 21); 152 1.1 alnsn inst[2] = MOVK | dst | (((new_imm >> 32) & 0xffff) << 5) | (2 << 21); 153 1.1 alnsn inst[3] = MOVK | dst | ((new_imm >> 48) << 5) | (3 << 21); 154 1.1 alnsn } 155 1.1 alnsn 156 1.4 alnsn static SLJIT_INLINE sljit_s32 detect_jump_type(struct sljit_jump *jump, sljit_ins *code_ptr, sljit_ins *code, sljit_sw executable_offset) 157 1.1 alnsn { 158 1.1 alnsn sljit_sw diff; 159 1.1 alnsn sljit_uw target_addr; 160 1.1 alnsn 161 1.1 alnsn if (jump->flags & SLJIT_REWRITABLE_JUMP) { 162 1.1 alnsn jump->flags |= PATCH_ABS64; 163 1.1 alnsn return 0; 164 1.1 alnsn } 165 1.1 alnsn 166 1.1 alnsn if (jump->flags & JUMP_ADDR) 167 1.1 alnsn target_addr = jump->u.target; 168 1.1 alnsn else { 169 1.1 alnsn SLJIT_ASSERT(jump->flags & JUMP_LABEL); 170 1.4 alnsn target_addr = (sljit_uw)(code + jump->u.label->size) + (sljit_uw)executable_offset; 171 1.1 alnsn } 172 1.4 alnsn 173 1.4 alnsn diff = (sljit_sw)target_addr - (sljit_sw)(code_ptr + 4) - executable_offset; 174 1.1 alnsn 175 1.1 alnsn if (jump->flags & IS_COND) { 176 1.1 alnsn diff += sizeof(sljit_ins); 177 1.1 alnsn if (diff <= 0xfffff && diff >= -0x100000) { 178 1.1 alnsn code_ptr[-5] ^= (jump->flags & IS_CBZ) ? (0x1 << 24) : 0x1; 179 1.1 alnsn jump->addr -= sizeof(sljit_ins); 180 1.1 alnsn jump->flags |= PATCH_COND; 181 1.1 alnsn return 5; 182 1.1 alnsn } 183 1.1 alnsn diff -= sizeof(sljit_ins); 184 1.1 alnsn } 185 1.1 alnsn 186 1.1 alnsn if (diff <= 0x7ffffff && diff >= -0x8000000) { 187 1.1 alnsn jump->flags |= PATCH_B; 188 1.1 alnsn return 4; 189 1.1 alnsn } 190 1.1 alnsn 191 1.1 alnsn if (target_addr <= 0xffffffffl) { 192 1.1 alnsn if (jump->flags & IS_COND) 193 1.1 alnsn code_ptr[-5] -= (2 << 5); 194 1.1 alnsn code_ptr[-2] = code_ptr[0]; 195 1.1 alnsn return 2; 196 1.1 alnsn } 197 1.1 alnsn if (target_addr <= 0xffffffffffffl) { 198 1.1 alnsn if (jump->flags & IS_COND) 199 1.1 alnsn code_ptr[-5] -= (1 << 5); 200 1.1 alnsn jump->flags |= PATCH_ABS48; 201 1.1 alnsn code_ptr[-1] = code_ptr[0]; 202 1.1 alnsn return 1; 203 1.1 alnsn } 204 1.1 alnsn 205 1.1 alnsn jump->flags |= PATCH_ABS64; 206 1.1 alnsn return 0; 207 1.1 alnsn } 208 1.1 alnsn 209 1.1 alnsn SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler) 210 1.1 alnsn { 211 1.1 alnsn struct sljit_memory_fragment *buf; 212 1.1 alnsn sljit_ins *code; 213 1.1 alnsn sljit_ins *code_ptr; 214 1.1 alnsn sljit_ins *buf_ptr; 215 1.1 alnsn sljit_ins *buf_end; 216 1.1 alnsn sljit_uw word_count; 217 1.4 alnsn sljit_sw executable_offset; 218 1.1 alnsn sljit_uw addr; 219 1.3 alnsn sljit_s32 dst; 220 1.1 alnsn 221 1.1 alnsn struct sljit_label *label; 222 1.1 alnsn struct sljit_jump *jump; 223 1.1 alnsn struct sljit_const *const_; 224 1.1 alnsn 225 1.1 alnsn CHECK_ERROR_PTR(); 226 1.3 alnsn CHECK_PTR(check_sljit_generate_code(compiler)); 227 1.1 alnsn reverse_buf(compiler); 228 1.1 alnsn 229 1.1 alnsn code = (sljit_ins*)SLJIT_MALLOC_EXEC(compiler->size * sizeof(sljit_ins)); 230 1.1 alnsn PTR_FAIL_WITH_EXEC_IF(code); 231 1.1 alnsn buf = compiler->buf; 232 1.1 alnsn 233 1.1 alnsn code_ptr = code; 234 1.1 alnsn word_count = 0; 235 1.4 alnsn executable_offset = SLJIT_EXEC_OFFSET(code); 236 1.4 alnsn 237 1.1 alnsn label = compiler->labels; 238 1.1 alnsn jump = compiler->jumps; 239 1.1 alnsn const_ = compiler->consts; 240 1.1 alnsn 241 1.1 alnsn do { 242 1.1 alnsn buf_ptr = (sljit_ins*)buf->memory; 243 1.1 alnsn buf_end = buf_ptr + (buf->used_size >> 2); 244 1.1 alnsn do { 245 1.1 alnsn *code_ptr = *buf_ptr++; 246 1.1 alnsn /* These structures are ordered by their address. */ 247 1.1 alnsn SLJIT_ASSERT(!label || label->size >= word_count); 248 1.1 alnsn SLJIT_ASSERT(!jump || jump->addr >= word_count); 249 1.1 alnsn SLJIT_ASSERT(!const_ || const_->addr >= word_count); 250 1.1 alnsn if (label && label->size == word_count) { 251 1.4 alnsn label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); 252 1.1 alnsn label->size = code_ptr - code; 253 1.1 alnsn label = label->next; 254 1.1 alnsn } 255 1.1 alnsn if (jump && jump->addr == word_count) { 256 1.1 alnsn jump->addr = (sljit_uw)(code_ptr - 4); 257 1.4 alnsn code_ptr -= detect_jump_type(jump, code_ptr, code, executable_offset); 258 1.1 alnsn jump = jump->next; 259 1.1 alnsn } 260 1.1 alnsn if (const_ && const_->addr == word_count) { 261 1.1 alnsn const_->addr = (sljit_uw)code_ptr; 262 1.1 alnsn const_ = const_->next; 263 1.1 alnsn } 264 1.1 alnsn code_ptr ++; 265 1.1 alnsn word_count ++; 266 1.1 alnsn } while (buf_ptr < buf_end); 267 1.1 alnsn 268 1.1 alnsn buf = buf->next; 269 1.1 alnsn } while (buf); 270 1.1 alnsn 271 1.1 alnsn if (label && label->size == word_count) { 272 1.4 alnsn label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); 273 1.1 alnsn label->size = code_ptr - code; 274 1.1 alnsn label = label->next; 275 1.1 alnsn } 276 1.1 alnsn 277 1.1 alnsn SLJIT_ASSERT(!label); 278 1.1 alnsn SLJIT_ASSERT(!jump); 279 1.1 alnsn SLJIT_ASSERT(!const_); 280 1.1 alnsn SLJIT_ASSERT(code_ptr - code <= (sljit_sw)compiler->size); 281 1.1 alnsn 282 1.1 alnsn jump = compiler->jumps; 283 1.1 alnsn while (jump) { 284 1.1 alnsn do { 285 1.1 alnsn addr = (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target; 286 1.4 alnsn buf_ptr = (sljit_ins *)jump->addr; 287 1.4 alnsn 288 1.1 alnsn if (jump->flags & PATCH_B) { 289 1.4 alnsn addr = (sljit_sw)(addr - (sljit_uw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset)) >> 2; 290 1.1 alnsn SLJIT_ASSERT((sljit_sw)addr <= 0x1ffffff && (sljit_sw)addr >= -0x2000000); 291 1.1 alnsn buf_ptr[0] = ((jump->flags & IS_BL) ? BL : B) | (addr & 0x3ffffff); 292 1.1 alnsn if (jump->flags & IS_COND) 293 1.1 alnsn buf_ptr[-1] -= (4 << 5); 294 1.1 alnsn break; 295 1.1 alnsn } 296 1.1 alnsn if (jump->flags & PATCH_COND) { 297 1.4 alnsn addr = (sljit_sw)(addr - (sljit_uw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset)) >> 2; 298 1.1 alnsn SLJIT_ASSERT((sljit_sw)addr <= 0x3ffff && (sljit_sw)addr >= -0x40000); 299 1.1 alnsn buf_ptr[0] = (buf_ptr[0] & ~0xffffe0) | ((addr & 0x7ffff) << 5); 300 1.1 alnsn break; 301 1.1 alnsn } 302 1.1 alnsn 303 1.1 alnsn SLJIT_ASSERT((jump->flags & (PATCH_ABS48 | PATCH_ABS64)) || addr <= 0xffffffffl); 304 1.1 alnsn SLJIT_ASSERT((jump->flags & PATCH_ABS64) || addr <= 0xffffffffffffl); 305 1.1 alnsn 306 1.1 alnsn dst = buf_ptr[0] & 0x1f; 307 1.1 alnsn buf_ptr[0] = MOVZ | dst | ((addr & 0xffff) << 5); 308 1.1 alnsn buf_ptr[1] = MOVK | dst | (((addr >> 16) & 0xffff) << 5) | (1 << 21); 309 1.1 alnsn if (jump->flags & (PATCH_ABS48 | PATCH_ABS64)) 310 1.1 alnsn buf_ptr[2] = MOVK | dst | (((addr >> 32) & 0xffff) << 5) | (2 << 21); 311 1.1 alnsn if (jump->flags & PATCH_ABS64) 312 1.1 alnsn buf_ptr[3] = MOVK | dst | (((addr >> 48) & 0xffff) << 5) | (3 << 21); 313 1.1 alnsn } while (0); 314 1.1 alnsn jump = jump->next; 315 1.1 alnsn } 316 1.1 alnsn 317 1.1 alnsn compiler->error = SLJIT_ERR_COMPILED; 318 1.4 alnsn compiler->executable_offset = executable_offset; 319 1.1 alnsn compiler->executable_size = (code_ptr - code) * sizeof(sljit_ins); 320 1.4 alnsn 321 1.4 alnsn code = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(code, executable_offset); 322 1.4 alnsn code_ptr = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); 323 1.4 alnsn 324 1.1 alnsn SLJIT_CACHE_FLUSH(code, code_ptr); 325 1.1 alnsn return code; 326 1.1 alnsn } 327 1.1 alnsn 328 1.1 alnsn /* --------------------------------------------------------------------- */ 329 1.1 alnsn /* Core code generator functions. */ 330 1.1 alnsn /* --------------------------------------------------------------------- */ 331 1.1 alnsn 332 1.1 alnsn #define COUNT_TRAILING_ZERO(value, result) \ 333 1.1 alnsn result = 0; \ 334 1.1 alnsn if (!(value & 0xffffffff)) { \ 335 1.1 alnsn result += 32; \ 336 1.1 alnsn value >>= 32; \ 337 1.1 alnsn } \ 338 1.1 alnsn if (!(value & 0xffff)) { \ 339 1.1 alnsn result += 16; \ 340 1.1 alnsn value >>= 16; \ 341 1.1 alnsn } \ 342 1.1 alnsn if (!(value & 0xff)) { \ 343 1.1 alnsn result += 8; \ 344 1.1 alnsn value >>= 8; \ 345 1.1 alnsn } \ 346 1.1 alnsn if (!(value & 0xf)) { \ 347 1.1 alnsn result += 4; \ 348 1.1 alnsn value >>= 4; \ 349 1.1 alnsn } \ 350 1.1 alnsn if (!(value & 0x3)) { \ 351 1.1 alnsn result += 2; \ 352 1.1 alnsn value >>= 2; \ 353 1.1 alnsn } \ 354 1.1 alnsn if (!(value & 0x1)) { \ 355 1.1 alnsn result += 1; \ 356 1.1 alnsn value >>= 1; \ 357 1.1 alnsn } 358 1.1 alnsn 359 1.1 alnsn #define LOGICAL_IMM_CHECK 0x100 360 1.1 alnsn 361 1.3 alnsn static sljit_ins logical_imm(sljit_sw imm, sljit_s32 len) 362 1.1 alnsn { 363 1.3 alnsn sljit_s32 negated, ones, right; 364 1.1 alnsn sljit_uw mask, uimm; 365 1.1 alnsn sljit_ins ins; 366 1.1 alnsn 367 1.1 alnsn if (len & LOGICAL_IMM_CHECK) { 368 1.1 alnsn len &= ~LOGICAL_IMM_CHECK; 369 1.1 alnsn if (len == 32 && (imm == 0 || imm == -1)) 370 1.1 alnsn return 0; 371 1.3 alnsn if (len == 16 && ((sljit_s32)imm == 0 || (sljit_s32)imm == -1)) 372 1.1 alnsn return 0; 373 1.1 alnsn } 374 1.1 alnsn 375 1.1 alnsn SLJIT_ASSERT((len == 32 && imm != 0 && imm != -1) 376 1.3 alnsn || (len == 16 && (sljit_s32)imm != 0 && (sljit_s32)imm != -1)); 377 1.1 alnsn uimm = (sljit_uw)imm; 378 1.1 alnsn while (1) { 379 1.1 alnsn if (len <= 0) { 380 1.4 alnsn SLJIT_UNREACHABLE(); 381 1.1 alnsn return 0; 382 1.1 alnsn } 383 1.1 alnsn mask = ((sljit_uw)1 << len) - 1; 384 1.1 alnsn if ((uimm & mask) != ((uimm >> len) & mask)) 385 1.1 alnsn break; 386 1.1 alnsn len >>= 1; 387 1.1 alnsn } 388 1.1 alnsn 389 1.1 alnsn len <<= 1; 390 1.1 alnsn 391 1.1 alnsn negated = 0; 392 1.1 alnsn if (uimm & 0x1) { 393 1.1 alnsn negated = 1; 394 1.1 alnsn uimm = ~uimm; 395 1.1 alnsn } 396 1.1 alnsn 397 1.1 alnsn if (len < 64) 398 1.1 alnsn uimm &= ((sljit_uw)1 << len) - 1; 399 1.1 alnsn 400 1.1 alnsn /* Unsigned right shift. */ 401 1.1 alnsn COUNT_TRAILING_ZERO(uimm, right); 402 1.1 alnsn 403 1.1 alnsn /* Signed shift. We also know that the highest bit is set. */ 404 1.1 alnsn imm = (sljit_sw)~uimm; 405 1.1 alnsn SLJIT_ASSERT(imm < 0); 406 1.1 alnsn 407 1.1 alnsn COUNT_TRAILING_ZERO(imm, ones); 408 1.1 alnsn 409 1.1 alnsn if (~imm) 410 1.1 alnsn return 0; 411 1.1 alnsn 412 1.1 alnsn if (len == 64) 413 1.1 alnsn ins = 1 << 22; 414 1.1 alnsn else 415 1.1 alnsn ins = (0x3f - ((len << 1) - 1)) << 10; 416 1.1 alnsn 417 1.1 alnsn if (negated) 418 1.1 alnsn return ins | ((len - ones - 1) << 10) | ((len - ones - right) << 16); 419 1.1 alnsn 420 1.1 alnsn return ins | ((ones - 1) << 10) | ((len - right) << 16); 421 1.1 alnsn } 422 1.1 alnsn 423 1.1 alnsn #undef COUNT_TRAILING_ZERO 424 1.1 alnsn 425 1.3 alnsn static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw simm) 426 1.1 alnsn { 427 1.1 alnsn sljit_uw imm = (sljit_uw)simm; 428 1.3 alnsn sljit_s32 i, zeros, ones, first; 429 1.1 alnsn sljit_ins bitmask; 430 1.1 alnsn 431 1.1 alnsn if (imm <= 0xffff) 432 1.1 alnsn return push_inst(compiler, MOVZ | RD(dst) | (imm << 5)); 433 1.1 alnsn 434 1.1 alnsn if (simm >= -0x10000 && simm < 0) 435 1.1 alnsn return push_inst(compiler, MOVN | RD(dst) | ((~imm & 0xffff) << 5)); 436 1.1 alnsn 437 1.1 alnsn if (imm <= 0xffffffffl) { 438 1.1 alnsn if ((imm & 0xffff0000l) == 0xffff0000) 439 1.1 alnsn return push_inst(compiler, (MOVN ^ W_OP) | RD(dst) | ((~imm & 0xffff) << 5)); 440 1.1 alnsn if ((imm & 0xffff) == 0xffff) 441 1.1 alnsn return push_inst(compiler, (MOVN ^ W_OP) | RD(dst) | ((~imm & 0xffff0000l) >> (16 - 5)) | (1 << 21)); 442 1.1 alnsn bitmask = logical_imm(simm, 16); 443 1.1 alnsn if (bitmask != 0) 444 1.1 alnsn return push_inst(compiler, (ORRI ^ W_OP) | RD(dst) | RN(TMP_ZERO) | bitmask); 445 1.1 alnsn } 446 1.1 alnsn else { 447 1.1 alnsn bitmask = logical_imm(simm, 32); 448 1.1 alnsn if (bitmask != 0) 449 1.1 alnsn return push_inst(compiler, ORRI | RD(dst) | RN(TMP_ZERO) | bitmask); 450 1.1 alnsn } 451 1.1 alnsn 452 1.1 alnsn if (imm <= 0xffffffffl) { 453 1.1 alnsn FAIL_IF(push_inst(compiler, MOVZ | RD(dst) | ((imm & 0xffff) << 5))); 454 1.1 alnsn return push_inst(compiler, MOVK | RD(dst) | ((imm & 0xffff0000l) >> (16 - 5)) | (1 << 21)); 455 1.1 alnsn } 456 1.1 alnsn 457 1.1 alnsn if (simm >= -0x100000000l && simm < 0) { 458 1.1 alnsn FAIL_IF(push_inst(compiler, MOVN | RD(dst) | ((~imm & 0xffff) << 5))); 459 1.1 alnsn return push_inst(compiler, MOVK | RD(dst) | ((imm & 0xffff0000l) >> (16 - 5)) | (1 << 21)); 460 1.1 alnsn } 461 1.1 alnsn 462 1.1 alnsn /* A large amount of number can be constructed from ORR and MOVx, 463 1.1 alnsn but computing them is costly. We don't */ 464 1.1 alnsn 465 1.1 alnsn zeros = 0; 466 1.1 alnsn ones = 0; 467 1.1 alnsn for (i = 4; i > 0; i--) { 468 1.1 alnsn if ((simm & 0xffff) == 0) 469 1.1 alnsn zeros++; 470 1.1 alnsn if ((simm & 0xffff) == 0xffff) 471 1.1 alnsn ones++; 472 1.1 alnsn simm >>= 16; 473 1.1 alnsn } 474 1.1 alnsn 475 1.1 alnsn simm = (sljit_sw)imm; 476 1.1 alnsn first = 1; 477 1.1 alnsn if (ones > zeros) { 478 1.1 alnsn simm = ~simm; 479 1.1 alnsn for (i = 0; i < 4; i++) { 480 1.1 alnsn if (!(simm & 0xffff)) { 481 1.1 alnsn simm >>= 16; 482 1.1 alnsn continue; 483 1.1 alnsn } 484 1.1 alnsn if (first) { 485 1.1 alnsn first = 0; 486 1.1 alnsn FAIL_IF(push_inst(compiler, MOVN | RD(dst) | ((simm & 0xffff) << 5) | (i << 21))); 487 1.1 alnsn } 488 1.1 alnsn else 489 1.1 alnsn FAIL_IF(push_inst(compiler, MOVK | RD(dst) | ((~simm & 0xffff) << 5) | (i << 21))); 490 1.1 alnsn simm >>= 16; 491 1.1 alnsn } 492 1.1 alnsn return SLJIT_SUCCESS; 493 1.1 alnsn } 494 1.1 alnsn 495 1.1 alnsn for (i = 0; i < 4; i++) { 496 1.1 alnsn if (!(simm & 0xffff)) { 497 1.1 alnsn simm >>= 16; 498 1.1 alnsn continue; 499 1.1 alnsn } 500 1.1 alnsn if (first) { 501 1.1 alnsn first = 0; 502 1.1 alnsn FAIL_IF(push_inst(compiler, MOVZ | RD(dst) | ((simm & 0xffff) << 5) | (i << 21))); 503 1.1 alnsn } 504 1.1 alnsn else 505 1.1 alnsn FAIL_IF(push_inst(compiler, MOVK | RD(dst) | ((simm & 0xffff) << 5) | (i << 21))); 506 1.1 alnsn simm >>= 16; 507 1.1 alnsn } 508 1.1 alnsn return SLJIT_SUCCESS; 509 1.1 alnsn } 510 1.1 alnsn 511 1.1 alnsn #define ARG1_IMM 0x0010000 512 1.1 alnsn #define ARG2_IMM 0x0020000 513 1.1 alnsn #define INT_OP 0x0040000 514 1.1 alnsn #define SET_FLAGS 0x0080000 515 1.1 alnsn #define UNUSED_RETURN 0x0100000 516 1.1 alnsn #define SLOW_DEST 0x0200000 517 1.1 alnsn #define SLOW_SRC1 0x0400000 518 1.1 alnsn #define SLOW_SRC2 0x0800000 519 1.1 alnsn 520 1.1 alnsn #define CHECK_FLAGS(flag_bits) \ 521 1.1 alnsn if (flags & SET_FLAGS) { \ 522 1.1 alnsn inv_bits |= flag_bits; \ 523 1.1 alnsn if (flags & UNUSED_RETURN) \ 524 1.1 alnsn dst = TMP_ZERO; \ 525 1.1 alnsn } 526 1.1 alnsn 527 1.3 alnsn static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 dst, sljit_sw arg1, sljit_sw arg2) 528 1.1 alnsn { 529 1.1 alnsn /* dst must be register, TMP_REG1 530 1.1 alnsn arg1 must be register, TMP_REG1, imm 531 1.1 alnsn arg2 must be register, TMP_REG2, imm */ 532 1.1 alnsn sljit_ins inv_bits = (flags & INT_OP) ? (1 << 31) : 0; 533 1.1 alnsn sljit_ins inst_bits; 534 1.3 alnsn sljit_s32 op = (flags & 0xffff); 535 1.3 alnsn sljit_s32 reg; 536 1.1 alnsn sljit_sw imm, nimm; 537 1.1 alnsn 538 1.1 alnsn if (SLJIT_UNLIKELY((flags & (ARG1_IMM | ARG2_IMM)) == (ARG1_IMM | ARG2_IMM))) { 539 1.1 alnsn /* Both are immediates. */ 540 1.1 alnsn flags &= ~ARG1_IMM; 541 1.1 alnsn if (arg1 == 0 && op != SLJIT_ADD && op != SLJIT_SUB) 542 1.1 alnsn arg1 = TMP_ZERO; 543 1.1 alnsn else { 544 1.1 alnsn FAIL_IF(load_immediate(compiler, TMP_REG1, arg1)); 545 1.1 alnsn arg1 = TMP_REG1; 546 1.1 alnsn } 547 1.1 alnsn } 548 1.1 alnsn 549 1.1 alnsn if (flags & (ARG1_IMM | ARG2_IMM)) { 550 1.1 alnsn reg = (flags & ARG2_IMM) ? arg1 : arg2; 551 1.1 alnsn imm = (flags & ARG2_IMM) ? arg2 : arg1; 552 1.1 alnsn 553 1.1 alnsn switch (op) { 554 1.1 alnsn case SLJIT_MUL: 555 1.1 alnsn case SLJIT_NEG: 556 1.1 alnsn case SLJIT_CLZ: 557 1.1 alnsn case SLJIT_ADDC: 558 1.1 alnsn case SLJIT_SUBC: 559 1.1 alnsn /* No form with immediate operand (except imm 0, which 560 1.1 alnsn is represented by a ZERO register). */ 561 1.1 alnsn break; 562 1.1 alnsn case SLJIT_MOV: 563 1.1 alnsn SLJIT_ASSERT(!(flags & SET_FLAGS) && (flags & ARG2_IMM) && arg1 == TMP_REG1); 564 1.1 alnsn return load_immediate(compiler, dst, imm); 565 1.1 alnsn case SLJIT_NOT: 566 1.1 alnsn SLJIT_ASSERT(flags & ARG2_IMM); 567 1.1 alnsn FAIL_IF(load_immediate(compiler, dst, (flags & INT_OP) ? (~imm & 0xffffffff) : ~imm)); 568 1.1 alnsn goto set_flags; 569 1.1 alnsn case SLJIT_SUB: 570 1.1 alnsn if (flags & ARG1_IMM) 571 1.1 alnsn break; 572 1.1 alnsn imm = -imm; 573 1.1 alnsn /* Fall through. */ 574 1.1 alnsn case SLJIT_ADD: 575 1.1 alnsn if (imm == 0) { 576 1.1 alnsn CHECK_FLAGS(1 << 29); 577 1.1 alnsn return push_inst(compiler, ((op == SLJIT_ADD ? ADDI : SUBI) ^ inv_bits) | RD(dst) | RN(reg)); 578 1.1 alnsn } 579 1.1 alnsn if (imm > 0 && imm <= 0xfff) { 580 1.1 alnsn CHECK_FLAGS(1 << 29); 581 1.1 alnsn return push_inst(compiler, (ADDI ^ inv_bits) | RD(dst) | RN(reg) | (imm << 10)); 582 1.1 alnsn } 583 1.1 alnsn nimm = -imm; 584 1.1 alnsn if (nimm > 0 && nimm <= 0xfff) { 585 1.1 alnsn CHECK_FLAGS(1 << 29); 586 1.1 alnsn return push_inst(compiler, (SUBI ^ inv_bits) | RD(dst) | RN(reg) | (nimm << 10)); 587 1.1 alnsn } 588 1.1 alnsn if (imm > 0 && imm <= 0xffffff && !(imm & 0xfff)) { 589 1.1 alnsn CHECK_FLAGS(1 << 29); 590 1.1 alnsn return push_inst(compiler, (ADDI ^ inv_bits) | RD(dst) | RN(reg) | ((imm >> 12) << 10) | (1 << 22)); 591 1.1 alnsn } 592 1.1 alnsn if (nimm > 0 && nimm <= 0xffffff && !(nimm & 0xfff)) { 593 1.1 alnsn CHECK_FLAGS(1 << 29); 594 1.1 alnsn return push_inst(compiler, (SUBI ^ inv_bits) | RD(dst) | RN(reg) | ((nimm >> 12) << 10) | (1 << 22)); 595 1.1 alnsn } 596 1.1 alnsn if (imm > 0 && imm <= 0xffffff && !(flags & SET_FLAGS)) { 597 1.1 alnsn FAIL_IF(push_inst(compiler, (ADDI ^ inv_bits) | RD(dst) | RN(reg) | ((imm >> 12) << 10) | (1 << 22))); 598 1.1 alnsn return push_inst(compiler, (ADDI ^ inv_bits) | RD(dst) | RN(dst) | ((imm & 0xfff) << 10)); 599 1.1 alnsn } 600 1.1 alnsn if (nimm > 0 && nimm <= 0xffffff && !(flags & SET_FLAGS)) { 601 1.1 alnsn FAIL_IF(push_inst(compiler, (SUBI ^ inv_bits) | RD(dst) | RN(reg) | ((nimm >> 12) << 10) | (1 << 22))); 602 1.1 alnsn return push_inst(compiler, (SUBI ^ inv_bits) | RD(dst) | RN(dst) | ((nimm & 0xfff) << 10)); 603 1.1 alnsn } 604 1.1 alnsn break; 605 1.1 alnsn case SLJIT_AND: 606 1.1 alnsn inst_bits = logical_imm(imm, LOGICAL_IMM_CHECK | ((flags & INT_OP) ? 16 : 32)); 607 1.1 alnsn if (!inst_bits) 608 1.1 alnsn break; 609 1.1 alnsn CHECK_FLAGS(3 << 29); 610 1.1 alnsn return push_inst(compiler, (ANDI ^ inv_bits) | RD(dst) | RN(reg) | inst_bits); 611 1.1 alnsn case SLJIT_OR: 612 1.1 alnsn case SLJIT_XOR: 613 1.1 alnsn inst_bits = logical_imm(imm, LOGICAL_IMM_CHECK | ((flags & INT_OP) ? 16 : 32)); 614 1.1 alnsn if (!inst_bits) 615 1.1 alnsn break; 616 1.1 alnsn if (op == SLJIT_OR) 617 1.1 alnsn inst_bits |= ORRI; 618 1.1 alnsn else 619 1.1 alnsn inst_bits |= EORI; 620 1.1 alnsn FAIL_IF(push_inst(compiler, (inst_bits ^ inv_bits) | RD(dst) | RN(reg))); 621 1.1 alnsn goto set_flags; 622 1.1 alnsn case SLJIT_SHL: 623 1.1 alnsn if (flags & ARG1_IMM) 624 1.1 alnsn break; 625 1.1 alnsn if (flags & INT_OP) { 626 1.1 alnsn imm &= 0x1f; 627 1.1 alnsn FAIL_IF(push_inst(compiler, (UBFM ^ inv_bits) | RD(dst) | RN(arg1) | ((-imm & 0x1f) << 16) | ((31 - imm) << 10))); 628 1.1 alnsn } 629 1.1 alnsn else { 630 1.1 alnsn imm &= 0x3f; 631 1.1 alnsn FAIL_IF(push_inst(compiler, (UBFM ^ inv_bits) | RD(dst) | RN(arg1) | (1 << 22) | ((-imm & 0x3f) << 16) | ((63 - imm) << 10))); 632 1.1 alnsn } 633 1.1 alnsn goto set_flags; 634 1.1 alnsn case SLJIT_LSHR: 635 1.1 alnsn case SLJIT_ASHR: 636 1.1 alnsn if (flags & ARG1_IMM) 637 1.1 alnsn break; 638 1.1 alnsn if (op == SLJIT_ASHR) 639 1.1 alnsn inv_bits |= 1 << 30; 640 1.1 alnsn if (flags & INT_OP) { 641 1.1 alnsn imm &= 0x1f; 642 1.1 alnsn FAIL_IF(push_inst(compiler, (UBFM ^ inv_bits) | RD(dst) | RN(arg1) | (imm << 16) | (31 << 10))); 643 1.1 alnsn } 644 1.1 alnsn else { 645 1.1 alnsn imm &= 0x3f; 646 1.1 alnsn FAIL_IF(push_inst(compiler, (UBFM ^ inv_bits) | RD(dst) | RN(arg1) | (1 << 22) | (imm << 16) | (63 << 10))); 647 1.1 alnsn } 648 1.1 alnsn goto set_flags; 649 1.1 alnsn default: 650 1.4 alnsn SLJIT_UNREACHABLE(); 651 1.1 alnsn break; 652 1.1 alnsn } 653 1.1 alnsn 654 1.1 alnsn if (flags & ARG2_IMM) { 655 1.1 alnsn if (arg2 == 0) 656 1.1 alnsn arg2 = TMP_ZERO; 657 1.1 alnsn else { 658 1.1 alnsn FAIL_IF(load_immediate(compiler, TMP_REG2, arg2)); 659 1.1 alnsn arg2 = TMP_REG2; 660 1.1 alnsn } 661 1.1 alnsn } 662 1.1 alnsn else { 663 1.1 alnsn if (arg1 == 0) 664 1.1 alnsn arg1 = TMP_ZERO; 665 1.1 alnsn else { 666 1.1 alnsn FAIL_IF(load_immediate(compiler, TMP_REG1, arg1)); 667 1.1 alnsn arg1 = TMP_REG1; 668 1.1 alnsn } 669 1.1 alnsn } 670 1.1 alnsn } 671 1.1 alnsn 672 1.1 alnsn /* Both arguments are registers. */ 673 1.1 alnsn switch (op) { 674 1.1 alnsn case SLJIT_MOV: 675 1.1 alnsn case SLJIT_MOV_P: 676 1.1 alnsn case SLJIT_MOVU: 677 1.1 alnsn case SLJIT_MOVU_P: 678 1.1 alnsn SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1); 679 1.1 alnsn if (dst == arg2) 680 1.1 alnsn return SLJIT_SUCCESS; 681 1.1 alnsn return push_inst(compiler, ORR | RD(dst) | RN(TMP_ZERO) | RM(arg2)); 682 1.3 alnsn case SLJIT_MOV_U8: 683 1.3 alnsn case SLJIT_MOVU_U8: 684 1.1 alnsn SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1); 685 1.1 alnsn return push_inst(compiler, (UBFM ^ (1 << 31)) | RD(dst) | RN(arg2) | (7 << 10)); 686 1.3 alnsn case SLJIT_MOV_S8: 687 1.3 alnsn case SLJIT_MOVU_S8: 688 1.1 alnsn SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1); 689 1.1 alnsn if (!(flags & INT_OP)) 690 1.1 alnsn inv_bits |= 1 << 22; 691 1.1 alnsn return push_inst(compiler, (SBFM ^ inv_bits) | RD(dst) | RN(arg2) | (7 << 10)); 692 1.3 alnsn case SLJIT_MOV_U16: 693 1.3 alnsn case SLJIT_MOVU_U16: 694 1.1 alnsn SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1); 695 1.1 alnsn return push_inst(compiler, (UBFM ^ (1 << 31)) | RD(dst) | RN(arg2) | (15 << 10)); 696 1.3 alnsn case SLJIT_MOV_S16: 697 1.3 alnsn case SLJIT_MOVU_S16: 698 1.1 alnsn SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1); 699 1.1 alnsn if (!(flags & INT_OP)) 700 1.1 alnsn inv_bits |= 1 << 22; 701 1.1 alnsn return push_inst(compiler, (SBFM ^ inv_bits) | RD(dst) | RN(arg2) | (15 << 10)); 702 1.3 alnsn case SLJIT_MOV_U32: 703 1.3 alnsn case SLJIT_MOVU_U32: 704 1.1 alnsn SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1); 705 1.1 alnsn if ((flags & INT_OP) && dst == arg2) 706 1.1 alnsn return SLJIT_SUCCESS; 707 1.1 alnsn return push_inst(compiler, (ORR ^ (1 << 31)) | RD(dst) | RN(TMP_ZERO) | RM(arg2)); 708 1.3 alnsn case SLJIT_MOV_S32: 709 1.3 alnsn case SLJIT_MOVU_S32: 710 1.1 alnsn SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1); 711 1.1 alnsn if ((flags & INT_OP) && dst == arg2) 712 1.1 alnsn return SLJIT_SUCCESS; 713 1.1 alnsn return push_inst(compiler, SBFM | (1 << 22) | RD(dst) | RN(arg2) | (31 << 10)); 714 1.1 alnsn case SLJIT_NOT: 715 1.1 alnsn SLJIT_ASSERT(arg1 == TMP_REG1); 716 1.1 alnsn FAIL_IF(push_inst(compiler, (ORN ^ inv_bits) | RD(dst) | RN(TMP_ZERO) | RM(arg2))); 717 1.1 alnsn goto set_flags; 718 1.1 alnsn case SLJIT_NEG: 719 1.1 alnsn SLJIT_ASSERT(arg1 == TMP_REG1); 720 1.1 alnsn if (flags & SET_FLAGS) 721 1.1 alnsn inv_bits |= 1 << 29; 722 1.1 alnsn return push_inst(compiler, (SUB ^ inv_bits) | RD(dst) | RN(TMP_ZERO) | RM(arg2)); 723 1.1 alnsn case SLJIT_CLZ: 724 1.1 alnsn SLJIT_ASSERT(arg1 == TMP_REG1); 725 1.1 alnsn FAIL_IF(push_inst(compiler, (CLZ ^ inv_bits) | RD(dst) | RN(arg2))); 726 1.1 alnsn goto set_flags; 727 1.1 alnsn case SLJIT_ADD: 728 1.1 alnsn CHECK_FLAGS(1 << 29); 729 1.1 alnsn return push_inst(compiler, (ADD ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2)); 730 1.1 alnsn case SLJIT_ADDC: 731 1.1 alnsn CHECK_FLAGS(1 << 29); 732 1.1 alnsn return push_inst(compiler, (ADC ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2)); 733 1.1 alnsn case SLJIT_SUB: 734 1.1 alnsn CHECK_FLAGS(1 << 29); 735 1.1 alnsn return push_inst(compiler, (SUB ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2)); 736 1.1 alnsn case SLJIT_SUBC: 737 1.1 alnsn CHECK_FLAGS(1 << 29); 738 1.1 alnsn return push_inst(compiler, (SBC ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2)); 739 1.1 alnsn case SLJIT_MUL: 740 1.1 alnsn if (!(flags & SET_FLAGS)) 741 1.1 alnsn return push_inst(compiler, (MADD ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2) | RT2(TMP_ZERO)); 742 1.1 alnsn if (flags & INT_OP) { 743 1.1 alnsn FAIL_IF(push_inst(compiler, SMADDL | RD(dst) | RN(arg1) | RM(arg2) | (31 << 10))); 744 1.3 alnsn FAIL_IF(push_inst(compiler, ADD | RD(TMP_LR) | RN(TMP_ZERO) | RM(dst) | (2 << 22) | (31 << 10))); 745 1.3 alnsn return push_inst(compiler, SUBS | RD(TMP_ZERO) | RN(TMP_LR) | RM(dst) | (2 << 22) | (63 << 10)); 746 1.1 alnsn } 747 1.3 alnsn FAIL_IF(push_inst(compiler, SMULH | RD(TMP_LR) | RN(arg1) | RM(arg2))); 748 1.1 alnsn FAIL_IF(push_inst(compiler, MADD | RD(dst) | RN(arg1) | RM(arg2) | RT2(TMP_ZERO))); 749 1.3 alnsn return push_inst(compiler, SUBS | RD(TMP_ZERO) | RN(TMP_LR) | RM(dst) | (2 << 22) | (63 << 10)); 750 1.1 alnsn case SLJIT_AND: 751 1.1 alnsn CHECK_FLAGS(3 << 29); 752 1.1 alnsn return push_inst(compiler, (AND ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2)); 753 1.1 alnsn case SLJIT_OR: 754 1.1 alnsn FAIL_IF(push_inst(compiler, (ORR ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2))); 755 1.1 alnsn goto set_flags; 756 1.1 alnsn case SLJIT_XOR: 757 1.1 alnsn FAIL_IF(push_inst(compiler, (EOR ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2))); 758 1.1 alnsn goto set_flags; 759 1.1 alnsn case SLJIT_SHL: 760 1.1 alnsn FAIL_IF(push_inst(compiler, (LSLV ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2))); 761 1.1 alnsn goto set_flags; 762 1.1 alnsn case SLJIT_LSHR: 763 1.1 alnsn FAIL_IF(push_inst(compiler, (LSRV ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2))); 764 1.1 alnsn goto set_flags; 765 1.1 alnsn case SLJIT_ASHR: 766 1.1 alnsn FAIL_IF(push_inst(compiler, (ASRV ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2))); 767 1.1 alnsn goto set_flags; 768 1.1 alnsn } 769 1.1 alnsn 770 1.4 alnsn SLJIT_UNREACHABLE(); 771 1.1 alnsn return SLJIT_SUCCESS; 772 1.1 alnsn 773 1.1 alnsn set_flags: 774 1.1 alnsn if (flags & SET_FLAGS) 775 1.1 alnsn return push_inst(compiler, (SUBS ^ inv_bits) | RD(TMP_ZERO) | RN(dst) | RM(TMP_ZERO)); 776 1.1 alnsn return SLJIT_SUCCESS; 777 1.1 alnsn } 778 1.1 alnsn 779 1.1 alnsn #define STORE 0x01 780 1.1 alnsn #define SIGNED 0x02 781 1.1 alnsn 782 1.1 alnsn #define UPDATE 0x04 783 1.1 alnsn #define ARG_TEST 0x08 784 1.1 alnsn 785 1.1 alnsn #define BYTE_SIZE 0x000 786 1.1 alnsn #define HALF_SIZE 0x100 787 1.1 alnsn #define INT_SIZE 0x200 788 1.1 alnsn #define WORD_SIZE 0x300 789 1.1 alnsn 790 1.1 alnsn #define MEM_SIZE_SHIFT(flags) ((flags) >> 8) 791 1.1 alnsn 792 1.3 alnsn static const sljit_ins sljit_mem_imm[4] = { 793 1.1 alnsn /* u l */ 0x39400000 /* ldrb [reg,imm] */, 794 1.1 alnsn /* u s */ 0x39000000 /* strb [reg,imm] */, 795 1.1 alnsn /* s l */ 0x39800000 /* ldrsb [reg,imm] */, 796 1.1 alnsn /* s s */ 0x39000000 /* strb [reg,imm] */, 797 1.1 alnsn }; 798 1.1 alnsn 799 1.3 alnsn static const sljit_ins sljit_mem_simm[4] = { 800 1.1 alnsn /* u l */ 0x38400000 /* ldurb [reg,imm] */, 801 1.1 alnsn /* u s */ 0x38000000 /* sturb [reg,imm] */, 802 1.1 alnsn /* s l */ 0x38800000 /* ldursb [reg,imm] */, 803 1.1 alnsn /* s s */ 0x38000000 /* sturb [reg,imm] */, 804 1.1 alnsn }; 805 1.1 alnsn 806 1.3 alnsn static const sljit_ins sljit_mem_pre_simm[4] = { 807 1.1 alnsn /* u l */ 0x38400c00 /* ldrb [reg,imm]! */, 808 1.1 alnsn /* u s */ 0x38000c00 /* strb [reg,imm]! */, 809 1.1 alnsn /* s l */ 0x38800c00 /* ldrsb [reg,imm]! */, 810 1.1 alnsn /* s s */ 0x38000c00 /* strb [reg,imm]! */, 811 1.1 alnsn }; 812 1.1 alnsn 813 1.3 alnsn static const sljit_ins sljit_mem_reg[4] = { 814 1.1 alnsn /* u l */ 0x38606800 /* ldrb [reg,reg] */, 815 1.1 alnsn /* u s */ 0x38206800 /* strb [reg,reg] */, 816 1.1 alnsn /* s l */ 0x38a06800 /* ldrsb [reg,reg] */, 817 1.1 alnsn /* s s */ 0x38206800 /* strb [reg,reg] */, 818 1.1 alnsn }; 819 1.1 alnsn 820 1.1 alnsn /* Helper function. Dst should be reg + value, using at most 1 instruction, flags does not set. */ 821 1.3 alnsn static sljit_s32 emit_set_delta(struct sljit_compiler *compiler, sljit_s32 dst, sljit_s32 reg, sljit_sw value) 822 1.1 alnsn { 823 1.1 alnsn if (value >= 0) { 824 1.1 alnsn if (value <= 0xfff) 825 1.1 alnsn return push_inst(compiler, ADDI | RD(dst) | RN(reg) | (value << 10)); 826 1.1 alnsn if (value <= 0xffffff && !(value & 0xfff)) 827 1.1 alnsn return push_inst(compiler, ADDI | (1 << 22) | RD(dst) | RN(reg) | (value >> 2)); 828 1.1 alnsn } 829 1.1 alnsn else { 830 1.1 alnsn value = -value; 831 1.1 alnsn if (value <= 0xfff) 832 1.1 alnsn return push_inst(compiler, SUBI | RD(dst) | RN(reg) | (value << 10)); 833 1.1 alnsn if (value <= 0xffffff && !(value & 0xfff)) 834 1.1 alnsn return push_inst(compiler, SUBI | (1 << 22) | RD(dst) | RN(reg) | (value >> 2)); 835 1.1 alnsn } 836 1.1 alnsn return SLJIT_ERR_UNSUPPORTED; 837 1.1 alnsn } 838 1.1 alnsn 839 1.1 alnsn /* Can perform an operation using at most 1 instruction. */ 840 1.3 alnsn static sljit_s32 getput_arg_fast(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw) 841 1.1 alnsn { 842 1.3 alnsn sljit_u32 shift = MEM_SIZE_SHIFT(flags); 843 1.1 alnsn 844 1.1 alnsn SLJIT_ASSERT(arg & SLJIT_MEM); 845 1.1 alnsn 846 1.1 alnsn if (SLJIT_UNLIKELY(flags & UPDATE)) { 847 1.1 alnsn if ((arg & REG_MASK) && !(arg & OFFS_REG_MASK) && argw <= 255 && argw >= -256) { 848 1.1 alnsn if (SLJIT_UNLIKELY(flags & ARG_TEST)) 849 1.1 alnsn return 1; 850 1.1 alnsn 851 1.1 alnsn arg &= REG_MASK; 852 1.1 alnsn argw &= 0x1ff; 853 1.1 alnsn FAIL_IF(push_inst(compiler, sljit_mem_pre_simm[flags & 0x3] 854 1.1 alnsn | (shift << 30) | RT(reg) | RN(arg) | (argw << 12))); 855 1.1 alnsn return -1; 856 1.1 alnsn } 857 1.1 alnsn return 0; 858 1.1 alnsn } 859 1.1 alnsn 860 1.1 alnsn if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) { 861 1.1 alnsn argw &= 0x3; 862 1.1 alnsn if (argw && argw != shift) 863 1.1 alnsn return 0; 864 1.1 alnsn 865 1.1 alnsn if (SLJIT_UNLIKELY(flags & ARG_TEST)) 866 1.1 alnsn return 1; 867 1.1 alnsn 868 1.1 alnsn FAIL_IF(push_inst(compiler, sljit_mem_reg[flags & 0x3] | (shift << 30) | RT(reg) 869 1.1 alnsn | RN(arg & REG_MASK) | RM(OFFS_REG(arg)) | (argw ? (1 << 12) : 0))); 870 1.1 alnsn return -1; 871 1.1 alnsn } 872 1.1 alnsn 873 1.1 alnsn arg &= REG_MASK; 874 1.1 alnsn if (argw >= 0 && (argw >> shift) <= 0xfff && (argw & ((1 << shift) - 1)) == 0) { 875 1.1 alnsn if (SLJIT_UNLIKELY(flags & ARG_TEST)) 876 1.1 alnsn return 1; 877 1.1 alnsn 878 1.1 alnsn FAIL_IF(push_inst(compiler, sljit_mem_imm[flags & 0x3] | (shift << 30) 879 1.1 alnsn | RT(reg) | RN(arg) | (argw << (10 - shift)))); 880 1.1 alnsn return -1; 881 1.1 alnsn } 882 1.1 alnsn 883 1.1 alnsn if (argw > 255 || argw < -256) 884 1.1 alnsn return 0; 885 1.1 alnsn 886 1.1 alnsn if (SLJIT_UNLIKELY(flags & ARG_TEST)) 887 1.1 alnsn return 1; 888 1.1 alnsn 889 1.1 alnsn FAIL_IF(push_inst(compiler, sljit_mem_simm[flags & 0x3] | (shift << 30) 890 1.1 alnsn | RT(reg) | RN(arg) | ((argw & 0x1ff) << 12))); 891 1.1 alnsn return -1; 892 1.1 alnsn } 893 1.1 alnsn 894 1.1 alnsn /* see getput_arg below. 895 1.1 alnsn Note: can_cache is called only for binary operators. Those 896 1.1 alnsn operators always uses word arguments without write back. */ 897 1.3 alnsn static sljit_s32 can_cache(sljit_s32 arg, sljit_sw argw, sljit_s32 next_arg, sljit_sw next_argw) 898 1.1 alnsn { 899 1.1 alnsn sljit_sw diff; 900 1.1 alnsn if ((arg & OFFS_REG_MASK) || !(next_arg & SLJIT_MEM)) 901 1.1 alnsn return 0; 902 1.1 alnsn 903 1.1 alnsn if (!(arg & REG_MASK)) { 904 1.1 alnsn diff = argw - next_argw; 905 1.1 alnsn if (diff <= 0xfff && diff >= -0xfff) 906 1.1 alnsn return 1; 907 1.1 alnsn return 0; 908 1.1 alnsn } 909 1.1 alnsn 910 1.1 alnsn if (argw == next_argw) 911 1.1 alnsn return 1; 912 1.1 alnsn 913 1.1 alnsn diff = argw - next_argw; 914 1.1 alnsn if (arg == next_arg && diff <= 0xfff && diff >= -0xfff) 915 1.1 alnsn return 1; 916 1.1 alnsn 917 1.1 alnsn return 0; 918 1.1 alnsn } 919 1.1 alnsn 920 1.1 alnsn /* Emit the necessary instructions. See can_cache above. */ 921 1.3 alnsn static sljit_s32 getput_arg(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, 922 1.3 alnsn sljit_s32 arg, sljit_sw argw, sljit_s32 next_arg, sljit_sw next_argw) 923 1.1 alnsn { 924 1.3 alnsn sljit_u32 shift = MEM_SIZE_SHIFT(flags); 925 1.3 alnsn sljit_s32 tmp_r, other_r; 926 1.1 alnsn sljit_sw diff; 927 1.1 alnsn 928 1.1 alnsn SLJIT_ASSERT(arg & SLJIT_MEM); 929 1.1 alnsn if (!(next_arg & SLJIT_MEM)) { 930 1.1 alnsn next_arg = 0; 931 1.1 alnsn next_argw = 0; 932 1.1 alnsn } 933 1.1 alnsn 934 1.1 alnsn tmp_r = (flags & STORE) ? TMP_REG3 : reg; 935 1.1 alnsn 936 1.1 alnsn if (SLJIT_UNLIKELY((flags & UPDATE) && (arg & REG_MASK))) { 937 1.1 alnsn /* Update only applies if a base register exists. */ 938 1.1 alnsn other_r = OFFS_REG(arg); 939 1.1 alnsn if (!other_r) { 940 1.1 alnsn other_r = arg & REG_MASK; 941 1.4 alnsn SLJIT_ASSERT(other_r != reg); 942 1.4 alnsn 943 1.4 alnsn if (argw >= 0 && argw <= 0xffffff) { 944 1.1 alnsn if ((argw & 0xfff) != 0) 945 1.1 alnsn FAIL_IF(push_inst(compiler, ADDI | RD(other_r) | RN(other_r) | ((argw & 0xfff) << 10))); 946 1.1 alnsn if (argw >> 12) 947 1.1 alnsn FAIL_IF(push_inst(compiler, ADDI | (1 << 22) | RD(other_r) | RN(other_r) | ((argw >> 12) << 10))); 948 1.1 alnsn return push_inst(compiler, sljit_mem_imm[flags & 0x3] | (shift << 30) | RT(reg) | RN(other_r)); 949 1.1 alnsn } 950 1.4 alnsn else if (argw < 0 && argw >= -0xffffff) { 951 1.1 alnsn argw = -argw; 952 1.1 alnsn if ((argw & 0xfff) != 0) 953 1.1 alnsn FAIL_IF(push_inst(compiler, SUBI | RD(other_r) | RN(other_r) | ((argw & 0xfff) << 10))); 954 1.1 alnsn if (argw >> 12) 955 1.1 alnsn FAIL_IF(push_inst(compiler, SUBI | (1 << 22) | RD(other_r) | RN(other_r) | ((argw >> 12) << 10))); 956 1.1 alnsn return push_inst(compiler, sljit_mem_imm[flags & 0x3] | (shift << 30) | RT(reg) | RN(other_r)); 957 1.1 alnsn } 958 1.1 alnsn 959 1.1 alnsn if (compiler->cache_arg == SLJIT_MEM) { 960 1.1 alnsn if (argw == compiler->cache_argw) { 961 1.1 alnsn other_r = TMP_REG3; 962 1.1 alnsn argw = 0; 963 1.1 alnsn } 964 1.1 alnsn else if (emit_set_delta(compiler, TMP_REG3, TMP_REG3, argw - compiler->cache_argw) != SLJIT_ERR_UNSUPPORTED) { 965 1.1 alnsn FAIL_IF(compiler->error); 966 1.1 alnsn compiler->cache_argw = argw; 967 1.1 alnsn other_r = TMP_REG3; 968 1.1 alnsn argw = 0; 969 1.1 alnsn } 970 1.1 alnsn } 971 1.1 alnsn 972 1.1 alnsn if (argw) { 973 1.1 alnsn FAIL_IF(load_immediate(compiler, TMP_REG3, argw)); 974 1.1 alnsn compiler->cache_arg = SLJIT_MEM; 975 1.1 alnsn compiler->cache_argw = argw; 976 1.1 alnsn other_r = TMP_REG3; 977 1.1 alnsn argw = 0; 978 1.1 alnsn } 979 1.1 alnsn } 980 1.1 alnsn 981 1.1 alnsn /* No caching here. */ 982 1.1 alnsn arg &= REG_MASK; 983 1.4 alnsn FAIL_IF(push_inst(compiler, sljit_mem_reg[flags & 0x3] | (shift << 30) | RT(reg) | RN(arg) | RM(other_r))); 984 1.4 alnsn return push_inst(compiler, ADD | RD(arg) | RN(arg) | RM(other_r)); 985 1.1 alnsn } 986 1.1 alnsn 987 1.1 alnsn if (arg & OFFS_REG_MASK) { 988 1.1 alnsn other_r = OFFS_REG(arg); 989 1.1 alnsn arg &= REG_MASK; 990 1.1 alnsn FAIL_IF(push_inst(compiler, ADD | RD(tmp_r) | RN(arg) | RM(other_r) | ((argw & 0x3) << 10))); 991 1.1 alnsn return push_inst(compiler, sljit_mem_imm[flags & 0x3] | (shift << 30) | RT(reg) | RN(tmp_r)); 992 1.1 alnsn } 993 1.1 alnsn 994 1.1 alnsn if (compiler->cache_arg == arg) { 995 1.1 alnsn diff = argw - compiler->cache_argw; 996 1.1 alnsn if (diff <= 255 && diff >= -256) 997 1.1 alnsn return push_inst(compiler, sljit_mem_simm[flags & 0x3] | (shift << 30) 998 1.1 alnsn | RT(reg) | RN(TMP_REG3) | ((diff & 0x1ff) << 12)); 999 1.1 alnsn if (emit_set_delta(compiler, TMP_REG3, TMP_REG3, diff) != SLJIT_ERR_UNSUPPORTED) { 1000 1.1 alnsn FAIL_IF(compiler->error); 1001 1.1 alnsn return push_inst(compiler, sljit_mem_imm[flags & 0x3] | (shift << 30) | RT(reg) | RN(arg)); 1002 1.1 alnsn } 1003 1.1 alnsn } 1004 1.1 alnsn 1005 1.1 alnsn if (argw >= 0 && argw <= 0xffffff && (argw & ((1 << shift) - 1)) == 0) { 1006 1.1 alnsn FAIL_IF(push_inst(compiler, ADDI | (1 << 22) | RD(tmp_r) | RN(arg & REG_MASK) | ((argw >> 12) << 10))); 1007 1.1 alnsn return push_inst(compiler, sljit_mem_imm[flags & 0x3] | (shift << 30) 1008 1.1 alnsn | RT(reg) | RN(tmp_r) | ((argw & 0xfff) << (10 - shift))); 1009 1.1 alnsn } 1010 1.1 alnsn 1011 1.1 alnsn diff = argw - next_argw; 1012 1.1 alnsn next_arg = (arg & REG_MASK) && (arg == next_arg) && diff <= 0xfff && diff >= -0xfff && diff != 0; 1013 1.1 alnsn arg &= REG_MASK; 1014 1.1 alnsn 1015 1.1 alnsn if (arg && compiler->cache_arg == SLJIT_MEM) { 1016 1.1 alnsn if (compiler->cache_argw == argw) 1017 1.1 alnsn return push_inst(compiler, sljit_mem_reg[flags & 0x3] | (shift << 30) | RT(reg) | RN(arg) | RM(TMP_REG3)); 1018 1.1 alnsn if (emit_set_delta(compiler, TMP_REG3, TMP_REG3, argw - compiler->cache_argw) != SLJIT_ERR_UNSUPPORTED) { 1019 1.1 alnsn FAIL_IF(compiler->error); 1020 1.1 alnsn compiler->cache_argw = argw; 1021 1.1 alnsn return push_inst(compiler, sljit_mem_reg[flags & 0x3] | (shift << 30) | RT(reg) | RN(arg) | RM(TMP_REG3)); 1022 1.1 alnsn } 1023 1.1 alnsn } 1024 1.1 alnsn 1025 1.1 alnsn compiler->cache_argw = argw; 1026 1.1 alnsn if (next_arg && emit_set_delta(compiler, TMP_REG3, arg, argw) != SLJIT_ERR_UNSUPPORTED) { 1027 1.1 alnsn FAIL_IF(compiler->error); 1028 1.1 alnsn compiler->cache_arg = SLJIT_MEM | arg; 1029 1.1 alnsn arg = 0; 1030 1.1 alnsn } 1031 1.1 alnsn else { 1032 1.1 alnsn FAIL_IF(load_immediate(compiler, TMP_REG3, argw)); 1033 1.1 alnsn compiler->cache_arg = SLJIT_MEM; 1034 1.1 alnsn 1035 1.1 alnsn if (next_arg) { 1036 1.1 alnsn FAIL_IF(push_inst(compiler, ADD | RD(TMP_REG3) | RN(TMP_REG3) | RM(arg))); 1037 1.1 alnsn compiler->cache_arg = SLJIT_MEM | arg; 1038 1.1 alnsn arg = 0; 1039 1.1 alnsn } 1040 1.1 alnsn } 1041 1.1 alnsn 1042 1.1 alnsn if (arg) 1043 1.1 alnsn return push_inst(compiler, sljit_mem_reg[flags & 0x3] | (shift << 30) | RT(reg) | RN(arg) | RM(TMP_REG3)); 1044 1.1 alnsn return push_inst(compiler, sljit_mem_imm[flags & 0x3] | (shift << 30) | RT(reg) | RN(TMP_REG3)); 1045 1.1 alnsn } 1046 1.1 alnsn 1047 1.3 alnsn static SLJIT_INLINE sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw) 1048 1.1 alnsn { 1049 1.1 alnsn if (getput_arg_fast(compiler, flags, reg, arg, argw)) 1050 1.1 alnsn return compiler->error; 1051 1.1 alnsn compiler->cache_arg = 0; 1052 1.1 alnsn compiler->cache_argw = 0; 1053 1.1 alnsn return getput_arg(compiler, flags, reg, arg, argw, 0, 0); 1054 1.1 alnsn } 1055 1.1 alnsn 1056 1.3 alnsn static SLJIT_INLINE sljit_s32 emit_op_mem2(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg1, sljit_sw arg1w, sljit_s32 arg2, sljit_sw arg2w) 1057 1.1 alnsn { 1058 1.1 alnsn if (getput_arg_fast(compiler, flags, reg, arg1, arg1w)) 1059 1.1 alnsn return compiler->error; 1060 1.1 alnsn return getput_arg(compiler, flags, reg, arg1, arg1w, arg2, arg2w); 1061 1.1 alnsn } 1062 1.1 alnsn 1063 1.1 alnsn /* --------------------------------------------------------------------- */ 1064 1.1 alnsn /* Entry, exit */ 1065 1.1 alnsn /* --------------------------------------------------------------------- */ 1066 1.1 alnsn 1067 1.3 alnsn SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler, 1068 1.3 alnsn sljit_s32 options, sljit_s32 args, sljit_s32 scratches, sljit_s32 saveds, 1069 1.3 alnsn sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) 1070 1.1 alnsn { 1071 1.3 alnsn sljit_s32 i, tmp, offs, prev, saved_regs_size; 1072 1.3 alnsn 1073 1.1 alnsn CHECK_ERROR(); 1074 1.3 alnsn CHECK(check_sljit_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size)); 1075 1.3 alnsn set_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size); 1076 1.1 alnsn 1077 1.3 alnsn saved_regs_size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 0); 1078 1.3 alnsn local_size += saved_regs_size + SLJIT_LOCALS_OFFSET; 1079 1.3 alnsn local_size = (local_size + 15) & ~0xf; 1080 1.1 alnsn compiler->local_size = local_size; 1081 1.1 alnsn 1082 1.5 riastrad SLJIT_ASSERT(local_size >= 0); 1083 1.5 riastrad if ((size_t)local_size <= (63 * sizeof(sljit_sw))) { 1084 1.1 alnsn FAIL_IF(push_inst(compiler, STP_PRE | 29 | RT2(TMP_LR) 1085 1.1 alnsn | RN(TMP_SP) | ((-(local_size >> 3) & 0x7f) << 15))); 1086 1.3 alnsn FAIL_IF(push_inst(compiler, ADDI | RD(SLJIT_SP) | RN(TMP_SP) | (0 << 10))); 1087 1.3 alnsn offs = (local_size - saved_regs_size) << (15 - 3); 1088 1.3 alnsn } else { 1089 1.3 alnsn offs = 0 << 15; 1090 1.3 alnsn if (saved_regs_size & 0x8) { 1091 1.3 alnsn offs = 1 << 15; 1092 1.3 alnsn saved_regs_size += sizeof(sljit_sw); 1093 1.3 alnsn } 1094 1.3 alnsn local_size -= saved_regs_size + SLJIT_LOCALS_OFFSET; 1095 1.3 alnsn if (saved_regs_size > 0) 1096 1.3 alnsn FAIL_IF(push_inst(compiler, SUBI | RD(TMP_SP) | RN(TMP_SP) | (saved_regs_size << 10))); 1097 1.3 alnsn } 1098 1.3 alnsn 1099 1.3 alnsn tmp = saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - saveds) : SLJIT_FIRST_SAVED_REG; 1100 1.3 alnsn prev = -1; 1101 1.3 alnsn for (i = SLJIT_S0; i >= tmp; i--) { 1102 1.3 alnsn if (prev == -1) { 1103 1.3 alnsn if (!(offs & (1 << 15))) { 1104 1.3 alnsn prev = i; 1105 1.3 alnsn continue; 1106 1.3 alnsn } 1107 1.3 alnsn FAIL_IF(push_inst(compiler, STRI | RT(i) | RN(TMP_SP) | (offs >> 5))); 1108 1.3 alnsn offs += 1 << 15; 1109 1.3 alnsn continue; 1110 1.3 alnsn } 1111 1.3 alnsn FAIL_IF(push_inst(compiler, STP | RT(prev) | RT2(i) | RN(TMP_SP) | offs)); 1112 1.3 alnsn offs += 2 << 15; 1113 1.3 alnsn prev = -1; 1114 1.3 alnsn } 1115 1.3 alnsn 1116 1.3 alnsn for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--) { 1117 1.3 alnsn if (prev == -1) { 1118 1.3 alnsn if (!(offs & (1 << 15))) { 1119 1.3 alnsn prev = i; 1120 1.3 alnsn continue; 1121 1.3 alnsn } 1122 1.3 alnsn FAIL_IF(push_inst(compiler, STRI | RT(i) | RN(TMP_SP) | (offs >> 5))); 1123 1.3 alnsn offs += 1 << 15; 1124 1.3 alnsn continue; 1125 1.3 alnsn } 1126 1.3 alnsn FAIL_IF(push_inst(compiler, STP | RT(prev) | RT2(i) | RN(TMP_SP) | offs)); 1127 1.3 alnsn offs += 2 << 15; 1128 1.3 alnsn prev = -1; 1129 1.3 alnsn } 1130 1.3 alnsn 1131 1.3 alnsn SLJIT_ASSERT(prev == -1); 1132 1.3 alnsn 1133 1.5 riastrad SLJIT_ASSERT(compiler->local_size >= 0); 1134 1.5 riastrad if ((size_t)compiler->local_size > (63 * sizeof(sljit_sw))) { 1135 1.3 alnsn /* The local_size is already adjusted by the saved registers. */ 1136 1.1 alnsn if (local_size > 0xfff) { 1137 1.1 alnsn FAIL_IF(push_inst(compiler, SUBI | RD(TMP_SP) | RN(TMP_SP) | ((local_size >> 12) << 10) | (1 << 22))); 1138 1.1 alnsn local_size &= 0xfff; 1139 1.1 alnsn } 1140 1.1 alnsn if (local_size) 1141 1.1 alnsn FAIL_IF(push_inst(compiler, SUBI | RD(TMP_SP) | RN(TMP_SP) | (local_size << 10))); 1142 1.3 alnsn FAIL_IF(push_inst(compiler, STP_PRE | 29 | RT2(TMP_LR) 1143 1.3 alnsn | RN(TMP_SP) | ((-(16 >> 3) & 0x7f) << 15))); 1144 1.3 alnsn FAIL_IF(push_inst(compiler, ADDI | RD(SLJIT_SP) | RN(TMP_SP) | (0 << 10))); 1145 1.1 alnsn } 1146 1.1 alnsn 1147 1.1 alnsn if (args >= 1) 1148 1.3 alnsn FAIL_IF(push_inst(compiler, ORR | RD(SLJIT_S0) | RN(TMP_ZERO) | RM(SLJIT_R0))); 1149 1.1 alnsn if (args >= 2) 1150 1.3 alnsn FAIL_IF(push_inst(compiler, ORR | RD(SLJIT_S1) | RN(TMP_ZERO) | RM(SLJIT_R1))); 1151 1.1 alnsn if (args >= 3) 1152 1.3 alnsn FAIL_IF(push_inst(compiler, ORR | RD(SLJIT_S2) | RN(TMP_ZERO) | RM(SLJIT_R2))); 1153 1.1 alnsn 1154 1.1 alnsn return SLJIT_SUCCESS; 1155 1.1 alnsn } 1156 1.1 alnsn 1157 1.3 alnsn SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler, 1158 1.3 alnsn sljit_s32 options, sljit_s32 args, sljit_s32 scratches, sljit_s32 saveds, 1159 1.3 alnsn sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) 1160 1.1 alnsn { 1161 1.3 alnsn CHECK_ERROR(); 1162 1.3 alnsn CHECK(check_sljit_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size)); 1163 1.3 alnsn set_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size); 1164 1.1 alnsn 1165 1.3 alnsn local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds, 0) + SLJIT_LOCALS_OFFSET; 1166 1.3 alnsn local_size = (local_size + 15) & ~0xf; 1167 1.3 alnsn compiler->local_size = local_size; 1168 1.3 alnsn return SLJIT_SUCCESS; 1169 1.1 alnsn } 1170 1.1 alnsn 1171 1.3 alnsn SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw) 1172 1.1 alnsn { 1173 1.3 alnsn sljit_s32 local_size; 1174 1.3 alnsn sljit_s32 i, tmp, offs, prev, saved_regs_size; 1175 1.1 alnsn 1176 1.1 alnsn CHECK_ERROR(); 1177 1.3 alnsn CHECK(check_sljit_emit_return(compiler, op, src, srcw)); 1178 1.1 alnsn 1179 1.1 alnsn FAIL_IF(emit_mov_before_return(compiler, op, src, srcw)); 1180 1.1 alnsn 1181 1.1 alnsn local_size = compiler->local_size; 1182 1.1 alnsn 1183 1.3 alnsn saved_regs_size = GET_SAVED_REGISTERS_SIZE(compiler->scratches, compiler->saveds, 0); 1184 1.5 riastrad SLJIT_ASSERT(local_size >= 0); 1185 1.5 riastrad if ((size_t)local_size <= (63 * sizeof(sljit_sw))) 1186 1.3 alnsn offs = (local_size - saved_regs_size) << (15 - 3); 1187 1.3 alnsn else { 1188 1.1 alnsn FAIL_IF(push_inst(compiler, LDP_PST | 29 | RT2(TMP_LR) 1189 1.3 alnsn | RN(TMP_SP) | (((16 >> 3) & 0x7f) << 15))); 1190 1.3 alnsn offs = 0 << 15; 1191 1.3 alnsn if (saved_regs_size & 0x8) { 1192 1.3 alnsn offs = 1 << 15; 1193 1.3 alnsn saved_regs_size += sizeof(sljit_sw); 1194 1.3 alnsn } 1195 1.3 alnsn local_size -= saved_regs_size + SLJIT_LOCALS_OFFSET; 1196 1.1 alnsn if (local_size > 0xfff) { 1197 1.1 alnsn FAIL_IF(push_inst(compiler, ADDI | RD(TMP_SP) | RN(TMP_SP) | ((local_size >> 12) << 10) | (1 << 22))); 1198 1.1 alnsn local_size &= 0xfff; 1199 1.1 alnsn } 1200 1.1 alnsn if (local_size) 1201 1.1 alnsn FAIL_IF(push_inst(compiler, ADDI | RD(TMP_SP) | RN(TMP_SP) | (local_size << 10))); 1202 1.1 alnsn } 1203 1.1 alnsn 1204 1.3 alnsn tmp = compiler->saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - compiler->saveds) : SLJIT_FIRST_SAVED_REG; 1205 1.3 alnsn prev = -1; 1206 1.3 alnsn for (i = SLJIT_S0; i >= tmp; i--) { 1207 1.3 alnsn if (prev == -1) { 1208 1.3 alnsn if (!(offs & (1 << 15))) { 1209 1.3 alnsn prev = i; 1210 1.3 alnsn continue; 1211 1.3 alnsn } 1212 1.3 alnsn FAIL_IF(push_inst(compiler, LDRI | RT(i) | RN(TMP_SP) | (offs >> 5))); 1213 1.3 alnsn offs += 1 << 15; 1214 1.3 alnsn continue; 1215 1.3 alnsn } 1216 1.3 alnsn FAIL_IF(push_inst(compiler, LDP | RT(prev) | RT2(i) | RN(TMP_SP) | offs)); 1217 1.3 alnsn offs += 2 << 15; 1218 1.3 alnsn prev = -1; 1219 1.3 alnsn } 1220 1.3 alnsn 1221 1.3 alnsn for (i = compiler->scratches; i >= SLJIT_FIRST_SAVED_REG; i--) { 1222 1.3 alnsn if (prev == -1) { 1223 1.3 alnsn if (!(offs & (1 << 15))) { 1224 1.3 alnsn prev = i; 1225 1.3 alnsn continue; 1226 1.3 alnsn } 1227 1.3 alnsn FAIL_IF(push_inst(compiler, LDRI | RT(i) | RN(TMP_SP) | (offs >> 5))); 1228 1.3 alnsn offs += 1 << 15; 1229 1.3 alnsn continue; 1230 1.3 alnsn } 1231 1.3 alnsn FAIL_IF(push_inst(compiler, LDP | RT(prev) | RT2(i) | RN(TMP_SP) | offs)); 1232 1.3 alnsn offs += 2 << 15; 1233 1.3 alnsn prev = -1; 1234 1.3 alnsn } 1235 1.3 alnsn 1236 1.3 alnsn SLJIT_ASSERT(prev == -1); 1237 1.3 alnsn 1238 1.5 riastrad SLJIT_ASSERT(compiler->local_size >= 0); 1239 1.5 riastrad if ((size_t)compiler->local_size <= (63 * sizeof(sljit_sw))) { 1240 1.3 alnsn FAIL_IF(push_inst(compiler, LDP_PST | 29 | RT2(TMP_LR) 1241 1.3 alnsn | RN(TMP_SP) | (((local_size >> 3) & 0x7f) << 15))); 1242 1.3 alnsn } else if (saved_regs_size > 0) { 1243 1.3 alnsn FAIL_IF(push_inst(compiler, ADDI | RD(TMP_SP) | RN(TMP_SP) | (saved_regs_size << 10))); 1244 1.3 alnsn } 1245 1.3 alnsn 1246 1.1 alnsn FAIL_IF(push_inst(compiler, RET | RN(TMP_LR))); 1247 1.1 alnsn return SLJIT_SUCCESS; 1248 1.1 alnsn } 1249 1.1 alnsn 1250 1.1 alnsn /* --------------------------------------------------------------------- */ 1251 1.1 alnsn /* Operators */ 1252 1.1 alnsn /* --------------------------------------------------------------------- */ 1253 1.1 alnsn 1254 1.3 alnsn SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op) 1255 1.1 alnsn { 1256 1.3 alnsn sljit_ins inv_bits = (op & SLJIT_I32_OP) ? (1 << 31) : 0; 1257 1.1 alnsn 1258 1.1 alnsn CHECK_ERROR(); 1259 1.3 alnsn CHECK(check_sljit_emit_op0(compiler, op)); 1260 1.1 alnsn 1261 1.1 alnsn op = GET_OPCODE(op); 1262 1.1 alnsn switch (op) { 1263 1.1 alnsn case SLJIT_BREAKPOINT: 1264 1.1 alnsn return push_inst(compiler, BRK); 1265 1.1 alnsn case SLJIT_NOP: 1266 1.1 alnsn return push_inst(compiler, NOP); 1267 1.3 alnsn case SLJIT_LMUL_UW: 1268 1.3 alnsn case SLJIT_LMUL_SW: 1269 1.3 alnsn FAIL_IF(push_inst(compiler, ORR | RD(TMP_REG1) | RN(TMP_ZERO) | RM(SLJIT_R0))); 1270 1.3 alnsn FAIL_IF(push_inst(compiler, MADD | RD(SLJIT_R0) | RN(SLJIT_R0) | RM(SLJIT_R1) | RT2(TMP_ZERO))); 1271 1.3 alnsn return push_inst(compiler, (op == SLJIT_LMUL_UW ? UMULH : SMULH) | RD(SLJIT_R1) | RN(TMP_REG1) | RM(SLJIT_R1)); 1272 1.3 alnsn case SLJIT_DIVMOD_UW: 1273 1.3 alnsn case SLJIT_DIVMOD_SW: 1274 1.3 alnsn FAIL_IF(push_inst(compiler, (ORR ^ inv_bits) | RD(TMP_REG1) | RN(TMP_ZERO) | RM(SLJIT_R0))); 1275 1.3 alnsn FAIL_IF(push_inst(compiler, ((op == SLJIT_DIVMOD_UW ? UDIV : SDIV) ^ inv_bits) | RD(SLJIT_R0) | RN(SLJIT_R0) | RM(SLJIT_R1))); 1276 1.3 alnsn FAIL_IF(push_inst(compiler, (MADD ^ inv_bits) | RD(SLJIT_R1) | RN(SLJIT_R0) | RM(SLJIT_R1) | RT2(TMP_ZERO))); 1277 1.3 alnsn return push_inst(compiler, (SUB ^ inv_bits) | RD(SLJIT_R1) | RN(TMP_REG1) | RM(SLJIT_R1)); 1278 1.3 alnsn case SLJIT_DIV_UW: 1279 1.3 alnsn case SLJIT_DIV_SW: 1280 1.3 alnsn return push_inst(compiler, ((op == SLJIT_DIV_UW ? UDIV : SDIV) ^ inv_bits) | RD(SLJIT_R0) | RN(SLJIT_R0) | RM(SLJIT_R1)); 1281 1.1 alnsn } 1282 1.1 alnsn 1283 1.1 alnsn return SLJIT_SUCCESS; 1284 1.1 alnsn } 1285 1.1 alnsn 1286 1.3 alnsn SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op, 1287 1.3 alnsn sljit_s32 dst, sljit_sw dstw, 1288 1.3 alnsn sljit_s32 src, sljit_sw srcw) 1289 1.1 alnsn { 1290 1.3 alnsn sljit_s32 dst_r, flags, mem_flags; 1291 1.3 alnsn sljit_s32 op_flags = GET_ALL_FLAGS(op); 1292 1.1 alnsn 1293 1.1 alnsn CHECK_ERROR(); 1294 1.3 alnsn CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw)); 1295 1.1 alnsn ADJUST_LOCAL_OFFSET(dst, dstw); 1296 1.1 alnsn ADJUST_LOCAL_OFFSET(src, srcw); 1297 1.1 alnsn 1298 1.1 alnsn compiler->cache_arg = 0; 1299 1.1 alnsn compiler->cache_argw = 0; 1300 1.1 alnsn 1301 1.1 alnsn dst_r = SLOW_IS_REG(dst) ? dst : TMP_REG1; 1302 1.1 alnsn 1303 1.1 alnsn op = GET_OPCODE(op); 1304 1.1 alnsn if (op >= SLJIT_MOV && op <= SLJIT_MOVU_P) { 1305 1.1 alnsn switch (op) { 1306 1.1 alnsn case SLJIT_MOV: 1307 1.1 alnsn case SLJIT_MOV_P: 1308 1.1 alnsn flags = WORD_SIZE; 1309 1.1 alnsn break; 1310 1.3 alnsn case SLJIT_MOV_U8: 1311 1.1 alnsn flags = BYTE_SIZE; 1312 1.1 alnsn if (src & SLJIT_IMM) 1313 1.3 alnsn srcw = (sljit_u8)srcw; 1314 1.1 alnsn break; 1315 1.3 alnsn case SLJIT_MOV_S8: 1316 1.1 alnsn flags = BYTE_SIZE | SIGNED; 1317 1.1 alnsn if (src & SLJIT_IMM) 1318 1.3 alnsn srcw = (sljit_s8)srcw; 1319 1.1 alnsn break; 1320 1.3 alnsn case SLJIT_MOV_U16: 1321 1.1 alnsn flags = HALF_SIZE; 1322 1.1 alnsn if (src & SLJIT_IMM) 1323 1.3 alnsn srcw = (sljit_u16)srcw; 1324 1.1 alnsn break; 1325 1.3 alnsn case SLJIT_MOV_S16: 1326 1.1 alnsn flags = HALF_SIZE | SIGNED; 1327 1.1 alnsn if (src & SLJIT_IMM) 1328 1.3 alnsn srcw = (sljit_s16)srcw; 1329 1.1 alnsn break; 1330 1.3 alnsn case SLJIT_MOV_U32: 1331 1.1 alnsn flags = INT_SIZE; 1332 1.1 alnsn if (src & SLJIT_IMM) 1333 1.3 alnsn srcw = (sljit_u32)srcw; 1334 1.1 alnsn break; 1335 1.3 alnsn case SLJIT_MOV_S32: 1336 1.1 alnsn flags = INT_SIZE | SIGNED; 1337 1.1 alnsn if (src & SLJIT_IMM) 1338 1.3 alnsn srcw = (sljit_s32)srcw; 1339 1.1 alnsn break; 1340 1.1 alnsn case SLJIT_MOVU: 1341 1.1 alnsn case SLJIT_MOVU_P: 1342 1.1 alnsn flags = WORD_SIZE | UPDATE; 1343 1.1 alnsn break; 1344 1.3 alnsn case SLJIT_MOVU_U8: 1345 1.1 alnsn flags = BYTE_SIZE | UPDATE; 1346 1.1 alnsn if (src & SLJIT_IMM) 1347 1.3 alnsn srcw = (sljit_u8)srcw; 1348 1.1 alnsn break; 1349 1.3 alnsn case SLJIT_MOVU_S8: 1350 1.1 alnsn flags = BYTE_SIZE | SIGNED | UPDATE; 1351 1.1 alnsn if (src & SLJIT_IMM) 1352 1.3 alnsn srcw = (sljit_s8)srcw; 1353 1.1 alnsn break; 1354 1.3 alnsn case SLJIT_MOVU_U16: 1355 1.1 alnsn flags = HALF_SIZE | UPDATE; 1356 1.1 alnsn if (src & SLJIT_IMM) 1357 1.3 alnsn srcw = (sljit_u16)srcw; 1358 1.1 alnsn break; 1359 1.3 alnsn case SLJIT_MOVU_S16: 1360 1.1 alnsn flags = HALF_SIZE | SIGNED | UPDATE; 1361 1.1 alnsn if (src & SLJIT_IMM) 1362 1.3 alnsn srcw = (sljit_s16)srcw; 1363 1.1 alnsn break; 1364 1.3 alnsn case SLJIT_MOVU_U32: 1365 1.1 alnsn flags = INT_SIZE | UPDATE; 1366 1.1 alnsn if (src & SLJIT_IMM) 1367 1.3 alnsn srcw = (sljit_u32)srcw; 1368 1.1 alnsn break; 1369 1.3 alnsn case SLJIT_MOVU_S32: 1370 1.1 alnsn flags = INT_SIZE | SIGNED | UPDATE; 1371 1.1 alnsn if (src & SLJIT_IMM) 1372 1.3 alnsn srcw = (sljit_s32)srcw; 1373 1.1 alnsn break; 1374 1.1 alnsn default: 1375 1.4 alnsn SLJIT_UNREACHABLE(); 1376 1.1 alnsn flags = 0; 1377 1.1 alnsn break; 1378 1.1 alnsn } 1379 1.1 alnsn 1380 1.1 alnsn if (src & SLJIT_IMM) 1381 1.1 alnsn FAIL_IF(emit_op_imm(compiler, SLJIT_MOV | ARG2_IMM, dst_r, TMP_REG1, srcw)); 1382 1.1 alnsn else if (src & SLJIT_MEM) { 1383 1.1 alnsn if (getput_arg_fast(compiler, flags, dst_r, src, srcw)) 1384 1.1 alnsn FAIL_IF(compiler->error); 1385 1.1 alnsn else 1386 1.1 alnsn FAIL_IF(getput_arg(compiler, flags, dst_r, src, srcw, dst, dstw)); 1387 1.1 alnsn } else { 1388 1.1 alnsn if (dst_r != TMP_REG1) 1389 1.3 alnsn return emit_op_imm(compiler, op | ((op_flags & SLJIT_I32_OP) ? INT_OP : 0), dst_r, TMP_REG1, src); 1390 1.1 alnsn dst_r = src; 1391 1.1 alnsn } 1392 1.1 alnsn 1393 1.1 alnsn if (dst & SLJIT_MEM) { 1394 1.1 alnsn if (getput_arg_fast(compiler, flags | STORE, dst_r, dst, dstw)) 1395 1.1 alnsn return compiler->error; 1396 1.1 alnsn else 1397 1.1 alnsn return getput_arg(compiler, flags | STORE, dst_r, dst, dstw, 0, 0); 1398 1.1 alnsn } 1399 1.1 alnsn return SLJIT_SUCCESS; 1400 1.1 alnsn } 1401 1.1 alnsn 1402 1.4 alnsn flags = HAS_FLAGS(op_flags) ? SET_FLAGS : 0; 1403 1.1 alnsn mem_flags = WORD_SIZE; 1404 1.3 alnsn if (op_flags & SLJIT_I32_OP) { 1405 1.1 alnsn flags |= INT_OP; 1406 1.1 alnsn mem_flags = INT_SIZE; 1407 1.1 alnsn } 1408 1.1 alnsn 1409 1.1 alnsn if (dst == SLJIT_UNUSED) 1410 1.1 alnsn flags |= UNUSED_RETURN; 1411 1.1 alnsn 1412 1.1 alnsn if (src & SLJIT_MEM) { 1413 1.1 alnsn if (getput_arg_fast(compiler, mem_flags, TMP_REG2, src, srcw)) 1414 1.1 alnsn FAIL_IF(compiler->error); 1415 1.1 alnsn else 1416 1.1 alnsn FAIL_IF(getput_arg(compiler, mem_flags, TMP_REG2, src, srcw, dst, dstw)); 1417 1.1 alnsn src = TMP_REG2; 1418 1.1 alnsn } 1419 1.1 alnsn 1420 1.1 alnsn if (src & SLJIT_IMM) { 1421 1.1 alnsn flags |= ARG2_IMM; 1422 1.3 alnsn if (op_flags & SLJIT_I32_OP) 1423 1.3 alnsn srcw = (sljit_s32)srcw; 1424 1.1 alnsn } else 1425 1.1 alnsn srcw = src; 1426 1.1 alnsn 1427 1.1 alnsn emit_op_imm(compiler, flags | op, dst_r, TMP_REG1, srcw); 1428 1.1 alnsn 1429 1.1 alnsn if (dst & SLJIT_MEM) { 1430 1.1 alnsn if (getput_arg_fast(compiler, mem_flags | STORE, dst_r, dst, dstw)) 1431 1.1 alnsn return compiler->error; 1432 1.1 alnsn else 1433 1.1 alnsn return getput_arg(compiler, mem_flags | STORE, dst_r, dst, dstw, 0, 0); 1434 1.1 alnsn } 1435 1.1 alnsn return SLJIT_SUCCESS; 1436 1.1 alnsn } 1437 1.1 alnsn 1438 1.3 alnsn SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op, 1439 1.3 alnsn sljit_s32 dst, sljit_sw dstw, 1440 1.3 alnsn sljit_s32 src1, sljit_sw src1w, 1441 1.3 alnsn sljit_s32 src2, sljit_sw src2w) 1442 1.1 alnsn { 1443 1.3 alnsn sljit_s32 dst_r, flags, mem_flags; 1444 1.1 alnsn 1445 1.1 alnsn CHECK_ERROR(); 1446 1.3 alnsn CHECK(check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w)); 1447 1.1 alnsn ADJUST_LOCAL_OFFSET(dst, dstw); 1448 1.1 alnsn ADJUST_LOCAL_OFFSET(src1, src1w); 1449 1.1 alnsn ADJUST_LOCAL_OFFSET(src2, src2w); 1450 1.1 alnsn 1451 1.1 alnsn compiler->cache_arg = 0; 1452 1.1 alnsn compiler->cache_argw = 0; 1453 1.1 alnsn 1454 1.1 alnsn dst_r = SLOW_IS_REG(dst) ? dst : TMP_REG1; 1455 1.4 alnsn flags = HAS_FLAGS(op) ? SET_FLAGS : 0; 1456 1.1 alnsn mem_flags = WORD_SIZE; 1457 1.3 alnsn if (op & SLJIT_I32_OP) { 1458 1.1 alnsn flags |= INT_OP; 1459 1.1 alnsn mem_flags = INT_SIZE; 1460 1.1 alnsn } 1461 1.1 alnsn 1462 1.1 alnsn if (dst == SLJIT_UNUSED) 1463 1.1 alnsn flags |= UNUSED_RETURN; 1464 1.1 alnsn 1465 1.1 alnsn if ((dst & SLJIT_MEM) && !getput_arg_fast(compiler, mem_flags | STORE | ARG_TEST, TMP_REG1, dst, dstw)) 1466 1.1 alnsn flags |= SLOW_DEST; 1467 1.1 alnsn 1468 1.1 alnsn if (src1 & SLJIT_MEM) { 1469 1.1 alnsn if (getput_arg_fast(compiler, mem_flags, TMP_REG1, src1, src1w)) 1470 1.1 alnsn FAIL_IF(compiler->error); 1471 1.1 alnsn else 1472 1.1 alnsn flags |= SLOW_SRC1; 1473 1.1 alnsn } 1474 1.1 alnsn if (src2 & SLJIT_MEM) { 1475 1.1 alnsn if (getput_arg_fast(compiler, mem_flags, TMP_REG2, src2, src2w)) 1476 1.1 alnsn FAIL_IF(compiler->error); 1477 1.1 alnsn else 1478 1.1 alnsn flags |= SLOW_SRC2; 1479 1.1 alnsn } 1480 1.1 alnsn 1481 1.1 alnsn if ((flags & (SLOW_SRC1 | SLOW_SRC2)) == (SLOW_SRC1 | SLOW_SRC2)) { 1482 1.1 alnsn if (!can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) { 1483 1.1 alnsn FAIL_IF(getput_arg(compiler, mem_flags, TMP_REG2, src2, src2w, src1, src1w)); 1484 1.1 alnsn FAIL_IF(getput_arg(compiler, mem_flags, TMP_REG1, src1, src1w, dst, dstw)); 1485 1.1 alnsn } 1486 1.1 alnsn else { 1487 1.1 alnsn FAIL_IF(getput_arg(compiler, mem_flags, TMP_REG1, src1, src1w, src2, src2w)); 1488 1.1 alnsn FAIL_IF(getput_arg(compiler, mem_flags, TMP_REG2, src2, src2w, dst, dstw)); 1489 1.1 alnsn } 1490 1.1 alnsn } 1491 1.1 alnsn else if (flags & SLOW_SRC1) 1492 1.1 alnsn FAIL_IF(getput_arg(compiler, mem_flags, TMP_REG1, src1, src1w, dst, dstw)); 1493 1.1 alnsn else if (flags & SLOW_SRC2) 1494 1.1 alnsn FAIL_IF(getput_arg(compiler, mem_flags, TMP_REG2, src2, src2w, dst, dstw)); 1495 1.1 alnsn 1496 1.1 alnsn if (src1 & SLJIT_MEM) 1497 1.1 alnsn src1 = TMP_REG1; 1498 1.1 alnsn if (src2 & SLJIT_MEM) 1499 1.1 alnsn src2 = TMP_REG2; 1500 1.1 alnsn 1501 1.1 alnsn if (src1 & SLJIT_IMM) 1502 1.1 alnsn flags |= ARG1_IMM; 1503 1.1 alnsn else 1504 1.1 alnsn src1w = src1; 1505 1.1 alnsn if (src2 & SLJIT_IMM) 1506 1.1 alnsn flags |= ARG2_IMM; 1507 1.1 alnsn else 1508 1.1 alnsn src2w = src2; 1509 1.1 alnsn 1510 1.1 alnsn emit_op_imm(compiler, flags | GET_OPCODE(op), dst_r, src1w, src2w); 1511 1.1 alnsn 1512 1.1 alnsn if (dst & SLJIT_MEM) { 1513 1.1 alnsn if (!(flags & SLOW_DEST)) { 1514 1.1 alnsn getput_arg_fast(compiler, mem_flags | STORE, dst_r, dst, dstw); 1515 1.1 alnsn return compiler->error; 1516 1.1 alnsn } 1517 1.1 alnsn return getput_arg(compiler, mem_flags | STORE, TMP_REG1, dst, dstw, 0, 0); 1518 1.1 alnsn } 1519 1.1 alnsn 1520 1.1 alnsn return SLJIT_SUCCESS; 1521 1.1 alnsn } 1522 1.1 alnsn 1523 1.3 alnsn SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 reg) 1524 1.1 alnsn { 1525 1.3 alnsn CHECK_REG_INDEX(check_sljit_get_register_index(reg)); 1526 1.1 alnsn return reg_map[reg]; 1527 1.1 alnsn } 1528 1.1 alnsn 1529 1.3 alnsn SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_float_register_index(sljit_s32 reg) 1530 1.1 alnsn { 1531 1.3 alnsn CHECK_REG_INDEX(check_sljit_get_float_register_index(reg)); 1532 1.1 alnsn return reg; 1533 1.1 alnsn } 1534 1.1 alnsn 1535 1.3 alnsn SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler, 1536 1.3 alnsn void *instruction, sljit_s32 size) 1537 1.1 alnsn { 1538 1.1 alnsn CHECK_ERROR(); 1539 1.3 alnsn CHECK(check_sljit_emit_op_custom(compiler, instruction, size)); 1540 1.1 alnsn 1541 1.1 alnsn return push_inst(compiler, *(sljit_ins*)instruction); 1542 1.1 alnsn } 1543 1.1 alnsn 1544 1.1 alnsn /* --------------------------------------------------------------------- */ 1545 1.1 alnsn /* Floating point operators */ 1546 1.1 alnsn /* --------------------------------------------------------------------- */ 1547 1.1 alnsn 1548 1.3 alnsn SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_is_fpu_available(void) 1549 1.1 alnsn { 1550 1.1 alnsn #ifdef SLJIT_IS_FPU_AVAILABLE 1551 1.1 alnsn return SLJIT_IS_FPU_AVAILABLE; 1552 1.1 alnsn #else 1553 1.1 alnsn /* Available by default. */ 1554 1.1 alnsn return 1; 1555 1.1 alnsn #endif 1556 1.1 alnsn } 1557 1.1 alnsn 1558 1.3 alnsn static sljit_s32 emit_fop_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw) 1559 1.1 alnsn { 1560 1.3 alnsn sljit_u32 shift = MEM_SIZE_SHIFT(flags); 1561 1.1 alnsn sljit_ins ins_bits = (shift << 30); 1562 1.3 alnsn sljit_s32 other_r; 1563 1.1 alnsn sljit_sw diff; 1564 1.1 alnsn 1565 1.1 alnsn SLJIT_ASSERT(arg & SLJIT_MEM); 1566 1.1 alnsn 1567 1.1 alnsn if (!(flags & STORE)) 1568 1.1 alnsn ins_bits |= 1 << 22; 1569 1.1 alnsn 1570 1.1 alnsn if (arg & OFFS_REG_MASK) { 1571 1.1 alnsn argw &= 3; 1572 1.1 alnsn if (!argw || argw == shift) 1573 1.1 alnsn return push_inst(compiler, STR_FR | ins_bits | VT(reg) 1574 1.1 alnsn | RN(arg & REG_MASK) | RM(OFFS_REG(arg)) | (argw ? (1 << 12) : 0)); 1575 1.1 alnsn other_r = OFFS_REG(arg); 1576 1.1 alnsn arg &= REG_MASK; 1577 1.1 alnsn FAIL_IF(push_inst(compiler, ADD | RD(TMP_REG1) | RN(arg) | RM(other_r) | (argw << 10))); 1578 1.1 alnsn arg = TMP_REG1; 1579 1.1 alnsn argw = 0; 1580 1.1 alnsn } 1581 1.1 alnsn 1582 1.1 alnsn arg &= REG_MASK; 1583 1.1 alnsn if (arg && argw >= 0 && ((argw >> shift) <= 0xfff) && (argw & ((1 << shift) - 1)) == 0) 1584 1.1 alnsn return push_inst(compiler, STR_FI | ins_bits | VT(reg) | RN(arg) | (argw << (10 - shift))); 1585 1.1 alnsn 1586 1.1 alnsn if (arg && argw <= 255 && argw >= -256) 1587 1.1 alnsn return push_inst(compiler, STUR_FI | ins_bits | VT(reg) | RN(arg) | ((argw & 0x1ff) << 12)); 1588 1.1 alnsn 1589 1.1 alnsn /* Slow cases */ 1590 1.1 alnsn if (compiler->cache_arg == SLJIT_MEM && argw != compiler->cache_argw) { 1591 1.1 alnsn diff = argw - compiler->cache_argw; 1592 1.1 alnsn if (!arg && diff <= 255 && diff >= -256) 1593 1.1 alnsn return push_inst(compiler, STUR_FI | ins_bits | VT(reg) | RN(TMP_REG3) | ((diff & 0x1ff) << 12)); 1594 1.1 alnsn if (emit_set_delta(compiler, TMP_REG3, TMP_REG3, argw - compiler->cache_argw) != SLJIT_ERR_UNSUPPORTED) { 1595 1.1 alnsn FAIL_IF(compiler->error); 1596 1.1 alnsn compiler->cache_argw = argw; 1597 1.1 alnsn } 1598 1.1 alnsn } 1599 1.1 alnsn 1600 1.1 alnsn if (compiler->cache_arg != SLJIT_MEM || argw != compiler->cache_argw) { 1601 1.1 alnsn compiler->cache_arg = SLJIT_MEM; 1602 1.1 alnsn compiler->cache_argw = argw; 1603 1.1 alnsn FAIL_IF(load_immediate(compiler, TMP_REG3, argw)); 1604 1.1 alnsn } 1605 1.1 alnsn 1606 1.1 alnsn if (arg & REG_MASK) 1607 1.1 alnsn return push_inst(compiler, STR_FR | ins_bits | VT(reg) | RN(arg) | RM(TMP_REG3)); 1608 1.1 alnsn return push_inst(compiler, STR_FI | ins_bits | VT(reg) | RN(TMP_REG3)); 1609 1.1 alnsn } 1610 1.1 alnsn 1611 1.3 alnsn static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler *compiler, sljit_s32 op, 1612 1.3 alnsn sljit_s32 dst, sljit_sw dstw, 1613 1.3 alnsn sljit_s32 src, sljit_sw srcw) 1614 1.3 alnsn { 1615 1.3 alnsn sljit_s32 dst_r = SLOW_IS_REG(dst) ? dst : TMP_REG1; 1616 1.3 alnsn sljit_ins inv_bits = (op & SLJIT_F32_OP) ? (1 << 22) : 0; 1617 1.3 alnsn 1618 1.3 alnsn if (GET_OPCODE(op) == SLJIT_CONV_S32_FROM_F64) 1619 1.3 alnsn inv_bits |= (1 << 31); 1620 1.3 alnsn 1621 1.3 alnsn if (src & SLJIT_MEM) { 1622 1.3 alnsn emit_fop_mem(compiler, (op & SLJIT_F32_OP) ? INT_SIZE : WORD_SIZE, TMP_FREG1, src, srcw); 1623 1.3 alnsn src = TMP_FREG1; 1624 1.3 alnsn } 1625 1.3 alnsn 1626 1.3 alnsn FAIL_IF(push_inst(compiler, (FCVTZS ^ inv_bits) | RD(dst_r) | VN(src))); 1627 1.3 alnsn 1628 1.3 alnsn if (dst_r == TMP_REG1 && dst != SLJIT_UNUSED) 1629 1.3 alnsn return emit_op_mem(compiler, ((GET_OPCODE(op) == SLJIT_CONV_S32_FROM_F64) ? INT_SIZE : WORD_SIZE) | STORE, TMP_REG1, dst, dstw); 1630 1.3 alnsn return SLJIT_SUCCESS; 1631 1.3 alnsn } 1632 1.3 alnsn 1633 1.3 alnsn static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op, 1634 1.3 alnsn sljit_s32 dst, sljit_sw dstw, 1635 1.3 alnsn sljit_s32 src, sljit_sw srcw) 1636 1.3 alnsn { 1637 1.3 alnsn sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1; 1638 1.3 alnsn sljit_ins inv_bits = (op & SLJIT_F32_OP) ? (1 << 22) : 0; 1639 1.3 alnsn 1640 1.3 alnsn if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32) 1641 1.3 alnsn inv_bits |= (1 << 31); 1642 1.3 alnsn 1643 1.3 alnsn if (src & SLJIT_MEM) { 1644 1.3 alnsn emit_op_mem(compiler, ((GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32) ? INT_SIZE : WORD_SIZE), TMP_REG1, src, srcw); 1645 1.3 alnsn src = TMP_REG1; 1646 1.3 alnsn } else if (src & SLJIT_IMM) { 1647 1.3 alnsn #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) 1648 1.3 alnsn if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32) 1649 1.3 alnsn srcw = (sljit_s32)srcw; 1650 1.3 alnsn #endif 1651 1.3 alnsn FAIL_IF(load_immediate(compiler, TMP_REG1, srcw)); 1652 1.3 alnsn src = TMP_REG1; 1653 1.3 alnsn } 1654 1.3 alnsn 1655 1.3 alnsn FAIL_IF(push_inst(compiler, (SCVTF ^ inv_bits) | VD(dst_r) | RN(src))); 1656 1.3 alnsn 1657 1.3 alnsn if (dst & SLJIT_MEM) 1658 1.3 alnsn return emit_fop_mem(compiler, ((op & SLJIT_F32_OP) ? INT_SIZE : WORD_SIZE) | STORE, TMP_FREG1, dst, dstw); 1659 1.3 alnsn return SLJIT_SUCCESS; 1660 1.3 alnsn } 1661 1.3 alnsn 1662 1.3 alnsn static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_s32 op, 1663 1.3 alnsn sljit_s32 src1, sljit_sw src1w, 1664 1.3 alnsn sljit_s32 src2, sljit_sw src2w) 1665 1.1 alnsn { 1666 1.3 alnsn sljit_s32 mem_flags = (op & SLJIT_F32_OP) ? INT_SIZE : WORD_SIZE; 1667 1.3 alnsn sljit_ins inv_bits = (op & SLJIT_F32_OP) ? (1 << 22) : 0; 1668 1.3 alnsn 1669 1.3 alnsn if (src1 & SLJIT_MEM) { 1670 1.3 alnsn emit_fop_mem(compiler, mem_flags, TMP_FREG1, src1, src1w); 1671 1.3 alnsn src1 = TMP_FREG1; 1672 1.3 alnsn } 1673 1.3 alnsn 1674 1.3 alnsn if (src2 & SLJIT_MEM) { 1675 1.3 alnsn emit_fop_mem(compiler, mem_flags, TMP_FREG2, src2, src2w); 1676 1.3 alnsn src2 = TMP_FREG2; 1677 1.3 alnsn } 1678 1.3 alnsn 1679 1.3 alnsn return push_inst(compiler, (FCMP ^ inv_bits) | VN(src1) | VM(src2)); 1680 1.3 alnsn } 1681 1.3 alnsn 1682 1.3 alnsn SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op, 1683 1.3 alnsn sljit_s32 dst, sljit_sw dstw, 1684 1.3 alnsn sljit_s32 src, sljit_sw srcw) 1685 1.3 alnsn { 1686 1.3 alnsn sljit_s32 dst_r, mem_flags = (op & SLJIT_F32_OP) ? INT_SIZE : WORD_SIZE; 1687 1.3 alnsn sljit_ins inv_bits; 1688 1.1 alnsn 1689 1.1 alnsn CHECK_ERROR(); 1690 1.1 alnsn compiler->cache_arg = 0; 1691 1.1 alnsn compiler->cache_argw = 0; 1692 1.1 alnsn 1693 1.3 alnsn SLJIT_COMPILE_ASSERT((INT_SIZE ^ 0x100) == WORD_SIZE, must_be_one_bit_difference); 1694 1.3 alnsn SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw); 1695 1.3 alnsn 1696 1.3 alnsn inv_bits = (op & SLJIT_F32_OP) ? (1 << 22) : 0; 1697 1.3 alnsn dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1; 1698 1.1 alnsn 1699 1.1 alnsn if (src & SLJIT_MEM) { 1700 1.3 alnsn emit_fop_mem(compiler, (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_F32) ? (mem_flags ^ 0x100) : mem_flags, dst_r, src, srcw); 1701 1.1 alnsn src = dst_r; 1702 1.1 alnsn } 1703 1.1 alnsn 1704 1.1 alnsn switch (GET_OPCODE(op)) { 1705 1.3 alnsn case SLJIT_MOV_F64: 1706 1.3 alnsn if (src != dst_r) { 1707 1.3 alnsn if (dst_r != TMP_FREG1) 1708 1.3 alnsn FAIL_IF(push_inst(compiler, (FMOV ^ inv_bits) | VD(dst_r) | VN(src))); 1709 1.3 alnsn else 1710 1.3 alnsn dst_r = src; 1711 1.3 alnsn } 1712 1.1 alnsn break; 1713 1.3 alnsn case SLJIT_NEG_F64: 1714 1.1 alnsn FAIL_IF(push_inst(compiler, (FNEG ^ inv_bits) | VD(dst_r) | VN(src))); 1715 1.1 alnsn break; 1716 1.3 alnsn case SLJIT_ABS_F64: 1717 1.1 alnsn FAIL_IF(push_inst(compiler, (FABS ^ inv_bits) | VD(dst_r) | VN(src))); 1718 1.1 alnsn break; 1719 1.3 alnsn case SLJIT_CONV_F64_FROM_F32: 1720 1.3 alnsn FAIL_IF(push_inst(compiler, FCVT | ((op & SLJIT_F32_OP) ? (1 << 22) : (1 << 15)) | VD(dst_r) | VN(src))); 1721 1.3 alnsn break; 1722 1.1 alnsn } 1723 1.1 alnsn 1724 1.3 alnsn if (dst & SLJIT_MEM) 1725 1.3 alnsn return emit_fop_mem(compiler, mem_flags | STORE, dst_r, dst, dstw); 1726 1.3 alnsn return SLJIT_SUCCESS; 1727 1.1 alnsn } 1728 1.1 alnsn 1729 1.3 alnsn SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op, 1730 1.3 alnsn sljit_s32 dst, sljit_sw dstw, 1731 1.3 alnsn sljit_s32 src1, sljit_sw src1w, 1732 1.3 alnsn sljit_s32 src2, sljit_sw src2w) 1733 1.1 alnsn { 1734 1.3 alnsn sljit_s32 dst_r, mem_flags = (op & SLJIT_F32_OP) ? INT_SIZE : WORD_SIZE; 1735 1.3 alnsn sljit_ins inv_bits = (op & SLJIT_F32_OP) ? (1 << 22) : 0; 1736 1.1 alnsn 1737 1.1 alnsn CHECK_ERROR(); 1738 1.3 alnsn CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w)); 1739 1.3 alnsn ADJUST_LOCAL_OFFSET(dst, dstw); 1740 1.3 alnsn ADJUST_LOCAL_OFFSET(src1, src1w); 1741 1.3 alnsn ADJUST_LOCAL_OFFSET(src2, src2w); 1742 1.1 alnsn 1743 1.1 alnsn compiler->cache_arg = 0; 1744 1.1 alnsn compiler->cache_argw = 0; 1745 1.1 alnsn 1746 1.3 alnsn dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1; 1747 1.1 alnsn if (src1 & SLJIT_MEM) { 1748 1.1 alnsn emit_fop_mem(compiler, mem_flags, TMP_FREG1, src1, src1w); 1749 1.1 alnsn src1 = TMP_FREG1; 1750 1.1 alnsn } 1751 1.1 alnsn if (src2 & SLJIT_MEM) { 1752 1.1 alnsn emit_fop_mem(compiler, mem_flags, TMP_FREG2, src2, src2w); 1753 1.1 alnsn src2 = TMP_FREG2; 1754 1.1 alnsn } 1755 1.1 alnsn 1756 1.1 alnsn switch (GET_OPCODE(op)) { 1757 1.3 alnsn case SLJIT_ADD_F64: 1758 1.1 alnsn FAIL_IF(push_inst(compiler, (FADD ^ inv_bits) | VD(dst_r) | VN(src1) | VM(src2))); 1759 1.1 alnsn break; 1760 1.3 alnsn case SLJIT_SUB_F64: 1761 1.1 alnsn FAIL_IF(push_inst(compiler, (FSUB ^ inv_bits) | VD(dst_r) | VN(src1) | VM(src2))); 1762 1.1 alnsn break; 1763 1.3 alnsn case SLJIT_MUL_F64: 1764 1.1 alnsn FAIL_IF(push_inst(compiler, (FMUL ^ inv_bits) | VD(dst_r) | VN(src1) | VM(src2))); 1765 1.1 alnsn break; 1766 1.3 alnsn case SLJIT_DIV_F64: 1767 1.1 alnsn FAIL_IF(push_inst(compiler, (FDIV ^ inv_bits) | VD(dst_r) | VN(src1) | VM(src2))); 1768 1.1 alnsn break; 1769 1.1 alnsn } 1770 1.1 alnsn 1771 1.1 alnsn if (!(dst & SLJIT_MEM)) 1772 1.1 alnsn return SLJIT_SUCCESS; 1773 1.1 alnsn return emit_fop_mem(compiler, mem_flags | STORE, TMP_FREG1, dst, dstw); 1774 1.1 alnsn } 1775 1.1 alnsn 1776 1.1 alnsn /* --------------------------------------------------------------------- */ 1777 1.1 alnsn /* Other instructions */ 1778 1.1 alnsn /* --------------------------------------------------------------------- */ 1779 1.1 alnsn 1780 1.3 alnsn SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw) 1781 1.1 alnsn { 1782 1.1 alnsn CHECK_ERROR(); 1783 1.3 alnsn CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw)); 1784 1.1 alnsn ADJUST_LOCAL_OFFSET(dst, dstw); 1785 1.1 alnsn 1786 1.1 alnsn /* For UNUSED dst. Uncommon, but possible. */ 1787 1.1 alnsn if (dst == SLJIT_UNUSED) 1788 1.1 alnsn return SLJIT_SUCCESS; 1789 1.1 alnsn 1790 1.3 alnsn if (FAST_IS_REG(dst)) 1791 1.1 alnsn return push_inst(compiler, ORR | RD(dst) | RN(TMP_ZERO) | RM(TMP_LR)); 1792 1.1 alnsn 1793 1.1 alnsn /* Memory. */ 1794 1.1 alnsn return emit_op_mem(compiler, WORD_SIZE | STORE, TMP_LR, dst, dstw); 1795 1.1 alnsn } 1796 1.1 alnsn 1797 1.3 alnsn SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_return(struct sljit_compiler *compiler, sljit_s32 src, sljit_sw srcw) 1798 1.1 alnsn { 1799 1.1 alnsn CHECK_ERROR(); 1800 1.3 alnsn CHECK(check_sljit_emit_fast_return(compiler, src, srcw)); 1801 1.1 alnsn ADJUST_LOCAL_OFFSET(src, srcw); 1802 1.1 alnsn 1803 1.3 alnsn if (FAST_IS_REG(src)) 1804 1.1 alnsn FAIL_IF(push_inst(compiler, ORR | RD(TMP_LR) | RN(TMP_ZERO) | RM(src))); 1805 1.1 alnsn else if (src & SLJIT_MEM) 1806 1.1 alnsn FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_LR, src, srcw)); 1807 1.1 alnsn else if (src & SLJIT_IMM) 1808 1.1 alnsn FAIL_IF(load_immediate(compiler, TMP_LR, srcw)); 1809 1.1 alnsn 1810 1.1 alnsn return push_inst(compiler, RET | RN(TMP_LR)); 1811 1.1 alnsn } 1812 1.1 alnsn 1813 1.1 alnsn /* --------------------------------------------------------------------- */ 1814 1.1 alnsn /* Conditional instructions */ 1815 1.1 alnsn /* --------------------------------------------------------------------- */ 1816 1.1 alnsn 1817 1.3 alnsn static sljit_uw get_cc(sljit_s32 type) 1818 1.1 alnsn { 1819 1.1 alnsn switch (type) { 1820 1.3 alnsn case SLJIT_EQUAL: 1821 1.3 alnsn case SLJIT_MUL_NOT_OVERFLOW: 1822 1.3 alnsn case SLJIT_EQUAL_F64: 1823 1.1 alnsn return 0x1; 1824 1.1 alnsn 1825 1.3 alnsn case SLJIT_NOT_EQUAL: 1826 1.3 alnsn case SLJIT_MUL_OVERFLOW: 1827 1.3 alnsn case SLJIT_NOT_EQUAL_F64: 1828 1.1 alnsn return 0x0; 1829 1.1 alnsn 1830 1.3 alnsn case SLJIT_LESS: 1831 1.3 alnsn case SLJIT_LESS_F64: 1832 1.1 alnsn return 0x2; 1833 1.1 alnsn 1834 1.3 alnsn case SLJIT_GREATER_EQUAL: 1835 1.3 alnsn case SLJIT_GREATER_EQUAL_F64: 1836 1.1 alnsn return 0x3; 1837 1.1 alnsn 1838 1.3 alnsn case SLJIT_GREATER: 1839 1.3 alnsn case SLJIT_GREATER_F64: 1840 1.1 alnsn return 0x9; 1841 1.1 alnsn 1842 1.3 alnsn case SLJIT_LESS_EQUAL: 1843 1.3 alnsn case SLJIT_LESS_EQUAL_F64: 1844 1.1 alnsn return 0x8; 1845 1.1 alnsn 1846 1.3 alnsn case SLJIT_SIG_LESS: 1847 1.1 alnsn return 0xa; 1848 1.1 alnsn 1849 1.3 alnsn case SLJIT_SIG_GREATER_EQUAL: 1850 1.1 alnsn return 0xb; 1851 1.1 alnsn 1852 1.3 alnsn case SLJIT_SIG_GREATER: 1853 1.1 alnsn return 0xd; 1854 1.1 alnsn 1855 1.3 alnsn case SLJIT_SIG_LESS_EQUAL: 1856 1.1 alnsn return 0xc; 1857 1.1 alnsn 1858 1.3 alnsn case SLJIT_OVERFLOW: 1859 1.3 alnsn case SLJIT_UNORDERED_F64: 1860 1.1 alnsn return 0x7; 1861 1.1 alnsn 1862 1.3 alnsn case SLJIT_NOT_OVERFLOW: 1863 1.3 alnsn case SLJIT_ORDERED_F64: 1864 1.1 alnsn return 0x6; 1865 1.1 alnsn 1866 1.1 alnsn default: 1867 1.4 alnsn SLJIT_UNREACHABLE(); 1868 1.1 alnsn return 0xe; 1869 1.1 alnsn } 1870 1.1 alnsn } 1871 1.1 alnsn 1872 1.1 alnsn SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler) 1873 1.1 alnsn { 1874 1.1 alnsn struct sljit_label *label; 1875 1.1 alnsn 1876 1.1 alnsn CHECK_ERROR_PTR(); 1877 1.3 alnsn CHECK_PTR(check_sljit_emit_label(compiler)); 1878 1.1 alnsn 1879 1.1 alnsn if (compiler->last_label && compiler->last_label->size == compiler->size) 1880 1.1 alnsn return compiler->last_label; 1881 1.1 alnsn 1882 1.1 alnsn label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label)); 1883 1.1 alnsn PTR_FAIL_IF(!label); 1884 1.1 alnsn set_label(label, compiler); 1885 1.1 alnsn return label; 1886 1.1 alnsn } 1887 1.1 alnsn 1888 1.3 alnsn SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type) 1889 1.1 alnsn { 1890 1.1 alnsn struct sljit_jump *jump; 1891 1.1 alnsn 1892 1.1 alnsn CHECK_ERROR_PTR(); 1893 1.3 alnsn CHECK_PTR(check_sljit_emit_jump(compiler, type)); 1894 1.1 alnsn 1895 1.1 alnsn jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); 1896 1.1 alnsn PTR_FAIL_IF(!jump); 1897 1.1 alnsn set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP); 1898 1.1 alnsn type &= 0xff; 1899 1.1 alnsn 1900 1.1 alnsn if (type < SLJIT_JUMP) { 1901 1.1 alnsn jump->flags |= IS_COND; 1902 1.1 alnsn PTR_FAIL_IF(push_inst(compiler, B_CC | (6 << 5) | get_cc(type))); 1903 1.1 alnsn } 1904 1.1 alnsn else if (type >= SLJIT_FAST_CALL) 1905 1.1 alnsn jump->flags |= IS_BL; 1906 1.1 alnsn 1907 1.1 alnsn PTR_FAIL_IF(emit_imm64_const(compiler, TMP_REG1, 0)); 1908 1.1 alnsn jump->addr = compiler->size; 1909 1.1 alnsn PTR_FAIL_IF(push_inst(compiler, ((type >= SLJIT_FAST_CALL) ? BLR : BR) | RN(TMP_REG1))); 1910 1.1 alnsn 1911 1.1 alnsn return jump; 1912 1.1 alnsn } 1913 1.1 alnsn 1914 1.3 alnsn static SLJIT_INLINE struct sljit_jump* emit_cmp_to0(struct sljit_compiler *compiler, sljit_s32 type, 1915 1.3 alnsn sljit_s32 src, sljit_sw srcw) 1916 1.1 alnsn { 1917 1.1 alnsn struct sljit_jump *jump; 1918 1.3 alnsn sljit_ins inv_bits = (type & SLJIT_I32_OP) ? (1 << 31) : 0; 1919 1.1 alnsn 1920 1.3 alnsn SLJIT_ASSERT((type & 0xff) == SLJIT_EQUAL || (type & 0xff) == SLJIT_NOT_EQUAL); 1921 1.1 alnsn ADJUST_LOCAL_OFFSET(src, srcw); 1922 1.1 alnsn 1923 1.1 alnsn jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); 1924 1.1 alnsn PTR_FAIL_IF(!jump); 1925 1.1 alnsn set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP); 1926 1.1 alnsn jump->flags |= IS_CBZ | IS_COND; 1927 1.1 alnsn 1928 1.1 alnsn if (src & SLJIT_MEM) { 1929 1.1 alnsn PTR_FAIL_IF(emit_op_mem(compiler, inv_bits ? INT_SIZE : WORD_SIZE, TMP_REG1, src, srcw)); 1930 1.1 alnsn src = TMP_REG1; 1931 1.1 alnsn } 1932 1.1 alnsn else if (src & SLJIT_IMM) { 1933 1.1 alnsn PTR_FAIL_IF(load_immediate(compiler, TMP_REG1, srcw)); 1934 1.1 alnsn src = TMP_REG1; 1935 1.1 alnsn } 1936 1.1 alnsn SLJIT_ASSERT(FAST_IS_REG(src)); 1937 1.1 alnsn 1938 1.3 alnsn if ((type & 0xff) == SLJIT_EQUAL) 1939 1.1 alnsn inv_bits |= 1 << 24; 1940 1.1 alnsn 1941 1.1 alnsn PTR_FAIL_IF(push_inst(compiler, (CBZ ^ inv_bits) | (6 << 5) | RT(src))); 1942 1.1 alnsn PTR_FAIL_IF(emit_imm64_const(compiler, TMP_REG1, 0)); 1943 1.1 alnsn jump->addr = compiler->size; 1944 1.1 alnsn PTR_FAIL_IF(push_inst(compiler, BR | RN(TMP_REG1))); 1945 1.1 alnsn return jump; 1946 1.1 alnsn } 1947 1.1 alnsn 1948 1.3 alnsn SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw) 1949 1.1 alnsn { 1950 1.1 alnsn struct sljit_jump *jump; 1951 1.1 alnsn 1952 1.1 alnsn CHECK_ERROR(); 1953 1.3 alnsn CHECK(check_sljit_emit_ijump(compiler, type, src, srcw)); 1954 1.1 alnsn ADJUST_LOCAL_OFFSET(src, srcw); 1955 1.1 alnsn 1956 1.1 alnsn /* In ARM, we don't need to touch the arguments. */ 1957 1.1 alnsn if (!(src & SLJIT_IMM)) { 1958 1.1 alnsn if (src & SLJIT_MEM) { 1959 1.1 alnsn FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG1, src, srcw)); 1960 1.1 alnsn src = TMP_REG1; 1961 1.1 alnsn } 1962 1.1 alnsn return push_inst(compiler, ((type >= SLJIT_FAST_CALL) ? BLR : BR) | RN(src)); 1963 1.1 alnsn } 1964 1.1 alnsn 1965 1.1 alnsn jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); 1966 1.1 alnsn FAIL_IF(!jump); 1967 1.1 alnsn set_jump(jump, compiler, JUMP_ADDR | ((type >= SLJIT_FAST_CALL) ? IS_BL : 0)); 1968 1.1 alnsn jump->u.target = srcw; 1969 1.1 alnsn 1970 1.1 alnsn FAIL_IF(emit_imm64_const(compiler, TMP_REG1, 0)); 1971 1.1 alnsn jump->addr = compiler->size; 1972 1.1 alnsn return push_inst(compiler, ((type >= SLJIT_FAST_CALL) ? BLR : BR) | RN(TMP_REG1)); 1973 1.1 alnsn } 1974 1.1 alnsn 1975 1.3 alnsn SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op, 1976 1.3 alnsn sljit_s32 dst, sljit_sw dstw, 1977 1.3 alnsn sljit_s32 src, sljit_sw srcw, 1978 1.3 alnsn sljit_s32 type) 1979 1.1 alnsn { 1980 1.3 alnsn sljit_s32 dst_r, flags, mem_flags; 1981 1.1 alnsn sljit_ins cc; 1982 1.1 alnsn 1983 1.1 alnsn CHECK_ERROR(); 1984 1.3 alnsn CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, src, srcw, type)); 1985 1.1 alnsn ADJUST_LOCAL_OFFSET(dst, dstw); 1986 1.1 alnsn ADJUST_LOCAL_OFFSET(src, srcw); 1987 1.1 alnsn 1988 1.1 alnsn if (dst == SLJIT_UNUSED) 1989 1.1 alnsn return SLJIT_SUCCESS; 1990 1.1 alnsn 1991 1.3 alnsn cc = get_cc(type & 0xff); 1992 1.3 alnsn dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1; 1993 1.1 alnsn 1994 1.1 alnsn if (GET_OPCODE(op) < SLJIT_ADD) { 1995 1.1 alnsn FAIL_IF(push_inst(compiler, CSINC | (cc << 12) | RD(dst_r) | RN(TMP_ZERO) | RM(TMP_ZERO))); 1996 1.1 alnsn if (dst_r != TMP_REG1) 1997 1.1 alnsn return SLJIT_SUCCESS; 1998 1.1 alnsn return emit_op_mem(compiler, (GET_OPCODE(op) == SLJIT_MOV ? WORD_SIZE : INT_SIZE) | STORE, TMP_REG1, dst, dstw); 1999 1.1 alnsn } 2000 1.1 alnsn 2001 1.1 alnsn compiler->cache_arg = 0; 2002 1.1 alnsn compiler->cache_argw = 0; 2003 1.4 alnsn flags = HAS_FLAGS(op) ? SET_FLAGS : 0; 2004 1.1 alnsn mem_flags = WORD_SIZE; 2005 1.3 alnsn if (op & SLJIT_I32_OP) { 2006 1.1 alnsn flags |= INT_OP; 2007 1.1 alnsn mem_flags = INT_SIZE; 2008 1.1 alnsn } 2009 1.1 alnsn 2010 1.1 alnsn if (src & SLJIT_MEM) { 2011 1.1 alnsn FAIL_IF(emit_op_mem2(compiler, mem_flags, TMP_REG1, src, srcw, dst, dstw)); 2012 1.1 alnsn src = TMP_REG1; 2013 1.1 alnsn srcw = 0; 2014 1.1 alnsn } else if (src & SLJIT_IMM) 2015 1.1 alnsn flags |= ARG1_IMM; 2016 1.1 alnsn 2017 1.1 alnsn FAIL_IF(push_inst(compiler, CSINC | (cc << 12) | RD(TMP_REG2) | RN(TMP_ZERO) | RM(TMP_ZERO))); 2018 1.1 alnsn emit_op_imm(compiler, flags | GET_OPCODE(op), dst_r, src, TMP_REG2); 2019 1.1 alnsn 2020 1.1 alnsn if (dst_r != TMP_REG1) 2021 1.1 alnsn return SLJIT_SUCCESS; 2022 1.1 alnsn return emit_op_mem2(compiler, mem_flags | STORE, TMP_REG1, dst, dstw, 0, 0); 2023 1.1 alnsn } 2024 1.1 alnsn 2025 1.3 alnsn SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value) 2026 1.1 alnsn { 2027 1.1 alnsn struct sljit_const *const_; 2028 1.3 alnsn sljit_s32 dst_r; 2029 1.1 alnsn 2030 1.1 alnsn CHECK_ERROR_PTR(); 2031 1.3 alnsn CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value)); 2032 1.1 alnsn ADJUST_LOCAL_OFFSET(dst, dstw); 2033 1.1 alnsn 2034 1.1 alnsn const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const)); 2035 1.1 alnsn PTR_FAIL_IF(!const_); 2036 1.1 alnsn set_const(const_, compiler); 2037 1.1 alnsn 2038 1.1 alnsn dst_r = SLOW_IS_REG(dst) ? dst : TMP_REG1; 2039 1.1 alnsn PTR_FAIL_IF(emit_imm64_const(compiler, dst_r, init_value)); 2040 1.1 alnsn 2041 1.1 alnsn if (dst & SLJIT_MEM) 2042 1.1 alnsn PTR_FAIL_IF(emit_op_mem(compiler, WORD_SIZE | STORE, dst_r, dst, dstw)); 2043 1.1 alnsn return const_; 2044 1.1 alnsn } 2045 1.1 alnsn 2046 1.4 alnsn SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset) 2047 1.1 alnsn { 2048 1.1 alnsn sljit_ins* inst = (sljit_ins*)addr; 2049 1.4 alnsn modify_imm64_const(inst, new_target); 2050 1.4 alnsn inst = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset); 2051 1.1 alnsn SLJIT_CACHE_FLUSH(inst, inst + 4); 2052 1.1 alnsn } 2053 1.1 alnsn 2054 1.4 alnsn SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset) 2055 1.1 alnsn { 2056 1.1 alnsn sljit_ins* inst = (sljit_ins*)addr; 2057 1.1 alnsn modify_imm64_const(inst, new_constant); 2058 1.4 alnsn inst = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset); 2059 1.1 alnsn SLJIT_CACHE_FLUSH(inst, inst + 4); 2060 1.1 alnsn } 2061