1 /* $NetBSD: sljitNativeARM_32.c,v 1.4 2019/01/20 23:14:16 alnsn Exp $ */ 2 3 /* 4 * Stack-less Just-In-Time compiler 5 * 6 * Copyright Zoltan Herczeg (hzmester (at) freemail.hu). All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without modification, are 9 * permitted provided that the following conditions are met: 10 * 11 * 1. Redistributions of source code must retain the above copyright notice, this list of 12 * conditions and the following disclaimer. 13 * 14 * 2. Redistributions in binary form must reproduce the above copyright notice, this list 15 * of conditions and the following disclaimer in the documentation and/or other materials 16 * provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY 19 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 20 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT 21 * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 22 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED 23 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR 24 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 25 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 26 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 */ 28 29 SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void) 30 { 31 #if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) 32 return "ARMv7" SLJIT_CPUINFO; 33 #elif (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) 34 return "ARMv5" SLJIT_CPUINFO; 35 #else 36 #error "Internal error: Unknown ARM architecture" 37 #endif 38 } 39 40 /* Last register + 1. 
*/
/* Indexes (into reg_map) of the backend-private temporaries; they are placed
   directly after the SLJIT_NUMBER_OF_REGISTERS + 1 general entries of the table. */
#define TMP_REG1	(SLJIT_NUMBER_OF_REGISTERS + 2)
#define TMP_REG2	(SLJIT_NUMBER_OF_REGISTERS + 3)
#define TMP_PC		(SLJIT_NUMBER_OF_REGISTERS + 4)

/* Temporary floating point register indexes. */
#define TMP_FREG1	(0)
#define TMP_FREG2	(SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1)

/* In ARM instruction words.
   Cache lines are usually 32 byte aligned. */
#define CONST_POOL_ALIGNMENT	8
/* Value of compiler->cpool_diff while no literal is waiting in the constant pool. */
#define CONST_POOL_EMPTY	0xffffffff

/* Rounds ptr up to the next multiple of CONST_POOL_ALIGNMENT words
   (the operand is evaluated as a byte address). */
#define ALIGN_INSTRUCTION(ptr) \
	(sljit_uw*)(((sljit_uw)(ptr) + (CONST_POOL_ALIGNMENT * sizeof(sljit_uw)) - 1) & ~((CONST_POOL_ALIGNMENT * sizeof(sljit_uw)) - 1))
/* Converts a byte distance into instruction words and subtracts the
   worst case alignment padding (CONST_POOL_ALIGNMENT - 1 words). */
#define MAX_DIFFERENCE(max_diff) \
	(((max_diff) / (sljit_s32)sizeof(sljit_uw)) - (CONST_POOL_ALIGNMENT - 1))

/* See sljit_emit_enter and sljit_emit_op0 if you want to change them. */
/* Translates a register index used by this backend into the 4 bit register
   number placed into the instruction word (see RM / RD / RN below). */
static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 5] = {
	0, 0, 1, 2, 3, 11, 10, 9, 8, 7, 6, 5, 4, 13, 14, 12, 15
};

/* Mapped register number positioned at bit 0 (RM), bit 12 (RD) or bit 16 (RN). */
#define RM(rm) (reg_map[rm])
#define RD(rd) (reg_map[rd] << 12)
#define RN(rn) (reg_map[rn] << 16)

/* --------------------------------------------------------------------- */
/*  Instruction forms                                                    */
/* --------------------------------------------------------------------- */

/* The instruction includes the AL condition.
   INST_NAME - CONDITIONAL remove this flag. */
#define COND_MASK	0xf0000000
#define CONDITIONAL	0xe0000000
/* Top byte of the marker word push_cpool() stores in the instruction buffer;
   sljit_generate_code() recognizes a pushed constant pool by it. */
#define PUSH_POOL	0xff000000

/* DP - Data Processing instruction (use with EMIT_DATA_PROCESS_INS).
*/ 78 #define ADC_DP 0x5 79 #define ADD_DP 0x4 80 #define AND_DP 0x0 81 #define B 0xea000000 82 #define BIC_DP 0xe 83 #define BL 0xeb000000 84 #define BLX 0xe12fff30 85 #define BX 0xe12fff10 86 #define CLZ 0xe16f0f10 87 #define CMP_DP 0xa 88 #define BKPT 0xe1200070 89 #define EOR_DP 0x1 90 #define MOV_DP 0xd 91 #define MUL 0xe0000090 92 #define MVN_DP 0xf 93 #define NOP 0xe1a00000 94 #define ORR_DP 0xc 95 #define PUSH 0xe92d0000 96 #define POP 0xe8bd0000 97 #define RSB_DP 0x3 98 #define RSC_DP 0x7 99 #define SBC_DP 0x6 100 #define SMULL 0xe0c00090 101 #define SUB_DP 0x2 102 #define UMULL 0xe0800090 103 #define VABS_F32 0xeeb00ac0 104 #define VADD_F32 0xee300a00 105 #define VCMP_F32 0xeeb40a40 106 #define VCVT_F32_S32 0xeeb80ac0 107 #define VCVT_F64_F32 0xeeb70ac0 108 #define VCVT_S32_F32 0xeebd0ac0 109 #define VDIV_F32 0xee800a00 110 #define VMOV_F32 0xeeb00a40 111 #define VMOV 0xee000a10 112 #define VMRS 0xeef1fa10 113 #define VMUL_F32 0xee200a00 114 #define VNEG_F32 0xeeb10a40 115 #define VSTR_F32 0xed000a00 116 #define VSUB_F32 0xee300a40 117 118 #if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) 119 /* Arm v7 specific instructions. */ 120 #define MOVW 0xe3000000 121 #define MOVT 0xe3400000 122 #define SXTB 0xe6af0070 123 #define SXTH 0xe6bf0070 124 #define UXTB 0xe6ef0070 125 #define UXTH 0xe6ff0070 126 #endif 127 128 #if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) 129 130 static sljit_s32 push_cpool(struct sljit_compiler *compiler) 131 { 132 /* Pushing the constant pool into the instruction stream. */ 133 sljit_uw* inst; 134 sljit_uw* cpool_ptr; 135 sljit_uw* cpool_end; 136 sljit_s32 i; 137 138 /* The label could point the address after the constant pool. 
*/ 139 if (compiler->last_label && compiler->last_label->size == compiler->size) 140 compiler->last_label->size += compiler->cpool_fill + (CONST_POOL_ALIGNMENT - 1) + 1; 141 142 SLJIT_ASSERT(compiler->cpool_fill > 0 && compiler->cpool_fill <= CPOOL_SIZE); 143 inst = (sljit_uw*)ensure_buf(compiler, sizeof(sljit_uw)); 144 FAIL_IF(!inst); 145 compiler->size++; 146 *inst = 0xff000000 | compiler->cpool_fill; 147 148 for (i = 0; i < CONST_POOL_ALIGNMENT - 1; i++) { 149 inst = (sljit_uw*)ensure_buf(compiler, sizeof(sljit_uw)); 150 FAIL_IF(!inst); 151 compiler->size++; 152 *inst = 0; 153 } 154 155 cpool_ptr = compiler->cpool; 156 cpool_end = cpool_ptr + compiler->cpool_fill; 157 while (cpool_ptr < cpool_end) { 158 inst = (sljit_uw*)ensure_buf(compiler, sizeof(sljit_uw)); 159 FAIL_IF(!inst); 160 compiler->size++; 161 *inst = *cpool_ptr++; 162 } 163 compiler->cpool_diff = CONST_POOL_EMPTY; 164 compiler->cpool_fill = 0; 165 return SLJIT_SUCCESS; 166 } 167 168 static sljit_s32 push_inst(struct sljit_compiler *compiler, sljit_uw inst) 169 { 170 sljit_uw* ptr; 171 172 if (SLJIT_UNLIKELY(compiler->cpool_diff != CONST_POOL_EMPTY && compiler->size - compiler->cpool_diff >= MAX_DIFFERENCE(4092))) 173 FAIL_IF(push_cpool(compiler)); 174 175 ptr = (sljit_uw*)ensure_buf(compiler, sizeof(sljit_uw)); 176 FAIL_IF(!ptr); 177 compiler->size++; 178 *ptr = inst; 179 return SLJIT_SUCCESS; 180 } 181 182 static sljit_s32 push_inst_with_literal(struct sljit_compiler *compiler, sljit_uw inst, sljit_uw literal) 183 { 184 sljit_uw* ptr; 185 sljit_uw cpool_index = CPOOL_SIZE; 186 sljit_uw* cpool_ptr; 187 sljit_uw* cpool_end; 188 sljit_u8* cpool_unique_ptr; 189 190 if (SLJIT_UNLIKELY(compiler->cpool_diff != CONST_POOL_EMPTY && compiler->size - compiler->cpool_diff >= MAX_DIFFERENCE(4092))) 191 FAIL_IF(push_cpool(compiler)); 192 else if (compiler->cpool_fill > 0) { 193 cpool_ptr = compiler->cpool; 194 cpool_end = cpool_ptr + compiler->cpool_fill; 195 cpool_unique_ptr = compiler->cpool_unique; 196 do { 
197 if ((*cpool_ptr == literal) && !(*cpool_unique_ptr)) { 198 cpool_index = cpool_ptr - compiler->cpool; 199 break; 200 } 201 cpool_ptr++; 202 cpool_unique_ptr++; 203 } while (cpool_ptr < cpool_end); 204 } 205 206 if (cpool_index == CPOOL_SIZE) { 207 /* Must allocate a new entry in the literal pool. */ 208 if (compiler->cpool_fill < CPOOL_SIZE) { 209 cpool_index = compiler->cpool_fill; 210 compiler->cpool_fill++; 211 } 212 else { 213 FAIL_IF(push_cpool(compiler)); 214 cpool_index = 0; 215 compiler->cpool_fill = 1; 216 } 217 } 218 219 SLJIT_ASSERT((inst & 0xfff) == 0); 220 ptr = (sljit_uw*)ensure_buf(compiler, sizeof(sljit_uw)); 221 FAIL_IF(!ptr); 222 compiler->size++; 223 *ptr = inst | cpool_index; 224 225 compiler->cpool[cpool_index] = literal; 226 compiler->cpool_unique[cpool_index] = 0; 227 if (compiler->cpool_diff == CONST_POOL_EMPTY) 228 compiler->cpool_diff = compiler->size; 229 return SLJIT_SUCCESS; 230 } 231 232 static sljit_s32 push_inst_with_unique_literal(struct sljit_compiler *compiler, sljit_uw inst, sljit_uw literal) 233 { 234 sljit_uw* ptr; 235 if (SLJIT_UNLIKELY((compiler->cpool_diff != CONST_POOL_EMPTY && compiler->size - compiler->cpool_diff >= MAX_DIFFERENCE(4092)) || compiler->cpool_fill >= CPOOL_SIZE)) 236 FAIL_IF(push_cpool(compiler)); 237 238 SLJIT_ASSERT(compiler->cpool_fill < CPOOL_SIZE && (inst & 0xfff) == 0); 239 ptr = (sljit_uw*)ensure_buf(compiler, sizeof(sljit_uw)); 240 FAIL_IF(!ptr); 241 compiler->size++; 242 *ptr = inst | compiler->cpool_fill; 243 244 compiler->cpool[compiler->cpool_fill] = literal; 245 compiler->cpool_unique[compiler->cpool_fill] = 1; 246 compiler->cpool_fill++; 247 if (compiler->cpool_diff == CONST_POOL_EMPTY) 248 compiler->cpool_diff = compiler->size; 249 return SLJIT_SUCCESS; 250 } 251 252 static SLJIT_INLINE sljit_s32 prepare_blx(struct sljit_compiler *compiler) 253 { 254 /* Place for at least two instruction (doesn't matter whether the first has a literal). 
*/ 255 if (SLJIT_UNLIKELY(compiler->cpool_diff != CONST_POOL_EMPTY && compiler->size - compiler->cpool_diff >= MAX_DIFFERENCE(4088))) 256 return push_cpool(compiler); 257 return SLJIT_SUCCESS; 258 } 259 260 static SLJIT_INLINE sljit_s32 emit_blx(struct sljit_compiler *compiler) 261 { 262 /* Must follow tightly the previous instruction (to be able to convert it to bl instruction). */ 263 SLJIT_ASSERT(compiler->cpool_diff == CONST_POOL_EMPTY || compiler->size - compiler->cpool_diff < MAX_DIFFERENCE(4092)); 264 return push_inst(compiler, BLX | RM(TMP_REG2)); 265 } 266 267 static sljit_uw patch_pc_relative_loads(sljit_uw *last_pc_patch, sljit_uw *code_ptr, sljit_uw* const_pool, sljit_uw cpool_size) 268 { 269 sljit_uw diff; 270 sljit_uw ind; 271 sljit_uw counter = 0; 272 sljit_uw* clear_const_pool = const_pool; 273 sljit_uw* clear_const_pool_end = const_pool + cpool_size; 274 275 SLJIT_ASSERT(const_pool - code_ptr <= CONST_POOL_ALIGNMENT); 276 /* Set unused flag for all literals in the constant pool. 277 I.e.: unused literals can belong to branches, which can be encoded as B or BL. 278 We can "compress" the constant pool by discarding these literals. */ 279 while (clear_const_pool < clear_const_pool_end) 280 *clear_const_pool++ = (sljit_uw)(-1); 281 282 while (last_pc_patch < code_ptr) { 283 /* Data transfer instruction with Rn == r15. */ 284 if ((*last_pc_patch & 0x0c0f0000) == 0x040f0000) { 285 diff = const_pool - last_pc_patch; 286 ind = (*last_pc_patch) & 0xfff; 287 288 /* Must be a load instruction with immediate offset. 
*/ 289 SLJIT_ASSERT(ind < cpool_size && !(*last_pc_patch & (1 << 25)) && (*last_pc_patch & (1 << 20))); 290 if ((sljit_s32)const_pool[ind] < 0) { 291 const_pool[ind] = counter; 292 ind = counter; 293 counter++; 294 } 295 else 296 ind = const_pool[ind]; 297 298 SLJIT_ASSERT(diff >= 1); 299 if (diff >= 2 || ind > 0) { 300 diff = (diff + ind - 2) << 2; 301 SLJIT_ASSERT(diff <= 0xfff); 302 *last_pc_patch = (*last_pc_patch & ~0xfff) | diff; 303 } 304 else 305 *last_pc_patch = (*last_pc_patch & ~(0xfff | (1 << 23))) | 0x004; 306 } 307 last_pc_patch++; 308 } 309 return counter; 310 } 311 312 /* In some rare ocasions we may need future patches. The probability is close to 0 in practice. */ 313 struct future_patch { 314 struct future_patch* next; 315 sljit_s32 index; 316 sljit_s32 value; 317 }; 318 319 static sljit_s32 resolve_const_pool_index(struct sljit_compiler *compiler, struct future_patch **first_patch, sljit_uw cpool_current_index, sljit_uw *cpool_start_address, sljit_uw *buf_ptr) 320 { 321 sljit_s32 value; 322 struct future_patch *curr_patch, *prev_patch; 323 324 SLJIT_UNUSED_ARG(compiler); 325 326 /* Using the values generated by patch_pc_relative_loads. 
*/ 327 if (!*first_patch) 328 value = (sljit_s32)cpool_start_address[cpool_current_index]; 329 else { 330 curr_patch = *first_patch; 331 prev_patch = NULL; 332 while (1) { 333 if (!curr_patch) { 334 value = (sljit_s32)cpool_start_address[cpool_current_index]; 335 break; 336 } 337 if ((sljit_uw)curr_patch->index == cpool_current_index) { 338 value = curr_patch->value; 339 if (prev_patch) 340 prev_patch->next = curr_patch->next; 341 else 342 *first_patch = curr_patch->next; 343 SLJIT_FREE(curr_patch, compiler->allocator_data); 344 break; 345 } 346 prev_patch = curr_patch; 347 curr_patch = curr_patch->next; 348 } 349 } 350 351 if (value >= 0) { 352 if ((sljit_uw)value > cpool_current_index) { 353 curr_patch = (struct future_patch*)SLJIT_MALLOC(sizeof(struct future_patch), compiler->allocator_data); 354 if (!curr_patch) { 355 while (*first_patch) { 356 curr_patch = *first_patch; 357 *first_patch = (*first_patch)->next; 358 SLJIT_FREE(curr_patch, compiler->allocator_data); 359 } 360 return SLJIT_ERR_ALLOC_FAILED; 361 } 362 curr_patch->next = *first_patch; 363 curr_patch->index = value; 364 curr_patch->value = cpool_start_address[value]; 365 *first_patch = curr_patch; 366 } 367 cpool_start_address[value] = *buf_ptr; 368 } 369 return SLJIT_SUCCESS; 370 } 371 372 #else 373 374 static sljit_s32 push_inst(struct sljit_compiler *compiler, sljit_uw inst) 375 { 376 sljit_uw* ptr; 377 378 ptr = (sljit_uw*)ensure_buf(compiler, sizeof(sljit_uw)); 379 FAIL_IF(!ptr); 380 compiler->size++; 381 *ptr = inst; 382 return SLJIT_SUCCESS; 383 } 384 385 static SLJIT_INLINE sljit_s32 emit_imm(struct sljit_compiler *compiler, sljit_s32 reg, sljit_sw imm) 386 { 387 FAIL_IF(push_inst(compiler, MOVW | RD(reg) | ((imm << 4) & 0xf0000) | (imm & 0xfff))); 388 return push_inst(compiler, MOVT | RD(reg) | ((imm >> 12) & 0xf0000) | ((imm >> 16) & 0xfff)); 389 } 390 391 #endif 392 393 static SLJIT_INLINE sljit_s32 detect_jump_type(struct sljit_jump *jump, sljit_uw *code_ptr, sljit_uw *code, sljit_sw 
executable_offset) 394 { 395 sljit_sw diff; 396 397 if (jump->flags & SLJIT_REWRITABLE_JUMP) 398 return 0; 399 400 #if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) 401 if (jump->flags & IS_BL) 402 code_ptr--; 403 404 if (jump->flags & JUMP_ADDR) 405 diff = ((sljit_sw)jump->u.target - (sljit_sw)(code_ptr + 2) - executable_offset); 406 else { 407 SLJIT_ASSERT(jump->flags & JUMP_LABEL); 408 diff = ((sljit_sw)(code + jump->u.label->size) - (sljit_sw)(code_ptr + 2)); 409 } 410 411 /* Branch to Thumb code has not been optimized yet. */ 412 if (diff & 0x3) 413 return 0; 414 415 if (jump->flags & IS_BL) { 416 if (diff <= 0x01ffffff && diff >= -0x02000000) { 417 *code_ptr = (BL - CONDITIONAL) | (*(code_ptr + 1) & COND_MASK); 418 jump->flags |= PATCH_B; 419 return 1; 420 } 421 } 422 else { 423 if (diff <= 0x01ffffff && diff >= -0x02000000) { 424 *code_ptr = (B - CONDITIONAL) | (*code_ptr & COND_MASK); 425 jump->flags |= PATCH_B; 426 } 427 } 428 #else 429 if (jump->flags & JUMP_ADDR) 430 diff = ((sljit_sw)jump->u.target - (sljit_sw)code_ptr - executable_offset); 431 else { 432 SLJIT_ASSERT(jump->flags & JUMP_LABEL); 433 diff = ((sljit_sw)(code + jump->u.label->size) - (sljit_sw)code_ptr); 434 } 435 436 /* Branch to Thumb code has not been optimized yet. */ 437 if (diff & 0x3) 438 return 0; 439 440 if (diff <= 0x01ffffff && diff >= -0x02000000) { 441 code_ptr -= 2; 442 *code_ptr = ((jump->flags & IS_BL) ? 
(BL - CONDITIONAL) : (B - CONDITIONAL)) | (code_ptr[2] & COND_MASK); 443 jump->flags |= PATCH_B; 444 return 1; 445 } 446 #endif 447 return 0; 448 } 449 450 static SLJIT_INLINE void inline_set_jump_addr(sljit_uw jump_ptr, sljit_sw executable_offset, sljit_uw new_addr, sljit_s32 flush_cache) 451 { 452 #if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) 453 sljit_uw *ptr = (sljit_uw *)jump_ptr; 454 sljit_uw *inst = (sljit_uw *)ptr[0]; 455 sljit_uw mov_pc = ptr[1]; 456 sljit_s32 bl = (mov_pc & 0x0000f000) != RD(TMP_PC); 457 sljit_sw diff = (sljit_sw)(((sljit_sw)new_addr - (sljit_sw)(inst + 2) - executable_offset) >> 2); 458 459 if (diff <= 0x7fffff && diff >= -0x800000) { 460 /* Turn to branch. */ 461 if (!bl) { 462 inst[0] = (mov_pc & COND_MASK) | (B - CONDITIONAL) | (diff & 0xffffff); 463 if (flush_cache) { 464 inst = (sljit_uw *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset); 465 SLJIT_CACHE_FLUSH(inst, inst + 1); 466 } 467 } else { 468 inst[0] = (mov_pc & COND_MASK) | (BL - CONDITIONAL) | (diff & 0xffffff); 469 inst[1] = NOP; 470 if (flush_cache) { 471 inst = (sljit_uw *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset); 472 SLJIT_CACHE_FLUSH(inst, inst + 2); 473 } 474 } 475 } else { 476 /* Get the position of the constant. 
*/ 477 if (mov_pc & (1 << 23)) 478 ptr = inst + ((mov_pc & 0xfff) >> 2) + 2; 479 else 480 ptr = inst + 1; 481 482 if (*inst != mov_pc) { 483 inst[0] = mov_pc; 484 if (!bl) { 485 if (flush_cache) { 486 inst = (sljit_uw *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset); 487 SLJIT_CACHE_FLUSH(inst, inst + 1); 488 } 489 } else { 490 inst[1] = BLX | RM(TMP_REG1); 491 if (flush_cache) { 492 inst = (sljit_uw *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset); 493 SLJIT_CACHE_FLUSH(inst, inst + 2); 494 } 495 } 496 } 497 *ptr = new_addr; 498 } 499 #else 500 sljit_uw *inst = (sljit_uw*)jump_ptr; 501 SLJIT_ASSERT((inst[0] & 0xfff00000) == MOVW && (inst[1] & 0xfff00000) == MOVT); 502 inst[0] = MOVW | (inst[0] & 0xf000) | ((new_addr << 4) & 0xf0000) | (new_addr & 0xfff); 503 inst[1] = MOVT | (inst[1] & 0xf000) | ((new_addr >> 12) & 0xf0000) | ((new_addr >> 16) & 0xfff); 504 if (flush_cache) { 505 inst = (sljit_uw *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset); 506 SLJIT_CACHE_FLUSH(inst, inst + 2); 507 } 508 #endif 509 } 510 511 static sljit_uw get_imm(sljit_uw imm); 512 513 static SLJIT_INLINE void inline_set_const(sljit_uw addr, sljit_sw executable_offset, sljit_sw new_constant, sljit_s32 flush_cache) 514 { 515 #if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) 516 sljit_uw *ptr = (sljit_uw*)addr; 517 sljit_uw *inst = (sljit_uw*)ptr[0]; 518 sljit_uw ldr_literal = ptr[1]; 519 sljit_uw src2; 520 521 src2 = get_imm(new_constant); 522 if (src2) { 523 *inst = 0xe3a00000 | (ldr_literal & 0xf000) | src2; 524 if (flush_cache) { 525 inst = (sljit_uw *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset); 526 SLJIT_CACHE_FLUSH(inst, inst + 1); 527 } 528 return; 529 } 530 531 src2 = get_imm(~new_constant); 532 if (src2) { 533 *inst = 0xe3e00000 | (ldr_literal & 0xf000) | src2; 534 if (flush_cache) { 535 inst = (sljit_uw *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset); 536 SLJIT_CACHE_FLUSH(inst, inst + 1); 537 } 538 return; 539 } 540 541 if (ldr_literal & (1 << 23)) 542 ptr = inst + 
((ldr_literal & 0xfff) >> 2) + 2; 543 else 544 ptr = inst + 1; 545 546 if (*inst != ldr_literal) { 547 *inst = ldr_literal; 548 if (flush_cache) { 549 inst = (sljit_uw *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset); 550 SLJIT_CACHE_FLUSH(inst, inst + 1); 551 } 552 } 553 *ptr = new_constant; 554 #else 555 sljit_uw *inst = (sljit_uw*)addr; 556 SLJIT_ASSERT((inst[0] & 0xfff00000) == MOVW && (inst[1] & 0xfff00000) == MOVT); 557 inst[0] = MOVW | (inst[0] & 0xf000) | ((new_constant << 4) & 0xf0000) | (new_constant & 0xfff); 558 inst[1] = MOVT | (inst[1] & 0xf000) | ((new_constant >> 12) & 0xf0000) | ((new_constant >> 16) & 0xfff); 559 if (flush_cache) { 560 inst = (sljit_uw *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset); 561 SLJIT_CACHE_FLUSH(inst, inst + 2); 562 } 563 #endif 564 } 565 566 SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler) 567 { 568 struct sljit_memory_fragment *buf; 569 sljit_uw *code; 570 sljit_uw *code_ptr; 571 sljit_uw *buf_ptr; 572 sljit_uw *buf_end; 573 sljit_uw size; 574 sljit_uw word_count; 575 sljit_sw executable_offset; 576 sljit_sw jump_addr; 577 #if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) 578 sljit_uw cpool_size; 579 sljit_uw cpool_skip_alignment; 580 sljit_uw cpool_current_index; 581 sljit_uw *cpool_start_address; 582 sljit_uw *last_pc_patch; 583 struct future_patch *first_patch; 584 #endif 585 586 struct sljit_label *label; 587 struct sljit_jump *jump; 588 struct sljit_const *const_; 589 590 CHECK_ERROR_PTR(); 591 CHECK_PTR(check_sljit_generate_code(compiler)); 592 reverse_buf(compiler); 593 594 /* Second code generation pass. 
*/ 595 #if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) 596 size = compiler->size + (compiler->patches << 1); 597 if (compiler->cpool_fill > 0) 598 size += compiler->cpool_fill + CONST_POOL_ALIGNMENT - 1; 599 #else 600 size = compiler->size; 601 #endif 602 code = (sljit_uw*)SLJIT_MALLOC_EXEC(size * sizeof(sljit_uw)); 603 PTR_FAIL_WITH_EXEC_IF(code); 604 buf = compiler->buf; 605 606 #if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) 607 cpool_size = 0; 608 cpool_skip_alignment = 0; 609 cpool_current_index = 0; 610 cpool_start_address = NULL; 611 first_patch = NULL; 612 last_pc_patch = code; 613 #endif 614 615 code_ptr = code; 616 word_count = 0; 617 executable_offset = SLJIT_EXEC_OFFSET(code); 618 619 label = compiler->labels; 620 jump = compiler->jumps; 621 const_ = compiler->consts; 622 623 if (label && label->size == 0) { 624 label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code, executable_offset); 625 label = label->next; 626 } 627 628 do { 629 buf_ptr = (sljit_uw*)buf->memory; 630 buf_end = buf_ptr + (buf->used_size >> 2); 631 do { 632 word_count++; 633 #if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) 634 if (cpool_size > 0) { 635 if (cpool_skip_alignment > 0) { 636 buf_ptr++; 637 cpool_skip_alignment--; 638 } 639 else { 640 if (SLJIT_UNLIKELY(resolve_const_pool_index(compiler, &first_patch, cpool_current_index, cpool_start_address, buf_ptr))) { 641 SLJIT_FREE_EXEC(code); 642 compiler->error = SLJIT_ERR_ALLOC_FAILED; 643 return NULL; 644 } 645 buf_ptr++; 646 if (++cpool_current_index >= cpool_size) { 647 SLJIT_ASSERT(!first_patch); 648 cpool_size = 0; 649 if (label && label->size == word_count) { 650 /* Points after the current instruction. 
*/ 651 label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); 652 label->size = code_ptr - code; 653 label = label->next; 654 } 655 } 656 } 657 } 658 else if ((*buf_ptr & 0xff000000) != PUSH_POOL) { 659 #endif 660 *code_ptr = *buf_ptr++; 661 /* These structures are ordered by their address. */ 662 SLJIT_ASSERT(!label || label->size >= word_count); 663 SLJIT_ASSERT(!jump || jump->addr >= word_count); 664 SLJIT_ASSERT(!const_ || const_->addr >= word_count); 665 if (jump && jump->addr == word_count) { 666 #if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) 667 if (detect_jump_type(jump, code_ptr, code, executable_offset)) 668 code_ptr--; 669 jump->addr = (sljit_uw)code_ptr; 670 #else 671 jump->addr = (sljit_uw)(code_ptr - 2); 672 if (detect_jump_type(jump, code_ptr, code, executable_offset)) 673 code_ptr -= 2; 674 #endif 675 jump = jump->next; 676 } 677 if (label && label->size == word_count) { 678 /* code_ptr can be affected above. */ 679 label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr + 1, executable_offset); 680 label->size = (code_ptr + 1) - code; 681 label = label->next; 682 } 683 if (const_ && const_->addr == word_count) { 684 #if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) 685 const_->addr = (sljit_uw)code_ptr; 686 #else 687 const_->addr = (sljit_uw)(code_ptr - 1); 688 #endif 689 const_ = const_->next; 690 } 691 code_ptr++; 692 #if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) 693 } 694 else { 695 /* Fortunately, no need to shift. */ 696 cpool_size = *buf_ptr++ & ~PUSH_POOL; 697 SLJIT_ASSERT(cpool_size > 0); 698 cpool_start_address = ALIGN_INSTRUCTION(code_ptr + 1); 699 cpool_current_index = patch_pc_relative_loads(last_pc_patch, code_ptr, cpool_start_address, cpool_size); 700 if (cpool_current_index > 0) { 701 /* Unconditional branch. 
*/ 702 *code_ptr = B | (((cpool_start_address - code_ptr) + cpool_current_index - 2) & ~PUSH_POOL); 703 code_ptr = cpool_start_address + cpool_current_index; 704 } 705 cpool_skip_alignment = CONST_POOL_ALIGNMENT - 1; 706 cpool_current_index = 0; 707 last_pc_patch = code_ptr; 708 } 709 #endif 710 } while (buf_ptr < buf_end); 711 buf = buf->next; 712 } while (buf); 713 714 SLJIT_ASSERT(!label); 715 SLJIT_ASSERT(!jump); 716 SLJIT_ASSERT(!const_); 717 718 #if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) 719 SLJIT_ASSERT(cpool_size == 0); 720 if (compiler->cpool_fill > 0) { 721 cpool_start_address = ALIGN_INSTRUCTION(code_ptr); 722 cpool_current_index = patch_pc_relative_loads(last_pc_patch, code_ptr, cpool_start_address, compiler->cpool_fill); 723 if (cpool_current_index > 0) 724 code_ptr = cpool_start_address + cpool_current_index; 725 726 buf_ptr = compiler->cpool; 727 buf_end = buf_ptr + compiler->cpool_fill; 728 cpool_current_index = 0; 729 while (buf_ptr < buf_end) { 730 if (SLJIT_UNLIKELY(resolve_const_pool_index(compiler, &first_patch, cpool_current_index, cpool_start_address, buf_ptr))) { 731 SLJIT_FREE_EXEC(code); 732 compiler->error = SLJIT_ERR_ALLOC_FAILED; 733 return NULL; 734 } 735 buf_ptr++; 736 cpool_current_index++; 737 } 738 SLJIT_ASSERT(!first_patch); 739 } 740 #endif 741 742 jump = compiler->jumps; 743 while (jump) { 744 buf_ptr = (sljit_uw *)jump->addr; 745 746 if (jump->flags & PATCH_B) { 747 jump_addr = (sljit_sw)SLJIT_ADD_EXEC_OFFSET(buf_ptr + 2, executable_offset); 748 if (!(jump->flags & JUMP_ADDR)) { 749 SLJIT_ASSERT(jump->flags & JUMP_LABEL); 750 SLJIT_ASSERT(((sljit_sw)jump->u.label->addr - jump_addr) <= 0x01ffffff && ((sljit_sw)jump->u.label->addr - jump_addr) >= -0x02000000); 751 *buf_ptr |= (((sljit_sw)jump->u.label->addr - jump_addr) >> 2) & 0x00ffffff; 752 } 753 else { 754 SLJIT_ASSERT(((sljit_sw)jump->u.target - jump_addr) <= 0x01ffffff && ((sljit_sw)jump->u.target - jump_addr) >= -0x02000000); 755 *buf_ptr |= 
(((sljit_sw)jump->u.target - jump_addr) >> 2) & 0x00ffffff; 756 } 757 } 758 else if (jump->flags & SLJIT_REWRITABLE_JUMP) { 759 #if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) 760 jump->addr = (sljit_uw)code_ptr; 761 code_ptr[0] = (sljit_uw)buf_ptr; 762 code_ptr[1] = *buf_ptr; 763 inline_set_jump_addr((sljit_uw)code_ptr, executable_offset, (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target, 0); 764 code_ptr += 2; 765 #else 766 inline_set_jump_addr((sljit_uw)buf_ptr, executable_offset, (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target, 0); 767 #endif 768 } 769 else { 770 #if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) 771 if (jump->flags & IS_BL) 772 buf_ptr--; 773 if (*buf_ptr & (1 << 23)) 774 buf_ptr += ((*buf_ptr & 0xfff) >> 2) + 2; 775 else 776 buf_ptr += 1; 777 *buf_ptr = (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target; 778 #else 779 inline_set_jump_addr((sljit_uw)buf_ptr, executable_offset, (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target, 0); 780 #endif 781 } 782 jump = jump->next; 783 } 784 785 #if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) 786 const_ = compiler->consts; 787 while (const_) { 788 buf_ptr = (sljit_uw*)const_->addr; 789 const_->addr = (sljit_uw)code_ptr; 790 791 code_ptr[0] = (sljit_uw)buf_ptr; 792 code_ptr[1] = *buf_ptr; 793 if (*buf_ptr & (1 << 23)) 794 buf_ptr += ((*buf_ptr & 0xfff) >> 2) + 2; 795 else 796 buf_ptr += 1; 797 /* Set the value again (can be a simple constant). 
*/ 798 inline_set_const((sljit_uw)code_ptr, executable_offset, *buf_ptr, 0); 799 code_ptr += 2; 800 801 const_ = const_->next; 802 } 803 #endif 804 805 SLJIT_ASSERT(code_ptr - code <= (sljit_s32)size); 806 807 compiler->error = SLJIT_ERR_COMPILED; 808 compiler->executable_offset = executable_offset; 809 compiler->executable_size = (code_ptr - code) * sizeof(sljit_uw); 810 811 code = (sljit_uw *)SLJIT_ADD_EXEC_OFFSET(code, executable_offset); 812 code_ptr = (sljit_uw *)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); 813 814 SLJIT_CACHE_FLUSH(code, code_ptr); 815 return code; 816 } 817 818 /* --------------------------------------------------------------------- */ 819 /* Entry, exit */ 820 /* --------------------------------------------------------------------- */ 821 822 /* Creates an index in data_transfer_insts array. */ 823 #define WORD_DATA 0x00 824 #define BYTE_DATA 0x01 825 #define HALF_DATA 0x02 826 #define SIGNED_DATA 0x04 827 #define LOAD_DATA 0x08 828 829 /* emit_op inp_flags. 830 WRITE_BACK must be the first, since it is a flag. */ 831 #define WRITE_BACK 0x10 832 #define ALLOW_IMM 0x20 833 #define ALLOW_INV_IMM 0x40 834 #define ALLOW_ANY_IMM (ALLOW_IMM | ALLOW_INV_IMM) 835 836 /* s/l - store/load (1 bit) 837 u/s - signed/unsigned (1 bit) 838 w/b/h/N - word/byte/half/NOT allowed (2 bit) 839 Storing signed and unsigned values are the same operations. 
*/

/* Base opcode of each data transfer, indexed by the 4 bit combination of
   WORD_DATA / BYTE_DATA / HALF_DATA, SIGNED_DATA and LOAD_DATA.
   Entries marked "not allowed" are zero. */
static const sljit_uw data_transfer_insts[16] = {
/* s u w */ 0xe5000000 /* str */,
/* s u b */ 0xe5400000 /* strb */,
/* s u h */ 0xe10000b0 /* strh */,
/* s u N */ 0x00000000 /* not allowed */,
/* s s w */ 0xe5000000 /* str */,
/* s s b */ 0xe5400000 /* strb */,
/* s s h */ 0xe10000b0 /* strh */,
/* s s N */ 0x00000000 /* not allowed */,

/* l u w */ 0xe5100000 /* ldr */,
/* l u b */ 0xe5500000 /* ldrb */,
/* l u h */ 0xe11000b0 /* ldrh */,
/* l u N */ 0x00000000 /* not allowed */,
/* l s w */ 0xe5100000 /* ldr */,
/* l s b */ 0xe11000d0 /* ldrsb */,
/* l s h */ 0xe11000f0 /* ldrsh */,
/* l s N */ 0x00000000 /* not allowed */,
};

/* Builds a data transfer instruction: base opcode selected by the low 4 bits
   of type, 'add' placed at bit 23, 'wb' shifted left by 21 - 4, the mapped
   target register at bit 12, the mapped base register at bit 16, and 'arg'
   or-ed in unchanged.
   NOTE(review): the 21 - 4 shift suggests wb is passed as the WRITE_BACK
   flag bit (0x10) so that it lands on bit 21 - confirm against the callers. */
#define EMIT_DATA_TRANSFER(type, add, wb, target_reg, base_reg, arg) \
	(data_transfer_insts[(type) & 0xf] | ((add) << 23) | ((wb) << (21 - 4)) | (reg_map[target_reg] << 12) | (reg_map[base_reg] << 16) | (arg))

/* Normal ldr/str instruction.
   Type2: ldrsb, ldrh, ldrsh */
/* Non-zero when the selected opcode has bit 26 set (str, strb, ldr and ldrb
   in the table above). */
#define IS_TYPE1_TRANSFER(type) \
	(data_transfer_insts[(type) & 0xf] & 0x04000000)
/* Splits an 8 bit immediate for the type2 forms: low nibble at bits 0-3,
   high nibble at bits 8-11, and sets bit 22. */
#define TYPE2_TRANSFER_IMM(imm) \
	(((imm) & 0xf) | (((imm) & 0xf0) << 4) | (1 << 22))

/* Condition: AL.
*/ 872 #define EMIT_DATA_PROCESS_INS(opcode, set_flags, dst, src1, src2) \ 873 (0xe0000000 | ((opcode) << 21) | (set_flags) | RD(dst) | RN(src1) | (src2)) 874 875 static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 inp_flags, 876 sljit_s32 dst, sljit_sw dstw, 877 sljit_s32 src1, sljit_sw src1w, 878 sljit_s32 src2, sljit_sw src2w); 879 880 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler, 881 sljit_s32 options, sljit_s32 args, sljit_s32 scratches, sljit_s32 saveds, 882 sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) 883 { 884 sljit_s32 size, i, tmp; 885 sljit_uw push; 886 887 CHECK_ERROR(); 888 CHECK(check_sljit_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size)); 889 set_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size); 890 891 /* Push saved registers, temporary registers 892 stmdb sp!, {..., lr} */ 893 push = PUSH | (1 << 14); 894 895 tmp = saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? 
(SLJIT_S0 + 1 - saveds) : SLJIT_FIRST_SAVED_REG; 896 for (i = SLJIT_S0; i >= tmp; i--) 897 push |= 1 << reg_map[i]; 898 899 for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--) 900 push |= 1 << reg_map[i]; 901 902 FAIL_IF(push_inst(compiler, push)); 903 904 /* Stack must be aligned to 8 bytes: */ 905 size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1); 906 local_size = ((size + local_size + 7) & ~7) - size; 907 compiler->local_size = local_size; 908 if (local_size > 0) 909 FAIL_IF(emit_op(compiler, SLJIT_SUB, ALLOW_IMM, SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, local_size)); 910 911 if (args >= 1) 912 FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, SLJIT_S0, SLJIT_UNUSED, RM(SLJIT_R0)))); 913 if (args >= 2) 914 FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, SLJIT_S1, SLJIT_UNUSED, RM(SLJIT_R1)))); 915 if (args >= 3) 916 FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, SLJIT_S2, SLJIT_UNUSED, RM(SLJIT_R2)))); 917 918 return SLJIT_SUCCESS; 919 } 920 921 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler, 922 sljit_s32 options, sljit_s32 args, sljit_s32 scratches, sljit_s32 saveds, 923 sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) 924 { 925 sljit_s32 size; 926 927 CHECK_ERROR(); 928 CHECK(check_sljit_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size)); 929 set_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size); 930 931 size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1); 932 compiler->local_size = ((size + local_size + 7) & ~7) - size; 933 return SLJIT_SUCCESS; 934 } 935 936 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw) 937 { 938 sljit_s32 i, tmp; 939 sljit_uw pop; 940 941 CHECK_ERROR(); 942 CHECK(check_sljit_emit_return(compiler, op, src, srcw)); 943 944 FAIL_IF(emit_mov_before_return(compiler, op, src, srcw)); 945 946 if 
(compiler->local_size > 0) 947 FAIL_IF(emit_op(compiler, SLJIT_ADD, ALLOW_IMM, SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, compiler->local_size)); 948 949 /* Push saved registers, temporary registers 950 ldmia sp!, {..., pc} */ 951 pop = POP | (1 << 15); 952 953 tmp = compiler->saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - compiler->saveds) : SLJIT_FIRST_SAVED_REG; 954 for (i = SLJIT_S0; i >= tmp; i--) 955 pop |= 1 << reg_map[i]; 956 957 for (i = compiler->scratches; i >= SLJIT_FIRST_SAVED_REG; i--) 958 pop |= 1 << reg_map[i]; 959 960 return push_inst(compiler, pop); 961 } 962 963 /* --------------------------------------------------------------------- */ 964 /* Operators */ 965 /* --------------------------------------------------------------------- */ 966 967 /* flags: */ 968 /* Arguments are swapped. */ 969 #define ARGS_SWAPPED 0x01 970 /* Inverted immediate. */ 971 #define INV_IMM 0x02 972 /* Source and destination is register. */ 973 #define MOVE_REG_CONV 0x04 974 /* SET_FLAGS must be (1 << 20) as it is also the value of S bit (can be used for optimization). */ 975 #define SET_FLAGS (1 << 20) 976 /* dst: reg 977 src1: reg 978 src2: reg or imm (if allowed) 979 SRC2_IMM must be (1 << 25) as it is also the value of I bit (can be used for optimization). 
*/ 980 #define SRC2_IMM (1 << 25) 981 982 #define EMIT_SHIFT_INS_AND_RETURN(opcode) \ 983 SLJIT_ASSERT(!(flags & INV_IMM) && !(src2 & SRC2_IMM)); \ 984 if (compiler->shift_imm != 0x20) { \ 985 SLJIT_ASSERT(src1 == TMP_REG1); \ 986 SLJIT_ASSERT(!(flags & ARGS_SWAPPED)); \ 987 \ 988 if (compiler->shift_imm != 0) \ 989 return push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, flags & SET_FLAGS, \ 990 dst, SLJIT_UNUSED, (compiler->shift_imm << 7) | (opcode << 5) | RM(src2))); \ 991 return push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, flags & SET_FLAGS, dst, SLJIT_UNUSED, RM(src2))); \ 992 } \ 993 return push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, flags & SET_FLAGS, \ 994 dst, SLJIT_UNUSED, (reg_map[(flags & ARGS_SWAPPED) ? src1 : src2] << 8) | (opcode << 5) | 0x10 | RM((flags & ARGS_SWAPPED) ? src2 : src1))); 995 996 static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 flags, 997 sljit_s32 dst, sljit_s32 src1, sljit_s32 src2) 998 { 999 switch (GET_OPCODE(op)) { 1000 case SLJIT_MOV: 1001 SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & ARGS_SWAPPED)); 1002 if (dst != src2) { 1003 if (src2 & SRC2_IMM) { 1004 return push_inst(compiler, EMIT_DATA_PROCESS_INS((flags & INV_IMM) ? MVN_DP : MOV_DP, 0, 1005 dst, SLJIT_UNUSED, src2)); 1006 } 1007 return push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst, SLJIT_UNUSED, RM(src2))); 1008 } 1009 return SLJIT_SUCCESS; 1010 1011 case SLJIT_MOV_U8: 1012 case SLJIT_MOV_S8: 1013 SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & ARGS_SWAPPED)); 1014 if (flags & MOVE_REG_CONV) { 1015 #if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) 1016 if (op == SLJIT_MOV_U8) 1017 return push_inst(compiler, EMIT_DATA_PROCESS_INS(AND_DP, 0, dst, src2, SRC2_IMM | 0xff)); 1018 FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst, SLJIT_UNUSED, (24 << 7) | RM(src2)))); 1019 return push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst, SLJIT_UNUSED, (24 << 7) | (op == SLJIT_MOV_U8 ? 
0x20 : 0x40) | RM(dst))); 1020 #else 1021 return push_inst(compiler, (op == SLJIT_MOV_U8 ? UXTB : SXTB) | RD(dst) | RM(src2)); 1022 #endif 1023 } 1024 else if (dst != src2) { 1025 SLJIT_ASSERT(src2 & SRC2_IMM); 1026 return push_inst(compiler, EMIT_DATA_PROCESS_INS((flags & INV_IMM) ? MVN_DP : MOV_DP, 0, 1027 dst, SLJIT_UNUSED, src2)); 1028 } 1029 return SLJIT_SUCCESS; 1030 1031 case SLJIT_MOV_U16: 1032 case SLJIT_MOV_S16: 1033 SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & ARGS_SWAPPED)); 1034 if (flags & MOVE_REG_CONV) { 1035 #if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) 1036 FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst, SLJIT_UNUSED, (16 << 7) | RM(src2)))); 1037 return push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst, SLJIT_UNUSED, (16 << 7) | (op == SLJIT_MOV_U16 ? 0x20 : 0x40) | RM(dst))); 1038 #else 1039 return push_inst(compiler, (op == SLJIT_MOV_U16 ? UXTH : SXTH) | RD(dst) | RM(src2)); 1040 #endif 1041 } 1042 else if (dst != src2) { 1043 SLJIT_ASSERT(src2 & SRC2_IMM); 1044 return push_inst(compiler, EMIT_DATA_PROCESS_INS((flags & INV_IMM) ? MVN_DP : MOV_DP, 0, 1045 dst, SLJIT_UNUSED, src2)); 1046 } 1047 return SLJIT_SUCCESS; 1048 1049 case SLJIT_NOT: 1050 if (src2 & SRC2_IMM) { 1051 return push_inst(compiler, EMIT_DATA_PROCESS_INS((flags & INV_IMM) ? 
MOV_DP : MVN_DP, flags & SET_FLAGS, 1052 dst, SLJIT_UNUSED, src2)); 1053 } 1054 return push_inst(compiler, EMIT_DATA_PROCESS_INS(MVN_DP, flags & SET_FLAGS, dst, SLJIT_UNUSED, RM(src2))); 1055 1056 case SLJIT_CLZ: 1057 SLJIT_ASSERT(!(flags & INV_IMM)); 1058 SLJIT_ASSERT(!(src2 & SRC2_IMM)); 1059 FAIL_IF(push_inst(compiler, CLZ | RD(dst) | RM(src2))); 1060 if (flags & SET_FLAGS) 1061 return push_inst(compiler, EMIT_DATA_PROCESS_INS(CMP_DP, flags & SET_FLAGS, SLJIT_UNUSED, dst, SRC2_IMM)); 1062 return SLJIT_SUCCESS; 1063 1064 case SLJIT_ADD: 1065 SLJIT_ASSERT(!(flags & INV_IMM)); 1066 return push_inst(compiler, EMIT_DATA_PROCESS_INS(ADD_DP, flags & SET_FLAGS, 1067 dst, src1, (src2 & SRC2_IMM) ? src2 : RM(src2))); 1068 1069 case SLJIT_ADDC: 1070 SLJIT_ASSERT(!(flags & INV_IMM)); 1071 return push_inst(compiler, EMIT_DATA_PROCESS_INS(ADC_DP, flags & SET_FLAGS, 1072 dst, src1, (src2 & SRC2_IMM) ? src2 : RM(src2))); 1073 1074 case SLJIT_SUB: 1075 SLJIT_ASSERT(!(flags & INV_IMM)); 1076 return push_inst(compiler, EMIT_DATA_PROCESS_INS(!(flags & ARGS_SWAPPED) ? SUB_DP : RSB_DP, flags & SET_FLAGS, 1077 dst, src1, (src2 & SRC2_IMM) ? src2 : RM(src2))); 1078 1079 case SLJIT_SUBC: 1080 SLJIT_ASSERT(!(flags & INV_IMM)); 1081 return push_inst(compiler, EMIT_DATA_PROCESS_INS(!(flags & ARGS_SWAPPED) ? SBC_DP : RSC_DP, flags & SET_FLAGS, 1082 dst, src1, (src2 & SRC2_IMM) ? src2 : RM(src2))); 1083 1084 case SLJIT_MUL: 1085 SLJIT_ASSERT(!(flags & INV_IMM)); 1086 SLJIT_ASSERT(!(src2 & SRC2_IMM)); 1087 1088 if (!HAS_FLAGS(op)) 1089 return push_inst(compiler, MUL | (reg_map[dst] << 16) | (reg_map[src2] << 8) | reg_map[src1]); 1090 1091 FAIL_IF(push_inst(compiler, SMULL | (reg_map[TMP_REG1] << 16) | (reg_map[dst] << 12) | (reg_map[src2] << 8) | reg_map[src1])); 1092 1093 /* cmp TMP_REG1, dst asr #31. 
*/ 1094 return push_inst(compiler, EMIT_DATA_PROCESS_INS(CMP_DP, SET_FLAGS, SLJIT_UNUSED, TMP_REG1, RM(dst) | 0xfc0)); 1095 1096 case SLJIT_AND: 1097 return push_inst(compiler, EMIT_DATA_PROCESS_INS(!(flags & INV_IMM) ? AND_DP : BIC_DP, flags & SET_FLAGS, 1098 dst, src1, (src2 & SRC2_IMM) ? src2 : RM(src2))); 1099 1100 case SLJIT_OR: 1101 SLJIT_ASSERT(!(flags & INV_IMM)); 1102 return push_inst(compiler, EMIT_DATA_PROCESS_INS(ORR_DP, flags & SET_FLAGS, dst, src1, (src2 & SRC2_IMM) ? src2 : RM(src2))); 1103 1104 case SLJIT_XOR: 1105 SLJIT_ASSERT(!(flags & INV_IMM)); 1106 return push_inst(compiler, EMIT_DATA_PROCESS_INS(EOR_DP, flags & SET_FLAGS, dst, src1, (src2 & SRC2_IMM) ? src2 : RM(src2))); 1107 1108 case SLJIT_SHL: 1109 EMIT_SHIFT_INS_AND_RETURN(0); 1110 1111 case SLJIT_LSHR: 1112 EMIT_SHIFT_INS_AND_RETURN(1); 1113 1114 case SLJIT_ASHR: 1115 EMIT_SHIFT_INS_AND_RETURN(2); 1116 } 1117 1118 SLJIT_UNREACHABLE(); 1119 return SLJIT_SUCCESS; 1120 } 1121 1122 #undef EMIT_SHIFT_INS_AND_RETURN 1123 1124 /* Tests whether the immediate can be stored in the 12 bit imm field. 1125 Returns with 0 if not possible. 
*/ 1126 static sljit_uw get_imm(sljit_uw imm) 1127 { 1128 sljit_s32 rol; 1129 1130 if (imm <= 0xff) 1131 return SRC2_IMM | imm; 1132 1133 if (!(imm & 0xff000000)) { 1134 imm <<= 8; 1135 rol = 8; 1136 } 1137 else { 1138 imm = (imm << 24) | (imm >> 8); 1139 rol = 0; 1140 } 1141 1142 if (!(imm & 0xff000000)) { 1143 imm <<= 8; 1144 rol += 4; 1145 } 1146 1147 if (!(imm & 0xf0000000)) { 1148 imm <<= 4; 1149 rol += 2; 1150 } 1151 1152 if (!(imm & 0xc0000000)) { 1153 imm <<= 2; 1154 rol += 1; 1155 } 1156 1157 if (!(imm & 0x00ffffff)) 1158 return SRC2_IMM | (imm >> 24) | (rol << 8); 1159 else 1160 return 0; 1161 } 1162 1163 #if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) 1164 static sljit_s32 generate_int(struct sljit_compiler *compiler, sljit_s32 reg, sljit_uw imm, sljit_s32 positive) 1165 { 1166 sljit_uw mask; 1167 sljit_uw imm1; 1168 sljit_uw imm2; 1169 sljit_s32 rol; 1170 1171 /* Step1: Search a zero byte (8 continous zero bit). */ 1172 mask = 0xff000000; 1173 rol = 8; 1174 while(1) { 1175 if (!(imm & mask)) { 1176 /* Rol imm by rol. */ 1177 imm = (imm << rol) | (imm >> (32 - rol)); 1178 /* Calculate arm rol. */ 1179 rol = 4 + (rol >> 1); 1180 break; 1181 } 1182 rol += 2; 1183 mask >>= 2; 1184 if (mask & 0x3) { 1185 /* rol by 8. */ 1186 imm = (imm << 8) | (imm >> 24); 1187 mask = 0xff00; 1188 rol = 24; 1189 while (1) { 1190 if (!(imm & mask)) { 1191 /* Rol imm by rol. */ 1192 imm = (imm << rol) | (imm >> (32 - rol)); 1193 /* Calculate arm rol. */ 1194 rol = (rol >> 1) - 8; 1195 break; 1196 } 1197 rol += 2; 1198 mask >>= 2; 1199 if (mask & 0x3) 1200 return 0; 1201 } 1202 break; 1203 } 1204 } 1205 1206 /* The low 8 bit must be zero. 
*/ 1207 SLJIT_ASSERT(!(imm & 0xff)); 1208 1209 if (!(imm & 0xff000000)) { 1210 imm1 = SRC2_IMM | ((imm >> 16) & 0xff) | (((rol + 4) & 0xf) << 8); 1211 imm2 = SRC2_IMM | ((imm >> 8) & 0xff) | (((rol + 8) & 0xf) << 8); 1212 } 1213 else if (imm & 0xc0000000) { 1214 imm1 = SRC2_IMM | ((imm >> 24) & 0xff) | ((rol & 0xf) << 8); 1215 imm <<= 8; 1216 rol += 4; 1217 1218 if (!(imm & 0xff000000)) { 1219 imm <<= 8; 1220 rol += 4; 1221 } 1222 1223 if (!(imm & 0xf0000000)) { 1224 imm <<= 4; 1225 rol += 2; 1226 } 1227 1228 if (!(imm & 0xc0000000)) { 1229 imm <<= 2; 1230 rol += 1; 1231 } 1232 1233 if (!(imm & 0x00ffffff)) 1234 imm2 = SRC2_IMM | (imm >> 24) | ((rol & 0xf) << 8); 1235 else 1236 return 0; 1237 } 1238 else { 1239 if (!(imm & 0xf0000000)) { 1240 imm <<= 4; 1241 rol += 2; 1242 } 1243 1244 if (!(imm & 0xc0000000)) { 1245 imm <<= 2; 1246 rol += 1; 1247 } 1248 1249 imm1 = SRC2_IMM | ((imm >> 24) & 0xff) | ((rol & 0xf) << 8); 1250 imm <<= 8; 1251 rol += 4; 1252 1253 if (!(imm & 0xf0000000)) { 1254 imm <<= 4; 1255 rol += 2; 1256 } 1257 1258 if (!(imm & 0xc0000000)) { 1259 imm <<= 2; 1260 rol += 1; 1261 } 1262 1263 if (!(imm & 0x00ffffff)) 1264 imm2 = SRC2_IMM | (imm >> 24) | ((rol & 0xf) << 8); 1265 else 1266 return 0; 1267 } 1268 1269 FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(positive ? MOV_DP : MVN_DP, 0, reg, SLJIT_UNUSED, imm1))); 1270 FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(positive ? ORR_DP : BIC_DP, 0, reg, reg, imm2))); 1271 return 1; 1272 } 1273 #endif 1274 1275 static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 reg, sljit_uw imm) 1276 { 1277 sljit_uw tmp; 1278 1279 #if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) 1280 if (!(imm & ~0xffff)) 1281 return push_inst(compiler, MOVW | RD(reg) | ((imm << 4) & 0xf0000) | (imm & 0xfff)); 1282 #endif 1283 1284 /* Create imm by 1 inst. 
*/ 1285 tmp = get_imm(imm); 1286 if (tmp) 1287 return push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, reg, SLJIT_UNUSED, tmp)); 1288 1289 tmp = get_imm(~imm); 1290 if (tmp) 1291 return push_inst(compiler, EMIT_DATA_PROCESS_INS(MVN_DP, 0, reg, SLJIT_UNUSED, tmp)); 1292 1293 #if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) 1294 /* Create imm by 2 inst. */ 1295 FAIL_IF(generate_int(compiler, reg, imm, 1)); 1296 FAIL_IF(generate_int(compiler, reg, ~imm, 0)); 1297 1298 /* Load integer. */ 1299 return push_inst_with_literal(compiler, EMIT_DATA_TRANSFER(WORD_DATA | LOAD_DATA, 1, 0, reg, TMP_PC, 0), imm); 1300 #else 1301 return emit_imm(compiler, reg, imm); 1302 #endif 1303 } 1304 1305 static SLJIT_INLINE sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, 1306 sljit_s32 arg, sljit_sw argw, sljit_s32 tmp_reg) 1307 { 1308 sljit_uw offset_reg, imm; 1309 sljit_uw is_type1_transfer = IS_TYPE1_TRANSFER(flags); 1310 1311 SLJIT_ASSERT (arg & SLJIT_MEM); 1312 SLJIT_ASSERT((arg & REG_MASK) != tmp_reg); 1313 1314 SLJIT_COMPILE_ASSERT(WRITE_BACK == 0x10, optimized_for_emit_data_transfer); 1315 1316 if ((arg & REG_MASK) == SLJIT_UNUSED) { 1317 /* Write back is not used. */ 1318 FAIL_IF(load_immediate(compiler, tmp_reg, argw)); 1319 return push_inst(compiler, EMIT_DATA_TRANSFER(flags, 1, 0, reg, tmp_reg, is_type1_transfer ? 0 : TYPE2_TRANSFER_IMM(0))); 1320 } 1321 1322 if (arg & OFFS_REG_MASK) { 1323 offset_reg = OFFS_REG(arg); 1324 arg &= REG_MASK; 1325 argw &= 0x3; 1326 1327 if (argw != 0 && !is_type1_transfer) { 1328 SLJIT_ASSERT(!(flags & WRITE_BACK)); 1329 1330 FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(ADD_DP, 0, tmp_reg, arg, RM(offset_reg) | (argw << 7)))); 1331 return push_inst(compiler, EMIT_DATA_TRANSFER(flags, 1, 0, reg, tmp_reg, TYPE2_TRANSFER_IMM(0))); 1332 } 1333 1334 /* Bit 25: RM is offset. 
*/ 1335 return push_inst(compiler, EMIT_DATA_TRANSFER(flags, 1, flags & WRITE_BACK, reg, arg, 1336 RM(offset_reg) | (is_type1_transfer ? (1 << 25) : 0) | (argw << 7))); 1337 } 1338 1339 arg &= REG_MASK; 1340 1341 if (is_type1_transfer) { 1342 if (argw > 0xfff) { 1343 imm = get_imm(argw & ~0xfff); 1344 if (imm) { 1345 offset_reg = (flags & WRITE_BACK) ? arg : tmp_reg; 1346 FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(ADD_DP, 0, offset_reg, arg, imm))); 1347 argw = argw & 0xfff; 1348 arg = offset_reg; 1349 } 1350 } 1351 else if (argw < -0xfff) { 1352 imm = get_imm(-argw & ~0xfff); 1353 if (imm) { 1354 offset_reg = (flags & WRITE_BACK) ? arg : tmp_reg; 1355 FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(SUB_DP, 0, offset_reg, arg, imm))); 1356 argw = -(-argw & 0xfff); 1357 arg = offset_reg; 1358 } 1359 } 1360 1361 if (argw >= 0 && argw <= 0xfff) { 1362 return push_inst(compiler, EMIT_DATA_TRANSFER(flags, 1, flags & WRITE_BACK, reg, arg & REG_MASK, argw)); 1363 } 1364 if (argw < 0 && argw >= -0xfff) { 1365 return push_inst(compiler, EMIT_DATA_TRANSFER(flags, 0, flags & WRITE_BACK, reg, arg & REG_MASK, -argw)); 1366 } 1367 } 1368 else { 1369 if (argw > 0xff) { 1370 imm = get_imm(argw & ~0xff); 1371 if (imm) { 1372 offset_reg = (flags & WRITE_BACK) ? arg : tmp_reg; 1373 FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(ADD_DP, 0, offset_reg, arg, imm))); 1374 argw = argw & 0xff; 1375 arg = offset_reg; 1376 } 1377 } 1378 else if (argw < -0xff) { 1379 imm = get_imm(-argw & ~0xff); 1380 if (imm) { 1381 offset_reg = (flags & WRITE_BACK) ? 
arg : tmp_reg; 1382 FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(SUB_DP, 0, offset_reg, arg, imm))); 1383 argw = -(-argw & 0xff); 1384 arg = offset_reg; 1385 } 1386 } 1387 1388 if (argw >= 0 && argw <= 0xff) { 1389 return push_inst(compiler, EMIT_DATA_TRANSFER(flags, 1, flags & WRITE_BACK, reg, arg, TYPE2_TRANSFER_IMM(argw))); 1390 } 1391 if (argw < 0 && argw >= -0xff) { 1392 argw = -argw; 1393 return push_inst(compiler, EMIT_DATA_TRANSFER(flags, 0, flags & WRITE_BACK, reg, arg, TYPE2_TRANSFER_IMM(argw))); 1394 } 1395 } 1396 1397 FAIL_IF(load_immediate(compiler, tmp_reg, argw)); 1398 return push_inst(compiler, EMIT_DATA_TRANSFER(flags, 1, flags & WRITE_BACK, reg, arg, 1399 RM(tmp_reg) | (is_type1_transfer ? (1 << 25) : 0))); 1400 } 1401 1402 static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 inp_flags, 1403 sljit_s32 dst, sljit_sw dstw, 1404 sljit_s32 src1, sljit_sw src1w, 1405 sljit_s32 src2, sljit_sw src2w) 1406 { 1407 /* src1 is reg or TMP_REG1 1408 src2 is reg, TMP_REG2, or imm 1409 result goes to TMP_REG2, so put result can use TMP_REG1. */ 1410 1411 /* We prefers register and simple consts. */ 1412 sljit_s32 dst_reg; 1413 sljit_s32 src1_reg; 1414 sljit_s32 src2_reg; 1415 sljit_s32 flags = HAS_FLAGS(op) ? SET_FLAGS : 0; 1416 1417 /* Destination check. 
*/ 1418 if (SLJIT_UNLIKELY(dst == SLJIT_UNUSED)) { 1419 if (op <= SLJIT_MOVU_P && !(src2 & SLJIT_MEM)) 1420 return SLJIT_SUCCESS; 1421 } 1422 1423 SLJIT_ASSERT(!(inp_flags & ALLOW_INV_IMM) || (inp_flags & ALLOW_IMM)); 1424 1425 src2_reg = 0; 1426 1427 do { 1428 if (!(inp_flags & ALLOW_IMM)) 1429 break; 1430 1431 if (src2 & SLJIT_IMM) { 1432 src2_reg = get_imm(src2w); 1433 if (src2_reg) 1434 break; 1435 if (inp_flags & ALLOW_INV_IMM) { 1436 src2_reg = get_imm(~src2w); 1437 if (src2_reg) { 1438 flags |= INV_IMM; 1439 break; 1440 } 1441 } 1442 if (GET_OPCODE(op) == SLJIT_ADD) { 1443 src2_reg = get_imm(-src2w); 1444 if (src2_reg) { 1445 op = SLJIT_SUB | GET_ALL_FLAGS(op); 1446 break; 1447 } 1448 } 1449 if (GET_OPCODE(op) == SLJIT_SUB) { 1450 src2_reg = get_imm(-src2w); 1451 if (src2_reg) { 1452 op = SLJIT_ADD | GET_ALL_FLAGS(op); 1453 break; 1454 } 1455 } 1456 } 1457 1458 if (src1 & SLJIT_IMM) { 1459 src2_reg = get_imm(src1w); 1460 if (src2_reg) { 1461 flags |= ARGS_SWAPPED; 1462 src1 = src2; 1463 src1w = src2w; 1464 break; 1465 } 1466 if (inp_flags & ALLOW_INV_IMM) { 1467 src2_reg = get_imm(~src1w); 1468 if (src2_reg) { 1469 flags |= ARGS_SWAPPED | INV_IMM; 1470 src1 = src2; 1471 src1w = src2w; 1472 break; 1473 } 1474 } 1475 if (GET_OPCODE(op) == SLJIT_ADD) { 1476 src2_reg = get_imm(-src1w); 1477 if (src2_reg) { 1478 /* Note: add is commutative operation. */ 1479 src1 = src2; 1480 src1w = src2w; 1481 op = SLJIT_SUB | GET_ALL_FLAGS(op); 1482 break; 1483 } 1484 } 1485 } 1486 } while(0); 1487 1488 /* Source 1. */ 1489 if (FAST_IS_REG(src1)) 1490 src1_reg = src1; 1491 else if (src1 & SLJIT_MEM) { 1492 FAIL_IF(emit_op_mem(compiler, inp_flags | LOAD_DATA, TMP_REG1, src1, src1w, TMP_REG1)); 1493 src1_reg = TMP_REG1; 1494 } 1495 else { 1496 FAIL_IF(load_immediate(compiler, TMP_REG1, src1w)); 1497 src1_reg = TMP_REG1; 1498 } 1499 1500 /* Destination. */ 1501 dst_reg = SLOW_IS_REG(dst) ? 
dst : TMP_REG2; 1502 1503 if (op <= SLJIT_MOVU_P) { 1504 if (dst & SLJIT_MEM) { 1505 if (inp_flags & BYTE_DATA) 1506 inp_flags &= ~SIGNED_DATA; 1507 1508 if (FAST_IS_REG(src2)) 1509 return emit_op_mem(compiler, inp_flags, src2, dst, dstw, TMP_REG2); 1510 } 1511 1512 if (FAST_IS_REG(src2) && dst_reg != TMP_REG2) 1513 flags |= MOVE_REG_CONV; 1514 } 1515 1516 /* Source 2. */ 1517 if (src2_reg == 0) { 1518 src2_reg = (op <= SLJIT_MOVU_P) ? dst_reg : TMP_REG2; 1519 1520 if (FAST_IS_REG(src2)) 1521 src2_reg = src2; 1522 else if (src2 & SLJIT_MEM) 1523 FAIL_IF(emit_op_mem(compiler, inp_flags | LOAD_DATA, src2_reg, src2, src2w, TMP_REG2)); 1524 else 1525 FAIL_IF(load_immediate(compiler, src2_reg, src2w)); 1526 } 1527 1528 FAIL_IF(emit_single_op(compiler, op, flags, dst_reg, src1_reg, src2_reg)); 1529 1530 if (!(dst & SLJIT_MEM)) 1531 return SLJIT_SUCCESS; 1532 1533 return emit_op_mem(compiler, inp_flags, dst_reg, dst, dstw, TMP_REG1); 1534 } 1535 1536 #ifdef __cplusplus 1537 extern "C" { 1538 #endif 1539 1540 #if defined(__GNUC__) 1541 extern unsigned int __aeabi_uidivmod(unsigned int numerator, unsigned int denominator); 1542 extern int __aeabi_idivmod(int numerator, int denominator); 1543 #else 1544 #error "Software divmod functions are needed" 1545 #endif 1546 1547 #ifdef __cplusplus 1548 } 1549 #endif 1550 1551 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op) 1552 { 1553 sljit_sw saved_reg_list[3]; 1554 sljit_sw saved_reg_count; 1555 1556 CHECK_ERROR(); 1557 CHECK(check_sljit_emit_op0(compiler, op)); 1558 1559 op = GET_OPCODE(op); 1560 switch (op) { 1561 case SLJIT_BREAKPOINT: 1562 FAIL_IF(push_inst(compiler, BKPT)); 1563 break; 1564 case SLJIT_NOP: 1565 FAIL_IF(push_inst(compiler, NOP)); 1566 break; 1567 case SLJIT_LMUL_UW: 1568 case SLJIT_LMUL_SW: 1569 return push_inst(compiler, (op == SLJIT_LMUL_UW ? 
UMULL : SMULL) 1570 | (reg_map[SLJIT_R1] << 16) 1571 | (reg_map[SLJIT_R0] << 12) 1572 | (reg_map[SLJIT_R0] << 8) 1573 | reg_map[SLJIT_R1]); 1574 case SLJIT_DIVMOD_UW: 1575 case SLJIT_DIVMOD_SW: 1576 case SLJIT_DIV_UW: 1577 case SLJIT_DIV_SW: 1578 SLJIT_COMPILE_ASSERT((SLJIT_DIVMOD_UW & 0x2) == 0 && SLJIT_DIV_UW - 0x2 == SLJIT_DIVMOD_UW, bad_div_opcode_assignments); 1579 SLJIT_ASSERT(reg_map[2] == 1 && reg_map[3] == 2 && reg_map[4] == 3); 1580 1581 saved_reg_count = 0; 1582 if (compiler->scratches >= 4) 1583 saved_reg_list[saved_reg_count++] = 3; 1584 if (compiler->scratches >= 3) 1585 saved_reg_list[saved_reg_count++] = 2; 1586 if (op >= SLJIT_DIV_UW) 1587 saved_reg_list[saved_reg_count++] = 1; 1588 1589 if (saved_reg_count > 0) { 1590 FAIL_IF(push_inst(compiler, 0xe52d0000 | (saved_reg_count >= 3 ? 16 : 8) 1591 | (saved_reg_list[0] << 12) /* str rX, [sp, #-8/-16]! */)); 1592 if (saved_reg_count >= 2) { 1593 SLJIT_ASSERT(saved_reg_list[1] < 8); 1594 FAIL_IF(push_inst(compiler, 0xe58d0004 | (saved_reg_list[1] << 12) /* str rX, [sp, #4] */)); 1595 } 1596 if (saved_reg_count >= 3) { 1597 SLJIT_ASSERT(saved_reg_list[2] < 8); 1598 FAIL_IF(push_inst(compiler, 0xe58d0008 | (saved_reg_list[2] << 12) /* str rX, [sp, #8] */)); 1599 } 1600 } 1601 1602 #if defined(__GNUC__) 1603 FAIL_IF(sljit_emit_ijump(compiler, SLJIT_FAST_CALL, SLJIT_IMM, 1604 ((op | 0x2) == SLJIT_DIV_UW ? 
SLJIT_FUNC_OFFSET(__aeabi_uidivmod) : SLJIT_FUNC_OFFSET(__aeabi_idivmod)))); 1605 #else 1606 #error "Software divmod functions are needed" 1607 #endif 1608 1609 if (saved_reg_count > 0) { 1610 if (saved_reg_count >= 3) { 1611 SLJIT_ASSERT(saved_reg_list[2] < 8); 1612 FAIL_IF(push_inst(compiler, 0xe59d0008 | (saved_reg_list[2] << 12) /* ldr rX, [sp, #8] */)); 1613 } 1614 if (saved_reg_count >= 2) { 1615 SLJIT_ASSERT(saved_reg_list[1] < 8); 1616 FAIL_IF(push_inst(compiler, 0xe59d0004 | (saved_reg_list[1] << 12) /* ldr rX, [sp, #4] */)); 1617 } 1618 return push_inst(compiler, 0xe49d0000 | (saved_reg_count >= 3 ? 16 : 8) 1619 | (saved_reg_list[0] << 12) /* ldr rX, [sp], #8/16 */); 1620 } 1621 return SLJIT_SUCCESS; 1622 } 1623 1624 return SLJIT_SUCCESS; 1625 } 1626 1627 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op, 1628 sljit_s32 dst, sljit_sw dstw, 1629 sljit_s32 src, sljit_sw srcw) 1630 { 1631 CHECK_ERROR(); 1632 CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw)); 1633 ADJUST_LOCAL_OFFSET(dst, dstw); 1634 ADJUST_LOCAL_OFFSET(src, srcw); 1635 1636 switch (GET_OPCODE(op)) { 1637 case SLJIT_MOV: 1638 case SLJIT_MOV_U32: 1639 case SLJIT_MOV_S32: 1640 case SLJIT_MOV_P: 1641 return emit_op(compiler, SLJIT_MOV, ALLOW_ANY_IMM, dst, dstw, TMP_REG1, 0, src, srcw); 1642 1643 case SLJIT_MOV_U8: 1644 return emit_op(compiler, SLJIT_MOV_U8, ALLOW_ANY_IMM | BYTE_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_u8)srcw : srcw); 1645 1646 case SLJIT_MOV_S8: 1647 return emit_op(compiler, SLJIT_MOV_S8, ALLOW_ANY_IMM | SIGNED_DATA | BYTE_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_s8)srcw : srcw); 1648 1649 case SLJIT_MOV_U16: 1650 return emit_op(compiler, SLJIT_MOV_U16, ALLOW_ANY_IMM | HALF_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? 
(sljit_u16)srcw : srcw); 1651 1652 case SLJIT_MOV_S16: 1653 return emit_op(compiler, SLJIT_MOV_S16, ALLOW_ANY_IMM | SIGNED_DATA | HALF_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_s16)srcw : srcw); 1654 1655 case SLJIT_MOVU: 1656 case SLJIT_MOVU_U32: 1657 case SLJIT_MOVU_S32: 1658 case SLJIT_MOVU_P: 1659 return emit_op(compiler, SLJIT_MOV, ALLOW_ANY_IMM | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, srcw); 1660 1661 case SLJIT_MOVU_U8: 1662 return emit_op(compiler, SLJIT_MOV_U8, ALLOW_ANY_IMM | BYTE_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_u8)srcw : srcw); 1663 1664 case SLJIT_MOVU_S8: 1665 return emit_op(compiler, SLJIT_MOV_S8, ALLOW_ANY_IMM | SIGNED_DATA | BYTE_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_s8)srcw : srcw); 1666 1667 case SLJIT_MOVU_U16: 1668 return emit_op(compiler, SLJIT_MOV_U16, ALLOW_ANY_IMM | HALF_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_u16)srcw : srcw); 1669 1670 case SLJIT_MOVU_S16: 1671 return emit_op(compiler, SLJIT_MOV_S16, ALLOW_ANY_IMM | SIGNED_DATA | HALF_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? 
(sljit_s16)srcw : srcw); 1672 1673 case SLJIT_NOT: 1674 return emit_op(compiler, op, ALLOW_ANY_IMM, dst, dstw, TMP_REG1, 0, src, srcw); 1675 1676 case SLJIT_NEG: 1677 #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ 1678 || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) 1679 compiler->skip_checks = 1; 1680 #endif 1681 return sljit_emit_op2(compiler, SLJIT_SUB | GET_ALL_FLAGS(op), dst, dstw, SLJIT_IMM, 0, src, srcw); 1682 1683 case SLJIT_CLZ: 1684 return emit_op(compiler, op, 0, dst, dstw, TMP_REG1, 0, src, srcw); 1685 } 1686 1687 return SLJIT_SUCCESS; 1688 } 1689 1690 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op, 1691 sljit_s32 dst, sljit_sw dstw, 1692 sljit_s32 src1, sljit_sw src1w, 1693 sljit_s32 src2, sljit_sw src2w) 1694 { 1695 CHECK_ERROR(); 1696 CHECK(check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w)); 1697 ADJUST_LOCAL_OFFSET(dst, dstw); 1698 ADJUST_LOCAL_OFFSET(src1, src1w); 1699 ADJUST_LOCAL_OFFSET(src2, src2w); 1700 1701 switch (GET_OPCODE(op)) { 1702 case SLJIT_ADD: 1703 case SLJIT_ADDC: 1704 case SLJIT_SUB: 1705 case SLJIT_SUBC: 1706 case SLJIT_OR: 1707 case SLJIT_XOR: 1708 return emit_op(compiler, op, ALLOW_IMM, dst, dstw, src1, src1w, src2, src2w); 1709 1710 case SLJIT_MUL: 1711 return emit_op(compiler, op, 0, dst, dstw, src1, src1w, src2, src2w); 1712 1713 case SLJIT_AND: 1714 return emit_op(compiler, op, ALLOW_ANY_IMM, dst, dstw, src1, src1w, src2, src2w); 1715 1716 case SLJIT_SHL: 1717 case SLJIT_LSHR: 1718 case SLJIT_ASHR: 1719 if (src2 & SLJIT_IMM) { 1720 compiler->shift_imm = src2w & 0x1f; 1721 return emit_op(compiler, op, 0, dst, dstw, TMP_REG1, 0, src1, src1w); 1722 } 1723 else { 1724 compiler->shift_imm = 0x20; 1725 return emit_op(compiler, op, 0, dst, dstw, src1, src1w, src2, src2w); 1726 } 1727 } 1728 1729 return SLJIT_SUCCESS; 1730 } 1731 1732 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 reg) 1733 { 1734 
CHECK_REG_INDEX(check_sljit_get_register_index(reg)); 1735 return reg_map[reg]; 1736 } 1737 1738 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_float_register_index(sljit_s32 reg) 1739 { 1740 CHECK_REG_INDEX(check_sljit_get_float_register_index(reg)); 1741 return reg << 1; 1742 } 1743 1744 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler, 1745 void *instruction, sljit_s32 size) 1746 { 1747 CHECK_ERROR(); 1748 CHECK(check_sljit_emit_op_custom(compiler, instruction, size)); 1749 1750 return push_inst(compiler, *(sljit_uw*)instruction); 1751 } 1752 1753 /* --------------------------------------------------------------------- */ 1754 /* Floating point operators */ 1755 /* --------------------------------------------------------------------- */ 1756 1757 #if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) 1758 1759 /* 0 - no fpu 1760 1 - vfp */ 1761 static sljit_s32 arm_fpu_type = -1; 1762 1763 static void init_compiler(void) 1764 { 1765 if (arm_fpu_type != -1) 1766 return; 1767 1768 /* TODO: Only the OS can help to determine the correct fpu type. */ 1769 arm_fpu_type = 1; 1770 } 1771 1772 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_is_fpu_available(void) 1773 { 1774 #ifdef SLJIT_IS_FPU_AVAILABLE 1775 return SLJIT_IS_FPU_AVAILABLE; 1776 #else 1777 if (arm_fpu_type == -1) 1778 init_compiler(); 1779 return arm_fpu_type; 1780 #endif 1781 } 1782 1783 #else 1784 1785 #define arm_fpu_type 1 1786 1787 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_is_fpu_available(void) 1788 { 1789 /* Always available. 
*/ 1790 return 1; 1791 } 1792 1793 #endif 1794 1795 #define FPU_LOAD (1 << 20) 1796 #define EMIT_FPU_DATA_TRANSFER(inst, add, base, freg, offs) \ 1797 ((inst) | ((add) << 23) | (reg_map[base] << 16) | (freg << 12) | (offs)) 1798 #define EMIT_FPU_OPERATION(opcode, mode, dst, src1, src2) \ 1799 ((opcode) | (mode) | ((dst) << 12) | (src1) | ((src2) << 16)) 1800 1801 static sljit_s32 emit_fop_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw) 1802 { 1803 sljit_uw imm; 1804 sljit_sw inst = VSTR_F32 | (flags & (SLJIT_F32_OP | FPU_LOAD)); 1805 1806 SLJIT_ASSERT(arg & SLJIT_MEM); 1807 arg &= ~SLJIT_MEM; 1808 1809 if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) { 1810 FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(ADD_DP, 0, TMP_REG2, arg & REG_MASK, RM(OFFS_REG(arg)) | ((argw & 0x3) << 7)))); 1811 arg = TMP_REG2; 1812 argw = 0; 1813 } 1814 1815 /* Fast loads and stores. */ 1816 if (arg) { 1817 if (!(argw & ~0x3fc)) 1818 return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 1, arg & REG_MASK, reg, argw >> 2)); 1819 if (!(-argw & ~0x3fc)) 1820 return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 0, arg & REG_MASK, reg, (-argw) >> 2)); 1821 1822 imm = get_imm(argw & ~0x3fc); 1823 if (imm) { 1824 FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(ADD_DP, 0, TMP_REG2, arg & REG_MASK, imm))); 1825 return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 1, TMP_REG2, reg, (argw & 0x3fc) >> 2)); 1826 } 1827 imm = get_imm(-argw & ~0x3fc); 1828 if (imm) { 1829 argw = -argw; 1830 FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(SUB_DP, 0, TMP_REG2, arg & REG_MASK, imm))); 1831 return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 0, TMP_REG2, reg, (argw & 0x3fc) >> 2)); 1832 } 1833 } 1834 1835 if (arg) { 1836 FAIL_IF(load_immediate(compiler, TMP_REG2, argw)); 1837 FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(ADD_DP, 0, TMP_REG2, arg & REG_MASK, RM(TMP_REG2)))); 1838 } 1839 else 1840 FAIL_IF(load_immediate(compiler, TMP_REG2, 
argw)); 1841 1842 return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 1, TMP_REG2, reg, 0)); 1843 } 1844 1845 static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler *compiler, sljit_s32 op, 1846 sljit_s32 dst, sljit_sw dstw, 1847 sljit_s32 src, sljit_sw srcw) 1848 { 1849 op ^= SLJIT_F32_OP; 1850 1851 if (src & SLJIT_MEM) { 1852 FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_F32_OP) | FPU_LOAD, TMP_FREG1, src, srcw)); 1853 src = TMP_FREG1; 1854 } 1855 1856 FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VCVT_S32_F32, op & SLJIT_F32_OP, TMP_FREG1, src, 0))); 1857 1858 if (dst == SLJIT_UNUSED) 1859 return SLJIT_SUCCESS; 1860 1861 if (FAST_IS_REG(dst)) 1862 return push_inst(compiler, VMOV | (1 << 20) | RD(dst) | (TMP_FREG1 << 16)); 1863 1864 /* Store the integer value from a VFP register. */ 1865 return emit_fop_mem(compiler, 0, TMP_FREG1, dst, dstw); 1866 } 1867 1868 static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op, 1869 sljit_s32 dst, sljit_sw dstw, 1870 sljit_s32 src, sljit_sw srcw) 1871 { 1872 sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1; 1873 1874 op ^= SLJIT_F32_OP; 1875 1876 if (FAST_IS_REG(src)) 1877 FAIL_IF(push_inst(compiler, VMOV | RD(src) | (TMP_FREG1 << 16))); 1878 else if (src & SLJIT_MEM) { 1879 /* Load the integer value into a VFP register. 
*/ 1880 FAIL_IF(emit_fop_mem(compiler, FPU_LOAD, TMP_FREG1, src, srcw)); 1881 } 1882 else { 1883 FAIL_IF(load_immediate(compiler, TMP_REG1, srcw)); 1884 FAIL_IF(push_inst(compiler, VMOV | RD(TMP_REG1) | (TMP_FREG1 << 16))); 1885 } 1886 1887 FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VCVT_F32_S32, op & SLJIT_F32_OP, dst_r, TMP_FREG1, 0))); 1888 1889 if (dst & SLJIT_MEM) 1890 return emit_fop_mem(compiler, (op & SLJIT_F32_OP), TMP_FREG1, dst, dstw); 1891 return SLJIT_SUCCESS; 1892 } 1893 1894 static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_s32 op, 1895 sljit_s32 src1, sljit_sw src1w, 1896 sljit_s32 src2, sljit_sw src2w) 1897 { 1898 op ^= SLJIT_F32_OP; 1899 1900 if (src1 & SLJIT_MEM) { 1901 FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_F32_OP) | FPU_LOAD, TMP_FREG1, src1, src1w)); 1902 src1 = TMP_FREG1; 1903 } 1904 1905 if (src2 & SLJIT_MEM) { 1906 FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_F32_OP) | FPU_LOAD, TMP_FREG2, src2, src2w)); 1907 src2 = TMP_FREG2; 1908 } 1909 1910 FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VCMP_F32, op & SLJIT_F32_OP, src1, src2, 0))); 1911 return push_inst(compiler, VMRS); 1912 } 1913 1914 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op, 1915 sljit_s32 dst, sljit_sw dstw, 1916 sljit_s32 src, sljit_sw srcw) 1917 { 1918 sljit_s32 dst_r; 1919 1920 CHECK_ERROR(); 1921 1922 SLJIT_COMPILE_ASSERT((SLJIT_F32_OP == 0x100), float_transfer_bit_error); 1923 SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw); 1924 1925 dst_r = FAST_IS_REG(dst) ? 
dst : TMP_FREG1; 1926 1927 if (GET_OPCODE(op) != SLJIT_CONV_F64_FROM_F32) 1928 op ^= SLJIT_F32_OP; 1929 1930 if (src & SLJIT_MEM) { 1931 FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_F32_OP) | FPU_LOAD, dst_r, src, srcw)); 1932 src = dst_r; 1933 } 1934 1935 switch (GET_OPCODE(op)) { 1936 case SLJIT_MOV_F64: 1937 if (src != dst_r) { 1938 if (dst_r != TMP_FREG1) 1939 FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VMOV_F32, op & SLJIT_F32_OP, dst_r, src, 0))); 1940 else 1941 dst_r = src; 1942 } 1943 break; 1944 case SLJIT_NEG_F64: 1945 FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VNEG_F32, op & SLJIT_F32_OP, dst_r, src, 0))); 1946 break; 1947 case SLJIT_ABS_F64: 1948 FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VABS_F32, op & SLJIT_F32_OP, dst_r, src, 0))); 1949 break; 1950 case SLJIT_CONV_F64_FROM_F32: 1951 FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VCVT_F64_F32, op & SLJIT_F32_OP, dst_r, src, 0))); 1952 op ^= SLJIT_F32_OP; 1953 break; 1954 } 1955 1956 if (dst & SLJIT_MEM) 1957 return emit_fop_mem(compiler, (op & SLJIT_F32_OP), dst_r, dst, dstw); 1958 return SLJIT_SUCCESS; 1959 } 1960 1961 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op, 1962 sljit_s32 dst, sljit_sw dstw, 1963 sljit_s32 src1, sljit_sw src1w, 1964 sljit_s32 src2, sljit_sw src2w) 1965 { 1966 sljit_s32 dst_r; 1967 1968 CHECK_ERROR(); 1969 CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w)); 1970 ADJUST_LOCAL_OFFSET(dst, dstw); 1971 ADJUST_LOCAL_OFFSET(src1, src1w); 1972 ADJUST_LOCAL_OFFSET(src2, src2w); 1973 1974 op ^= SLJIT_F32_OP; 1975 1976 dst_r = FAST_IS_REG(dst) ? 
dst : TMP_FREG1; 1977 1978 if (src2 & SLJIT_MEM) { 1979 FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_F32_OP) | FPU_LOAD, TMP_FREG2, src2, src2w)); 1980 src2 = TMP_FREG2; 1981 } 1982 1983 if (src1 & SLJIT_MEM) { 1984 FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_F32_OP) | FPU_LOAD, TMP_FREG1, src1, src1w)); 1985 src1 = TMP_FREG1; 1986 } 1987 1988 switch (GET_OPCODE(op)) { 1989 case SLJIT_ADD_F64: 1990 FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VADD_F32, op & SLJIT_F32_OP, dst_r, src2, src1))); 1991 break; 1992 1993 case SLJIT_SUB_F64: 1994 FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VSUB_F32, op & SLJIT_F32_OP, dst_r, src2, src1))); 1995 break; 1996 1997 case SLJIT_MUL_F64: 1998 FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VMUL_F32, op & SLJIT_F32_OP, dst_r, src2, src1))); 1999 break; 2000 2001 case SLJIT_DIV_F64: 2002 FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VDIV_F32, op & SLJIT_F32_OP, dst_r, src2, src1))); 2003 break; 2004 } 2005 2006 if (dst_r == TMP_FREG1) 2007 FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_F32_OP), TMP_FREG1, dst, dstw)); 2008 2009 return SLJIT_SUCCESS; 2010 } 2011 2012 #undef FPU_LOAD 2013 #undef EMIT_FPU_DATA_TRANSFER 2014 #undef EMIT_FPU_OPERATION 2015 2016 /* --------------------------------------------------------------------- */ 2017 /* Other instructions */ 2018 /* --------------------------------------------------------------------- */ 2019 2020 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw) 2021 { 2022 CHECK_ERROR(); 2023 CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw)); 2024 ADJUST_LOCAL_OFFSET(dst, dstw); 2025 2026 SLJIT_ASSERT(reg_map[TMP_REG1] == 14); 2027 2028 /* For UNUSED dst. Uncommon, but possible. */ 2029 if (dst == SLJIT_UNUSED) 2030 return SLJIT_SUCCESS; 2031 2032 if (FAST_IS_REG(dst)) 2033 return push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst, SLJIT_UNUSED, RM(TMP_REG1))); 2034 2035 /* Memory. 
*/ 2036 return emit_op_mem(compiler, WORD_DATA, TMP_REG1, dst, dstw, TMP_REG2); 2037 } 2038 2039 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_return(struct sljit_compiler *compiler, sljit_s32 src, sljit_sw srcw) 2040 { 2041 CHECK_ERROR(); 2042 CHECK(check_sljit_emit_fast_return(compiler, src, srcw)); 2043 ADJUST_LOCAL_OFFSET(src, srcw); 2044 2045 SLJIT_ASSERT(reg_map[TMP_REG1] == 14); 2046 2047 if (FAST_IS_REG(src)) 2048 FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, TMP_REG1, 0, RM(src)))); 2049 else if (src & SLJIT_MEM) 2050 FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, TMP_REG1, src, srcw, TMP_REG2)); 2051 else if (src & SLJIT_IMM) 2052 FAIL_IF(load_immediate(compiler, TMP_REG1, srcw)); 2053 2054 return push_inst(compiler, BX | RM(TMP_REG1)); 2055 } 2056 2057 /* --------------------------------------------------------------------- */ 2058 /* Conditional instructions */ 2059 /* --------------------------------------------------------------------- */ 2060 2061 static sljit_uw get_cc(sljit_s32 type) 2062 { 2063 switch (type) { 2064 case SLJIT_EQUAL: 2065 case SLJIT_MUL_NOT_OVERFLOW: 2066 case SLJIT_EQUAL_F64: 2067 return 0x00000000; 2068 2069 case SLJIT_NOT_EQUAL: 2070 case SLJIT_MUL_OVERFLOW: 2071 case SLJIT_NOT_EQUAL_F64: 2072 return 0x10000000; 2073 2074 case SLJIT_LESS: 2075 case SLJIT_LESS_F64: 2076 return 0x30000000; 2077 2078 case SLJIT_GREATER_EQUAL: 2079 case SLJIT_GREATER_EQUAL_F64: 2080 return 0x20000000; 2081 2082 case SLJIT_GREATER: 2083 case SLJIT_GREATER_F64: 2084 return 0x80000000; 2085 2086 case SLJIT_LESS_EQUAL: 2087 case SLJIT_LESS_EQUAL_F64: 2088 return 0x90000000; 2089 2090 case SLJIT_SIG_LESS: 2091 return 0xb0000000; 2092 2093 case SLJIT_SIG_GREATER_EQUAL: 2094 return 0xa0000000; 2095 2096 case SLJIT_SIG_GREATER: 2097 return 0xc0000000; 2098 2099 case SLJIT_SIG_LESS_EQUAL: 2100 return 0xd0000000; 2101 2102 case SLJIT_OVERFLOW: 2103 case SLJIT_UNORDERED_F64: 2104 return 0x60000000; 2105 2106 case SLJIT_NOT_OVERFLOW: 
2107 case SLJIT_ORDERED_F64: 2108 return 0x70000000; 2109 2110 default: 2111 SLJIT_ASSERT(type >= SLJIT_JUMP && type <= SLJIT_CALL3); 2112 return 0xe0000000; 2113 } 2114 } 2115 2116 SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler) 2117 { 2118 struct sljit_label *label; 2119 2120 CHECK_ERROR_PTR(); 2121 CHECK_PTR(check_sljit_emit_label(compiler)); 2122 2123 if (compiler->last_label && compiler->last_label->size == compiler->size) 2124 return compiler->last_label; 2125 2126 label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label)); 2127 PTR_FAIL_IF(!label); 2128 set_label(label, compiler); 2129 return label; 2130 } 2131 2132 SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type) 2133 { 2134 struct sljit_jump *jump; 2135 2136 CHECK_ERROR_PTR(); 2137 CHECK_PTR(check_sljit_emit_jump(compiler, type)); 2138 2139 jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); 2140 PTR_FAIL_IF(!jump); 2141 set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP); 2142 type &= 0xff; 2143 2144 /* In ARM, we don't need to touch the arguments. */ 2145 #if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) 2146 if (type >= SLJIT_FAST_CALL) 2147 PTR_FAIL_IF(prepare_blx(compiler)); 2148 PTR_FAIL_IF(push_inst_with_unique_literal(compiler, ((EMIT_DATA_TRANSFER(WORD_DATA | LOAD_DATA, 1, 0, 2149 type <= SLJIT_JUMP ? 
TMP_PC : TMP_REG2, TMP_PC, 0)) & ~COND_MASK) | get_cc(type), 0)); 2150 2151 if (jump->flags & SLJIT_REWRITABLE_JUMP) { 2152 jump->addr = compiler->size; 2153 compiler->patches++; 2154 } 2155 2156 if (type >= SLJIT_FAST_CALL) { 2157 jump->flags |= IS_BL; 2158 PTR_FAIL_IF(emit_blx(compiler)); 2159 } 2160 2161 if (!(jump->flags & SLJIT_REWRITABLE_JUMP)) 2162 jump->addr = compiler->size; 2163 #else 2164 if (type >= SLJIT_FAST_CALL) 2165 jump->flags |= IS_BL; 2166 PTR_FAIL_IF(emit_imm(compiler, TMP_REG2, 0)); 2167 PTR_FAIL_IF(push_inst(compiler, (((type <= SLJIT_JUMP ? BX : BLX) | RM(TMP_REG2)) & ~COND_MASK) | get_cc(type))); 2168 jump->addr = compiler->size; 2169 #endif 2170 return jump; 2171 } 2172 2173 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw) 2174 { 2175 struct sljit_jump *jump; 2176 2177 CHECK_ERROR(); 2178 CHECK(check_sljit_emit_ijump(compiler, type, src, srcw)); 2179 ADJUST_LOCAL_OFFSET(src, srcw); 2180 2181 /* In ARM, we don't need to touch the arguments. */ 2182 if (!(src & SLJIT_IMM)) { 2183 if (FAST_IS_REG(src)) 2184 return push_inst(compiler, (type <= SLJIT_JUMP ? BX : BLX) | RM(src)); 2185 2186 SLJIT_ASSERT(src & SLJIT_MEM); 2187 FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, TMP_REG2, src, srcw, TMP_REG2)); 2188 return push_inst(compiler, (type <= SLJIT_JUMP ? BX : BLX) | RM(TMP_REG2)); 2189 } 2190 2191 jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); 2192 FAIL_IF(!jump); 2193 set_jump(jump, compiler, JUMP_ADDR | ((type >= SLJIT_FAST_CALL) ? IS_BL : 0)); 2194 jump->u.target = srcw; 2195 2196 #if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) 2197 if (type >= SLJIT_FAST_CALL) 2198 FAIL_IF(prepare_blx(compiler)); 2199 FAIL_IF(push_inst_with_unique_literal(compiler, EMIT_DATA_TRANSFER(WORD_DATA | LOAD_DATA, 1, 0, type <= SLJIT_JUMP ? 
TMP_PC : TMP_REG2, TMP_PC, 0), 0)); 2200 if (type >= SLJIT_FAST_CALL) 2201 FAIL_IF(emit_blx(compiler)); 2202 #else 2203 FAIL_IF(emit_imm(compiler, TMP_REG2, 0)); 2204 FAIL_IF(push_inst(compiler, (type <= SLJIT_JUMP ? BX : BLX) | RM(TMP_REG2))); 2205 #endif 2206 jump->addr = compiler->size; 2207 return SLJIT_SUCCESS; 2208 } 2209 2210 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op, 2211 sljit_s32 dst, sljit_sw dstw, 2212 sljit_s32 src, sljit_sw srcw, 2213 sljit_s32 type) 2214 { 2215 sljit_s32 dst_reg, flags = GET_ALL_FLAGS(op); 2216 sljit_uw cc, ins; 2217 2218 CHECK_ERROR(); 2219 CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, src, srcw, type)); 2220 ADJUST_LOCAL_OFFSET(dst, dstw); 2221 ADJUST_LOCAL_OFFSET(src, srcw); 2222 2223 if (dst == SLJIT_UNUSED) 2224 return SLJIT_SUCCESS; 2225 2226 op = GET_OPCODE(op); 2227 cc = get_cc(type & 0xff); 2228 dst_reg = FAST_IS_REG(dst) ? dst : TMP_REG2; 2229 2230 if (op < SLJIT_ADD) { 2231 FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst_reg, SLJIT_UNUSED, SRC2_IMM | 0))); 2232 FAIL_IF(push_inst(compiler, (EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst_reg, SLJIT_UNUSED, SRC2_IMM | 1) & ~COND_MASK) | cc)); 2233 return (dst_reg == TMP_REG2) ? emit_op_mem(compiler, WORD_DATA, TMP_REG2, dst, dstw, TMP_REG1) : SLJIT_SUCCESS; 2234 } 2235 2236 ins = (op == SLJIT_AND ? AND_DP : (op == SLJIT_OR ? ORR_DP : EOR_DP)); 2237 if ((op == SLJIT_OR || op == SLJIT_XOR) && FAST_IS_REG(dst) && dst == src) { 2238 FAIL_IF(push_inst(compiler, (EMIT_DATA_PROCESS_INS(ins, 0, dst, dst, SRC2_IMM | 1) & ~COND_MASK) | cc)); 2239 /* The condition must always be set, even if the ORR/EOR is not executed above. */ 2240 return (flags & SLJIT_SET_Z) ? 
push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, SET_FLAGS, TMP_REG1, SLJIT_UNUSED, RM(dst))) : SLJIT_SUCCESS; 2241 } 2242 2243 if (src & SLJIT_MEM) { 2244 FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, TMP_REG1, src, srcw, TMP_REG1)); 2245 src = TMP_REG1; 2246 } else if (src & SLJIT_IMM) { 2247 FAIL_IF(load_immediate(compiler, TMP_REG1, srcw)); 2248 src = TMP_REG1; 2249 } 2250 2251 FAIL_IF(push_inst(compiler, (EMIT_DATA_PROCESS_INS(ins, 0, dst_reg, src, SRC2_IMM | 1) & ~COND_MASK) | cc)); 2252 FAIL_IF(push_inst(compiler, (EMIT_DATA_PROCESS_INS(ins, 0, dst_reg, src, SRC2_IMM | 0) & ~COND_MASK) | (cc ^ 0x10000000))); 2253 if (dst_reg == TMP_REG2) 2254 FAIL_IF(emit_op_mem(compiler, WORD_DATA, TMP_REG2, dst, dstw, TMP_REG1)); 2255 2256 return (flags & SLJIT_SET_Z) ? push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, SET_FLAGS, TMP_REG2, SLJIT_UNUSED, RM(dst_reg))) : SLJIT_SUCCESS; 2257 } 2258 2259 SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value) 2260 { 2261 struct sljit_const *const_; 2262 sljit_s32 reg; 2263 2264 CHECK_ERROR_PTR(); 2265 CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value)); 2266 ADJUST_LOCAL_OFFSET(dst, dstw); 2267 2268 const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const)); 2269 PTR_FAIL_IF(!const_); 2270 2271 reg = SLOW_IS_REG(dst) ? 
dst : TMP_REG2; 2272 2273 #if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) 2274 PTR_FAIL_IF(push_inst_with_unique_literal(compiler, EMIT_DATA_TRANSFER(WORD_DATA | LOAD_DATA, 1, 0, reg, TMP_PC, 0), init_value)); 2275 compiler->patches++; 2276 #else 2277 PTR_FAIL_IF(emit_imm(compiler, reg, init_value)); 2278 #endif 2279 set_const(const_, compiler); 2280 2281 if (dst & SLJIT_MEM) 2282 PTR_FAIL_IF(emit_op_mem(compiler, WORD_DATA, TMP_REG2, dst, dstw, TMP_REG1)); 2283 return const_; 2284 } 2285 2286 SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset) 2287 { 2288 inline_set_jump_addr(addr, executable_offset, new_target, 1); 2289 } 2290 2291 SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset) 2292 { 2293 inline_set_const(addr, executable_offset, new_constant, 1); 2294 } 2295