/* $NetBSD: sljitNativeX86_64.c,v 1.4 2019/01/20 23:14:16 alnsn Exp $ */

/*
 *    Stack-less Just-In-Time compiler
 *
 *    Copyright Zoltan Herczeg (hzmester(at)freemail.hu). All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification, are
 * permitted provided that the following conditions are met:
 *
 *   1. Redistributions of source code must retain the above copyright notice, this list of
 *      conditions and the following disclaimer.
 *
 *   2. Redistributions in binary form must reproduce the above copyright notice, this list
 *      of conditions and the following disclaimer in the documentation and/or other materials
 *      provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
 * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/* x86 64-bit arch dependent functions. */

static sljit_s32 emit_load_imm64(struct sljit_compiler *compiler, sljit_s32 reg, sljit_sw imm)
{
	sljit_u8 *inst;

	inst = (sljit_u8*)ensure_buf(compiler, 1 + 2 + sizeof(sljit_sw));
	FAIL_IF(!inst);
	INC_SIZE(2 + sizeof(sljit_sw));
	*inst++ = REX_W | ((reg_map[reg] <= 7) ? 0 : REX_B);
	*inst++ = MOV_r_i32 + (reg_map[reg] & 0x7);
	sljit_unaligned_store_sw(inst, imm);
	return SLJIT_SUCCESS;
}
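/* A far jump/call is emitted as a 10 byte "mov r9, imm64" (REX.W|B, MOV_r_i32,
   64 bit immediate) followed by a 3 byte "jmp/call r9" (REX.B, group FF).
   Conditional jumps are handled by prefixing an inverted 2 byte short jump
   that skips over this 13 byte sequence (hence the 10 + 3 displacement below).
   The immediate is patched later when the target is a label (PATCH_MD). */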
static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_s32 type)
{
	if (type < SLJIT_JUMP) {
		/* Invert type. */
		*code_ptr++ = get_jump_code(type ^ 0x1) - 0x10;
		*code_ptr++ = 10 + 3;
	}

	SLJIT_ASSERT(reg_map[TMP_REG3] == 9);
	*code_ptr++ = REX_W | REX_B;
	*code_ptr++ = MOV_r_i32 + 1;
	jump->addr = (sljit_uw)code_ptr;

	if (jump->flags & JUMP_LABEL)
		jump->flags |= PATCH_MD;
	else
		sljit_unaligned_store_sw(code_ptr, jump->u.target);

	code_ptr += sizeof(sljit_sw);
	*code_ptr++ = REX_B;
	*code_ptr++ = GROUP_FF;
	*code_ptr++ = (type >= SLJIT_FAST_CALL) ? (MOD_REG | CALL_rm | 1) : (MOD_REG | JMP_rm | 1);

	return code_ptr;
}

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler,
	sljit_s32 options, sljit_s32 args, sljit_s32 scratches, sljit_s32 saveds,
	sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
{
	sljit_s32 i, tmp, size, saved_register_size;
	sljit_u8 *inst;

	CHECK_ERROR();
	CHECK(check_sljit_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size));
	set_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size);

#ifdef _WIN64
	/* Two/four register slots for parameters plus space for xmm6 register if needed. */
	if (fscratches >= 6 || fsaveds >= 1)
		compiler->locals_offset = 6 * sizeof(sljit_sw);
	else
		compiler->locals_offset = ((scratches > 2) ? 4 : 2) * sizeof(sljit_sw);
#endif

	/* Including the return address saved by the call instruction. */
	saved_register_size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1);

	tmp = saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - saveds) : SLJIT_FIRST_SAVED_REG;
	for (i = SLJIT_S0; i >= tmp; i--) {
		size = reg_map[i] >= 8 ? 2 : 1;
		inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
		FAIL_IF(!inst);
		INC_SIZE(size);
		if (reg_map[i] >= 8)
			*inst++ = REX_B;
		PUSH_REG(reg_lmap[i]);
	}

	for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--) {
		size = reg_map[i] >= 8 ? 2 : 1;
		inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
		FAIL_IF(!inst);
		INC_SIZE(size);
		if (reg_map[i] >= 8)
			*inst++ = REX_B;
		PUSH_REG(reg_lmap[i]);
	}

	if (args > 0) {
		size = args * 3;
		inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
		FAIL_IF(!inst);

		INC_SIZE(size);

#ifndef _WIN64
		if (args > 0) {
			*inst++ = REX_W;
			*inst++ = MOV_r_rm;
			*inst++ = MOD_REG | (reg_map[SLJIT_S0] << 3) | 0x7 /* rdi */;
		}
		if (args > 1) {
			*inst++ = REX_W | REX_R;
			*inst++ = MOV_r_rm;
			*inst++ = MOD_REG | (reg_lmap[SLJIT_S1] << 3) | 0x6 /* rsi */;
		}
		if (args > 2) {
			*inst++ = REX_W | REX_R;
			*inst++ = MOV_r_rm;
			*inst++ = MOD_REG | (reg_lmap[SLJIT_S2] << 3) | 0x2 /* rdx */;
		}
#else
		if (args > 0) {
			*inst++ = REX_W;
			*inst++ = MOV_r_rm;
			*inst++ = MOD_REG | (reg_map[SLJIT_S0] << 3) | 0x1 /* rcx */;
		}
		if (args > 1) {
			*inst++ = REX_W;
			*inst++ = MOV_r_rm;
			*inst++ = MOD_REG | (reg_map[SLJIT_S1] << 3) | 0x2 /* rdx */;
		}
		if (args > 2) {
			*inst++ = REX_W | REX_B;
			*inst++ = MOV_r_rm;
			*inst++ = MOD_REG | (reg_map[SLJIT_S2] << 3) | 0x0 /* r8 */;
		}
#endif
	}

	local_size = ((local_size + SLJIT_LOCALS_OFFSET + saved_register_size + 15) & ~15) - saved_register_size;
	compiler->local_size = local_size;
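	/* Windows requires the stack to be touched page by page (a guard page
	   sits below the committed area), so a large frame cannot be claimed
	   with a single big subtraction. The block below reserves only a small,
	   16 byte aligned area inline for the helper call, loads the remaining
	   size into SLJIT_R0 and calls sljit_grow_stack() to commit that part
	   before the normal allocation further down. */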
#ifdef _WIN64
	if (local_size > 1024) {
		/* Allocate stack for the callback, which grows the stack. */
		inst = (sljit_u8*)ensure_buf(compiler, 1 + 4 + (3 + sizeof(sljit_s32)));
		FAIL_IF(!inst);
		INC_SIZE(4 + (3 + sizeof(sljit_s32)));
		*inst++ = REX_W;
		*inst++ = GROUP_BINARY_83;
		*inst++ = MOD_REG | SUB | reg_map[SLJIT_SP];
		/* Allocated size for registers must be divisible by 8. */
		SLJIT_ASSERT(!(saved_register_size & 0x7));
		/* Aligned to 16 bytes. */
		if (saved_register_size & 0x8) {
			*inst++ = 5 * sizeof(sljit_sw);
			local_size -= 5 * sizeof(sljit_sw);
		} else {
			*inst++ = 4 * sizeof(sljit_sw);
			local_size -= 4 * sizeof(sljit_sw);
		}
		/* Second instruction. */
		SLJIT_ASSERT(reg_map[SLJIT_R0] < 8);
		*inst++ = REX_W;
		*inst++ = MOV_rm_i32;
		*inst++ = MOD_REG | reg_lmap[SLJIT_R0];
		sljit_unaligned_store_s32(inst, local_size);
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
		|| (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
		compiler->skip_checks = 1;
#endif
		FAIL_IF(sljit_emit_ijump(compiler, SLJIT_CALL1, SLJIT_IMM, SLJIT_FUNC_OFFSET(sljit_grow_stack)));
	}
#endif

	if (local_size > 0) {
		if (local_size <= 127) {
			inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
			FAIL_IF(!inst);
			INC_SIZE(4);
			*inst++ = REX_W;
			*inst++ = GROUP_BINARY_83;
			*inst++ = MOD_REG | SUB | reg_map[SLJIT_SP];
			*inst++ = local_size;
		}
		else {
			inst = (sljit_u8*)ensure_buf(compiler, 1 + 7);
			FAIL_IF(!inst);
			INC_SIZE(7);
			*inst++ = REX_W;
			*inst++ = GROUP_BINARY_81;
			*inst++ = MOD_REG | SUB | reg_map[SLJIT_SP];
			sljit_unaligned_store_s32(inst, local_size);
			inst += sizeof(sljit_s32);
		}
	}

#ifdef _WIN64
	/* Save xmm6 register: movaps [rsp + 0x20], xmm6 */
	if (fscratches >= 6 || fsaveds >= 1) {
		inst = (sljit_u8*)ensure_buf(compiler, 1 + 5);
		FAIL_IF(!inst);
		INC_SIZE(5);
		*inst++ = GROUP_0F;
		sljit_unaligned_store_s32(inst, 0x20247429);
	}
#endif

	return SLJIT_SUCCESS;
}
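/* sljit_set_context() performs the same frame layout computation as
   sljit_emit_enter() (locals_offset on Win64 and the 16 byte aligned
   local_size) without emitting any code, so instructions generated later use
   the correct stack offsets. */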
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler,
	sljit_s32 options, sljit_s32 args, sljit_s32 scratches, sljit_s32 saveds,
	sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
{
	sljit_s32 saved_register_size;

	CHECK_ERROR();
	CHECK(check_sljit_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size));
	set_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size);

#ifdef _WIN64
	/* Two/four register slots for parameters plus space for xmm6 register if needed. */
	if (fscratches >= 6 || fsaveds >= 1)
		compiler->locals_offset = 6 * sizeof(sljit_sw);
	else
		compiler->locals_offset = ((scratches > 2) ? 4 : 2) * sizeof(sljit_sw);
#endif

	/* Including the return address saved by the call instruction. */
	saved_register_size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1);
	compiler->local_size = ((local_size + SLJIT_LOCALS_OFFSET + saved_register_size + 15) & ~15) - saved_register_size;
	return SLJIT_SUCCESS;
}

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 i, tmp, size;
	sljit_u8 *inst;

	CHECK_ERROR();
	CHECK(check_sljit_emit_return(compiler, op, src, srcw));

	FAIL_IF(emit_mov_before_return(compiler, op, src, srcw));

#ifdef _WIN64
	/* Restore xmm6 register: movaps xmm6, [rsp + 0x20] */
	if (compiler->fscratches >= 6 || compiler->fsaveds >= 1) {
		inst = (sljit_u8*)ensure_buf(compiler, 1 + 5);
		FAIL_IF(!inst);
		INC_SIZE(5);
		*inst++ = GROUP_0F;
		sljit_unaligned_store_s32(inst, 0x20247428);
	}
#endif

	if (compiler->local_size > 0) {
		if (compiler->local_size <= 127) {
			inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
			FAIL_IF(!inst);
			INC_SIZE(4);
			*inst++ = REX_W;
			*inst++ = GROUP_BINARY_83;
			*inst++ = MOD_REG | ADD | 4;
			*inst = compiler->local_size;
		}
		else {
			inst = (sljit_u8*)ensure_buf(compiler, 1 + 7);
			FAIL_IF(!inst);
			INC_SIZE(7);
			*inst++ = REX_W;
			*inst++ = GROUP_BINARY_81;
			*inst++ = MOD_REG | ADD | 4;
			sljit_unaligned_store_s32(inst, compiler->local_size);
		}
	}

	tmp = compiler->scratches;
	for (i = SLJIT_FIRST_SAVED_REG; i <= tmp; i++) {
		size = reg_map[i] >= 8 ? 2 : 1;
		inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
		FAIL_IF(!inst);
		INC_SIZE(size);
		if (reg_map[i] >= 8)
			*inst++ = REX_B;
		POP_REG(reg_lmap[i]);
	}

	tmp = compiler->saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - compiler->saveds) : SLJIT_FIRST_SAVED_REG;
	for (i = tmp; i <= SLJIT_S0; i++) {
		size = reg_map[i] >= 8 ? 2 : 1;
		inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
		FAIL_IF(!inst);
		INC_SIZE(size);
		if (reg_map[i] >= 8)
			*inst++ = REX_B;
		POP_REG(reg_lmap[i]);
	}

	inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
	FAIL_IF(!inst);
	INC_SIZE(1);
	RET();
	return SLJIT_SUCCESS;
}

/* --------------------------------------------------------------------- */
/*  Operators                                                            */
/* --------------------------------------------------------------------- */

static sljit_s32 emit_do_imm32(struct sljit_compiler *compiler, sljit_u8 rex, sljit_u8 opcode, sljit_sw imm)
{
	sljit_u8 *inst;
	sljit_s32 length = 1 + (rex ? 1 : 0) + sizeof(sljit_s32);

	inst = (sljit_u8*)ensure_buf(compiler, 1 + length);
	FAIL_IF(!inst);
	INC_SIZE(length);
	if (rex)
		*inst++ = rex;
	*inst++ = opcode;
	sljit_unaligned_store_s32(inst, imm);
	return SLJIT_SUCCESS;
}
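/* Generic instruction emitter: the encoded length (legacy prefixes, REX,
   opcode bytes, ModR/M, optional SIB, displacement and immediate) is computed
   first, the space is reserved with ensure_buf, and the operand fields are
   encoded. The returned pointer addresses the opcode byte(s), which the
   caller fills in afterwards; shift instructions are the exception, since
   their opcode is selected here and the pointer is advanced past it. */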
static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_s32 size,
	/* The register or immediate operand. */
	sljit_s32 a, sljit_sw imma,
	/* The general operand (not immediate). */
	sljit_s32 b, sljit_sw immb)
{
	sljit_u8 *inst;
	sljit_u8 *buf_ptr;
	sljit_u8 rex = 0;
	sljit_s32 flags = size & ~0xf;
	sljit_s32 inst_size;

	/* The immediate operand must be 32 bit. */
	SLJIT_ASSERT(!(a & SLJIT_IMM) || compiler->mode32 || IS_HALFWORD(imma));
	/* Both cannot be switched on. */
	SLJIT_ASSERT((flags & (EX86_BIN_INS | EX86_SHIFT_INS)) != (EX86_BIN_INS | EX86_SHIFT_INS));
	/* Size flags not allowed for typed instructions. */
	SLJIT_ASSERT(!(flags & (EX86_BIN_INS | EX86_SHIFT_INS)) || (flags & (EX86_BYTE_ARG | EX86_HALF_ARG)) == 0);
	/* Both size flags cannot be switched on. */
	SLJIT_ASSERT((flags & (EX86_BYTE_ARG | EX86_HALF_ARG)) != (EX86_BYTE_ARG | EX86_HALF_ARG));
	/* SSE2 and immediate is not possible. */
	SLJIT_ASSERT(!(a & SLJIT_IMM) || !(flags & EX86_SSE2));
	SLJIT_ASSERT((flags & (EX86_PREF_F2 | EX86_PREF_F3)) != (EX86_PREF_F2 | EX86_PREF_F3)
		&& (flags & (EX86_PREF_F2 | EX86_PREF_66)) != (EX86_PREF_F2 | EX86_PREF_66)
		&& (flags & (EX86_PREF_F3 | EX86_PREF_66)) != (EX86_PREF_F3 | EX86_PREF_66));

	size &= 0xf;
	inst_size = size;

	if (!compiler->mode32 && !(flags & EX86_NO_REXW))
		rex |= REX_W;
	else if (flags & EX86_REX)
		rex |= REX;

	if (flags & (EX86_PREF_F2 | EX86_PREF_F3))
		inst_size++;
	if (flags & EX86_PREF_66)
		inst_size++;

	/* Calculate size of b. */
	inst_size += 1; /* mod r/m byte. */
	if (b & SLJIT_MEM) {
		if (!(b & OFFS_REG_MASK)) {
			if (NOT_HALFWORD(immb)) {
				PTR_FAIL_IF(emit_load_imm64(compiler, TMP_REG3, immb));
				immb = 0;
				if (b & REG_MASK)
					b |= TO_OFFS_REG(TMP_REG3);
				else
					b |= TMP_REG3;
			}
			else if (reg_lmap[b & REG_MASK] == 4)
				b |= TO_OFFS_REG(SLJIT_SP);
		}

		if ((b & REG_MASK) == SLJIT_UNUSED)
			inst_size += 1 + sizeof(sljit_s32); /* SIB byte required to avoid RIP based addressing. */
		else {
			if (reg_map[b & REG_MASK] >= 8)
				rex |= REX_B;

			if (immb != 0 && (!(b & OFFS_REG_MASK) || (b & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_SP))) {
				/* Immediate operand. */
				if (immb <= 127 && immb >= -128)
					inst_size += sizeof(sljit_s8);
				else
					inst_size += sizeof(sljit_s32);
			}
			else if (reg_lmap[b & REG_MASK] == 5)
				inst_size += sizeof(sljit_s8);

			if ((b & OFFS_REG_MASK) != SLJIT_UNUSED) {
				inst_size += 1; /* SIB byte. */
				if (reg_map[OFFS_REG(b)] >= 8)
					rex |= REX_X;
			}
		}
	}
	else if (!(flags & EX86_SSE2_OP2) && reg_map[b] >= 8)
		rex |= REX_B;

	if (a & SLJIT_IMM) {
		if (flags & EX86_BIN_INS) {
			if (imma <= 127 && imma >= -128) {
				inst_size += 1;
				flags |= EX86_BYTE_ARG;
			} else
				inst_size += 4;
		}
		else if (flags & EX86_SHIFT_INS) {
			imma &= compiler->mode32 ? 0x1f : 0x3f;
			if (imma != 1) {
				inst_size++;
				flags |= EX86_BYTE_ARG;
			}
		} else if (flags & EX86_BYTE_ARG)
			inst_size++;
		else if (flags & EX86_HALF_ARG)
			inst_size += sizeof(short);
		else
			inst_size += sizeof(sljit_s32);
	}
	else {
		SLJIT_ASSERT(!(flags & EX86_SHIFT_INS) || a == SLJIT_PREF_SHIFT_REG);
		/* reg_map[SLJIT_PREF_SHIFT_REG] is less than 8. */
		if (!(flags & EX86_SSE2_OP1) && reg_map[a] >= 8)
			rex |= REX_R;
	}

	if (rex)
		inst_size++;

	inst = (sljit_u8*)ensure_buf(compiler, 1 + inst_size);
	PTR_FAIL_IF(!inst);

	/* Encoding the byte. */
	INC_SIZE(inst_size);
	if (flags & EX86_PREF_F2)
		*inst++ = 0xf2;
	if (flags & EX86_PREF_F3)
		*inst++ = 0xf3;
	if (flags & EX86_PREF_66)
		*inst++ = 0x66;
	if (rex)
		*inst++ = rex;
	buf_ptr = inst + size;
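	/* In the ModR/M byte written below the mod bits select the displacement
	   size: 0x00 means none, 0x40 an 8 bit and 0x80 a 32 bit displacement.
	   An rm value of 0x04 introduces a SIB byte, and the 0x04/0x25 pair with
	   mod 0 encodes an absolute 32 bit address (avoiding RIP relative
	   addressing). A base of rbp/r13 (reg_lmap == 5) cannot be encoded with
	   mod 0, so a zero 8 bit displacement is emitted for it. */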
	/* Encode mod/rm byte. */
	if (!(flags & EX86_SHIFT_INS)) {
		if ((flags & EX86_BIN_INS) && (a & SLJIT_IMM))
			*inst = (flags & EX86_BYTE_ARG) ? GROUP_BINARY_83 : GROUP_BINARY_81;

		if ((a & SLJIT_IMM) || (a == 0))
			*buf_ptr = 0;
		else if (!(flags & EX86_SSE2_OP1))
			*buf_ptr = reg_lmap[a] << 3;
		else
			*buf_ptr = a << 3;
	}
	else {
		if (a & SLJIT_IMM) {
			if (imma == 1)
				*inst = GROUP_SHIFT_1;
			else
				*inst = GROUP_SHIFT_N;
		} else
			*inst = GROUP_SHIFT_CL;
		*buf_ptr = 0;
	}

	if (!(b & SLJIT_MEM))
		*buf_ptr++ |= MOD_REG + ((!(flags & EX86_SSE2_OP2)) ? reg_lmap[b] : b);
	else if ((b & REG_MASK) != SLJIT_UNUSED) {
		if ((b & OFFS_REG_MASK) == SLJIT_UNUSED || (b & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_SP)) {
			if (immb != 0 || reg_lmap[b & REG_MASK] == 5) {
				if (immb <= 127 && immb >= -128)
					*buf_ptr |= 0x40;
				else
					*buf_ptr |= 0x80;
			}

			if ((b & OFFS_REG_MASK) == SLJIT_UNUSED)
				*buf_ptr++ |= reg_lmap[b & REG_MASK];
			else {
				*buf_ptr++ |= 0x04;
				*buf_ptr++ = reg_lmap[b & REG_MASK] | (reg_lmap[OFFS_REG(b)] << 3);
			}

			if (immb != 0 || reg_lmap[b & REG_MASK] == 5) {
				if (immb <= 127 && immb >= -128)
					*buf_ptr++ = immb; /* 8 bit displacement. */
				else {
					sljit_unaligned_store_s32(buf_ptr, immb); /* 32 bit displacement. */
					buf_ptr += sizeof(sljit_s32);
				}
			}
		}
		else {
			if (reg_lmap[b & REG_MASK] == 5)
				*buf_ptr |= 0x40;
			*buf_ptr++ |= 0x04;
			*buf_ptr++ = reg_lmap[b & REG_MASK] | (reg_lmap[OFFS_REG(b)] << 3) | (immb << 6);
			if (reg_lmap[b & REG_MASK] == 5)
				*buf_ptr++ = 0;
		}
	}
	else {
		*buf_ptr++ |= 0x04;
		*buf_ptr++ = 0x25;
		sljit_unaligned_store_s32(buf_ptr, immb); /* 32 bit displacement. */
		buf_ptr += sizeof(sljit_s32);
	}

	if (a & SLJIT_IMM) {
		if (flags & EX86_BYTE_ARG)
			*buf_ptr = imma;
		else if (flags & EX86_HALF_ARG)
			sljit_unaligned_store_s16(buf_ptr, imma);
		else if (!(flags & EX86_SHIFT_INS))
			sljit_unaligned_store_s32(buf_ptr, imma);
	}

	return !(flags & EX86_SHIFT_INS) ? inst : (inst + 1);
}

/* --------------------------------------------------------------------- */
/*  Call / return instructions                                           */
/* --------------------------------------------------------------------- */
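/* Moves the SLJIT argument registers into the ABI argument registers before
   a call. SLJIT_R1 is already mapped to the second argument register (rsi on
   System V, rdx on Win64, see the asserts below), so only SLJIT_R0 and, for
   three argument calls, SLJIT_R2 need to be copied. */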
static SLJIT_INLINE sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 type)
{
	sljit_u8 *inst;

#ifndef _WIN64
	SLJIT_ASSERT(reg_map[SLJIT_R1] == 6 && reg_map[SLJIT_R0] < 8 && reg_map[SLJIT_R2] < 8);

	inst = (sljit_u8*)ensure_buf(compiler, 1 + ((type < SLJIT_CALL3) ? 3 : 6));
	FAIL_IF(!inst);
	INC_SIZE((type < SLJIT_CALL3) ? 3 : 6);
	if (type >= SLJIT_CALL3) {
		*inst++ = REX_W;
		*inst++ = MOV_r_rm;
		*inst++ = MOD_REG | (0x2 /* rdx */ << 3) | reg_lmap[SLJIT_R2];
	}
	*inst++ = REX_W;
	*inst++ = MOV_r_rm;
	*inst++ = MOD_REG | (0x7 /* rdi */ << 3) | reg_lmap[SLJIT_R0];
#else
	SLJIT_ASSERT(reg_map[SLJIT_R1] == 2 && reg_map[SLJIT_R0] < 8 && reg_map[SLJIT_R2] < 8);

	inst = (sljit_u8*)ensure_buf(compiler, 1 + ((type < SLJIT_CALL3) ? 3 : 6));
	FAIL_IF(!inst);
	INC_SIZE((type < SLJIT_CALL3) ? 3 : 6);
	if (type >= SLJIT_CALL3) {
		*inst++ = REX_W | REX_R;
		*inst++ = MOV_r_rm;
		*inst++ = MOD_REG | (0x0 /* r8 */ << 3) | reg_lmap[SLJIT_R2];
	}
	*inst++ = REX_W;
	*inst++ = MOV_r_rm;
	*inst++ = MOD_REG | (0x1 /* rcx */ << 3) | reg_lmap[SLJIT_R0];
#endif
	return SLJIT_SUCCESS;
}

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
{
	sljit_u8 *inst;

	CHECK_ERROR();
	CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw));
	ADJUST_LOCAL_OFFSET(dst, dstw);

	/* For UNUSED dst. Uncommon, but possible. */
	if (dst == SLJIT_UNUSED)
		dst = TMP_REG1;

	if (FAST_IS_REG(dst)) {
		if (reg_map[dst] < 8) {
			inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
			FAIL_IF(!inst);
			INC_SIZE(1);
			POP_REG(reg_lmap[dst]);
			return SLJIT_SUCCESS;
		}

		inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
		FAIL_IF(!inst);
		INC_SIZE(2);
		*inst++ = REX_B;
		POP_REG(reg_lmap[dst]);
		return SLJIT_SUCCESS;
	}

	/* REX_W is not necessary (src is not immediate). */
	compiler->mode32 = 1;
	inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
	FAIL_IF(!inst);
	*inst++ = POP_rm;
	return SLJIT_SUCCESS;
}

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_return(struct sljit_compiler *compiler, sljit_s32 src, sljit_sw srcw)
{
	sljit_u8 *inst;

	CHECK_ERROR();
	CHECK(check_sljit_emit_fast_return(compiler, src, srcw));
	ADJUST_LOCAL_OFFSET(src, srcw);

	if ((src & SLJIT_IMM) && NOT_HALFWORD(srcw)) {
		FAIL_IF(emit_load_imm64(compiler, TMP_REG1, srcw));
		src = TMP_REG1;
	}

	if (FAST_IS_REG(src)) {
		if (reg_map[src] < 8) {
			inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + 1);
			FAIL_IF(!inst);

			INC_SIZE(1 + 1);
			PUSH_REG(reg_lmap[src]);
		}
		else {
			inst = (sljit_u8*)ensure_buf(compiler, 1 + 2 + 1);
			FAIL_IF(!inst);

			INC_SIZE(2 + 1);
			*inst++ = REX_B;
			PUSH_REG(reg_lmap[src]);
		}
	}
	else if (src & SLJIT_MEM) {
		/* REX_W is not necessary (src is not immediate). */
		compiler->mode32 = 1;
		inst = emit_x86_instruction(compiler, 1, 0, 0, src, srcw);
		FAIL_IF(!inst);
		*inst++ = GROUP_FF;
		*inst |= PUSH_rm;

		inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
		FAIL_IF(!inst);
		INC_SIZE(1);
	}
	else {
		SLJIT_ASSERT(IS_HALFWORD(srcw));
		/* SLJIT_IMM. */
		inst = (sljit_u8*)ensure_buf(compiler, 1 + 5 + 1);
		FAIL_IF(!inst);

		INC_SIZE(5 + 1);
		*inst++ = PUSH_i32;
		sljit_unaligned_store_s32(inst, srcw);
		inst += sizeof(sljit_s32);
	}

	RET();
	return SLJIT_SUCCESS;
}

/* --------------------------------------------------------------------- */
/*  Extend input                                                         */
/* --------------------------------------------------------------------- */
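/* Loads a 32 bit value into a 64 bit destination. Sign extension uses MOVSXD;
   zero extension relies on a 32 bit register write implicitly clearing the
   upper 32 bits, so a plain 32 bit mov (mode32 = 1) is sufficient. */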
static sljit_s32 emit_mov_int(struct sljit_compiler *compiler, sljit_s32 sign,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_u8* inst;
	sljit_s32 dst_r;

	compiler->mode32 = 0;

	if (dst == SLJIT_UNUSED && !(src & SLJIT_MEM))
		return SLJIT_SUCCESS; /* Empty instruction. */

	if (src & SLJIT_IMM) {
		if (FAST_IS_REG(dst)) {
			if (sign || ((sljit_uw)srcw <= 0x7fffffff)) {
				inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, (sljit_sw)(sljit_s32)srcw, dst, dstw);
				FAIL_IF(!inst);
				*inst = MOV_rm_i32;
				return SLJIT_SUCCESS;
			}
			return emit_load_imm64(compiler, dst, srcw);
		}
		compiler->mode32 = 1;
		inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, (sljit_sw)(sljit_s32)srcw, dst, dstw);
		FAIL_IF(!inst);
		*inst = MOV_rm_i32;
		compiler->mode32 = 0;
		return SLJIT_SUCCESS;
	}

	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;

	if ((dst & SLJIT_MEM) && FAST_IS_REG(src))
		dst_r = src;
	else {
		if (sign) {
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, src, srcw);
			FAIL_IF(!inst);
			*inst++ = MOVSXD_r_rm;
		} else {
			compiler->mode32 = 1;
			FAIL_IF(emit_mov(compiler, dst_r, 0, src, srcw));
			compiler->mode32 = 0;
		}
	}

	if (dst & SLJIT_MEM) {
		compiler->mode32 = 1;
		inst = emit_x86_instruction(compiler, 1, dst_r, 0, dst, dstw);
		FAIL_IF(!inst);
		*inst = MOV_rm_r;
		compiler->mode32 = 0;
	}

	return SLJIT_SUCCESS;
}