/*	$NetBSD: sljitNativeX86_64.c,v 1.4 2019/01/20 23:14:16 alnsn Exp $	*/

/*
 *    Stack-less Just-In-Time compiler
 *
 *    Copyright Zoltan Herczeg (hzmester (at) freemail.hu). All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification, are
 * permitted provided that the following conditions are met:
 *
 *   1. Redistributions of source code must retain the above copyright notice, this list of
 *      conditions and the following disclaimer.
 *
 *   2. Redistributions in binary form must reproduce the above copyright notice, this list
 *      of conditions and the following disclaimer in the documentation and/or other materials
 *      provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
 * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/* x86 64-bit arch dependent functions. */

static sljit_s32 emit_load_imm64(struct sljit_compiler *compiler, sljit_s32 reg, sljit_sw imm)
{
	sljit_u8 *inst;

	inst = (sljit_u8*)ensure_buf(compiler, 1 + 2 + sizeof(sljit_sw));
	FAIL_IF(!inst);
	INC_SIZE(2 + sizeof(sljit_sw));
	*inst++ = REX_W | ((reg_map[reg] <= 7) ? 0 : REX_B);
	*inst++ = MOV_r_i32 + (reg_map[reg] & 0x7);
	sljit_unaligned_store_sw(inst, imm);
	return SLJIT_SUCCESS;
}
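
/* emit_load_imm64() above emits the ten-byte MOV r64, imm64 form:
   REX.W (plus REX.B for r8-r15), then B8+rd, then the 64-bit immediate.
   As an illustration, loading 0x1122334455667788 into rax would come
   out as 48 B8 88 77 66 55 44 33 22 11 (the immediate is stored
   little-endian). */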

static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_s32 type)
{
	if (type < SLJIT_JUMP) {
		/* Invert type. */
		*code_ptr++ = get_jump_code(type ^ 0x1) - 0x10;
		*code_ptr++ = 10 + 3;
	}

	SLJIT_ASSERT(reg_map[TMP_REG3] == 9);
	*code_ptr++ = REX_W | REX_B;
	*code_ptr++ = MOV_r_i32 + 1;
	jump->addr = (sljit_uw)code_ptr;

	if (jump->flags & JUMP_LABEL)
		jump->flags |= PATCH_MD;
	else
		sljit_unaligned_store_sw(code_ptr, jump->u.target);

	code_ptr += sizeof(sljit_sw);
	*code_ptr++ = REX_B;
	*code_ptr++ = GROUP_FF;
	*code_ptr++ = (type >= SLJIT_FAST_CALL) ? (MOD_REG | CALL_rm | 1) : (MOD_REG | JMP_rm | 1);

	return code_ptr;
}
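
/* Roughly, a far jump is emitted as "mov r9, target" (10 bytes)
   followed by an indirect "call r9" or "jmp r9" (3 bytes). For
   conditional jumps the condition is first inverted and encoded as a
   short Jcc whose rel8 skips those 10 + 3 bytes; subtracting 0x10 from
   the near Jcc opcode returned by get_jump_code() (0F 8x) yields the
   short form (7x). */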

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler,
	sljit_s32 options, sljit_s32 args, sljit_s32 scratches, sljit_s32 saveds,
	sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
{
	sljit_s32 i, tmp, size, saved_register_size;
	sljit_u8 *inst;

	CHECK_ERROR();
	CHECK(check_sljit_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size));
	set_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size);

#ifdef _WIN64
	/* Two/four register slots for parameters plus space for the xmm6 register if needed. */
	if (fscratches >= 6 || fsaveds >= 1)
		compiler->locals_offset = 6 * sizeof(sljit_sw);
	else
		compiler->locals_offset = ((scratches > 2) ? 4 : 2) * sizeof(sljit_sw);
#endif

	/* Including the return address saved by the call instruction. */
	saved_register_size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1);

	tmp = saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - saveds) : SLJIT_FIRST_SAVED_REG;
	for (i = SLJIT_S0; i >= tmp; i--) {
		size = reg_map[i] >= 8 ? 2 : 1;
		inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
		FAIL_IF(!inst);
		INC_SIZE(size);
		if (reg_map[i] >= 8)
			*inst++ = REX_B;
		PUSH_REG(reg_lmap[i]);
	}

	for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--) {
		size = reg_map[i] >= 8 ? 2 : 1;
		inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
		FAIL_IF(!inst);
		INC_SIZE(size);
		if (reg_map[i] >= 8)
			*inst++ = REX_B;
		PUSH_REG(reg_lmap[i]);
	}

	if (args > 0) {
		size = args * 3;
		inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
		FAIL_IF(!inst);

		INC_SIZE(size);

#ifndef _WIN64
		if (args > 0) {
			*inst++ = REX_W;
			*inst++ = MOV_r_rm;
			*inst++ = MOD_REG | (reg_map[SLJIT_S0] << 3) | 0x7 /* rdi */;
		}
		if (args > 1) {
			*inst++ = REX_W | REX_R;
			*inst++ = MOV_r_rm;
			*inst++ = MOD_REG | (reg_lmap[SLJIT_S1] << 3) | 0x6 /* rsi */;
		}
		if (args > 2) {
			*inst++ = REX_W | REX_R;
			*inst++ = MOV_r_rm;
			*inst++ = MOD_REG | (reg_lmap[SLJIT_S2] << 3) | 0x2 /* rdx */;
		}
#else
		if (args > 0) {
			*inst++ = REX_W;
			*inst++ = MOV_r_rm;
			*inst++ = MOD_REG | (reg_map[SLJIT_S0] << 3) | 0x1 /* rcx */;
		}
		if (args > 1) {
			*inst++ = REX_W;
			*inst++ = MOV_r_rm;
			*inst++ = MOD_REG | (reg_map[SLJIT_S1] << 3) | 0x2 /* rdx */;
		}
		if (args > 2) {
			*inst++ = REX_W | REX_B;
			*inst++ = MOV_r_rm;
			*inst++ = MOD_REG | (reg_map[SLJIT_S2] << 3) | 0x0 /* r8 */;
		}
#endif
	}

	local_size = ((local_size + SLJIT_LOCALS_OFFSET + saved_register_size + 15) & ~15) - saved_register_size;
	compiler->local_size = local_size;
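
	/* A worked example of the alignment arithmetic, assuming
	   SLJIT_LOCALS_OFFSET is 0 on this target: with two saved registers
	   plus the return address, saved_register_size is 24, and a requested
	   local_size of 20 becomes ((20 + 0 + 24 + 15) & ~15) - 24 = 48 - 24
	   = 24, so the total frame (24 + 24 = 48) keeps rsp 16-byte aligned
	   after the subtraction below. */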

#ifdef _WIN64
	if (local_size > 1024) {
		/* Allocate stack for the callback, which grows the stack. */
		inst = (sljit_u8*)ensure_buf(compiler, 1 + 4 + (3 + sizeof(sljit_s32)));
		FAIL_IF(!inst);
		INC_SIZE(4 + (3 + sizeof(sljit_s32)));
		*inst++ = REX_W;
		*inst++ = GROUP_BINARY_83;
		*inst++ = MOD_REG | SUB | reg_map[SLJIT_SP];
		/* The allocated size for registers must be divisible by 8. */
		SLJIT_ASSERT(!(saved_register_size & 0x7));
		/* Aligned to 16 bytes. */
		if (saved_register_size & 0x8) {
			*inst++ = 5 * sizeof(sljit_sw);
			local_size -= 5 * sizeof(sljit_sw);
		} else {
			*inst++ = 4 * sizeof(sljit_sw);
			local_size -= 4 * sizeof(sljit_sw);
		}
		/* Second instruction. */
		SLJIT_ASSERT(reg_map[SLJIT_R0] < 8);
		*inst++ = REX_W;
		*inst++ = MOV_rm_i32;
		*inst++ = MOD_REG | reg_lmap[SLJIT_R0];
		sljit_unaligned_store_s32(inst, local_size);
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
		|| (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
		compiler->skip_checks = 1;
#endif
		FAIL_IF(sljit_emit_ijump(compiler, SLJIT_CALL1, SLJIT_IMM, SLJIT_FUNC_OFFSET(sljit_grow_stack)));
	}
#endif

	if (local_size > 0) {
		if (local_size <= 127) {
			inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
			FAIL_IF(!inst);
			INC_SIZE(4);
			*inst++ = REX_W;
			*inst++ = GROUP_BINARY_83;
			*inst++ = MOD_REG | SUB | reg_map[SLJIT_SP];
			*inst++ = local_size;
		}
		else {
			inst = (sljit_u8*)ensure_buf(compiler, 1 + 7);
			FAIL_IF(!inst);
			INC_SIZE(7);
			*inst++ = REX_W;
			*inst++ = GROUP_BINARY_81;
			*inst++ = MOD_REG | SUB | reg_map[SLJIT_SP];
			sljit_unaligned_store_s32(inst, local_size);
			inst += sizeof(sljit_s32);
		}
	}

#ifdef _WIN64
	/* Save the xmm6 register: movaps [rsp + 0x20], xmm6 */
	if (fscratches >= 6 || fsaveds >= 1) {
		inst = (sljit_u8*)ensure_buf(compiler, 1 + 5);
		FAIL_IF(!inst);
		INC_SIZE(5);
		*inst++ = GROUP_0F;
		sljit_unaligned_store_s32(inst, 0x20247429);
	}
#endif
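
	/* Note on the constant above: sljit_unaligned_store_s32() writes
	   little-endian on x86, so 0x20247429 emits the bytes 29 74 24 20.
	   Together with the 0F prefix this is 0F 29 74 24 20, i.e.
	   "movaps [rsp + 0x20], xmm6"; 0x20247428 in sljit_emit_return()
	   encodes the matching "movaps xmm6, [rsp + 0x20]" reload. The
	   slot at rsp + 0x20 lies inside the area reserved through
	   locals_offset above. */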

	return SLJIT_SUCCESS;
}

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler,
	sljit_s32 options, sljit_s32 args, sljit_s32 scratches, sljit_s32 saveds,
	sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
{
	sljit_s32 saved_register_size;

	CHECK_ERROR();
	CHECK(check_sljit_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size));
	set_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size);

#ifdef _WIN64
	/* Two/four register slots for parameters plus space for the xmm6 register if needed. */
	if (fscratches >= 6 || fsaveds >= 1)
		compiler->locals_offset = 6 * sizeof(sljit_sw);
	else
		compiler->locals_offset = ((scratches > 2) ? 4 : 2) * sizeof(sljit_sw);
#endif

	/* Including the return address saved by the call instruction. */
	saved_register_size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1);
	compiler->local_size = ((local_size + SLJIT_LOCALS_OFFSET + saved_register_size + 15) & ~15) - saved_register_size;
	return SLJIT_SUCCESS;
}

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 i, tmp, size;
	sljit_u8 *inst;

	CHECK_ERROR();
	CHECK(check_sljit_emit_return(compiler, op, src, srcw));

	FAIL_IF(emit_mov_before_return(compiler, op, src, srcw));

#ifdef _WIN64
	/* Restore the xmm6 register: movaps xmm6, [rsp + 0x20] */
	if (compiler->fscratches >= 6 || compiler->fsaveds >= 1) {
		inst = (sljit_u8*)ensure_buf(compiler, 1 + 5);
		FAIL_IF(!inst);
		INC_SIZE(5);
		*inst++ = GROUP_0F;
		sljit_unaligned_store_s32(inst, 0x20247428);
	}
#endif

	if (compiler->local_size > 0) {
		if (compiler->local_size <= 127) {
			inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
			FAIL_IF(!inst);
			INC_SIZE(4);
			*inst++ = REX_W;
			*inst++ = GROUP_BINARY_83;
			*inst++ = MOD_REG | ADD | 4;
			*inst = compiler->local_size;
		}
		else {
			inst = (sljit_u8*)ensure_buf(compiler, 1 + 7);
			FAIL_IF(!inst);
			INC_SIZE(7);
			*inst++ = REX_W;
			*inst++ = GROUP_BINARY_81;
			*inst++ = MOD_REG | ADD | 4;
			sljit_unaligned_store_s32(inst, compiler->local_size);
		}
	}

	tmp = compiler->scratches;
	for (i = SLJIT_FIRST_SAVED_REG; i <= tmp; i++) {
		size = reg_map[i] >= 8 ? 2 : 1;
		inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
		FAIL_IF(!inst);
		INC_SIZE(size);
		if (reg_map[i] >= 8)
			*inst++ = REX_B;
		POP_REG(reg_lmap[i]);
	}

	tmp = compiler->saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - compiler->saveds) : SLJIT_FIRST_SAVED_REG;
	for (i = tmp; i <= SLJIT_S0; i++) {
		size = reg_map[i] >= 8 ? 2 : 1;
		inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
		FAIL_IF(!inst);
		INC_SIZE(size);
		if (reg_map[i] >= 8)
			*inst++ = REX_B;
		POP_REG(reg_lmap[i]);
	}

	inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
	FAIL_IF(!inst);
	INC_SIZE(1);
	RET();
	return SLJIT_SUCCESS;
}

/* --------------------------------------------------------------------- */
/*  Operators                                                            */
/* --------------------------------------------------------------------- */

static sljit_s32 emit_do_imm32(struct sljit_compiler *compiler, sljit_u8 rex, sljit_u8 opcode, sljit_sw imm)
{
	sljit_u8 *inst;
	sljit_s32 length = 1 + (rex ? 1 : 0) + sizeof(sljit_s32);

	inst = (sljit_u8*)ensure_buf(compiler, 1 + length);
	FAIL_IF(!inst);
	INC_SIZE(length);
	if (rex)
		*inst++ = rex;
	*inst++ = opcode;
	sljit_unaligned_store_s32(inst, imm);
	return SLJIT_SUCCESS;
}
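
/* The general encoder below assembles instructions in the standard
   x86-64 order: [66/F2/F3 prefix] [REX] opcode ModRM [SIB]
   [disp8/disp32] [imm8/imm16/imm32]. The first pass computes inst_size
   and the REX byte; the second pass writes the bytes. */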

static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_s32 size,
	/* The register or immediate operand. */
	sljit_s32 a, sljit_sw imma,
	/* The general operand (not immediate). */
	sljit_s32 b, sljit_sw immb)
{
	sljit_u8 *inst;
	sljit_u8 *buf_ptr;
	sljit_u8 rex = 0;
	sljit_s32 flags = size & ~0xf;
	sljit_s32 inst_size;

	/* The immediate operand must be 32 bit. */
	SLJIT_ASSERT(!(a & SLJIT_IMM) || compiler->mode32 || IS_HALFWORD(imma));
	/* Both cannot be switched on. */
	SLJIT_ASSERT((flags & (EX86_BIN_INS | EX86_SHIFT_INS)) != (EX86_BIN_INS | EX86_SHIFT_INS));
	/* Size flags not allowed for typed instructions. */
	SLJIT_ASSERT(!(flags & (EX86_BIN_INS | EX86_SHIFT_INS)) || (flags & (EX86_BYTE_ARG | EX86_HALF_ARG)) == 0);
	/* Both size flags cannot be switched on. */
	SLJIT_ASSERT((flags & (EX86_BYTE_ARG | EX86_HALF_ARG)) != (EX86_BYTE_ARG | EX86_HALF_ARG));
	/* SSE2 and immediate is not possible. */
	SLJIT_ASSERT(!(a & SLJIT_IMM) || !(flags & EX86_SSE2));
	SLJIT_ASSERT((flags & (EX86_PREF_F2 | EX86_PREF_F3)) != (EX86_PREF_F2 | EX86_PREF_F3)
		&& (flags & (EX86_PREF_F2 | EX86_PREF_66)) != (EX86_PREF_F2 | EX86_PREF_66)
		&& (flags & (EX86_PREF_F3 | EX86_PREF_66)) != (EX86_PREF_F3 | EX86_PREF_66));

	size &= 0xf;
	inst_size = size;

	if (!compiler->mode32 && !(flags & EX86_NO_REXW))
		rex |= REX_W;
	else if (flags & EX86_REX)
		rex |= REX;

	if (flags & (EX86_PREF_F2 | EX86_PREF_F3))
		inst_size++;
	if (flags & EX86_PREF_66)
		inst_size++;

	/* Calculate size of b. */
	inst_size += 1; /* mod r/m byte. */
	if (b & SLJIT_MEM) {
		if (!(b & OFFS_REG_MASK)) {
			if (NOT_HALFWORD(immb)) {
				PTR_FAIL_IF(emit_load_imm64(compiler, TMP_REG3, immb));
				immb = 0;
				if (b & REG_MASK)
					b |= TO_OFFS_REG(TMP_REG3);
				else
					b |= TMP_REG3;
			}
			else if (reg_lmap[b & REG_MASK] == 4)
				b |= TO_OFFS_REG(SLJIT_SP);
		}

		if ((b & REG_MASK) == SLJIT_UNUSED)
			inst_size += 1 + sizeof(sljit_s32); /* SIB byte required to avoid RIP based addressing. */
		else {
			if (reg_map[b & REG_MASK] >= 8)
				rex |= REX_B;

			if (immb != 0 && (!(b & OFFS_REG_MASK) || (b & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_SP))) {
				/* Immediate operand. */
				if (immb <= 127 && immb >= -128)
					inst_size += sizeof(sljit_s8);
				else
					inst_size += sizeof(sljit_s32);
			}
			else if (reg_lmap[b & REG_MASK] == 5)
				inst_size += sizeof(sljit_s8);

			if ((b & OFFS_REG_MASK) != SLJIT_UNUSED) {
				inst_size += 1; /* SIB byte. */
				if (reg_map[OFFS_REG(b)] >= 8)
					rex |= REX_X;
			}
		}
	}
	else if (!(flags & EX86_SSE2_OP2) && reg_map[b] >= 8)
		rex |= REX_B;

	if (a & SLJIT_IMM) {
		if (flags & EX86_BIN_INS) {
			if (imma <= 127 && imma >= -128) {
				inst_size += 1;
				flags |= EX86_BYTE_ARG;
			} else
				inst_size += 4;
		}
		else if (flags & EX86_SHIFT_INS) {
			imma &= compiler->mode32 ? 0x1f : 0x3f;
			if (imma != 1) {
				inst_size++;
				flags |= EX86_BYTE_ARG;
			}
		} else if (flags & EX86_BYTE_ARG)
			inst_size++;
		else if (flags & EX86_HALF_ARG)
			inst_size += sizeof(short);
		else
			inst_size += sizeof(sljit_s32);
	}
	else {
		SLJIT_ASSERT(!(flags & EX86_SHIFT_INS) || a == SLJIT_PREF_SHIFT_REG);
		/* reg_map[SLJIT_PREF_SHIFT_REG] is less than 8. */
		if (!(flags & EX86_SSE2_OP1) && reg_map[a] >= 8)
			rex |= REX_R;
	}

	if (rex)
		inst_size++;
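
	/* ModRM corner cases handled in the size pass above and the encoding
	   below: r/m = 4 (rsp/r12) always requires a SIB byte; a base of
	   r/m = 5 (rbp/r13) with mod = 00 would mean RIP-relative or
	   disp32-only addressing, so an explicit zero disp8 is forced
	   instead; absolute 32-bit addresses use the SIB escape 04 25
	   followed by the displacement. */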

	inst = (sljit_u8*)ensure_buf(compiler, 1 + inst_size);
	PTR_FAIL_IF(!inst);

	/* Encode the instruction. */
	INC_SIZE(inst_size);
	if (flags & EX86_PREF_F2)
		*inst++ = 0xf2;
	if (flags & EX86_PREF_F3)
		*inst++ = 0xf3;
	if (flags & EX86_PREF_66)
		*inst++ = 0x66;
	if (rex)
		*inst++ = rex;
	buf_ptr = inst + size;

	/* Encode mod/rm byte. */
	if (!(flags & EX86_SHIFT_INS)) {
		if ((flags & EX86_BIN_INS) && (a & SLJIT_IMM))
			*inst = (flags & EX86_BYTE_ARG) ? GROUP_BINARY_83 : GROUP_BINARY_81;

		if ((a & SLJIT_IMM) || (a == 0))
			*buf_ptr = 0;
		else if (!(flags & EX86_SSE2_OP1))
			*buf_ptr = reg_lmap[a] << 3;
		else
			*buf_ptr = a << 3;
	}
	else {
		if (a & SLJIT_IMM) {
			if (imma == 1)
				*inst = GROUP_SHIFT_1;
			else
				*inst = GROUP_SHIFT_N;
		} else
			*inst = GROUP_SHIFT_CL;
		*buf_ptr = 0;
	}

	if (!(b & SLJIT_MEM))
		*buf_ptr++ |= MOD_REG + ((!(flags & EX86_SSE2_OP2)) ? reg_lmap[b] : b);
	else if ((b & REG_MASK) != SLJIT_UNUSED) {
		if ((b & OFFS_REG_MASK) == SLJIT_UNUSED || (b & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_SP)) {
			if (immb != 0 || reg_lmap[b & REG_MASK] == 5) {
				if (immb <= 127 && immb >= -128)
					*buf_ptr |= 0x40;
				else
					*buf_ptr |= 0x80;
			}

			if ((b & OFFS_REG_MASK) == SLJIT_UNUSED)
				*buf_ptr++ |= reg_lmap[b & REG_MASK];
			else {
				*buf_ptr++ |= 0x04;
				*buf_ptr++ = reg_lmap[b & REG_MASK] | (reg_lmap[OFFS_REG(b)] << 3);
			}

			if (immb != 0 || reg_lmap[b & REG_MASK] == 5) {
				if (immb <= 127 && immb >= -128)
					*buf_ptr++ = immb; /* 8 bit displacement. */
				else {
					sljit_unaligned_store_s32(buf_ptr, immb); /* 32 bit displacement. */
					buf_ptr += sizeof(sljit_s32);
				}
			}
		}
		else {
			if (reg_lmap[b & REG_MASK] == 5)
				*buf_ptr |= 0x40;
			*buf_ptr++ |= 0x04;
			*buf_ptr++ = reg_lmap[b & REG_MASK] | (reg_lmap[OFFS_REG(b)] << 3) | (immb << 6);
			if (reg_lmap[b & REG_MASK] == 5)
				*buf_ptr++ = 0;
		}
	}
	else {
		*buf_ptr++ |= 0x04;
		*buf_ptr++ = 0x25;
		sljit_unaligned_store_s32(buf_ptr, immb); /* 32 bit displacement. */
		buf_ptr += sizeof(sljit_s32);
	}

	if (a & SLJIT_IMM) {
		if (flags & EX86_BYTE_ARG)
			*buf_ptr = imma;
		else if (flags & EX86_HALF_ARG)
			sljit_unaligned_store_s16(buf_ptr, imma);
		else if (!(flags & EX86_SHIFT_INS))
			sljit_unaligned_store_s32(buf_ptr, imma);
	}

	/* Return a pointer to the opcode byte (or, for shift instructions,
	   the mod/rm byte) so the caller can complete the instruction. */
	return !(flags & EX86_SHIFT_INS) ? inst : (inst + 1);
}

/* --------------------------------------------------------------------- */
/*  Call / return instructions                                           */
/* --------------------------------------------------------------------- */
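
/* Argument registers differ between the two supported ABIs: System V
   passes the first three integer arguments in rdi, rsi and rdx, while
   the Windows x64 convention uses rcx, rdx and r8. call_with_args()
   only needs the few moves below because sljit's scratch register
   mapping already matches part of each convention (see the asserts). */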

static SLJIT_INLINE sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 type)
{
	sljit_u8 *inst;

#ifndef _WIN64
	SLJIT_ASSERT(reg_map[SLJIT_R1] == 6 && reg_map[SLJIT_R0] < 8 && reg_map[SLJIT_R2] < 8);

	inst = (sljit_u8*)ensure_buf(compiler, 1 + ((type < SLJIT_CALL3) ? 3 : 6));
	FAIL_IF(!inst);
	INC_SIZE((type < SLJIT_CALL3) ? 3 : 6);
	if (type >= SLJIT_CALL3) {
		*inst++ = REX_W;
		*inst++ = MOV_r_rm;
		*inst++ = MOD_REG | (0x2 /* rdx */ << 3) | reg_lmap[SLJIT_R2];
	}
	*inst++ = REX_W;
	*inst++ = MOV_r_rm;
	*inst++ = MOD_REG | (0x7 /* rdi */ << 3) | reg_lmap[SLJIT_R0];
#else
	SLJIT_ASSERT(reg_map[SLJIT_R1] == 2 && reg_map[SLJIT_R0] < 8 && reg_map[SLJIT_R2] < 8);

	inst = (sljit_u8*)ensure_buf(compiler, 1 + ((type < SLJIT_CALL3) ? 3 : 6));
	FAIL_IF(!inst);
	INC_SIZE((type < SLJIT_CALL3) ? 3 : 6);
	if (type >= SLJIT_CALL3) {
		*inst++ = REX_W | REX_R;
		*inst++ = MOV_r_rm;
		*inst++ = MOD_REG | (0x0 /* r8 */ << 3) | reg_lmap[SLJIT_R2];
	}
	*inst++ = REX_W;
	*inst++ = MOV_r_rm;
	*inst++ = MOD_REG | (0x1 /* rcx */ << 3) | reg_lmap[SLJIT_R0];
#endif
	return SLJIT_SUCCESS;
}
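
/* Fast calls push their return address on the machine stack just like
   ordinary calls: sljit_emit_fast_enter() pops that address into dst,
   and sljit_emit_fast_return() pushes src back and returns through it
   with RET. */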

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
{
	sljit_u8 *inst;

	CHECK_ERROR();
	CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw));
	ADJUST_LOCAL_OFFSET(dst, dstw);

	/* For UNUSED dst. Uncommon, but possible. */
	if (dst == SLJIT_UNUSED)
		dst = TMP_REG1;

	if (FAST_IS_REG(dst)) {
		if (reg_map[dst] < 8) {
			inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
			FAIL_IF(!inst);
			INC_SIZE(1);
			POP_REG(reg_lmap[dst]);
			return SLJIT_SUCCESS;
		}

		inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
		FAIL_IF(!inst);
		INC_SIZE(2);
		*inst++ = REX_B;
		POP_REG(reg_lmap[dst]);
		return SLJIT_SUCCESS;
	}

	/* REX_W is not necessary (src is not immediate). */
	compiler->mode32 = 1;
	inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
	FAIL_IF(!inst);
	*inst++ = POP_rm;
	return SLJIT_SUCCESS;
}

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_return(struct sljit_compiler *compiler, sljit_s32 src, sljit_sw srcw)
{
	sljit_u8 *inst;

	CHECK_ERROR();
	CHECK(check_sljit_emit_fast_return(compiler, src, srcw));
	ADJUST_LOCAL_OFFSET(src, srcw);

	if ((src & SLJIT_IMM) && NOT_HALFWORD(srcw)) {
		FAIL_IF(emit_load_imm64(compiler, TMP_REG1, srcw));
		src = TMP_REG1;
	}

	if (FAST_IS_REG(src)) {
		if (reg_map[src] < 8) {
			inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + 1);
			FAIL_IF(!inst);

			INC_SIZE(1 + 1);
			PUSH_REG(reg_lmap[src]);
		}
		else {
			inst = (sljit_u8*)ensure_buf(compiler, 1 + 2 + 1);
			FAIL_IF(!inst);

			INC_SIZE(2 + 1);
			*inst++ = REX_B;
			PUSH_REG(reg_lmap[src]);
		}
	}
	else if (src & SLJIT_MEM) {
		/* REX_W is not necessary (src is not immediate). */
		compiler->mode32 = 1;
		inst = emit_x86_instruction(compiler, 1, 0, 0, src, srcw);
		FAIL_IF(!inst);
		*inst++ = GROUP_FF;
		*inst |= PUSH_rm;

		inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
		FAIL_IF(!inst);
		INC_SIZE(1);
	}
	else {
		SLJIT_ASSERT(IS_HALFWORD(srcw));
		/* SLJIT_IMM. */
		inst = (sljit_u8*)ensure_buf(compiler, 1 + 5 + 1);
		FAIL_IF(!inst);

		INC_SIZE(5 + 1);
		*inst++ = PUSH_i32;
		sljit_unaligned_store_s32(inst, srcw);
		inst += sizeof(sljit_s32);
	}

	RET();
	return SLJIT_SUCCESS;
}


/* --------------------------------------------------------------------- */
/*  Extend input                                                         */
/* --------------------------------------------------------------------- */
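
/* Sign extension below is done with MOVSXD. Zero extension needs no
   dedicated opcode: in 64-bit mode a 32-bit mov implicitly clears the
   upper 32 bits of the destination register, so emitting the move with
   mode32 set is sufficient. */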
*/ 668 1.3 alnsn inst = (sljit_u8*)ensure_buf(compiler, 1 + 5 + 1); 669 1.2 alnsn FAIL_IF(!inst); 670 1.1 alnsn 671 1.1 alnsn INC_SIZE(5 + 1); 672 1.2 alnsn *inst++ = PUSH_i32; 673 1.4 alnsn sljit_unaligned_store_s32(inst, srcw); 674 1.3 alnsn inst += sizeof(sljit_s32); 675 1.1 alnsn } 676 1.1 alnsn 677 1.1 alnsn RET(); 678 1.1 alnsn return SLJIT_SUCCESS; 679 1.1 alnsn } 680 1.1 alnsn 681 1.1 alnsn 682 1.1 alnsn /* --------------------------------------------------------------------- */ 683 1.1 alnsn /* Extend input */ 684 1.1 alnsn /* --------------------------------------------------------------------- */ 685 1.1 alnsn 686 1.3 alnsn static sljit_s32 emit_mov_int(struct sljit_compiler *compiler, sljit_s32 sign, 687 1.3 alnsn sljit_s32 dst, sljit_sw dstw, 688 1.3 alnsn sljit_s32 src, sljit_sw srcw) 689 1.1 alnsn { 690 1.3 alnsn sljit_u8* inst; 691 1.3 alnsn sljit_s32 dst_r; 692 1.1 alnsn 693 1.1 alnsn compiler->mode32 = 0; 694 1.1 alnsn 695 1.1 alnsn if (dst == SLJIT_UNUSED && !(src & SLJIT_MEM)) 696 1.1 alnsn return SLJIT_SUCCESS; /* Empty instruction. */ 697 1.1 alnsn 698 1.1 alnsn if (src & SLJIT_IMM) { 699 1.2 alnsn if (FAST_IS_REG(dst)) { 700 1.1 alnsn if (sign || ((sljit_uw)srcw <= 0x7fffffff)) { 701 1.3 alnsn inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, (sljit_sw)(sljit_s32)srcw, dst, dstw); 702 1.2 alnsn FAIL_IF(!inst); 703 1.2 alnsn *inst = MOV_rm_i32; 704 1.1 alnsn return SLJIT_SUCCESS; 705 1.1 alnsn } 706 1.1 alnsn return emit_load_imm64(compiler, dst, srcw); 707 1.1 alnsn } 708 1.1 alnsn compiler->mode32 = 1; 709 1.3 alnsn inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, (sljit_sw)(sljit_s32)srcw, dst, dstw); 710 1.2 alnsn FAIL_IF(!inst); 711 1.2 alnsn *inst = MOV_rm_i32; 712 1.1 alnsn compiler->mode32 = 0; 713 1.1 alnsn return SLJIT_SUCCESS; 714 1.1 alnsn } 715 1.1 alnsn 716 1.2 alnsn dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1; 717 1.1 alnsn 718 1.2 alnsn if ((dst & SLJIT_MEM) && FAST_IS_REG(src)) 719 1.1 alnsn dst_r = src; 720 1.1 alnsn else { 721 1.1 alnsn if (sign) { 722 1.2 alnsn inst = emit_x86_instruction(compiler, 1, dst_r, 0, src, srcw); 723 1.2 alnsn FAIL_IF(!inst); 724 1.2 alnsn *inst++ = MOVSXD_r_rm; 725 1.1 alnsn } else { 726 1.1 alnsn compiler->mode32 = 1; 727 1.1 alnsn FAIL_IF(emit_mov(compiler, dst_r, 0, src, srcw)); 728 1.1 alnsn compiler->mode32 = 0; 729 1.1 alnsn } 730 1.1 alnsn } 731 1.1 alnsn 732 1.1 alnsn if (dst & SLJIT_MEM) { 733 1.1 alnsn compiler->mode32 = 1; 734 1.2 alnsn inst = emit_x86_instruction(compiler, 1, dst_r, 0, dst, dstw); 735 1.2 alnsn FAIL_IF(!inst); 736 1.2 alnsn *inst = MOV_rm_r; 737 1.1 alnsn compiler->mode32 = 0; 738 1.1 alnsn } 739 1.1 alnsn 740 1.1 alnsn return SLJIT_SUCCESS; 741 1.1 alnsn } 742