1848b8605Smrg/************************************************************************** 2848b8605Smrg * 3848b8605Smrg * Copyright (C) 1999-2005 Brian Paul All Rights Reserved. 4848b8605Smrg * 5848b8605Smrg * Permission is hereby granted, free of charge, to any person obtaining a 6848b8605Smrg * copy of this software and associated documentation files (the "Software"), 7848b8605Smrg * to deal in the Software without restriction, including without limitation 8848b8605Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 9848b8605Smrg * and/or sell copies of the Software, and to permit persons to whom the 10848b8605Smrg * Software is furnished to do so, subject to the following conditions: 11848b8605Smrg * 12848b8605Smrg * The above copyright notice and this permission notice shall be included 13848b8605Smrg * in all copies or substantial portions of the Software. 14848b8605Smrg * 15848b8605Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 16848b8605Smrg * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17848b8605Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18848b8605Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR 19848b8605Smrg * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 20848b8605Smrg * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 21848b8605Smrg * OTHER DEALINGS IN THE SOFTWARE. 22848b8605Smrg * 23848b8605Smrg **************************************************************************/ 24848b8605Smrg 25848b8605Smrg#include "pipe/p_config.h" 26848b8605Smrg#include "util/u_cpu_detect.h" 27848b8605Smrg 28b8e80941Smrg#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) 29848b8605Smrg 30848b8605Smrg#include "pipe/p_compiler.h" 31848b8605Smrg#include "util/u_debug.h" 32848b8605Smrg#include "util/u_pointer.h" 33848b8605Smrg 34848b8605Smrg#include "rtasm_execmem.h" 35848b8605Smrg#include "rtasm_x86sse.h" 36848b8605Smrg 37848b8605Smrg#define DISASSEM 0 38848b8605Smrg#define X86_TWOB 0x0f 39848b8605Smrg 40848b8605Smrg 41848b8605Smrg#define DUMP_SSE 0 42848b8605Smrg 43848b8605Smrg 44848b8605Smrgvoid x86_print_reg( struct x86_reg reg ) 45848b8605Smrg{ 46848b8605Smrg if (reg.mod != mod_REG) 47848b8605Smrg debug_printf( "[" ); 48848b8605Smrg 49848b8605Smrg switch( reg.file ) { 50848b8605Smrg case file_REG32: 51848b8605Smrg switch( reg.idx ) { 52848b8605Smrg case reg_AX: debug_printf( "EAX" ); break; 53848b8605Smrg case reg_CX: debug_printf( "ECX" ); break; 54848b8605Smrg case reg_DX: debug_printf( "EDX" ); break; 55848b8605Smrg case reg_BX: debug_printf( "EBX" ); break; 56848b8605Smrg case reg_SP: debug_printf( "ESP" ); break; 57848b8605Smrg case reg_BP: debug_printf( "EBP" ); break; 58848b8605Smrg case reg_SI: debug_printf( "ESI" ); break; 59848b8605Smrg case reg_DI: debug_printf( "EDI" ); break; 60848b8605Smrg } 61848b8605Smrg break; 62848b8605Smrg case file_MMX: 63848b8605Smrg debug_printf( "MMX%u", reg.idx ); 64848b8605Smrg break; 65848b8605Smrg case file_XMM: 66848b8605Smrg debug_printf( "XMM%u", reg.idx ); 67848b8605Smrg break; 68848b8605Smrg case file_x87: 69848b8605Smrg debug_printf( "fp%u", reg.idx ); 70848b8605Smrg break; 71848b8605Smrg } 72848b8605Smrg 73848b8605Smrg if (reg.mod == mod_DISP8 || 74848b8605Smrg reg.mod == mod_DISP32) 75848b8605Smrg debug_printf("+%d", reg.disp); 76848b8605Smrg 77848b8605Smrg if (reg.mod != mod_REG) 78848b8605Smrg debug_printf( "]" ); 79848b8605Smrg} 80848b8605Smrg 81848b8605Smrg#if DUMP_SSE 82848b8605Smrg 83848b8605Smrg#define DUMP_START() debug_printf( "\n" ) 84848b8605Smrg#define DUMP_END() debug_printf( "\n" ) 85848b8605Smrg 86848b8605Smrg#define DUMP() do { \ 87848b8605Smrg const char *foo = __FUNCTION__; \ 88848b8605Smrg while (*foo && *foo != '_') \ 89848b8605Smrg foo++; \ 90848b8605Smrg if (*foo) \ 91848b8605Smrg foo++; \ 92848b8605Smrg debug_printf( "\n%4x %14s ", p->csr - p->store, foo ); \ 93848b8605Smrg} while (0) 94848b8605Smrg 95848b8605Smrg#define DUMP_I( I ) do { \ 96848b8605Smrg DUMP(); \ 97848b8605Smrg debug_printf( "%u", I ); \ 98848b8605Smrg} while( 0 ) 99848b8605Smrg 100848b8605Smrg#define DUMP_R( R0 ) do { \ 101848b8605Smrg DUMP(); \ 102848b8605Smrg x86_print_reg( R0 ); \ 103848b8605Smrg} while( 0 ) 104848b8605Smrg 105848b8605Smrg#define DUMP_RR( R0, R1 ) do { \ 106848b8605Smrg DUMP(); \ 107848b8605Smrg x86_print_reg( R0 ); \ 108848b8605Smrg debug_printf( ", " ); \ 109848b8605Smrg x86_print_reg( R1 ); \ 110848b8605Smrg} while( 0 ) 111848b8605Smrg 112848b8605Smrg#define DUMP_RI( R0, I ) do { \ 113848b8605Smrg DUMP(); \ 114848b8605Smrg x86_print_reg( R0 ); \ 115848b8605Smrg debug_printf( ", %u", I ); \ 116848b8605Smrg} while( 0 ) 117848b8605Smrg 118848b8605Smrg#define DUMP_RRI( R0, R1, I ) do { \ 119848b8605Smrg DUMP(); \ 120848b8605Smrg x86_print_reg( R0 ); \ 121848b8605Smrg debug_printf( ", " ); \ 122848b8605Smrg x86_print_reg( R1 ); \ 123848b8605Smrg debug_printf( ", %u", I ); \ 124848b8605Smrg} while( 0 ) 125848b8605Smrg 126848b8605Smrg#else 127848b8605Smrg 128848b8605Smrg#define DUMP_START() 129848b8605Smrg#define DUMP_END() 130848b8605Smrg#define DUMP( ) 131848b8605Smrg#define DUMP_I( I ) 132848b8605Smrg#define DUMP_R( R0 ) 133848b8605Smrg#define DUMP_RR( R0, R1 ) 134848b8605Smrg#define DUMP_RI( R0, I ) 135848b8605Smrg#define DUMP_RRI( R0, R1, I ) 136848b8605Smrg 137848b8605Smrg#endif 138848b8605Smrg 139848b8605Smrg 140848b8605Smrgstatic void do_realloc( struct x86_function *p ) 141848b8605Smrg{ 142848b8605Smrg if (p->store == p->error_overflow) { 143848b8605Smrg p->csr = p->store; 144848b8605Smrg } 145848b8605Smrg else if (p->size == 0) { 146848b8605Smrg p->size = 1024; 147848b8605Smrg p->store = rtasm_exec_malloc(p->size); 148848b8605Smrg p->csr = p->store; 149848b8605Smrg } 150848b8605Smrg else { 151848b8605Smrg uintptr_t used = pointer_to_uintptr( p->csr ) - pointer_to_uintptr( p->store ); 152848b8605Smrg unsigned char *tmp = p->store; 153848b8605Smrg p->size *= 2; 154848b8605Smrg p->store = rtasm_exec_malloc(p->size); 155848b8605Smrg 156848b8605Smrg if (p->store) { 157848b8605Smrg memcpy(p->store, tmp, used); 158848b8605Smrg p->csr = p->store + used; 159848b8605Smrg } 160848b8605Smrg else { 161848b8605Smrg p->csr = p->store; 162848b8605Smrg } 163848b8605Smrg 164848b8605Smrg rtasm_exec_free(tmp); 165848b8605Smrg } 166848b8605Smrg 167848b8605Smrg if (p->store == NULL) { 168848b8605Smrg p->store = p->csr = p->error_overflow; 169848b8605Smrg p->size = sizeof(p->error_overflow); 170848b8605Smrg } 171848b8605Smrg} 172848b8605Smrg 173848b8605Smrg/* Emit bytes to the instruction stream: 174848b8605Smrg */ 175848b8605Smrgstatic unsigned char *reserve( struct x86_function *p, int bytes ) 176848b8605Smrg{ 177848b8605Smrg if (p->csr + bytes - p->store > (int) p->size) 178848b8605Smrg do_realloc(p); 179848b8605Smrg 180848b8605Smrg { 181848b8605Smrg unsigned char *csr = p->csr; 182848b8605Smrg p->csr += bytes; 183848b8605Smrg return csr; 184848b8605Smrg } 185848b8605Smrg} 186848b8605Smrg 187848b8605Smrg 188848b8605Smrg 189848b8605Smrgstatic void emit_1b( struct x86_function *p, char b0 ) 190848b8605Smrg{ 191848b8605Smrg char *csr = (char *)reserve(p, 1); 192848b8605Smrg *csr = b0; 193848b8605Smrg} 194848b8605Smrg 195848b8605Smrgstatic void emit_1i( struct x86_function *p, int i0 ) 196848b8605Smrg{ 197848b8605Smrg int *icsr = (int *)reserve(p, sizeof(i0)); 198848b8605Smrg *icsr = i0; 199848b8605Smrg} 200848b8605Smrg 201848b8605Smrgstatic void emit_1ub( struct x86_function *p, unsigned char b0 ) 202848b8605Smrg{ 203848b8605Smrg unsigned char *csr = reserve(p, 1); 204848b8605Smrg *csr++ = b0; 205848b8605Smrg} 206848b8605Smrg 207848b8605Smrgstatic void emit_2ub( struct x86_function *p, unsigned char b0, unsigned char b1 ) 208848b8605Smrg{ 209848b8605Smrg unsigned char *csr = reserve(p, 2); 210848b8605Smrg *csr++ = b0; 211848b8605Smrg *csr++ = b1; 212848b8605Smrg} 213848b8605Smrg 214848b8605Smrgstatic void emit_3ub( struct x86_function *p, unsigned char b0, unsigned char b1, unsigned char b2 ) 215848b8605Smrg{ 216848b8605Smrg unsigned char *csr = reserve(p, 3); 217848b8605Smrg *csr++ = b0; 218848b8605Smrg *csr++ = b1; 219848b8605Smrg *csr++ = b2; 220848b8605Smrg} 221848b8605Smrg 222848b8605Smrg 223848b8605Smrg/* Build a modRM byte + possible displacement. No treatment of SIB 224848b8605Smrg * indexing. BZZT - no way to encode an absolute address. 225848b8605Smrg * 226848b8605Smrg * This is the "/r" field in the x86 manuals... 227848b8605Smrg */ 228848b8605Smrgstatic void emit_modrm( struct x86_function *p, 229848b8605Smrg struct x86_reg reg, 230848b8605Smrg struct x86_reg regmem ) 231848b8605Smrg{ 232848b8605Smrg unsigned char val = 0; 233848b8605Smrg 234848b8605Smrg assert(reg.mod == mod_REG); 235848b8605Smrg 236848b8605Smrg /* TODO: support extended x86-64 registers */ 237848b8605Smrg assert(reg.idx < 8); 238848b8605Smrg assert(regmem.idx < 8); 239848b8605Smrg 240848b8605Smrg val |= regmem.mod << 6; /* mod field */ 241848b8605Smrg val |= reg.idx << 3; /* reg field */ 242848b8605Smrg val |= regmem.idx; /* r/m field */ 243848b8605Smrg 244848b8605Smrg emit_1ub(p, val); 245848b8605Smrg 246848b8605Smrg /* Oh-oh we've stumbled into the SIB thing. 247848b8605Smrg */ 248848b8605Smrg if (regmem.file == file_REG32 && 249848b8605Smrg regmem.idx == reg_SP && 250848b8605Smrg regmem.mod != mod_REG) { 251848b8605Smrg emit_1ub(p, 0x24); /* simplistic! */ 252848b8605Smrg } 253848b8605Smrg 254848b8605Smrg switch (regmem.mod) { 255848b8605Smrg case mod_REG: 256848b8605Smrg case mod_INDIRECT: 257848b8605Smrg break; 258848b8605Smrg case mod_DISP8: 259848b8605Smrg emit_1b(p, (char) regmem.disp); 260848b8605Smrg break; 261848b8605Smrg case mod_DISP32: 262848b8605Smrg emit_1i(p, regmem.disp); 263848b8605Smrg break; 264848b8605Smrg default: 265848b8605Smrg assert(0); 266848b8605Smrg break; 267848b8605Smrg } 268848b8605Smrg} 269848b8605Smrg 270848b8605Smrg/* Emits the "/0".."/7" specialized versions of the modrm ("/r") bytes. 271848b8605Smrg */ 272848b8605Smrgstatic void emit_modrm_noreg( struct x86_function *p, 273848b8605Smrg unsigned op, 274848b8605Smrg struct x86_reg regmem ) 275848b8605Smrg{ 276848b8605Smrg struct x86_reg dummy = x86_make_reg(file_REG32, op); 277848b8605Smrg emit_modrm(p, dummy, regmem); 278848b8605Smrg} 279848b8605Smrg 280848b8605Smrg/* Many x86 instructions have two opcodes to cope with the situations 281848b8605Smrg * where the destination is a register or memory reference 282848b8605Smrg * respectively. This function selects the correct opcode based on 283848b8605Smrg * the arguments presented. 284848b8605Smrg */ 285848b8605Smrgstatic void emit_op_modrm( struct x86_function *p, 286848b8605Smrg unsigned char op_dst_is_reg, 287848b8605Smrg unsigned char op_dst_is_mem, 288848b8605Smrg struct x86_reg dst, 289848b8605Smrg struct x86_reg src ) 290848b8605Smrg{ 291848b8605Smrg switch (dst.mod) { 292848b8605Smrg case mod_REG: 293848b8605Smrg emit_1ub(p, op_dst_is_reg); 294848b8605Smrg emit_modrm(p, dst, src); 295848b8605Smrg break; 296848b8605Smrg case mod_INDIRECT: 297848b8605Smrg case mod_DISP32: 298848b8605Smrg case mod_DISP8: 299848b8605Smrg assert(src.mod == mod_REG); 300848b8605Smrg emit_1ub(p, op_dst_is_mem); 301848b8605Smrg emit_modrm(p, src, dst); 302848b8605Smrg break; 303848b8605Smrg default: 304848b8605Smrg assert(0); 305848b8605Smrg break; 306848b8605Smrg } 307848b8605Smrg} 308848b8605Smrg 309848b8605Smrg 310848b8605Smrg 311848b8605Smrg 312848b8605Smrg 313848b8605Smrg 314848b8605Smrg 315848b8605Smrg/* Create and manipulate registers and regmem values: 316848b8605Smrg */ 317848b8605Smrgstruct x86_reg x86_make_reg( enum x86_reg_file file, 318848b8605Smrg enum x86_reg_name idx ) 319848b8605Smrg{ 320848b8605Smrg struct x86_reg reg; 321848b8605Smrg 322848b8605Smrg reg.file = file; 323848b8605Smrg reg.idx = idx; 324848b8605Smrg reg.mod = mod_REG; 325848b8605Smrg reg.disp = 0; 326848b8605Smrg 327848b8605Smrg return reg; 328848b8605Smrg} 329848b8605Smrg 330848b8605Smrgstruct x86_reg x86_make_disp( struct x86_reg reg, 331848b8605Smrg int disp ) 332848b8605Smrg{ 333848b8605Smrg assert(reg.file == file_REG32); 334848b8605Smrg 335848b8605Smrg if (reg.mod == mod_REG) 336848b8605Smrg reg.disp = disp; 337848b8605Smrg else 338848b8605Smrg reg.disp += disp; 339848b8605Smrg 340848b8605Smrg if (reg.disp == 0 && reg.idx != reg_BP) 341848b8605Smrg reg.mod = mod_INDIRECT; 342848b8605Smrg else if (reg.disp <= 127 && reg.disp >= -128) 343848b8605Smrg reg.mod = mod_DISP8; 344848b8605Smrg else 345848b8605Smrg reg.mod = mod_DISP32; 346848b8605Smrg 347848b8605Smrg return reg; 348848b8605Smrg} 349848b8605Smrg 350848b8605Smrgstruct x86_reg x86_deref( struct x86_reg reg ) 351848b8605Smrg{ 352848b8605Smrg return x86_make_disp(reg, 0); 353848b8605Smrg} 354848b8605Smrg 355848b8605Smrgstruct x86_reg x86_get_base_reg( struct x86_reg reg ) 356848b8605Smrg{ 357848b8605Smrg return x86_make_reg( reg.file, reg.idx ); 358848b8605Smrg} 359848b8605Smrg 360848b8605Smrgint x86_get_label( struct x86_function *p ) 361848b8605Smrg{ 362848b8605Smrg return p->csr - p->store; 363848b8605Smrg} 364848b8605Smrg 365848b8605Smrg 366848b8605Smrg 367848b8605Smrg/*********************************************************************** 368848b8605Smrg * x86 instructions 369848b8605Smrg */ 370848b8605Smrg 371848b8605Smrg 372848b8605Smrgvoid x64_rexw(struct x86_function *p) 373848b8605Smrg{ 374848b8605Smrg if(x86_target(p) != X86_32) 375848b8605Smrg emit_1ub(p, 0x48); 376848b8605Smrg} 377848b8605Smrg 378848b8605Smrgvoid x86_jcc( struct x86_function *p, 379848b8605Smrg enum x86_cc cc, 380848b8605Smrg int label ) 381848b8605Smrg{ 382848b8605Smrg int offset = label - (x86_get_label(p) + 2); 383848b8605Smrg DUMP_I(cc); 384848b8605Smrg 385848b8605Smrg if (offset < 0) { 386848b8605Smrg /*assert(p->csr - p->store > -offset);*/ 387848b8605Smrg if (p->csr - p->store <= -offset) { 388848b8605Smrg /* probably out of memory (using the error_overflow buffer) */ 389848b8605Smrg return; 390848b8605Smrg } 391848b8605Smrg } 392848b8605Smrg 393848b8605Smrg if (offset <= 127 && offset >= -128) { 394848b8605Smrg emit_1ub(p, 0x70 + cc); 395848b8605Smrg emit_1b(p, (char) offset); 396848b8605Smrg } 397848b8605Smrg else { 398848b8605Smrg offset = label - (x86_get_label(p) + 6); 399848b8605Smrg emit_2ub(p, 0x0f, 0x80 + cc); 400848b8605Smrg emit_1i(p, offset); 401848b8605Smrg } 402848b8605Smrg} 403848b8605Smrg 404848b8605Smrg/* Always use a 32bit offset for forward jumps: 405848b8605Smrg */ 406848b8605Smrgint x86_jcc_forward( struct x86_function *p, 407848b8605Smrg enum x86_cc cc ) 408848b8605Smrg{ 409848b8605Smrg DUMP_I(cc); 410848b8605Smrg emit_2ub(p, 0x0f, 0x80 + cc); 411848b8605Smrg emit_1i(p, 0); 412848b8605Smrg return x86_get_label(p); 413848b8605Smrg} 414848b8605Smrg 415848b8605Smrgint x86_jmp_forward( struct x86_function *p) 416848b8605Smrg{ 417848b8605Smrg DUMP(); 418848b8605Smrg emit_1ub(p, 0xe9); 419848b8605Smrg emit_1i(p, 0); 420848b8605Smrg return x86_get_label(p); 421848b8605Smrg} 422848b8605Smrg 423848b8605Smrgint x86_call_forward( struct x86_function *p) 424848b8605Smrg{ 425848b8605Smrg DUMP(); 426848b8605Smrg 427848b8605Smrg emit_1ub(p, 0xe8); 428848b8605Smrg emit_1i(p, 0); 429848b8605Smrg return x86_get_label(p); 430848b8605Smrg} 431848b8605Smrg 432848b8605Smrg/* Fixup offset from forward jump: 433848b8605Smrg */ 434848b8605Smrgvoid x86_fixup_fwd_jump( struct x86_function *p, 435848b8605Smrg int fixup ) 436848b8605Smrg{ 437848b8605Smrg *(int *)(p->store + fixup - 4) = x86_get_label(p) - fixup; 438848b8605Smrg} 439848b8605Smrg 440848b8605Smrgvoid x86_jmp( struct x86_function *p, int label) 441848b8605Smrg{ 442848b8605Smrg DUMP_I( label ); 443848b8605Smrg emit_1ub(p, 0xe9); 444848b8605Smrg emit_1i(p, label - x86_get_label(p) - 4); 445848b8605Smrg} 446848b8605Smrg 447848b8605Smrgvoid x86_call( struct x86_function *p, struct x86_reg reg) 448848b8605Smrg{ 449848b8605Smrg DUMP_R( reg ); 450848b8605Smrg emit_1ub(p, 0xff); 451848b8605Smrg emit_modrm_noreg(p, 2, reg); 452848b8605Smrg} 453848b8605Smrg 454848b8605Smrg 455848b8605Smrgvoid x86_mov_reg_imm( struct x86_function *p, struct x86_reg dst, int imm ) 456848b8605Smrg{ 457848b8605Smrg DUMP_RI( dst, imm ); 458848b8605Smrg assert(dst.file == file_REG32); 459848b8605Smrg assert(dst.mod == mod_REG); 460848b8605Smrg emit_1ub(p, 0xb8 + dst.idx); 461848b8605Smrg emit_1i(p, imm); 462848b8605Smrg} 463848b8605Smrg 464848b8605Smrgvoid x86_mov_imm( struct x86_function *p, struct x86_reg dst, int imm ) 465848b8605Smrg{ 466848b8605Smrg DUMP_RI( dst, imm ); 467848b8605Smrg if(dst.mod == mod_REG) 468848b8605Smrg x86_mov_reg_imm(p, dst, imm); 469848b8605Smrg else 470848b8605Smrg { 471848b8605Smrg emit_1ub(p, 0xc7); 472848b8605Smrg emit_modrm_noreg(p, 0, dst); 473848b8605Smrg emit_1i(p, imm); 474848b8605Smrg } 475848b8605Smrg} 476848b8605Smrg 477848b8605Smrgvoid x86_mov16_imm( struct x86_function *p, struct x86_reg dst, uint16_t imm ) 478848b8605Smrg{ 479848b8605Smrg DUMP_RI( dst, imm ); 480848b8605Smrg emit_1ub(p, 0x66); 481848b8605Smrg if(dst.mod == mod_REG) 482848b8605Smrg { 483848b8605Smrg emit_1ub(p, 0xb8 + dst.idx); 484848b8605Smrg emit_2ub(p, imm & 0xff, imm >> 8); 485848b8605Smrg } 486848b8605Smrg else 487848b8605Smrg { 488848b8605Smrg emit_1ub(p, 0xc7); 489848b8605Smrg emit_modrm_noreg(p, 0, dst); 490848b8605Smrg emit_2ub(p, imm & 0xff, imm >> 8); 491848b8605Smrg } 492848b8605Smrg} 493848b8605Smrg 494848b8605Smrgvoid x86_mov8_imm( struct x86_function *p, struct x86_reg dst, uint8_t imm ) 495848b8605Smrg{ 496848b8605Smrg DUMP_RI( dst, imm ); 497848b8605Smrg if(dst.mod == mod_REG) 498848b8605Smrg { 499848b8605Smrg emit_1ub(p, 0xb0 + dst.idx); 500848b8605Smrg emit_1ub(p, imm); 501848b8605Smrg } 502848b8605Smrg else 503848b8605Smrg { 504848b8605Smrg emit_1ub(p, 0xc6); 505848b8605Smrg emit_modrm_noreg(p, 0, dst); 506848b8605Smrg emit_1ub(p, imm); 507848b8605Smrg } 508848b8605Smrg} 509848b8605Smrg 510848b8605Smrg/** 511848b8605Smrg * Immediate group 1 instructions. 512848b8605Smrg */ 513b8e80941Smrgstatic inline void 514848b8605Smrgx86_group1_imm( struct x86_function *p, 515848b8605Smrg unsigned op, struct x86_reg dst, int imm ) 516848b8605Smrg{ 517848b8605Smrg assert(dst.file == file_REG32); 518848b8605Smrg assert(dst.mod == mod_REG); 519848b8605Smrg if(-0x80 <= imm && imm < 0x80) { 520848b8605Smrg emit_1ub(p, 0x83); 521848b8605Smrg emit_modrm_noreg(p, op, dst); 522848b8605Smrg emit_1b(p, (char)imm); 523848b8605Smrg } 524848b8605Smrg else { 525848b8605Smrg emit_1ub(p, 0x81); 526848b8605Smrg emit_modrm_noreg(p, op, dst); 527848b8605Smrg emit_1i(p, imm); 528848b8605Smrg } 529848b8605Smrg} 530848b8605Smrg 531848b8605Smrgvoid x86_add_imm( struct x86_function *p, struct x86_reg dst, int imm ) 532848b8605Smrg{ 533848b8605Smrg DUMP_RI( dst, imm ); 534848b8605Smrg x86_group1_imm(p, 0, dst, imm); 535848b8605Smrg} 536848b8605Smrg 537848b8605Smrgvoid x86_or_imm( struct x86_function *p, struct x86_reg dst, int imm ) 538848b8605Smrg{ 539848b8605Smrg DUMP_RI( dst, imm ); 540848b8605Smrg x86_group1_imm(p, 1, dst, imm); 541848b8605Smrg} 542848b8605Smrg 543848b8605Smrgvoid x86_and_imm( struct x86_function *p, struct x86_reg dst, int imm ) 544848b8605Smrg{ 545848b8605Smrg DUMP_RI( dst, imm ); 546848b8605Smrg x86_group1_imm(p, 4, dst, imm); 547848b8605Smrg} 548848b8605Smrg 549848b8605Smrgvoid x86_sub_imm( struct x86_function *p, struct x86_reg dst, int imm ) 550848b8605Smrg{ 551848b8605Smrg DUMP_RI( dst, imm ); 552848b8605Smrg x86_group1_imm(p, 5, dst, imm); 553848b8605Smrg} 554848b8605Smrg 555848b8605Smrgvoid x86_xor_imm( struct x86_function *p, struct x86_reg dst, int imm ) 556848b8605Smrg{ 557848b8605Smrg DUMP_RI( dst, imm ); 558848b8605Smrg x86_group1_imm(p, 6, dst, imm); 559848b8605Smrg} 560848b8605Smrg 561848b8605Smrgvoid x86_cmp_imm( struct x86_function *p, struct x86_reg dst, int imm ) 562848b8605Smrg{ 563848b8605Smrg DUMP_RI( dst, imm ); 564848b8605Smrg x86_group1_imm(p, 7, dst, imm); 565848b8605Smrg} 566848b8605Smrg 567848b8605Smrg 568848b8605Smrgvoid x86_push( struct x86_function *p, 569848b8605Smrg struct x86_reg reg ) 570848b8605Smrg{ 571848b8605Smrg DUMP_R( reg ); 572848b8605Smrg if (reg.mod == mod_REG) 573848b8605Smrg emit_1ub(p, 0x50 + reg.idx); 574848b8605Smrg else 575848b8605Smrg { 576848b8605Smrg emit_1ub(p, 0xff); 577848b8605Smrg emit_modrm_noreg(p, 6, reg); 578848b8605Smrg } 579848b8605Smrg 580848b8605Smrg 581848b8605Smrg p->stack_offset += sizeof(void*); 582848b8605Smrg} 583848b8605Smrg 584848b8605Smrgvoid x86_push_imm32( struct x86_function *p, 585848b8605Smrg int imm32 ) 586848b8605Smrg{ 587848b8605Smrg DUMP_I( imm32 ); 588848b8605Smrg emit_1ub(p, 0x68); 589848b8605Smrg emit_1i(p, imm32); 590848b8605Smrg 591848b8605Smrg p->stack_offset += sizeof(void*); 592848b8605Smrg} 593848b8605Smrg 594848b8605Smrg 595848b8605Smrgvoid x86_pop( struct x86_function *p, 596848b8605Smrg struct x86_reg reg ) 597848b8605Smrg{ 598848b8605Smrg DUMP_R( reg ); 599848b8605Smrg assert(reg.mod == mod_REG); 600848b8605Smrg emit_1ub(p, 0x58 + reg.idx); 601848b8605Smrg p->stack_offset -= sizeof(void*); 602848b8605Smrg} 603848b8605Smrg 604848b8605Smrgvoid x86_inc( struct x86_function *p, 605848b8605Smrg struct x86_reg reg ) 606848b8605Smrg{ 607848b8605Smrg DUMP_R( reg ); 608848b8605Smrg if(x86_target(p) == X86_32 && reg.mod == mod_REG) 609848b8605Smrg { 610848b8605Smrg emit_1ub(p, 0x40 + reg.idx); 611848b8605Smrg return; 612848b8605Smrg } 613848b8605Smrg emit_1ub(p, 0xff); 614848b8605Smrg emit_modrm_noreg(p, 0, reg); 615848b8605Smrg} 616848b8605Smrg 617848b8605Smrgvoid x86_dec( struct x86_function *p, 618848b8605Smrg struct x86_reg reg ) 619848b8605Smrg{ 620848b8605Smrg DUMP_R( reg ); 621848b8605Smrg if(x86_target(p) == X86_32 && reg.mod == mod_REG) 622848b8605Smrg { 623848b8605Smrg emit_1ub(p, 0x48 + reg.idx); 624848b8605Smrg return; 625848b8605Smrg } 626848b8605Smrg emit_1ub(p, 0xff); 627848b8605Smrg emit_modrm_noreg(p, 1, reg); 628848b8605Smrg} 629848b8605Smrg 630848b8605Smrgvoid x86_ret( struct x86_function *p ) 631848b8605Smrg{ 632848b8605Smrg DUMP(); 633848b8605Smrg assert(p->stack_offset == 0); 634848b8605Smrg emit_1ub(p, 0xc3); 635848b8605Smrg} 636848b8605Smrg 637848b8605Smrgvoid x86_retw( struct x86_function *p, unsigned short imm ) 638848b8605Smrg{ 639848b8605Smrg DUMP(); 640848b8605Smrg emit_3ub(p, 0xc2, imm & 0xff, (imm >> 8) & 0xff); 641848b8605Smrg} 642848b8605Smrg 643848b8605Smrgvoid x86_sahf( struct x86_function *p ) 644848b8605Smrg{ 645848b8605Smrg DUMP(); 646848b8605Smrg emit_1ub(p, 0x9e); 647848b8605Smrg} 648848b8605Smrg 649848b8605Smrgvoid x86_mov( struct x86_function *p, 650848b8605Smrg struct x86_reg dst, 651848b8605Smrg struct x86_reg src ) 652848b8605Smrg{ 653848b8605Smrg DUMP_RR( dst, src ); 654848b8605Smrg /* special hack for reading arguments until we support x86-64 registers everywhere */ 655848b8605Smrg if(src.mod == mod_REG && dst.mod == mod_REG && (src.idx >= 8 || dst.idx >= 8)) 656848b8605Smrg { 657848b8605Smrg uint8_t rex = 0x40; 658848b8605Smrg if(dst.idx >= 8) 659848b8605Smrg { 660848b8605Smrg rex |= 4; 661848b8605Smrg dst.idx -= 8; 662848b8605Smrg } 663848b8605Smrg if(src.idx >= 8) 664848b8605Smrg { 665848b8605Smrg rex |= 1; 666848b8605Smrg src.idx -= 8; 667848b8605Smrg } 668848b8605Smrg emit_1ub(p, rex); 669848b8605Smrg } 670848b8605Smrg emit_op_modrm( p, 0x8b, 0x89, dst, src ); 671848b8605Smrg} 672848b8605Smrg 673848b8605Smrgvoid x86_mov16( struct x86_function *p, 674848b8605Smrg struct x86_reg dst, 675848b8605Smrg struct x86_reg src ) 676848b8605Smrg{ 677848b8605Smrg DUMP_RR( dst, src ); 678848b8605Smrg emit_1ub(p, 0x66); 679848b8605Smrg emit_op_modrm( p, 0x8b, 0x89, dst, src ); 680848b8605Smrg} 681848b8605Smrg 682848b8605Smrgvoid x86_mov8( struct x86_function *p, 683848b8605Smrg struct x86_reg dst, 684848b8605Smrg struct x86_reg src ) 685848b8605Smrg{ 686848b8605Smrg DUMP_RR( dst, src ); 687848b8605Smrg emit_op_modrm( p, 0x8a, 0x88, dst, src ); 688848b8605Smrg} 689848b8605Smrg 690848b8605Smrgvoid x64_mov64( struct x86_function *p, 691848b8605Smrg struct x86_reg dst, 692848b8605Smrg struct x86_reg src ) 693848b8605Smrg{ 694848b8605Smrg uint8_t rex = 0x48; 695848b8605Smrg DUMP_RR( dst, src ); 696848b8605Smrg assert(x86_target(p) != X86_32); 697848b8605Smrg 698848b8605Smrg /* special hack for reading arguments until we support x86-64 registers everywhere */ 699848b8605Smrg if(src.mod == mod_REG && dst.mod == mod_REG && (src.idx >= 8 || dst.idx >= 8)) 700848b8605Smrg { 701848b8605Smrg if(dst.idx >= 8) 702848b8605Smrg { 703848b8605Smrg rex |= 4; 704848b8605Smrg dst.idx -= 8; 705848b8605Smrg } 706848b8605Smrg if(src.idx >= 8) 707848b8605Smrg { 708848b8605Smrg rex |= 1; 709848b8605Smrg src.idx -= 8; 710848b8605Smrg } 711848b8605Smrg } 712848b8605Smrg emit_1ub(p, rex); 713848b8605Smrg emit_op_modrm( p, 0x8b, 0x89, dst, src ); 714848b8605Smrg} 715848b8605Smrg 716848b8605Smrgvoid x86_movzx8(struct x86_function *p, struct x86_reg dst, struct x86_reg src ) 717848b8605Smrg{ 718848b8605Smrg DUMP_RR( dst, src ); 719848b8605Smrg emit_2ub(p, 0x0f, 0xb6); 720848b8605Smrg emit_modrm(p, dst, src); 721848b8605Smrg} 722848b8605Smrg 723848b8605Smrgvoid x86_movzx16(struct x86_function *p, struct x86_reg dst, struct x86_reg src ) 724848b8605Smrg{ 725848b8605Smrg DUMP_RR( dst, src ); 726848b8605Smrg emit_2ub(p, 0x0f, 0xb7); 727848b8605Smrg emit_modrm(p, dst, src); 728848b8605Smrg} 729848b8605Smrg 730848b8605Smrgvoid x86_cmovcc( struct x86_function *p, 731848b8605Smrg struct x86_reg dst, 732848b8605Smrg struct x86_reg src, 733848b8605Smrg enum x86_cc cc) 734848b8605Smrg{ 735848b8605Smrg DUMP_RRI( dst, src, cc ); 736848b8605Smrg emit_2ub( p, 0x0f, 0x40 + cc ); 737848b8605Smrg emit_modrm( p, dst, src ); 738848b8605Smrg} 739848b8605Smrg 740848b8605Smrgvoid x86_xor( struct x86_function *p, 741848b8605Smrg struct x86_reg dst, 742848b8605Smrg struct x86_reg src ) 743848b8605Smrg{ 744848b8605Smrg DUMP_RR( dst, src ); 745848b8605Smrg emit_op_modrm( p, 0x33, 0x31, dst, src ); 746848b8605Smrg} 747848b8605Smrg 748848b8605Smrgvoid x86_cmp( struct x86_function *p, 749848b8605Smrg struct x86_reg dst, 750848b8605Smrg struct x86_reg src ) 751848b8605Smrg{ 752848b8605Smrg DUMP_RR( dst, src ); 753848b8605Smrg emit_op_modrm( p, 0x3b, 0x39, dst, src ); 754848b8605Smrg} 755848b8605Smrg 756848b8605Smrgvoid x86_lea( struct x86_function *p, 757848b8605Smrg struct x86_reg dst, 758848b8605Smrg struct x86_reg src ) 759848b8605Smrg{ 760848b8605Smrg DUMP_RR( dst, src ); 761848b8605Smrg emit_1ub(p, 0x8d); 762848b8605Smrg emit_modrm( p, dst, src ); 763848b8605Smrg} 764848b8605Smrg 765848b8605Smrgvoid x86_test( struct x86_function *p, 766848b8605Smrg struct x86_reg dst, 767848b8605Smrg struct x86_reg src ) 768848b8605Smrg{ 769848b8605Smrg DUMP_RR( dst, src ); 770848b8605Smrg emit_1ub(p, 0x85); 771848b8605Smrg emit_modrm( p, dst, src ); 772848b8605Smrg} 773848b8605Smrg 774848b8605Smrgvoid x86_add( struct x86_function *p, 775848b8605Smrg struct x86_reg dst, 776848b8605Smrg struct x86_reg src ) 777848b8605Smrg{ 778848b8605Smrg DUMP_RR( dst, src ); 779848b8605Smrg emit_op_modrm(p, 0x03, 0x01, dst, src ); 780848b8605Smrg} 781848b8605Smrg 782848b8605Smrg/* Calculate EAX * src, results in EDX:EAX. 783848b8605Smrg */ 784848b8605Smrgvoid x86_mul( struct x86_function *p, 785848b8605Smrg struct x86_reg src ) 786848b8605Smrg{ 787848b8605Smrg DUMP_R( src ); 788848b8605Smrg emit_1ub(p, 0xf7); 789848b8605Smrg emit_modrm_noreg(p, 4, src ); 790848b8605Smrg} 791848b8605Smrg 792848b8605Smrg 793848b8605Smrgvoid x86_imul( struct x86_function *p, 794848b8605Smrg struct x86_reg dst, 795848b8605Smrg struct x86_reg src ) 796848b8605Smrg{ 797848b8605Smrg DUMP_RR( dst, src ); 798848b8605Smrg emit_2ub(p, X86_TWOB, 0xAF); 799848b8605Smrg emit_modrm(p, dst, src); 800848b8605Smrg} 801848b8605Smrg 802848b8605Smrg 803848b8605Smrgvoid x86_sub( struct x86_function *p, 804848b8605Smrg struct x86_reg dst, 805848b8605Smrg struct x86_reg src ) 806848b8605Smrg{ 807848b8605Smrg DUMP_RR( dst, src ); 808848b8605Smrg emit_op_modrm(p, 0x2b, 0x29, dst, src ); 809848b8605Smrg} 810848b8605Smrg 811848b8605Smrgvoid x86_or( struct x86_function *p, 812848b8605Smrg struct x86_reg dst, 813848b8605Smrg struct x86_reg src ) 814848b8605Smrg{ 815848b8605Smrg DUMP_RR( dst, src ); 816848b8605Smrg emit_op_modrm( p, 0x0b, 0x09, dst, src ); 817848b8605Smrg} 818848b8605Smrg 819848b8605Smrgvoid x86_and( struct x86_function *p, 820848b8605Smrg struct x86_reg dst, 821848b8605Smrg struct x86_reg src ) 822848b8605Smrg{ 823848b8605Smrg DUMP_RR( dst, src ); 824848b8605Smrg emit_op_modrm( p, 0x23, 0x21, dst, src ); 825848b8605Smrg} 826848b8605Smrg 827848b8605Smrgvoid x86_div( struct x86_function *p, 828848b8605Smrg struct x86_reg src ) 829848b8605Smrg{ 830848b8605Smrg assert(src.file == file_REG32 && src.mod == mod_REG); 831848b8605Smrg emit_op_modrm(p, 0xf7, 0, x86_make_reg(file_REG32, 6), src); 832848b8605Smrg} 833848b8605Smrg 834848b8605Smrgvoid x86_bswap( struct x86_function *p, struct x86_reg reg ) 835848b8605Smrg{ 836848b8605Smrg DUMP_R(reg); 837848b8605Smrg assert(reg.file == file_REG32); 838848b8605Smrg assert(reg.mod == mod_REG); 839848b8605Smrg emit_2ub(p, 0x0f, 0xc8 + reg.idx); 840848b8605Smrg} 841848b8605Smrg 842848b8605Smrgvoid x86_shr_imm( struct x86_function *p, struct x86_reg reg, unsigned imm ) 843848b8605Smrg{ 844848b8605Smrg DUMP_RI(reg, imm); 845848b8605Smrg if(imm == 1) 846848b8605Smrg { 847848b8605Smrg emit_1ub(p, 0xd1); 848848b8605Smrg emit_modrm_noreg(p, 5, reg); 849848b8605Smrg } 850848b8605Smrg else 851848b8605Smrg { 852848b8605Smrg emit_1ub(p, 0xc1); 853848b8605Smrg emit_modrm_noreg(p, 5, reg); 854848b8605Smrg emit_1ub(p, imm); 855848b8605Smrg } 856848b8605Smrg} 857848b8605Smrg 858848b8605Smrgvoid x86_sar_imm( struct x86_function *p, struct x86_reg reg, unsigned imm ) 859848b8605Smrg{ 860848b8605Smrg DUMP_RI(reg, imm); 861848b8605Smrg if(imm == 1) 862848b8605Smrg { 863848b8605Smrg emit_1ub(p, 0xd1); 864848b8605Smrg emit_modrm_noreg(p, 7, reg); 865848b8605Smrg } 866848b8605Smrg else 867848b8605Smrg { 868848b8605Smrg emit_1ub(p, 0xc1); 869848b8605Smrg emit_modrm_noreg(p, 7, reg); 870848b8605Smrg emit_1ub(p, imm); 871848b8605Smrg } 872848b8605Smrg} 873848b8605Smrg 874848b8605Smrgvoid x86_shl_imm( struct x86_function *p, struct x86_reg reg, unsigned imm ) 875848b8605Smrg{ 876848b8605Smrg DUMP_RI(reg, imm); 877848b8605Smrg if(imm == 1) 878848b8605Smrg { 879848b8605Smrg emit_1ub(p, 0xd1); 880848b8605Smrg emit_modrm_noreg(p, 4, reg); 881848b8605Smrg } 882848b8605Smrg else 883848b8605Smrg { 884848b8605Smrg emit_1ub(p, 0xc1); 885848b8605Smrg emit_modrm_noreg(p, 4, reg); 886848b8605Smrg emit_1ub(p, imm); 887848b8605Smrg } 888848b8605Smrg} 889848b8605Smrg 890848b8605Smrg 891848b8605Smrg/*********************************************************************** 892848b8605Smrg * SSE instructions 893848b8605Smrg */ 894848b8605Smrg 895848b8605Smrgvoid sse_prefetchnta( struct x86_function *p, struct x86_reg ptr) 896848b8605Smrg{ 897848b8605Smrg DUMP_R( ptr ); 898848b8605Smrg assert(ptr.mod != mod_REG); 899848b8605Smrg emit_2ub(p, 0x0f, 0x18); 900848b8605Smrg emit_modrm_noreg(p, 0, ptr); 901848b8605Smrg} 902848b8605Smrg 903848b8605Smrgvoid sse_prefetch0( struct x86_function *p, struct x86_reg ptr) 904848b8605Smrg{ 905848b8605Smrg DUMP_R( ptr ); 906848b8605Smrg assert(ptr.mod != mod_REG); 907848b8605Smrg emit_2ub(p, 0x0f, 0x18); 908848b8605Smrg emit_modrm_noreg(p, 1, ptr); 909848b8605Smrg} 910848b8605Smrg 911848b8605Smrgvoid sse_prefetch1( struct x86_function *p, struct x86_reg ptr) 912848b8605Smrg{ 913848b8605Smrg DUMP_R( ptr ); 914848b8605Smrg assert(ptr.mod != mod_REG); 915848b8605Smrg emit_2ub(p, 0x0f, 0x18); 916848b8605Smrg emit_modrm_noreg(p, 2, ptr); 917848b8605Smrg} 918848b8605Smrg 919848b8605Smrgvoid sse_movntps( struct x86_function *p, 920848b8605Smrg struct x86_reg dst, 921848b8605Smrg struct x86_reg src) 922848b8605Smrg{ 923848b8605Smrg DUMP_RR( dst, src ); 924848b8605Smrg 925848b8605Smrg assert(dst.mod != mod_REG); 926848b8605Smrg assert(src.mod == mod_REG); 927848b8605Smrg emit_2ub(p, 0x0f, 0x2b); 928848b8605Smrg emit_modrm(p, src, dst); 929848b8605Smrg} 930848b8605Smrg 931848b8605Smrg 932848b8605Smrg 933848b8605Smrg 934848b8605Smrgvoid sse_movss( struct x86_function *p, 935848b8605Smrg struct x86_reg dst, 936848b8605Smrg struct x86_reg src ) 937848b8605Smrg{ 938848b8605Smrg DUMP_RR( dst, src ); 939848b8605Smrg emit_2ub(p, 0xF3, X86_TWOB); 940848b8605Smrg emit_op_modrm( p, 0x10, 0x11, dst, src ); 941848b8605Smrg} 942848b8605Smrg 943848b8605Smrgvoid sse_movaps( struct x86_function *p, 944848b8605Smrg struct x86_reg dst, 945848b8605Smrg struct x86_reg src ) 946848b8605Smrg{ 947848b8605Smrg DUMP_RR( dst, src ); 948848b8605Smrg emit_1ub(p, X86_TWOB); 949848b8605Smrg emit_op_modrm( p, 0x28, 0x29, dst, src ); 950848b8605Smrg} 951848b8605Smrg 952848b8605Smrgvoid sse_movups( struct x86_function *p, 953848b8605Smrg struct x86_reg dst, 954848b8605Smrg struct x86_reg src ) 955848b8605Smrg{ 956848b8605Smrg DUMP_RR( dst, src ); 957848b8605Smrg emit_1ub(p, X86_TWOB); 958848b8605Smrg emit_op_modrm( p, 0x10, 0x11, dst, src ); 959848b8605Smrg} 960848b8605Smrg 961848b8605Smrgvoid sse_movhps( struct x86_function *p, 962848b8605Smrg struct x86_reg dst, 963848b8605Smrg struct x86_reg src ) 964848b8605Smrg{ 965848b8605Smrg DUMP_RR( dst, src ); 966848b8605Smrg assert(dst.mod != mod_REG || src.mod != mod_REG); 967848b8605Smrg emit_1ub(p, X86_TWOB); 968848b8605Smrg emit_op_modrm( p, 0x16, 0x17, dst, src ); /* cf movlhps */ 969848b8605Smrg} 970848b8605Smrg 971848b8605Smrgvoid sse_movlps( struct x86_function *p, 972848b8605Smrg struct x86_reg dst, 973848b8605Smrg struct x86_reg src ) 974848b8605Smrg{ 975848b8605Smrg DUMP_RR( dst, src ); 976848b8605Smrg assert(dst.mod != mod_REG || src.mod != mod_REG); 977848b8605Smrg emit_1ub(p, X86_TWOB); 978848b8605Smrg emit_op_modrm( p, 0x12, 0x13, dst, src ); /* cf movhlps */ 979848b8605Smrg} 980848b8605Smrg 981848b8605Smrgvoid sse_maxps( struct x86_function *p, 982848b8605Smrg struct x86_reg dst, 983848b8605Smrg struct x86_reg src ) 984848b8605Smrg{ 985848b8605Smrg DUMP_RR( dst, src ); 986848b8605Smrg emit_2ub(p, X86_TWOB, 0x5F); 987848b8605Smrg emit_modrm( p, dst, src ); 988848b8605Smrg} 989848b8605Smrg 990848b8605Smrgvoid sse_maxss( struct x86_function *p, 991848b8605Smrg struct x86_reg dst, 992848b8605Smrg struct x86_reg src ) 993848b8605Smrg{ 994848b8605Smrg DUMP_RR( dst, src ); 995848b8605Smrg emit_3ub(p, 0xF3, X86_TWOB, 0x5F); 996848b8605Smrg emit_modrm( p, dst, src ); 997848b8605Smrg} 998848b8605Smrg 999848b8605Smrgvoid sse_divss( struct x86_function *p, 1000848b8605Smrg struct x86_reg dst, 1001848b8605Smrg struct x86_reg src ) 1002848b8605Smrg{ 1003848b8605Smrg DUMP_RR( dst, src ); 1004848b8605Smrg emit_3ub(p, 0xF3, X86_TWOB, 0x5E); 1005848b8605Smrg emit_modrm( p, dst, src ); 1006848b8605Smrg} 1007848b8605Smrg 1008848b8605Smrgvoid sse_minps( struct x86_function *p, 1009848b8605Smrg struct x86_reg dst, 1010848b8605Smrg struct x86_reg src ) 1011848b8605Smrg{ 1012848b8605Smrg DUMP_RR( dst, src ); 1013848b8605Smrg emit_2ub(p, X86_TWOB, 0x5D); 1014848b8605Smrg emit_modrm( p, dst, src ); 1015848b8605Smrg} 1016848b8605Smrg 1017848b8605Smrgvoid sse_subps( struct x86_function *p, 1018848b8605Smrg struct x86_reg dst, 1019848b8605Smrg struct x86_reg src ) 1020848b8605Smrg{ 1021848b8605Smrg DUMP_RR( dst, src ); 1022848b8605Smrg emit_2ub(p, X86_TWOB, 0x5C); 1023848b8605Smrg emit_modrm( p, dst, src ); 1024848b8605Smrg} 1025848b8605Smrg 1026848b8605Smrgvoid sse_mulps( struct x86_function *p, 1027848b8605Smrg struct x86_reg dst, 1028848b8605Smrg struct x86_reg src ) 1029848b8605Smrg{ 1030848b8605Smrg DUMP_RR( dst, src ); 1031848b8605Smrg emit_2ub(p, X86_TWOB, 0x59); 1032848b8605Smrg emit_modrm( p, dst, src ); 1033848b8605Smrg} 1034848b8605Smrg 1035848b8605Smrgvoid sse_mulss( struct x86_function *p, 1036848b8605Smrg struct x86_reg dst, 1037848b8605Smrg struct x86_reg src ) 1038848b8605Smrg{ 1039848b8605Smrg DUMP_RR( dst, src ); 1040848b8605Smrg emit_3ub(p, 0xF3, X86_TWOB, 0x59); 1041848b8605Smrg emit_modrm( p, dst, src ); 1042848b8605Smrg} 1043848b8605Smrg 1044848b8605Smrgvoid sse_addps( struct x86_function *p, 1045848b8605Smrg struct x86_reg dst, 1046848b8605Smrg struct x86_reg src ) 1047848b8605Smrg{ 1048848b8605Smrg DUMP_RR( dst, src ); 1049848b8605Smrg emit_2ub(p, X86_TWOB, 0x58); 1050848b8605Smrg emit_modrm( p, dst, src ); 1051848b8605Smrg} 1052848b8605Smrg 1053848b8605Smrgvoid sse_addss( struct x86_function *p, 1054848b8605Smrg struct x86_reg dst, 1055848b8605Smrg struct x86_reg src ) 1056848b8605Smrg{ 1057848b8605Smrg DUMP_RR( dst, src ); 1058848b8605Smrg emit_3ub(p, 0xF3, X86_TWOB, 0x58); 1059848b8605Smrg emit_modrm( p, dst, src ); 1060848b8605Smrg} 1061848b8605Smrg 1062848b8605Smrgvoid sse_andnps( struct x86_function *p, 1063848b8605Smrg struct x86_reg dst, 1064848b8605Smrg struct x86_reg src ) 1065848b8605Smrg{ 1066848b8605Smrg DUMP_RR( dst, src ); 1067848b8605Smrg emit_2ub(p, X86_TWOB, 0x55); 1068848b8605Smrg emit_modrm( p, dst, src ); 1069848b8605Smrg} 1070848b8605Smrg 1071848b8605Smrgvoid sse_andps( struct x86_function *p, 1072848b8605Smrg struct x86_reg dst, 1073848b8605Smrg struct x86_reg src ) 1074848b8605Smrg{ 1075848b8605Smrg DUMP_RR( dst, src ); 1076848b8605Smrg emit_2ub(p, X86_TWOB, 0x54); 1077848b8605Smrg emit_modrm( p, dst, src ); 1078848b8605Smrg} 1079848b8605Smrg 1080848b8605Smrgvoid sse_rsqrtps( struct x86_function *p, 1081848b8605Smrg struct x86_reg dst, 1082848b8605Smrg struct x86_reg src ) 1083848b8605Smrg{ 1084848b8605Smrg DUMP_RR( dst, src ); 1085848b8605Smrg emit_2ub(p, X86_TWOB, 0x52); 1086848b8605Smrg emit_modrm( p, dst, src ); 1087848b8605Smrg} 1088848b8605Smrg 1089848b8605Smrgvoid sse_rsqrtss( struct x86_function *p, 1090848b8605Smrg struct x86_reg dst, 1091848b8605Smrg struct x86_reg src ) 1092848b8605Smrg{ 1093848b8605Smrg DUMP_RR( dst, src ); 1094848b8605Smrg emit_3ub(p, 0xF3, X86_TWOB, 0x52); 1095848b8605Smrg emit_modrm( p, dst, src ); 1096848b8605Smrg 1097848b8605Smrg} 1098848b8605Smrg 1099848b8605Smrgvoid sse_movhlps( struct x86_function *p, 1100848b8605Smrg struct x86_reg dst, 1101848b8605Smrg struct x86_reg src ) 1102848b8605Smrg{ 1103848b8605Smrg DUMP_RR( dst, src ); 1104848b8605Smrg assert(dst.mod == mod_REG && src.mod == mod_REG); 1105848b8605Smrg emit_2ub(p, X86_TWOB, 0x12); 1106848b8605Smrg emit_modrm( p, dst, src ); 1107848b8605Smrg} 1108848b8605Smrg 1109848b8605Smrgvoid sse_movlhps( struct x86_function *p, 1110848b8605Smrg struct x86_reg dst, 1111848b8605Smrg struct x86_reg src ) 1112848b8605Smrg{ 1113848b8605Smrg DUMP_RR( dst, src ); 1114848b8605Smrg assert(dst.mod == mod_REG && src.mod == mod_REG); 1115848b8605Smrg emit_2ub(p, X86_TWOB, 0x16); 1116848b8605Smrg emit_modrm( p, dst, src ); 1117848b8605Smrg} 1118848b8605Smrg 1119848b8605Smrgvoid sse_orps( struct x86_function *p, 1120848b8605Smrg struct x86_reg dst, 1121848b8605Smrg struct x86_reg src ) 1122848b8605Smrg{ 1123848b8605Smrg DUMP_RR( dst, src ); 1124848b8605Smrg emit_2ub(p, X86_TWOB, 0x56); 1125848b8605Smrg emit_modrm( p, dst, src ); 1126848b8605Smrg} 1127848b8605Smrg 1128848b8605Smrgvoid sse_xorps( struct x86_function *p, 1129848b8605Smrg struct x86_reg dst, 1130848b8605Smrg struct x86_reg src ) 1131848b8605Smrg{ 1132848b8605Smrg DUMP_RR( dst, src ); 1133848b8605Smrg emit_2ub(p, X86_TWOB, 0x57); 1134848b8605Smrg emit_modrm( p, dst, src ); 1135848b8605Smrg} 1136848b8605Smrg 1137848b8605Smrgvoid sse_cvtps2pi( struct x86_function *p, 1138848b8605Smrg struct x86_reg dst, 1139848b8605Smrg struct x86_reg src ) 1140848b8605Smrg{ 1141848b8605Smrg DUMP_RR( dst, src ); 1142848b8605Smrg assert(dst.file == file_MMX && 1143848b8605Smrg (src.file == file_XMM || src.mod != mod_REG)); 1144848b8605Smrg 1145848b8605Smrg p->need_emms = 1; 1146848b8605Smrg 1147848b8605Smrg emit_2ub(p, X86_TWOB, 0x2d); 1148848b8605Smrg emit_modrm( p, dst, src ); 1149848b8605Smrg} 1150848b8605Smrg 1151848b8605Smrgvoid sse2_cvtdq2ps( struct x86_function *p, 1152848b8605Smrg struct x86_reg dst, 1153848b8605Smrg struct x86_reg src ) 1154848b8605Smrg{ 1155848b8605Smrg DUMP_RR( dst, src ); 1156848b8605Smrg emit_2ub(p, X86_TWOB, 0x5b); 1157848b8605Smrg emit_modrm( p, dst, src ); 1158848b8605Smrg} 1159848b8605Smrg 1160848b8605Smrg 1161848b8605Smrg/* Shufps can also be used to implement a reduced swizzle when dest == 1162848b8605Smrg * arg0. 1163848b8605Smrg */ 1164848b8605Smrgvoid sse_shufps( struct x86_function *p, 1165848b8605Smrg struct x86_reg dst, 1166848b8605Smrg struct x86_reg src, 1167848b8605Smrg unsigned char shuf) 1168848b8605Smrg{ 1169848b8605Smrg DUMP_RRI( dst, src, shuf ); 1170848b8605Smrg emit_2ub(p, X86_TWOB, 0xC6); 1171848b8605Smrg emit_modrm(p, dst, src); 1172848b8605Smrg emit_1ub(p, shuf); 1173848b8605Smrg} 1174848b8605Smrg 1175848b8605Smrgvoid sse_unpckhps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) 1176848b8605Smrg{ 1177848b8605Smrg DUMP_RR( dst, src ); 1178848b8605Smrg emit_2ub( p, X86_TWOB, 0x15 ); 1179848b8605Smrg emit_modrm( p, dst, src ); 1180848b8605Smrg} 1181848b8605Smrg 1182848b8605Smrgvoid sse_unpcklps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) 1183848b8605Smrg{ 1184848b8605Smrg DUMP_RR( dst, src ); 1185848b8605Smrg emit_2ub( p, X86_TWOB, 0x14 ); 1186848b8605Smrg emit_modrm( p, dst, src ); 1187848b8605Smrg} 1188848b8605Smrg 1189848b8605Smrgvoid sse_cmpps( struct x86_function *p, 1190848b8605Smrg struct x86_reg dst, 1191848b8605Smrg struct x86_reg src, 1192848b8605Smrg enum sse_cc cc) 1193848b8605Smrg{ 1194848b8605Smrg DUMP_RRI( dst, src, cc ); 1195848b8605Smrg emit_2ub(p, X86_TWOB, 0xC2); 1196848b8605Smrg emit_modrm(p, dst, src); 1197848b8605Smrg emit_1ub(p, cc); 1198848b8605Smrg} 1199848b8605Smrg 1200848b8605Smrgvoid sse_pmovmskb( struct x86_function *p, 1201848b8605Smrg struct x86_reg dst, 1202848b8605Smrg struct x86_reg src) 1203848b8605Smrg{ 1204848b8605Smrg DUMP_RR( dst, src ); 1205848b8605Smrg emit_3ub(p, 0x66, X86_TWOB, 0xD7); 1206848b8605Smrg emit_modrm(p, dst, src); 1207848b8605Smrg} 1208848b8605Smrg 1209848b8605Smrgvoid sse_movmskps( struct x86_function *p, 1210848b8605Smrg struct x86_reg dst, 1211848b8605Smrg struct x86_reg src) 1212848b8605Smrg{ 1213848b8605Smrg DUMP_RR( dst, src ); 1214848b8605Smrg emit_2ub(p, X86_TWOB, 0x50); 1215848b8605Smrg emit_modrm(p, dst, src); 1216848b8605Smrg} 1217848b8605Smrg 1218848b8605Smrg/*********************************************************************** 1219848b8605Smrg * SSE2 instructions 1220848b8605Smrg */ 1221848b8605Smrg 1222848b8605Smrgvoid sse2_movd( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) 1223848b8605Smrg{ 1224848b8605Smrg DUMP_RR(dst, src); 1225848b8605Smrg emit_2ub(p, 0x66, 0x0f); 1226848b8605Smrg if(dst.mod == mod_REG && dst.file == file_REG32) 1227848b8605Smrg { 1228848b8605Smrg emit_1ub(p, 0x7e); 1229848b8605Smrg emit_modrm(p, src, dst); 1230848b8605Smrg } 1231848b8605Smrg else 1232848b8605Smrg { 1233848b8605Smrg emit_op_modrm(p, 0x6e, 0x7e, dst, src); 1234848b8605Smrg } 1235848b8605Smrg} 1236848b8605Smrg 1237848b8605Smrgvoid sse2_movq( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) 1238848b8605Smrg{ 1239848b8605Smrg DUMP_RR(dst, src); 1240848b8605Smrg switch (dst.mod) { 1241848b8605Smrg case mod_REG: 1242848b8605Smrg emit_3ub(p, 0xf3, 0x0f, 0x7e); 1243848b8605Smrg emit_modrm(p, dst, src); 1244848b8605Smrg break; 1245848b8605Smrg case mod_INDIRECT: 1246848b8605Smrg case mod_DISP32: 1247848b8605Smrg case mod_DISP8: 1248848b8605Smrg assert(src.mod == mod_REG); 1249848b8605Smrg emit_3ub(p, 0x66, 0x0f, 0xd6); 1250848b8605Smrg emit_modrm(p, src, dst); 1251848b8605Smrg break; 1252848b8605Smrg default: 1253848b8605Smrg assert(0); 1254848b8605Smrg break; 1255848b8605Smrg } 1256848b8605Smrg} 1257848b8605Smrg 1258848b8605Smrgvoid sse2_movdqu( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) 1259848b8605Smrg{ 1260848b8605Smrg DUMP_RR(dst, src); 1261848b8605Smrg emit_2ub(p, 0xf3, 0x0f); 1262848b8605Smrg emit_op_modrm(p, 0x6f, 0x7f, dst, src); 1263848b8605Smrg} 1264848b8605Smrg 1265848b8605Smrgvoid sse2_movdqa( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) 1266848b8605Smrg{ 1267848b8605Smrg DUMP_RR(dst, src); 1268848b8605Smrg emit_2ub(p, 0x66, 0x0f); 1269848b8605Smrg emit_op_modrm(p, 0x6f, 0x7f, dst, src); 1270848b8605Smrg} 1271848b8605Smrg 1272848b8605Smrgvoid sse2_movsd( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) 1273848b8605Smrg{ 1274848b8605Smrg DUMP_RR(dst, src); 1275848b8605Smrg emit_2ub(p, 0xf2, 0x0f); 1276848b8605Smrg emit_op_modrm(p, 0x10, 0x11, dst, src); 1277848b8605Smrg} 1278848b8605Smrg 1279848b8605Smrgvoid sse2_movupd( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) 1280848b8605Smrg{ 1281848b8605Smrg DUMP_RR(dst, src); 1282848b8605Smrg emit_2ub(p, 0x66, 0x0f); 1283848b8605Smrg emit_op_modrm(p, 0x10, 0x11, dst, src); 1284848b8605Smrg} 1285848b8605Smrg 1286848b8605Smrgvoid sse2_movapd( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) 1287848b8605Smrg{ 1288848b8605Smrg DUMP_RR(dst, src); 1289848b8605Smrg emit_2ub(p, 0x66, 0x0f); 1290848b8605Smrg emit_op_modrm(p, 0x28, 0x29, dst, src); 1291848b8605Smrg} 1292848b8605Smrg 1293848b8605Smrg/** 1294848b8605Smrg * Perform a reduced swizzle: 1295848b8605Smrg */ 1296848b8605Smrgvoid sse2_pshufd( struct x86_function *p, 1297848b8605Smrg struct x86_reg dst, 1298848b8605Smrg struct x86_reg src, 1299848b8605Smrg unsigned char shuf) 1300848b8605Smrg{ 1301848b8605Smrg DUMP_RRI( dst, src, shuf ); 1302848b8605Smrg emit_3ub(p, 0x66, X86_TWOB, 0x70); 1303848b8605Smrg emit_modrm(p, dst, src); 1304848b8605Smrg emit_1ub(p, shuf); 1305848b8605Smrg} 1306848b8605Smrg 1307848b8605Smrgvoid sse2_pshuflw( struct x86_function *p, 1308848b8605Smrg struct x86_reg dst, 1309848b8605Smrg struct x86_reg src, 1310848b8605Smrg unsigned char shuf) 1311848b8605Smrg{ 1312848b8605Smrg DUMP_RRI( dst, src, shuf ); 1313848b8605Smrg emit_3ub(p, 0xf2, X86_TWOB, 0x70); 1314848b8605Smrg emit_modrm(p, dst, src); 1315848b8605Smrg emit_1ub(p, shuf); 1316848b8605Smrg} 1317848b8605Smrg 1318848b8605Smrgvoid sse2_pshufhw( struct x86_function *p, 1319848b8605Smrg struct x86_reg dst, 1320848b8605Smrg struct x86_reg src, 1321848b8605Smrg unsigned char shuf) 1322848b8605Smrg{ 1323848b8605Smrg DUMP_RRI( dst, src, shuf ); 1324848b8605Smrg emit_3ub(p, 0xf3, X86_TWOB, 0x70); 1325848b8605Smrg emit_modrm(p, dst, src); 1326848b8605Smrg emit_1ub(p, shuf); 1327848b8605Smrg} 1328848b8605Smrg 1329848b8605Smrgvoid sse2_cvttps2dq( struct x86_function *p, 1330848b8605Smrg struct x86_reg dst, 1331848b8605Smrg struct x86_reg src ) 1332848b8605Smrg{ 1333848b8605Smrg DUMP_RR( dst, src ); 1334848b8605Smrg emit_3ub( p, 0xF3, X86_TWOB, 0x5B ); 1335848b8605Smrg emit_modrm( p, dst, src ); 1336848b8605Smrg} 1337848b8605Smrg 1338848b8605Smrgvoid sse2_cvtps2dq( struct x86_function *p, 1339848b8605Smrg struct x86_reg dst, 1340848b8605Smrg struct x86_reg src ) 1341848b8605Smrg{ 1342848b8605Smrg DUMP_RR( dst, src ); 1343848b8605Smrg emit_3ub(p, 0x66, X86_TWOB, 0x5B); 1344848b8605Smrg emit_modrm( p, dst, src ); 1345848b8605Smrg} 1346848b8605Smrg 1347848b8605Smrgvoid sse2_cvtsd2ss( struct x86_function *p, 1348848b8605Smrg struct x86_reg dst, 1349848b8605Smrg struct x86_reg src ) 1350848b8605Smrg{ 1351848b8605Smrg DUMP_RR( dst, src ); 1352848b8605Smrg emit_3ub(p, 0xf2, 0x0f, 0x5a); 1353848b8605Smrg emit_modrm( p, dst, src ); 1354848b8605Smrg} 1355848b8605Smrg 1356848b8605Smrgvoid sse2_cvtpd2ps( struct x86_function *p, 1357848b8605Smrg struct x86_reg dst, 1358848b8605Smrg struct x86_reg src ) 1359848b8605Smrg{ 1360848b8605Smrg DUMP_RR( dst, src ); 1361848b8605Smrg emit_3ub(p, 0x66, 0x0f, 0x5a); 1362848b8605Smrg emit_modrm( p, dst, src ); 1363848b8605Smrg} 1364848b8605Smrg 1365848b8605Smrgvoid sse2_packssdw( struct x86_function *p, 1366848b8605Smrg struct x86_reg dst, 1367848b8605Smrg struct x86_reg src ) 1368848b8605Smrg{ 1369848b8605Smrg DUMP_RR( dst, src ); 1370848b8605Smrg emit_3ub(p, 0x66, X86_TWOB, 0x6B); 1371848b8605Smrg emit_modrm( p, dst, src ); 1372848b8605Smrg} 1373848b8605Smrg 1374848b8605Smrgvoid sse2_packsswb( struct x86_function *p, 1375848b8605Smrg struct x86_reg dst, 1376848b8605Smrg struct x86_reg src ) 1377848b8605Smrg{ 1378848b8605Smrg DUMP_RR( dst, src ); 1379848b8605Smrg emit_3ub(p, 0x66, X86_TWOB, 0x63); 1380848b8605Smrg emit_modrm( p, dst, src ); 1381848b8605Smrg} 1382848b8605Smrg 1383848b8605Smrgvoid sse2_packuswb( struct x86_function *p, 1384848b8605Smrg struct x86_reg dst, 1385848b8605Smrg struct x86_reg src ) 1386848b8605Smrg{ 1387848b8605Smrg DUMP_RR( dst, src ); 1388848b8605Smrg emit_3ub(p, 0x66, X86_TWOB, 0x67); 1389848b8605Smrg emit_modrm( p, dst, src ); 1390848b8605Smrg} 1391848b8605Smrg 1392848b8605Smrgvoid sse2_punpcklbw( struct x86_function *p, 1393848b8605Smrg struct x86_reg dst, 1394848b8605Smrg struct x86_reg src ) 1395848b8605Smrg{ 1396848b8605Smrg DUMP_RR( dst, src ); 1397848b8605Smrg emit_3ub(p, 0x66, X86_TWOB, 0x60); 1398848b8605Smrg emit_modrm( p, dst, src ); 1399848b8605Smrg} 1400848b8605Smrg 1401848b8605Smrgvoid sse2_punpcklwd( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) 1402848b8605Smrg{ 1403848b8605Smrg DUMP_RR( dst, src ); 1404848b8605Smrg emit_3ub(p, 0x66, 0x0f, 0x61); 1405848b8605Smrg emit_modrm( p, dst, src ); 1406848b8605Smrg} 1407848b8605Smrg 1408848b8605Smrgvoid sse2_punpckldq( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) 1409848b8605Smrg{ 1410848b8605Smrg DUMP_RR( dst, src ); 1411848b8605Smrg emit_3ub(p, 0x66, 0x0f, 0x62); 1412848b8605Smrg emit_modrm( p, dst, src ); 1413848b8605Smrg} 1414848b8605Smrg 1415848b8605Smrgvoid sse2_punpcklqdq( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) 1416848b8605Smrg{ 1417848b8605Smrg DUMP_RR( dst, src ); 1418848b8605Smrg emit_3ub(p, 0x66, 0x0f, 0x6c); 1419848b8605Smrg emit_modrm( p, dst, src ); 1420848b8605Smrg} 1421848b8605Smrg 1422848b8605Smrgvoid sse2_psllw_imm( struct x86_function *p, struct x86_reg dst, unsigned imm ) 1423848b8605Smrg{ 1424848b8605Smrg DUMP_RI(dst, imm); 1425848b8605Smrg emit_3ub(p, 0x66, 0x0f, 0x71); 1426848b8605Smrg emit_modrm_noreg(p, 6, dst); 1427848b8605Smrg emit_1ub(p, imm); 1428848b8605Smrg} 1429848b8605Smrg 1430848b8605Smrgvoid sse2_pslld_imm( struct x86_function *p, struct x86_reg dst, unsigned imm ) 1431848b8605Smrg{ 1432848b8605Smrg DUMP_RI(dst, imm); 1433848b8605Smrg emit_3ub(p, 0x66, 0x0f, 0x72); 1434848b8605Smrg emit_modrm_noreg(p, 6, dst); 1435848b8605Smrg emit_1ub(p, imm); 1436848b8605Smrg} 1437848b8605Smrg 1438848b8605Smrgvoid sse2_psllq_imm( struct x86_function *p, struct x86_reg dst, unsigned imm ) 1439848b8605Smrg{ 1440848b8605Smrg DUMP_RI(dst, imm); 1441848b8605Smrg emit_3ub(p, 0x66, 0x0f, 0x73); 1442848b8605Smrg emit_modrm_noreg(p, 6, dst); 1443848b8605Smrg emit_1ub(p, imm); 1444848b8605Smrg} 1445848b8605Smrg 1446848b8605Smrgvoid sse2_psrlw_imm( struct x86_function *p, struct x86_reg dst, unsigned imm ) 1447848b8605Smrg{ 1448848b8605Smrg DUMP_RI(dst, imm); 1449848b8605Smrg emit_3ub(p, 0x66, 0x0f, 0x71); 1450848b8605Smrg emit_modrm_noreg(p, 2, dst); 1451848b8605Smrg emit_1ub(p, imm); 1452848b8605Smrg} 1453848b8605Smrg 1454848b8605Smrgvoid sse2_psrld_imm( struct x86_function *p, struct x86_reg dst, unsigned imm ) 1455848b8605Smrg{ 1456848b8605Smrg DUMP_RI(dst, imm); 1457848b8605Smrg emit_3ub(p, 0x66, 0x0f, 0x72); 1458848b8605Smrg emit_modrm_noreg(p, 2, dst); 1459848b8605Smrg emit_1ub(p, imm); 1460848b8605Smrg} 1461848b8605Smrg 1462848b8605Smrgvoid sse2_psrlq_imm( struct x86_function *p, struct x86_reg dst, unsigned imm ) 1463848b8605Smrg{ 1464848b8605Smrg DUMP_RI(dst, imm); 1465848b8605Smrg emit_3ub(p, 0x66, 0x0f, 0x73); 1466848b8605Smrg emit_modrm_noreg(p, 2, dst); 1467848b8605Smrg emit_1ub(p, imm); 1468848b8605Smrg} 1469848b8605Smrg 1470848b8605Smrgvoid sse2_psraw_imm( struct x86_function *p, struct x86_reg dst, unsigned imm ) 1471848b8605Smrg{ 1472848b8605Smrg DUMP_RI(dst, imm); 1473848b8605Smrg emit_3ub(p, 0x66, 0x0f, 0x71); 1474848b8605Smrg emit_modrm_noreg(p, 4, dst); 1475848b8605Smrg emit_1ub(p, imm); 1476848b8605Smrg} 1477848b8605Smrg 1478848b8605Smrgvoid sse2_psrad_imm( struct x86_function *p, struct x86_reg dst, unsigned imm ) 1479848b8605Smrg{ 1480848b8605Smrg DUMP_RI(dst, imm); 1481848b8605Smrg emit_3ub(p, 0x66, 0x0f, 0x72); 1482848b8605Smrg emit_modrm_noreg(p, 4, dst); 1483848b8605Smrg emit_1ub(p, imm); 1484848b8605Smrg} 1485848b8605Smrg 1486848b8605Smrgvoid sse2_por( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) 1487848b8605Smrg{ 1488848b8605Smrg DUMP_RR(dst, src); 1489848b8605Smrg emit_3ub(p, 0x66, 0x0f, 0xeb); 1490848b8605Smrg emit_modrm(p, dst, src); 1491848b8605Smrg} 1492848b8605Smrg 1493848b8605Smrgvoid sse2_rcpps( struct x86_function *p, 1494848b8605Smrg struct x86_reg dst, 1495848b8605Smrg struct x86_reg src ) 1496848b8605Smrg{ 1497848b8605Smrg DUMP_RR( dst, src ); 1498848b8605Smrg emit_2ub(p, X86_TWOB, 0x53); 1499848b8605Smrg emit_modrm( p, dst, src ); 1500848b8605Smrg} 1501848b8605Smrg 1502848b8605Smrgvoid sse2_rcpss( struct x86_function *p, 1503848b8605Smrg struct x86_reg dst, 1504848b8605Smrg struct x86_reg src ) 1505848b8605Smrg{ 1506848b8605Smrg DUMP_RR( dst, src ); 1507848b8605Smrg emit_3ub(p, 0xF3, X86_TWOB, 0x53); 1508848b8605Smrg emit_modrm( p, dst, src ); 1509848b8605Smrg} 1510848b8605Smrg 1511848b8605Smrg/*********************************************************************** 1512848b8605Smrg * x87 instructions 1513848b8605Smrg */ 1514848b8605Smrgstatic void note_x87_pop( struct x86_function *p ) 1515848b8605Smrg{ 1516848b8605Smrg p->x87_stack--; 1517848b8605Smrg assert(p->x87_stack >= 0); 1518848b8605Smrg} 1519848b8605Smrg 1520848b8605Smrgstatic void note_x87_push( struct x86_function *p ) 1521848b8605Smrg{ 1522848b8605Smrg p->x87_stack++; 1523848b8605Smrg assert(p->x87_stack <= 7); 1524848b8605Smrg} 1525848b8605Smrg 1526848b8605Smrgvoid x87_assert_stack_empty( struct x86_function *p ) 1527848b8605Smrg{ 1528848b8605Smrg assert (p->x87_stack == 0); 1529848b8605Smrg} 1530848b8605Smrg 1531848b8605Smrg 1532848b8605Smrgvoid x87_fist( struct x86_function *p, struct x86_reg dst ) 1533848b8605Smrg{ 1534848b8605Smrg DUMP_R( dst ); 1535848b8605Smrg emit_1ub(p, 0xdb); 1536848b8605Smrg emit_modrm_noreg(p, 2, dst); 1537848b8605Smrg} 1538848b8605Smrg 1539848b8605Smrgvoid x87_fistp( struct x86_function *p, struct x86_reg dst ) 1540848b8605Smrg{ 1541848b8605Smrg DUMP_R( dst ); 1542848b8605Smrg emit_1ub(p, 0xdb); 1543848b8605Smrg emit_modrm_noreg(p, 3, dst); 1544848b8605Smrg note_x87_pop(p); 1545848b8605Smrg} 1546848b8605Smrg 1547848b8605Smrgvoid x87_fild( struct x86_function *p, struct x86_reg arg ) 1548848b8605Smrg{ 1549848b8605Smrg DUMP_R( arg ); 1550848b8605Smrg emit_1ub(p, 0xdf); 1551848b8605Smrg emit_modrm_noreg(p, 0, arg); 1552848b8605Smrg note_x87_push(p); 1553848b8605Smrg} 1554848b8605Smrg 1555848b8605Smrgvoid x87_fldz( struct x86_function *p ) 1556848b8605Smrg{ 1557848b8605Smrg DUMP(); 1558848b8605Smrg emit_2ub(p, 0xd9, 0xee); 1559848b8605Smrg note_x87_push(p); 1560848b8605Smrg} 1561848b8605Smrg 1562848b8605Smrg 1563848b8605Smrgvoid x87_fldcw( struct x86_function *p, struct x86_reg arg ) 1564848b8605Smrg{ 1565848b8605Smrg DUMP_R( arg ); 1566848b8605Smrg assert(arg.file == file_REG32); 1567848b8605Smrg assert(arg.mod != mod_REG); 1568848b8605Smrg emit_1ub(p, 0xd9); 1569848b8605Smrg emit_modrm_noreg(p, 5, arg); 1570848b8605Smrg} 1571848b8605Smrg 1572848b8605Smrgvoid x87_fld1( struct x86_function *p ) 1573848b8605Smrg{ 1574848b8605Smrg DUMP(); 1575848b8605Smrg emit_2ub(p, 0xd9, 0xe8); 1576848b8605Smrg note_x87_push(p); 1577848b8605Smrg} 1578848b8605Smrg 1579848b8605Smrgvoid x87_fldl2e( struct x86_function *p ) 1580848b8605Smrg{ 1581848b8605Smrg DUMP(); 1582848b8605Smrg emit_2ub(p, 0xd9, 0xea); 1583848b8605Smrg note_x87_push(p); 1584848b8605Smrg} 1585848b8605Smrg 1586848b8605Smrgvoid x87_fldln2( struct x86_function *p ) 1587848b8605Smrg{ 1588848b8605Smrg DUMP(); 1589848b8605Smrg emit_2ub(p, 0xd9, 0xed); 1590848b8605Smrg note_x87_push(p); 1591848b8605Smrg} 1592848b8605Smrg 1593848b8605Smrgvoid x87_fwait( struct x86_function *p ) 1594848b8605Smrg{ 1595848b8605Smrg DUMP(); 1596848b8605Smrg emit_1ub(p, 0x9b); 1597848b8605Smrg} 1598848b8605Smrg 1599848b8605Smrgvoid x87_fnclex( struct x86_function *p ) 1600848b8605Smrg{ 1601848b8605Smrg DUMP(); 1602848b8605Smrg emit_2ub(p, 0xdb, 0xe2); 1603848b8605Smrg} 1604848b8605Smrg 1605848b8605Smrgvoid x87_fclex( struct x86_function *p ) 1606848b8605Smrg{ 1607848b8605Smrg x87_fwait(p); 1608848b8605Smrg x87_fnclex(p); 1609848b8605Smrg} 1610848b8605Smrg 1611848b8605Smrgvoid x87_fcmovb( struct x86_function *p, struct x86_reg arg ) 1612848b8605Smrg{ 1613848b8605Smrg DUMP_R( arg ); 1614848b8605Smrg assert(arg.file == file_x87); 1615848b8605Smrg emit_2ub(p, 0xda, 0xc0+arg.idx); 1616848b8605Smrg} 1617848b8605Smrg 1618848b8605Smrgvoid x87_fcmove( struct x86_function *p, struct x86_reg arg ) 1619848b8605Smrg{ 1620848b8605Smrg DUMP_R( arg ); 1621848b8605Smrg assert(arg.file == file_x87); 1622848b8605Smrg emit_2ub(p, 0xda, 0xc8+arg.idx); 1623848b8605Smrg} 1624848b8605Smrg 1625848b8605Smrgvoid x87_fcmovbe( struct x86_function *p, struct x86_reg arg ) 1626848b8605Smrg{ 1627848b8605Smrg DUMP_R( arg ); 1628848b8605Smrg assert(arg.file == file_x87); 1629848b8605Smrg emit_2ub(p, 0xda, 0xd0+arg.idx); 1630848b8605Smrg} 1631848b8605Smrg 1632848b8605Smrgvoid x87_fcmovnb( struct x86_function *p, struct x86_reg arg ) 1633848b8605Smrg{ 1634848b8605Smrg DUMP_R( arg ); 1635848b8605Smrg assert(arg.file == file_x87); 1636848b8605Smrg emit_2ub(p, 0xdb, 0xc0+arg.idx); 1637848b8605Smrg} 1638848b8605Smrg 1639848b8605Smrgvoid x87_fcmovne( struct x86_function *p, struct x86_reg arg ) 1640848b8605Smrg{ 1641848b8605Smrg DUMP_R( arg ); 1642848b8605Smrg assert(arg.file == file_x87); 1643848b8605Smrg emit_2ub(p, 0xdb, 0xc8+arg.idx); 1644848b8605Smrg} 1645848b8605Smrg 1646848b8605Smrgvoid x87_fcmovnbe( struct x86_function *p, struct x86_reg arg ) 1647848b8605Smrg{ 1648848b8605Smrg DUMP_R( arg ); 1649848b8605Smrg assert(arg.file == file_x87); 1650848b8605Smrg emit_2ub(p, 0xdb, 0xd0+arg.idx); 1651848b8605Smrg} 1652848b8605Smrg 1653848b8605Smrg 1654848b8605Smrg 1655848b8605Smrgstatic void x87_arith_op( struct x86_function *p, struct x86_reg dst, struct x86_reg arg, 1656848b8605Smrg unsigned char dst0ub0, 1657848b8605Smrg unsigned char dst0ub1, 1658848b8605Smrg unsigned char arg0ub0, 1659848b8605Smrg unsigned char arg0ub1, 1660848b8605Smrg unsigned char argmem_noreg) 1661848b8605Smrg{ 1662848b8605Smrg assert(dst.file == file_x87); 1663848b8605Smrg 1664848b8605Smrg if (arg.file == file_x87) { 1665848b8605Smrg if (dst.idx == 0) 1666848b8605Smrg emit_2ub(p, dst0ub0, dst0ub1+arg.idx); 1667848b8605Smrg else if (arg.idx == 0) 1668848b8605Smrg emit_2ub(p, arg0ub0, arg0ub1+arg.idx); 1669848b8605Smrg else 1670848b8605Smrg assert(0); 1671848b8605Smrg } 1672848b8605Smrg else if (dst.idx == 0) { 1673848b8605Smrg assert(arg.file == file_REG32); 1674848b8605Smrg emit_1ub(p, 0xd8); 1675848b8605Smrg emit_modrm_noreg(p, argmem_noreg, arg); 1676848b8605Smrg } 1677848b8605Smrg else 1678848b8605Smrg assert(0); 1679848b8605Smrg} 1680848b8605Smrg 1681848b8605Smrgvoid x87_fmul( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) 1682848b8605Smrg{ 1683848b8605Smrg DUMP_RR( dst, src ); 1684848b8605Smrg x87_arith_op(p, dst, src, 1685848b8605Smrg 0xd8, 0xc8, 1686848b8605Smrg 0xdc, 0xc8, 1687848b8605Smrg 4); 1688848b8605Smrg} 1689848b8605Smrg 1690848b8605Smrgvoid x87_fsub( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) 1691848b8605Smrg{ 1692848b8605Smrg DUMP_RR( dst, src ); 1693848b8605Smrg x87_arith_op(p, dst, src, 1694848b8605Smrg 0xd8, 0xe0, 1695848b8605Smrg 0xdc, 0xe8, 1696848b8605Smrg 4); 1697848b8605Smrg} 1698848b8605Smrg 1699848b8605Smrgvoid x87_fsubr( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) 1700848b8605Smrg{ 1701848b8605Smrg DUMP_RR( dst, src ); 1702848b8605Smrg x87_arith_op(p, dst, src, 1703848b8605Smrg 0xd8, 0xe8, 1704848b8605Smrg 0xdc, 0xe0, 1705848b8605Smrg 5); 1706848b8605Smrg} 1707848b8605Smrg 1708848b8605Smrgvoid x87_fadd( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) 1709848b8605Smrg{ 1710848b8605Smrg DUMP_RR( dst, src ); 1711848b8605Smrg x87_arith_op(p, dst, src, 1712848b8605Smrg 0xd8, 0xc0, 1713848b8605Smrg 0xdc, 0xc0, 1714848b8605Smrg 0); 1715848b8605Smrg} 1716848b8605Smrg 1717848b8605Smrgvoid x87_fdiv( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) 1718848b8605Smrg{ 1719848b8605Smrg DUMP_RR( dst, src ); 1720848b8605Smrg x87_arith_op(p, dst, src, 1721848b8605Smrg 0xd8, 0xf0, 1722848b8605Smrg 0xdc, 0xf8, 1723848b8605Smrg 6); 1724848b8605Smrg} 1725848b8605Smrg 1726848b8605Smrgvoid x87_fdivr( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) 1727848b8605Smrg{ 1728848b8605Smrg DUMP_RR( dst, src ); 1729848b8605Smrg x87_arith_op(p, dst, src, 1730848b8605Smrg 0xd8, 0xf8, 1731848b8605Smrg 0xdc, 0xf0, 1732848b8605Smrg 7); 1733848b8605Smrg} 1734848b8605Smrg 1735848b8605Smrgvoid x87_fmulp( struct x86_function *p, struct x86_reg dst ) 1736848b8605Smrg{ 1737848b8605Smrg DUMP_R( dst ); 1738848b8605Smrg assert(dst.file == file_x87); 1739848b8605Smrg assert(dst.idx >= 1); 1740848b8605Smrg emit_2ub(p, 0xde, 0xc8+dst.idx); 1741848b8605Smrg note_x87_pop(p); 1742848b8605Smrg} 1743848b8605Smrg 1744848b8605Smrgvoid x87_fsubp( struct x86_function *p, struct x86_reg dst ) 1745848b8605Smrg{ 1746848b8605Smrg DUMP_R( dst ); 1747848b8605Smrg assert(dst.file == file_x87); 1748848b8605Smrg assert(dst.idx >= 1); 1749848b8605Smrg emit_2ub(p, 0xde, 0xe8+dst.idx); 1750848b8605Smrg note_x87_pop(p); 1751848b8605Smrg} 1752848b8605Smrg 1753848b8605Smrgvoid x87_fsubrp( struct x86_function *p, struct x86_reg dst ) 1754848b8605Smrg{ 1755848b8605Smrg DUMP_R( dst ); 1756848b8605Smrg assert(dst.file == file_x87); 1757848b8605Smrg assert(dst.idx >= 1); 1758848b8605Smrg emit_2ub(p, 0xde, 0xe0+dst.idx); 1759848b8605Smrg note_x87_pop(p); 1760848b8605Smrg} 1761848b8605Smrg 1762848b8605Smrgvoid x87_faddp( struct x86_function *p, struct x86_reg dst ) 1763848b8605Smrg{ 1764848b8605Smrg DUMP_R( dst ); 1765848b8605Smrg assert(dst.file == file_x87); 1766848b8605Smrg assert(dst.idx >= 1); 1767848b8605Smrg emit_2ub(p, 0xde, 0xc0+dst.idx); 1768848b8605Smrg note_x87_pop(p); 1769848b8605Smrg} 1770848b8605Smrg 1771848b8605Smrgvoid x87_fdivp( struct x86_function *p, struct x86_reg dst ) 1772848b8605Smrg{ 1773848b8605Smrg DUMP_R( dst ); 1774848b8605Smrg assert(dst.file == file_x87); 1775848b8605Smrg assert(dst.idx >= 1); 1776848b8605Smrg emit_2ub(p, 0xde, 0xf8+dst.idx); 1777848b8605Smrg note_x87_pop(p); 1778848b8605Smrg} 1779848b8605Smrg 1780848b8605Smrgvoid x87_fdivrp( struct x86_function *p, struct x86_reg dst ) 1781848b8605Smrg{ 1782848b8605Smrg DUMP_R( dst ); 1783848b8605Smrg assert(dst.file == file_x87); 1784848b8605Smrg assert(dst.idx >= 1); 1785848b8605Smrg emit_2ub(p, 0xde, 0xf0+dst.idx); 1786848b8605Smrg note_x87_pop(p); 1787848b8605Smrg} 1788848b8605Smrg 1789848b8605Smrgvoid x87_ftst( struct x86_function *p ) 1790848b8605Smrg{ 1791848b8605Smrg DUMP(); 1792848b8605Smrg emit_2ub(p, 0xd9, 0xe4); 1793848b8605Smrg} 1794848b8605Smrg 1795848b8605Smrgvoid x87_fucom( struct x86_function *p, struct x86_reg arg ) 1796848b8605Smrg{ 1797848b8605Smrg DUMP_R( arg ); 1798848b8605Smrg assert(arg.file == file_x87); 1799848b8605Smrg emit_2ub(p, 0xdd, 0xe0+arg.idx); 1800848b8605Smrg} 1801848b8605Smrg 1802848b8605Smrgvoid x87_fucomp( struct x86_function *p, struct x86_reg arg ) 1803848b8605Smrg{ 1804848b8605Smrg DUMP_R( arg ); 1805848b8605Smrg assert(arg.file == file_x87); 1806848b8605Smrg emit_2ub(p, 0xdd, 0xe8+arg.idx); 1807848b8605Smrg note_x87_pop(p); 1808848b8605Smrg} 1809848b8605Smrg 1810848b8605Smrgvoid x87_fucompp( struct x86_function *p ) 1811848b8605Smrg{ 1812848b8605Smrg DUMP(); 1813848b8605Smrg emit_2ub(p, 0xda, 0xe9); 1814848b8605Smrg note_x87_pop(p); /* pop twice */ 1815848b8605Smrg note_x87_pop(p); /* pop twice */ 1816848b8605Smrg} 1817848b8605Smrg 1818848b8605Smrgvoid x87_fxch( struct x86_function *p, struct x86_reg arg ) 1819848b8605Smrg{ 1820848b8605Smrg DUMP_R( arg ); 1821848b8605Smrg assert(arg.file == file_x87); 1822848b8605Smrg emit_2ub(p, 0xd9, 0xc8+arg.idx); 1823848b8605Smrg} 1824848b8605Smrg 1825848b8605Smrgvoid x87_fabs( struct x86_function *p ) 1826848b8605Smrg{ 1827848b8605Smrg DUMP(); 1828848b8605Smrg emit_2ub(p, 0xd9, 0xe1); 1829848b8605Smrg} 1830848b8605Smrg 1831848b8605Smrgvoid x87_fchs( struct x86_function *p ) 1832848b8605Smrg{ 1833848b8605Smrg DUMP(); 1834848b8605Smrg emit_2ub(p, 0xd9, 0xe0); 1835848b8605Smrg} 1836848b8605Smrg 1837848b8605Smrgvoid x87_fcos( struct x86_function *p ) 1838848b8605Smrg{ 1839848b8605Smrg DUMP(); 1840848b8605Smrg emit_2ub(p, 0xd9, 0xff); 1841848b8605Smrg} 1842848b8605Smrg 1843848b8605Smrg 1844848b8605Smrgvoid x87_fprndint( struct x86_function *p ) 1845848b8605Smrg{ 1846848b8605Smrg DUMP(); 1847848b8605Smrg emit_2ub(p, 0xd9, 0xfc); 1848848b8605Smrg} 1849848b8605Smrg 1850848b8605Smrgvoid x87_fscale( struct x86_function *p ) 1851848b8605Smrg{ 1852848b8605Smrg DUMP(); 1853848b8605Smrg emit_2ub(p, 0xd9, 0xfd); 1854848b8605Smrg} 1855848b8605Smrg 1856848b8605Smrgvoid x87_fsin( struct x86_function *p ) 1857848b8605Smrg{ 1858848b8605Smrg DUMP(); 1859848b8605Smrg emit_2ub(p, 0xd9, 0xfe); 1860848b8605Smrg} 1861848b8605Smrg 1862848b8605Smrgvoid x87_fsincos( struct x86_function *p ) 1863848b8605Smrg{ 1864848b8605Smrg DUMP(); 1865848b8605Smrg emit_2ub(p, 0xd9, 0xfb); 1866848b8605Smrg} 1867848b8605Smrg 1868848b8605Smrgvoid x87_fsqrt( struct x86_function *p ) 1869848b8605Smrg{ 1870848b8605Smrg DUMP(); 1871848b8605Smrg emit_2ub(p, 0xd9, 0xfa); 1872848b8605Smrg} 1873848b8605Smrg 1874848b8605Smrgvoid x87_fxtract( struct x86_function *p ) 1875848b8605Smrg{ 1876848b8605Smrg DUMP(); 1877848b8605Smrg emit_2ub(p, 0xd9, 0xf4); 1878848b8605Smrg} 1879848b8605Smrg 1880848b8605Smrg/* st0 = (2^st0)-1 1881848b8605Smrg * 1882848b8605Smrg * Restrictions: -1.0 <= st0 <= 1.0 1883848b8605Smrg */ 1884848b8605Smrgvoid x87_f2xm1( struct x86_function *p ) 1885848b8605Smrg{ 1886848b8605Smrg DUMP(); 1887848b8605Smrg emit_2ub(p, 0xd9, 0xf0); 1888848b8605Smrg} 1889848b8605Smrg 1890848b8605Smrg/* st1 = st1 * log2(st0); 1891848b8605Smrg * pop_stack; 1892848b8605Smrg */ 1893848b8605Smrgvoid x87_fyl2x( struct x86_function *p ) 1894848b8605Smrg{ 1895848b8605Smrg DUMP(); 1896848b8605Smrg emit_2ub(p, 0xd9, 0xf1); 1897848b8605Smrg note_x87_pop(p); 1898848b8605Smrg} 1899848b8605Smrg 1900848b8605Smrg/* st1 = st1 * log2(st0 + 1.0); 1901848b8605Smrg * pop_stack; 1902848b8605Smrg * 1903848b8605Smrg * A fast operation, with restrictions: -.29 < st0 < .29 1904848b8605Smrg */ 1905848b8605Smrgvoid x87_fyl2xp1( struct x86_function *p ) 1906848b8605Smrg{ 1907848b8605Smrg DUMP(); 1908848b8605Smrg emit_2ub(p, 0xd9, 0xf9); 1909848b8605Smrg note_x87_pop(p); 1910848b8605Smrg} 1911848b8605Smrg 1912848b8605Smrg 1913848b8605Smrgvoid x87_fld( struct x86_function *p, struct x86_reg arg ) 1914848b8605Smrg{ 1915848b8605Smrg DUMP_R( arg ); 1916848b8605Smrg if (arg.file == file_x87) 1917848b8605Smrg emit_2ub(p, 0xd9, 0xc0 + arg.idx); 1918848b8605Smrg else { 1919848b8605Smrg emit_1ub(p, 0xd9); 1920848b8605Smrg emit_modrm_noreg(p, 0, arg); 1921848b8605Smrg } 1922848b8605Smrg note_x87_push(p); 1923848b8605Smrg} 1924848b8605Smrg 1925848b8605Smrgvoid x87_fst( struct x86_function *p, struct x86_reg dst ) 1926848b8605Smrg{ 1927848b8605Smrg DUMP_R( dst ); 1928848b8605Smrg if (dst.file == file_x87) 1929848b8605Smrg emit_2ub(p, 0xdd, 0xd0 + dst.idx); 1930848b8605Smrg else { 1931848b8605Smrg emit_1ub(p, 0xd9); 1932848b8605Smrg emit_modrm_noreg(p, 2, dst); 1933848b8605Smrg } 1934848b8605Smrg} 1935848b8605Smrg 1936848b8605Smrgvoid x87_fstp( struct x86_function *p, struct x86_reg dst ) 1937848b8605Smrg{ 1938848b8605Smrg DUMP_R( dst ); 1939848b8605Smrg if (dst.file == file_x87) 1940848b8605Smrg emit_2ub(p, 0xdd, 0xd8 + dst.idx); 1941848b8605Smrg else { 1942848b8605Smrg emit_1ub(p, 0xd9); 1943848b8605Smrg emit_modrm_noreg(p, 3, dst); 1944848b8605Smrg } 1945848b8605Smrg note_x87_pop(p); 1946848b8605Smrg} 1947848b8605Smrg 1948848b8605Smrgvoid x87_fpop( struct x86_function *p ) 1949848b8605Smrg{ 1950848b8605Smrg x87_fstp( p, x86_make_reg( file_x87, 0 )); 1951848b8605Smrg} 1952848b8605Smrg 1953848b8605Smrg 1954848b8605Smrgvoid x87_fcom( struct x86_function *p, struct x86_reg dst ) 1955848b8605Smrg{ 1956848b8605Smrg DUMP_R( dst ); 1957848b8605Smrg if (dst.file == file_x87) 1958848b8605Smrg emit_2ub(p, 0xd8, 0xd0 + dst.idx); 1959848b8605Smrg else { 1960848b8605Smrg emit_1ub(p, 0xd8); 1961848b8605Smrg emit_modrm_noreg(p, 2, dst); 1962848b8605Smrg } 1963848b8605Smrg} 1964848b8605Smrg 1965848b8605Smrg 1966848b8605Smrgvoid x87_fcomp( struct x86_function *p, struct x86_reg dst ) 1967848b8605Smrg{ 1968848b8605Smrg DUMP_R( dst ); 1969848b8605Smrg if (dst.file == file_x87) 1970848b8605Smrg emit_2ub(p, 0xd8, 0xd8 + dst.idx); 1971848b8605Smrg else { 1972848b8605Smrg emit_1ub(p, 0xd8); 1973848b8605Smrg emit_modrm_noreg(p, 3, dst); 1974848b8605Smrg } 1975848b8605Smrg note_x87_pop(p); 1976848b8605Smrg} 1977848b8605Smrg 1978848b8605Smrgvoid x87_fcomi( struct x86_function *p, struct x86_reg arg ) 1979848b8605Smrg{ 1980848b8605Smrg DUMP_R( arg ); 1981848b8605Smrg emit_2ub(p, 0xdb, 0xf0+arg.idx); 1982848b8605Smrg} 1983848b8605Smrg 1984848b8605Smrgvoid x87_fcomip( struct x86_function *p, struct x86_reg arg ) 1985848b8605Smrg{ 1986848b8605Smrg DUMP_R( arg ); 1987848b8605Smrg emit_2ub(p, 0xdb, 0xf0+arg.idx); 1988848b8605Smrg note_x87_pop(p); 1989848b8605Smrg} 1990848b8605Smrg 1991848b8605Smrg 1992848b8605Smrgvoid x87_fnstsw( struct x86_function *p, struct x86_reg dst ) 1993848b8605Smrg{ 1994848b8605Smrg DUMP_R( dst ); 1995848b8605Smrg assert(dst.file == file_REG32); 1996848b8605Smrg 1997848b8605Smrg if (dst.idx == reg_AX && 1998848b8605Smrg dst.mod == mod_REG) 1999848b8605Smrg emit_2ub(p, 0xdf, 0xe0); 2000848b8605Smrg else { 2001848b8605Smrg emit_1ub(p, 0xdd); 2002848b8605Smrg emit_modrm_noreg(p, 7, dst); 2003848b8605Smrg } 2004848b8605Smrg} 2005848b8605Smrg 2006848b8605Smrg 2007848b8605Smrgvoid x87_fnstcw( struct x86_function *p, struct x86_reg dst ) 2008848b8605Smrg{ 2009848b8605Smrg DUMP_R( dst ); 2010848b8605Smrg assert(dst.file == file_REG32); 2011848b8605Smrg 2012848b8605Smrg emit_1ub(p, 0x9b); /* WAIT -- needed? */ 2013848b8605Smrg emit_1ub(p, 0xd9); 2014848b8605Smrg emit_modrm_noreg(p, 7, dst); 2015848b8605Smrg} 2016848b8605Smrg 2017848b8605Smrg 2018848b8605Smrg 2019848b8605Smrg 2020848b8605Smrg/*********************************************************************** 2021848b8605Smrg * MMX instructions 2022848b8605Smrg */ 2023848b8605Smrg 2024848b8605Smrgvoid mmx_emms( struct x86_function *p ) 2025848b8605Smrg{ 2026848b8605Smrg DUMP(); 2027848b8605Smrg assert(p->need_emms); 2028848b8605Smrg emit_2ub(p, 0x0f, 0x77); 2029848b8605Smrg p->need_emms = 0; 2030848b8605Smrg} 2031848b8605Smrg 2032848b8605Smrgvoid mmx_packssdw( struct x86_function *p, 2033848b8605Smrg struct x86_reg dst, 2034848b8605Smrg struct x86_reg src ) 2035848b8605Smrg{ 2036848b8605Smrg DUMP_RR( dst, src ); 2037848b8605Smrg assert(dst.file == file_MMX && 2038848b8605Smrg (src.file == file_MMX || src.mod != mod_REG)); 2039848b8605Smrg 2040848b8605Smrg p->need_emms = 1; 2041848b8605Smrg 2042848b8605Smrg emit_2ub(p, X86_TWOB, 0x6b); 2043848b8605Smrg emit_modrm( p, dst, src ); 2044848b8605Smrg} 2045848b8605Smrg 2046848b8605Smrgvoid mmx_packuswb( struct x86_function *p, 2047848b8605Smrg struct x86_reg dst, 2048848b8605Smrg struct x86_reg src ) 2049848b8605Smrg{ 2050848b8605Smrg DUMP_RR( dst, src ); 2051848b8605Smrg assert(dst.file == file_MMX && 2052848b8605Smrg (src.file == file_MMX || src.mod != mod_REG)); 2053848b8605Smrg 2054848b8605Smrg p->need_emms = 1; 2055848b8605Smrg 2056848b8605Smrg emit_2ub(p, X86_TWOB, 0x67); 2057848b8605Smrg emit_modrm( p, dst, src ); 2058848b8605Smrg} 2059848b8605Smrg 2060848b8605Smrgvoid mmx_movd( struct x86_function *p, 2061848b8605Smrg struct x86_reg dst, 2062848b8605Smrg struct x86_reg src ) 2063848b8605Smrg{ 2064848b8605Smrg DUMP_RR( dst, src ); 2065848b8605Smrg p->need_emms = 1; 2066848b8605Smrg emit_1ub(p, X86_TWOB); 2067848b8605Smrg emit_op_modrm( p, 0x6e, 0x7e, dst, src ); 2068848b8605Smrg} 2069848b8605Smrg 2070848b8605Smrgvoid mmx_movq( struct x86_function *p, 2071848b8605Smrg struct x86_reg dst, 2072848b8605Smrg struct x86_reg src ) 2073848b8605Smrg{ 2074848b8605Smrg DUMP_RR( dst, src ); 2075848b8605Smrg p->need_emms = 1; 2076848b8605Smrg emit_1ub(p, X86_TWOB); 2077848b8605Smrg emit_op_modrm( p, 0x6f, 0x7f, dst, src ); 2078848b8605Smrg} 2079848b8605Smrg 2080848b8605Smrg 2081848b8605Smrg/*********************************************************************** 2082848b8605Smrg * Helper functions 2083848b8605Smrg */ 2084848b8605Smrg 2085848b8605Smrg 2086848b8605Smrgvoid x86_cdecl_caller_push_regs( struct x86_function *p ) 2087848b8605Smrg{ 2088848b8605Smrg x86_push(p, x86_make_reg(file_REG32, reg_AX)); 2089848b8605Smrg x86_push(p, x86_make_reg(file_REG32, reg_CX)); 2090848b8605Smrg x86_push(p, x86_make_reg(file_REG32, reg_DX)); 2091848b8605Smrg} 2092848b8605Smrg 2093848b8605Smrgvoid x86_cdecl_caller_pop_regs( struct x86_function *p ) 2094848b8605Smrg{ 2095848b8605Smrg x86_pop(p, x86_make_reg(file_REG32, reg_DX)); 2096848b8605Smrg x86_pop(p, x86_make_reg(file_REG32, reg_CX)); 2097848b8605Smrg x86_pop(p, x86_make_reg(file_REG32, reg_AX)); 2098848b8605Smrg} 2099848b8605Smrg 2100848b8605Smrg 2101848b8605Smrgstruct x86_reg x86_fn_arg( struct x86_function *p, 2102848b8605Smrg unsigned arg ) 2103848b8605Smrg{ 2104848b8605Smrg switch(x86_target(p)) 2105848b8605Smrg { 2106848b8605Smrg case X86_64_WIN64_ABI: 2107848b8605Smrg /* Microsoft uses a different calling convention than the rest of the world */ 2108848b8605Smrg switch(arg) 2109848b8605Smrg { 2110848b8605Smrg case 1: 2111848b8605Smrg return x86_make_reg(file_REG32, reg_CX); 2112848b8605Smrg case 2: 2113848b8605Smrg return x86_make_reg(file_REG32, reg_DX); 2114848b8605Smrg case 3: 2115848b8605Smrg return x86_make_reg(file_REG32, reg_R8); 2116848b8605Smrg case 4: 2117848b8605Smrg return x86_make_reg(file_REG32, reg_R9); 2118848b8605Smrg default: 2119848b8605Smrg /* Win64 allocates stack slots as if it pushed the first 4 arguments too */ 2120848b8605Smrg return x86_make_disp(x86_make_reg(file_REG32, reg_SP), 2121848b8605Smrg p->stack_offset + arg * 8); 2122848b8605Smrg } 2123848b8605Smrg case X86_64_STD_ABI: 2124848b8605Smrg switch(arg) 2125848b8605Smrg { 2126848b8605Smrg case 1: 2127848b8605Smrg return x86_make_reg(file_REG32, reg_DI); 2128848b8605Smrg case 2: 2129848b8605Smrg return x86_make_reg(file_REG32, reg_SI); 2130848b8605Smrg case 3: 2131848b8605Smrg return x86_make_reg(file_REG32, reg_DX); 2132848b8605Smrg case 4: 2133848b8605Smrg return x86_make_reg(file_REG32, reg_CX); 2134848b8605Smrg case 5: 2135848b8605Smrg return x86_make_reg(file_REG32, reg_R8); 2136848b8605Smrg case 6: 2137848b8605Smrg return x86_make_reg(file_REG32, reg_R9); 2138848b8605Smrg default: 2139848b8605Smrg return x86_make_disp(x86_make_reg(file_REG32, reg_SP), 2140848b8605Smrg p->stack_offset + (arg - 6) * 8); /* ??? */ 2141848b8605Smrg } 2142848b8605Smrg case X86_32: 2143848b8605Smrg return x86_make_disp(x86_make_reg(file_REG32, reg_SP), 2144848b8605Smrg p->stack_offset + arg * 4); /* ??? */ 2145848b8605Smrg default: 2146848b8605Smrg assert(0 && "Unexpected x86 target ABI in x86_fn_arg"); 2147848b8605Smrg return x86_make_reg(file_REG32, reg_CX); /* not used / silence warning */ 2148848b8605Smrg } 2149848b8605Smrg} 2150848b8605Smrg 2151848b8605Smrgstatic void x86_init_func_common( struct x86_function *p ) 2152848b8605Smrg{ 2153848b8605Smrg util_cpu_detect(); 2154848b8605Smrg p->caps = 0; 2155848b8605Smrg if(util_cpu_caps.has_mmx) 2156848b8605Smrg p->caps |= X86_MMX; 2157848b8605Smrg if(util_cpu_caps.has_mmx2) 2158848b8605Smrg p->caps |= X86_MMX2; 2159848b8605Smrg if(util_cpu_caps.has_sse) 2160848b8605Smrg p->caps |= X86_SSE; 2161848b8605Smrg if(util_cpu_caps.has_sse2) 2162848b8605Smrg p->caps |= X86_SSE2; 2163848b8605Smrg if(util_cpu_caps.has_sse3) 2164848b8605Smrg p->caps |= X86_SSE3; 2165848b8605Smrg if(util_cpu_caps.has_sse4_1) 2166848b8605Smrg p->caps |= X86_SSE4_1; 2167848b8605Smrg p->csr = p->store; 2168848b8605Smrg DUMP_START(); 2169848b8605Smrg} 2170848b8605Smrg 2171848b8605Smrgvoid x86_init_func( struct x86_function *p ) 2172848b8605Smrg{ 2173848b8605Smrg p->size = 0; 2174848b8605Smrg p->store = NULL; 2175848b8605Smrg x86_init_func_common(p); 2176848b8605Smrg} 2177848b8605Smrg 2178848b8605Smrgvoid x86_init_func_size( struct x86_function *p, unsigned code_size ) 2179848b8605Smrg{ 2180848b8605Smrg p->size = code_size; 2181848b8605Smrg p->store = rtasm_exec_malloc(code_size); 2182848b8605Smrg if (p->store == NULL) { 2183848b8605Smrg p->store = p->error_overflow; 2184848b8605Smrg } 2185848b8605Smrg x86_init_func_common(p); 2186848b8605Smrg} 2187848b8605Smrg 2188848b8605Smrgvoid x86_release_func( struct x86_function *p ) 2189848b8605Smrg{ 2190848b8605Smrg if (p->store && p->store != p->error_overflow) 2191848b8605Smrg rtasm_exec_free(p->store); 2192848b8605Smrg 2193848b8605Smrg p->store = NULL; 2194848b8605Smrg p->csr = NULL; 2195848b8605Smrg p->size = 0; 2196848b8605Smrg} 2197848b8605Smrg 2198848b8605Smrg 2199b8e80941Smrgstatic inline x86_func 2200848b8605Smrgvoidptr_to_x86_func(void *v) 2201848b8605Smrg{ 2202848b8605Smrg union { 2203848b8605Smrg void *v; 2204848b8605Smrg x86_func f; 2205848b8605Smrg } u; 2206b8e80941Smrg STATIC_ASSERT(sizeof(u.v) == sizeof(u.f)); 2207848b8605Smrg u.v = v; 2208848b8605Smrg return u.f; 2209848b8605Smrg} 2210848b8605Smrg 2211848b8605Smrg 2212848b8605Smrgx86_func x86_get_func( struct x86_function *p ) 2213848b8605Smrg{ 2214848b8605Smrg DUMP_END(); 2215848b8605Smrg if (DISASSEM && p->store) 2216848b8605Smrg debug_printf("disassemble %p %p\n", p->store, p->csr); 2217848b8605Smrg 2218848b8605Smrg if (p->store == p->error_overflow) 2219848b8605Smrg return voidptr_to_x86_func(NULL); 2220848b8605Smrg else 2221848b8605Smrg return voidptr_to_x86_func(p->store); 2222848b8605Smrg} 2223848b8605Smrg 2224848b8605Smrg#else 2225848b8605Smrg 2226848b8605Smrgvoid x86sse_dummy( void ); 2227848b8605Smrg 2228848b8605Smrgvoid x86sse_dummy( void ) 2229848b8605Smrg{ 2230848b8605Smrg} 2231848b8605Smrg 2232848b8605Smrg#endif 2233