14a49301eSmrg#ifdef USE_X86_ASM 27117f1b4Smrg#if defined(__i386__) || defined(__386__) 37117f1b4Smrg 401e04c3fSmrg#include <stdio.h> 57ec681f3Smrg#include <string.h> 67ec681f3Smrg#include <assert.h> 701e04c3fSmrg 801e04c3fSmrg#include "main/execmem.h" 97117f1b4Smrg#include "x86sse.h" 107117f1b4Smrg 117117f1b4Smrg#define DISASSEM 0 127117f1b4Smrg#define X86_TWOB 0x0f 137117f1b4Smrg 144a49301eSmrg#if 0 157117f1b4Smrgstatic unsigned char *cptr( void (*label)() ) 167117f1b4Smrg{ 177117f1b4Smrg return (unsigned char *)(unsigned long)label; 187117f1b4Smrg} 194a49301eSmrg#endif 207117f1b4Smrg 217117f1b4Smrg 227117f1b4Smrgstatic void do_realloc( struct x86_function *p ) 237117f1b4Smrg{ 247117f1b4Smrg if (p->size == 0) { 257117f1b4Smrg p->size = 1024; 267117f1b4Smrg p->store = _mesa_exec_malloc(p->size); 277117f1b4Smrg p->csr = p->store; 287117f1b4Smrg } 297117f1b4Smrg else { 307117f1b4Smrg unsigned used = p->csr - p->store; 317117f1b4Smrg unsigned char *tmp = p->store; 327117f1b4Smrg p->size *= 2; 337117f1b4Smrg p->store = _mesa_exec_malloc(p->size); 347117f1b4Smrg memcpy(p->store, tmp, used); 357117f1b4Smrg p->csr = p->store + used; 367117f1b4Smrg _mesa_exec_free(tmp); 377117f1b4Smrg } 387117f1b4Smrg} 397117f1b4Smrg 407117f1b4Smrg/* Emit bytes to the instruction stream: 417117f1b4Smrg */ 427117f1b4Smrgstatic unsigned char *reserve( struct x86_function *p, int bytes ) 437117f1b4Smrg{ 447117f1b4Smrg if (p->csr + bytes - p->store > p->size) 457117f1b4Smrg do_realloc(p); 467117f1b4Smrg 477117f1b4Smrg { 487117f1b4Smrg unsigned char *csr = p->csr; 497117f1b4Smrg p->csr += bytes; 507117f1b4Smrg return csr; 517117f1b4Smrg } 527117f1b4Smrg} 537117f1b4Smrg 547117f1b4Smrg 557117f1b4Smrg 567117f1b4Smrgstatic void emit_1b( struct x86_function *p, char b0 ) 577117f1b4Smrg{ 587117f1b4Smrg char *csr = (char *)reserve(p, 1); 597117f1b4Smrg *csr = b0; 607117f1b4Smrg} 617117f1b4Smrg 627117f1b4Smrgstatic void emit_1i( struct x86_function *p, int i0 ) 637117f1b4Smrg{ 647117f1b4Smrg int *icsr = (int *)reserve(p, sizeof(i0)); 657117f1b4Smrg *icsr = i0; 667117f1b4Smrg} 677117f1b4Smrg 687117f1b4Smrgstatic void emit_1ub( struct x86_function *p, unsigned char b0 ) 697117f1b4Smrg{ 707117f1b4Smrg unsigned char *csr = reserve(p, 1); 717117f1b4Smrg *csr++ = b0; 727117f1b4Smrg} 737117f1b4Smrg 747117f1b4Smrgstatic void emit_2ub( struct x86_function *p, unsigned char b0, unsigned char b1 ) 757117f1b4Smrg{ 767117f1b4Smrg unsigned char *csr = reserve(p, 2); 777117f1b4Smrg *csr++ = b0; 787117f1b4Smrg *csr++ = b1; 797117f1b4Smrg} 807117f1b4Smrg 817117f1b4Smrgstatic void emit_3ub( struct x86_function *p, unsigned char b0, unsigned char b1, unsigned char b2 ) 827117f1b4Smrg{ 837117f1b4Smrg unsigned char *csr = reserve(p, 3); 847117f1b4Smrg *csr++ = b0; 857117f1b4Smrg *csr++ = b1; 867117f1b4Smrg *csr++ = b2; 877117f1b4Smrg} 887117f1b4Smrg 897117f1b4Smrg 907117f1b4Smrg/* Build a modRM byte + possible displacement. No treatment of SIB 917117f1b4Smrg * indexing. BZZT - no way to encode an absolute address. 927117f1b4Smrg */ 937ec681f3Smrgstatic void emit_modrm( struct x86_function *p, 947ec681f3Smrg struct x86_reg reg, 957117f1b4Smrg struct x86_reg regmem ) 967117f1b4Smrg{ 977117f1b4Smrg unsigned char val = 0; 987ec681f3Smrg 997117f1b4Smrg assert(reg.mod == mod_REG); 1007ec681f3Smrg 1017117f1b4Smrg val |= regmem.mod << 6; /* mod field */ 1027117f1b4Smrg val |= reg.idx << 3; /* reg field */ 1037117f1b4Smrg val |= regmem.idx; /* r/m field */ 1047ec681f3Smrg 1057117f1b4Smrg emit_1ub(p, val); 1067117f1b4Smrg 1077117f1b4Smrg /* Oh-oh we've stumbled into the SIB thing. 1087117f1b4Smrg */ 1097117f1b4Smrg if (regmem.file == file_REG32 && 1107117f1b4Smrg regmem.idx == reg_SP) { 1117117f1b4Smrg emit_1ub(p, 0x24); /* simplistic! */ 1127117f1b4Smrg } 1137117f1b4Smrg 1147117f1b4Smrg switch (regmem.mod) { 1157117f1b4Smrg case mod_REG: 1167117f1b4Smrg case mod_INDIRECT: 1177117f1b4Smrg break; 1187117f1b4Smrg case mod_DISP8: 1197117f1b4Smrg emit_1b(p, regmem.disp); 1207117f1b4Smrg break; 1217117f1b4Smrg case mod_DISP32: 1227117f1b4Smrg emit_1i(p, regmem.disp); 1237117f1b4Smrg break; 1247117f1b4Smrg default: 1257117f1b4Smrg assert(0); 1267117f1b4Smrg break; 1277117f1b4Smrg } 1287117f1b4Smrg} 1297117f1b4Smrg 1307117f1b4Smrg 1317117f1b4Smrgstatic void emit_modrm_noreg( struct x86_function *p, 1327117f1b4Smrg unsigned op, 1337117f1b4Smrg struct x86_reg regmem ) 1347117f1b4Smrg{ 1357117f1b4Smrg struct x86_reg dummy = x86_make_reg(file_REG32, op); 1367117f1b4Smrg emit_modrm(p, dummy, regmem); 1377117f1b4Smrg} 1387117f1b4Smrg 1397117f1b4Smrg/* Many x86 instructions have two opcodes to cope with the situations 1407117f1b4Smrg * where the destination is a register or memory reference 1417117f1b4Smrg * respectively. This function selects the correct opcode based on 1427117f1b4Smrg * the arguments presented. 1437117f1b4Smrg */ 1447117f1b4Smrgstatic void emit_op_modrm( struct x86_function *p, 1457ec681f3Smrg unsigned char op_dst_is_reg, 1467117f1b4Smrg unsigned char op_dst_is_mem, 1477117f1b4Smrg struct x86_reg dst, 1487117f1b4Smrg struct x86_reg src ) 1497ec681f3Smrg{ 1507117f1b4Smrg switch (dst.mod) { 1517117f1b4Smrg case mod_REG: 1527117f1b4Smrg emit_1ub(p, op_dst_is_reg); 1537117f1b4Smrg emit_modrm(p, dst, src); 1547117f1b4Smrg break; 1557117f1b4Smrg case mod_INDIRECT: 1567117f1b4Smrg case mod_DISP32: 1577117f1b4Smrg case mod_DISP8: 1587117f1b4Smrg assert(src.mod == mod_REG); 1597117f1b4Smrg emit_1ub(p, op_dst_is_mem); 1607117f1b4Smrg emit_modrm(p, src, dst); 1617117f1b4Smrg break; 1627117f1b4Smrg default: 1637117f1b4Smrg assert(0); 1647117f1b4Smrg break; 1657117f1b4Smrg } 1667117f1b4Smrg} 1677117f1b4Smrg 1687117f1b4Smrg 1697117f1b4Smrg 1707117f1b4Smrg 1717117f1b4Smrg 1727117f1b4Smrg 1737117f1b4Smrg 1747117f1b4Smrg/* Create and manipulate registers and regmem values: 1757117f1b4Smrg */ 1767117f1b4Smrgstruct x86_reg x86_make_reg( enum x86_reg_file file, 1777117f1b4Smrg enum x86_reg_name idx ) 1787117f1b4Smrg{ 1797117f1b4Smrg struct x86_reg reg; 1807117f1b4Smrg 1817117f1b4Smrg reg.file = file; 1827117f1b4Smrg reg.idx = idx; 1837117f1b4Smrg reg.mod = mod_REG; 1847117f1b4Smrg reg.disp = 0; 1857117f1b4Smrg 1867117f1b4Smrg return reg; 1877117f1b4Smrg} 1887117f1b4Smrg 1897117f1b4Smrgstruct x86_reg x86_make_disp( struct x86_reg reg, 1907117f1b4Smrg int disp ) 1917117f1b4Smrg{ 1927117f1b4Smrg assert(reg.file == file_REG32); 1937117f1b4Smrg 1947117f1b4Smrg if (reg.mod == mod_REG) 1957117f1b4Smrg reg.disp = disp; 1967117f1b4Smrg else 1977117f1b4Smrg reg.disp += disp; 1987117f1b4Smrg 1997117f1b4Smrg if (reg.disp == 0) 2007117f1b4Smrg reg.mod = mod_INDIRECT; 2017117f1b4Smrg else if (reg.disp <= 127 && reg.disp >= -128) 2027117f1b4Smrg reg.mod = mod_DISP8; 2037117f1b4Smrg else 2047117f1b4Smrg reg.mod = mod_DISP32; 2057117f1b4Smrg 2067117f1b4Smrg return reg; 2077117f1b4Smrg} 2087117f1b4Smrg 2097117f1b4Smrgstruct x86_reg x86_deref( struct x86_reg reg ) 2107117f1b4Smrg{ 2117117f1b4Smrg return x86_make_disp(reg, 0); 2127117f1b4Smrg} 2137117f1b4Smrg 2147117f1b4Smrgstruct x86_reg x86_get_base_reg( struct x86_reg reg ) 2157117f1b4Smrg{ 2167117f1b4Smrg return x86_make_reg( reg.file, reg.idx ); 2177117f1b4Smrg} 2187117f1b4Smrg 2197117f1b4Smrgunsigned char *x86_get_label( struct x86_function *p ) 2207117f1b4Smrg{ 2217117f1b4Smrg return p->csr; 2227117f1b4Smrg} 2237117f1b4Smrg 2247117f1b4Smrg 2257117f1b4Smrg 2267117f1b4Smrg/*********************************************************************** 2277117f1b4Smrg * x86 instructions 2287117f1b4Smrg */ 2297117f1b4Smrg 2307117f1b4Smrg 2317117f1b4Smrgvoid x86_jcc( struct x86_function *p, 2327117f1b4Smrg enum x86_cc cc, 2337117f1b4Smrg unsigned char *label ) 2347117f1b4Smrg{ 2357117f1b4Smrg int offset = label - (x86_get_label(p) + 2); 2367ec681f3Smrg 2377117f1b4Smrg if (offset <= 127 && offset >= -128) { 2387117f1b4Smrg emit_1ub(p, 0x70 + cc); 2397117f1b4Smrg emit_1b(p, (char) offset); 2407117f1b4Smrg } 2417117f1b4Smrg else { 2427117f1b4Smrg offset = label - (x86_get_label(p) + 6); 2437117f1b4Smrg emit_2ub(p, 0x0f, 0x80 + cc); 2447117f1b4Smrg emit_1i(p, offset); 2457117f1b4Smrg } 2467117f1b4Smrg} 2477117f1b4Smrg 2487117f1b4Smrg/* Always use a 32bit offset for forward jumps: 2497117f1b4Smrg */ 2507117f1b4Smrgunsigned char *x86_jcc_forward( struct x86_function *p, 2517117f1b4Smrg enum x86_cc cc ) 2527117f1b4Smrg{ 2537117f1b4Smrg emit_2ub(p, 0x0f, 0x80 + cc); 2547117f1b4Smrg emit_1i(p, 0); 2557117f1b4Smrg return x86_get_label(p); 2567117f1b4Smrg} 2577117f1b4Smrg 2587117f1b4Smrgunsigned char *x86_jmp_forward( struct x86_function *p) 2597117f1b4Smrg{ 2607117f1b4Smrg emit_1ub(p, 0xe9); 2617117f1b4Smrg emit_1i(p, 0); 2627117f1b4Smrg return x86_get_label(p); 2637117f1b4Smrg} 2647117f1b4Smrg 2657117f1b4Smrgunsigned char *x86_call_forward( struct x86_function *p) 2667117f1b4Smrg{ 2677117f1b4Smrg emit_1ub(p, 0xe8); 2687117f1b4Smrg emit_1i(p, 0); 2697117f1b4Smrg return x86_get_label(p); 2707117f1b4Smrg} 2717117f1b4Smrg 2727117f1b4Smrg/* Fixup offset from forward jump: 2737117f1b4Smrg */ 2747117f1b4Smrgvoid x86_fixup_fwd_jump( struct x86_function *p, 2757117f1b4Smrg unsigned char *fixup ) 2767117f1b4Smrg{ 2777117f1b4Smrg *(int *)(fixup - 4) = x86_get_label(p) - fixup; 2787117f1b4Smrg} 2797117f1b4Smrg 2807117f1b4Smrgvoid x86_jmp( struct x86_function *p, unsigned char *label) 2817117f1b4Smrg{ 2827117f1b4Smrg emit_1ub(p, 0xe9); 2837117f1b4Smrg emit_1i(p, label - x86_get_label(p) - 4); 2847117f1b4Smrg} 2857117f1b4Smrg 2867117f1b4Smrg#if 0 2877117f1b4Smrg/* This doesn't work once we start reallocating & copying the 2887117f1b4Smrg * generated code on buffer fills, because the call is relative to the 2897117f1b4Smrg * current pc. 2907117f1b4Smrg */ 2917117f1b4Smrgvoid x86_call( struct x86_function *p, void (*label)()) 2927117f1b4Smrg{ 2937117f1b4Smrg emit_1ub(p, 0xe8); 2947117f1b4Smrg emit_1i(p, cptr(label) - x86_get_label(p) - 4); 2957117f1b4Smrg} 2967117f1b4Smrg#else 2977117f1b4Smrgvoid x86_call( struct x86_function *p, struct x86_reg reg) 2987117f1b4Smrg{ 2997117f1b4Smrg emit_1ub(p, 0xff); 3004a49301eSmrg emit_modrm_noreg(p, 2, reg); 3017117f1b4Smrg} 3027117f1b4Smrg#endif 3037117f1b4Smrg 3047117f1b4Smrg 3057117f1b4Smrg/* michal: 3067117f1b4Smrg * Temporary. As I need immediate operands, and dont want to mess with the codegen, 3077117f1b4Smrg * I load the immediate into general purpose register and use it. 3087117f1b4Smrg */ 3097117f1b4Smrgvoid x86_mov_reg_imm( struct x86_function *p, struct x86_reg dst, int imm ) 3107117f1b4Smrg{ 3117117f1b4Smrg assert(dst.mod == mod_REG); 3127117f1b4Smrg emit_1ub(p, 0xb8 + dst.idx); 3137117f1b4Smrg emit_1i(p, imm); 3147117f1b4Smrg} 3157117f1b4Smrg 3167117f1b4Smrgvoid x86_push( struct x86_function *p, 3177117f1b4Smrg struct x86_reg reg ) 3187117f1b4Smrg{ 3197117f1b4Smrg assert(reg.mod == mod_REG); 3207117f1b4Smrg emit_1ub(p, 0x50 + reg.idx); 3217117f1b4Smrg p->stack_offset += 4; 3227117f1b4Smrg} 3237117f1b4Smrg 3247117f1b4Smrgvoid x86_pop( struct x86_function *p, 3257117f1b4Smrg struct x86_reg reg ) 3267117f1b4Smrg{ 3277117f1b4Smrg assert(reg.mod == mod_REG); 3287117f1b4Smrg emit_1ub(p, 0x58 + reg.idx); 3297117f1b4Smrg p->stack_offset -= 4; 3307117f1b4Smrg} 3317117f1b4Smrg 3327117f1b4Smrgvoid x86_inc( struct x86_function *p, 3337117f1b4Smrg struct x86_reg reg ) 3347117f1b4Smrg{ 3357117f1b4Smrg assert(reg.mod == mod_REG); 3367117f1b4Smrg emit_1ub(p, 0x40 + reg.idx); 3377117f1b4Smrg} 3387117f1b4Smrg 3397117f1b4Smrgvoid x86_dec( struct x86_function *p, 3407117f1b4Smrg struct x86_reg reg ) 3417117f1b4Smrg{ 3427117f1b4Smrg assert(reg.mod == mod_REG); 3437117f1b4Smrg emit_1ub(p, 0x48 + reg.idx); 3447117f1b4Smrg} 3457117f1b4Smrg 3467117f1b4Smrgvoid x86_ret( struct x86_function *p ) 3477117f1b4Smrg{ 3487117f1b4Smrg emit_1ub(p, 0xc3); 3497117f1b4Smrg} 3507117f1b4Smrg 3517117f1b4Smrgvoid x86_sahf( struct x86_function *p ) 3527117f1b4Smrg{ 3537117f1b4Smrg emit_1ub(p, 0x9e); 3547117f1b4Smrg} 3557117f1b4Smrg 3567117f1b4Smrgvoid x86_mov( struct x86_function *p, 3577117f1b4Smrg struct x86_reg dst, 3587117f1b4Smrg struct x86_reg src ) 3597117f1b4Smrg{ 3607117f1b4Smrg emit_op_modrm( p, 0x8b, 0x89, dst, src ); 3617117f1b4Smrg} 3627117f1b4Smrg 3637117f1b4Smrgvoid x86_xor( struct x86_function *p, 3647117f1b4Smrg struct x86_reg dst, 3657117f1b4Smrg struct x86_reg src ) 3667117f1b4Smrg{ 3677117f1b4Smrg emit_op_modrm( p, 0x33, 0x31, dst, src ); 3687117f1b4Smrg} 3697117f1b4Smrg 3707117f1b4Smrgvoid x86_cmp( struct x86_function *p, 3717117f1b4Smrg struct x86_reg dst, 3727117f1b4Smrg struct x86_reg src ) 3737117f1b4Smrg{ 3747117f1b4Smrg emit_op_modrm( p, 0x3b, 0x39, dst, src ); 3757117f1b4Smrg} 3767117f1b4Smrg 3777117f1b4Smrgvoid x86_lea( struct x86_function *p, 3787117f1b4Smrg struct x86_reg dst, 3797117f1b4Smrg struct x86_reg src ) 3807117f1b4Smrg{ 3817117f1b4Smrg emit_1ub(p, 0x8d); 3827117f1b4Smrg emit_modrm( p, dst, src ); 3837117f1b4Smrg} 3847117f1b4Smrg 3857117f1b4Smrgvoid x86_test( struct x86_function *p, 3867117f1b4Smrg struct x86_reg dst, 3877117f1b4Smrg struct x86_reg src ) 3887117f1b4Smrg{ 3897117f1b4Smrg emit_1ub(p, 0x85); 3907117f1b4Smrg emit_modrm( p, dst, src ); 3917117f1b4Smrg} 3927117f1b4Smrg 3937117f1b4Smrgvoid x86_add( struct x86_function *p, 3947117f1b4Smrg struct x86_reg dst, 3957117f1b4Smrg struct x86_reg src ) 3967117f1b4Smrg{ 3977117f1b4Smrg emit_op_modrm(p, 0x03, 0x01, dst, src ); 3987117f1b4Smrg} 3997117f1b4Smrg 4007117f1b4Smrgvoid x86_mul( struct x86_function *p, 4017117f1b4Smrg struct x86_reg src ) 4027117f1b4Smrg{ 4037117f1b4Smrg assert (src.file == file_REG32 && src.mod == mod_REG); 4047117f1b4Smrg emit_op_modrm(p, 0xf7, 0, x86_make_reg (file_REG32, reg_SP), src ); 4057117f1b4Smrg} 4067117f1b4Smrg 4077117f1b4Smrgvoid x86_sub( struct x86_function *p, 4087117f1b4Smrg struct x86_reg dst, 4097117f1b4Smrg struct x86_reg src ) 4107117f1b4Smrg{ 4117117f1b4Smrg emit_op_modrm(p, 0x2b, 0x29, dst, src ); 4127117f1b4Smrg} 4137117f1b4Smrg 4147117f1b4Smrgvoid x86_or( struct x86_function *p, 4157117f1b4Smrg struct x86_reg dst, 4167117f1b4Smrg struct x86_reg src ) 4177117f1b4Smrg{ 4187117f1b4Smrg emit_op_modrm( p, 0x0b, 0x09, dst, src ); 4197117f1b4Smrg} 4207117f1b4Smrg 4217117f1b4Smrgvoid x86_and( struct x86_function *p, 4227117f1b4Smrg struct x86_reg dst, 4237117f1b4Smrg struct x86_reg src ) 4247117f1b4Smrg{ 4257117f1b4Smrg emit_op_modrm( p, 0x23, 0x21, dst, src ); 4267117f1b4Smrg} 4277117f1b4Smrg 4287117f1b4Smrg 4297117f1b4Smrg 4307117f1b4Smrg/*********************************************************************** 4317117f1b4Smrg * SSE instructions 4327117f1b4Smrg */ 4337117f1b4Smrg 4347117f1b4Smrg 4357117f1b4Smrgvoid sse_movss( struct x86_function *p, 4367117f1b4Smrg struct x86_reg dst, 4377117f1b4Smrg struct x86_reg src ) 4387117f1b4Smrg{ 4397117f1b4Smrg emit_2ub(p, 0xF3, X86_TWOB); 4407117f1b4Smrg emit_op_modrm( p, 0x10, 0x11, dst, src ); 4417117f1b4Smrg} 4427117f1b4Smrg 4437117f1b4Smrgvoid sse_movaps( struct x86_function *p, 4447117f1b4Smrg struct x86_reg dst, 4457117f1b4Smrg struct x86_reg src ) 4467117f1b4Smrg{ 4477117f1b4Smrg emit_1ub(p, X86_TWOB); 4487117f1b4Smrg emit_op_modrm( p, 0x28, 0x29, dst, src ); 4497117f1b4Smrg} 4507117f1b4Smrg 4517117f1b4Smrgvoid sse_movups( struct x86_function *p, 4527117f1b4Smrg struct x86_reg dst, 4537117f1b4Smrg struct x86_reg src ) 4547117f1b4Smrg{ 4557117f1b4Smrg emit_1ub(p, X86_TWOB); 4567117f1b4Smrg emit_op_modrm( p, 0x10, 0x11, dst, src ); 4577117f1b4Smrg} 4587117f1b4Smrg 4597117f1b4Smrgvoid sse_movhps( struct x86_function *p, 4607117f1b4Smrg struct x86_reg dst, 4617117f1b4Smrg struct x86_reg src ) 4627117f1b4Smrg{ 4637117f1b4Smrg assert(dst.mod != mod_REG || src.mod != mod_REG); 4647117f1b4Smrg emit_1ub(p, X86_TWOB); 4657117f1b4Smrg emit_op_modrm( p, 0x16, 0x17, dst, src ); /* cf movlhps */ 4667117f1b4Smrg} 4677117f1b4Smrg 4687117f1b4Smrgvoid sse_movlps( struct x86_function *p, 4697117f1b4Smrg struct x86_reg dst, 4707117f1b4Smrg struct x86_reg src ) 4717117f1b4Smrg{ 4727117f1b4Smrg assert(dst.mod != mod_REG || src.mod != mod_REG); 4737117f1b4Smrg emit_1ub(p, X86_TWOB); 4747117f1b4Smrg emit_op_modrm( p, 0x12, 0x13, dst, src ); /* cf movhlps */ 4757117f1b4Smrg} 4767117f1b4Smrg 4777117f1b4Smrgvoid sse_maxps( struct x86_function *p, 4787117f1b4Smrg struct x86_reg dst, 4797117f1b4Smrg struct x86_reg src ) 4807117f1b4Smrg{ 4817117f1b4Smrg emit_2ub(p, X86_TWOB, 0x5F); 4827117f1b4Smrg emit_modrm( p, dst, src ); 4837117f1b4Smrg} 4847117f1b4Smrg 4857117f1b4Smrgvoid sse_maxss( struct x86_function *p, 4867117f1b4Smrg struct x86_reg dst, 4877117f1b4Smrg struct x86_reg src ) 4887117f1b4Smrg{ 4897117f1b4Smrg emit_3ub(p, 0xF3, X86_TWOB, 0x5F); 4907117f1b4Smrg emit_modrm( p, dst, src ); 4917117f1b4Smrg} 4927117f1b4Smrg 4937117f1b4Smrgvoid sse_divss( struct x86_function *p, 4947117f1b4Smrg struct x86_reg dst, 4957117f1b4Smrg struct x86_reg src ) 4967117f1b4Smrg{ 4977117f1b4Smrg emit_3ub(p, 0xF3, X86_TWOB, 0x5E); 4987117f1b4Smrg emit_modrm( p, dst, src ); 4997117f1b4Smrg} 5007117f1b4Smrg 5017117f1b4Smrgvoid sse_minps( struct x86_function *p, 5027117f1b4Smrg struct x86_reg dst, 5037117f1b4Smrg struct x86_reg src ) 5047117f1b4Smrg{ 5057117f1b4Smrg emit_2ub(p, X86_TWOB, 0x5D); 5067117f1b4Smrg emit_modrm( p, dst, src ); 5077117f1b4Smrg} 5087117f1b4Smrg 5097117f1b4Smrgvoid sse_subps( struct x86_function *p, 5107117f1b4Smrg struct x86_reg dst, 5117117f1b4Smrg struct x86_reg src ) 5127117f1b4Smrg{ 5137117f1b4Smrg emit_2ub(p, X86_TWOB, 0x5C); 5147117f1b4Smrg emit_modrm( p, dst, src ); 5157117f1b4Smrg} 5167117f1b4Smrg 5177117f1b4Smrgvoid sse_mulps( struct x86_function *p, 5187117f1b4Smrg struct x86_reg dst, 5197117f1b4Smrg struct x86_reg src ) 5207117f1b4Smrg{ 5217117f1b4Smrg emit_2ub(p, X86_TWOB, 0x59); 5227117f1b4Smrg emit_modrm( p, dst, src ); 5237117f1b4Smrg} 5247117f1b4Smrg 5257117f1b4Smrgvoid sse_mulss( struct x86_function *p, 5267117f1b4Smrg struct x86_reg dst, 5277117f1b4Smrg struct x86_reg src ) 5287117f1b4Smrg{ 5297117f1b4Smrg emit_3ub(p, 0xF3, X86_TWOB, 0x59); 5307117f1b4Smrg emit_modrm( p, dst, src ); 5317117f1b4Smrg} 5327117f1b4Smrg 5337117f1b4Smrgvoid sse_addps( struct x86_function *p, 5347117f1b4Smrg struct x86_reg dst, 5357117f1b4Smrg struct x86_reg src ) 5367117f1b4Smrg{ 5377117f1b4Smrg emit_2ub(p, X86_TWOB, 0x58); 5387117f1b4Smrg emit_modrm( p, dst, src ); 5397117f1b4Smrg} 5407117f1b4Smrg 5417117f1b4Smrgvoid sse_addss( struct x86_function *p, 5427117f1b4Smrg struct x86_reg dst, 5437117f1b4Smrg struct x86_reg src ) 5447117f1b4Smrg{ 5457117f1b4Smrg emit_3ub(p, 0xF3, X86_TWOB, 0x58); 5467117f1b4Smrg emit_modrm( p, dst, src ); 5477117f1b4Smrg} 5487117f1b4Smrg 5497117f1b4Smrgvoid sse_andnps( struct x86_function *p, 5507117f1b4Smrg struct x86_reg dst, 5517117f1b4Smrg struct x86_reg src ) 5527117f1b4Smrg{ 5537117f1b4Smrg emit_2ub(p, X86_TWOB, 0x55); 5547117f1b4Smrg emit_modrm( p, dst, src ); 5557117f1b4Smrg} 5567117f1b4Smrg 5577117f1b4Smrgvoid sse_andps( struct x86_function *p, 5587117f1b4Smrg struct x86_reg dst, 5597117f1b4Smrg struct x86_reg src ) 5607117f1b4Smrg{ 5617117f1b4Smrg emit_2ub(p, X86_TWOB, 0x54); 5627117f1b4Smrg emit_modrm( p, dst, src ); 5637117f1b4Smrg} 5647117f1b4Smrg 5657117f1b4Smrgvoid sse_rsqrtps( struct x86_function *p, 5667117f1b4Smrg struct x86_reg dst, 5677117f1b4Smrg struct x86_reg src ) 5687117f1b4Smrg{ 5697117f1b4Smrg emit_2ub(p, X86_TWOB, 0x52); 5707117f1b4Smrg emit_modrm( p, dst, src ); 5717117f1b4Smrg} 5727117f1b4Smrg 5737117f1b4Smrgvoid sse_rsqrtss( struct x86_function *p, 5747117f1b4Smrg struct x86_reg dst, 5757117f1b4Smrg struct x86_reg src ) 5767117f1b4Smrg{ 5777117f1b4Smrg emit_3ub(p, 0xF3, X86_TWOB, 0x52); 5787117f1b4Smrg emit_modrm( p, dst, src ); 5797117f1b4Smrg 5807117f1b4Smrg} 5817117f1b4Smrg 5827117f1b4Smrgvoid sse_movhlps( struct x86_function *p, 5837117f1b4Smrg struct x86_reg dst, 5847117f1b4Smrg struct x86_reg src ) 5857117f1b4Smrg{ 5867117f1b4Smrg assert(dst.mod == mod_REG && src.mod == mod_REG); 5877117f1b4Smrg emit_2ub(p, X86_TWOB, 0x12); 5887117f1b4Smrg emit_modrm( p, dst, src ); 5897117f1b4Smrg} 5907117f1b4Smrg 5917117f1b4Smrgvoid sse_movlhps( struct x86_function *p, 5927117f1b4Smrg struct x86_reg dst, 5937117f1b4Smrg struct x86_reg src ) 5947117f1b4Smrg{ 5957117f1b4Smrg assert(dst.mod == mod_REG && src.mod == mod_REG); 5967117f1b4Smrg emit_2ub(p, X86_TWOB, 0x16); 5977117f1b4Smrg emit_modrm( p, dst, src ); 5987117f1b4Smrg} 5997117f1b4Smrg 6007117f1b4Smrgvoid sse_orps( struct x86_function *p, 6017117f1b4Smrg struct x86_reg dst, 6027117f1b4Smrg struct x86_reg src ) 6037117f1b4Smrg{ 6047117f1b4Smrg emit_2ub(p, X86_TWOB, 0x56); 6057117f1b4Smrg emit_modrm( p, dst, src ); 6067117f1b4Smrg} 6077117f1b4Smrg 6087117f1b4Smrgvoid sse_xorps( struct x86_function *p, 6097117f1b4Smrg struct x86_reg dst, 6107117f1b4Smrg struct x86_reg src ) 6117117f1b4Smrg{ 6127117f1b4Smrg emit_2ub(p, X86_TWOB, 0x57); 6137117f1b4Smrg emit_modrm( p, dst, src ); 6147117f1b4Smrg} 6157117f1b4Smrg 6167117f1b4Smrgvoid sse_cvtps2pi( struct x86_function *p, 6177117f1b4Smrg struct x86_reg dst, 6187117f1b4Smrg struct x86_reg src ) 6197117f1b4Smrg{ 6207ec681f3Smrg assert(dst.file == file_MMX && 6217117f1b4Smrg (src.file == file_XMM || src.mod != mod_REG)); 6227117f1b4Smrg 6237117f1b4Smrg p->need_emms = 1; 6247117f1b4Smrg 6257117f1b4Smrg emit_2ub(p, X86_TWOB, 0x2d); 6267117f1b4Smrg emit_modrm( p, dst, src ); 6277117f1b4Smrg} 6287117f1b4Smrg 6297117f1b4Smrg 6307117f1b4Smrg/* Shufps can also be used to implement a reduced swizzle when dest == 6317117f1b4Smrg * arg0. 6327117f1b4Smrg */ 6337117f1b4Smrgvoid sse_shufps( struct x86_function *p, 6347117f1b4Smrg struct x86_reg dest, 6357117f1b4Smrg struct x86_reg arg0, 6367ec681f3Smrg unsigned char shuf) 6377117f1b4Smrg{ 6387117f1b4Smrg emit_2ub(p, X86_TWOB, 0xC6); 6397117f1b4Smrg emit_modrm(p, dest, arg0); 6407ec681f3Smrg emit_1ub(p, shuf); 6417117f1b4Smrg} 6427117f1b4Smrg 6437117f1b4Smrgvoid sse_cmpps( struct x86_function *p, 6447117f1b4Smrg struct x86_reg dest, 6457117f1b4Smrg struct x86_reg arg0, 6467ec681f3Smrg unsigned char cc) 6477117f1b4Smrg{ 6487117f1b4Smrg emit_2ub(p, X86_TWOB, 0xC2); 6497117f1b4Smrg emit_modrm(p, dest, arg0); 6507ec681f3Smrg emit_1ub(p, cc); 6517117f1b4Smrg} 6527117f1b4Smrg 6537117f1b4Smrgvoid sse_pmovmskb( struct x86_function *p, 6547117f1b4Smrg struct x86_reg dest, 6557117f1b4Smrg struct x86_reg src) 6567117f1b4Smrg{ 6577117f1b4Smrg emit_3ub(p, 0x66, X86_TWOB, 0xD7); 6587117f1b4Smrg emit_modrm(p, dest, src); 6597117f1b4Smrg} 6607117f1b4Smrg 6617117f1b4Smrg/*********************************************************************** 6627117f1b4Smrg * SSE2 instructions 6637117f1b4Smrg */ 6647117f1b4Smrg 6657117f1b4Smrg/** 6667117f1b4Smrg * Perform a reduced swizzle: 6677117f1b4Smrg */ 6687117f1b4Smrgvoid sse2_pshufd( struct x86_function *p, 6697117f1b4Smrg struct x86_reg dest, 6707117f1b4Smrg struct x86_reg arg0, 6717ec681f3Smrg unsigned char shuf) 6727117f1b4Smrg{ 6737117f1b4Smrg emit_3ub(p, 0x66, X86_TWOB, 0x70); 6747117f1b4Smrg emit_modrm(p, dest, arg0); 6757ec681f3Smrg emit_1ub(p, shuf); 6767117f1b4Smrg} 6777117f1b4Smrg 6787117f1b4Smrgvoid sse2_cvttps2dq( struct x86_function *p, 6797117f1b4Smrg struct x86_reg dst, 6807117f1b4Smrg struct x86_reg src ) 6817117f1b4Smrg{ 6827117f1b4Smrg emit_3ub( p, 0xF3, X86_TWOB, 0x5B ); 6837117f1b4Smrg emit_modrm( p, dst, src ); 6847117f1b4Smrg} 6857117f1b4Smrg 6867117f1b4Smrgvoid sse2_cvtps2dq( struct x86_function *p, 6877117f1b4Smrg struct x86_reg dst, 6887117f1b4Smrg struct x86_reg src ) 6897117f1b4Smrg{ 6907117f1b4Smrg emit_3ub(p, 0x66, X86_TWOB, 0x5B); 6917117f1b4Smrg emit_modrm( p, dst, src ); 6927117f1b4Smrg} 6937117f1b4Smrg 6947117f1b4Smrgvoid sse2_packssdw( struct x86_function *p, 6957117f1b4Smrg struct x86_reg dst, 6967117f1b4Smrg struct x86_reg src ) 6977117f1b4Smrg{ 6987117f1b4Smrg emit_3ub(p, 0x66, X86_TWOB, 0x6B); 6997117f1b4Smrg emit_modrm( p, dst, src ); 7007117f1b4Smrg} 7017117f1b4Smrg 7027117f1b4Smrgvoid sse2_packsswb( struct x86_function *p, 7037117f1b4Smrg struct x86_reg dst, 7047117f1b4Smrg struct x86_reg src ) 7057117f1b4Smrg{ 7067117f1b4Smrg emit_3ub(p, 0x66, X86_TWOB, 0x63); 7077117f1b4Smrg emit_modrm( p, dst, src ); 7087117f1b4Smrg} 7097117f1b4Smrg 7107117f1b4Smrgvoid sse2_packuswb( struct x86_function *p, 7117117f1b4Smrg struct x86_reg dst, 7127117f1b4Smrg struct x86_reg src ) 7137117f1b4Smrg{ 7147117f1b4Smrg emit_3ub(p, 0x66, X86_TWOB, 0x67); 7157117f1b4Smrg emit_modrm( p, dst, src ); 7167117f1b4Smrg} 7177117f1b4Smrg 7187117f1b4Smrgvoid sse2_rcpps( struct x86_function *p, 7197117f1b4Smrg struct x86_reg dst, 7207117f1b4Smrg struct x86_reg src ) 7217117f1b4Smrg{ 7227117f1b4Smrg emit_2ub(p, X86_TWOB, 0x53); 7237117f1b4Smrg emit_modrm( p, dst, src ); 7247117f1b4Smrg} 7257117f1b4Smrg 7267117f1b4Smrgvoid sse2_rcpss( struct x86_function *p, 7277117f1b4Smrg struct x86_reg dst, 7287117f1b4Smrg struct x86_reg src ) 7297117f1b4Smrg{ 7307117f1b4Smrg emit_3ub(p, 0xF3, X86_TWOB, 0x53); 7317117f1b4Smrg emit_modrm( p, dst, src ); 7327117f1b4Smrg} 7337117f1b4Smrg 7347117f1b4Smrgvoid sse2_movd( struct x86_function *p, 7357117f1b4Smrg struct x86_reg dst, 7367117f1b4Smrg struct x86_reg src ) 7377117f1b4Smrg{ 7387117f1b4Smrg emit_2ub(p, 0x66, X86_TWOB); 7397117f1b4Smrg emit_op_modrm( p, 0x6e, 0x7e, dst, src ); 7407117f1b4Smrg} 7417117f1b4Smrg 7427117f1b4Smrg 7437117f1b4Smrg 7447117f1b4Smrg 7457117f1b4Smrg/*********************************************************************** 7467117f1b4Smrg * x87 instructions 7477117f1b4Smrg */ 7487117f1b4Smrgvoid x87_fist( struct x86_function *p, struct x86_reg dst ) 7497117f1b4Smrg{ 7507117f1b4Smrg emit_1ub(p, 0xdb); 7517117f1b4Smrg emit_modrm_noreg(p, 2, dst); 7527117f1b4Smrg} 7537117f1b4Smrg 7547117f1b4Smrgvoid x87_fistp( struct x86_function *p, struct x86_reg dst ) 7557117f1b4Smrg{ 7567117f1b4Smrg emit_1ub(p, 0xdb); 7577117f1b4Smrg emit_modrm_noreg(p, 3, dst); 7587117f1b4Smrg} 7597117f1b4Smrg 7607117f1b4Smrgvoid x87_fild( struct x86_function *p, struct x86_reg arg ) 7617117f1b4Smrg{ 7627117f1b4Smrg emit_1ub(p, 0xdf); 7637117f1b4Smrg emit_modrm_noreg(p, 0, arg); 7647117f1b4Smrg} 7657117f1b4Smrg 7667117f1b4Smrgvoid x87_fldz( struct x86_function *p ) 7677117f1b4Smrg{ 7687117f1b4Smrg emit_2ub(p, 0xd9, 0xee); 7697117f1b4Smrg} 7707117f1b4Smrg 7717117f1b4Smrg 7727117f1b4Smrgvoid x87_fldcw( struct x86_function *p, struct x86_reg arg ) 7737117f1b4Smrg{ 7747117f1b4Smrg assert(arg.file == file_REG32); 7757117f1b4Smrg assert(arg.mod != mod_REG); 7767117f1b4Smrg emit_1ub(p, 0xd9); 7777117f1b4Smrg emit_modrm_noreg(p, 5, arg); 7787117f1b4Smrg} 7797117f1b4Smrg 7807117f1b4Smrgvoid x87_fld1( struct x86_function *p ) 7817117f1b4Smrg{ 7827117f1b4Smrg emit_2ub(p, 0xd9, 0xe8); 7837117f1b4Smrg} 7847117f1b4Smrg 7857117f1b4Smrgvoid x87_fldl2e( struct x86_function *p ) 7867117f1b4Smrg{ 7877117f1b4Smrg emit_2ub(p, 0xd9, 0xea); 7887117f1b4Smrg} 7897117f1b4Smrg 7907117f1b4Smrgvoid x87_fldln2( struct x86_function *p ) 7917117f1b4Smrg{ 7927117f1b4Smrg emit_2ub(p, 0xd9, 0xed); 7937117f1b4Smrg} 7947117f1b4Smrg 7957117f1b4Smrgvoid x87_fwait( struct x86_function *p ) 7967117f1b4Smrg{ 7977117f1b4Smrg emit_1ub(p, 0x9b); 7987117f1b4Smrg} 7997117f1b4Smrg 8007117f1b4Smrgvoid x87_fnclex( struct x86_function *p ) 8017117f1b4Smrg{ 8027117f1b4Smrg emit_2ub(p, 0xdb, 0xe2); 8037117f1b4Smrg} 8047117f1b4Smrg 8057117f1b4Smrgvoid x87_fclex( struct x86_function *p ) 8067117f1b4Smrg{ 8077117f1b4Smrg x87_fwait(p); 8087117f1b4Smrg x87_fnclex(p); 8097117f1b4Smrg} 8107117f1b4Smrg 8117117f1b4Smrg 8127117f1b4Smrgstatic void x87_arith_op( struct x86_function *p, struct x86_reg dst, struct x86_reg arg, 8137117f1b4Smrg unsigned char dst0ub0, 8147117f1b4Smrg unsigned char dst0ub1, 8157117f1b4Smrg unsigned char arg0ub0, 8167117f1b4Smrg unsigned char arg0ub1, 8177117f1b4Smrg unsigned char argmem_noreg) 8187117f1b4Smrg{ 8197117f1b4Smrg assert(dst.file == file_x87); 8207117f1b4Smrg 8217117f1b4Smrg if (arg.file == file_x87) { 8227ec681f3Smrg if (dst.idx == 0) 8237117f1b4Smrg emit_2ub(p, dst0ub0, dst0ub1+arg.idx); 8247ec681f3Smrg else if (arg.idx == 0) 8257117f1b4Smrg emit_2ub(p, arg0ub0, arg0ub1+arg.idx); 8267117f1b4Smrg else 8277117f1b4Smrg assert(0); 8287117f1b4Smrg } 8297117f1b4Smrg else if (dst.idx == 0) { 8307117f1b4Smrg assert(arg.file == file_REG32); 8317117f1b4Smrg emit_1ub(p, 0xd8); 8327117f1b4Smrg emit_modrm_noreg(p, argmem_noreg, arg); 8337117f1b4Smrg } 8347117f1b4Smrg else 8357117f1b4Smrg assert(0); 8367117f1b4Smrg} 8377117f1b4Smrg 8387117f1b4Smrgvoid x87_fmul( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ) 8397117f1b4Smrg{ 8407ec681f3Smrg x87_arith_op(p, dst, arg, 8417117f1b4Smrg 0xd8, 0xc8, 8427117f1b4Smrg 0xdc, 0xc8, 8437117f1b4Smrg 4); 8447117f1b4Smrg} 8457117f1b4Smrg 8467117f1b4Smrgvoid x87_fsub( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ) 8477117f1b4Smrg{ 8487ec681f3Smrg x87_arith_op(p, dst, arg, 8497117f1b4Smrg 0xd8, 0xe0, 8507117f1b4Smrg 0xdc, 0xe8, 8517117f1b4Smrg 4); 8527117f1b4Smrg} 8537117f1b4Smrg 8547117f1b4Smrgvoid x87_fsubr( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ) 8557117f1b4Smrg{ 8567ec681f3Smrg x87_arith_op(p, dst, arg, 8577117f1b4Smrg 0xd8, 0xe8, 8587117f1b4Smrg 0xdc, 0xe0, 8597117f1b4Smrg 5); 8607117f1b4Smrg} 8617117f1b4Smrg 8627117f1b4Smrgvoid x87_fadd( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ) 8637117f1b4Smrg{ 8647ec681f3Smrg x87_arith_op(p, dst, arg, 8657117f1b4Smrg 0xd8, 0xc0, 8667117f1b4Smrg 0xdc, 0xc0, 8677117f1b4Smrg 0); 8687117f1b4Smrg} 8697117f1b4Smrg 8707117f1b4Smrgvoid x87_fdiv( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ) 8717117f1b4Smrg{ 8727ec681f3Smrg x87_arith_op(p, dst, arg, 8737117f1b4Smrg 0xd8, 0xf0, 8747117f1b4Smrg 0xdc, 0xf8, 8757117f1b4Smrg 6); 8767117f1b4Smrg} 8777117f1b4Smrg 8787117f1b4Smrgvoid x87_fdivr( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ) 8797117f1b4Smrg{ 8807ec681f3Smrg x87_arith_op(p, dst, arg, 8817117f1b4Smrg 0xd8, 0xf8, 8827117f1b4Smrg 0xdc, 0xf0, 8837117f1b4Smrg 7); 8847117f1b4Smrg} 8857117f1b4Smrg 8867117f1b4Smrgvoid x87_fmulp( struct x86_function *p, struct x86_reg dst ) 8877117f1b4Smrg{ 8887117f1b4Smrg assert(dst.file == file_x87); 8897117f1b4Smrg assert(dst.idx >= 1); 8907117f1b4Smrg emit_2ub(p, 0xde, 0xc8+dst.idx); 8917117f1b4Smrg} 8927117f1b4Smrg 8937117f1b4Smrgvoid x87_fsubp( struct x86_function *p, struct x86_reg dst ) 8947117f1b4Smrg{ 8957117f1b4Smrg assert(dst.file == file_x87); 8967117f1b4Smrg assert(dst.idx >= 1); 8977117f1b4Smrg emit_2ub(p, 0xde, 0xe8+dst.idx); 8987117f1b4Smrg} 8997117f1b4Smrg 9007117f1b4Smrgvoid x87_fsubrp( struct x86_function *p, struct x86_reg dst ) 9017117f1b4Smrg{ 9027117f1b4Smrg assert(dst.file == file_x87); 9037117f1b4Smrg assert(dst.idx >= 1); 9047117f1b4Smrg emit_2ub(p, 0xde, 0xe0+dst.idx); 9057117f1b4Smrg} 9067117f1b4Smrg 9077117f1b4Smrgvoid x87_faddp( struct x86_function *p, struct x86_reg dst ) 9087117f1b4Smrg{ 9097117f1b4Smrg assert(dst.file == file_x87); 9107117f1b4Smrg assert(dst.idx >= 1); 9117117f1b4Smrg emit_2ub(p, 0xde, 0xc0+dst.idx); 9127117f1b4Smrg} 9137117f1b4Smrg 9147117f1b4Smrgvoid x87_fdivp( struct x86_function *p, struct x86_reg dst ) 9157117f1b4Smrg{ 9167117f1b4Smrg assert(dst.file == file_x87); 9177117f1b4Smrg assert(dst.idx >= 1); 9187117f1b4Smrg emit_2ub(p, 0xde, 0xf8+dst.idx); 9197117f1b4Smrg} 9207117f1b4Smrg 9217117f1b4Smrgvoid x87_fdivrp( struct x86_function *p, struct x86_reg dst ) 9227117f1b4Smrg{ 9237117f1b4Smrg assert(dst.file == file_x87); 9247117f1b4Smrg assert(dst.idx >= 1); 9257117f1b4Smrg emit_2ub(p, 0xde, 0xf0+dst.idx); 9267117f1b4Smrg} 9277117f1b4Smrg 9287117f1b4Smrgvoid x87_fucom( struct x86_function *p, struct x86_reg arg ) 9297117f1b4Smrg{ 9307117f1b4Smrg assert(arg.file == file_x87); 9317117f1b4Smrg emit_2ub(p, 0xdd, 0xe0+arg.idx); 9327117f1b4Smrg} 9337117f1b4Smrg 9347117f1b4Smrgvoid x87_fucomp( struct x86_function *p, struct x86_reg arg ) 9357117f1b4Smrg{ 9367117f1b4Smrg assert(arg.file == file_x87); 9377117f1b4Smrg emit_2ub(p, 0xdd, 0xe8+arg.idx); 9387117f1b4Smrg} 9397117f1b4Smrg 9407117f1b4Smrgvoid x87_fucompp( struct x86_function *p ) 9417117f1b4Smrg{ 9427117f1b4Smrg emit_2ub(p, 0xda, 0xe9); 9437117f1b4Smrg} 9447117f1b4Smrg 9457117f1b4Smrgvoid x87_fxch( struct x86_function *p, struct x86_reg arg ) 9467117f1b4Smrg{ 9477117f1b4Smrg assert(arg.file == file_x87); 9487117f1b4Smrg emit_2ub(p, 0xd9, 0xc8+arg.idx); 9497117f1b4Smrg} 9507117f1b4Smrg 9517117f1b4Smrgvoid x87_fabs( struct x86_function *p ) 9527117f1b4Smrg{ 9537117f1b4Smrg emit_2ub(p, 0xd9, 0xe1); 9547117f1b4Smrg} 9557117f1b4Smrg 9567117f1b4Smrgvoid x87_fchs( struct x86_function *p ) 9577117f1b4Smrg{ 9587117f1b4Smrg emit_2ub(p, 0xd9, 0xe0); 9597117f1b4Smrg} 9607117f1b4Smrg 9617117f1b4Smrgvoid x87_fcos( struct x86_function *p ) 9627117f1b4Smrg{ 9637117f1b4Smrg emit_2ub(p, 0xd9, 0xff); 9647117f1b4Smrg} 9657117f1b4Smrg 9667117f1b4Smrg 9677117f1b4Smrgvoid x87_fprndint( struct x86_function *p ) 9687117f1b4Smrg{ 9697117f1b4Smrg emit_2ub(p, 0xd9, 0xfc); 9707117f1b4Smrg} 9717117f1b4Smrg 9727117f1b4Smrgvoid x87_fscale( struct x86_function *p ) 9737117f1b4Smrg{ 9747117f1b4Smrg emit_2ub(p, 0xd9, 0xfd); 9757117f1b4Smrg} 9767117f1b4Smrg 9777117f1b4Smrgvoid x87_fsin( struct x86_function *p ) 9787117f1b4Smrg{ 9797117f1b4Smrg emit_2ub(p, 0xd9, 0xfe); 9807117f1b4Smrg} 9817117f1b4Smrg 9827117f1b4Smrgvoid x87_fsincos( struct x86_function *p ) 9837117f1b4Smrg{ 9847117f1b4Smrg emit_2ub(p, 0xd9, 0xfb); 9857117f1b4Smrg} 9867117f1b4Smrg 9877117f1b4Smrgvoid x87_fsqrt( struct x86_function *p ) 9887117f1b4Smrg{ 9897117f1b4Smrg emit_2ub(p, 0xd9, 0xfa); 9907117f1b4Smrg} 9917117f1b4Smrg 9927117f1b4Smrgvoid x87_fxtract( struct x86_function *p ) 9937117f1b4Smrg{ 9947117f1b4Smrg emit_2ub(p, 0xd9, 0xf4); 9957117f1b4Smrg} 9967117f1b4Smrg 9977117f1b4Smrg/* st0 = (2^st0)-1 9987117f1b4Smrg * 9997117f1b4Smrg * Restrictions: -1.0 <= st0 <= 1.0 10007117f1b4Smrg */ 10017117f1b4Smrgvoid x87_f2xm1( struct x86_function *p ) 10027117f1b4Smrg{ 10037117f1b4Smrg emit_2ub(p, 0xd9, 0xf0); 10047117f1b4Smrg} 10057117f1b4Smrg 10067117f1b4Smrg/* st1 = st1 * log2(st0); 10077117f1b4Smrg * pop_stack; 10087117f1b4Smrg */ 10097117f1b4Smrgvoid x87_fyl2x( struct x86_function *p ) 10107117f1b4Smrg{ 10117117f1b4Smrg emit_2ub(p, 0xd9, 0xf1); 10127117f1b4Smrg} 10137117f1b4Smrg 10147117f1b4Smrg/* st1 = st1 * log2(st0 + 1.0); 10157117f1b4Smrg * pop_stack; 10167117f1b4Smrg * 10177ec681f3Smrg * A fast operation, with restrictions: -.29 < st0 < .29 10187117f1b4Smrg */ 10197117f1b4Smrgvoid x87_fyl2xp1( struct x86_function *p ) 10207117f1b4Smrg{ 10217117f1b4Smrg emit_2ub(p, 0xd9, 0xf9); 10227117f1b4Smrg} 10237117f1b4Smrg 10247117f1b4Smrg 10257117f1b4Smrgvoid x87_fld( struct x86_function *p, struct x86_reg arg ) 10267117f1b4Smrg{ 10277ec681f3Smrg if (arg.file == file_x87) 10287117f1b4Smrg emit_2ub(p, 0xd9, 0xc0 + arg.idx); 10297117f1b4Smrg else { 10307117f1b4Smrg emit_1ub(p, 0xd9); 10317117f1b4Smrg emit_modrm_noreg(p, 0, arg); 10327117f1b4Smrg } 10337117f1b4Smrg} 10347117f1b4Smrg 10357117f1b4Smrgvoid x87_fst( struct x86_function *p, struct x86_reg dst ) 10367117f1b4Smrg{ 10377ec681f3Smrg if (dst.file == file_x87) 10387117f1b4Smrg emit_2ub(p, 0xdd, 0xd0 + dst.idx); 10397117f1b4Smrg else { 10407117f1b4Smrg emit_1ub(p, 0xd9); 10417117f1b4Smrg emit_modrm_noreg(p, 2, dst); 10427117f1b4Smrg } 10437117f1b4Smrg} 10447117f1b4Smrg 10457117f1b4Smrgvoid x87_fstp( struct x86_function *p, struct x86_reg dst ) 10467117f1b4Smrg{ 10477ec681f3Smrg if (dst.file == file_x87) 10487117f1b4Smrg emit_2ub(p, 0xdd, 0xd8 + dst.idx); 10497117f1b4Smrg else { 10507117f1b4Smrg emit_1ub(p, 0xd9); 10517117f1b4Smrg emit_modrm_noreg(p, 3, dst); 10527117f1b4Smrg } 10537117f1b4Smrg} 10547117f1b4Smrg 10557117f1b4Smrgvoid x87_fcom( struct x86_function *p, struct x86_reg dst ) 10567117f1b4Smrg{ 10577ec681f3Smrg if (dst.file == file_x87) 10587117f1b4Smrg emit_2ub(p, 0xd8, 0xd0 + dst.idx); 10597117f1b4Smrg else { 10607117f1b4Smrg emit_1ub(p, 0xd8); 10617117f1b4Smrg emit_modrm_noreg(p, 2, dst); 10627117f1b4Smrg } 10637117f1b4Smrg} 10647117f1b4Smrg 10657117f1b4Smrgvoid x87_fcomp( struct x86_function *p, struct x86_reg dst ) 10667117f1b4Smrg{ 10677ec681f3Smrg if (dst.file == file_x87) 10687117f1b4Smrg emit_2ub(p, 0xd8, 0xd8 + dst.idx); 10697117f1b4Smrg else { 10707117f1b4Smrg emit_1ub(p, 0xd8); 10717117f1b4Smrg emit_modrm_noreg(p, 3, dst); 10727117f1b4Smrg } 10737117f1b4Smrg} 10747117f1b4Smrg 10757117f1b4Smrg 10767117f1b4Smrgvoid x87_fnstsw( struct x86_function *p, struct x86_reg dst ) 10777117f1b4Smrg{ 10787117f1b4Smrg assert(dst.file == file_REG32); 10797117f1b4Smrg 10807117f1b4Smrg if (dst.idx == reg_AX && 10817ec681f3Smrg dst.mod == mod_REG) 10827117f1b4Smrg emit_2ub(p, 0xdf, 0xe0); 10837117f1b4Smrg else { 10847117f1b4Smrg emit_1ub(p, 0xdd); 10857117f1b4Smrg emit_modrm_noreg(p, 7, dst); 10867117f1b4Smrg } 10877117f1b4Smrg} 10887117f1b4Smrg 10897117f1b4Smrg 10907117f1b4Smrg 10917117f1b4Smrg 10927117f1b4Smrg/*********************************************************************** 10937117f1b4Smrg * MMX instructions 10947117f1b4Smrg */ 10957117f1b4Smrg 10967117f1b4Smrgvoid mmx_emms( struct x86_function *p ) 10977117f1b4Smrg{ 10987117f1b4Smrg assert(p->need_emms); 10997117f1b4Smrg emit_2ub(p, 0x0f, 0x77); 11007117f1b4Smrg p->need_emms = 0; 11017117f1b4Smrg} 11027117f1b4Smrg 11037117f1b4Smrgvoid mmx_packssdw( struct x86_function *p, 11047117f1b4Smrg struct x86_reg dst, 11057117f1b4Smrg struct x86_reg src ) 11067117f1b4Smrg{ 11077ec681f3Smrg assert(dst.file == file_MMX && 11087117f1b4Smrg (src.file == file_MMX || src.mod != mod_REG)); 11097117f1b4Smrg 11107117f1b4Smrg p->need_emms = 1; 11117117f1b4Smrg 11127117f1b4Smrg emit_2ub(p, X86_TWOB, 0x6b); 11137117f1b4Smrg emit_modrm( p, dst, src ); 11147117f1b4Smrg} 11157117f1b4Smrg 11167117f1b4Smrgvoid mmx_packuswb( struct x86_function *p, 11177117f1b4Smrg struct x86_reg dst, 11187117f1b4Smrg struct x86_reg src ) 11197117f1b4Smrg{ 11207ec681f3Smrg assert(dst.file == file_MMX && 11217117f1b4Smrg (src.file == file_MMX || src.mod != mod_REG)); 11227117f1b4Smrg 11237117f1b4Smrg p->need_emms = 1; 11247117f1b4Smrg 11257117f1b4Smrg emit_2ub(p, X86_TWOB, 0x67); 11267117f1b4Smrg emit_modrm( p, dst, src ); 11277117f1b4Smrg} 11287117f1b4Smrg 11297117f1b4Smrgvoid mmx_movd( struct x86_function *p, 11307117f1b4Smrg struct x86_reg dst, 11317117f1b4Smrg struct x86_reg src ) 11327117f1b4Smrg{ 11337117f1b4Smrg p->need_emms = 1; 11347117f1b4Smrg emit_1ub(p, X86_TWOB); 11357117f1b4Smrg emit_op_modrm( p, 0x6e, 0x7e, dst, src ); 11367117f1b4Smrg} 11377117f1b4Smrg 11387117f1b4Smrgvoid mmx_movq( struct x86_function *p, 11397117f1b4Smrg struct x86_reg dst, 11407117f1b4Smrg struct x86_reg src ) 11417117f1b4Smrg{ 11427117f1b4Smrg p->need_emms = 1; 11437117f1b4Smrg emit_1ub(p, X86_TWOB); 11447117f1b4Smrg emit_op_modrm( p, 0x6f, 0x7f, dst, src ); 11457117f1b4Smrg} 11467117f1b4Smrg 11477117f1b4Smrg 11487117f1b4Smrg/*********************************************************************** 11497117f1b4Smrg * Helper functions 11507117f1b4Smrg */ 11517117f1b4Smrg 11527117f1b4Smrg 11537117f1b4Smrg/* Retreive a reference to one of the function arguments, taking into 11547117f1b4Smrg * account any push/pop activity: 11557117f1b4Smrg */ 11567117f1b4Smrgstruct x86_reg x86_fn_arg( struct x86_function *p, 11577117f1b4Smrg unsigned arg ) 11587117f1b4Smrg{ 11597ec681f3Smrg return x86_make_disp(x86_make_reg(file_REG32, reg_SP), 11607117f1b4Smrg p->stack_offset + arg * 4); /* ??? */ 11617117f1b4Smrg} 11627117f1b4Smrg 11637117f1b4Smrg 11647117f1b4Smrgvoid x86_init_func( struct x86_function *p ) 11657117f1b4Smrg{ 11667117f1b4Smrg p->size = 0; 11677117f1b4Smrg p->store = NULL; 11687117f1b4Smrg p->csr = p->store; 11697117f1b4Smrg} 11707117f1b4Smrg 11717117f1b4Smrgint x86_init_func_size( struct x86_function *p, unsigned code_size ) 11727117f1b4Smrg{ 11737117f1b4Smrg p->size = code_size; 11747117f1b4Smrg p->store = _mesa_exec_malloc(code_size); 11757117f1b4Smrg p->csr = p->store; 11767117f1b4Smrg return p->store != NULL; 11777117f1b4Smrg} 11787117f1b4Smrg 11797117f1b4Smrgvoid x86_release_func( struct x86_function *p ) 11807117f1b4Smrg{ 11817117f1b4Smrg _mesa_exec_free(p->store); 11827117f1b4Smrg p->store = NULL; 11837117f1b4Smrg p->csr = NULL; 11847117f1b4Smrg p->size = 0; 11857117f1b4Smrg} 11867117f1b4Smrg 11877117f1b4Smrg 11887117f1b4Smrgvoid (*x86_get_func( struct x86_function *p ))(void) 11897117f1b4Smrg{ 11907117f1b4Smrg if (DISASSEM && p->store) 1191cdc920a0Smrg printf("disassemble %p %p\n", p->store, p->csr); 11927117f1b4Smrg return (void (*)(void)) (unsigned long) p->store; 11937117f1b4Smrg} 11947117f1b4Smrg 11957117f1b4Smrg#else 11967117f1b4Smrg 11977117f1b4Smrgvoid x86sse_dummy( void ) 11987117f1b4Smrg{ 11997117f1b4Smrg} 12007117f1b4Smrg 12017117f1b4Smrg#endif 12024a49301eSmrg 12034a49301eSmrg#else /* USE_X86_ASM */ 12044a49301eSmrg 12054a49301eSmrgint x86sse_c_dummy_var; /* silence warning */ 12064a49301eSmrg 12074a49301eSmrg#endif /* USE_X86_ASM */ 1208