14a49301eSmrg#ifdef USE_X86_ASM
27117f1b4Smrg#if defined(__i386__) || defined(__386__)
37117f1b4Smrg
401e04c3fSmrg#include <stdio.h>
57ec681f3Smrg#include <string.h>
67ec681f3Smrg#include <assert.h>
701e04c3fSmrg
801e04c3fSmrg#include "main/execmem.h"
97117f1b4Smrg#include "x86sse.h"
107117f1b4Smrg
117117f1b4Smrg#define DISASSEM 0
127117f1b4Smrg#define X86_TWOB 0x0f
137117f1b4Smrg
144a49301eSmrg#if 0
/* Reinterpret a function pointer as a byte pointer by round-tripping it
 * through an unsigned long.
 */
static unsigned char *cptr( void (*label)() )
{
   unsigned long addr = (unsigned long)label;

   return (unsigned char *)addr;
}
194a49301eSmrg#endif
207117f1b4Smrg
217117f1b4Smrg
227117f1b4Smrgstatic void do_realloc( struct x86_function *p )
237117f1b4Smrg{
247117f1b4Smrg   if (p->size == 0) {
257117f1b4Smrg      p->size = 1024;
267117f1b4Smrg      p->store = _mesa_exec_malloc(p->size);
277117f1b4Smrg      p->csr = p->store;
287117f1b4Smrg   }
297117f1b4Smrg   else {
307117f1b4Smrg      unsigned used = p->csr - p->store;
317117f1b4Smrg      unsigned char *tmp = p->store;
327117f1b4Smrg      p->size *= 2;
337117f1b4Smrg      p->store = _mesa_exec_malloc(p->size);
347117f1b4Smrg      memcpy(p->store, tmp, used);
357117f1b4Smrg      p->csr = p->store + used;
367117f1b4Smrg      _mesa_exec_free(tmp);
377117f1b4Smrg   }
387117f1b4Smrg}
397117f1b4Smrg
407117f1b4Smrg/* Emit bytes to the instruction stream:
417117f1b4Smrg */
427117f1b4Smrgstatic unsigned char *reserve( struct x86_function *p, int bytes )
437117f1b4Smrg{
447117f1b4Smrg   if (p->csr + bytes - p->store > p->size)
457117f1b4Smrg      do_realloc(p);
467117f1b4Smrg
477117f1b4Smrg   {
487117f1b4Smrg      unsigned char *csr = p->csr;
497117f1b4Smrg      p->csr += bytes;
507117f1b4Smrg      return csr;
517117f1b4Smrg   }
527117f1b4Smrg}
537117f1b4Smrg
547117f1b4Smrg
557117f1b4Smrg
/* Append one (plain char) byte to the instruction stream. */
static void emit_1b( struct x86_function *p, char b0 )
{
   char *dst = (char *)reserve(p, 1);

   dst[0] = b0;
}
617117f1b4Smrg
/* Append one int to the instruction stream, in host byte order.
 *
 * The cursor is byte-granular and usually not int-aligned, so the value
 * is copied with memcpy() rather than stored through a casted int
 * pointer (a misaligned, type-punned store is undefined behaviour in C).
 */
static void emit_1i( struct x86_function *p, int i0 )
{
   unsigned char *csr = reserve(p, sizeof(i0));

   memcpy(csr, &i0, sizeof(i0));
}
677117f1b4Smrg
/* Append one unsigned byte to the instruction stream. */
static void emit_1ub( struct x86_function *p, unsigned char b0 )
{
   unsigned char *dst = reserve(p, 1);

   dst[0] = b0;
}
737117f1b4Smrg
/* Append two unsigned bytes, b0 first, with a single reservation. */
static void emit_2ub( struct x86_function *p, unsigned char b0, unsigned char b1 )
{
   unsigned char *dst = reserve(p, 2);

   dst[0] = b0;
   dst[1] = b1;
}
807117f1b4Smrg
/* Append three unsigned bytes, b0 first, with a single reservation. */
static void emit_3ub( struct x86_function *p, unsigned char b0, unsigned char b1, unsigned char b2 )
{
   unsigned char *dst = reserve(p, 3);

   dst[0] = b0;
   dst[1] = b1;
   dst[2] = b2;
}
887117f1b4Smrg
897117f1b4Smrg
907117f1b4Smrg/* Build a modRM byte + possible displacement.  No treatment of SIB
917117f1b4Smrg * indexing.  BZZT - no way to encode an absolute address.
927117f1b4Smrg */
937ec681f3Smrgstatic void emit_modrm( struct x86_function *p,
947ec681f3Smrg			struct x86_reg reg,
957117f1b4Smrg			struct x86_reg regmem )
967117f1b4Smrg{
977117f1b4Smrg   unsigned char val = 0;
987ec681f3Smrg
997117f1b4Smrg   assert(reg.mod == mod_REG);
1007ec681f3Smrg
1017117f1b4Smrg   val |= regmem.mod << 6;     	/* mod field */
1027117f1b4Smrg   val |= reg.idx << 3;		/* reg field */
1037117f1b4Smrg   val |= regmem.idx;		/* r/m field */
1047ec681f3Smrg
1057117f1b4Smrg   emit_1ub(p, val);
1067117f1b4Smrg
1077117f1b4Smrg   /* Oh-oh we've stumbled into the SIB thing.
1087117f1b4Smrg    */
1097117f1b4Smrg   if (regmem.file == file_REG32 &&
1107117f1b4Smrg       regmem.idx == reg_SP) {
1117117f1b4Smrg      emit_1ub(p, 0x24);		/* simplistic! */
1127117f1b4Smrg   }
1137117f1b4Smrg
1147117f1b4Smrg   switch (regmem.mod) {
1157117f1b4Smrg   case mod_REG:
1167117f1b4Smrg   case mod_INDIRECT:
1177117f1b4Smrg      break;
1187117f1b4Smrg   case mod_DISP8:
1197117f1b4Smrg      emit_1b(p, regmem.disp);
1207117f1b4Smrg      break;
1217117f1b4Smrg   case mod_DISP32:
1227117f1b4Smrg      emit_1i(p, regmem.disp);
1237117f1b4Smrg      break;
1247117f1b4Smrg   default:
1257117f1b4Smrg      assert(0);
1267117f1b4Smrg      break;
1277117f1b4Smrg   }
1287117f1b4Smrg}
1297117f1b4Smrg
1307117f1b4Smrg
1317117f1b4Smrgstatic void emit_modrm_noreg( struct x86_function *p,
1327117f1b4Smrg			      unsigned op,
1337117f1b4Smrg			      struct x86_reg regmem )
1347117f1b4Smrg{
1357117f1b4Smrg   struct x86_reg dummy = x86_make_reg(file_REG32, op);
1367117f1b4Smrg   emit_modrm(p, dummy, regmem);
1377117f1b4Smrg}
1387117f1b4Smrg
1397117f1b4Smrg/* Many x86 instructions have two opcodes to cope with the situations
1407117f1b4Smrg * where the destination is a register or memory reference
1417117f1b4Smrg * respectively.  This function selects the correct opcode based on
1427117f1b4Smrg * the arguments presented.
1437117f1b4Smrg */
1447117f1b4Smrgstatic void emit_op_modrm( struct x86_function *p,
1457ec681f3Smrg			   unsigned char op_dst_is_reg,
1467117f1b4Smrg			   unsigned char op_dst_is_mem,
1477117f1b4Smrg			   struct x86_reg dst,
1487117f1b4Smrg			   struct x86_reg src )
1497ec681f3Smrg{
1507117f1b4Smrg   switch (dst.mod) {
1517117f1b4Smrg   case mod_REG:
1527117f1b4Smrg      emit_1ub(p, op_dst_is_reg);
1537117f1b4Smrg      emit_modrm(p, dst, src);
1547117f1b4Smrg      break;
1557117f1b4Smrg   case mod_INDIRECT:
1567117f1b4Smrg   case mod_DISP32:
1577117f1b4Smrg   case mod_DISP8:
1587117f1b4Smrg      assert(src.mod == mod_REG);
1597117f1b4Smrg      emit_1ub(p, op_dst_is_mem);
1607117f1b4Smrg      emit_modrm(p, src, dst);
1617117f1b4Smrg      break;
1627117f1b4Smrg   default:
1637117f1b4Smrg      assert(0);
1647117f1b4Smrg      break;
1657117f1b4Smrg   }
1667117f1b4Smrg}
1677117f1b4Smrg
1687117f1b4Smrg
1697117f1b4Smrg
1707117f1b4Smrg
1717117f1b4Smrg
1727117f1b4Smrg
1737117f1b4Smrg
1747117f1b4Smrg/* Create and manipulate registers and regmem values:
1757117f1b4Smrg */
1767117f1b4Smrgstruct x86_reg x86_make_reg( enum x86_reg_file file,
1777117f1b4Smrg			     enum x86_reg_name idx )
1787117f1b4Smrg{
1797117f1b4Smrg   struct x86_reg reg;
1807117f1b4Smrg
1817117f1b4Smrg   reg.file = file;
1827117f1b4Smrg   reg.idx = idx;
1837117f1b4Smrg   reg.mod = mod_REG;
1847117f1b4Smrg   reg.disp = 0;
1857117f1b4Smrg
1867117f1b4Smrg   return reg;
1877117f1b4Smrg}
1887117f1b4Smrg
1897117f1b4Smrgstruct x86_reg x86_make_disp( struct x86_reg reg,
1907117f1b4Smrg			      int disp )
1917117f1b4Smrg{
1927117f1b4Smrg   assert(reg.file == file_REG32);
1937117f1b4Smrg
1947117f1b4Smrg   if (reg.mod == mod_REG)
1957117f1b4Smrg      reg.disp = disp;
1967117f1b4Smrg   else
1977117f1b4Smrg      reg.disp += disp;
1987117f1b4Smrg
1997117f1b4Smrg   if (reg.disp == 0)
2007117f1b4Smrg      reg.mod = mod_INDIRECT;
2017117f1b4Smrg   else if (reg.disp <= 127 && reg.disp >= -128)
2027117f1b4Smrg      reg.mod = mod_DISP8;
2037117f1b4Smrg   else
2047117f1b4Smrg      reg.mod = mod_DISP32;
2057117f1b4Smrg
2067117f1b4Smrg   return reg;
2077117f1b4Smrg}
2087117f1b4Smrg
2097117f1b4Smrgstruct x86_reg x86_deref( struct x86_reg reg )
2107117f1b4Smrg{
2117117f1b4Smrg   return x86_make_disp(reg, 0);
2127117f1b4Smrg}
2137117f1b4Smrg
2147117f1b4Smrgstruct x86_reg x86_get_base_reg( struct x86_reg reg )
2157117f1b4Smrg{
2167117f1b4Smrg   return x86_make_reg( reg.file, reg.idx );
2177117f1b4Smrg}
2187117f1b4Smrg
2197117f1b4Smrgunsigned char *x86_get_label( struct x86_function *p )
2207117f1b4Smrg{
2217117f1b4Smrg   return p->csr;
2227117f1b4Smrg}
2237117f1b4Smrg
2247117f1b4Smrg
2257117f1b4Smrg
2267117f1b4Smrg/***********************************************************************
2277117f1b4Smrg * x86 instructions
2287117f1b4Smrg */
2297117f1b4Smrg
2307117f1b4Smrg
2317117f1b4Smrgvoid x86_jcc( struct x86_function *p,
2327117f1b4Smrg	      enum x86_cc cc,
2337117f1b4Smrg	      unsigned char *label )
2347117f1b4Smrg{
2357117f1b4Smrg   int offset = label - (x86_get_label(p) + 2);
2367ec681f3Smrg
2377117f1b4Smrg   if (offset <= 127 && offset >= -128) {
2387117f1b4Smrg      emit_1ub(p, 0x70 + cc);
2397117f1b4Smrg      emit_1b(p, (char) offset);
2407117f1b4Smrg   }
2417117f1b4Smrg   else {
2427117f1b4Smrg      offset = label - (x86_get_label(p) + 6);
2437117f1b4Smrg      emit_2ub(p, 0x0f, 0x80 + cc);
2447117f1b4Smrg      emit_1i(p, offset);
2457117f1b4Smrg   }
2467117f1b4Smrg}
2477117f1b4Smrg
2487117f1b4Smrg/* Always use a 32bit offset for forward jumps:
2497117f1b4Smrg */
2507117f1b4Smrgunsigned char *x86_jcc_forward( struct x86_function *p,
2517117f1b4Smrg			  enum x86_cc cc )
2527117f1b4Smrg{
2537117f1b4Smrg   emit_2ub(p, 0x0f, 0x80 + cc);
2547117f1b4Smrg   emit_1i(p, 0);
2557117f1b4Smrg   return x86_get_label(p);
2567117f1b4Smrg}
2577117f1b4Smrg
/* Emit an unconditional near jump (0xe9) with a zero rel32 placeholder
 * and return the address just past it for a later fixup.
 */
unsigned char *x86_jmp_forward( struct x86_function *p)
{
   unsigned char *fixup;

   emit_1ub(p, 0xe9);
   emit_1i(p, 0);

   fixup = x86_get_label(p);
   return fixup;
}
2647117f1b4Smrg
/* Emit a relative call (0xe8) with a zero rel32 placeholder and return
 * the address just past it for a later fixup.
 */
unsigned char *x86_call_forward( struct x86_function *p)
{
   unsigned char *fixup;

   emit_1ub(p, 0xe8);
   emit_1i(p, 0);

   fixup = x86_get_label(p);
   return fixup;
}
2717117f1b4Smrg
2727117f1b4Smrg/* Fixup offset from forward jump:
2737117f1b4Smrg */
2747117f1b4Smrgvoid x86_fixup_fwd_jump( struct x86_function *p,
2757117f1b4Smrg			 unsigned char *fixup )
2767117f1b4Smrg{
2777117f1b4Smrg   *(int *)(fixup - 4) = x86_get_label(p) - fixup;
2787117f1b4Smrg}
2797117f1b4Smrg
/* Unconditional near jump (0xe9, rel32) to an already-known label.  The
 * offset is relative to the end of the 4-byte displacement.
 */
void x86_jmp( struct x86_function *p, unsigned char *label)
{
   unsigned char *after_opcode;

   emit_1ub(p, 0xe9);

   after_opcode = x86_get_label(p);
   emit_1i(p, label - after_opcode - 4);
}
2857117f1b4Smrg
2867117f1b4Smrg#if 0
2877117f1b4Smrg/* This doesn't work once we start reallocating & copying the
2887117f1b4Smrg * generated code on buffer fills, because the call is relative to the
2897117f1b4Smrg * current pc.
2907117f1b4Smrg */
2917117f1b4Smrgvoid x86_call( struct x86_function *p, void (*label)())
2927117f1b4Smrg{
2937117f1b4Smrg   emit_1ub(p, 0xe8);
2947117f1b4Smrg   emit_1i(p, cptr(label) - x86_get_label(p) - 4);
2957117f1b4Smrg}
2967117f1b4Smrg#else
2977117f1b4Smrgvoid x86_call( struct x86_function *p, struct x86_reg reg)
2987117f1b4Smrg{
2997117f1b4Smrg   emit_1ub(p, 0xff);
3004a49301eSmrg   emit_modrm_noreg(p, 2, reg);
3017117f1b4Smrg}
3027117f1b4Smrg#endif
3037117f1b4Smrg
3047117f1b4Smrg
3057117f1b4Smrg/* michal:
3067117f1b4Smrg * Temporary. As I need immediate operands, and dont want to mess with the codegen,
3077117f1b4Smrg * I load the immediate into general purpose register and use it.
3087117f1b4Smrg */
3097117f1b4Smrgvoid x86_mov_reg_imm( struct x86_function *p, struct x86_reg dst, int imm )
3107117f1b4Smrg{
3117117f1b4Smrg   assert(dst.mod == mod_REG);
3127117f1b4Smrg   emit_1ub(p, 0xb8 + dst.idx);
3137117f1b4Smrg   emit_1i(p, imm);
3147117f1b4Smrg}
3157117f1b4Smrg
3167117f1b4Smrgvoid x86_push( struct x86_function *p,
3177117f1b4Smrg	       struct x86_reg reg )
3187117f1b4Smrg{
3197117f1b4Smrg   assert(reg.mod == mod_REG);
3207117f1b4Smrg   emit_1ub(p, 0x50 + reg.idx);
3217117f1b4Smrg   p->stack_offset += 4;
3227117f1b4Smrg}
3237117f1b4Smrg
3247117f1b4Smrgvoid x86_pop( struct x86_function *p,
3257117f1b4Smrg	      struct x86_reg reg )
3267117f1b4Smrg{
3277117f1b4Smrg   assert(reg.mod == mod_REG);
3287117f1b4Smrg   emit_1ub(p, 0x58 + reg.idx);
3297117f1b4Smrg   p->stack_offset -= 4;
3307117f1b4Smrg}
3317117f1b4Smrg
3327117f1b4Smrgvoid x86_inc( struct x86_function *p,
3337117f1b4Smrg	      struct x86_reg reg )
3347117f1b4Smrg{
3357117f1b4Smrg   assert(reg.mod == mod_REG);
3367117f1b4Smrg   emit_1ub(p, 0x40 + reg.idx);
3377117f1b4Smrg}
3387117f1b4Smrg
3397117f1b4Smrgvoid x86_dec( struct x86_function *p,
3407117f1b4Smrg	      struct x86_reg reg )
3417117f1b4Smrg{
3427117f1b4Smrg   assert(reg.mod == mod_REG);
3437117f1b4Smrg   emit_1ub(p, 0x48 + reg.idx);
3447117f1b4Smrg}
3457117f1b4Smrg
/* RET (near return, no stack adjustment). */
void x86_ret( struct x86_function *p )
{
   const unsigned char opcode = 0xc3;

   emit_1ub(p, opcode);
}
3507117f1b4Smrg
/* SAHF (store AH into flags). */
void x86_sahf( struct x86_function *p )
{
   const unsigned char opcode = 0x9e;

   emit_1ub(p, opcode);
}
3557117f1b4Smrg
3567117f1b4Smrgvoid x86_mov( struct x86_function *p,
3577117f1b4Smrg	      struct x86_reg dst,
3587117f1b4Smrg	      struct x86_reg src )
3597117f1b4Smrg{
3607117f1b4Smrg   emit_op_modrm( p, 0x8b, 0x89, dst, src );
3617117f1b4Smrg}
3627117f1b4Smrg
3637117f1b4Smrgvoid x86_xor( struct x86_function *p,
3647117f1b4Smrg	      struct x86_reg dst,
3657117f1b4Smrg	      struct x86_reg src )
3667117f1b4Smrg{
3677117f1b4Smrg   emit_op_modrm( p, 0x33, 0x31, dst, src );
3687117f1b4Smrg}
3697117f1b4Smrg
3707117f1b4Smrgvoid x86_cmp( struct x86_function *p,
3717117f1b4Smrg	      struct x86_reg dst,
3727117f1b4Smrg	      struct x86_reg src )
3737117f1b4Smrg{
3747117f1b4Smrg   emit_op_modrm( p, 0x3b, 0x39, dst, src );
3757117f1b4Smrg}
3767117f1b4Smrg
3777117f1b4Smrgvoid x86_lea( struct x86_function *p,
3787117f1b4Smrg	      struct x86_reg dst,
3797117f1b4Smrg	      struct x86_reg src )
3807117f1b4Smrg{
3817117f1b4Smrg   emit_1ub(p, 0x8d);
3827117f1b4Smrg   emit_modrm( p, dst, src );
3837117f1b4Smrg}
3847117f1b4Smrg
3857117f1b4Smrgvoid x86_test( struct x86_function *p,
3867117f1b4Smrg	       struct x86_reg dst,
3877117f1b4Smrg	       struct x86_reg src )
3887117f1b4Smrg{
3897117f1b4Smrg   emit_1ub(p, 0x85);
3907117f1b4Smrg   emit_modrm( p, dst, src );
3917117f1b4Smrg}
3927117f1b4Smrg
3937117f1b4Smrgvoid x86_add( struct x86_function *p,
3947117f1b4Smrg	       struct x86_reg dst,
3957117f1b4Smrg	       struct x86_reg src )
3967117f1b4Smrg{
3977117f1b4Smrg   emit_op_modrm(p, 0x03, 0x01, dst, src );
3987117f1b4Smrg}
3997117f1b4Smrg
4007117f1b4Smrgvoid x86_mul( struct x86_function *p,
4017117f1b4Smrg	       struct x86_reg src )
4027117f1b4Smrg{
4037117f1b4Smrg   assert (src.file == file_REG32 && src.mod == mod_REG);
4047117f1b4Smrg   emit_op_modrm(p, 0xf7, 0, x86_make_reg (file_REG32, reg_SP), src );
4057117f1b4Smrg}
4067117f1b4Smrg
4077117f1b4Smrgvoid x86_sub( struct x86_function *p,
4087117f1b4Smrg	       struct x86_reg dst,
4097117f1b4Smrg	       struct x86_reg src )
4107117f1b4Smrg{
4117117f1b4Smrg   emit_op_modrm(p, 0x2b, 0x29, dst, src );
4127117f1b4Smrg}
4137117f1b4Smrg
4147117f1b4Smrgvoid x86_or( struct x86_function *p,
4157117f1b4Smrg             struct x86_reg dst,
4167117f1b4Smrg             struct x86_reg src )
4177117f1b4Smrg{
4187117f1b4Smrg   emit_op_modrm( p, 0x0b, 0x09, dst, src );
4197117f1b4Smrg}
4207117f1b4Smrg
4217117f1b4Smrgvoid x86_and( struct x86_function *p,
4227117f1b4Smrg              struct x86_reg dst,
4237117f1b4Smrg              struct x86_reg src )
4247117f1b4Smrg{
4257117f1b4Smrg   emit_op_modrm( p, 0x23, 0x21, dst, src );
4267117f1b4Smrg}
4277117f1b4Smrg
4287117f1b4Smrg
4297117f1b4Smrg
4307117f1b4Smrg/***********************************************************************
4317117f1b4Smrg * SSE instructions
4327117f1b4Smrg */
4337117f1b4Smrg
4347117f1b4Smrg
4357117f1b4Smrgvoid sse_movss( struct x86_function *p,
4367117f1b4Smrg		struct x86_reg dst,
4377117f1b4Smrg		struct x86_reg src )
4387117f1b4Smrg{
4397117f1b4Smrg   emit_2ub(p, 0xF3, X86_TWOB);
4407117f1b4Smrg   emit_op_modrm( p, 0x10, 0x11, dst, src );
4417117f1b4Smrg}
4427117f1b4Smrg
4437117f1b4Smrgvoid sse_movaps( struct x86_function *p,
4447117f1b4Smrg		 struct x86_reg dst,
4457117f1b4Smrg		 struct x86_reg src )
4467117f1b4Smrg{
4477117f1b4Smrg   emit_1ub(p, X86_TWOB);
4487117f1b4Smrg   emit_op_modrm( p, 0x28, 0x29, dst, src );
4497117f1b4Smrg}
4507117f1b4Smrg
4517117f1b4Smrgvoid sse_movups( struct x86_function *p,
4527117f1b4Smrg		 struct x86_reg dst,
4537117f1b4Smrg		 struct x86_reg src )
4547117f1b4Smrg{
4557117f1b4Smrg   emit_1ub(p, X86_TWOB);
4567117f1b4Smrg   emit_op_modrm( p, 0x10, 0x11, dst, src );
4577117f1b4Smrg}
4587117f1b4Smrg
4597117f1b4Smrgvoid sse_movhps( struct x86_function *p,
4607117f1b4Smrg		 struct x86_reg dst,
4617117f1b4Smrg		 struct x86_reg src )
4627117f1b4Smrg{
4637117f1b4Smrg   assert(dst.mod != mod_REG || src.mod != mod_REG);
4647117f1b4Smrg   emit_1ub(p, X86_TWOB);
4657117f1b4Smrg   emit_op_modrm( p, 0x16, 0x17, dst, src ); /* cf movlhps */
4667117f1b4Smrg}
4677117f1b4Smrg
4687117f1b4Smrgvoid sse_movlps( struct x86_function *p,
4697117f1b4Smrg		 struct x86_reg dst,
4707117f1b4Smrg		 struct x86_reg src )
4717117f1b4Smrg{
4727117f1b4Smrg   assert(dst.mod != mod_REG || src.mod != mod_REG);
4737117f1b4Smrg   emit_1ub(p, X86_TWOB);
4747117f1b4Smrg   emit_op_modrm( p, 0x12, 0x13, dst, src ); /* cf movhlps */
4757117f1b4Smrg}
4767117f1b4Smrg
4777117f1b4Smrgvoid sse_maxps( struct x86_function *p,
4787117f1b4Smrg		struct x86_reg dst,
4797117f1b4Smrg		struct x86_reg src )
4807117f1b4Smrg{
4817117f1b4Smrg   emit_2ub(p, X86_TWOB, 0x5F);
4827117f1b4Smrg   emit_modrm( p, dst, src );
4837117f1b4Smrg}
4847117f1b4Smrg
4857117f1b4Smrgvoid sse_maxss( struct x86_function *p,
4867117f1b4Smrg		struct x86_reg dst,
4877117f1b4Smrg		struct x86_reg src )
4887117f1b4Smrg{
4897117f1b4Smrg   emit_3ub(p, 0xF3, X86_TWOB, 0x5F);
4907117f1b4Smrg   emit_modrm( p, dst, src );
4917117f1b4Smrg}
4927117f1b4Smrg
4937117f1b4Smrgvoid sse_divss( struct x86_function *p,
4947117f1b4Smrg		struct x86_reg dst,
4957117f1b4Smrg		struct x86_reg src )
4967117f1b4Smrg{
4977117f1b4Smrg   emit_3ub(p, 0xF3, X86_TWOB, 0x5E);
4987117f1b4Smrg   emit_modrm( p, dst, src );
4997117f1b4Smrg}
5007117f1b4Smrg
5017117f1b4Smrgvoid sse_minps( struct x86_function *p,
5027117f1b4Smrg		struct x86_reg dst,
5037117f1b4Smrg		struct x86_reg src )
5047117f1b4Smrg{
5057117f1b4Smrg   emit_2ub(p, X86_TWOB, 0x5D);
5067117f1b4Smrg   emit_modrm( p, dst, src );
5077117f1b4Smrg}
5087117f1b4Smrg
5097117f1b4Smrgvoid sse_subps( struct x86_function *p,
5107117f1b4Smrg		struct x86_reg dst,
5117117f1b4Smrg		struct x86_reg src )
5127117f1b4Smrg{
5137117f1b4Smrg   emit_2ub(p, X86_TWOB, 0x5C);
5147117f1b4Smrg   emit_modrm( p, dst, src );
5157117f1b4Smrg}
5167117f1b4Smrg
5177117f1b4Smrgvoid sse_mulps( struct x86_function *p,
5187117f1b4Smrg		struct x86_reg dst,
5197117f1b4Smrg		struct x86_reg src )
5207117f1b4Smrg{
5217117f1b4Smrg   emit_2ub(p, X86_TWOB, 0x59);
5227117f1b4Smrg   emit_modrm( p, dst, src );
5237117f1b4Smrg}
5247117f1b4Smrg
5257117f1b4Smrgvoid sse_mulss( struct x86_function *p,
5267117f1b4Smrg		struct x86_reg dst,
5277117f1b4Smrg		struct x86_reg src )
5287117f1b4Smrg{
5297117f1b4Smrg   emit_3ub(p, 0xF3, X86_TWOB, 0x59);
5307117f1b4Smrg   emit_modrm( p, dst, src );
5317117f1b4Smrg}
5327117f1b4Smrg
5337117f1b4Smrgvoid sse_addps( struct x86_function *p,
5347117f1b4Smrg		struct x86_reg dst,
5357117f1b4Smrg		struct x86_reg src )
5367117f1b4Smrg{
5377117f1b4Smrg   emit_2ub(p, X86_TWOB, 0x58);
5387117f1b4Smrg   emit_modrm( p, dst, src );
5397117f1b4Smrg}
5407117f1b4Smrg
5417117f1b4Smrgvoid sse_addss( struct x86_function *p,
5427117f1b4Smrg		struct x86_reg dst,
5437117f1b4Smrg		struct x86_reg src )
5447117f1b4Smrg{
5457117f1b4Smrg   emit_3ub(p, 0xF3, X86_TWOB, 0x58);
5467117f1b4Smrg   emit_modrm( p, dst, src );
5477117f1b4Smrg}
5487117f1b4Smrg
5497117f1b4Smrgvoid sse_andnps( struct x86_function *p,
5507117f1b4Smrg                 struct x86_reg dst,
5517117f1b4Smrg                 struct x86_reg src )
5527117f1b4Smrg{
5537117f1b4Smrg   emit_2ub(p, X86_TWOB, 0x55);
5547117f1b4Smrg   emit_modrm( p, dst, src );
5557117f1b4Smrg}
5567117f1b4Smrg
5577117f1b4Smrgvoid sse_andps( struct x86_function *p,
5587117f1b4Smrg		struct x86_reg dst,
5597117f1b4Smrg		struct x86_reg src )
5607117f1b4Smrg{
5617117f1b4Smrg   emit_2ub(p, X86_TWOB, 0x54);
5627117f1b4Smrg   emit_modrm( p, dst, src );
5637117f1b4Smrg}
5647117f1b4Smrg
5657117f1b4Smrgvoid sse_rsqrtps( struct x86_function *p,
5667117f1b4Smrg                  struct x86_reg dst,
5677117f1b4Smrg                  struct x86_reg src )
5687117f1b4Smrg{
5697117f1b4Smrg   emit_2ub(p, X86_TWOB, 0x52);
5707117f1b4Smrg   emit_modrm( p, dst, src );
5717117f1b4Smrg}
5727117f1b4Smrg
5737117f1b4Smrgvoid sse_rsqrtss( struct x86_function *p,
5747117f1b4Smrg		  struct x86_reg dst,
5757117f1b4Smrg		  struct x86_reg src )
5767117f1b4Smrg{
5777117f1b4Smrg   emit_3ub(p, 0xF3, X86_TWOB, 0x52);
5787117f1b4Smrg   emit_modrm( p, dst, src );
5797117f1b4Smrg
5807117f1b4Smrg}
5817117f1b4Smrg
5827117f1b4Smrgvoid sse_movhlps( struct x86_function *p,
5837117f1b4Smrg		  struct x86_reg dst,
5847117f1b4Smrg		  struct x86_reg src )
5857117f1b4Smrg{
5867117f1b4Smrg   assert(dst.mod == mod_REG && src.mod == mod_REG);
5877117f1b4Smrg   emit_2ub(p, X86_TWOB, 0x12);
5887117f1b4Smrg   emit_modrm( p, dst, src );
5897117f1b4Smrg}
5907117f1b4Smrg
5917117f1b4Smrgvoid sse_movlhps( struct x86_function *p,
5927117f1b4Smrg		  struct x86_reg dst,
5937117f1b4Smrg		  struct x86_reg src )
5947117f1b4Smrg{
5957117f1b4Smrg   assert(dst.mod == mod_REG && src.mod == mod_REG);
5967117f1b4Smrg   emit_2ub(p, X86_TWOB, 0x16);
5977117f1b4Smrg   emit_modrm( p, dst, src );
5987117f1b4Smrg}
5997117f1b4Smrg
6007117f1b4Smrgvoid sse_orps( struct x86_function *p,
6017117f1b4Smrg               struct x86_reg dst,
6027117f1b4Smrg               struct x86_reg src )
6037117f1b4Smrg{
6047117f1b4Smrg   emit_2ub(p, X86_TWOB, 0x56);
6057117f1b4Smrg   emit_modrm( p, dst, src );
6067117f1b4Smrg}
6077117f1b4Smrg
6087117f1b4Smrgvoid sse_xorps( struct x86_function *p,
6097117f1b4Smrg                struct x86_reg dst,
6107117f1b4Smrg                struct x86_reg src )
6117117f1b4Smrg{
6127117f1b4Smrg   emit_2ub(p, X86_TWOB, 0x57);
6137117f1b4Smrg   emit_modrm( p, dst, src );
6147117f1b4Smrg}
6157117f1b4Smrg
6167117f1b4Smrgvoid sse_cvtps2pi( struct x86_function *p,
6177117f1b4Smrg		   struct x86_reg dst,
6187117f1b4Smrg		   struct x86_reg src )
6197117f1b4Smrg{
6207ec681f3Smrg   assert(dst.file == file_MMX &&
6217117f1b4Smrg	  (src.file == file_XMM || src.mod != mod_REG));
6227117f1b4Smrg
6237117f1b4Smrg   p->need_emms = 1;
6247117f1b4Smrg
6257117f1b4Smrg   emit_2ub(p, X86_TWOB, 0x2d);
6267117f1b4Smrg   emit_modrm( p, dst, src );
6277117f1b4Smrg}
6287117f1b4Smrg
6297117f1b4Smrg
6307117f1b4Smrg/* Shufps can also be used to implement a reduced swizzle when dest ==
6317117f1b4Smrg * arg0.
6327117f1b4Smrg */
6337117f1b4Smrgvoid sse_shufps( struct x86_function *p,
6347117f1b4Smrg		 struct x86_reg dest,
6357117f1b4Smrg		 struct x86_reg arg0,
6367ec681f3Smrg		 unsigned char shuf)
6377117f1b4Smrg{
6387117f1b4Smrg   emit_2ub(p, X86_TWOB, 0xC6);
6397117f1b4Smrg   emit_modrm(p, dest, arg0);
6407ec681f3Smrg   emit_1ub(p, shuf);
6417117f1b4Smrg}
6427117f1b4Smrg
6437117f1b4Smrgvoid sse_cmpps( struct x86_function *p,
6447117f1b4Smrg		struct x86_reg dest,
6457117f1b4Smrg		struct x86_reg arg0,
6467ec681f3Smrg		unsigned char cc)
6477117f1b4Smrg{
6487117f1b4Smrg   emit_2ub(p, X86_TWOB, 0xC2);
6497117f1b4Smrg   emit_modrm(p, dest, arg0);
6507ec681f3Smrg   emit_1ub(p, cc);
6517117f1b4Smrg}
6527117f1b4Smrg
6537117f1b4Smrgvoid sse_pmovmskb( struct x86_function *p,
6547117f1b4Smrg                   struct x86_reg dest,
6557117f1b4Smrg                   struct x86_reg src)
6567117f1b4Smrg{
6577117f1b4Smrg    emit_3ub(p, 0x66, X86_TWOB, 0xD7);
6587117f1b4Smrg    emit_modrm(p, dest, src);
6597117f1b4Smrg}
6607117f1b4Smrg
6617117f1b4Smrg/***********************************************************************
6627117f1b4Smrg * SSE2 instructions
6637117f1b4Smrg */
6647117f1b4Smrg
6657117f1b4Smrg/**
6667117f1b4Smrg * Perform a reduced swizzle:
6677117f1b4Smrg */
6687117f1b4Smrgvoid sse2_pshufd( struct x86_function *p,
6697117f1b4Smrg		  struct x86_reg dest,
6707117f1b4Smrg		  struct x86_reg arg0,
6717ec681f3Smrg		  unsigned char shuf)
6727117f1b4Smrg{
6737117f1b4Smrg   emit_3ub(p, 0x66, X86_TWOB, 0x70);
6747117f1b4Smrg   emit_modrm(p, dest, arg0);
6757ec681f3Smrg   emit_1ub(p, shuf);
6767117f1b4Smrg}
6777117f1b4Smrg
6787117f1b4Smrgvoid sse2_cvttps2dq( struct x86_function *p,
6797117f1b4Smrg                     struct x86_reg dst,
6807117f1b4Smrg                     struct x86_reg src )
6817117f1b4Smrg{
6827117f1b4Smrg   emit_3ub( p, 0xF3, X86_TWOB, 0x5B );
6837117f1b4Smrg   emit_modrm( p, dst, src );
6847117f1b4Smrg}
6857117f1b4Smrg
6867117f1b4Smrgvoid sse2_cvtps2dq( struct x86_function *p,
6877117f1b4Smrg		    struct x86_reg dst,
6887117f1b4Smrg		    struct x86_reg src )
6897117f1b4Smrg{
6907117f1b4Smrg   emit_3ub(p, 0x66, X86_TWOB, 0x5B);
6917117f1b4Smrg   emit_modrm( p, dst, src );
6927117f1b4Smrg}
6937117f1b4Smrg
6947117f1b4Smrgvoid sse2_packssdw( struct x86_function *p,
6957117f1b4Smrg		    struct x86_reg dst,
6967117f1b4Smrg		    struct x86_reg src )
6977117f1b4Smrg{
6987117f1b4Smrg   emit_3ub(p, 0x66, X86_TWOB, 0x6B);
6997117f1b4Smrg   emit_modrm( p, dst, src );
7007117f1b4Smrg}
7017117f1b4Smrg
7027117f1b4Smrgvoid sse2_packsswb( struct x86_function *p,
7037117f1b4Smrg		    struct x86_reg dst,
7047117f1b4Smrg		    struct x86_reg src )
7057117f1b4Smrg{
7067117f1b4Smrg   emit_3ub(p, 0x66, X86_TWOB, 0x63);
7077117f1b4Smrg   emit_modrm( p, dst, src );
7087117f1b4Smrg}
7097117f1b4Smrg
7107117f1b4Smrgvoid sse2_packuswb( struct x86_function *p,
7117117f1b4Smrg		    struct x86_reg dst,
7127117f1b4Smrg		    struct x86_reg src )
7137117f1b4Smrg{
7147117f1b4Smrg   emit_3ub(p, 0x66, X86_TWOB, 0x67);
7157117f1b4Smrg   emit_modrm( p, dst, src );
7167117f1b4Smrg}
7177117f1b4Smrg
7187117f1b4Smrgvoid sse2_rcpps( struct x86_function *p,
7197117f1b4Smrg                 struct x86_reg dst,
7207117f1b4Smrg                 struct x86_reg src )
7217117f1b4Smrg{
7227117f1b4Smrg   emit_2ub(p, X86_TWOB, 0x53);
7237117f1b4Smrg   emit_modrm( p, dst, src );
7247117f1b4Smrg}
7257117f1b4Smrg
7267117f1b4Smrgvoid sse2_rcpss( struct x86_function *p,
7277117f1b4Smrg		struct x86_reg dst,
7287117f1b4Smrg		struct x86_reg src )
7297117f1b4Smrg{
7307117f1b4Smrg   emit_3ub(p, 0xF3, X86_TWOB, 0x53);
7317117f1b4Smrg   emit_modrm( p, dst, src );
7327117f1b4Smrg}
7337117f1b4Smrg
7347117f1b4Smrgvoid sse2_movd( struct x86_function *p,
7357117f1b4Smrg		struct x86_reg dst,
7367117f1b4Smrg		struct x86_reg src )
7377117f1b4Smrg{
7387117f1b4Smrg   emit_2ub(p, 0x66, X86_TWOB);
7397117f1b4Smrg   emit_op_modrm( p, 0x6e, 0x7e, dst, src );
7407117f1b4Smrg}
7417117f1b4Smrg
7427117f1b4Smrg
7437117f1b4Smrg
7447117f1b4Smrg
7457117f1b4Smrg/***********************************************************************
7467117f1b4Smrg * x87 instructions
7477117f1b4Smrg */
7487117f1b4Smrgvoid x87_fist( struct x86_function *p, struct x86_reg dst )
7497117f1b4Smrg{
7507117f1b4Smrg   emit_1ub(p, 0xdb);
7517117f1b4Smrg   emit_modrm_noreg(p, 2, dst);
7527117f1b4Smrg}
7537117f1b4Smrg
7547117f1b4Smrgvoid x87_fistp( struct x86_function *p, struct x86_reg dst )
7557117f1b4Smrg{
7567117f1b4Smrg   emit_1ub(p, 0xdb);
7577117f1b4Smrg   emit_modrm_noreg(p, 3, dst);
7587117f1b4Smrg}
7597117f1b4Smrg
7607117f1b4Smrgvoid x87_fild( struct x86_function *p, struct x86_reg arg )
7617117f1b4Smrg{
7627117f1b4Smrg   emit_1ub(p, 0xdf);
7637117f1b4Smrg   emit_modrm_noreg(p, 0, arg);
7647117f1b4Smrg}
7657117f1b4Smrg
/* FLDZ (0xd9 0xee): push +0.0 onto the x87 stack. */
void x87_fldz( struct x86_function *p )
{
   const unsigned char escape = 0xd9;
   const unsigned char opcode = 0xee;

   emit_2ub(p, escape, opcode);
}
7707117f1b4Smrg
7717117f1b4Smrg
7727117f1b4Smrgvoid x87_fldcw( struct x86_function *p, struct x86_reg arg )
7737117f1b4Smrg{
7747117f1b4Smrg   assert(arg.file == file_REG32);
7757117f1b4Smrg   assert(arg.mod != mod_REG);
7767117f1b4Smrg   emit_1ub(p, 0xd9);
7777117f1b4Smrg   emit_modrm_noreg(p, 5, arg);
7787117f1b4Smrg}
7797117f1b4Smrg
/* FLD1 (0xd9 0xe8): push +1.0 onto the x87 stack. */
void x87_fld1( struct x86_function *p )
{
   const unsigned char escape = 0xd9;
   const unsigned char opcode = 0xe8;

   emit_2ub(p, escape, opcode);
}
7847117f1b4Smrg
/* FLDL2E (0xd9 0xea): push log2(e) onto the x87 stack. */
void x87_fldl2e( struct x86_function *p )
{
   const unsigned char escape = 0xd9;
   const unsigned char opcode = 0xea;

   emit_2ub(p, escape, opcode);
}
7897117f1b4Smrg
/* FLDLN2 (0xd9 0xed): push ln(2) onto the x87 stack. */
void x87_fldln2( struct x86_function *p )
{
   const unsigned char escape = 0xd9;
   const unsigned char opcode = 0xed;

   emit_2ub(p, escape, opcode);
}
7947117f1b4Smrg
/* FWAIT (0x9b). */
void x87_fwait( struct x86_function *p )
{
   const unsigned char opcode = 0x9b;

   emit_1ub(p, opcode);
}
7997117f1b4Smrg
/* FNCLEX (0xdb 0xe2): clear x87 exception flags without a preceding wait. */
void x87_fnclex( struct x86_function *p )
{
   const unsigned char escape = 0xdb;
   const unsigned char opcode = 0xe2;

   emit_2ub(p, escape, opcode);
}
8047117f1b4Smrg
/* FCLEX: the waiting form of FNCLEX, i.e. FWAIT (0x9b) followed by
 * FNCLEX (0xdb 0xe2).
 */
void x87_fclex( struct x86_function *p )
{
   emit_1ub(p, 0x9b);          /* fwait  */
   emit_2ub(p, 0xdb, 0xe2);    /* fnclex */
}
8107117f1b4Smrg
8117117f1b4Smrg
8127117f1b4Smrgstatic void x87_arith_op( struct x86_function *p, struct x86_reg dst, struct x86_reg arg,
8137117f1b4Smrg			  unsigned char dst0ub0,
8147117f1b4Smrg			  unsigned char dst0ub1,
8157117f1b4Smrg			  unsigned char arg0ub0,
8167117f1b4Smrg			  unsigned char arg0ub1,
8177117f1b4Smrg			  unsigned char argmem_noreg)
8187117f1b4Smrg{
8197117f1b4Smrg   assert(dst.file == file_x87);
8207117f1b4Smrg
8217117f1b4Smrg   if (arg.file == file_x87) {
8227ec681f3Smrg      if (dst.idx == 0)
8237117f1b4Smrg	 emit_2ub(p, dst0ub0, dst0ub1+arg.idx);
8247ec681f3Smrg      else if (arg.idx == 0)
8257117f1b4Smrg	 emit_2ub(p, arg0ub0, arg0ub1+arg.idx);
8267117f1b4Smrg      else
8277117f1b4Smrg	 assert(0);
8287117f1b4Smrg   }
8297117f1b4Smrg   else if (dst.idx == 0) {
8307117f1b4Smrg      assert(arg.file == file_REG32);
8317117f1b4Smrg      emit_1ub(p, 0xd8);
8327117f1b4Smrg      emit_modrm_noreg(p, argmem_noreg, arg);
8337117f1b4Smrg   }
8347117f1b4Smrg   else
8357117f1b4Smrg      assert(0);
8367117f1b4Smrg}
8377117f1b4Smrg
8387117f1b4Smrgvoid x87_fmul( struct x86_function *p, struct x86_reg dst, struct x86_reg arg )
8397117f1b4Smrg{
8407ec681f3Smrg   x87_arith_op(p, dst, arg,
8417117f1b4Smrg		0xd8, 0xc8,
8427117f1b4Smrg		0xdc, 0xc8,
8437117f1b4Smrg		4);
8447117f1b4Smrg}
8457117f1b4Smrg
8467117f1b4Smrgvoid x87_fsub( struct x86_function *p, struct x86_reg dst, struct x86_reg arg )
8477117f1b4Smrg{
8487ec681f3Smrg   x87_arith_op(p, dst, arg,
8497117f1b4Smrg		0xd8, 0xe0,
8507117f1b4Smrg		0xdc, 0xe8,
8517117f1b4Smrg		4);
8527117f1b4Smrg}
8537117f1b4Smrg
8547117f1b4Smrgvoid x87_fsubr( struct x86_function *p, struct x86_reg dst, struct x86_reg arg )
8557117f1b4Smrg{
8567ec681f3Smrg   x87_arith_op(p, dst, arg,
8577117f1b4Smrg		0xd8, 0xe8,
8587117f1b4Smrg		0xdc, 0xe0,
8597117f1b4Smrg		5);
8607117f1b4Smrg}
8617117f1b4Smrg
8627117f1b4Smrgvoid x87_fadd( struct x86_function *p, struct x86_reg dst, struct x86_reg arg )
8637117f1b4Smrg{
8647ec681f3Smrg   x87_arith_op(p, dst, arg,
8657117f1b4Smrg		0xd8, 0xc0,
8667117f1b4Smrg		0xdc, 0xc0,
8677117f1b4Smrg		0);
8687117f1b4Smrg}
8697117f1b4Smrg
8707117f1b4Smrgvoid x87_fdiv( struct x86_function *p, struct x86_reg dst, struct x86_reg arg )
8717117f1b4Smrg{
8727ec681f3Smrg   x87_arith_op(p, dst, arg,
8737117f1b4Smrg		0xd8, 0xf0,
8747117f1b4Smrg		0xdc, 0xf8,
8757117f1b4Smrg		6);
8767117f1b4Smrg}
8777117f1b4Smrg
8787117f1b4Smrgvoid x87_fdivr( struct x86_function *p, struct x86_reg dst, struct x86_reg arg )
8797117f1b4Smrg{
8807ec681f3Smrg   x87_arith_op(p, dst, arg,
8817117f1b4Smrg		0xd8, 0xf8,
8827117f1b4Smrg		0xdc, 0xf0,
8837117f1b4Smrg		7);
8847117f1b4Smrg}
8857117f1b4Smrg
8867117f1b4Smrgvoid x87_fmulp( struct x86_function *p, struct x86_reg dst )
8877117f1b4Smrg{
8887117f1b4Smrg   assert(dst.file == file_x87);
8897117f1b4Smrg   assert(dst.idx >= 1);
8907117f1b4Smrg   emit_2ub(p, 0xde, 0xc8+dst.idx);
8917117f1b4Smrg}
8927117f1b4Smrg
8937117f1b4Smrgvoid x87_fsubp( struct x86_function *p, struct x86_reg dst )
8947117f1b4Smrg{
8957117f1b4Smrg   assert(dst.file == file_x87);
8967117f1b4Smrg   assert(dst.idx >= 1);
8977117f1b4Smrg   emit_2ub(p, 0xde, 0xe8+dst.idx);
8987117f1b4Smrg}
8997117f1b4Smrg
9007117f1b4Smrgvoid x87_fsubrp( struct x86_function *p, struct x86_reg dst )
9017117f1b4Smrg{
9027117f1b4Smrg   assert(dst.file == file_x87);
9037117f1b4Smrg   assert(dst.idx >= 1);
9047117f1b4Smrg   emit_2ub(p, 0xde, 0xe0+dst.idx);
9057117f1b4Smrg}
9067117f1b4Smrg
9077117f1b4Smrgvoid x87_faddp( struct x86_function *p, struct x86_reg dst )
9087117f1b4Smrg{
9097117f1b4Smrg   assert(dst.file == file_x87);
9107117f1b4Smrg   assert(dst.idx >= 1);
9117117f1b4Smrg   emit_2ub(p, 0xde, 0xc0+dst.idx);
9127117f1b4Smrg}
9137117f1b4Smrg
9147117f1b4Smrgvoid x87_fdivp( struct x86_function *p, struct x86_reg dst )
9157117f1b4Smrg{
9167117f1b4Smrg   assert(dst.file == file_x87);
9177117f1b4Smrg   assert(dst.idx >= 1);
9187117f1b4Smrg   emit_2ub(p, 0xde, 0xf8+dst.idx);
9197117f1b4Smrg}
9207117f1b4Smrg
9217117f1b4Smrgvoid x87_fdivrp( struct x86_function *p, struct x86_reg dst )
9227117f1b4Smrg{
9237117f1b4Smrg   assert(dst.file == file_x87);
9247117f1b4Smrg   assert(dst.idx >= 1);
9257117f1b4Smrg   emit_2ub(p, 0xde, 0xf0+dst.idx);
9267117f1b4Smrg}
9277117f1b4Smrg
9287117f1b4Smrgvoid x87_fucom( struct x86_function *p, struct x86_reg arg )
9297117f1b4Smrg{
9307117f1b4Smrg   assert(arg.file == file_x87);
9317117f1b4Smrg   emit_2ub(p, 0xdd, 0xe0+arg.idx);
9327117f1b4Smrg}
9337117f1b4Smrg
9347117f1b4Smrgvoid x87_fucomp( struct x86_function *p, struct x86_reg arg )
9357117f1b4Smrg{
9367117f1b4Smrg   assert(arg.file == file_x87);
9377117f1b4Smrg   emit_2ub(p, 0xdd, 0xe8+arg.idx);
9387117f1b4Smrg}
9397117f1b4Smrg
/* Emit FUCOMPP: the fixed byte pair 0xda 0xe9 (takes no operand). */
void x87_fucompp(struct x86_function *p)
{
   emit_2ub(p, 0xda, 0xe9);
}
9447117f1b4Smrg
9457117f1b4Smrgvoid x87_fxch( struct x86_function *p, struct x86_reg arg )
9467117f1b4Smrg{
9477117f1b4Smrg   assert(arg.file == file_x87);
9487117f1b4Smrg   emit_2ub(p, 0xd9, 0xc8+arg.idx);
9497117f1b4Smrg}
9507117f1b4Smrg
/* Emit FABS: the fixed byte pair 0xd9 0xe1. */
void x87_fabs(struct x86_function *p)
{
   emit_2ub(p, 0xd9, 0xe1);
}
9557117f1b4Smrg
/* Emit FCHS: the fixed byte pair 0xd9 0xe0. */
void x87_fchs(struct x86_function *p)
{
   emit_2ub(p, 0xd9, 0xe0);
}
9607117f1b4Smrg
/* Emit FCOS: the fixed byte pair 0xd9 0xff. */
void x87_fcos(struct x86_function *p)
{
   emit_2ub(p, 0xd9, 0xff);
}
9657117f1b4Smrg
9667117f1b4Smrg
/* Emit the fixed byte pair 0xd9 0xfc.
 * NOTE(review): this pair is the FRNDINT encoding; the function name spells
 * it "fprndint" -- kept as is because callers use that name.
 */
void x87_fprndint(struct x86_function *p)
{
   emit_2ub(p, 0xd9, 0xfc);
}
9717117f1b4Smrg
/* Emit FSCALE: the fixed byte pair 0xd9 0xfd. */
void x87_fscale(struct x86_function *p)
{
   emit_2ub(p, 0xd9, 0xfd);
}
9767117f1b4Smrg
/* Emit FSIN: the fixed byte pair 0xd9 0xfe. */
void x87_fsin(struct x86_function *p)
{
   emit_2ub(p, 0xd9, 0xfe);
}
9817117f1b4Smrg
/* Emit FSINCOS: the fixed byte pair 0xd9 0xfb. */
void x87_fsincos(struct x86_function *p)
{
   emit_2ub(p, 0xd9, 0xfb);
}
9867117f1b4Smrg
/* Emit FSQRT: the fixed byte pair 0xd9 0xfa. */
void x87_fsqrt(struct x86_function *p)
{
   emit_2ub(p, 0xd9, 0xfa);
}
9917117f1b4Smrg
/* Emit FXTRACT: the fixed byte pair 0xd9 0xf4. */
void x87_fxtract(struct x86_function *p)
{
   emit_2ub(p, 0xd9, 0xf4);
}
9967117f1b4Smrg
/* Emit F2XM1 (bytes 0xd9 0xf0):
 *
 *    st0 = (2^st0) - 1
 *
 * Restriction: the input must satisfy -1.0 <= st0 <= 1.0.
 */
void x87_f2xm1(struct x86_function *p)
{
   emit_2ub(p, 0xd9, 0xf0);
}
10057117f1b4Smrg
/* Emit FYL2X (bytes 0xd9 0xf1):
 *
 *    st1 = st1 * log2(st0);
 *    pop_stack;
 */
void x87_fyl2x(struct x86_function *p)
{
   emit_2ub(p, 0xd9, 0xf1);
}
10137117f1b4Smrg
/* Emit FYL2XP1 (bytes 0xd9 0xf9):
 *
 *    st1 = st1 * log2(st0 + 1.0);
 *    pop_stack;
 *
 * A fast operation, but restricted to inputs with -.29 < st0 < .29.
 */
void x87_fyl2xp1(struct x86_function *p)
{
   emit_2ub(p, 0xd9, 0xf9);
}
10237117f1b4Smrg
10247117f1b4Smrg
10257117f1b4Smrgvoid x87_fld( struct x86_function *p, struct x86_reg arg )
10267117f1b4Smrg{
10277ec681f3Smrg   if (arg.file == file_x87)
10287117f1b4Smrg      emit_2ub(p, 0xd9, 0xc0 + arg.idx);
10297117f1b4Smrg   else {
10307117f1b4Smrg      emit_1ub(p, 0xd9);
10317117f1b4Smrg      emit_modrm_noreg(p, 0, arg);
10327117f1b4Smrg   }
10337117f1b4Smrg}
10347117f1b4Smrg
10357117f1b4Smrgvoid x87_fst( struct x86_function *p, struct x86_reg dst )
10367117f1b4Smrg{
10377ec681f3Smrg   if (dst.file == file_x87)
10387117f1b4Smrg      emit_2ub(p, 0xdd, 0xd0 + dst.idx);
10397117f1b4Smrg   else {
10407117f1b4Smrg      emit_1ub(p, 0xd9);
10417117f1b4Smrg      emit_modrm_noreg(p, 2, dst);
10427117f1b4Smrg   }
10437117f1b4Smrg}
10447117f1b4Smrg
10457117f1b4Smrgvoid x87_fstp( struct x86_function *p, struct x86_reg dst )
10467117f1b4Smrg{
10477ec681f3Smrg   if (dst.file == file_x87)
10487117f1b4Smrg      emit_2ub(p, 0xdd, 0xd8 + dst.idx);
10497117f1b4Smrg   else {
10507117f1b4Smrg      emit_1ub(p, 0xd9);
10517117f1b4Smrg      emit_modrm_noreg(p, 3, dst);
10527117f1b4Smrg   }
10537117f1b4Smrg}
10547117f1b4Smrg
10557117f1b4Smrgvoid x87_fcom( struct x86_function *p, struct x86_reg dst )
10567117f1b4Smrg{
10577ec681f3Smrg   if (dst.file == file_x87)
10587117f1b4Smrg      emit_2ub(p, 0xd8, 0xd0 + dst.idx);
10597117f1b4Smrg   else {
10607117f1b4Smrg      emit_1ub(p, 0xd8);
10617117f1b4Smrg      emit_modrm_noreg(p, 2, dst);
10627117f1b4Smrg   }
10637117f1b4Smrg}
10647117f1b4Smrg
10657117f1b4Smrgvoid x87_fcomp( struct x86_function *p, struct x86_reg dst )
10667117f1b4Smrg{
10677ec681f3Smrg   if (dst.file == file_x87)
10687117f1b4Smrg      emit_2ub(p, 0xd8, 0xd8 + dst.idx);
10697117f1b4Smrg   else {
10707117f1b4Smrg      emit_1ub(p, 0xd8);
10717117f1b4Smrg      emit_modrm_noreg(p, 3, dst);
10727117f1b4Smrg   }
10737117f1b4Smrg}
10747117f1b4Smrg
10757117f1b4Smrg
10767117f1b4Smrgvoid x87_fnstsw( struct x86_function *p, struct x86_reg dst )
10777117f1b4Smrg{
10787117f1b4Smrg   assert(dst.file == file_REG32);
10797117f1b4Smrg
10807117f1b4Smrg   if (dst.idx == reg_AX &&
10817ec681f3Smrg       dst.mod == mod_REG)
10827117f1b4Smrg      emit_2ub(p, 0xdf, 0xe0);
10837117f1b4Smrg   else {
10847117f1b4Smrg      emit_1ub(p, 0xdd);
10857117f1b4Smrg      emit_modrm_noreg(p, 7, dst);
10867117f1b4Smrg   }
10877117f1b4Smrg}
10887117f1b4Smrg
10897117f1b4Smrg
10907117f1b4Smrg
10917117f1b4Smrg
10927117f1b4Smrg/***********************************************************************
10937117f1b4Smrg * MMX instructions
10947117f1b4Smrg */
10957117f1b4Smrg
10967117f1b4Smrgvoid mmx_emms( struct x86_function *p )
10977117f1b4Smrg{
10987117f1b4Smrg   assert(p->need_emms);
10997117f1b4Smrg   emit_2ub(p, 0x0f, 0x77);
11007117f1b4Smrg   p->need_emms = 0;
11017117f1b4Smrg}
11027117f1b4Smrg
11037117f1b4Smrgvoid mmx_packssdw( struct x86_function *p,
11047117f1b4Smrg		   struct x86_reg dst,
11057117f1b4Smrg		   struct x86_reg src )
11067117f1b4Smrg{
11077ec681f3Smrg   assert(dst.file == file_MMX &&
11087117f1b4Smrg	  (src.file == file_MMX || src.mod != mod_REG));
11097117f1b4Smrg
11107117f1b4Smrg   p->need_emms = 1;
11117117f1b4Smrg
11127117f1b4Smrg   emit_2ub(p, X86_TWOB, 0x6b);
11137117f1b4Smrg   emit_modrm( p, dst, src );
11147117f1b4Smrg}
11157117f1b4Smrg
11167117f1b4Smrgvoid mmx_packuswb( struct x86_function *p,
11177117f1b4Smrg		   struct x86_reg dst,
11187117f1b4Smrg		   struct x86_reg src )
11197117f1b4Smrg{
11207ec681f3Smrg   assert(dst.file == file_MMX &&
11217117f1b4Smrg	  (src.file == file_MMX || src.mod != mod_REG));
11227117f1b4Smrg
11237117f1b4Smrg   p->need_emms = 1;
11247117f1b4Smrg
11257117f1b4Smrg   emit_2ub(p, X86_TWOB, 0x67);
11267117f1b4Smrg   emit_modrm( p, dst, src );
11277117f1b4Smrg}
11287117f1b4Smrg
11297117f1b4Smrgvoid mmx_movd( struct x86_function *p,
11307117f1b4Smrg	       struct x86_reg dst,
11317117f1b4Smrg	       struct x86_reg src )
11327117f1b4Smrg{
11337117f1b4Smrg   p->need_emms = 1;
11347117f1b4Smrg   emit_1ub(p, X86_TWOB);
11357117f1b4Smrg   emit_op_modrm( p, 0x6e, 0x7e, dst, src );
11367117f1b4Smrg}
11377117f1b4Smrg
11387117f1b4Smrgvoid mmx_movq( struct x86_function *p,
11397117f1b4Smrg	       struct x86_reg dst,
11407117f1b4Smrg	       struct x86_reg src )
11417117f1b4Smrg{
11427117f1b4Smrg   p->need_emms = 1;
11437117f1b4Smrg   emit_1ub(p, X86_TWOB);
11447117f1b4Smrg   emit_op_modrm( p, 0x6f, 0x7f, dst, src );
11457117f1b4Smrg}
11467117f1b4Smrg
11477117f1b4Smrg
11487117f1b4Smrg/***********************************************************************
11497117f1b4Smrg * Helper functions
11507117f1b4Smrg */
11517117f1b4Smrg
11527117f1b4Smrg
11537117f1b4Smrg/* Retreive a reference to one of the function arguments, taking into
11547117f1b4Smrg * account any push/pop activity:
11557117f1b4Smrg */
11567117f1b4Smrgstruct x86_reg x86_fn_arg( struct x86_function *p,
11577117f1b4Smrg			   unsigned arg )
11587117f1b4Smrg{
11597ec681f3Smrg   return x86_make_disp(x86_make_reg(file_REG32, reg_SP),
11607117f1b4Smrg			p->stack_offset + arg * 4);	/* ??? */
11617117f1b4Smrg}
11627117f1b4Smrg
11637117f1b4Smrg
11647117f1b4Smrgvoid x86_init_func( struct x86_function *p )
11657117f1b4Smrg{
11667117f1b4Smrg   p->size = 0;
11677117f1b4Smrg   p->store = NULL;
11687117f1b4Smrg   p->csr = p->store;
11697117f1b4Smrg}
11707117f1b4Smrg
11717117f1b4Smrgint x86_init_func_size( struct x86_function *p, unsigned code_size )
11727117f1b4Smrg{
11737117f1b4Smrg   p->size = code_size;
11747117f1b4Smrg   p->store = _mesa_exec_malloc(code_size);
11757117f1b4Smrg   p->csr = p->store;
11767117f1b4Smrg   return p->store != NULL;
11777117f1b4Smrg}
11787117f1b4Smrg
11797117f1b4Smrgvoid x86_release_func( struct x86_function *p )
11807117f1b4Smrg{
11817117f1b4Smrg   _mesa_exec_free(p->store);
11827117f1b4Smrg   p->store = NULL;
11837117f1b4Smrg   p->csr = NULL;
11847117f1b4Smrg   p->size = 0;
11857117f1b4Smrg}
11867117f1b4Smrg
11877117f1b4Smrg
11887117f1b4Smrgvoid (*x86_get_func( struct x86_function *p ))(void)
11897117f1b4Smrg{
11907117f1b4Smrg   if (DISASSEM && p->store)
1191cdc920a0Smrg      printf("disassemble %p %p\n", p->store, p->csr);
11927117f1b4Smrg   return (void (*)(void)) (unsigned long) p->store;
11937117f1b4Smrg}
11947117f1b4Smrg
11957117f1b4Smrg#else
11967117f1b4Smrg
/* Placeholder compiled when USE_X86_ASM is defined but the target is not
 * i386; it does nothing.
 */
void x86sse_dummy(void)
{
   /* intentionally empty */
}
12007117f1b4Smrg
12017117f1b4Smrg#endif
12024a49301eSmrg
12034a49301eSmrg#else  /* USE_X86_ASM */
12044a49301eSmrg
/* Placeholder definition for builds without USE_X86_ASM; it exists only to
 * silence a compiler warning (presumably the empty-translation-unit one).
 */
int x86sse_c_dummy_var;
12064a49301eSmrg
12074a49301eSmrg#endif /* USE_X86_ASM */
1208