x86sse.c revision b8e80941
1#ifdef USE_X86_ASM
2#if defined(__i386__) || defined(__386__)
3
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "main/imports.h"
#include "main/execmem.h"
#include "x86sse.h"
9
10#define DISASSEM 0
11#define X86_TWOB 0x0f
12
13#if 0
/* Convert a function pointer into a byte pointer by going through an
 * unsigned long.  Only compiled when the surrounding "#if 0" is enabled.
 */
static unsigned char *cptr( void (*label)() )
{
   unsigned long addr = (unsigned long)label;

   return (unsigned char *)addr;
}
18#endif
19
20
21static void do_realloc( struct x86_function *p )
22{
23   if (p->size == 0) {
24      p->size = 1024;
25      p->store = _mesa_exec_malloc(p->size);
26      p->csr = p->store;
27   }
28   else {
29      unsigned used = p->csr - p->store;
30      unsigned char *tmp = p->store;
31      p->size *= 2;
32      p->store = _mesa_exec_malloc(p->size);
33      memcpy(p->store, tmp, used);
34      p->csr = p->store + used;
35      _mesa_exec_free(tmp);
36   }
37}
38
39/* Emit bytes to the instruction stream:
40 */
41static unsigned char *reserve( struct x86_function *p, int bytes )
42{
43   if (p->csr + bytes - p->store > p->size)
44      do_realloc(p);
45
46   {
47      unsigned char *csr = p->csr;
48      p->csr += bytes;
49      return csr;
50   }
51}
52
53
54
/* Append a single (signed) byte to the instruction stream. */
static void emit_1b( struct x86_function *p, char b0 )
{
   char *out = (char *)reserve(p, 1);

   out[0] = b0;
}
60
/* Append an int (in host byte order) to the instruction stream.
 *
 * The cursor is only byte aligned, so the value is copied with memcpy()
 * instead of being stored through a cast int pointer, which would be a
 * misaligned, type-punned access.
 */
static void emit_1i( struct x86_function *p, int i0 )
{
   unsigned char *out = reserve(p, sizeof(i0));

   memcpy(out, &i0, sizeof(i0));
}
66
/* Append one unsigned byte to the instruction stream. */
static void emit_1ub( struct x86_function *p, unsigned char b0 )
{
   unsigned char *out = reserve(p, 1);

   out[0] = b0;
}
72
/* Append two unsigned bytes, b0 first, to the instruction stream. */
static void emit_2ub( struct x86_function *p, unsigned char b0, unsigned char b1 )
{
   unsigned char *out = reserve(p, 2);

   out[0] = b0;
   out[1] = b1;
}
79
/* Append three unsigned bytes, in argument order, to the instruction
 * stream.
 */
static void emit_3ub( struct x86_function *p, unsigned char b0, unsigned char b1, unsigned char b2 )
{
   unsigned char *out = reserve(p, 3);

   out[0] = b0;
   out[1] = b1;
   out[2] = b2;
}
87
88
89/* Build a modRM byte + possible displacement.  No treatment of SIB
90 * indexing.  BZZT - no way to encode an absolute address.
91 */
92static void emit_modrm( struct x86_function *p,
93			struct x86_reg reg,
94			struct x86_reg regmem )
95{
96   unsigned char val = 0;
97
98   assert(reg.mod == mod_REG);
99
100   val |= regmem.mod << 6;     	/* mod field */
101   val |= reg.idx << 3;		/* reg field */
102   val |= regmem.idx;		/* r/m field */
103
104   emit_1ub(p, val);
105
106   /* Oh-oh we've stumbled into the SIB thing.
107    */
108   if (regmem.file == file_REG32 &&
109       regmem.idx == reg_SP) {
110      emit_1ub(p, 0x24);		/* simplistic! */
111   }
112
113   switch (regmem.mod) {
114   case mod_REG:
115   case mod_INDIRECT:
116      break;
117   case mod_DISP8:
118      emit_1b(p, regmem.disp);
119      break;
120   case mod_DISP32:
121      emit_1i(p, regmem.disp);
122      break;
123   default:
124      assert(0);
125      break;
126   }
127}
128
129
130static void emit_modrm_noreg( struct x86_function *p,
131			      unsigned op,
132			      struct x86_reg regmem )
133{
134   struct x86_reg dummy = x86_make_reg(file_REG32, op);
135   emit_modrm(p, dummy, regmem);
136}
137
138/* Many x86 instructions have two opcodes to cope with the situations
139 * where the destination is a register or memory reference
140 * respectively.  This function selects the correct opcode based on
141 * the arguments presented.
142 */
143static void emit_op_modrm( struct x86_function *p,
144			   unsigned char op_dst_is_reg,
145			   unsigned char op_dst_is_mem,
146			   struct x86_reg dst,
147			   struct x86_reg src )
148{
149   switch (dst.mod) {
150   case mod_REG:
151      emit_1ub(p, op_dst_is_reg);
152      emit_modrm(p, dst, src);
153      break;
154   case mod_INDIRECT:
155   case mod_DISP32:
156   case mod_DISP8:
157      assert(src.mod == mod_REG);
158      emit_1ub(p, op_dst_is_mem);
159      emit_modrm(p, src, dst);
160      break;
161   default:
162      assert(0);
163      break;
164   }
165}
166
167
168
169
170
171
172
173/* Create and manipulate registers and regmem values:
174 */
175struct x86_reg x86_make_reg( enum x86_reg_file file,
176			     enum x86_reg_name idx )
177{
178   struct x86_reg reg;
179
180   reg.file = file;
181   reg.idx = idx;
182   reg.mod = mod_REG;
183   reg.disp = 0;
184
185   return reg;
186}
187
188struct x86_reg x86_make_disp( struct x86_reg reg,
189			      int disp )
190{
191   assert(reg.file == file_REG32);
192
193   if (reg.mod == mod_REG)
194      reg.disp = disp;
195   else
196      reg.disp += disp;
197
198   if (reg.disp == 0)
199      reg.mod = mod_INDIRECT;
200   else if (reg.disp <= 127 && reg.disp >= -128)
201      reg.mod = mod_DISP8;
202   else
203      reg.mod = mod_DISP32;
204
205   return reg;
206}
207
208struct x86_reg x86_deref( struct x86_reg reg )
209{
210   return x86_make_disp(reg, 0);
211}
212
213struct x86_reg x86_get_base_reg( struct x86_reg reg )
214{
215   return x86_make_reg( reg.file, reg.idx );
216}
217
218unsigned char *x86_get_label( struct x86_function *p )
219{
220   return p->csr;
221}
222
223
224
225/***********************************************************************
226 * x86 instructions
227 */
228
229
230void x86_jcc( struct x86_function *p,
231	      enum x86_cc cc,
232	      unsigned char *label )
233{
234   int offset = label - (x86_get_label(p) + 2);
235
236   if (offset <= 127 && offset >= -128) {
237      emit_1ub(p, 0x70 + cc);
238      emit_1b(p, (char) offset);
239   }
240   else {
241      offset = label - (x86_get_label(p) + 6);
242      emit_2ub(p, 0x0f, 0x80 + cc);
243      emit_1i(p, offset);
244   }
245}
246
247/* Always use a 32bit offset for forward jumps:
248 */
249unsigned char *x86_jcc_forward( struct x86_function *p,
250			  enum x86_cc cc )
251{
252   emit_2ub(p, 0x0f, 0x80 + cc);
253   emit_1i(p, 0);
254   return x86_get_label(p);
255}
256
257unsigned char *x86_jmp_forward( struct x86_function *p)
258{
259   emit_1ub(p, 0xe9);
260   emit_1i(p, 0);
261   return x86_get_label(p);
262}
263
264unsigned char *x86_call_forward( struct x86_function *p)
265{
266   emit_1ub(p, 0xe8);
267   emit_1i(p, 0);
268   return x86_get_label(p);
269}
270
271/* Fixup offset from forward jump:
272 */
273void x86_fixup_fwd_jump( struct x86_function *p,
274			 unsigned char *fixup )
275{
276   *(int *)(fixup - 4) = x86_get_label(p) - fixup;
277}
278
/* Unconditional near jump (E9 rel32) to a known label.  The displacement
 * is measured from the end of the 4-byte offset that follows the opcode.
 */
void x86_jmp( struct x86_function *p, unsigned char *label)
{
   int rel;

   emit_1ub(p, 0xe9);
   rel = label - x86_get_label(p) - 4;
   emit_1i(p, rel);
}
284
285#if 0
/* Disabled variant: a pc-relative call (E8 rel32) to a C function.
 * It stops working once the generated code is reallocated and copied on
 * buffer fills, because the offset is relative to the current pc.
 */
void x86_call( struct x86_function *p, void (*label)())
{
   int rel;

   emit_1ub(p, 0xe8);
   rel = cptr(label) - x86_get_label(p) - 4;
   emit_1i(p, rel);
}
295#else
296void x86_call( struct x86_function *p, struct x86_reg reg)
297{
298   emit_1ub(p, 0xff);
299   emit_modrm_noreg(p, 2, reg);
300}
301#endif
302
303
304/* michal:
305 * Temporary. As I need immediate operands, and dont want to mess with the codegen,
306 * I load the immediate into general purpose register and use it.
307 */
308void x86_mov_reg_imm( struct x86_function *p, struct x86_reg dst, int imm )
309{
310   assert(dst.mod == mod_REG);
311   emit_1ub(p, 0xb8 + dst.idx);
312   emit_1i(p, imm);
313}
314
315void x86_push( struct x86_function *p,
316	       struct x86_reg reg )
317{
318   assert(reg.mod == mod_REG);
319   emit_1ub(p, 0x50 + reg.idx);
320   p->stack_offset += 4;
321}
322
323void x86_pop( struct x86_function *p,
324	      struct x86_reg reg )
325{
326   assert(reg.mod == mod_REG);
327   emit_1ub(p, 0x58 + reg.idx);
328   p->stack_offset -= 4;
329}
330
331void x86_inc( struct x86_function *p,
332	      struct x86_reg reg )
333{
334   assert(reg.mod == mod_REG);
335   emit_1ub(p, 0x40 + reg.idx);
336}
337
338void x86_dec( struct x86_function *p,
339	      struct x86_reg reg )
340{
341   assert(reg.mod == mod_REG);
342   emit_1ub(p, 0x48 + reg.idx);
343}
344
/* RET: single byte C3. */
void x86_ret( struct x86_function *p )
{
   const unsigned char ret_op = 0xc3;

   emit_1ub(p, ret_op);
}
349
/* SAHF: single byte 9E. */
void x86_sahf( struct x86_function *p )
{
   const unsigned char sahf_op = 0x9e;

   emit_1ub(p, sahf_op);
}
354
355void x86_mov( struct x86_function *p,
356	      struct x86_reg dst,
357	      struct x86_reg src )
358{
359   emit_op_modrm( p, 0x8b, 0x89, dst, src );
360}
361
362void x86_xor( struct x86_function *p,
363	      struct x86_reg dst,
364	      struct x86_reg src )
365{
366   emit_op_modrm( p, 0x33, 0x31, dst, src );
367}
368
369void x86_cmp( struct x86_function *p,
370	      struct x86_reg dst,
371	      struct x86_reg src )
372{
373   emit_op_modrm( p, 0x3b, 0x39, dst, src );
374}
375
376void x86_lea( struct x86_function *p,
377	      struct x86_reg dst,
378	      struct x86_reg src )
379{
380   emit_1ub(p, 0x8d);
381   emit_modrm( p, dst, src );
382}
383
384void x86_test( struct x86_function *p,
385	       struct x86_reg dst,
386	       struct x86_reg src )
387{
388   emit_1ub(p, 0x85);
389   emit_modrm( p, dst, src );
390}
391
392void x86_add( struct x86_function *p,
393	       struct x86_reg dst,
394	       struct x86_reg src )
395{
396   emit_op_modrm(p, 0x03, 0x01, dst, src );
397}
398
399void x86_mul( struct x86_function *p,
400	       struct x86_reg src )
401{
402   assert (src.file == file_REG32 && src.mod == mod_REG);
403   emit_op_modrm(p, 0xf7, 0, x86_make_reg (file_REG32, reg_SP), src );
404}
405
406void x86_sub( struct x86_function *p,
407	       struct x86_reg dst,
408	       struct x86_reg src )
409{
410   emit_op_modrm(p, 0x2b, 0x29, dst, src );
411}
412
413void x86_or( struct x86_function *p,
414             struct x86_reg dst,
415             struct x86_reg src )
416{
417   emit_op_modrm( p, 0x0b, 0x09, dst, src );
418}
419
420void x86_and( struct x86_function *p,
421              struct x86_reg dst,
422              struct x86_reg src )
423{
424   emit_op_modrm( p, 0x23, 0x21, dst, src );
425}
426
427
428
429/***********************************************************************
430 * SSE instructions
431 */
432
433
434void sse_movss( struct x86_function *p,
435		struct x86_reg dst,
436		struct x86_reg src )
437{
438   emit_2ub(p, 0xF3, X86_TWOB);
439   emit_op_modrm( p, 0x10, 0x11, dst, src );
440}
441
442void sse_movaps( struct x86_function *p,
443		 struct x86_reg dst,
444		 struct x86_reg src )
445{
446   emit_1ub(p, X86_TWOB);
447   emit_op_modrm( p, 0x28, 0x29, dst, src );
448}
449
450void sse_movups( struct x86_function *p,
451		 struct x86_reg dst,
452		 struct x86_reg src )
453{
454   emit_1ub(p, X86_TWOB);
455   emit_op_modrm( p, 0x10, 0x11, dst, src );
456}
457
458void sse_movhps( struct x86_function *p,
459		 struct x86_reg dst,
460		 struct x86_reg src )
461{
462   assert(dst.mod != mod_REG || src.mod != mod_REG);
463   emit_1ub(p, X86_TWOB);
464   emit_op_modrm( p, 0x16, 0x17, dst, src ); /* cf movlhps */
465}
466
467void sse_movlps( struct x86_function *p,
468		 struct x86_reg dst,
469		 struct x86_reg src )
470{
471   assert(dst.mod != mod_REG || src.mod != mod_REG);
472   emit_1ub(p, X86_TWOB);
473   emit_op_modrm( p, 0x12, 0x13, dst, src ); /* cf movhlps */
474}
475
476void sse_maxps( struct x86_function *p,
477		struct x86_reg dst,
478		struct x86_reg src )
479{
480   emit_2ub(p, X86_TWOB, 0x5F);
481   emit_modrm( p, dst, src );
482}
483
484void sse_maxss( struct x86_function *p,
485		struct x86_reg dst,
486		struct x86_reg src )
487{
488   emit_3ub(p, 0xF3, X86_TWOB, 0x5F);
489   emit_modrm( p, dst, src );
490}
491
492void sse_divss( struct x86_function *p,
493		struct x86_reg dst,
494		struct x86_reg src )
495{
496   emit_3ub(p, 0xF3, X86_TWOB, 0x5E);
497   emit_modrm( p, dst, src );
498}
499
500void sse_minps( struct x86_function *p,
501		struct x86_reg dst,
502		struct x86_reg src )
503{
504   emit_2ub(p, X86_TWOB, 0x5D);
505   emit_modrm( p, dst, src );
506}
507
508void sse_subps( struct x86_function *p,
509		struct x86_reg dst,
510		struct x86_reg src )
511{
512   emit_2ub(p, X86_TWOB, 0x5C);
513   emit_modrm( p, dst, src );
514}
515
516void sse_mulps( struct x86_function *p,
517		struct x86_reg dst,
518		struct x86_reg src )
519{
520   emit_2ub(p, X86_TWOB, 0x59);
521   emit_modrm( p, dst, src );
522}
523
524void sse_mulss( struct x86_function *p,
525		struct x86_reg dst,
526		struct x86_reg src )
527{
528   emit_3ub(p, 0xF3, X86_TWOB, 0x59);
529   emit_modrm( p, dst, src );
530}
531
532void sse_addps( struct x86_function *p,
533		struct x86_reg dst,
534		struct x86_reg src )
535{
536   emit_2ub(p, X86_TWOB, 0x58);
537   emit_modrm( p, dst, src );
538}
539
540void sse_addss( struct x86_function *p,
541		struct x86_reg dst,
542		struct x86_reg src )
543{
544   emit_3ub(p, 0xF3, X86_TWOB, 0x58);
545   emit_modrm( p, dst, src );
546}
547
548void sse_andnps( struct x86_function *p,
549                 struct x86_reg dst,
550                 struct x86_reg src )
551{
552   emit_2ub(p, X86_TWOB, 0x55);
553   emit_modrm( p, dst, src );
554}
555
556void sse_andps( struct x86_function *p,
557		struct x86_reg dst,
558		struct x86_reg src )
559{
560   emit_2ub(p, X86_TWOB, 0x54);
561   emit_modrm( p, dst, src );
562}
563
564void sse_rsqrtps( struct x86_function *p,
565                  struct x86_reg dst,
566                  struct x86_reg src )
567{
568   emit_2ub(p, X86_TWOB, 0x52);
569   emit_modrm( p, dst, src );
570}
571
572void sse_rsqrtss( struct x86_function *p,
573		  struct x86_reg dst,
574		  struct x86_reg src )
575{
576   emit_3ub(p, 0xF3, X86_TWOB, 0x52);
577   emit_modrm( p, dst, src );
578
579}
580
581void sse_movhlps( struct x86_function *p,
582		  struct x86_reg dst,
583		  struct x86_reg src )
584{
585   assert(dst.mod == mod_REG && src.mod == mod_REG);
586   emit_2ub(p, X86_TWOB, 0x12);
587   emit_modrm( p, dst, src );
588}
589
590void sse_movlhps( struct x86_function *p,
591		  struct x86_reg dst,
592		  struct x86_reg src )
593{
594   assert(dst.mod == mod_REG && src.mod == mod_REG);
595   emit_2ub(p, X86_TWOB, 0x16);
596   emit_modrm( p, dst, src );
597}
598
599void sse_orps( struct x86_function *p,
600               struct x86_reg dst,
601               struct x86_reg src )
602{
603   emit_2ub(p, X86_TWOB, 0x56);
604   emit_modrm( p, dst, src );
605}
606
607void sse_xorps( struct x86_function *p,
608                struct x86_reg dst,
609                struct x86_reg src )
610{
611   emit_2ub(p, X86_TWOB, 0x57);
612   emit_modrm( p, dst, src );
613}
614
615void sse_cvtps2pi( struct x86_function *p,
616		   struct x86_reg dst,
617		   struct x86_reg src )
618{
619   assert(dst.file == file_MMX &&
620	  (src.file == file_XMM || src.mod != mod_REG));
621
622   p->need_emms = 1;
623
624   emit_2ub(p, X86_TWOB, 0x2d);
625   emit_modrm( p, dst, src );
626}
627
628
629/* Shufps can also be used to implement a reduced swizzle when dest ==
630 * arg0.
631 */
632void sse_shufps( struct x86_function *p,
633		 struct x86_reg dest,
634		 struct x86_reg arg0,
635		 unsigned char shuf)
636{
637   emit_2ub(p, X86_TWOB, 0xC6);
638   emit_modrm(p, dest, arg0);
639   emit_1ub(p, shuf);
640}
641
642void sse_cmpps( struct x86_function *p,
643		struct x86_reg dest,
644		struct x86_reg arg0,
645		unsigned char cc)
646{
647   emit_2ub(p, X86_TWOB, 0xC2);
648   emit_modrm(p, dest, arg0);
649   emit_1ub(p, cc);
650}
651
652void sse_pmovmskb( struct x86_function *p,
653                   struct x86_reg dest,
654                   struct x86_reg src)
655{
656    emit_3ub(p, 0x66, X86_TWOB, 0xD7);
657    emit_modrm(p, dest, src);
658}
659
660/***********************************************************************
661 * SSE2 instructions
662 */
663
664/**
665 * Perform a reduced swizzle:
666 */
667void sse2_pshufd( struct x86_function *p,
668		  struct x86_reg dest,
669		  struct x86_reg arg0,
670		  unsigned char shuf)
671{
672   emit_3ub(p, 0x66, X86_TWOB, 0x70);
673   emit_modrm(p, dest, arg0);
674   emit_1ub(p, shuf);
675}
676
677void sse2_cvttps2dq( struct x86_function *p,
678                     struct x86_reg dst,
679                     struct x86_reg src )
680{
681   emit_3ub( p, 0xF3, X86_TWOB, 0x5B );
682   emit_modrm( p, dst, src );
683}
684
685void sse2_cvtps2dq( struct x86_function *p,
686		    struct x86_reg dst,
687		    struct x86_reg src )
688{
689   emit_3ub(p, 0x66, X86_TWOB, 0x5B);
690   emit_modrm( p, dst, src );
691}
692
693void sse2_packssdw( struct x86_function *p,
694		    struct x86_reg dst,
695		    struct x86_reg src )
696{
697   emit_3ub(p, 0x66, X86_TWOB, 0x6B);
698   emit_modrm( p, dst, src );
699}
700
701void sse2_packsswb( struct x86_function *p,
702		    struct x86_reg dst,
703		    struct x86_reg src )
704{
705   emit_3ub(p, 0x66, X86_TWOB, 0x63);
706   emit_modrm( p, dst, src );
707}
708
709void sse2_packuswb( struct x86_function *p,
710		    struct x86_reg dst,
711		    struct x86_reg src )
712{
713   emit_3ub(p, 0x66, X86_TWOB, 0x67);
714   emit_modrm( p, dst, src );
715}
716
717void sse2_rcpps( struct x86_function *p,
718                 struct x86_reg dst,
719                 struct x86_reg src )
720{
721   emit_2ub(p, X86_TWOB, 0x53);
722   emit_modrm( p, dst, src );
723}
724
725void sse2_rcpss( struct x86_function *p,
726		struct x86_reg dst,
727		struct x86_reg src )
728{
729   emit_3ub(p, 0xF3, X86_TWOB, 0x53);
730   emit_modrm( p, dst, src );
731}
732
733void sse2_movd( struct x86_function *p,
734		struct x86_reg dst,
735		struct x86_reg src )
736{
737   emit_2ub(p, 0x66, X86_TWOB);
738   emit_op_modrm( p, 0x6e, 0x7e, dst, src );
739}
740
741
742
743
744/***********************************************************************
745 * x87 instructions
746 */
747void x87_fist( struct x86_function *p, struct x86_reg dst )
748{
749   emit_1ub(p, 0xdb);
750   emit_modrm_noreg(p, 2, dst);
751}
752
753void x87_fistp( struct x86_function *p, struct x86_reg dst )
754{
755   emit_1ub(p, 0xdb);
756   emit_modrm_noreg(p, 3, dst);
757}
758
759void x87_fild( struct x86_function *p, struct x86_reg arg )
760{
761   emit_1ub(p, 0xdf);
762   emit_modrm_noreg(p, 0, arg);
763}
764
/* FLDZ: D9 EE. */
void x87_fldz( struct x86_function *p )
{
   const unsigned char second = 0xee;

   emit_2ub(p, 0xd9, second);
}
769
770
771void x87_fldcw( struct x86_function *p, struct x86_reg arg )
772{
773   assert(arg.file == file_REG32);
774   assert(arg.mod != mod_REG);
775   emit_1ub(p, 0xd9);
776   emit_modrm_noreg(p, 5, arg);
777}
778
/* FLD1: D9 E8. */
void x87_fld1( struct x86_function *p )
{
   const unsigned char second = 0xe8;

   emit_2ub(p, 0xd9, second);
}
783
/* FLDL2E: D9 EA. */
void x87_fldl2e( struct x86_function *p )
{
   const unsigned char second = 0xea;

   emit_2ub(p, 0xd9, second);
}
788
/* FLDLN2: D9 ED. */
void x87_fldln2( struct x86_function *p )
{
   const unsigned char second = 0xed;

   emit_2ub(p, 0xd9, second);
}
793
/* FWAIT: single byte 9B. */
void x87_fwait( struct x86_function *p )
{
   const unsigned char fwait_op = 0x9b;

   emit_1ub(p, fwait_op);
}
798
/* FNCLEX: DB E2. */
void x87_fnclex( struct x86_function *p )
{
   const unsigned char second = 0xe2;

   emit_2ub(p, 0xdb, second);
}
803
/* FCLEX: an FWAIT (9B) immediately followed by FNCLEX (DB E2). */
void x87_fclex( struct x86_function *p )
{
   emit_1ub(p, 0x9b);           /* fwait */
   emit_2ub(p, 0xdb, 0xe2);     /* fnclex */
}
809
810
811static void x87_arith_op( struct x86_function *p, struct x86_reg dst, struct x86_reg arg,
812			  unsigned char dst0ub0,
813			  unsigned char dst0ub1,
814			  unsigned char arg0ub0,
815			  unsigned char arg0ub1,
816			  unsigned char argmem_noreg)
817{
818   assert(dst.file == file_x87);
819
820   if (arg.file == file_x87) {
821      if (dst.idx == 0)
822	 emit_2ub(p, dst0ub0, dst0ub1+arg.idx);
823      else if (arg.idx == 0)
824	 emit_2ub(p, arg0ub0, arg0ub1+arg.idx);
825      else
826	 assert(0);
827   }
828   else if (dst.idx == 0) {
829      assert(arg.file == file_REG32);
830      emit_1ub(p, 0xd8);
831      emit_modrm_noreg(p, argmem_noreg, arg);
832   }
833   else
834      assert(0);
835}
836
837void x87_fmul( struct x86_function *p, struct x86_reg dst, struct x86_reg arg )
838{
839   x87_arith_op(p, dst, arg,
840		0xd8, 0xc8,
841		0xdc, 0xc8,
842		4);
843}
844
845void x87_fsub( struct x86_function *p, struct x86_reg dst, struct x86_reg arg )
846{
847   x87_arith_op(p, dst, arg,
848		0xd8, 0xe0,
849		0xdc, 0xe8,
850		4);
851}
852
853void x87_fsubr( struct x86_function *p, struct x86_reg dst, struct x86_reg arg )
854{
855   x87_arith_op(p, dst, arg,
856		0xd8, 0xe8,
857		0xdc, 0xe0,
858		5);
859}
860
861void x87_fadd( struct x86_function *p, struct x86_reg dst, struct x86_reg arg )
862{
863   x87_arith_op(p, dst, arg,
864		0xd8, 0xc0,
865		0xdc, 0xc0,
866		0);
867}
868
869void x87_fdiv( struct x86_function *p, struct x86_reg dst, struct x86_reg arg )
870{
871   x87_arith_op(p, dst, arg,
872		0xd8, 0xf0,
873		0xdc, 0xf8,
874		6);
875}
876
877void x87_fdivr( struct x86_function *p, struct x86_reg dst, struct x86_reg arg )
878{
879   x87_arith_op(p, dst, arg,
880		0xd8, 0xf8,
881		0xdc, 0xf0,
882		7);
883}
884
885void x87_fmulp( struct x86_function *p, struct x86_reg dst )
886{
887   assert(dst.file == file_x87);
888   assert(dst.idx >= 1);
889   emit_2ub(p, 0xde, 0xc8+dst.idx);
890}
891
892void x87_fsubp( struct x86_function *p, struct x86_reg dst )
893{
894   assert(dst.file == file_x87);
895   assert(dst.idx >= 1);
896   emit_2ub(p, 0xde, 0xe8+dst.idx);
897}
898
899void x87_fsubrp( struct x86_function *p, struct x86_reg dst )
900{
901   assert(dst.file == file_x87);
902   assert(dst.idx >= 1);
903   emit_2ub(p, 0xde, 0xe0+dst.idx);
904}
905
906void x87_faddp( struct x86_function *p, struct x86_reg dst )
907{
908   assert(dst.file == file_x87);
909   assert(dst.idx >= 1);
910   emit_2ub(p, 0xde, 0xc0+dst.idx);
911}
912
913void x87_fdivp( struct x86_function *p, struct x86_reg dst )
914{
915   assert(dst.file == file_x87);
916   assert(dst.idx >= 1);
917   emit_2ub(p, 0xde, 0xf8+dst.idx);
918}
919
920void x87_fdivrp( struct x86_function *p, struct x86_reg dst )
921{
922   assert(dst.file == file_x87);
923   assert(dst.idx >= 1);
924   emit_2ub(p, 0xde, 0xf0+dst.idx);
925}
926
927void x87_fucom( struct x86_function *p, struct x86_reg arg )
928{
929   assert(arg.file == file_x87);
930   emit_2ub(p, 0xdd, 0xe0+arg.idx);
931}
932
933void x87_fucomp( struct x86_function *p, struct x86_reg arg )
934{
935   assert(arg.file == file_x87);
936   emit_2ub(p, 0xdd, 0xe8+arg.idx);
937}
938
/* FUCOMPP: DA E9. */
void x87_fucompp( struct x86_function *p )
{
   const unsigned char second = 0xe9;

   emit_2ub(p, 0xda, second);
}
943
944void x87_fxch( struct x86_function *p, struct x86_reg arg )
945{
946   assert(arg.file == file_x87);
947   emit_2ub(p, 0xd9, 0xc8+arg.idx);
948}
949
/* FABS: D9 E1. */
void x87_fabs( struct x86_function *p )
{
   const unsigned char second = 0xe1;

   emit_2ub(p, 0xd9, second);
}
954
/* FCHS: D9 E0. */
void x87_fchs( struct x86_function *p )
{
   const unsigned char second = 0xe0;

   emit_2ub(p, 0xd9, second);
}
959
/* FCOS: D9 FF. */
void x87_fcos( struct x86_function *p )
{
   const unsigned char second = 0xff;

   emit_2ub(p, 0xd9, second);
}
964
965
/* Emits D9 FC (the FRNDINT encoding). */
void x87_fprndint( struct x86_function *p )
{
   const unsigned char second = 0xfc;

   emit_2ub(p, 0xd9, second);
}
970
/* FSCALE: D9 FD. */
void x87_fscale( struct x86_function *p )
{
   const unsigned char second = 0xfd;

   emit_2ub(p, 0xd9, second);
}
975
/* FSIN: D9 FE. */
void x87_fsin( struct x86_function *p )
{
   const unsigned char second = 0xfe;

   emit_2ub(p, 0xd9, second);
}
980
/* FSINCOS: D9 FB. */
void x87_fsincos( struct x86_function *p )
{
   const unsigned char second = 0xfb;

   emit_2ub(p, 0xd9, second);
}
985
/* FSQRT: D9 FA. */
void x87_fsqrt( struct x86_function *p )
{
   const unsigned char second = 0xfa;

   emit_2ub(p, 0xd9, second);
}
990
/* FXTRACT: D9 F4. */
void x87_fxtract( struct x86_function *p )
{
   const unsigned char second = 0xf4;

   emit_2ub(p, 0xd9, second);
}
995
/* F2XM1 (D9 F0): st0 = (2^st0) - 1
 *
 * Restrictions: -1.0 <= st0 <= 1.0
 */
void x87_f2xm1( struct x86_function *p )
{
   const unsigned char second = 0xf0;

   emit_2ub(p, 0xd9, second);
}
1004
/* FYL2X (D9 F1): st1 = st1 * log2(st0);
 * pop_stack;
 */
void x87_fyl2x( struct x86_function *p )
{
   const unsigned char second = 0xf1;

   emit_2ub(p, 0xd9, second);
}
1012
/* FYL2XP1 (D9 F9): st1 = st1 * log2(st0 + 1.0);
 * pop_stack;
 *
 * A fast operation, with restrictions: -.29 < st0 < .29
 */
void x87_fyl2xp1( struct x86_function *p )
{
   const unsigned char second = 0xf9;

   emit_2ub(p, 0xd9, second);
}
1022
1023
1024void x87_fld( struct x86_function *p, struct x86_reg arg )
1025{
1026   if (arg.file == file_x87)
1027      emit_2ub(p, 0xd9, 0xc0 + arg.idx);
1028   else {
1029      emit_1ub(p, 0xd9);
1030      emit_modrm_noreg(p, 0, arg);
1031   }
1032}
1033
1034void x87_fst( struct x86_function *p, struct x86_reg dst )
1035{
1036   if (dst.file == file_x87)
1037      emit_2ub(p, 0xdd, 0xd0 + dst.idx);
1038   else {
1039      emit_1ub(p, 0xd9);
1040      emit_modrm_noreg(p, 2, dst);
1041   }
1042}
1043
1044void x87_fstp( struct x86_function *p, struct x86_reg dst )
1045{
1046   if (dst.file == file_x87)
1047      emit_2ub(p, 0xdd, 0xd8 + dst.idx);
1048   else {
1049      emit_1ub(p, 0xd9);
1050      emit_modrm_noreg(p, 3, dst);
1051   }
1052}
1053
1054void x87_fcom( struct x86_function *p, struct x86_reg dst )
1055{
1056   if (dst.file == file_x87)
1057      emit_2ub(p, 0xd8, 0xd0 + dst.idx);
1058   else {
1059      emit_1ub(p, 0xd8);
1060      emit_modrm_noreg(p, 2, dst);
1061   }
1062}
1063
1064void x87_fcomp( struct x86_function *p, struct x86_reg dst )
1065{
1066   if (dst.file == file_x87)
1067      emit_2ub(p, 0xd8, 0xd8 + dst.idx);
1068   else {
1069      emit_1ub(p, 0xd8);
1070      emit_modrm_noreg(p, 3, dst);
1071   }
1072}
1073
1074
1075void x87_fnstsw( struct x86_function *p, struct x86_reg dst )
1076{
1077   assert(dst.file == file_REG32);
1078
1079   if (dst.idx == reg_AX &&
1080       dst.mod == mod_REG)
1081      emit_2ub(p, 0xdf, 0xe0);
1082   else {
1083      emit_1ub(p, 0xdd);
1084      emit_modrm_noreg(p, 7, dst);
1085   }
1086}
1087
1088
1089
1090
1091/***********************************************************************
1092 * MMX instructions
1093 */
1094
1095void mmx_emms( struct x86_function *p )
1096{
1097   assert(p->need_emms);
1098   emit_2ub(p, 0x0f, 0x77);
1099   p->need_emms = 0;
1100}
1101
1102void mmx_packssdw( struct x86_function *p,
1103		   struct x86_reg dst,
1104		   struct x86_reg src )
1105{
1106   assert(dst.file == file_MMX &&
1107	  (src.file == file_MMX || src.mod != mod_REG));
1108
1109   p->need_emms = 1;
1110
1111   emit_2ub(p, X86_TWOB, 0x6b);
1112   emit_modrm( p, dst, src );
1113}
1114
1115void mmx_packuswb( struct x86_function *p,
1116		   struct x86_reg dst,
1117		   struct x86_reg src )
1118{
1119   assert(dst.file == file_MMX &&
1120	  (src.file == file_MMX || src.mod != mod_REG));
1121
1122   p->need_emms = 1;
1123
1124   emit_2ub(p, X86_TWOB, 0x67);
1125   emit_modrm( p, dst, src );
1126}
1127
1128void mmx_movd( struct x86_function *p,
1129	       struct x86_reg dst,
1130	       struct x86_reg src )
1131{
1132   p->need_emms = 1;
1133   emit_1ub(p, X86_TWOB);
1134   emit_op_modrm( p, 0x6e, 0x7e, dst, src );
1135}
1136
1137void mmx_movq( struct x86_function *p,
1138	       struct x86_reg dst,
1139	       struct x86_reg src )
1140{
1141   p->need_emms = 1;
1142   emit_1ub(p, X86_TWOB);
1143   emit_op_modrm( p, 0x6f, 0x7f, dst, src );
1144}
1145
1146
1147/***********************************************************************
1148 * Helper functions
1149 */
1150
1151
1152/* Retreive a reference to one of the function arguments, taking into
1153 * account any push/pop activity:
1154 */
1155struct x86_reg x86_fn_arg( struct x86_function *p,
1156			   unsigned arg )
1157{
1158   return x86_make_disp(x86_make_reg(file_REG32, reg_SP),
1159			p->stack_offset + arg * 4);	/* ??? */
1160}
1161
1162
1163void x86_init_func( struct x86_function *p )
1164{
1165   p->size = 0;
1166   p->store = NULL;
1167   p->csr = p->store;
1168}
1169
1170int x86_init_func_size( struct x86_function *p, unsigned code_size )
1171{
1172   p->size = code_size;
1173   p->store = _mesa_exec_malloc(code_size);
1174   p->csr = p->store;
1175   return p->store != NULL;
1176}
1177
1178void x86_release_func( struct x86_function *p )
1179{
1180   _mesa_exec_free(p->store);
1181   p->store = NULL;
1182   p->csr = NULL;
1183   p->size = 0;
1184}
1185
1186
1187void (*x86_get_func( struct x86_function *p ))(void)
1188{
1189   if (DISASSEM && p->store)
1190      printf("disassemble %p %p\n", p->store, p->csr);
1191   return (void (*)(void)) (unsigned long) p->store;
1192}
1193
1194#else
1195
/* Placeholder compiled when USE_X86_ASM is defined but the target is not
 * i386; does nothing.
 */
void x86sse_dummy( void )
{
   return;
}
1199
1200#endif
1201
1202#else  /* USE_X86_ASM */
1203
/* Placeholder definition for builds without USE_X86_ASM; present only to
 * silence a compiler warning.
 */
int x86sse_c_dummy_var;
1205
1206#endif /* USE_X86_ASM */
1207