Home | History | Annotate | Line # | Download | only in i386
floatundisf.S revision 1.1.1.2.4.2
      1 // This file is dual licensed under the MIT and the University of Illinois Open
      2 // Source Licenses. See LICENSE.TXT for details.
      3 
      4 #include "../assembly.h"
      5 
      6 // float __floatundisf(du_int a);
      7 
      8 // Note that there is a hardware instruction, fildll, that does most of what
      9 // this function needs to do.  However, because of our ia32 ABI, it will take
     10 // a write-small read-large stall, so the software implementation here is
     11 // actually several cycles faster.
     12 
     13 // This is a branch-free implementation.  A branchy implementation might be
     14 // faster for the common case if you know something a priori about the input
     15 // distribution.
     16 
     17 /* branch-free x87 implementation - one cycle slower than without x87.
     18 
     19 #ifdef __i386__
     20 
     21 .const
     22 .align 3
     23 
     24 		.quad	0x43f0000000000000
     25 twop64:	.quad	0x0000000000000000
     26 
     27 #define			TWOp64			twop64-0b(%ecx,%eax,8)
     28 
     29 .text
     30 .align 4
     31 DEFINE_COMPILERRT_FUNCTION(__floatundisf)
     32 	movl		8(%esp),		%eax
     33 	movd		8(%esp),		%xmm1
     34 	movd		4(%esp),		%xmm0
     35 	punpckldq	%xmm1,			%xmm0
     36 	calll		0f
     37 0:	popl		%ecx
     38 	sarl		$31,			%eax
     39 	movq		%xmm0,			4(%esp)
     40 	fildll		4(%esp)
     41 	faddl		TWOp64
     42 	fstps		4(%esp)
     43 	flds		4(%esp)
     44 	ret
     45 END_COMPILERRT_FUNCTION(__floatundisf)
     46 
     47 #endif // __i386__
     48 
     49 */
     50 
     51 /* branch-free, x87-free implementation - faster at the expense of code size */
     52 
     53 #ifdef __i386__
     54 
// Constant pool for __floatundisf.
//
// The constants are read-only, so they are placed in a read-only data
// section on every object format: .const on the non-ELF branch and .rodata
// on ELF.  Without an explicit section directive on ELF they would be
// emitted into whatever section happens to be active at this point.
// Both branches request 8-byte alignment (the non-ELF .align operand is a
// power-of-two exponent, the ELF one is a byte count).
#ifndef __ELF__
.const
.align 3
#else
.section .rodata
.align 8
#endif
// Bit pattern of the double 0x1.0p52.
twop52: .quad 0x4330000000000000

// Two-entry mask table indexed through STICKY with %eax = -1 or 0
// (8-byte entries, so index -1 selects the quad just before the label):
//   sticky[-1] = 0xfff   mask of the 12 low bits shifted out of a big input
//   sticky[ 0] = 0       nothing is shifted out, so nothing is kept
		.quad 0x0000000000000fff
sticky: .quad 0x0000000000000000
// NOTE(review): the two longs below and the twelve label are not referenced
// by the code visible in this file; kept so the object layout is unchanged.
		.long 0x00000012
twelve:	.long 0x00000000

// PC-relative operands.  Each expands to (symbol - 0b), so at the point of
// use %ecx must hold the address of the nearest preceding local label 0:
// (the function obtains it with a call/pop pair).  STICKY additionally
// indexes the table with %eax scaled by 8.
#define			TWOp52			twop52-0b(%ecx)
#define			STICKY			sticky-0b(%ecx,%eax,8)
     69 
     70 .text
     71 .align 4
// float __floatundisf(du_int a)
//
// Converts an unsigned 64-bit integer to single precision without branches
// and without x87 arithmetic.
//
// In:     4(%esp) = low 32 bits of a, 8(%esp) = high 32 bits of a
// Out:    result is left on the x87 stack by the final flds
// Writes: %eax, %ecx, %edx, %xmm0-%xmm3, flags, and the slot at 4(%esp)
//
// Method: an input below 2^52 is converted exactly to double by OR-ing it
// into the mantissa of 0x1.0p52 and subtracting 0x1.0p52.  A "big" input
// (>= 2^52) is first shifted right by 12, with the 12 discarded bits OR-ed
// back into the low end so they still influence rounding; the float
// exponent is then raised by 12 to undo the shift.
DEFINE_COMPILERRT_FUNCTION(__floatundisf)
	// %ecx = address of label 0.  The TWOp52 and STICKY macros are
	// written relative to this label, so its name must stay 0.
	calll	0f
0:	popl	%ecx

	// %xmm0 = a, assembled from two 32-bit loads rather than one 64-bit load.
	movd	4(%esp), %xmm0
	movd	8(%esp), %xmm1
	punpckldq	%xmm1, %xmm0

	// %eax = (a >= 2^52) ? -1 : 0
	// (high >> 1) + 0x7ff80000 has its sign bit set exactly when
	// high >= 0x00100000; sarl then smears that bit over the register.
	movl	8(%esp), %eax
	shrl	$1, %eax
	addl	$0x7ff80000, %eax
	sarl	$31, %eax

	// %xmm3 = shift count: big ? 12 : 0
	movl	$12, %edx
	andl	%eax, %edx
	movd	%edx, %xmm3

	// %xmm1 = big ? (a & 0xfff) : 0       bits the shift would discard
	// %xmm2 = 0x1.0p52
	movsd	STICKY, %xmm1
	movsd	TWOp52, %xmm2
	andpd	%xmm0, %xmm1

	// %xmm0 = bits of 0x1.0p52 | (big ? (a >> 12 | a & 0xfff) : a)
	psrlq	%xmm3, %xmm0
	orpd	%xmm2, %xmm1
	orpd	%xmm1, %xmm0

	// Remove the 2^52 bias to get the value as a double, then round once
	// to single precision.
	subsd	%xmm2, %xmm0
	cvtsd2ss	%xmm0, %xmm0

	// Add (big ? 12 : 0) to the float exponent field, i.e. scale by 2^12
	// when the input was shifted.  %xmm0 now holds (float)a.
	pslld	$23, %xmm3
	paddd	%xmm3, %xmm0

	// Return the value on the x87 stack, going through the argument slot.
	movd	%xmm0, 4(%esp)
	flds	4(%esp)
	ret
END_COMPILERRT_FUNCTION(__floatundisf)
    100 
    101 #endif // __i386__
    102