Home | History | Annotate | Line # | Download | only in i386
      1      1.1  joerg // This file is dual licensed under the MIT and the University of Illinois Open
      2      1.1  joerg // Source Licenses. See LICENSE.TXT for details.
      3      1.1  joerg 
      4      1.1  joerg #include "../assembly.h"
      5      1.1  joerg 
      6      1.1  joerg // di_int __divdi3(di_int a, di_int b);
      7      1.1  joerg 
      8      1.1  joerg // result = a / b.
      9      1.1  joerg // both inputs and the output are 64-bit signed integers.
     10      1.1  joerg // This will do whatever the underlying hardware is set to do on division by zero.
     11      1.1  joerg // No other exceptions are generated, as the divide cannot overflow.
     12      1.1  joerg //
     13      1.1  joerg // This is targeted at 32-bit x86 *only*, as this can be done directly in hardware
     14      1.1  joerg // on x86_64.  The performance goal is ~40 cycles per divide, which is faster than
     15      1.1  joerg // currently possible via simulation of integer divides on the x87 unit.
     16      1.1  joerg //
     17      1.1  joerg // Stephen Canon, December 2008
     18      1.1  joerg 
     19      1.1  joerg #ifdef __i386__
     20      1.1  joerg 
     21      1.1  joerg .text
     22  1.1.1.2  joerg .balign 4
     23      1.1  joerg DEFINE_COMPILERRT_FUNCTION(__divdi3)
     24      1.1  joerg 
     25      1.1  joerg /* This is currently implemented by wrapping the unsigned divide up in an absolute
     26      1.1  joerg    value, then restoring the correct sign at the end of the computation.  This could
     27      1.1  joerg    certainly be improved upon. */
     28      1.1  joerg 
     29      1.1  joerg 	pushl		%esi
     30      1.1  joerg 	movl	 20(%esp),			%edx	// high word of b
     31      1.1  joerg 	movl	 16(%esp),			%eax	// low word of b
     32      1.1  joerg 	movl		%edx,			%ecx
     33      1.1  joerg 	sarl		$31,			%ecx	// (b < 0) ? -1 : 0
     34      1.1  joerg 	xorl		%ecx,			%eax
     35      1.1  joerg 	xorl		%ecx,			%edx	// EDX:EAX = (b < 0) ? not(b) : b
     36      1.1  joerg 	subl		%ecx,			%eax
     37      1.1  joerg 	sbbl		%ecx,			%edx	// EDX:EAX = abs(b)
     38      1.1  joerg 	movl		%edx,		 20(%esp)
     39      1.1  joerg 	movl		%eax,		 16(%esp)	// store abs(b) back to stack
     40      1.1  joerg 	movl		%ecx,			%esi	// set aside sign of b
     41      1.1  joerg 
     42      1.1  joerg 	movl	 12(%esp),			%edx	// high word of b
     43      1.1  joerg 	movl	  8(%esp),			%eax	// low word of b
     44      1.1  joerg 	movl		%edx,			%ecx
     45      1.1  joerg 	sarl		$31,			%ecx	// (a < 0) ? -1 : 0
     46      1.1  joerg 	xorl		%ecx,			%eax
     47      1.1  joerg 	xorl		%ecx,			%edx	// EDX:EAX = (a < 0) ? not(a) : a
     48      1.1  joerg 	subl		%ecx,			%eax
     49      1.1  joerg 	sbbl		%ecx,			%edx	// EDX:EAX = abs(a)
     50      1.1  joerg 	movl		%edx,		 12(%esp)
     51      1.1  joerg 	movl		%eax,		  8(%esp)	// store abs(a) back to stack
     52      1.1  joerg 	xorl		%ecx,			%esi	// sign of result = (sign of a) ^ (sign of b)
     53      1.1  joerg 
     54      1.1  joerg 	pushl		%ebx
     55      1.1  joerg 	movl	 24(%esp),			%ebx	// Find the index i of the leading bit in b.
     56      1.1  joerg 	bsrl		%ebx,			%ecx	// If the high word of b is zero, jump to
     57      1.1  joerg 	jz			9f						// the code to handle that special case [9].
     58      1.1  joerg 
     59      1.1  joerg 	/* High word of b is known to be non-zero on this branch */
     60      1.1  joerg 
     61      1.1  joerg 	movl	 20(%esp),			%eax	// Construct bhi, containing bits [1+i:32+i] of b
     62      1.1  joerg 
     63      1.1  joerg 	shrl		%cl,			%eax	// Practically, this means that bhi is given by:
     64      1.1  joerg 	shrl		%eax					//
     65      1.1  joerg 	notl		%ecx					//		bhi = (high word of b) << (31 - i) |
     66      1.1  joerg 	shll		%cl,			%ebx	//			  (low word of b) >> (1 + i)
     67      1.1  joerg 	orl			%eax,			%ebx	//
     68      1.1  joerg 	movl	 16(%esp),			%edx	// Load the high and low words of a, and jump
     69      1.1  joerg 	movl	 12(%esp),			%eax	// to [1] if the high word is larger than bhi
     70      1.1  joerg 	cmpl		%ebx,			%edx	// to avoid overflowing the upcoming divide.
     71      1.1  joerg 	jae			1f
     72      1.1  joerg 
     73      1.1  joerg 	/* High word of a is greater than or equal to (b >> (1 + i)) on this branch */
     74      1.1  joerg 
     75      1.1  joerg 	divl		%ebx					// eax <-- qs, edx <-- r such that ahi:alo = bs*qs + r
     76      1.1  joerg 
     77      1.1  joerg 	pushl		%edi
     78      1.1  joerg 	notl		%ecx
     79      1.1  joerg 	shrl		%eax
     80      1.1  joerg 	shrl		%cl,			%eax	// q = qs >> (1 + i)
     81      1.1  joerg 	movl		%eax,			%edi
     82      1.1  joerg 	mull	 24(%esp)					// q*blo
     83      1.1  joerg 	movl	 16(%esp),			%ebx
     84      1.1  joerg 	movl	 20(%esp),			%ecx	// ECX:EBX = a
     85      1.1  joerg 	subl		%eax,			%ebx
     86      1.1  joerg 	sbbl		%edx,			%ecx	// ECX:EBX = a - q*blo
     87      1.1  joerg 	movl	 28(%esp),			%eax
     88      1.1  joerg 	imull		%edi,			%eax	// q*bhi
     89      1.1  joerg 	subl		%eax,			%ecx	// ECX:EBX = a - q*b
     90      1.1  joerg 	sbbl		$0,				%edi	// decrement q if remainder is negative
     91      1.1  joerg 	xorl		%edx,			%edx
     92      1.1  joerg 	movl		%edi,			%eax
     93      1.1  joerg 
     94      1.1  joerg 	addl		%esi,			%eax	// Restore correct sign to result
     95      1.1  joerg 	adcl		%esi,			%edx
     96      1.1  joerg 	xorl		%esi,			%eax
     97      1.1  joerg 	xorl		%esi,			%edx
     98      1.1  joerg 	popl		%edi					// Restore callee-save registers
     99      1.1  joerg 	popl		%ebx
    100      1.1  joerg 	popl		%esi
    101      1.1  joerg 	retl								// Return
    102      1.1  joerg 
    103      1.1  joerg 
    104      1.1  joerg 1:	/* High word of a is greater than or equal to (b >> (1 + i)) on this branch */
    105      1.1  joerg 
    106      1.1  joerg 	subl		%ebx,			%edx	// subtract bhi from ahi so that divide will not
    107      1.1  joerg 	divl		%ebx					// overflow, and find q and r such that
    108      1.1  joerg 										//
    109      1.1  joerg 										//		ahi:alo = (1:q)*bhi + r
    110      1.1  joerg 										//
    111      1.1  joerg 										// Note that q is a number in (31-i).(1+i)
    112      1.1  joerg 										// fix point.
    113      1.1  joerg 
    114      1.1  joerg 	pushl		%edi
    115      1.1  joerg 	notl		%ecx
    116      1.1  joerg 	shrl		%eax
    117      1.1  joerg 	orl			$0x80000000,	%eax
    118      1.1  joerg 	shrl		%cl,			%eax	// q = (1:qs) >> (1 + i)
    119      1.1  joerg 	movl		%eax,			%edi
    120      1.1  joerg 	mull	 24(%esp)					// q*blo
    121      1.1  joerg 	movl	 16(%esp),			%ebx
    122      1.1  joerg 	movl	 20(%esp),			%ecx	// ECX:EBX = a
    123      1.1  joerg 	subl		%eax,			%ebx
    124      1.1  joerg 	sbbl		%edx,			%ecx	// ECX:EBX = a - q*blo
    125      1.1  joerg 	movl	 28(%esp),			%eax
    126      1.1  joerg 	imull		%edi,			%eax	// q*bhi
    127      1.1  joerg 	subl		%eax,			%ecx	// ECX:EBX = a - q*b
    128      1.1  joerg 	sbbl		$0,				%edi	// decrement q if remainder is negative
    129      1.1  joerg 	xorl		%edx,			%edx
    130      1.1  joerg 	movl		%edi,			%eax
    131      1.1  joerg 
    132      1.1  joerg 	addl		%esi,			%eax	// Restore correct sign to result
    133      1.1  joerg 	adcl		%esi,			%edx
    134      1.1  joerg 	xorl		%esi,			%eax
    135      1.1  joerg 	xorl		%esi,			%edx
    136      1.1  joerg 	popl		%edi					// Restore callee-save registers
    137      1.1  joerg 	popl		%ebx
    138      1.1  joerg 	popl		%esi
    139      1.1  joerg 	retl								// Return
    140      1.1  joerg 
    141      1.1  joerg 
    142      1.1  joerg 9:	/* High word of b is zero on this branch */
    143      1.1  joerg 
    144      1.1  joerg 	movl	 16(%esp),			%eax	// Find qhi and rhi such that
    145      1.1  joerg 	movl	 20(%esp),			%ecx	//
    146      1.1  joerg 	xorl		%edx,			%edx	//		ahi = qhi*b + rhi	with	0  rhi < b
    147      1.1  joerg 	divl		%ecx					//
    148      1.1  joerg 	movl		%eax,			%ebx	//
    149      1.1  joerg 	movl	 12(%esp),			%eax	// Find qlo such that
    150      1.1  joerg 	divl		%ecx					//
    151      1.1  joerg 	movl		%ebx,			%edx	//		rhi:alo = qlo*b + rlo  with 0  rlo < b
    152      1.1  joerg 
    153      1.1  joerg 	addl		%esi,			%eax	// Restore correct sign to result
    154      1.1  joerg 	adcl		%esi,			%edx
    155      1.1  joerg 	xorl		%esi,			%eax
    156      1.1  joerg 	xorl		%esi,			%edx
    157      1.1  joerg 	popl		%ebx					// Restore callee-save registers
    158      1.1  joerg 	popl		%esi
    159      1.1  joerg 	retl								// Return
    160      1.1  joerg END_COMPILERRT_FUNCTION(__divdi3)
    161      1.1  joerg 
    162      1.1  joerg #endif // __i386__
    163