1 1.1 joerg // This file is dual licensed under the MIT and the University of Illinois Open 2 1.1 joerg // Source Licenses. See LICENSE.TXT for details. 3 1.1 joerg 4 1.1 joerg #include "../assembly.h" 5 1.1 joerg 6 1.1 joerg // di_int __divdi3(di_int a, di_int b); 7 1.1 joerg 8 1.1 joerg // result = a / b. 9 1.1 joerg // both inputs and the output are 64-bit signed integers. 10 1.1 joerg // This will do whatever the underlying hardware is set to do on division by zero. 11 1.1 joerg // No other exceptions are generated, as the divide cannot overflow. 12 1.1 joerg // 13 1.1 joerg // This is targeted at 32-bit x86 *only*, as this can be done directly in hardware 14 1.1 joerg // on x86_64. The performance goal is ~40 cycles per divide, which is faster than 15 1.1 joerg // currently possible via simulation of integer divides on the x87 unit. 16 1.1 joerg // 17 1.1 joerg // Stephen Canon, December 2008 18 1.1 joerg 19 1.1 joerg #ifdef __i386__ 20 1.1 joerg 21 1.1 joerg .text 22 1.1.1.2 joerg .balign 4 23 1.1 joerg DEFINE_COMPILERRT_FUNCTION(__divdi3) 24 1.1 joerg 25 1.1 joerg /* This is currently implemented by wrapping the unsigned divide up in an absolute 26 1.1 joerg value, then restoring the correct sign at the end of the computation. This could 27 1.1 joerg certainly be improved upon. */ 28 1.1 joerg 29 1.1 joerg pushl %esi 30 1.1 joerg movl 20(%esp), %edx // high word of b 31 1.1 joerg movl 16(%esp), %eax // low word of b 32 1.1 joerg movl %edx, %ecx 33 1.1 joerg sarl $31, %ecx // (b < 0) ? -1 : 0 34 1.1 joerg xorl %ecx, %eax 35 1.1 joerg xorl %ecx, %edx // EDX:EAX = (b < 0) ? not(b) : b 36 1.1 joerg subl %ecx, %eax 37 1.1 joerg sbbl %ecx, %edx // EDX:EAX = abs(b) 38 1.1 joerg movl %edx, 20(%esp) 39 1.1 joerg movl %eax, 16(%esp) // store abs(b) back to stack 40 1.1 joerg movl %ecx, %esi // set aside sign of b 41 1.1 joerg 42 1.1 joerg movl 12(%esp), %edx // high word of b 43 1.1 joerg movl 8(%esp), %eax // low word of b 44 1.1 joerg movl %edx, %ecx 45 1.1 joerg sarl $31, %ecx // (a < 0) ? -1 : 0 46 1.1 joerg xorl %ecx, %eax 47 1.1 joerg xorl %ecx, %edx // EDX:EAX = (a < 0) ? not(a) : a 48 1.1 joerg subl %ecx, %eax 49 1.1 joerg sbbl %ecx, %edx // EDX:EAX = abs(a) 50 1.1 joerg movl %edx, 12(%esp) 51 1.1 joerg movl %eax, 8(%esp) // store abs(a) back to stack 52 1.1 joerg xorl %ecx, %esi // sign of result = (sign of a) ^ (sign of b) 53 1.1 joerg 54 1.1 joerg pushl %ebx 55 1.1 joerg movl 24(%esp), %ebx // Find the index i of the leading bit in b. 56 1.1 joerg bsrl %ebx, %ecx // If the high word of b is zero, jump to 57 1.1 joerg jz 9f // the code to handle that special case [9]. 58 1.1 joerg 59 1.1 joerg /* High word of b is known to be non-zero on this branch */ 60 1.1 joerg 61 1.1 joerg movl 20(%esp), %eax // Construct bhi, containing bits [1+i:32+i] of b 62 1.1 joerg 63 1.1 joerg shrl %cl, %eax // Practically, this means that bhi is given by: 64 1.1 joerg shrl %eax // 65 1.1 joerg notl %ecx // bhi = (high word of b) << (31 - i) | 66 1.1 joerg shll %cl, %ebx // (low word of b) >> (1 + i) 67 1.1 joerg orl %eax, %ebx // 68 1.1 joerg movl 16(%esp), %edx // Load the high and low words of a, and jump 69 1.1 joerg movl 12(%esp), %eax // to [1] if the high word is larger than bhi 70 1.1 joerg cmpl %ebx, %edx // to avoid overflowing the upcoming divide. 71 1.1 joerg jae 1f 72 1.1 joerg 73 1.1 joerg /* High word of a is greater than or equal to (b >> (1 + i)) on this branch */ 74 1.1 joerg 75 1.1 joerg divl %ebx // eax <-- qs, edx <-- r such that ahi:alo = bs*qs + r 76 1.1 joerg 77 1.1 joerg pushl %edi 78 1.1 joerg notl %ecx 79 1.1 joerg shrl %eax 80 1.1 joerg shrl %cl, %eax // q = qs >> (1 + i) 81 1.1 joerg movl %eax, %edi 82 1.1 joerg mull 24(%esp) // q*blo 83 1.1 joerg movl 16(%esp), %ebx 84 1.1 joerg movl 20(%esp), %ecx // ECX:EBX = a 85 1.1 joerg subl %eax, %ebx 86 1.1 joerg sbbl %edx, %ecx // ECX:EBX = a - q*blo 87 1.1 joerg movl 28(%esp), %eax 88 1.1 joerg imull %edi, %eax // q*bhi 89 1.1 joerg subl %eax, %ecx // ECX:EBX = a - q*b 90 1.1 joerg sbbl $0, %edi // decrement q if remainder is negative 91 1.1 joerg xorl %edx, %edx 92 1.1 joerg movl %edi, %eax 93 1.1 joerg 94 1.1 joerg addl %esi, %eax // Restore correct sign to result 95 1.1 joerg adcl %esi, %edx 96 1.1 joerg xorl %esi, %eax 97 1.1 joerg xorl %esi, %edx 98 1.1 joerg popl %edi // Restore callee-save registers 99 1.1 joerg popl %ebx 100 1.1 joerg popl %esi 101 1.1 joerg retl // Return 102 1.1 joerg 103 1.1 joerg 104 1.1 joerg 1: /* High word of a is greater than or equal to (b >> (1 + i)) on this branch */ 105 1.1 joerg 106 1.1 joerg subl %ebx, %edx // subtract bhi from ahi so that divide will not 107 1.1 joerg divl %ebx // overflow, and find q and r such that 108 1.1 joerg // 109 1.1 joerg // ahi:alo = (1:q)*bhi + r 110 1.1 joerg // 111 1.1 joerg // Note that q is a number in (31-i).(1+i) 112 1.1 joerg // fix point. 113 1.1 joerg 114 1.1 joerg pushl %edi 115 1.1 joerg notl %ecx 116 1.1 joerg shrl %eax 117 1.1 joerg orl $0x80000000, %eax 118 1.1 joerg shrl %cl, %eax // q = (1:qs) >> (1 + i) 119 1.1 joerg movl %eax, %edi 120 1.1 joerg mull 24(%esp) // q*blo 121 1.1 joerg movl 16(%esp), %ebx 122 1.1 joerg movl 20(%esp), %ecx // ECX:EBX = a 123 1.1 joerg subl %eax, %ebx 124 1.1 joerg sbbl %edx, %ecx // ECX:EBX = a - q*blo 125 1.1 joerg movl 28(%esp), %eax 126 1.1 joerg imull %edi, %eax // q*bhi 127 1.1 joerg subl %eax, %ecx // ECX:EBX = a - q*b 128 1.1 joerg sbbl $0, %edi // decrement q if remainder is negative 129 1.1 joerg xorl %edx, %edx 130 1.1 joerg movl %edi, %eax 131 1.1 joerg 132 1.1 joerg addl %esi, %eax // Restore correct sign to result 133 1.1 joerg adcl %esi, %edx 134 1.1 joerg xorl %esi, %eax 135 1.1 joerg xorl %esi, %edx 136 1.1 joerg popl %edi // Restore callee-save registers 137 1.1 joerg popl %ebx 138 1.1 joerg popl %esi 139 1.1 joerg retl // Return 140 1.1 joerg 141 1.1 joerg 142 1.1 joerg 9: /* High word of b is zero on this branch */ 143 1.1 joerg 144 1.1 joerg movl 16(%esp), %eax // Find qhi and rhi such that 145 1.1 joerg movl 20(%esp), %ecx // 146 1.1 joerg xorl %edx, %edx // ahi = qhi*b + rhi with 0 rhi < b 147 1.1 joerg divl %ecx // 148 1.1 joerg movl %eax, %ebx // 149 1.1 joerg movl 12(%esp), %eax // Find qlo such that 150 1.1 joerg divl %ecx // 151 1.1 joerg movl %ebx, %edx // rhi:alo = qlo*b + rlo with 0 rlo < b 152 1.1 joerg 153 1.1 joerg addl %esi, %eax // Restore correct sign to result 154 1.1 joerg adcl %esi, %edx 155 1.1 joerg xorl %esi, %eax 156 1.1 joerg xorl %esi, %edx 157 1.1 joerg popl %ebx // Restore callee-save registers 158 1.1 joerg popl %esi 159 1.1 joerg retl // Return 160 1.1 joerg END_COMPILERRT_FUNCTION(__divdi3) 161 1.1 joerg 162 1.1 joerg #endif // __i386__ 163