/*===-- udivsi3.S - 32-bit unsigned integer divide ------------------------===//
 *
 *                     The LLVM Compiler Infrastructure
 *
 * This file is dual licensed under the MIT and the University of Illinois Open
 * Source Licenses. See LICENSE.TXT for details.
 *
 *===----------------------------------------------------------------------===//
 *
 * This file implements the __udivsi3 (32-bit unsigned integer divide)
 * function for the ARM 32-bit architecture.
 *
 *===----------------------------------------------------------------------===*/

#include "../assembly.h"

	.syntax unified
	.text

#if __ARM_ARCH_ISA_THUMB == 2
	.thumb
#endif

	.p2align 2
DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_uidiv, __udivsi3)

@ unsigned int __udivsi3(unsigned int dividend, unsigned int divisor)
@   Calculate and return the quotient of the (unsigned) division.
@
@   In:   r0 = dividend, r1 = divisor
@   Out:  r0 = quotient
@   A zero divisor sets r0 to 0 and then either branches to __aeabi_idiv0
@   (when __ARM_EABI__ is defined) or returns to the caller.
@   The software path additionally writes r3 and ip, and r2 in the
@   variant that is built without CLZ.

#if __ARM_ARCH_ISA_THUMB == 2
DEFINE_COMPILERRT_THUMB_FUNCTION(__udivsi3)
#else
DEFINE_COMPILERRT_FUNCTION(__udivsi3)
#endif
#if __ARM_ARCH_EXT_IDIV__
	/* Hardware divide: only a zero divisor needs special handling. */
	tst	r1, r1
	beq	LOCAL_LABEL(divby0)
	udiv	r0, r0, r1
	bx	lr
#else
	/* divisor == 0 (unsigned r1 < 1): take the divide-by-zero exit. */
	cmp	r1, #1
	bcc	LOCAL_LABEL(divby0)
	/* divisor == 1: r0 already holds the quotient. */
	IT(eq)
	JMPc(lr, eq)
	/* dividend < divisor: the quotient is 0. */
	cmp	r0, r1
	ITT(cc)
	movcc	r0, #0
	JMPc(lr, cc)
	/*
	 * Implement division using binary long division algorithm.
	 *
	 * r0 is the numerator, r1 the denominator.
	 *
	 * The code before the jump computes the correct shift I, so that
	 * r0 and (r1 << I) have the highest bit set in the same position.
	 * At the time of the jump, ip := div0block - BLOCK_SIZE * I, i.e. the
	 * address of block(I).  This depends on the fixed instruction size of
	 * block: BLOCK_SIZE is 12 bytes in ARM mode and 14 bytes in THUMB
	 * mode.  In THUMB mode the address also has bit 0 set (the "+ 1"
	 * on the adr below) so that "bx ip" remains in Thumb state.
	 *
	 * block(shift) implements the test-and-update-quotient core.
	 * It assumes (r1 << shift) can be computed without overflow and
	 * that r0 < 2 * (r1 << shift).  The quotient is accumulated in r3.
	 */

#  ifdef __ARM_FEATURE_CLZ
	clz	ip, r0
	clz	r3, r1
	/* r0 >= r1 implies clz(r0) <= clz(r1), so ip <= r3. */
	sub	r3, r3, ip		/* r3 := I */
#    if __ARM_ARCH_ISA_THUMB == 2
	/* THUMB: ip := div0block + 1 - (2 + 4 + 8) * I */
	adr	ip, LOCAL_LABEL(div0block) + 1
	sub	ip, ip, r3, lsl #1
#    else
	/* ARM: ip := div0block - (4 + 8) * I */
	adr	ip, LOCAL_LABEL(div0block)
#    endif
	sub	ip, ip, r3, lsl #2
	sub	ip, ip, r3, lsl #3
	mov	r3, #0			/* clear the quotient accumulator */
	bx	ip
#  else
#    if __ARM_ARCH_ISA_THUMB == 2
#    error THUMB mode requires CLZ or UDIV
#    endif
	/*
	 * No CLZ: find I by binary search.  r2 is a working copy of the
	 * numerator.  For each step width w in 16, 8, 4, 2: if
	 * (r2 >> w) >= r1, keep the shifted value in r2 and move the entry
	 * point w blocks (w * 12 bytes, ARM mode only) towards block(31).
	 */
	mov	r2, r0
	adr	ip, LOCAL_LABEL(div0block)

	lsr	r3, r2, #16
	cmp	r3, r1
	movhs	r2, r3
	subhs	ip, ip, #(16 * 12)

	lsr	r3, r2, #8
	cmp	r3, r1
	movhs	r2, r3
	subhs	ip, ip, #(8 * 12)

	lsr	r3, r2, #4
	cmp	r3, r1
	movhs	r2, r3
	subhs	ip, ip, #(4 * 12)

	lsr	r3, r2, #2
	cmp	r3, r1
	movhs	r2, r3
	subhs	ip, ip, #(2 * 12)

	/* Last block, no need to update r2 or r3. */
	cmp	r1, r2, lsr #1
	subls	ip, ip, #(1 * 12)

	mov	r3, #0			/* clear the quotient accumulator */

	JMP(ip)
#  endif

/* IMM lets the macro below emit a literal '#' for immediate operands. */
#define	IMM	#

/*
 * One long-division step: if r0 >= (r1 << shift), set bit `shift` of the
 * quotient in r3 and subtract (r1 << shift) from r0.
 * NOTE(review): IT/ITT and WIDE come from assembly.h; presumably WIDE
 * selects the 32-bit encoding in THUMB mode so that every block has the
 * same size (4 + 2 + 4 + 4 = 14 bytes) -- confirm against assembly.h.
 */
#define block(shift) \
	cmp	r0, r1, lsl IMM shift; \
	ITT(hs); \
	WIDE(addhs)	r3, r3, IMM (1 << shift); \
	WIDE(subhs)	r0, r0, r1, lsl IMM shift

	/*
	 * Fully unrolled chain; the computed jump above enters at block(I)
	 * and execution falls through to block(0).  Nothing may be inserted
	 * between the blocks, since the entry address is derived from the
	 * block size.
	 */
	block(31)
	block(30)
	block(29)
	block(28)
	block(27)
	block(26)
	block(25)
	block(24)
	block(23)
	block(22)
	block(21)
	block(20)
	block(19)
	block(18)
	block(17)
	block(16)
	block(15)
	block(14)
	block(13)
	block(12)
	block(11)
	block(10)
	block(9)
	block(8)
	block(7)
	block(6)
	block(5)
	block(4)
	block(3)
	block(2)
	block(1)
LOCAL_LABEL(div0block):
	block(0)

	/* Return the accumulated quotient. */
	mov	r0, r3
	JMP(lr)
#endif /* __ARM_ARCH_EXT_IDIV__ */

	/* Division by zero: the result register is forced to 0. */
LOCAL_LABEL(divby0):
	mov	r0, #0
#ifdef __ARM_EABI__
	b	__aeabi_idiv0
#else
	JMP(lr)
#endif

END_COMPILERRT_FUNCTION(__udivsi3)