/*===-- udivmodsi4.S - 32-bit unsigned integer divide and modulus ---------===//
 *
 *                     The LLVM Compiler Infrastructure
 *
 * This file is dual licensed under the MIT and the University of Illinois Open
 * Source Licenses. See LICENSE.TXT for details.
 *
 *===----------------------------------------------------------------------===//
 *
 * This file implements the __udivmodsi4 (32-bit unsigned integer divide and
 * modulus) function for the ARM 32-bit architecture.
 *
 *===----------------------------------------------------------------------===*/

#include "../assembly.h"

	.syntax unified
	.text

#if __ARM_ARCH_ISA_THUMB == 2
	.thumb
#endif

@ unsigned int __udivmodsi4(unsigned int dividend, unsigned int divisor,
@                           unsigned int *remainder)
@   Calculate the quotient and remainder of the (unsigned) division.  The return
@   value is the quotient, the remainder is placed in the variable.
@
@   On entry: r0 = dividend, r1 = divisor, r2 = remainder pointer.
@   On exit:  r0 = quotient, *r2 = remainder.
@   A zero divisor is routed to the divby0 label at the end of this file; that
@   path sets r0 to 0 and never stores through r2.

	.p2align 2
#if __ARM_ARCH_ISA_THUMB == 2
DEFINE_COMPILERRT_THUMB_FUNCTION(__udivmodsi4)
#else
DEFINE_COMPILERRT_FUNCTION(__udivmodsi4)
#endif
#if __ARM_ARCH_EXT_IDIV__
	/*
	 * Hardware divide is available: one udiv for the quotient and one
	 * mls to rebuild the remainder as dividend - quotient * divisor.
	 */
	tst	r1, r1
	beq	LOCAL_LABEL(divby0)
	mov	r3, r0			/* keep the dividend; r0 becomes the quotient */
	udiv	r0, r3, r1
	mls	r1, r0, r1, r3		/* r1 = r3 - r0 * r1 */
	str	r1, [r2]
	bx	lr
#else
	/*
	 * Cheap special cases first.  The single compare against 1 separates
	 * divisor == 0 (unsigned lower) from divisor == 1 (equal).
	 */
	cmp	r1, #1
	bcc	LOCAL_LABEL(divby0)
	beq	LOCAL_LABEL(divby1)
	/* dividend < divisor: the quotient is 0 and the remainder is the dividend. */
	cmp	r0, r1
	bcc	LOCAL_LABEL(quotient0)
	/*
	 * Implement division using binary long division algorithm.
	 *
	 * r0 is the numerator, r1 the denominator.
	 *
	 * The code before the indirect branch computes the correct shift I, so
	 * that r0 and (r1 << I) have the highest bit set in the same position.
	 * At the time of the branch, ip := .Ldiv0block - BLOCKSIZE * I, i.e. the
	 * address of block(I).
	 * This depends on the fixed instruction size of block.
	 * For ARM mode, this is 12 Bytes, for THUMB mode 14 Bytes.
	 *
	 * block(shift) implements the test-and-update-quotient core.
	 * It assumes (r1 << shift) can be computed without overflow and
	 * that r0 < 2 * (r1 << shift).  The quotient is stored in r3.
	 */

#  ifdef __ARM_FEATURE_CLZ
	clz	ip, r0
	clz	r3, r1
	/* r0 >= r1 implies clz(r0) <= clz(r1), so ip <= r3. */
	sub	r3, r3, ip		/* r3 = I */
#    if __ARM_ARCH_ISA_THUMB == 2
	/*
	 * +1 sets bit 0 of the target so that bx stays in Thumb state.
	 * The extra (I << 1) turns the 12 * I below into 14 * I.
	 */
	adr	ip, LOCAL_LABEL(div0block) + 1
	sub	ip, ip, r3, lsl #1
#    else
	adr	ip, LOCAL_LABEL(div0block)
#    endif
	/* ip -= 4 * I + 8 * I */
	sub	ip, ip, r3, lsl #2
	sub	ip, ip, r3, lsl #3
	mov	r3, #0			/* quotient accumulator starts at 0 */
	bx	ip
#  else
#    if __ARM_ARCH_ISA_THUMB == 2
#    error THUMB mode requires CLZ or UDIV
#    endif
	/*
	 * No CLZ: find the shift by binary search.  r4 holds a working copy
	 * of the numerator; for each step k in 16, 8, 4, 2 the code tests
	 * (r4 >> k) >= r1 and, if so, commits the shift (r4 >>= k) and moves
	 * ip back by k blocks of 12 bytes.
	 *
	 * sp is moved by 8 although only r4 (4 bytes) is stored.
	 * NOTE(review): presumably this keeps sp 8-byte aligned - confirm.
	 */
	str	r4, [sp, #-8]!

	mov	r4, r0
	adr	ip, LOCAL_LABEL(div0block)

	lsr	r3, r4, #16
	cmp	r3, r1
	movhs	r4, r3
	subhs	ip, ip, #(16 * 12)

	lsr	r3, r4, #8
	cmp	r3, r1
	movhs	r4, r3
	subhs	ip, ip, #(8 * 12)

	lsr	r3, r4, #4
	cmp	r3, r1
	movhs	r4, r3
	subhs	ip, ip, #(4 * 12)

	lsr	r3, r4, #2
	cmp	r3, r1
	movhs	r4, r3
	subhs	ip, ip, #(2 * 12)

	/* Last block, no need to update r3 or r4. */
	cmp	r1, r4, lsr #1
	subls	ip, ip, #(1 * 12)

	ldr	r4, [sp], #8	/* restore r4, we are done with it. */
	mov	r3, #0

	JMP(ip)
#  endif

/*
 * IMM expands to a literal '#'.  It is an object-like macro so that the
 * immediate marker can be written inside the function-like macro block().
 */
#define	IMM	#

/*
 * One step of the long division for a fixed shift:
 *   if (r0 >= (r1 << shift)) { r3 += 1 << shift; r0 -= r1 << shift; }
 * ITT and WIDE are defined in assembly.h, which is not visible here.
 * NOTE(review): the 12/14 byte block sizes documented above rely on their
 * expansion (no IT instruction in ARM mode, wide encodings in Thumb-2) -
 * confirm against assembly.h before changing anything in this macro.
 */
#define block(shift)                                                           \
	cmp	r0, r1, lsl IMM shift;                                         \
	ITT(hs);                                                               \
	WIDE(addhs)	r3, r3, IMM (1 << shift);                              \
	WIDE(subhs)	r0, r0, r1, lsl IMM shift

	/*
	 * Unrolled ladder.  The computed branch above lands on block(I) and
	 * execution falls through every smaller shift down to block(0).
	 * Do not insert anything between these lines: the branch target is
	 * computed from the size of each expansion.
	 */
	block(31)
	block(30)
	block(29)
	block(28)
	block(27)
	block(26)
	block(25)
	block(24)
	block(23)
	block(22)
	block(21)
	block(20)
	block(19)
	block(18)
	block(17)
	block(16)
	block(15)
	block(14)
	block(13)
	block(12)
	block(11)
	block(10)
	block(9)
	block(8)
	block(7)
	block(6)
	block(5)
	block(4)
	block(3)
	block(2)
	block(1)
LOCAL_LABEL(div0block):
	block(0)

	/* r0 now holds the remainder and r3 the quotient. */
	str	r0, [r2]
	mov	r0, r3
	JMP(lr)

	/* dividend < divisor: remainder = dividend, quotient = 0. */
LOCAL_LABEL(quotient0):
	str	r0, [r2]
	mov	r0, #0
	JMP(lr)

	/* divisor == 1: remainder = 0; r0 is left untouched, so quotient = dividend. */
LOCAL_LABEL(divby1):
	mov	r3, #0
	str	r3, [r2]
	JMP(lr)
#endif /* __ARM_ARCH_EXT_IDIV__ */

	/*
	 * Division by zero, shared by both implementations.  r0 is set to 0;
	 * with __ARM_EABI__ control is then transferred to __aeabi_idiv0 by a
	 * plain branch (lr is not changed here), otherwise 0 is returned.
	 * *remainder is not written on this path.
	 */
LOCAL_LABEL(divby0):
	mov	r0, #0
#ifdef __ARM_EABI__
	b	__aeabi_idiv0
#else
	JMP(lr)
#endif

END_COMPILERRT_FUNCTION(__udivmodsi4)