/*===-- umodsi3.S - 32-bit unsigned integer modulus -----------------------===//
 *
 *                     The LLVM Compiler Infrastructure
 *
 * This file is dual licensed under the MIT and the University of Illinois Open
 * Source Licenses. See LICENSE.TXT for details.
 *
 *===----------------------------------------------------------------------===//
 *
 * This file implements the __umodsi3 (32-bit unsigned integer modulus)
 * function for the ARM 32-bit architecture.
 *
 *===----------------------------------------------------------------------===*/

#include "../assembly.h"

	.syntax unified
	.text
#if __ARM_ARCH_ISA_THUMB == 2
	.thumb
#endif

@ unsigned int __umodsi3(unsigned int dividend, unsigned int divisor)
@   Calculate and return the remainder of the (unsigned) division.

/*
 * Register usage:
 *   r0      in: dividend (numerator); out: remainder
 *   r1      in: divisor (denominator)
 *   r2, r3  scratch
 *   ip      scratch; holds the computed jump target in the non-UDIV path
 *
 * A zero divisor goes to the divby0 label at the end of the function,
 * which sets r0 to 0 and then either tail-branches to __aeabi_idiv0
 * (when __ARM_EABI__ is defined) or returns.
 */

	.p2align 2
#if __ARM_ARCH_ISA_THUMB == 2
DEFINE_COMPILERRT_THUMB_FUNCTION(__umodsi3)
#else
DEFINE_COMPILERRT_FUNCTION(__umodsi3)
#endif
#if __ARM_ARCH_EXT_IDIV__
	/* Hardware divide available: remainder = r0 - (r0 / r1) * r1. */
	tst	r1, r1
	beq	LOCAL_LABEL(divby0)
	udiv	r2, r0, r1
	mls	r0, r2, r1, r0
	bx	lr
#else
	/*
	 * Trivial cases first:
	 *   r1 == 0 (unsigned r1 < 1)  -> divby0
	 *   r1 == 1                    -> remainder is 0
	 *   r0 <  r1                   -> remainder is r0, returned unchanged
	 */
	cmp	r1, #1
	bcc	LOCAL_LABEL(divby0)
	ITT(eq)
	moveq	r0, #0
	JMPc(lr, eq)
	cmp	r0, r1
	IT(cc)
	JMPc(lr, cc)
	/*
	 * Compute the remainder using the binary long division algorithm.
	 *
	 * r0 is the numerator, r1 the denominator.
	 *
	 * The code before JMP computes the correct shift I, so that
	 * r0 and (r1 << I) have the highest bit set in the same position.
	 * At the time of JMP, ip := .Ldiv0block - blocksize * I.
	 * This depends on the fixed instruction size of block.
	 * For ARM mode, this is 8 Bytes, for THUMB mode 10 Bytes.
	 *
	 * block(shift) implements the test-and-subtract core.
	 * It assumes (r1 << shift) can be computed without overflow and
	 * that r0 < 2 * (r1 << shift).  r0 is reduced in place and holds
	 * the remainder once block(0) has run; no quotient is accumulated.
	 */

#  ifdef __ARM_FEATURE_CLZ
	clz	ip, r0
	clz	r3, r1
	/* r0 >= r1 implies clz(r0) <= clz(r1), so ip <= r3. */
	sub	r3, r3, ip
#    if __ARM_ARCH_ISA_THUMB == 2
	/*
	 * The + 1 sets bit 0 of the target so that bx stays in Thumb state.
	 * Subtracting r3 * 2 here plus r3 * 8 below gives the 10-byte stride.
	 */
	adr	ip, LOCAL_LABEL(div0block) + 1
	sub	ip, ip, r3, lsl #1
#    else
	adr	ip, LOCAL_LABEL(div0block)
#    endif
	sub	ip, ip, r3, lsl #3
	bx	ip
#  else
#    if __ARM_ARCH_ISA_THUMB == 2
#    error THUMB mode requires CLZ or UDIV
#    endif
	/*
	 * No CLZ: find the shift by binary search.  r2 is a working copy of
	 * the numerator.  Each step tests whether r2 shifted right by
	 * 16/8/4/2 is still >= r1; if so, it keeps the shifted value and
	 * moves ip back by that many 8-byte blocks.
	 */
	mov	r2, r0
	adr	ip, LOCAL_LABEL(div0block)

	lsr	r3, r2, #16
	cmp	r3, r1
	movhs	r2, r3
	subhs	ip, ip, #(16 * 8)

	lsr	r3, r2, #8
	cmp	r3, r1
	movhs	r2, r3
	subhs	ip, ip, #(8 * 8)

	lsr	r3, r2, #4
	cmp	r3, r1
	movhs	r2, r3
	subhs	ip, ip, #(4 * 8)

	lsr	r3, r2, #2
	cmp	r3, r1
	movhs	r2, r3
	subhs	ip, ip, #(2 * 8)

	/* Last block, no need to update r2 or r3. */
	cmp	r1, r2, lsr #1
	subls	ip, ip, #(1 * 8)

	JMP(ip)
#  endif

/*
 * IMM expands to the immediate prefix character.  It is spelled through a
 * macro because a bare # inside the body of a function-like macro is the
 * preprocessor stringizing operator.
 */
#define	IMM	#

/*
 * One long-division step: if r0 >= (r1 << shift), subtract (r1 << shift)
 * from r0.  WIDE() requests the 32-bit encoding so that every block has
 * the fixed size the computed jump above relies on.
 */
#define block(shift)                                                           \
	cmp	r0, r1, lsl IMM shift;                                         \
	IT(hs);                                                                \
	WIDE(subhs)	r0, r0, r1, lsl IMM shift

	block(31)
	block(30)
	block(29)
	block(28)
	block(27)
	block(26)
	block(25)
	block(24)
	block(23)
	block(22)
	block(21)
	block(20)
	block(19)
	block(18)
	block(17)
	block(16)
	block(15)
	block(14)
	block(13)
	block(12)
	block(11)
	block(10)
	block(9)
	block(8)
	block(7)
	block(6)
	block(5)
	block(4)
	block(3)
	block(2)
	block(1)
LOCAL_LABEL(div0block):
	block(0)
	JMP(lr)
#endif /* __ARM_ARCH_EXT_IDIV__ */

/* Division by zero: the result register is forced to 0. */
LOCAL_LABEL(divby0):
	mov	r0, #0
#ifdef __ARM_EABI__
	b	__aeabi_idiv0
#else
	JMP(lr)
#endif

END_COMPILERRT_FUNCTION(__umodsi3)