dnl  ARM mpn_mul_2.

dnl  Contributed to the GNU project by Torbjörn Granlund.

dnl  Copyright 2012 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.

include(`../config.m4')

C	     cycles/limb
C StrongARM:	 -
C XScale	 -
C ARM11		 5.25
C Cortex-A5	 3.63
C Cortex-A7	 3.15
C Cortex-A8	 5.0
C Cortex-A9	 2.25
C Cortex-A15	 2.5
C Cortex-A17	 2.13
C Cortex-A53	 3.5

C TODO
C  * This is a trivial edit of the addmul_2 code.  Check for simplifications,
C    and possible speedups to 2.0 c/l.

C mpn_mul_2 (rp, up, n, vp)
C
C Multiply the n-limb operand at up by the two limbs at vp.  The n+1 least
C significant limbs of the product are stored at rp and the most significant
C limb is returned in r0.
C
C Every limb of up is multiplied by both multiplier limbs with umaal, which
C computes RdHi:RdLo = Rn * Rm + RdLo + RdHi.  The high words of the two
C multiplications are carried in two separate registers, and the low word of
C the product with the high multiplier limb is held back for one step so that
C it is added into the next result limb.
C
C The main loop is unrolled four ways.  The entry code picks an entry point
C from n mod 4 and biases up and rp so that the displacements used inside the
C loop address the first limbs.  The last two limbs of up are always handled
C by the code after the loop, so the smallest size handled is n = 2.

C Argument registers.
define(`rp',`r0')
define(`up',`r1')
define(`n', `r2')
define(`vp',`r3')

C Multiplier limbs, fetched once through vp in the entry code.
define(`vlo',`r6')
define(`vhi',`r7')

C Multiplicand limbs.  Two registers are used alternately so that the load of
C the next limb is issued ahead of the multiplies of the current one.  ua
C shares r3 with vp, which is free once the multiplier has been read.
define(`ua',`r3')
define(`ub',`r9')

C High words of the two product chains: cylo belongs to the products with
C vlo, cyhi to the products with vhi.  cyhi lives in r12, which this function
C does not save or restore.
define(`cylo',`r8')
define(`cyhi',`r12')

C Result words in flight.  One of them is completed by the multiply with vlo
C and stored, while the other one receives the held back low word of the
C multiply with vhi; the roles swap for every limb of up.
define(`wa',`r4')
define(`wb',`r5')


ASM_START()
PROLOGUE(mpn_mul_2)
	push	{ r4-r9 }

	ldm	vp, { vlo, vhi }	C vp is not referenced after this
	mov	cylo, #0
	mov	cyhi, #0

C Dispatch on the two low bits of n.  The first limb of up goes into
C whichever of ua and ub the selected entry point consumes, and both result
C words start out as zero.
	tst	n, #1
	beq	L(even)

L(odd):
	mov	wb, #0
	ldr	ua, [up]
	mov	wa, #0
	tst	n, #2
	beq	L(adj1)

L(adj3):				C n = 3 mod 4
	sub	up, up, #12
	sub	rp, rp, #16
	b	L(ent3)

L(adj1):				C n = 1 mod 4
	sub	n, n, #1
	sub	up, up, #4
	sub	rp, rp, #8
	b	L(ent1)

L(even):
	mov	wa, #0
	ldr	ub, [up]
	mov	wb, #0
	tst	n, #2
	bne	L(adj2)

L(adj0):				C n = 0 mod 4
	sub	up, up, #8
	sub	rp, rp, #12
	b	L(ent0)

L(adj2):				C n = 2 mod 4
	subs	n, n, #2		C sets the flags tested by bls below
	sub	rp, rp, #4
	bls	L(tail)			C n was 2, only the tail is needed

C Main loop, four limbs of up per pass.  Each group of five instructions
C loads the next limb, completes and stores one result limb, clears the
C register just stored, and starts the following result limb.
	ALIGN(16)
L(loop):
	ldr	ua, [up, #4]
	umaal	wa, cylo, ub, vlo
	str	wa, [rp, #4]
	mov	wa, #0
	umaal	wb, cyhi, ub, vhi
L(ent1):
	ldr	ub, [up, #8]
	umaal	wb, cylo, ua, vlo
	str	wb, [rp, #8]
	mov	wb, #0
	umaal	wa, cyhi, ua, vhi
L(ent0):
	ldr	ua, [up, #12]
	umaal	wa, cylo, ub, vlo
	str	wa, [rp, #12]
	mov	wa, #0
	umaal	wb, cyhi, ub, vhi
L(ent3):
	ldr	ub, [up, #16]!		C writeback advances up by four limbs
	umaal	wb, cylo, ua, vlo
	str	wb, [rp, #16]!		C writeback advances rp by four limbs
	mov	wb, #0
	umaal	wa, cyhi, ua, vhi
	subs	n, n, #4
	bhi	L(loop)

C Tail.  ub already holds the second last limb of up; the last limb is
C loaded here.  Three result limbs are stored and the final high word is
C returned.
L(tail):
	umaal	wa, cylo, ub, vlo
	ldr	ua, [up, #4]
	umaal	wb, cyhi, ub, vhi
	str	wa, [rp, #4]
	umaal	wb, cylo, ua, vlo
	umaal	cylo, cyhi, ua, vhi	C cyhi:cylo = top two limbs of the product
	str	wb, [rp, #8]
	str	cylo, [rp, #12]
	mov	r0, cyhi		C return the most significant limb

	pop	{ r4-r9 }
	bx	lr
EPILOGUE()