1 1.1 mrg dnl ARM64 mpn_rsh1add_n and mpn_rsh1sub_n. 2 1.1 mrg 3 1.1 mrg dnl Contributed to the GNU project by Torbjörn Granlund. 4 1.1 mrg 5 1.1 mrg dnl Copyright 2017 Free Software Foundation, Inc. 6 1.1 mrg 7 1.1 mrg dnl This file is part of the GNU MP Library. 8 1.1 mrg dnl 9 1.1 mrg dnl The GNU MP Library is free software; you can redistribute it and/or modify 10 1.1 mrg dnl it under the terms of either: 11 1.1 mrg dnl 12 1.1 mrg dnl * the GNU Lesser General Public License as published by the Free 13 1.1 mrg dnl Software Foundation; either version 3 of the License, or (at your 14 1.1 mrg dnl option) any later version. 15 1.1 mrg dnl 16 1.1 mrg dnl or 17 1.1 mrg dnl 18 1.1 mrg dnl * the GNU General Public License as published by the Free Software 19 1.1 mrg dnl Foundation; either version 2 of the License, or (at your option) any 20 1.1 mrg dnl later version. 21 1.1 mrg dnl 22 1.1 mrg dnl or both in parallel, as here. 23 1.1 mrg dnl 24 1.1 mrg dnl The GNU MP Library is distributed in the hope that it will be useful, but 25 1.1 mrg dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 26 1.1 mrg dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 27 1.1 mrg dnl for more details. 28 1.1 mrg dnl 29 1.1 mrg dnl You should have received copies of the GNU General Public License and the 30 1.1 mrg dnl GNU Lesser General Public License along with the GNU MP Library. If not, 31 1.1 mrg dnl see https://www.gnu.org/licenses/. 
include(`../config.m4')

C	     cycles/limb   assumed optimal c/l
C Cortex-A53	3.25-3.75	 3.0 steady
C Cortex-A57	 2.15		 1.75
C X-Gene	 2.75		 2.5

C Move the m4 comment delimiter away from the hash sign, which the code below
C uses to introduce immediate operands.
changecom(blah)

define(`rp', `x0')
define(`up', `x1')
define(`vp', `x2')
define(`n',  `x3')

ifdef(`OPERATION_rsh1add_n', `
  define(`ADDSUB',	adds)
  define(`ADDSUBC',	adcs)
  define(`COND',	`cs')
  define(`func_n',	mpn_rsh1add_n)')
ifdef(`OPERATION_rsh1sub_n', `
  define(`ADDSUB',	subs)
  define(`ADDSUBC',	sbcs)
  define(`COND',	`cc')
  define(`func_n',	mpn_rsh1sub_n)')

MULFUNC_PROLOGUE(mpn_rsh1add_n mpn_rsh1sub_n)

C func_n (rp, up, vp, n)
C
C Adds (rsh1add_n) or subtracts (rsh1sub_n) the n-limb operands at up and vp,
C shifts the result right by one bit and stores n limbs at rp.  The carry-out
C (add) or borrow-out (sub) of the full operation becomes the most significant
C bit of the stored result.  The bit shifted out at the low end is returned.
C
C In:	x0 = rp, x1 = up, x2 = vp, x3 = n (limb count)
C Out:	x0 = bit 0 of the lowest sum or difference limb
C Clobbers: x4-x6, x8-x10, x12-x17, condition flags
C
C n must be at least 1.  A zero count is not detected; it would take the
C n mod 4 = 0 entry, which processes four limbs unconditionally.
C
C Register roles:
C   x6		number of 4-limb groups still to process, initially n/4
C   x10		saved low bit, moved to x0 at exit
C   x4,x5,x8,x9	operand limbs just loaded
C   x12-x15	sum or difference limbs waiting to be shifted
C   x16,x17	shifted limbs ready to be stored
C
C The loop counter is kept in x6 rather than x18.  AAPCS64 designates x18 as
C the platform register, and some operating systems reserve it, so portable
C leaf code must leave it alone.
C
C The carry flag links every ADDSUB/ADDSUBC from the first limb to the last.
C All instructions placed between them are non-flag-setting forms.

ASM_START()
PROLOGUE(func_n)
	lsr	x6, n, #2		C x6 = n/4

	tbz	n, #0, L(bx0)		C dispatch on n mod 4

L(bx1):	ldr	x5, [up],#8
	ldr	x9, [vp],#8
	tbnz	n, #1, L(b11)

C n mod 4 = 1
L(b01):	ADDSUB	x13, x5, x9
	and	x10, x13, #1		C low bit to return
	cbz	x6, L(1)		C n = 1
	ldp	x4, x5, [up],#48
	ldp	x8, x9, [vp],#48
	ADDSUBC	x14, x4, x8
	ADDSUBC	x15, x5, x9
	ldp	x4, x5, [up,#-32]
	ldp	x8, x9, [vp,#-32]
	extr	x17, x14, x13, #1	C must precede the write to x13 below
	ADDSUBC	x12, x4, x8
	ADDSUBC	x13, x5, x9
	str	x17, [rp], #24
	sub	x6, x6, #1
	cbz	x6, L(end)
	b	L(top)

C Single limb: the carry or borrow supplies the top bit.
L(1):	cset	x14, COND
	extr	x17, x14, x13, #1
	str	x17, [rp]
	mov	x0, x10
	ret

C n mod 4 = 3
L(b11):	ADDSUB	x15, x5, x9
	and	x10, x15, #1		C low bit to return

	ldp	x4, x5, [up],#32
	ldp	x8, x9, [vp],#32
	ADDSUBC	x12, x4, x8
	ADDSUBC	x13, x5, x9
	cbz	x6, L(3)		C n = 3
	ldp	x4, x5, [up,#-16]
	ldp	x8, x9, [vp,#-16]
	extr	x17, x12, x15, #1	C must precede the write to x15 below
	ADDSUBC	x14, x4, x8
	ADDSUBC	x15, x5, x9
	str	x17, [rp], #8
	b	L(mid)

L(3):	extr	x17, x12, x15, #1
	str	x17, [rp], #8
	b	L(2)

L(bx0):	tbz	n, #1, L(b00)

C n mod 4 = 2
L(b10):	ldp	x4, x5, [up],#32
	ldp	x8, x9, [vp],#32
	ADDSUB	x12, x4, x8
	ADDSUBC	x13, x5, x9
	and	x10, x12, #1		C low bit to return
	cbz	x6, L(2)		C n = 2
	ldp	x4, x5, [up,#-16]
	ldp	x8, x9, [vp,#-16]
	ADDSUBC	x14, x4, x8
	ADDSUBC	x15, x5, x9
	b	L(mid)

C n mod 4 = 0
L(b00):	ldp	x4, x5, [up],#48
	ldp	x8, x9, [vp],#48
	ADDSUB	x14, x4, x8
	ADDSUBC	x15, x5, x9
	and	x10, x14, #1		C low bit to return
	ldp	x4, x5, [up,#-32]
	ldp	x8, x9, [vp,#-32]
	ADDSUBC	x12, x4, x8
	ADDSUBC	x13, x5, x9
	add	rp, rp, #16
	sub	x6, x6, #1
	cbz	x6, L(end)

C Main loop, four limbs per iteration.  Each half adds or subtracts two new
C limbs while shifting and storing two limbs computed earlier.
	ALIGN(16)
L(top):	ldp	x4, x5, [up,#-16]
	ldp	x8, x9, [vp,#-16]
	extr	x16, x15, x14, #1
	extr	x17, x12, x15, #1
	ADDSUBC	x14, x4, x8
	ADDSUBC	x15, x5, x9
	stp	x16, x17, [rp,#-16]
L(mid):	ldp	x4, x5, [up],#32
	ldp	x8, x9, [vp],#32
	extr	x16, x13, x12, #1
	extr	x17, x14, x13, #1
	ADDSUBC	x12, x4, x8
	ADDSUBC	x13, x5, x9
	stp	x16, x17, [rp],#32
	sub	x6, x6, #1
	cbnz	x6, L(top)

C Wind down: shift and store the limbs still held in registers.
L(end):	extr	x16, x15, x14, #1
	extr	x17, x12, x15, #1
	stp	x16, x17, [rp,#-16]
L(2):	cset	x14, COND		C final carry or borrow as the top bit
	extr	x16, x13, x12, #1
	extr	x17, x14, x13, #1
	stp	x16, x17, [rp]

L(ret):	mov	x0, x10
	ret
EPILOGUE()