; mc88110 __gmpn_add_n -- Add two limb vectors of the same length > 0 and store
; sum in a third limb vector.

; Copyright 1995, 1996, 2000 Free Software Foundation, Inc.

; This file is part of the GNU MP Library.
;
; The GNU MP Library is free software; you can redistribute it and/or modify
; it under the terms of either:
;
;   * the GNU Lesser General Public License as published by the Free
;     Software Foundation; either version 3 of the License, or (at your
;     option) any later version.
;
; or
;
;   * the GNU General Public License as published by the Free Software
;     Foundation; either version 2 of the License, or (at your option) any
;     later version.
;
; or both in parallel, as here.
;
; The GNU MP Library is distributed in the hope that it will be useful, but
; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
; for more details.
;
; You should have received copies of the GNU General Public License and the
; GNU Lesser General Public License along with the GNU MP Library.  If not,
; see https://www.gnu.org/licenses/.
; INPUT PARAMETERS
#define res_ptr	r2
#define s1_ptr	r3
#define s2_ptr	r4
#define size	r5

#include "sysdep.h"

/* __gmpn_add_n (res_ptr, s1_ptr, s2_ptr, size)

   Adds the size-limb vectors at s1_ptr and s2_ptr, stores the size-limb
   sum at res_ptr and returns the carry out of the most significant limb
   (0 or 1) in r2.

   Registers, as used by the code below:
     r2  res_ptr on entry, carry-out on return
     r3  s1_ptr
     r4  s2_ptr
     r5  size, limb count; also used as a biased loop counter
     r6, r7     sum limbs (stored as a pair with st.d in V1)
     r8, r9     source limbs fetched as a pair with ld.d
     r10, r12   source limbs fetched one at a time; r12 is also scratch
     r1  return address

   Limbs are 4 bytes wide (every pointer advances by 4 per limb).  The
   doubleword accesses ld.d/st.d are only issued on pointers whose bit 2
   is clear, so the routine picks one of three variants from bit 2 of the
   three pointers (this presumes the pointers are at least 4-byte aligned):

     V1a  s2_ptr and res_ptr agree in bit 2: ld.d from s2, st.d to res,
          single-word loads from s1.
     V1b  s1_ptr and res_ptr agree in bit 2: swap s1_ptr and s2_ptr and
          use V1a.
     V2   s1_ptr and s2_ptr agree, res_ptr differs: ld.d from both
          sources, single-word stores to res.

   Carry handling: addu.co writes the carry bit, addu.cio adds it in and
   writes it back, addu.ci only adds it in.  The plain addu/subu and cmp
   instructions used for pointer and counter updates are interleaved with
   the carry chain, so the code relies on them leaving the carry bit
   untouched.  Do not reorder instructions here without checking both the
   carry chain and the preloaded register pairs.  */

	text
	align	16
	global	C_SYMBOL_NAME(__gmpn_add_n)
C_SYMBOL_NAME(__gmpn_add_n):
	addu.co	 r0,r0,r0		; clear cy flag
	xor	 r12,s2_ptr,res_ptr
	bb1	 2,r12,L1		; s2_ptr and res_ptr differ in bit 2
; **  V1a  **
L0:	bb0	 2,res_ptr,L_v1		; branch if bit 2 of res_ptr is clear
/* Add least significant limb separately to align res_ptr and s2_ptr */
	ld	 r10,s1_ptr,0
	addu	 s1_ptr,s1_ptr,4
	ld	 r8,s2_ptr,0
	addu	 s2_ptr,s2_ptr,4
	subu	 size,size,1
	addu.co	 r6,r10,r8		; first add of the chain, no carry-in
	st	 r6,res_ptr,0
	addu	 res_ptr,res_ptr,4
L_v1:	cmp	 r12,size,2
	bb1	 lt,r12,Lend2		; 0 or 1 limb left: share the V2 tail

/* Preload the first pair of limbs from each source.  From here on the
   loops add the pair loaded on the previous step while fetching the next. */
	ld	 r10,s1_ptr,0
	ld	 r12,s1_ptr,4
	ld.d	 r8,s2_ptr,0
	subu	 size,size,10		; a Loop1 pass needs 8 limbs + 2 preloaded
	bcnd	 lt0,size,Lfin1
/* Add blocks of 8 limbs until less than 8 limbs remain */
	align	 8
Loop1:	subu	 size,size,8
	addu.cio r6,r10,r8
	ld	 r10,s1_ptr,8
	addu.cio r7,r12,r9
	ld	 r12,s1_ptr,12
	ld.d	 r8,s2_ptr,8
	st.d	 r6,res_ptr,0
	addu.cio r6,r10,r8
	ld	 r10,s1_ptr,16
	addu.cio r7,r12,r9
	ld	 r12,s1_ptr,20
	ld.d	 r8,s2_ptr,16
	st.d	 r6,res_ptr,8
	addu.cio r6,r10,r8
	ld	 r10,s1_ptr,24
	addu.cio r7,r12,r9
	ld	 r12,s1_ptr,28
	ld.d	 r8,s2_ptr,24
	st.d	 r6,res_ptr,16
	addu.cio r6,r10,r8
	ld	 r10,s1_ptr,32
	addu.cio r7,r12,r9
	ld	 r12,s1_ptr,36
	addu	 s1_ptr,s1_ptr,32
	ld.d	 r8,s2_ptr,32
	addu	 s2_ptr,s2_ptr,32
	st.d	 r6,res_ptr,24
	addu	 res_ptr,res_ptr,32
	bcnd	 ge0,size,Loop1

/* Re-bias the counter: size >= 0 now means that, besides the preloaded
   pair, at least one more full pair remains. */
Lfin1:	addu	 size,size,8-2
	bcnd	 lt0,size,Lend1
/* Add blocks of 2 limbs until less than 2 limbs remain */
Loope1:	addu.cio r6,r10,r8
	ld	 r10,s1_ptr,8
	addu.cio r7,r12,r9
	ld	 r12,s1_ptr,12
	ld.d	 r8,s2_ptr,8
	st.d	 r6,res_ptr,0
	subu	 size,size,2
	addu	 s1_ptr,s1_ptr,8
	addu	 s2_ptr,s2_ptr,8
	addu	 res_ptr,res_ptr,8
	bcnd	 ge0,size,Loope1
/* Add the pair that is still held in r10/r12 and r8/r9 */
Lend1:	addu.cio r6,r10,r8
	addu.cio r7,r12,r9
	st.d	 r6,res_ptr,0

	bb0	 0,size,Lret1		; bit 0 clear: no odd limb left
/* Add last limb */
	ld	 r10,s1_ptr,8		; offset 8: pointers were not advanced past the last pair
	ld	 r8,s2_ptr,8
	addu.cio r6,r10,r8
	st	 r6,res_ptr,8

Lret1:	jmp.n	 r1			; the next instruction runs in the delay slot
	addu.ci r2,r0,r0		; return carry-out from most sign. limb

L1:	xor	 r12,s1_ptr,res_ptr
	bb1	 2,r12,L2		; s1_ptr and res_ptr differ in bit 2 as well
; **  V1b  **
/* s1_ptr agrees with res_ptr in bit 2: exchange the two source pointers
   (r12 is the temporary) and reuse V1a. */
	or	 r12,r0,s2_ptr
	or	 s2_ptr,r0,s1_ptr
	or	 s1_ptr,r0,r12
	br	 L0

; **  V2  **
/* If we come here, the alignment of s1_ptr and res_ptr as well as the
   alignment of s2_ptr and res_ptr differ.  Since there are only two ways
   things can be aligned (that we care about) we now know that the alignment
   of s1_ptr and s2_ptr are the same.  */

L2:	cmp	 r12,size,1
	bb1	 eq,r12,Ljone		; exactly one limb: go straight to the single-limb add
	bb0	 2,s1_ptr,L_v2		; branch if s1_ptr is aligned
/* Add least significant limb separately to align s1_ptr and s2_ptr */
	ld	 r10,s1_ptr,0
	addu	 s1_ptr,s1_ptr,4
	ld	 r8,s2_ptr,0
	addu	 s2_ptr,s2_ptr,4
	subu	 size,size,1
	addu.co	 r6,r10,r8		; first add of the chain, no carry-in
	st	 r6,res_ptr,0
	addu	 res_ptr,res_ptr,4

L_v2:	subu	 size,size,8
	bcnd	 lt0,size,Lfin2
/* Add blocks of 8 limbs until less than 8 limbs remain */
	align	 8
Loop2:	subu	 size,size,8
	ld.d	 r8,s1_ptr,0
	ld.d	 r6,s2_ptr,0
	addu.cio r8,r8,r6
	st	 r8,res_ptr,0		; single-word stores: res_ptr is not doubleword aligned here
	addu.cio r9,r9,r7
	st	 r9,res_ptr,4
	ld.d	 r8,s1_ptr,8
	ld.d	 r6,s2_ptr,8
	addu.cio r8,r8,r6
	st	 r8,res_ptr,8
	addu.cio r9,r9,r7
	st	 r9,res_ptr,12
	ld.d	 r8,s1_ptr,16
	ld.d	 r6,s2_ptr,16
	addu.cio r8,r8,r6
	st	 r8,res_ptr,16
	addu.cio r9,r9,r7
	st	 r9,res_ptr,20
	ld.d	 r8,s1_ptr,24
	ld.d	 r6,s2_ptr,24
	addu.cio r8,r8,r6
	st	 r8,res_ptr,24
	addu.cio r9,r9,r7
	st	 r9,res_ptr,28
	addu	 s1_ptr,s1_ptr,32
	addu	 s2_ptr,s2_ptr,32
	addu	 res_ptr,res_ptr,32
	bcnd	 ge0,size,Loop2

/* Re-bias the counter: size >= 0 now means at least 2 limbs remain */
Lfin2:	addu	 size,size,8-2
	bcnd	 lt0,size,Lend2
/* Add blocks of 2 limbs until less than 2 limbs remain */
Loope2:	ld.d	 r8,s1_ptr,0
	ld.d	 r6,s2_ptr,0
	addu.cio r8,r8,r6
	st	 r8,res_ptr,0
	addu.cio r9,r9,r7
	st	 r9,res_ptr,4
	subu	 size,size,2
	addu	 s1_ptr,s1_ptr,8
	addu	 s2_ptr,s2_ptr,8
	addu	 res_ptr,res_ptr,8
	bcnd	 ge0,size,Loope2
/* Also reached from L_v1 with size equal to 0 or 1.  On every path into
   this label bit 0 of size is set exactly when one limb is left. */
Lend2:	bb0	 0,size,Lret2
/* Add last limb */
Ljone:	ld	 r10,s1_ptr,0
	ld	 r8,s2_ptr,0
	addu.cio r6,r10,r8
	st	 r6,res_ptr,0

Lret2:	jmp.n	 r1			; the next instruction runs in the delay slot
	addu.ci r2,r0,r0		; return carry-out from most sign. limb