Home | History | Annotate | Line # | Download | only in mc88110
      1 ; mc88110 __gmpn_add_n -- Add two limb vectors of the same length > 0 and store
      2 ; sum in a third limb vector.
      3 
      4 ; Copyright 1995, 1996, 2000 Free Software Foundation, Inc.
      5 
      6 ;  This file is part of the GNU MP Library.
      7 ;
      8 ;  The GNU MP Library is free software; you can redistribute it and/or modify
      9 ;  it under the terms of either:
     10 ;
     11 ;    * the GNU Lesser General Public License as published by the Free
     12 ;      Software Foundation; either version 3 of the License, or (at your
     13 ;      option) any later version.
     14 ;
     15 ;  or
     16 ;
     17 ;    * the GNU General Public License as published by the Free Software
     18 ;      Foundation; either version 2 of the License, or (at your option) any
     19 ;      later version.
     20 ;
     21 ;  or both in parallel, as here.
     22 ;
     23 ;  The GNU MP Library is distributed in the hope that it will be useful, but
     24 ;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
     25 ;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
     26 ;  for more details.
     27 ;
     28 ;  You should have received copies of the GNU General Public License and the
     29 ;  GNU Lesser General Public License along with the GNU MP Library.  If not,
     30 ;  see https://www.gnu.org/licenses/.
     31 
     32 
     33 ; INPUT PARAMETERS
        ; Symbolic names for the four argument registers used by the routine below.
        ;   res_ptr  r2  where the sum limbs are stored; r2 also receives the
        ;                returned carry at the exits
        ;   s1_ptr   r3  first source limb vector
        ;   s2_ptr   r4  second source limb vector
        ;   size     r5  number of limbs still to be added
        ; The code advances each pointer by 4 bytes and decrements size by 1 for
        ; every limb it processes.
     34 #define res_ptr	r2
     35 #define s1_ptr	r3
     36 #define s2_ptr	r4
     37 #define size	r5
     38 
     39 #include "sysdep.h"
     40 
     41 	text
     42 	align	16
     43 	global	C_SYMBOL_NAME(__gmpn_add_n)
        ; __gmpn_add_n
        ; Adds the limb vectors at s1_ptr and s2_ptr (size limbs each), propagating
        ; the carry from limb to limb, stores the size sum limbs at res_ptr and
        ; returns the final carry (0 or 1) in r2.  Return is through r1.
        ;
        ; Bit 2 of the three pointers selects one of three code paths:
        ;   V1a  s2_ptr and res_ptr agree in bit 2: doubleword loads from s2_ptr,
        ;        doubleword stores to res_ptr, single-word loads from s1_ptr.
        ;   V1b  (label L1) s1_ptr and res_ptr agree: the two source pointers are
        ;        swapped and the V1a code is reused.
        ;   V2   (label L2) neither source agrees with res_ptr: doubleword loads
        ;        from both sources, single-word stores to res_ptr.
        ;
        ; Registers written on this path: r2-r10 and r12.  The carry bit is live
        ; from the first limb to the return; the code relies on the unsuffixed
        ; addu/subu pointer and counter updates leaving it untouched.
     44 C_SYMBOL_NAME(__gmpn_add_n):
     45 	addu.co	 r0,r0,r0		; clear cy flag (0+0 gives no carry out)
     46 	xor	 r12,s2_ptr,res_ptr	; bit 2 of r12 set if s2_ptr and res_ptr differ in bit 2
     47 	bb1	 2,r12,L1	; they differ: see whether s1_ptr pairs with res_ptr
     48 ; **  V1a  **
     49 L0:	bb0	 2,res_ptr,L_v1		; skip the single limb if bit 2 of res_ptr is clear (doubleword aligned, assuming word-aligned limb pointers)
     50 /* Add least significant limb separately to align res_ptr and s2_ptr */
     51 	ld	 r10,s1_ptr,0
     52 	addu	 s1_ptr,s1_ptr,4
     53 	ld	 r8,s2_ptr,0
     54 	addu	 s2_ptr,s2_ptr,4
     55 	subu	 size,size,1
     56 	addu.co	 r6,r10,r8	; first add of the chain: carry out only
     57 	st	 r6,res_ptr,0
     58 	addu	 res_ptr,res_ptr,4
     59 L_v1:	cmp	 r12,size,2	; r12 = comparison bits for size against 2
     60 	bb1	 lt,r12,Lend2	; 0 or 1 limb left: Lend2 finishes and returns
     61 
        ; Preload the first two limbs of each source.  From here on every add
        ; consumes limbs loaded earlier while the loads for the next pair are
        ; already issued.
     62 	ld	 r10,s1_ptr,0
     63 	ld	 r12,s1_ptr,4
     64 	ld.d	 r8,s2_ptr,0	; fills r8 and r9
     65 	subu	 size,size,10	; bias: one block of 8 plus the 2 preloaded limbs
     66 	bcnd	 lt0,size,Lfin1	; fewer than 10 limbs: no full block
     67 /* Add blocks of 8 limbs until less than 8 limbs remain */
     68 	align	 8
     69 Loop1:	subu	 size,size,8	; account for the block after this one
     70 	addu.cio r6,r10,r8	; limb 0 of the block
     71 	ld	 r10,s1_ptr,8
     72 	addu.cio r7,r12,r9	; limb 1
     73 	ld	 r12,s1_ptr,12
     74 	ld.d	 r8,s2_ptr,8
     75 	st.d	 r6,res_ptr,0	; stores r6 and r7
     76 	addu.cio r6,r10,r8	; limb 2
     77 	ld	 r10,s1_ptr,16
     78 	addu.cio r7,r12,r9	; limb 3
     79 	ld	 r12,s1_ptr,20
     80 	ld.d	 r8,s2_ptr,16
     81 	st.d	 r6,res_ptr,8
     82 	addu.cio r6,r10,r8	; limb 4
     83 	ld	 r10,s1_ptr,24
     84 	addu.cio r7,r12,r9	; limb 5
     85 	ld	 r12,s1_ptr,28
     86 	ld.d	 r8,s2_ptr,24
     87 	st.d	 r6,res_ptr,16
     88 	addu.cio r6,r10,r8	; limb 6
     89 	ld	 r10,s1_ptr,32	; offsets 32 and 36 are the first pair after this block
     90 	addu.cio r7,r12,r9	; limb 7
     91 	ld	 r12,s1_ptr,36
     92 	addu	 s1_ptr,s1_ptr,32
     93 	ld.d	 r8,s2_ptr,32
     94 	addu	 s2_ptr,s2_ptr,32
     95 	st.d	 r6,res_ptr,24
     96 	addu	 res_ptr,res_ptr,32
     97 	bcnd	 ge0,size,Loop1	; another full block is available
     98 
        ; Here size = (limbs not yet added, counting the 2 preloaded) - 10.
        ; After the adjustment below it is that count minus 4, so a negative
        ; value means only the preloaded pair and at most one more limb remain.
     99 Lfin1:	addu	 size,size,8-2
    100 	bcnd	 lt0,size,Lend1
    101 /* Add blocks of 2 limbs until less than 2 limbs remain */
    102 Loope1:	addu.cio r6,r10,r8
    103 	ld	 r10,s1_ptr,8
    104 	addu.cio r7,r12,r9
    105 	ld	 r12,s1_ptr,12
    106 	ld.d	 r8,s2_ptr,8
    107 	st.d	 r6,res_ptr,0
    108 	subu	 size,size,2
    109 	addu	 s1_ptr,s1_ptr,8
    110 	addu	 s2_ptr,s2_ptr,8
    111 	addu	 res_ptr,res_ptr,8
    112 	bcnd	 ge0,size,Loope1
        ; Add and store the pair that is still held in r10/r12 and r8/r9.
    113 Lend1:	addu.cio r6,r10,r8
    114 	addu.cio r7,r12,r9
    115 	st.d	 r6,res_ptr,0
    116 
    117 	bb0	 0,size,Lret1	; bit 0 of size clear: no odd limb left
    118 /* Add last limb */
        ; Offset 8 because the pointers were not advanced past the final pair.
    119 	ld	 r10,s1_ptr,8
    120 	ld	 r8,s2_ptr,8
    121 	addu.cio r6,r10,r8
    122 	st	 r6,res_ptr,8
    123 
    124 Lret1:	jmp.n	 r1	; return; the following instruction runs in the delay slot
    125 	addu.ci	 r2,r0,r0		; return carry-out from most sign. limb
    126 
        ; Reached when s2_ptr and res_ptr differ in bit 2.  If s1_ptr agrees with
        ; res_ptr instead, exchange the two source pointers (the sum does not
        ; depend on operand order) and reuse the code at L0.
    127 L1:	xor	 r12,s1_ptr,res_ptr	; bit 2 of r12 set if s1_ptr and res_ptr differ in bit 2
    128 	bb1	 2,r12,L2	; neither source agrees with res_ptr: use the V2 code
    129 ; **  V1b  **
    130 	or	 r12,r0,s2_ptr	; r12 = s2_ptr (temporary for the swap)
    131 	or	 s2_ptr,r0,s1_ptr	; s2_ptr = old s1_ptr
    132 	or	 s1_ptr,r0,r12	; s1_ptr = old s2_ptr
    133 	br	 L0
    134 
    135 ; **  V2  **
    136 /* If we come here, the alignment of s1_ptr and res_ptr as well as the
    137    alignment of s2_ptr and res_ptr differ.  Since there are only two ways
    138    things can be aligned (that we care about) we now know that the alignment
    139    of s1_ptr and s2_ptr are the same.  */
        ; Both sources are therefore read with doubleword loads, while the sums
        ; are written to res_ptr one word at a time.
    140 
    141 L2:	cmp	 r12,size,1	; r12 = comparison bits for size against 1
    142 	bb1	 eq,r12,Ljone	; exactly one limb: add it directly
    143 	bb0	 2,s1_ptr,L_v2		; branch if s1_ptr is aligned
    144 /* Add least significant limb separately to align s1_ptr and s2_ptr */
    145 	ld	 r10,s1_ptr,0
    146 	addu	 s1_ptr,s1_ptr,4
    147 	ld	 r8,s2_ptr,0
    148 	addu	 s2_ptr,s2_ptr,4
    149 	subu	 size,size,1
    150 	addu.co	 r6,r10,r8	; first add of the chain: carry out only
    151 	st	 r6,res_ptr,0
    152 	addu	 res_ptr,res_ptr,4
    153 
    154 L_v2:	subu	 size,size,8	; bias by one block of 8 limbs
    155 	bcnd	 lt0,size,Lfin2	; fewer than 8 limbs: no full block
    156 /* Add blocks of 8 limbs until less than 8 limbs remain */
    157 	align	 8
    158 Loop2:	subu	 size,size,8	; account for the block after this one
    159 	ld.d	 r8,s1_ptr,0	; fills r8 and r9
    160 	ld.d	 r6,s2_ptr,0	; fills r6 and r7
    161 	addu.cio r8,r8,r6	; limb 0 of the block
    162 	st	 r8,res_ptr,0
    163 	addu.cio r9,r9,r7	; limb 1
    164 	st	 r9,res_ptr,4
    165 	ld.d	 r8,s1_ptr,8
    166 	ld.d	 r6,s2_ptr,8
    167 	addu.cio r8,r8,r6	; limb 2
    168 	st	 r8,res_ptr,8
    169 	addu.cio r9,r9,r7	; limb 3
    170 	st	 r9,res_ptr,12
    171 	ld.d	 r8,s1_ptr,16
    172 	ld.d	 r6,s2_ptr,16
    173 	addu.cio r8,r8,r6	; limb 4
    174 	st	 r8,res_ptr,16
    175 	addu.cio r9,r9,r7	; limb 5
    176 	st	 r9,res_ptr,20
    177 	ld.d	 r8,s1_ptr,24
    178 	ld.d	 r6,s2_ptr,24
    179 	addu.cio r8,r8,r6	; limb 6
    180 	st	 r8,res_ptr,24
    181 	addu.cio r9,r9,r7	; limb 7
    182 	st	 r9,res_ptr,28
    183 	addu	 s1_ptr,s1_ptr,32
    184 	addu	 s2_ptr,s2_ptr,32
    185 	addu	 res_ptr,res_ptr,32
    186 	bcnd	 ge0,size,Loop2	; another full block is available
    187 
        ; Here size = (limbs not yet added) - 8.  After the adjustment below it
        ; is that count minus 2, so a negative value means at most one limb is
        ; left.
    188 Lfin2:	addu	 size,size,8-2
    189 	bcnd	 lt0,size,Lend2
        ; Add blocks of 2 limbs until less than 2 limbs remain.
    190 Loope2:	ld.d	 r8,s1_ptr,0
    191 	ld.d	 r6,s2_ptr,0
    192 	addu.cio r8,r8,r6
    193 	st	 r8,res_ptr,0
    194 	addu.cio r9,r9,r7
    195 	st	 r9,res_ptr,4
    196 	subu	 size,size,2
    197 	addu	 s1_ptr,s1_ptr,8
    198 	addu	 s2_ptr,s2_ptr,8
    199 	addu	 res_ptr,res_ptr,8
    200 	bcnd	 ge0,size,Loope2
        ; Lend2 is also the target of the branch at L_v1, which arrives with an
        ; unbiased size of 0 or 1.  Either way bit 0 of size tells whether one
        ; limb is still to be added.
    201 Lend2:	bb0	 0,size,Lret2
    202 /* Add last limb */
    203 Ljone:	ld	 r10,s1_ptr,0
    204 	ld	 r8,s2_ptr,0
    205 	addu.cio r6,r10,r8
    206 	st	 r6,res_ptr,0
    207 
    208 Lret2:	jmp.n	 r1	; return; the following instruction runs in the delay slot
    209 	addu.ci	 r2,r0,r0		; return carry-out from most sign. limb
    210