Home | History | Annotate | Line # | Download | only in mc88110
      1      1.1  mrg ; mc88110 __gmpn_add_n -- Add two limb vectors of the same length > 0 and store
      2      1.1  mrg ; sum in a third limb vector.
      3      1.1  mrg 
      4      1.1  mrg ; Copyright 1995, 1996, 2000 Free Software Foundation, Inc.
      5      1.1  mrg 
      6  1.1.1.2  mrg ;  This file is part of the GNU MP Library.
      7  1.1.1.2  mrg ;
      8  1.1.1.2  mrg ;  The GNU MP Library is free software; you can redistribute it and/or modify
      9  1.1.1.2  mrg ;  it under the terms of either:
     10  1.1.1.2  mrg ;
     11  1.1.1.2  mrg ;    * the GNU Lesser General Public License as published by the Free
     12  1.1.1.2  mrg ;      Software Foundation; either version 3 of the License, or (at your
     13  1.1.1.2  mrg ;      option) any later version.
     14  1.1.1.2  mrg ;
     15  1.1.1.2  mrg ;  or
     16  1.1.1.2  mrg ;
     17  1.1.1.2  mrg ;    * the GNU General Public License as published by the Free Software
     18  1.1.1.2  mrg ;      Foundation; either version 2 of the License, or (at your option) any
     19  1.1.1.2  mrg ;      later version.
     20  1.1.1.2  mrg ;
     21  1.1.1.2  mrg ;  or both in parallel, as here.
     22  1.1.1.2  mrg ;
     23  1.1.1.2  mrg ;  The GNU MP Library is distributed in the hope that it will be useful, but
     24  1.1.1.2  mrg ;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
     25  1.1.1.2  mrg ;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
     26  1.1.1.2  mrg ;  for more details.
     27  1.1.1.2  mrg ;
     28  1.1.1.2  mrg ;  You should have received copies of the GNU General Public License and the
     29  1.1.1.2  mrg ;  GNU Lesser General Public License along with the GNU MP Library.  If not,
     30  1.1.1.2  mrg ;  see https://www.gnu.org/licenses/.
     31      1.1  mrg 
     32      1.1  mrg 
     33      1.1  mrg ; INPUT PARAMETERS
                      ; Register aliases for the four arguments of __gmpn_add_n.  Limbs are
                      ; 4 bytes wide: the code advances each pointer by 4 per limb handled.
                      ; Keep comments off the define lines themselves, since trailing text
                      ; would become part of the macro expansion.
                      ;
                      ; res_ptr: address the sum limbs are stored to.  The same register
                      ; (r2) is overwritten with the carry-out just before returning.
     34      1.1  mrg #define res_ptr	r2
                      ; s1_ptr: address the first addend limbs are loaded from.
     35      1.1  mrg #define s1_ptr	r3
                      ; s2_ptr: address the second addend limbs are loaded from.
     36      1.1  mrg #define s2_ptr	r4
                      ; size: number of limbs to add (the file header states length > 0).
     37      1.1  mrg #define size	r5
     38      1.1  mrg 
     39      1.1  mrg #include "sysdep.h"
     40      1.1  mrg 
     41      1.1  mrg 	text
     42      1.1  mrg 	align	16
     43      1.1  mrg 	global	C_SYMBOL_NAME(__gmpn_add_n)
                      ; __gmpn_add_n -- add the size-limb vectors at s1_ptr and s2_ptr, store
                      ; the sum limbs at res_ptr and return the carry out of the top limb.
                      ;
                      ; In:   res_ptr (r2), s1_ptr (r3), s2_ptr (r4), size (r5, limb count > 0).
                      ; Out:  r2 = carry-out (0 or 1); the return jumps through r1.
                      ; Uses: r2-r10, r12 and the carry bit are overwritten.
                      ;
                      ; Bit 2 of the pointers selects one of three variants, so that each
                      ; double-word ld.d/st.d is issued on an address whose bit 2 is clear
                      ; (assumes limb pointers are 4-byte aligned -- TODO confirm with callers):
                      ;   V1a (L0): s2_ptr and res_ptr agree in bit 2 -> ld.d from s2, st.d to res.
                      ;   V1b (L1): s1_ptr and res_ptr agree in bit 2 -> swap sources, reuse V1a.
                      ;   V2  (L2): only s1_ptr and s2_ptr agree      -> ld.d from both, single st.
                      ;
                      ; The carry bit is live from the first limb to the final addu.ci.  Only
                      ; the .co/.cio additions write it; the plain addu/subu/cmp used for
                      ; pointer and counter bookkeeping are relied on to leave it unchanged.
     44      1.1  mrg C_SYMBOL_NAME(__gmpn_add_n):
     45      1.1  mrg 	addu.co	 r0,r0,r0		; clear cy flag
     46      1.1  mrg 	xor	 r12,s2_ptr,res_ptr		; bit 2 of r12 set iff s2_ptr and res_ptr differ in bit 2
     47      1.1  mrg 	bb1	 2,r12,L1		; they differ: try pairing s1_ptr with res_ptr at L1
     48      1.1  mrg ; **  V1a  **  s2_ptr and res_ptr agree in address bit 2
     49      1.1  mrg L0:	bb0	 2,res_ptr,L_v1		; skip the one-limb prologue if bit 2 of res_ptr is already clear
     50      1.1  mrg /* Add least significant limb separately to align res_ptr and s2_ptr */
     51      1.1  mrg 	ld	 r10,s1_ptr,0
     52      1.1  mrg 	addu	 s1_ptr,s1_ptr,4
     53      1.1  mrg 	ld	 r8,s2_ptr,0
     54      1.1  mrg 	addu	 s2_ptr,s2_ptr,4
     55      1.1  mrg 	subu	 size,size,1
     56      1.1  mrg 	addu.co	 r6,r10,r8		; carry was cleared at entry, so only carry-out is needed
     57      1.1  mrg 	st	 r6,res_ptr,0
     58      1.1  mrg 	addu	 res_ptr,res_ptr,4
     59      1.1  mrg L_v1:	cmp	 r12,size,2		; fewer than 2 limbs left?
     60      1.1  mrg 	bb1	 lt,r12,Lend2		; yes: Lend2 handles zero or one remaining limb
     61      1.1  mrg 
                      ; Software pipeline: the limb pair to be added next is always preloaded,
                      ; s1 limbs in r10/r12 and s2 limbs in r8/r9.
     62      1.1  mrg 	ld	 r10,s1_ptr,0
     63      1.1  mrg 	ld	 r12,s1_ptr,4
     64      1.1  mrg 	ld.d	 r8,s2_ptr,0
     65      1.1  mrg 	subu	 size,size,10		; bias: one Loop1 pass adds 8 limbs and preloads 2 more
     66      1.1  mrg 	bcnd	 lt0,size,Lfin1		; fewer than 10 limbs left: skip the unrolled loop
     67      1.1  mrg /* Add blocks of 8 limbs until less than 8 limbs remain */
     68      1.1  mrg 	align	 8
     69      1.1  mrg Loop1:	subu	 size,size,8		; account for the 8 limbs added in this pass
     70      1.1  mrg 	addu.cio r6,r10,r8		; limb 0 (from the preloaded pair)
     71      1.1  mrg 	ld	 r10,s1_ptr,8		; start preloading limbs 2-3
     72      1.1  mrg 	addu.cio r7,r12,r9		; limb 1
     73      1.1  mrg 	ld	 r12,s1_ptr,12
     74      1.1  mrg 	ld.d	 r8,s2_ptr,8
     75      1.1  mrg 	st.d	 r6,res_ptr,0		; store limbs 0-1
     76      1.1  mrg 	addu.cio r6,r10,r8		; limb 2
     77      1.1  mrg 	ld	 r10,s1_ptr,16
     78      1.1  mrg 	addu.cio r7,r12,r9		; limb 3
     79      1.1  mrg 	ld	 r12,s1_ptr,20
     80      1.1  mrg 	ld.d	 r8,s2_ptr,16
     81      1.1  mrg 	st.d	 r6,res_ptr,8		; store limbs 2-3
     82      1.1  mrg 	addu.cio r6,r10,r8		; limb 4
     83      1.1  mrg 	ld	 r10,s1_ptr,24
     84      1.1  mrg 	addu.cio r7,r12,r9		; limb 5
     85      1.1  mrg 	ld	 r12,s1_ptr,28
     86      1.1  mrg 	ld.d	 r8,s2_ptr,24
     87      1.1  mrg 	st.d	 r6,res_ptr,16		; store limbs 4-5
     88      1.1  mrg 	addu.cio r6,r10,r8		; limb 6
     89      1.1  mrg 	ld	 r10,s1_ptr,32		; preload limbs 8-9 for the next pass or the tail code
     90      1.1  mrg 	addu.cio r7,r12,r9		; limb 7
     91      1.1  mrg 	ld	 r12,s1_ptr,36
     92      1.1  mrg 	addu	 s1_ptr,s1_ptr,32		; s1_ptr is done for this pass; advance by 8 limbs
     93      1.1  mrg 	ld.d	 r8,s2_ptr,32		; last use of the old s2_ptr
     94      1.1  mrg 	addu	 s2_ptr,s2_ptr,32
     95      1.1  mrg 	st.d	 r6,res_ptr,24		; store limbs 6-7
     96      1.1  mrg 	addu	 res_ptr,res_ptr,32
     97      1.1  mrg 	bcnd	 ge0,size,Loop1		; repeat while 10 or more limbs (preloaded pair included) remain
     98      1.1  mrg 
     99      1.1  mrg Lfin1:	addu	 size,size,8-2		; size = limbs still to add (preloaded pair included) minus 4
    100      1.1  mrg 	bcnd	 lt0,size,Lend1		; only 2 or 3 limbs left: no further preload is possible
    101      1.1  mrg /* Add blocks of 2 limbs until less than 2 limbs remain */
    102      1.1  mrg Loope1:	addu.cio r6,r10,r8		; add the preloaded pair ...
    103      1.1  mrg 	ld	 r10,s1_ptr,8		; ... while preloading the following pair
    104      1.1  mrg 	addu.cio r7,r12,r9
    105      1.1  mrg 	ld	 r12,s1_ptr,12
    106      1.1  mrg 	ld.d	 r8,s2_ptr,8
    107      1.1  mrg 	st.d	 r6,res_ptr,0
    108      1.1  mrg 	subu	 size,size,2
    109      1.1  mrg 	addu	 s1_ptr,s1_ptr,8
    110      1.1  mrg 	addu	 s2_ptr,s2_ptr,8
    111      1.1  mrg 	addu	 res_ptr,res_ptr,8
    112      1.1  mrg 	bcnd	 ge0,size,Loope1
    113      1.1  mrg Lend1:	addu.cio r6,r10,r8		; drain the pipeline: add and store the last preloaded pair
    114      1.1  mrg 	addu.cio r7,r12,r9
    115      1.1  mrg 	st.d	 r6,res_ptr,0
    116      1.1  mrg 
    117      1.1  mrg 	bb0	 0,size,Lret1		; size kept the parity of the limb count: bit 0 clear means no odd limb
    118      1.1  mrg /* Add last limb */
    119      1.1  mrg 	ld	 r10,s1_ptr,8		; offset 8: the pointers still address the pair just stored
    120      1.1  mrg 	ld	 r8,s2_ptr,8
    121      1.1  mrg 	addu.cio r6,r10,r8
    122      1.1  mrg 	st	 r6,res_ptr,8
    123      1.1  mrg 
    124      1.1  mrg Lret1:	jmp.n	 r1		; return; the following addu.ci runs in the delay slot
    125      1.1  mrg 	addu.ci	 r2,r0,r0		; return carry-out from most sign. limb
    126      1.1  mrg 
    127      1.1  mrg L1:	xor	 r12,s1_ptr,res_ptr		; bit 2 of r12 set iff s1_ptr and res_ptr differ in bit 2
    128      1.1  mrg 	bb1	 2,r12,L2		; they differ as well: use variant V2 at L2
    129      1.1  mrg ; **  V1b  **  s1_ptr and res_ptr agree in bit 2: swap the two sources and reuse V1a
    130      1.1  mrg 	or	 r12,r0,s2_ptr		; r12 = s2_ptr (or with r0 acts as a register move)
    131      1.1  mrg 	or	 s2_ptr,r0,s1_ptr		; s2_ptr = old s1_ptr
    132      1.1  mrg 	or	 s1_ptr,r0,r12		; s1_ptr = old s2_ptr
    133      1.1  mrg 	br	 L0		; addition is commutative, so V1a now applies unchanged
    134      1.1  mrg 
    135      1.1  mrg ; **  V2  **
    136      1.1  mrg /* If we come here, the alignment of s1_ptr and res_ptr as well as the
    137      1.1  mrg    alignment of s2_ptr and res_ptr differ.  Since there are only two ways
    138      1.1  mrg    things can be aligned (that we care about) we now know that the alignment
    139      1.1  mrg    of s1_ptr and s2_ptr are the same.  */
    140      1.1  mrg 
                      ; In this variant both sources are read with ld.d, while results are
                      ; written with single-word st because res_ptr has the other alignment.
                      ; Lend2, Ljone and Lret2 below are also used by the L_v1 path when fewer
                      ; than 2 limbs remain there.
    141      1.1  mrg L2:	cmp	 r12,size,1
    142      1.1  mrg 	bb1	 eq,r12,Ljone		; exactly one limb: add it directly, no alignment work needed
    143      1.1  mrg 	bb0	 2,s1_ptr,L_v2		; branch if s1_ptr is aligned
    144      1.1  mrg /* Add least significant limb separately to align s1_ptr and s2_ptr */
    145      1.1  mrg 	ld	 r10,s1_ptr,0
    146      1.1  mrg 	addu	 s1_ptr,s1_ptr,4
    147      1.1  mrg 	ld	 r8,s2_ptr,0
    148      1.1  mrg 	addu	 s2_ptr,s2_ptr,4
    149      1.1  mrg 	subu	 size,size,1
    150      1.1  mrg 	addu.co	 r6,r10,r8		; carry was cleared at entry, so only carry-out is needed
    151      1.1  mrg 	st	 r6,res_ptr,0
    152      1.1  mrg 	addu	 res_ptr,res_ptr,4
    153      1.1  mrg 
    154      1.1  mrg L_v2:	subu	 size,size,8		; bias: size >= 0 means a full block of 8 limbs remains
    155      1.1  mrg 	bcnd	 lt0,size,Lfin2
    156      1.1  mrg /* Add blocks of 8 limbs until less than 8 limbs remain */
    157      1.1  mrg 	align	 8
    158      1.1  mrg Loop2:	subu	 size,size,8		; account for the 8 limbs added in this pass
    159      1.1  mrg 	ld.d	 r8,s1_ptr,0		; r8/r9 = s1 limbs 0-1
    160      1.1  mrg 	ld.d	 r6,s2_ptr,0		; r6/r7 = s2 limbs 0-1
    161      1.1  mrg 	addu.cio r8,r8,r6		; sums are formed in place in r8/r9
    162      1.1  mrg 	st	 r8,res_ptr,0
    163      1.1  mrg 	addu.cio r9,r9,r7
    164      1.1  mrg 	st	 r9,res_ptr,4
    165      1.1  mrg 	ld.d	 r8,s1_ptr,8		; limbs 2-3
    166      1.1  mrg 	ld.d	 r6,s2_ptr,8
    167      1.1  mrg 	addu.cio r8,r8,r6
    168      1.1  mrg 	st	 r8,res_ptr,8
    169      1.1  mrg 	addu.cio r9,r9,r7
    170      1.1  mrg 	st	 r9,res_ptr,12
    171      1.1  mrg 	ld.d	 r8,s1_ptr,16		; limbs 4-5
    172      1.1  mrg 	ld.d	 r6,s2_ptr,16
    173      1.1  mrg 	addu.cio r8,r8,r6
    174      1.1  mrg 	st	 r8,res_ptr,16
    175      1.1  mrg 	addu.cio r9,r9,r7
    176      1.1  mrg 	st	 r9,res_ptr,20
    177      1.1  mrg 	ld.d	 r8,s1_ptr,24		; limbs 6-7
    178      1.1  mrg 	ld.d	 r6,s2_ptr,24
    179      1.1  mrg 	addu.cio r8,r8,r6
    180      1.1  mrg 	st	 r8,res_ptr,24
    181      1.1  mrg 	addu.cio r9,r9,r7
    182      1.1  mrg 	st	 r9,res_ptr,28
    183      1.1  mrg 	addu	 s1_ptr,s1_ptr,32		; advance all three pointers by 8 limbs
    184      1.1  mrg 	addu	 s2_ptr,s2_ptr,32
    185      1.1  mrg 	addu	 res_ptr,res_ptr,32
    186      1.1  mrg 	bcnd	 ge0,size,Loop2		; repeat while 8 or more limbs remain
    187      1.1  mrg 
    188      1.1  mrg Lfin2:	addu	 size,size,8-2		; size = remaining limbs minus 2
    189      1.1  mrg 	bcnd	 lt0,size,Lend2		; fewer than 2 limbs left
    190      1.1  mrg Loope2:	ld.d	 r8,s1_ptr,0		; tail loop: one limb pair per pass
    191      1.1  mrg 	ld.d	 r6,s2_ptr,0
    192      1.1  mrg 	addu.cio r8,r8,r6
    193      1.1  mrg 	st	 r8,res_ptr,0
    194      1.1  mrg 	addu.cio r9,r9,r7
    195      1.1  mrg 	st	 r9,res_ptr,4
    196      1.1  mrg 	subu	 size,size,2
    197      1.1  mrg 	addu	 s1_ptr,s1_ptr,8
    198      1.1  mrg 	addu	 s2_ptr,s2_ptr,8
    199      1.1  mrg 	addu	 res_ptr,res_ptr,8
    200      1.1  mrg 	bcnd	 ge0,size,Loope2
    201      1.1  mrg Lend2:	bb0	 0,size,Lret2		; size kept the parity of the limb count: bit 0 clear means no odd limb
    202      1.1  mrg /* Add last limb */
    203      1.1  mrg Ljone:	ld	 r10,s1_ptr,0
    204      1.1  mrg 	ld	 r8,s2_ptr,0
    205      1.1  mrg 	addu.cio r6,r10,r8		; .cio also covers the size == 1 entry, where carry-in is still zero
    206      1.1  mrg 	st	 r6,res_ptr,0
    207      1.1  mrg 
    208      1.1  mrg Lret2:	jmp.n	 r1		; return; the following addu.ci runs in the delay slot
    209      1.1  mrg 	addu.ci	 r2,r0,r0		; return carry-out from most sign. limb
    210