Home | History | Annotate | Line # | Download | only in alpha
      1 dnl  Alpha mpn_add_n -- Add two limb vectors of the same length > 0 and
      2 dnl  store sum in a third limb vector.
      3 
      4 dnl  Copyright 1995, 1999, 2000, 2005, 2011 Free Software Foundation, Inc.
      5 
      6 dnl  This file is part of the GNU MP Library.
      7 dnl
      8 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
      9 dnl  it under the terms of either:
     10 dnl
     11 dnl    * the GNU Lesser General Public License as published by the Free
     12 dnl      Software Foundation; either version 3 of the License, or (at your
     13 dnl      option) any later version.
     14 dnl
     15 dnl  or
     16 dnl
     17 dnl    * the GNU General Public License as published by the Free Software
     18 dnl      Foundation; either version 2 of the License, or (at your option) any
     19 dnl      later version.
     20 dnl
     21 dnl  or both in parallel, as here.
     22 dnl
     23 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
     24 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
     25 dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
     26 dnl  for more details.
     27 dnl
     28 dnl  You should have received copies of the GNU General Public License and the
     29 dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
     30 dnl  see https://www.gnu.org/licenses/.
     31 
     32 include(`../config.m4')
     33 
     34 C      cycles/limb
     35 C EV4:     ?
     36 C EV5:     4.75
     37 C EV6:     3
     38 
     39 dnl  INPUT PARAMETERS
     40 dnl  res_ptr	r16
     41 dnl  s1_ptr	r17
     42 dnl  s2_ptr	r18
     43 dnl  size	r19
     44 
     45 ASM_START()
        C mpn_add_nc -- same addition as mpn_add_n, but starting from a carry
        C supplied by the caller.  Besides the r16-r19 inputs listed above, r20
        C holds the incoming carry (presumably 0 or 1 -- confirm against callers).
     46 PROLOGUE(mpn_add_nc)
     47 	bis	r20,r31,r25		C r25 = carry-in from r20 (r31 reads as zero)
     48 	br	L(com)			C join mpn_add_n just after its carry clear
     49 EPILOGUE()
     50 PROLOGUE(mpn_add_n)
        C Adds the limb vectors at s1_ptr (r17) and s2_ptr (r18), size (r19)
        C limbs each, stores the sums at res_ptr (r16) and returns the final
        C carry in r0.
        C Register use:
        C   r0-r3    limbs loaded from s2
        C   r4-r7    limbs loaded from s1
        C   r28      sum of two limbs before the carry is added
        C   r20-r23  finished result limbs waiting to be stored
        C   r8       carry out of the main add
        C   r25      running carry passed from limb to limb
     51 	bis	r31,r31,r25		C clear cy
     52 L(com):	subq	r19,4,r19		C decr loop cnt; mpn_add_nc enters here
     53 	blt	r19,$Lend2		C if less than 4 limbs, goto 2nd loop
     54 C Start software pipeline for 1st loop
     55 	ldq	r0,0(r18)		C s2 limb 0
     56 	ldq	r4,0(r17)		C s1 limb 0
     57 	ldq	r1,8(r18)		C s2 limb 1
     58 	ldq	r5,8(r17)		C s1 limb 1
     59 	addq	r17,32,r17		C update s1_ptr
     60 	addq	r0,r4,r28		C 1st main add
     61 	ldq	r2,16(r18)		C s2 limb 2
     62 	addq	r25,r28,r20		C 1st carry add
     63 	ldq	r3,24(r18)		C s2 limb 3
     64 	cmpult	r28,r4,r8		C cy out of 1st main add
     65 	ldq	r6,-16(r17)		C s1 limb 2 (s1_ptr already advanced)
     66 	cmpult	r20,r28,r25		C cy out of 1st carry add
     67 	ldq	r7,-8(r17)		C s1 limb 3 (s1_ptr already advanced)
     68 	bis	r8,r25,r25		C combine cy from the two adds
     69 	subq	r19,4,r19		C decr loop cnt
     70 	addq	r1,r5,r28		C 2nd main add
     71 	addq	r18,32,r18		C update s2_ptr
     72 	addq	r28,r25,r21		C 2nd carry add
     73 	cmpult	r28,r5,r8		C cy out of 2nd main add
     74 	blt	r19,$Lend1		C if less than 4 limbs remain, jump
     75 C 1st loop handles groups of 4 limbs in a software pipeline
     76 	ALIGN(16)
     77 $Loop:	cmpult	r21,r28,r25		C cy out of 2nd carry add
     78 	ldq	r0,0(r18)		C next group: s2 limb 0
     79 	bis	r8,r25,r25		C combine cy from the two adds
     80 	ldq	r1,8(r18)		C next group: s2 limb 1
     81 	addq	r2,r6,r28		C 3rd main add
     82 	ldq	r4,0(r17)		C next group: s1 limb 0
     83 	addq	r28,r25,r22		C 3rd carry add
     84 	ldq	r5,8(r17)		C next group: s1 limb 1
     85 	cmpult	r28,r6,r8		C cy out of 3rd main add
     86 	cmpult	r22,r28,r25		C cy out of 3rd carry add
     87 	stq	r20,0(r16)		C store 1st result limb
     88 	bis	r8,r25,r25		C combine cy from the two adds
     89 	stq	r21,8(r16)		C store 2nd result limb
     90 	addq	r3,r7,r28		C 4th main add
     91 	addq	r28,r25,r23		C 4th carry add
     92 	cmpult	r28,r7,r8		C cy out of 4th main add
     93 	cmpult	r23,r28,r25		C cy out of 4th carry add
     94 		addq	r17,32,r17		C update s1_ptr
     95 	bis	r8,r25,r25		C combine cy from the two adds
     96 		addq	r16,32,r16		C update res_ptr
     97 	addq	r0,r4,r28		C 1st main add
     98 	ldq	r2,16(r18)		C next group: s2 limb 2
     99 	addq	r25,r28,r20		C 1st carry add
    100 	ldq	r3,24(r18)		C next group: s2 limb 3
    101 	cmpult	r28,r4,r8		C cy out of 1st main add
    102 	ldq	r6,-16(r17)		C next group: s1 limb 2 (s1_ptr already advanced)
    103 	cmpult	r20,r28,r25		C cy out of 1st carry add
    104 	ldq	r7,-8(r17)		C next group: s1 limb 3 (s1_ptr already advanced)
    105 	bis	r8,r25,r25		C combine cy from the two adds
    106 	subq	r19,4,r19		C decr loop cnt
    107 	stq	r22,-16(r16)		C store 3rd result limb (res_ptr already advanced)
    108 	addq	r1,r5,r28		C 2nd main add
    109 	stq	r23,-8(r16)		C store 4th result limb (res_ptr already advanced)
    110 	addq	r25,r28,r21		C 2nd carry add
    111 		addq	r18,32,r18		C update s2_ptr
    112 	cmpult	r28,r5,r8		C cy out of 2nd main add
    113 	bge	r19,$Loop		C loop while another full group of 4 limbs remains
    114 C Finish software pipeline for 1st loop
    115 $Lend1:	cmpult	r21,r28,r25		C cy out of 2nd carry add
    116 	bis	r8,r25,r25		C combine cy from the two adds
    117 	addq	r2,r6,r28		C 3rd main add
    118 	addq	r28,r25,r22		C 3rd carry add
    119 	cmpult	r28,r6,r8		C cy out of 3rd main add
    120 	cmpult	r22,r28,r25		C cy out of 3rd carry add
    121 	stq	r20,0(r16)		C store 1st result limb
    122 	bis	r8,r25,r25		C combine cy from the two adds
    123 	stq	r21,8(r16)		C store 2nd result limb
    124 	addq	r3,r7,r28		C 4th main add
    125 	addq	r28,r25,r23		C 4th carry add
    126 	cmpult	r28,r7,r8		C cy out of 4th main add
    127 	cmpult	r23,r28,r25		C cy out of 4th carry add
    128 	bis	r8,r25,r25		C combine cy from the two adds
    129 	addq	r16,32,r16		C update res_ptr
    130 	stq	r22,-16(r16)		C store 3rd result limb (res_ptr already advanced)
    131 	stq	r23,-8(r16)		C store 4th result limb (res_ptr already advanced)
    132 $Lend2:	addq	r19,4,r19		C restore loop cnt; r19 = limbs still to do
    133 	beq	r19,$Lret		C nothing left, return the carry
    134 C Start software pipeline for 2nd loop
    135 	ldq	r0,0(r18)		C s2 limb
    136 	ldq	r4,0(r17)		C s1 limb
    137 	subq	r19,1,r19		C account for the limb just loaded
    138 	beq	r19,$Lend0		C it was the last limb, skip the loop
    139 C 2nd loop handles remaining 1-3 limbs
    140 	ALIGN(16)
    141 $Loop0:	addq	r0,r4,r28		C main add
    142 	ldq	r0,8(r18)		C preload next s2 limb
    143 	cmpult	r28,r4,r8		C cy out of main add (uses r4 before it is reloaded)
    144 	ldq	r4,8(r17)		C preload next s1 limb
    145 	addq	r28,r25,r20		C carry add
    146 	addq	r18,8,r18		C advance s2_ptr by one limb
    147 	addq	r17,8,r17		C advance s1_ptr by one limb
    148 	stq	r20,0(r16)		C store result limb
    149 	cmpult	r20,r28,r25		C cy out of carry add
    150 	subq	r19,1,r19		C decr loop cnt
    151 	bis	r8,r25,r25		C combine cy from the two adds
    152 	addq	r16,8,r16		C advance res_ptr by one limb
    153 	bne	r19,$Loop0		C loop until only the preloaded limb is left
    154 $Lend0:	addq	r0,r4,r28		C main add
    155 	addq	r28,r25,r20		C carry add
    156 	cmpult	r28,r4,r8		C cy out of main add
    157 	cmpult	r20,r28,r25		C cy out of carry add
    158 	stq	r20,0(r16)		C store last result limb
    159 	bis	r8,r25,r25		C combine cy from the two adds
    160 
    161 $Lret:	bis	r25,r31,r0		C return cy
    162 	ret	r31,(r26),1		C return through the address in r26
    163 EPILOGUE()
    164 ASM_END()
    165