Home | History | Annotate | Line # | Download | only in alpha
aorslsh1_n.asm revision 1.1.1.1
      1 dnl  Alpha mpn_addlsh1_n/mpn_sublsh1_n -- rp[] = up[] +- (vp[] << 1).
      2 
      3 dnl  Copyright 2003 Free Software Foundation, Inc.
      4 
      5 dnl  This file is part of the GNU MP Library.
      6 
      7 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
      8 dnl  it under the terms of the GNU Lesser General Public License as published
      9 dnl  by the Free Software Foundation; either version 3 of the License, or (at
     10 dnl  your option) any later version.
     11 
     12 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
     13 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
     14 dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
     15 dnl  License for more details.
     16 
     17 dnl  You should have received a copy of the GNU Lesser General Public License
     18 dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
     19 
     20 include(`../config.m4')
     21 
     22 C      cycles/limb
     23 C EV4:    12.5
     24 C EV5:     6.25
     25 C EV6:     4.375 (i.e., worse than separate mpn_lshift and mpn_add_n at 3.875)
     26 
     27 C TODO
     28 C  * Write special version for ev6, as this is a slowdown for 100 < n < 2200
     29 C    compared to separate mpn_lshift and mpn_add_n.
     30 C  * Use addq instead of sll for left shift, and similarly cmplt instead of srl
     31 C    for right shift.
     32 
     33 dnl  INPUT PARAMETERS
        dnl  rp is the pointer results are stored through, up and vp are the two
        dnl  pointers limbs are loaded from, and n is the limb count.  The operation
        dnl  is rp[] = up[] +- (vp[] << 1), so vp is the operand that gets shifted.
     34 define(`rp',`r16')
     35 define(`up',`r17')
     36 define(`vp',`r18')
     37 define(`n', `r19')
     38 
        dnl  u0..u3 hold limbs loaded from up and v0..v3 hold limbs loaded from vp.
        dnl  Four of each are needed because the main loop is unrolled four ways and
        dnl  loads limbs ahead of the stage that consumes them.
        dnl  Note that the entry code also uses r1 and r2 (the registers behind u1
        dnl  and u2) as scratch for the n mod 4 dispatch before any limb is loaded.
     39 define(`u0', `r8')
     40 define(`u1', `r1')
     41 define(`u2', `r2')
     42 define(`u3', `r3')
     43 define(`v0', `r4')
     44 define(`v1', `r5')
     45 define(`v2', `r6')
     46 define(`v3', `r7')
     47 
        dnl  cy0 and cy1 alternate as carry-in and carry-out from one limb to the
        dnl  next; the last limb always leaves its carry in cy0, which is r0.
        dnl  cy is scratch for a single compare result, sl is the shifted vp limb,
        dnl  ps is the partial result u +- sl, and rr is the limb that gets stored.
     48 define(`cy0', `r0')
     49 define(`cy1', `r20')
     50 define(`cy', `r22')
     51 define(`rr', `r24')
     52 define(`ps', `r25')
     53 define(`sl', `r28')
     54 
        C  Select which of the two functions this assembly produces.
        C    ADDSUB is the limb operation: addq for addlsh1, subq for sublsh1.
        C    CARRY(x,y,z) emits an unsigned compare that sets z to 0 or 1.  The code
        C    always calls it as CARRY(result, operand, cy):
        C      add: cmpult result,operand  -- result < operand means the add wrapped
        C      sub: cmpult operand,result  -- operand < result means a borrow
        C    func is the symbol name handed to PROLOGUE.
        C  Nothing in this file defines a fallback if neither OPERATION_ symbol is
        C  set; presumably the build always defines exactly one -- TODO confirm.
     55 ifdef(`OPERATION_addlsh1_n',`
     56   define(ADDSUB,       addq)
     57   define(CARRY,       `cmpult $1,$2,$3')
     58   define(func, mpn_addlsh1_n)
     59 ')
     60 ifdef(`OPERATION_sublsh1_n',`
     61   define(ADDSUB,       subq)
     62   define(CARRY,       `cmpult $2,$1,$3')
     63   define(func, mpn_sublsh1_n)
     64 ')
     65 
        C  Names both functions this one source file can be assembled into.
        C  NOTE(review): the macro is defined outside this file; presumably it is
        C  only a marker for the build system -- confirm in the m4 support files.
     66 MULFUNC_PROLOGUE(mpn_addlsh1_n mpn_sublsh1_n)
     67 
     68 ASM_START()
        C  func (mpn_addlsh1_n or mpn_sublsh1_n, chosen by the OPERATION_ ifdefs)
        C
        C  Computes rp[i] = up[i] +- (vp[i] << 1) for i = 0 .. n-1, one 64-bit limb
        C  per step, propagating carry (or borrow) from limb to limb.  The carry out
        C  of the last limb is left in cy0 (r0) when the function returns.
        C
        C  Structure:
        C    * n is biased by -4 on entry, so its sign tells whether at least one
        C      more full 4-limb pass of the loop remains.
        C    * The low two bits of n pick one of four entry paths.  Each path
        C      preloads the limbs its entry stage expects, biases rp/up/vp to match
        C      the offsets used at that stage, and jumps into the middle of the loop
        C      (or straight to the tail when n is 1, 2 or 3).
        C    * The loop has four stages.  Each stage finishes one limb that is
        C      already in registers and loads the limb pair that will be consumed
        C      three stages later.
        C    * The tail ($Lcj3/$Lcj2/$Lcj1) finishes the last three, two or one
        C      limbs already in registers without loading anything more.
        C
        C  Per limb the carry out is the sum of three 0/1 terms: the bit shifted
        C  out of the vp limb, the wrap of u +- sl, and the wrap caused by applying
        C  the incoming carry.  The incoming carry can therefore be larger than 1.
        C
        C  The carry alternates registers: stages fed by cy1 produce cy0 and stages
        C  fed by cy0 produce cy1.  That is why some entry paths must clear cy0.
        C
        C  NOTE(review): n = 0 is not special-cased.  It takes the $Lb00 path and is
        C  processed exactly as if n were 4, so callers presumably guarantee n >= 1.
        C
        C  The bare "C <number>" lines inside the loop look like the intended issue
        C  cycle of the instruction pair that follows -- TODO confirm.
     69 PROLOGUE(func)
        C  Bias n by -4, clear cy1, and dispatch on n mod 4.  Subtracting 4 does not
        C  change the low two bits, so the and below sees the original n mod 4.
     70 	lda	n, -4(n)
     71 	bis	r31, r31, cy1
     72 	and	n, 3, r1
     73 	beq	r1, $Lb00
     74 	cmpeq	r1, 1, r2
     75 	bne	r2, $Lb01
     76 	cmpeq	r1, 2, r2
     77 	bne	r2, $Lb10
        C  Fall through: n mod 4 = 3.  Preload limbs 0,1,2 into pairs 0,1,2 and step
        C  up and vp past them.  With n >= 7 the biased n is >= 0 and the loop is
        C  entered at its top; with n = 3 only the tail runs.  rp needs no bias
        C  because both the loop top and $Lcj3 store at 0(rp).  cy1 is already 0.
     78 $Lb11:	C n = 3, 7, 11, ...
     79 	ldq	v0, 0(vp)
     80 	ldq	u0, 0(up)
     81 	ldq	v1, 8(vp)
     82 	ldq	u1, 8(up)
     83 	ldq	v2, 16(vp)
     84 	ldq	u2, 16(up)
     85 	lda	vp, 24(vp)
     86 	lda	up, 24(up)
     87 	bge	n, $Loop
     88 	br	r31, $Lcj3
        C  n mod 4 = 2.  Entry stage is $LL10 (or $Lcj2 when n = 2); both consume
        C  cy0, so it is cleared here.  rp is biased by -8 because those stages
        C  store at 8(rp).  For the loop entry, limb 2 goes into pair 3, and vp/up
        C  are set so that -24(vp) and 8(up) at $LL10 both address limb 3.
     89 $Lb10:	C n = 2, 6, 10, ...
     90 	bis	r31, r31, cy0
     91 	ldq	v1, 0(vp)
     92 	ldq	u1, 0(up)
     93 	ldq	v2, 8(vp)
     94 	ldq	u2, 8(up)
     95 	lda	rp, -8(rp)
     96 	blt	n, $Lcj2
     97 	ldq	v3, 16(vp)
     98 	ldq	u3, 16(up)
     99 	lda	vp, 48(vp)
    100 	lda	up, 16(up)
    101 	br	r31, $LL10
        C  n mod 4 = 1.  Entry stage is $LL01 (or $Lcj1 when n = 1); both consume
        C  cy1, which was cleared at function entry.  rp is first biased by -16 for
        C  the 16(rp) store in $Lcj1.  On the loop path it is then moved up by 32,
        C  so the -16(rp) store at $LL01 lands on result limb 0.  Limbs 1 and 2 go
        C  into pairs 3 and 0; vp/up are set so that -16(vp) and 16(up) at $LL01
        C  both address limb 3.
    102 $Lb01:	C n = 1, 5, 9, ...
    103 	ldq	v2, 0(vp)
    104 	ldq	u2, 0(up)
    105 	lda	rp, -16(rp)
    106 	blt	n, $Lcj1
    107 	ldq	v3, 8(vp)
    108 	ldq	u3, 8(up)
    109 	ldq	v0, 16(vp)
    110 	ldq	u0, 16(up)
    111 	lda	vp, 40(vp)
    112 	lda	up, 8(up)
    113 	lda	rp, 32(rp)
    114 	br	r31, $LL01
        C  n mod 4 = 0.  Entry stage is $LL00x, which consumes cy0 (cleared here)
        C  and stores at -8(rp), hence the +8 bias on rp.  Limbs 0,1,2 go into
        C  pairs 3,0,1.  up is left alone because $LL00x itself adds 32 to it
        C  before its -8(up) load; vp is advanced here so -8(vp) addresses limb 3.
    115 $Lb00:	C n = 4, 8, 12, ...
    116 	bis	r31, r31, cy0
    117 	ldq	v3, 0(vp)
    118 	ldq	u3, 0(up)
    119 	ldq	v0, 8(vp)
    120 	ldq	u0, 8(up)
    121 	ldq	v1, 16(vp)
    122 	ldq	u1, 16(up)
    123 	lda	vp, 32(vp)
    124 	lda	rp, 8(rp)
    125 	br	r31, $LL00x
    126 	ALIGN(16)
        C  Main loop, four limbs per pass.
        C  Stage 0: finish pair 0 with carry-in cy1, carry-out cy0, store at 0(rp).
        C  Loads pair 3 for stage 3.  vp is advanced by 32 at the end of this
        C  stage, so the later vp loads in this pass use negative offsets.
    127 C 0
    128 $Loop:	sll	v0, 1, sl	C left shift vlimb
    129 	ldq	v3, 0(vp)
    130 C 1
    131 	ADDSUB	u0, sl, ps	C ulimb + (vlimb << 1)
    132 	ldq	u3, 0(up)
    133 C 2
    134 	ADDSUB	ps, cy1, rr	C consume carry from previous operation
    135 	srl	v0, 63, cy0	C carry out #1
    136 C 3
    137 	CARRY(	ps, u0, cy)	C carry out #2
    138 	stq	rr, 0(rp)
    139 C 4
    140 	addq	cy, cy0, cy0	C combine carry out #1 and #2
    141 	CARRY(	rr, ps, cy)	C carry out #3
    142 C 5
    143 	addq	cy, cy0, cy0	C final carry out
    144 	lda	vp, 32(vp)	C bookkeeping
        C  Stage 1: finish pair 1 with carry-in cy0, carry-out cy1, store at 8(rp).
        C  Loads pair 0 for the next pass (or for $Lcj3).  rp is advanced by 32 at
        C  the end of this stage, so stages 2 and 3 store at negative offsets.
    145 C 6
    146 $LL10:	sll	v1, 1, sl
    147 	ldq	v0, -24(vp)
    148 C 7
    149 	ADDSUB	u1, sl, ps
    150 	ldq	u0, 8(up)
    151 C 8
    152 	ADDSUB	ps, cy0, rr
    153 	srl	v1, 63, cy1
    154 C 9
    155 	CARRY(	ps, u1, cy)
    156 	stq	rr, 8(rp)
    157 C 10
    158 	addq	cy, cy1, cy1
    159 	CARRY(	rr, ps, cy)
    160 C 11
    161 	addq	cy, cy1, cy1
    162 	lda	rp, 32(rp)	C bookkeeping
        C  Stage 2: finish pair 2 with carry-in cy1, carry-out cy0, store at
        C  -16(rp).  Loads pair 1 for the next pass (or for $Lcj2).
    163 C 12
    164 $LL01:	sll	v2, 1, sl
    165 	ldq	v1, -16(vp)
    166 C 13
    167 	ADDSUB	u2, sl, ps
    168 	ldq	u1, 16(up)
    169 C 14
    170 	ADDSUB	ps, cy1, rr
    171 	srl	v2, 63, cy0
    172 C 15
    173 	CARRY(	ps, u2, cy)
    174 	stq	rr, -16(rp)
    175 C 16
    176 	addq	cy, cy0, cy0
    177 	CARRY(	rr, ps, cy)
    178 C 17
    179 	addq	cy, cy0, cy0
        C  Stage 3: up is advanced by 32 first, then pair 3 is finished with
        C  carry-in cy0, carry-out cy1, store at -8(rp).  Loads pair 2 for the next
        C  pass (or for $Lcj1).  n drops by 4; the loop repeats while n >= 0.
    180 $LL00x:	lda	up, 32(up)	C bookkeeping
    181 C 18
    182 	sll	v3, 1, sl
    183 	ldq	v2, -8(vp)
    184 C 19
    185 	ADDSUB	u3, sl, ps
    186 	ldq	u2, -8(up)
    187 C 20
    188 	ADDSUB	ps, cy0, rr
    189 	srl	v3, 63, cy1
    190 C 21
    191 	CARRY(	ps, u3, cy)
    192 	stq	rr, -8(rp)
    193 C 22
    194 	addq	cy, cy1, cy1
    195 	CARRY(	rr, ps, cy)
    196 C 23
    197 	addq	cy, cy1, cy1
    198 	lda	n, -4(n)	C bookkeeping
    199 C 24
    200 	bge	n, $Loop
    201 
        C  Tail: the same arithmetic as stages 0, 1 and 2 but with no loads and no
        C  pointer updates.  The three remaining limbs are already in pairs 0,1,2.
        C  $Lcj3: third-from-last limb, carry-in cy1, carry-out cy0, store 0(rp).
    202 $Lcj3:	sll	v0, 1, sl
    203 	ADDSUB	u0, sl, ps
    204 	ADDSUB	ps, cy1, rr
    205 	srl	v0, 63, cy0
    206 	CARRY(	ps, u0, cy)
    207 	stq	rr, 0(rp)
    208 	addq	cy, cy0, cy0
    209 	CARRY(	rr, ps, cy)
    210 	addq	cy, cy0, cy0
    211 
        C  $Lcj2: second-from-last limb, carry-in cy0, carry-out cy1, store 8(rp).
        C  Also the direct target for n = 2.
    212 $Lcj2:	sll	v1, 1, sl
    213 	ADDSUB	u1, sl, ps
    214 	ADDSUB	ps, cy0, rr
    215 	srl	v1, 63, cy1
    216 	CARRY(	ps, u1, cy)
    217 	stq	rr, 8(rp)
    218 	addq	cy, cy1, cy1
    219 	CARRY(	rr, ps, cy)
    220 	addq	cy, cy1, cy1
    221 
        C  $Lcj1: last limb, carry-in cy1, store 16(rp).  Also the direct target
        C  for n = 1.  Its carry-out lands in cy0 (r0), which is what the caller
        C  sees after the ret below.
    222 $Lcj1:	sll	v2, 1, sl
    223 	ADDSUB	u2, sl, ps
    224 	ADDSUB	ps, cy1, rr
    225 	srl	v2, 63, cy0
    226 	CARRY(	ps, u2, cy)
    227 	stq	rr, 16(rp)
    228 	addq	cy, cy0, cy0
    229 	CARRY(	rr, ps, cy)
    230 	addq	cy, cy0, cy0
    231 
    232 	ret	r31,(r26),1
    233 EPILOGUE()
    234 ASM_END()
    235