aorslsh1_n.asm revision 1.1.1.1 1 dnl Alpha mpn_addlsh1_n/mpn_sublsh1_n -- rp[] = up[] +- (vp[] << 1).
2
3 dnl Copyright 2003 Free Software Foundation, Inc.
4
5 dnl This file is part of the GNU MP Library.
6
7 dnl The GNU MP Library is free software; you can redistribute it and/or modify
8 dnl it under the terms of the GNU Lesser General Public License as published
9 dnl by the Free Software Foundation; either version 3 of the License, or (at
10 dnl your option) any later version.
11
12 dnl The GNU MP Library is distributed in the hope that it will be useful, but
13 dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14 dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
15 dnl License for more details.
16
17 dnl You should have received a copy of the GNU Lesser General Public License
18 dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
19
20 include(`../config.m4')
21
22 C cycles/limb
23 C EV4: 12.5
24 C EV5: 6.25
25 C EV6: 4.375 (i.e., worse than separate mpn_lshift and mpn_add_n at 3.875)
26
27 C TODO
28 C * Write special version for ev6, as this is a slowdown for 100 < n < 2200
29 C compared to separate mpn_lshift and mpn_add_n.
30 C * Use addq instead of sll for left shift, and similarly cmplt instead of srl
31 C for right shift.
32
33 dnl INPUT PARAMETERS
34 define(`rp',`r16')
35 define(`up',`r17')
36 define(`vp',`r18')
37 define(`n', `r19')
38
39 define(`u0', `r8')
40 define(`u1', `r1')
41 define(`u2', `r2')
42 define(`u3', `r3')
43 define(`v0', `r4')
44 define(`v1', `r5')
45 define(`v2', `r6')
46 define(`v3', `r7')
47
48 define(`cy0', `r0')
49 define(`cy1', `r20')
50 define(`cy', `r22')
51 define(`rr', `r24')
52 define(`ps', `r25')
53 define(`sl', `r28')
54
55 ifdef(`OPERATION_addlsh1_n',`
56 define(ADDSUB, addq)
57 define(CARRY, `cmpult $1,$2,$3')
58 define(func, mpn_addlsh1_n)
59 ')
60 ifdef(`OPERATION_sublsh1_n',`
61 define(ADDSUB, subq)
62 define(CARRY, `cmpult $2,$1,$3')
63 define(func, mpn_sublsh1_n)
64 ')
65
66 MULFUNC_PROLOGUE(mpn_addlsh1_n mpn_sublsh1_n)
67
68 ASM_START()
69 PROLOGUE(func)
70 lda n, -4(n)
71 bis r31, r31, cy1
72 and n, 3, r1
73 beq r1, $Lb00
74 cmpeq r1, 1, r2
75 bne r2, $Lb01
76 cmpeq r1, 2, r2
77 bne r2, $Lb10
78 $Lb11: C n = 3, 7, 11, ...
79 ldq v0, 0(vp)
80 ldq u0, 0(up)
81 ldq v1, 8(vp)
82 ldq u1, 8(up)
83 ldq v2, 16(vp)
84 ldq u2, 16(up)
85 lda vp, 24(vp)
86 lda up, 24(up)
87 bge n, $Loop
88 br r31, $Lcj3
89 $Lb10: C n = 2, 6, 10, ...
90 bis r31, r31, cy0
91 ldq v1, 0(vp)
92 ldq u1, 0(up)
93 ldq v2, 8(vp)
94 ldq u2, 8(up)
95 lda rp, -8(rp)
96 blt n, $Lcj2
97 ldq v3, 16(vp)
98 ldq u3, 16(up)
99 lda vp, 48(vp)
100 lda up, 16(up)
101 br r31, $LL10
102 $Lb01: C n = 1, 5, 9, ...
103 ldq v2, 0(vp)
104 ldq u2, 0(up)
105 lda rp, -16(rp)
106 blt n, $Lcj1
107 ldq v3, 8(vp)
108 ldq u3, 8(up)
109 ldq v0, 16(vp)
110 ldq u0, 16(up)
111 lda vp, 40(vp)
112 lda up, 8(up)
113 lda rp, 32(rp)
114 br r31, $LL01
115 $Lb00: C n = 4, 8, 12, ...
116 bis r31, r31, cy0
117 ldq v3, 0(vp)
118 ldq u3, 0(up)
119 ldq v0, 8(vp)
120 ldq u0, 8(up)
121 ldq v1, 16(vp)
122 ldq u1, 16(up)
123 lda vp, 32(vp)
124 lda rp, 8(rp)
125 br r31, $LL00x
126 ALIGN(16)
127 C 0
128 $Loop: sll v0, 1, sl C left shift vlimb
129 ldq v3, 0(vp)
130 C 1
131 ADDSUB u0, sl, ps C ulimb + (vlimb << 1)
132 ldq u3, 0(up)
133 C 2
134 ADDSUB ps, cy1, rr C consume carry from previous operation
135 srl v0, 63, cy0 C carry out #1
136 C 3
137 CARRY( ps, u0, cy) C carry out #2
138 stq rr, 0(rp)
139 C 4
140 addq cy, cy0, cy0 C combine carry out #1 and #2
141 CARRY( rr, ps, cy) C carry out #3
142 C 5
143 addq cy, cy0, cy0 C final carry out
144 lda vp, 32(vp) C bookkeeping
145 C 6
146 $LL10: sll v1, 1, sl
147 ldq v0, -24(vp)
148 C 7
149 ADDSUB u1, sl, ps
150 ldq u0, 8(up)
151 C 8
152 ADDSUB ps, cy0, rr
153 srl v1, 63, cy1
154 C 9
155 CARRY( ps, u1, cy)
156 stq rr, 8(rp)
157 C 10
158 addq cy, cy1, cy1
159 CARRY( rr, ps, cy)
160 C 11
161 addq cy, cy1, cy1
162 lda rp, 32(rp) C bookkeeping
163 C 12
164 $LL01: sll v2, 1, sl
165 ldq v1, -16(vp)
166 C 13
167 ADDSUB u2, sl, ps
168 ldq u1, 16(up)
169 C 14
170 ADDSUB ps, cy1, rr
171 srl v2, 63, cy0
172 C 15
173 CARRY( ps, u2, cy)
174 stq rr, -16(rp)
175 C 16
176 addq cy, cy0, cy0
177 CARRY( rr, ps, cy)
178 C 17
179 addq cy, cy0, cy0
180 $LL00x: lda up, 32(up) C bookkeeping
181 C 18
182 sll v3, 1, sl
183 ldq v2, -8(vp)
184 C 19
185 ADDSUB u3, sl, ps
186 ldq u2, -8(up)
187 C 20
188 ADDSUB ps, cy0, rr
189 srl v3, 63, cy1
190 C 21
191 CARRY( ps, u3, cy)
192 stq rr, -8(rp)
193 C 22
194 addq cy, cy1, cy1
195 CARRY( rr, ps, cy)
196 C 23
197 addq cy, cy1, cy1
198 lda n, -4(n) C bookkeeping
199 C 24
200 bge n, $Loop
201
202 $Lcj3: sll v0, 1, sl
203 ADDSUB u0, sl, ps
204 ADDSUB ps, cy1, rr
205 srl v0, 63, cy0
206 CARRY( ps, u0, cy)
207 stq rr, 0(rp)
208 addq cy, cy0, cy0
209 CARRY( rr, ps, cy)
210 addq cy, cy0, cy0
211
212 $Lcj2: sll v1, 1, sl
213 ADDSUB u1, sl, ps
214 ADDSUB ps, cy0, rr
215 srl v1, 63, cy1
216 CARRY( ps, u1, cy)
217 stq rr, 8(rp)
218 addq cy, cy1, cy1
219 CARRY( rr, ps, cy)
220 addq cy, cy1, cy1
221
222 $Lcj1: sll v2, 1, sl
223 ADDSUB u2, sl, ps
224 ADDSUB ps, cy1, rr
225 srl v2, 63, cy0
226 CARRY( ps, u2, cy)
227 stq rr, 16(rp)
228 addq cy, cy0, cy0
229 CARRY( rr, ps, cy)
230 addq cy, cy0, cy0
231
232 ret r31,(r26),1
233 EPILOGUE()
234 ASM_END()
235