rshift.asm revision 1.1.1.3 1 1.1 mrg dnl PowerPC-64 mpn_rshift -- rp[] = up[] >> cnt
2 1.1 mrg
3 1.1.1.2 mrg dnl Copyright 2003, 2005, 2010, 2011 Free Software Foundation, Inc.
4 1.1 mrg
5 1.1 mrg dnl This file is part of the GNU MP Library.
6 1.1.1.3 mrg dnl
7 1.1 mrg dnl The GNU MP Library is free software; you can redistribute it and/or modify
8 1.1.1.3 mrg dnl it under the terms of either:
9 1.1.1.3 mrg dnl
10 1.1.1.3 mrg dnl * the GNU Lesser General Public License as published by the Free
11 1.1.1.3 mrg dnl Software Foundation; either version 3 of the License, or (at your
12 1.1.1.3 mrg dnl option) any later version.
13 1.1.1.3 mrg dnl
14 1.1.1.3 mrg dnl or
15 1.1.1.3 mrg dnl
16 1.1.1.3 mrg dnl * the GNU General Public License as published by the Free Software
17 1.1.1.3 mrg dnl Foundation; either version 2 of the License, or (at your option) any
18 1.1.1.3 mrg dnl later version.
19 1.1.1.3 mrg dnl
20 1.1.1.3 mrg dnl or both in parallel, as here.
21 1.1.1.3 mrg dnl
22 1.1 mrg dnl The GNU MP Library is distributed in the hope that it will be useful, but
23 1.1 mrg dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
24 1.1.1.3 mrg dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
25 1.1.1.3 mrg dnl for more details.
26 1.1.1.3 mrg dnl
27 1.1.1.3 mrg dnl You should have received copies of the GNU General Public License and the
28 1.1.1.3 mrg dnl GNU Lesser General Public License along with the GNU MP Library. If not,
29 1.1.1.3 mrg dnl see https://www.gnu.org/licenses/.
30 1.1 mrg
31 1.1 mrg include(`../config.m4')
32 1.1 mrg
33 1.1.1.2 mrg C cycles/limb
34 1.1.1.2 mrg C POWER3/PPC630 ?
35 1.1.1.2 mrg C POWER4/PPC970 ?
36 1.1.1.2 mrg C POWER5 2.25
37 1.1.1.2 mrg C POWER6 9.75
38 1.1.1.2 mrg C POWER7 2.15
39 1.1.1.2 mrg
40 1.1.1.2 mrg C TODO
41 1.1.1.2 mrg C * Try to reduce the number of needed live registers
42 1.1.1.2 mrg C * Micro-optimise header code
43 1.1.1.2 mrg C * Keep in synch with lshift.asm and lshiftc.asm
44 1.1 mrg
45 1.1 mrg C INPUT PARAMETERS
46 1.1.1.2 mrg define(`rp', `r3') C destination pointer, base register of every std of a result limb
47 1.1.1.2 mrg define(`up', `r4') C source pointer, base register of every ld of an input limb
48 1.1.1.2 mrg define(`n', `r5') C limb count, only read in the function header
49 1.1.1.2 mrg define(`cnt', `r6') C right-shift count, used as the srd shift amount
50 1.1.1.2 mrg
51 1.1.1.2 mrg define(`tnc',`r0') C complementary shift count, set to 64 - cnt and used by sld
52 1.1.1.2 mrg define(`u0',`r30') C loaded source limb; r30 also holds n & 3 in the header
53 1.1.1.2 mrg define(`u1',`r31') C loaded source limb; r31 also holds the ctr count in the header
54 1.1.1.2 mrg define(`retval',`r5') C return value; same register as n, so n is dead once retval is written
55 1.1 mrg
C mpn_rshift: shift the n-limb operand at up right by cnt bits and store the
C n result limbs at rp.  Each result limb is (up[i] >> cnt) | (up[i+1] << tnc)
C with tnc = 64 - cnt; the top limb is just up[n-1] >> cnt.  The value
C returned is up[0] << tnc, i.e. the bits shifted out of the low limb.  It is
C returned in r3, or split as r3 = retval >> 32, r4 = retval when
C HAVE_ABI_mode32 is defined.
C
C The header dispatches on n mod 4 to one of four entry sequences (b00, b01,
C b10, b11).  Each one primes the pipeline, biases rp so that the first store
C reached from its entry point lands on the first result limb, and then either
C enters the 4-limb loop or, for short operands, jumps into the wind-down code.
C ctr holds (n + 3) >> 2 and is decremented by the bdnz/bdz of each entry
C sequence and by the bdnz at the loop bottom.
C
C Register use in the loop: u0/u1 alternate as freshly loaded limbs, r8/r9 and
C r12/r7 hold the srd/sld halves of the next two result limbs, and r11/r10
C hold the two combined limbs waiting to be stored.
C
C NOTE(review): 0(up) is loaded unconditionally and 64 - cnt is used as a
C shift count, so this presumably expects n >= 1 and 0 < cnt < 64 -- confirm
C against the documented mpn_rshift contract.
56 1.1 mrg ASM_START()
57 1.1 mrg PROLOGUE(mpn_rshift)
58 1.1.1.2 mrg std r31, -8(r1) C save r31 and r30 below the stack pointer; r1 itself is not adjusted
59 1.1.1.2 mrg std r30, -16(r1) C both are reloaded at the ret label
60 1.1.1.2 mrg subfic tnc, cnt, 64 C tnc = 64 - cnt
61 1.1.1.2 mrg C sldi r30, n, 3 C byte count corresponding to n
62 1.1.1.2 mrg C add rp, rp, r30 C rp = rp + n
63 1.1.1.2 mrg C add up, up, r30 C up = up + n
64 1.1.1.2 mrg rldicl. r30, n, 0,62 C r30 = n & 3, set cr0
65 1.1.1.2 mrg cmpdi cr6, r30, 2 C cr6 = (n & 3) compared with 2
66 1.1.1.2 mrg addi r31, n, 3 C compute count...
67 1.1.1.2 mrg ld r10, 0(up) C load 1st limb for b00...b11
68 1.1.1.2 mrg sld retval, r10, tnc C return value = up[0] << tnc; overwrites n, which is not read again
69 1.1 mrg ifdef(`HAVE_ABI_mode32',
70 1.1.1.2 mrg ` rldicl r31, r31, 62,34', C ...branch count
71 1.1.1.2 mrg ` srdi r31, r31, 2') C ...for ctr
72 1.1.1.2 mrg mtctr r31 C copy count into ctr
73 1.1.1.2 mrg beq cr0, L(b00) C n & 3 == 0
74 1.1.1.2 mrg blt cr6, L(b01) C n & 3 == 1
75 1.1.1.2 mrg ld r11, 8(up) C load 2nd limb for b10 and b11
76 1.1.1.2 mrg beq cr6, L(b10) C n & 3 == 2; fall through when n & 3 == 3
77 1.1.1.2 mrg
78 1.1.1.2 mrg ALIGN(16)
79 1.1.1.2 mrg L(b11): srd r8, r10, cnt C n & 3 == 3: r8 = up[0] >> cnt
80 1.1.1.2 mrg sld r9, r11, tnc C r9 = up[1] << tnc
81 1.1.1.2 mrg ld u1, 16(up) C u1 = up[2]
82 1.1.1.2 mrg addi up, up, 24
83 1.1.1.2 mrg srd r12, r11, cnt
84 1.1.1.2 mrg sld r7, u1, tnc
85 1.1 mrg addi rp, rp, -16 C bias rp: the first store reached on this path is at 16(rp)
86 1.1.1.2 mrg bdnz L(gt3) C ctr still nonzero, more than 3 limbs: go set up for the loop
87 1.1 mrg
88 1.1.1.2 mrg or r11, r8, r9 C n == 3: result limb 0; the rest is finished in the wind-down code
89 1.1.1.2 mrg srd r8, u1, cnt C top result limb, up[2] >> cnt
90 1.1.1.2 mrg b L(cj3)
91 1.1.1.2 mrg
92 1.1.1.2 mrg ALIGN(16)
93 1.1.1.2 mrg L(gt3): ld u0, 0(up) C up was advanced by 24 above, so this is up[3]
94 1.1.1.2 mrg or r11, r8, r9
95 1.1.1.2 mrg srd r8, u1, cnt
96 1.1.1.2 mrg sld r9, u0, tnc
97 1.1 mrg ld u1, 8(up)
98 1.1.1.2 mrg or r10, r12, r7
99 1.1.1.2 mrg b L(L11) C enter the loop at its third quarter
100 1.1 mrg
101 1.1.1.2 mrg ALIGN(32)
102 1.1.1.2 mrg L(b10): srd r12, r10, cnt C n & 3 == 2: r12 = up[0] >> cnt
103 1.1.1.2 mrg addi rp, rp, -24 C bias rp: the first store reached on this path is at 24(rp)
104 1.1.1.2 mrg sld r7, r11, tnc
105 1.1.1.2 mrg bdnz L(gt2) C ctr still nonzero, more than 2 limbs: go set up for the loop
106 1.1.1.2 mrg
107 1.1.1.2 mrg srd r8, r11, cnt C n == 2: top result limb, up[1] >> cnt
108 1.1.1.2 mrg or r10, r12, r7
109 1.1.1.2 mrg b L(cj2)
110 1.1.1.2 mrg
111 1.1.1.2 mrg L(gt2): ld u0, 16(up)
112 1.1.1.2 mrg srd r8, r11, cnt
113 1.1.1.2 mrg sld r9, u0, tnc
114 1.1.1.2 mrg ld u1, 24(up)
115 1.1.1.2 mrg or r10, r12, r7
116 1.1.1.2 mrg srd r12, u0, cnt
117 1.1.1.2 mrg sld r7, u1, tnc
118 1.1.1.2 mrg ld u0, 32(up)
119 1.1.1.2 mrg or r11, r8, r9
120 1.1.1.2 mrg addi up, up, 16 C the L10 entry loads 24(up), which is now the limb after the one just loaded
121 1.1.1.2 mrg b L(L10) C enter the loop at its last quarter
122 1.1.1.2 mrg
123 1.1.1.2 mrg ALIGN(16)
124 1.1.1.2 mrg L(b00): ld u1, 8(up) C n & 3 == 0: u1 = up[1]
125 1.1.1.2 mrg srd r12, r10, cnt
126 1.1.1.2 mrg sld r7, u1, tnc
127 1.1.1.2 mrg ld u0, 16(up)
128 1.1.1.2 mrg srd r8, u1, cnt
129 1.1.1.2 mrg sld r9, u0, tnc
130 1.1.1.2 mrg ld u1, 24(up)
131 1.1.1.2 mrg or r10, r12, r7
132 1.1.1.2 mrg srd r12, u0, cnt
133 1.1.1.2 mrg sld r7, u1, tnc
134 1.1.1.2 mrg addi rp, rp, -8 C bias rp: the first store reached on this path is at 8(rp)
135 1.1.1.2 mrg bdz L(cj4) C ctr reached zero, n == 4: all limbs are loaded, go to the wind-down code
136 1.1 mrg
137 1.1.1.2 mrg L(gt4): addi up, up, 32
138 1.1.1.2 mrg ld u0, 0(up)
139 1.1.1.2 mrg or r11, r8, r9
140 1.1.1.2 mrg b L(L00) C enter the loop at its second quarter
141 1.1 mrg
142 1.1.1.2 mrg ALIGN(16)
143 1.1.1.2 mrg L(b01): bdnz L(gt1) C n & 3 == 1: branch when more than 1 limb
144 1.1.1.2 mrg srd r8, r10, cnt C n == 1: the single result limb is up[0] >> cnt
145 1.1.1.2 mrg std r8, 0(rp)
146 1.1.1.2 mrg b L(ret)
147 1.1.1.2 mrg
148 1.1.1.2 mrg L(gt1): ld u0, 8(up)
149 1.1.1.2 mrg srd r8, r10, cnt
150 1.1.1.2 mrg sld r9, u0, tnc
151 1.1.1.2 mrg ld u1, 16(up)
152 1.1.1.2 mrg srd r12, u0, cnt
153 1.1.1.2 mrg sld r7, u1, tnc
154 1.1.1.2 mrg ld u0, 24(up)
155 1.1.1.2 mrg or r11, r8, r9
156 1.1.1.2 mrg srd r8, u1, cnt
157 1.1.1.2 mrg sld r9, u0, tnc
158 1.1.1.2 mrg ld u1, 32(up)
159 1.1.1.2 mrg addi up, up, 40
160 1.1.1.2 mrg or r10, r12, r7
161 1.1.1.2 mrg bdz L(end) C ctr reached zero, n == 5: skip the loop; otherwise fall into it at the top
162 1.1.1.2 mrg
163 1.1.1.2 mrg ALIGN(32)
164 1.1.1.2 mrg L(top): srd r12, u0, cnt C main loop: four loads and four stores per iteration
165 1.1.1.2 mrg sld r7, u1, tnc
166 1.1.1.2 mrg ld u0, 0(up)
167 1.1.1.2 mrg std r11, 0(rp)
168 1.1.1.2 mrg or r11, r8, r9
169 1.1.1.2 mrg L(L00): srd r8, u1, cnt C loop entry for n & 3 == 0
170 1.1.1.2 mrg sld r9, u0, tnc
171 1.1.1.2 mrg ld u1, 8(up)
172 1.1.1.2 mrg std r10, 8(rp)
173 1.1.1.2 mrg or r10, r12, r7
174 1.1.1.2 mrg L(L11): srd r12, u0, cnt C loop entry for n & 3 == 3
175 1.1.1.2 mrg sld r7, u1, tnc
176 1.1.1.2 mrg ld u0, 16(up)
177 1.1.1.2 mrg std r11, 16(rp)
178 1.1.1.2 mrg or r11, r8, r9
179 1.1.1.2 mrg L(L10): srd r8, u1, cnt C loop entry for n & 3 == 2
180 1.1.1.2 mrg sld r9, u0, tnc
181 1.1.1.2 mrg ld u1, 24(up)
182 1.1.1.2 mrg addi up, up, 32
183 1.1.1.2 mrg std r10, 24(rp)
184 1.1.1.2 mrg addi rp, rp, 32
185 1.1.1.2 mrg or r10, r12, r7
186 1.1.1.2 mrg bdnz L(top)
187 1.1.1.2 mrg
188 1.1.1.2 mrg ALIGN(32)
189 1.1.1.2 mrg L(end): srd r12, u0, cnt C wind-down: no more loads; combine and store the last five limbs
190 1.1.1.2 mrg sld r7, u1, tnc
191 1.1.1.2 mrg std r11, 0(rp)
192 1.1.1.2 mrg L(cj4): or r11, r8, r9 C entry with four limbs left to store, used by b00 when n == 4
193 1.1.1.2 mrg srd r8, u1, cnt C top result limb: only the right-shifted part of the last source limb
194 1.1.1.2 mrg std r10, 8(rp)
195 1.1.1.2 mrg L(cj3): or r10, r12, r7 C entry with three limbs left to store, used by b11 when n == 3
196 1.1.1.2 mrg std r11, 16(rp)
197 1.1.1.2 mrg L(cj2): std r10, 24(rp) C entry with two limbs left to store, used by b10 when n == 2
198 1.1.1.2 mrg std r8, 32(rp) C store the top result limb
199 1.1 mrg
200 1.1.1.2 mrg L(ret): ld r31, -8(r1) C restore the two registers saved on entry
201 1.1.1.2 mrg ld r30, -16(r1)
202 1.1 mrg ifdef(`HAVE_ABI_mode32',
203 1.1.1.2 mrg ` srdi r3, retval, 32
204 1.1.1.2 mrg mr r4, retval
205 1.1.1.2 mrg ',` mr r3, retval')
206 1.1 mrg blr
207 1.1 mrg EPILOGUE()
208