lshift.asm revision 1.1.1.2 1 dnl Intel Atom mpn_lshift -- mpn left shift.
2
3 dnl Copyright 2011 Free Software Foundation, Inc.
4
5 dnl Contributed to the GNU project by Torbjorn Granlund and Marco Bodrato.
6
7 dnl This file is part of the GNU MP Library.
8 dnl
9 dnl The GNU MP Library is free software; you can redistribute it and/or modify
10 dnl it under the terms of either:
11 dnl
12 dnl * the GNU Lesser General Public License as published by the Free
13 dnl Software Foundation; either version 3 of the License, or (at your
14 dnl option) any later version.
15 dnl
16 dnl or
17 dnl
18 dnl * the GNU General Public License as published by the Free Software
19 dnl Foundation; either version 2 of the License, or (at your option) any
20 dnl later version.
21 dnl
22 dnl or both in parallel, as here.
23 dnl
24 dnl The GNU MP Library is distributed in the hope that it will be useful, but
25 dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
26 dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
27 dnl for more details.
28 dnl
29 dnl You should have received copies of the GNU General Public License and the
30 dnl GNU Lesser General Public License along with the GNU MP Library. If not,
31 dnl see https://www.gnu.org/licenses/.
32
33 include(`../config.m4')
34
35 C mp_limb_t mpn_lshift (mp_ptr dst, mp_srcptr src, mp_size_t size,
36 C unsigned cnt);
37
38 C cycles/limb
39 C cnt!=1 cnt==1
40 C P5
41 C P6 model 0-8,10-12
42 C P6 model 9 (Banias)
43 C P6 model 13 (Dothan)
44 C P4 model 0 (Willamette)
45 C P4 model 1 (?)
46 C P4 model 2 (Northwood)
47 C P4 model 3 (Prescott)
48 C P4 model 4 (Nocona)
49 C Intel Atom 5 2.5
50 C AMD K6
51 C AMD K7
52 C AMD K8
53 C AMD K10
54
55 defframe(PARAM_CNT, 16) C 4th argument in the prototype comment: cnt
56 defframe(PARAM_SIZE,12) C 3rd argument: size
57 defframe(PARAM_SRC, 8) C 2nd argument: src
58 defframe(PARAM_DST, 4) C 1st argument: dst
59
60 dnl re-use parameter space
C Each argument slot is written only after mpn_lshift has loaded the
C argument it originally held.
61 define(SAVE_UP,`PARAM_CNT') C holds the caller value of %esi
62 define(VAR_COUNT,`PARAM_SIZE') C pass counter of the general shift loop
63 define(SAVE_EBX,`PARAM_SRC') C holds the caller value of %ebx
64 define(SAVE_EBP,`PARAM_DST') C holds the caller value of %ebp
65
66 define(`rp', `%edi') C destination limb pointer
67 define(`up', `%esi') C source limb pointer
68 define(`cnt', `%ecx') C shift count, %cl feeds the shl/shr instructions
69
70 ASM_START()
71 TEXT
72 ALIGN(8)
73 deflit(`FRAME',0)
C mpn_lshift (dst, src, size, cnt), arguments taken from the stack.
C
C Two code paths:
C   L(normal)   general shift by cnt, walking from the most significant limb
C               downwards and storing two result limbs per loop pass.
C   L(special)  cnt == 1 only, walking from the least significant limb
C               upwards with an add/adc carry chain, four limbs per pass.
C Both paths leave in %eax the bits shifted out of the most significant
C source limb.
C
C The incoming %esi, %edi, %ebx and %ebp are preserved: %esi, %ebx and %ebp
C are parked in argument slots that have already been read (SAVE_UP,
C SAVE_EBX, SAVE_EBP), %edi is pushed.
C NOTE(review): there is no test for size == 0, and shift counts rely on the
C CPU using %cl modulo 32; presumably callers guarantee size >= 1 and
C 1 <= cnt <= 31 -- confirm against the mpn_lshift contract.
74 PROLOGUE(mpn_lshift)
75 mov PARAM_CNT, cnt
76 mov PARAM_SIZE, %edx C edx = size
77 mov up, SAVE_UP C the cnt slot was read above, reuse it for %esi
78 mov PARAM_SRC, up
79 push rp FRAME_pushl()
80 mov PARAM_DST, rp
81
82 C We can use faster code for shift-by-1 under certain conditions.
C The shift-by-1 code runs from the low limb upwards, so it is used only
C when dst is at or below src, or at or beyond the end of src.
83 cmp $1,cnt
84 jne L(normal)
85 cmpl rp, up
86 jnc L(special) C jump if s_ptr >= res_ptr
87 leal (up,%edx,4),%eax
88 cmpl %eax,rp
89 jnc L(special) C jump if res_ptr >= s_ptr + size
90
91 L(normal):
92 lea -4(up,%edx,4), up C up = &src[size-1]
93 mov %ebx, SAVE_EBX
94 lea -4(rp,%edx,4), rp C rp = &dst[size-1]
95
96 shr %edx C edx = size/2, CF = 1 if size is odd
97 mov (up), %eax C eax = most significant source limb
98 mov %edx, VAR_COUNT
99 jnc L(evn) C mov leaves CF alone: jump if size is even
100
C Odd size: handle the top limb on its own first.
101 mov %eax, %ebx
102 shl %cl, %ebx C ebx = top limb << cnt
103 neg cnt C %cl now acts as 32-cnt
104 shr %cl, %eax C eax = return value (bits shifted out at the top)
105 test %edx, %edx
106 jnz L(gt1) C size/2 != 0: more limbs follow
107 mov %ebx, (rp) C size == 1: store the only result limb
108 jmp L(quit)
109
C NOTE(review): the push below carries no FRAME_pushl(), presumably because
C the code that follows textually, L(evn), is entered with only rp pushed,
C while L(lo1) is assembled after the FRAME_pushl() that precedes L(top)
C -- confirm against the defframe/FRAME macro definitions.
110 L(gt1): mov %ebp, SAVE_EBP
111 push %eax C park the return value, popped before L(end)
112 mov -4(up), %eax C eax = src[size-2]
113 mov %eax, %ebp
114 shr %cl, %eax
115 jmp L(lo1) C enter the loop at its midpoint
116
117 L(evn): mov %ebp, SAVE_EBP
118 neg cnt C %cl acts as 32-cnt
119 mov %eax, %ebp C ebp = top limb, shifted left at L(top) or L(end)
120 mov -4(up), %edx C edx = src[size-2]
121 shr %cl, %eax C eax = return value (bits shifted out at the top)
122 mov %edx, %ebx
123 shr %cl, %edx
124 neg cnt C %cl = cnt again
125 decl VAR_COUNT
126 lea 4(rp), rp C lea keeps the flags set by decl
127 lea -4(up), up
128 jz L(end) C size == 2: no loop passes needed
129 push %eax FRAME_pushl() C park the return value across the loop
130
131 ALIGN(8)
C On entry to L(top): %cl = cnt, ebp and ebx hold two adjacent source limbs
C (ebp the more significant one) and edx = the ebx limb >> (32-cnt).
C Each pass stores two result limbs; VAR_COUNT counts the passes.
132 L(top): shl %cl, %ebp
133 or %ebp, %edx C edx = next result limb
134 shl %cl, %ebx
135 neg cnt C %cl acts as 32-cnt
136 mov -4(up), %eax
137 mov %eax, %ebp
138 mov %edx, -4(rp)
139 shr %cl, %eax
140 lea -8(rp), rp
141 L(lo1): mov -8(up), %edx
142 or %ebx, %eax C eax = next result limb
143 mov %edx, %ebx
144 shr %cl, %edx
145 lea -8(up), up
146 neg cnt C %cl = cnt again
147 mov %eax, (rp)
148 decl VAR_COUNT
149 jg L(top)
150
151 pop %eax FRAME_popl() C recover the return value
C Wind down: combine and store the two least significant result limbs.
152 L(end):
153 shl %cl, %ebp
154 shl %cl, %ebx C ebx = least significant result limb
155 or %ebp, %edx
156 mov SAVE_EBP, %ebp
157 mov %edx, -4(rp)
158 mov %ebx, -8(rp)
159
C Restore the registers saved on entry; eax already holds the return value.
160 L(quit):
161 mov SAVE_UP, up
162 mov SAVE_EBX, %ebx
163 pop rp FRAME_popl()
164 ret
165
C Shift-by-1 path.  Only rp has been pushed when control arrives here.
C Limbs are doubled with add/adc from the low end upwards; CF carries the
C top bit of each limb into the next one.  The loop handles four limbs per
C pass and the code below picks the entry point from size mod 4.
166 L(special):
167 deflit(`FRAME',4)
168 lea 3(%edx), %eax C size + 3
169 dec %edx C size - 1
170 mov (up), %ecx C ecx = src[0], cnt is no longer needed (it is 1)
171 shr $2, %eax C (size + 3) / 4
172 and $3, %edx C (size - 1) % 4, and also clears CF
173 jz L(goloop) C jmp if size == 1 (mod 4)
174 shr %edx
175 jnc L(odd) C jump if size == 3 (mod 4), CF is 0 here
176
177 add %ecx, %ecx C double the lowest limb, CF = its top bit
178 lea 4(up), up
179 mov %ecx, (rp)
180 mov (up), %ecx
181 lea 4(rp), rp
182
183 dec %edx C dec leaves CF untouched
184 jnz L(goloop) C jump if size == 2 (mod 4)
185 L(odd): lea -8(up), up
186 lea -8(rp), rp
187 jmp L(sentry) C reached if size == 0 or 3 (mod 4)
188
C Between the adc instructions only mov, lea and decl are used, none of
C which modify CF.
189 L(sloop):
190 adc %ecx, %ecx C 2*limb + carry in, carry out in CF
191 mov 4(up), %edx
192 mov %ecx, (rp)
193 adc %edx, %edx
194 mov 8(up), %ecx
195 mov %edx, 4(rp)
196 L(sentry):
197 adc %ecx, %ecx
198 mov 12(up), %edx
199 mov %ecx, 8(rp)
200 adc %edx, %edx
201 lea 16(up), up
202 mov %edx, 12(rp)
203 lea 16(rp), rp
204 mov (up), %ecx
205 L(goloop):
206 decl %eax
207 jnz L(sloop)
208
209 L(squit):
210 adc %ecx, %ecx C most significant limb
211 mov %ecx, (rp)
212 adc %eax, %eax C eax is 0 here, so eax = final carry = return value
213
214 mov SAVE_UP, up
215 pop rp FRAME_popl()
216 ret
217 EPILOGUE()
218 ASM_END()
219