logops_n.asm revision 1.1.1.3 1 1.1 mrg dnl IA-64 mpn_and_n, mpn_andn_n, mpn_nand_n, mpn_ior_n, mpn_iorn_n,
2 1.1 mrg dnl mpn_nior_n, mpn_xor_n, mpn_xnor_n -- mpn bitwise logical operations.
3 1.1 mrg
4 1.1.1.2 mrg dnl Contributed to the GNU project by Torbjorn Granlund.
5 1.1.1.2 mrg
6 1.1.1.3 mrg dnl Copyright 2003-2005 Free Software Foundation, Inc.
7 1.1.1.3 mrg
8 1.1 mrg dnl This file is part of the GNU MP Library.
9 1.1 mrg dnl
10 1.1 mrg dnl The GNU MP Library is free software; you can redistribute it and/or modify
11 1.1.1.3 mrg dnl it under the terms of either:
12 1.1.1.3 mrg dnl
13 1.1.1.3 mrg dnl * the GNU Lesser General Public License as published by the Free
14 1.1.1.3 mrg dnl Software Foundation; either version 3 of the License, or (at your
15 1.1.1.3 mrg dnl option) any later version.
16 1.1.1.3 mrg dnl
17 1.1.1.3 mrg dnl or
18 1.1.1.3 mrg dnl
19 1.1.1.3 mrg dnl * the GNU General Public License as published by the Free Software
20 1.1.1.3 mrg dnl Foundation; either version 2 of the License, or (at your option) any
21 1.1.1.3 mrg dnl later version.
22 1.1.1.3 mrg dnl
23 1.1.1.3 mrg dnl or both in parallel, as here.
24 1.1 mrg dnl
25 1.1 mrg dnl The GNU MP Library is distributed in the hope that it will be useful, but
26 1.1 mrg dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
27 1.1.1.3 mrg dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
28 1.1.1.3 mrg dnl for more details.
29 1.1 mrg dnl
30 1.1.1.3 mrg dnl You should have received copies of the GNU General Public License and the
31 1.1.1.3 mrg dnl GNU Lesser General Public License along with the GNU MP Library. If not,
32 1.1.1.3 mrg dnl see https://www.gnu.org/licenses/.
33 1.1 mrg
34 1.1 mrg include(`../config.m4')
35 1.1 mrg
36 1.1 mrg C cycles/limb
37 1.1 mrg C Itanium: 2
38 1.1 mrg C Itanium 2: 1
39 1.1 mrg
40 1.1 mrg C TODO
41 1.1 mrg C * Use rp,rpx scheme of aors_n.asm to allow parallel stores (useful in
42 1.1 mrg C wind-down code).
43 1.1 mrg
44 1.1 mrg C INPUT PARAMETERS
C Symbolic names for the four argument registers used by the routine below.
C   rp  r32  destination pointer; only ever used as the address of st8
C   up  r33  first source pointer; read with ld8 and post-incremented by 8
C   vp  r34  second source pointer; read with ld8 and post-incremented by 8
C   n   r35  element count; n mod 4 picks the entry path and n / 4, after a
C            small per-path adjustment, becomes the ar.lc loop count
45 1.1 mrg define(`rp', `r32')
46 1.1 mrg define(`up', `r33')
47 1.1 mrg define(`vp', `r34')
48 1.1 mrg define(`n', `r35')
49 1.1 mrg
C Operation selection.  Each ifdef below fires when its OPERATION_xxx symbol
C is defined (presumably by the build, one symbol per generated object --
C confirm against the build rules) and supplies three macros:
C   func      the name passed to PROLOGUE below
C   logop     the two-input instruction: and, andcm, or, xor
C   notormov  the final step: a plain mov, or a subtraction from -1, which
C             yields the bitwise complement of its input
C andcm dst = a, b computes a AND NOT b, so:
C   andn_n  is  up AND NOT vp  directly;
C   iorn_n  swaps the inputs to get vp AND NOT up and then complements that,
C           which equals up OR NOT vp.
C If none of the symbols is defined, none of the three macros gets defined
C here.
50 1.1 mrg ifdef(`OPERATION_and_n',
51 1.1 mrg ` define(`func',`mpn_and_n')
52 1.1 mrg define(`logop', `and $1 = $2, $3')
53 1.1 mrg define(`notormov', `mov $1 = $2')')
54 1.1 mrg ifdef(`OPERATION_andn_n',
55 1.1 mrg ` define(`func',`mpn_andn_n')
56 1.1 mrg define(`logop', `andcm $1 = $2, $3')
57 1.1 mrg define(`notormov', `mov $1 = $2')')
58 1.1 mrg ifdef(`OPERATION_nand_n',
59 1.1 mrg ` define(`func',`mpn_nand_n')
60 1.1 mrg define(`logop', `and $1 = $2, $3')
61 1.1 mrg define(`notormov', `sub $1 = -1, $2')')
62 1.1 mrg ifdef(`OPERATION_ior_n',
63 1.1 mrg ` define(`func',`mpn_ior_n')
64 1.1 mrg define(`logop', `or $1 = $2, $3')
65 1.1 mrg define(`notormov', `mov $1 = $2')')
66 1.1 mrg ifdef(`OPERATION_iorn_n',
67 1.1 mrg ` define(`func',`mpn_iorn_n')
68 1.1 mrg define(`logop', `andcm $1 = $3, $2')
69 1.1 mrg define(`notormov', `sub $1 = -1, $2')')
70 1.1 mrg ifdef(`OPERATION_nior_n',
71 1.1 mrg ` define(`func',`mpn_nior_n')
72 1.1 mrg define(`logop', `or $1 = $2, $3')
73 1.1 mrg define(`notormov', `sub $1 = -1, $2')')
74 1.1 mrg ifdef(`OPERATION_xor_n',
75 1.1 mrg ` define(`func',`mpn_xor_n')
76 1.1 mrg define(`logop', `xor $1 = $2, $3')
77 1.1 mrg define(`notormov', `mov $1 = $2')')
78 1.1 mrg ifdef(`OPERATION_xnor_n',
79 1.1 mrg ` define(`func',`mpn_xnor_n')
80 1.1 mrg define(`logop', `xor $1 = $2, $3')
81 1.1 mrg define(`notormov', `sub $1 = -1, $2')')
82 1.1 mrg
C NOTE(review): MULFUNC_PROLOGUE is defined outside this file; presumably it
C announces the set of entry points this one source can provide -- confirm.
83 1.1 mrg MULFUNC_PROLOGUE(mpn_and_n mpn_andn_n mpn_nand_n mpn_ior_n mpn_iorn_n mpn_nior_n mpn_xor_n mpn_xnor_n)
84 1.1 mrg
C func(rp, up, vp, n): for each of the n 8-byte elements, load one element
C from up and one from vp, combine them with logop, pass the result through
C notormov, and store it at rp.
C
C Register roles, as used below:
C   r10, r11          first element pair, loaded before the dispatch
C   r16-r19, r20-r23  in-flight elements from up and vp respectively
C   r14, r15          alternating logop results
C   r8, r9            alternating notormov results; these are what is stored
C   r2                saved copy of ar.lc, restored before returning
C   p15               set when 4 < n
C
C Layout: four feed-in paths chosen by n mod 4, a main loop handling four
C elements per iteration with three extra mid-loop entry labels, and a
C fall-through wind-down chain .Lcj6 ... .Lcj1.
C
C NOTE(review): no path treats n = 0 specially; the first ld8 pair and the
C .Lb00 path run regardless, so callers presumably guarantee n >= 1 -- confirm.
85 1.1 mrg ASM_START()
86 1.1 mrg PROLOGUE(func)
87 1.1 mrg .prologue
88 1.1 mrg .save ar.lc, r2
89 1.1 mrg .body
C With HAVE_ABI_32 the three pointers go through addp4 and n through zxt4
C before any of them is used.
90 1.1 mrg ifdef(`HAVE_ABI_32',
91 1.1 mrg ` addp4 rp = 0, rp C M I
92 1.1 mrg addp4 up = 0, up C M I
93 1.1 mrg addp4 vp = 0, vp C M I
94 1.1.1.3 mrg nop.m 0
95 1.1.1.3 mrg nop.m 0
96 1.1 mrg zxt4 n = n C I
97 1.1 mrg ;;
98 1.1 mrg ')
C Start the first pair of loads and save ar.lc, which the loop setup below
C overwrites.
99 1.1 mrg {.mmi
100 1.1 mrg ld8 r10 = [up], 8 C M
101 1.1 mrg ld8 r11 = [vp], 8 C M
102 1.1 mrg mov.i r2 = ar.lc C I0
103 1.1 mrg }
C r14 = n mod 4, p15 = (4 < n), n = n / 4 by unsigned shift.
104 1.1 mrg {.mmi
105 1.1 mrg and r14 = 3, n C M I
106 1.1 mrg cmp.lt p15, p14 = 4, n C M I
107 1.1 mrg shr.u n = n, 2 C I0
108 1.1 mrg ;;
109 1.1 mrg }
C Dispatch on n mod 4: values 1, 2 and 3 branch away; 0 falls through to
C .Lb00.
110 1.1 mrg {.mmi
111 1.1 mrg cmp.eq p6, p0 = 1, r14 C M I
112 1.1 mrg cmp.eq p7, p0 = 2, r14 C M I
113 1.1 mrg cmp.eq p8, p0 = 3, r14 C M I
114 1.1 mrg }
115 1.1 mrg {.bbb
116 1.1 mrg (p6) br.dptk .Lb01 C B
117 1.1 mrg (p7) br.dptk .Lb10 C B
118 1.1 mrg (p8) br.dptk .Lb11 C B
119 1.1 mrg }
120 1.1 mrg
C n mod 4 = 0: preload three more pairs.  With p15 clear the four loaded
C elements are finished through .Lcj4; with p15 set, .grt4 loads further
C pairs, sets ar.lc and joins the loop at .LL00.
121 1.1 mrg .Lb00: ld8 r17 = [up], 8 C M
122 1.1 mrg ld8 r21 = [vp], 8 C M
123 1.1 mrg add n = -2, n C M I
124 1.1 mrg ;;
125 1.1 mrg ld8 r18 = [up], 8 C M
126 1.1 mrg ld8 r22 = [vp], 8 C M
127 1.1 mrg ;;
128 1.1 mrg ld8 r19 = [up], 8 C M
129 1.1 mrg ld8 r23 = [vp], 8 C M
130 1.1 mrg (p15) br.cond.dpnt .grt4 C B
131 1.1 mrg
132 1.1 mrg logop( r14, r10, r11) C M I
133 1.1 mrg ;;
134 1.1 mrg logop( r15, r17, r21) C M I
135 1.1 mrg notormov( r8, r14) C M I
136 1.1 mrg br .Lcj4 C B
137 1.1 mrg
138 1.1 mrg .grt4: logop( r14, r10, r11) C M I
139 1.1 mrg ld8 r16 = [up], 8 C M
140 1.1 mrg ld8 r20 = [vp], 8 C M
141 1.1 mrg ;;
142 1.1 mrg logop( r15, r17, r21) C M I
143 1.1 mrg ld8 r17 = [up], 8 C M
144 1.1 mrg mov.i ar.lc = n C I0
145 1.1 mrg notormov( r8, r14) C M I
146 1.1 mrg ld8 r21 = [vp], 8 C M
147 1.1 mrg br .LL00 C B
148 1.1 mrg
C n mod 4 = 1: with p15 clear the single loaded element is finished through
C .Lcj1.  Otherwise .grt1 preloads four pairs and sets ar.lc; its br.cloop
C then chooses between joining the loop (.grt5, then .LL01) and the
C five-element wind-down at .Lcj5.
149 1.1 mrg .Lb01: add n = -1, n C M I
150 1.1 mrg logop( r15, r10, r11) C M I
151 1.1 mrg (p15) br.cond.dpnt .grt1 C B
152 1.1 mrg ;;
153 1.1 mrg
154 1.1 mrg notormov( r9, r15) C M I
155 1.1 mrg br .Lcj1 C B
156 1.1 mrg
157 1.1 mrg .grt1: ld8 r16 = [up], 8 C M
158 1.1 mrg ld8 r20 = [vp], 8 C M
159 1.1 mrg ;;
160 1.1 mrg ld8 r17 = [up], 8 C M
161 1.1 mrg ld8 r21 = [vp], 8 C M
162 1.1 mrg mov.i ar.lc = n C I0
163 1.1 mrg ;;
164 1.1 mrg ld8 r18 = [up], 8 C M
165 1.1 mrg ld8 r22 = [vp], 8 C M
166 1.1 mrg ;;
167 1.1 mrg ld8 r19 = [up], 8 C M
168 1.1 mrg ld8 r23 = [vp], 8 C M
169 1.1 mrg br.cloop.dptk .grt5 C B
170 1.1 mrg ;;
171 1.1 mrg
172 1.1 mrg logop( r14, r16, r20) C M I
173 1.1 mrg notormov( r9, r15) C M I
174 1.1 mrg br .Lcj5 C B
175 1.1 mrg
176 1.1 mrg .grt5: logop( r14, r16, r20) C M I
177 1.1 mrg ld8 r16 = [up], 8 C M
178 1.1 mrg notormov( r9, r15) C M I
179 1.1 mrg ld8 r20 = [vp], 8 C M
180 1.1 mrg br .LL01 C B
181 1.1 mrg
C n mod 4 = 2: with p15 clear the two loaded elements are finished through
C .Lcj2.  Otherwise .grt2 preloads four more pairs and sets ar.lc; its
C br.cloop either enters the loop at .Loop or falls to the six-element
C wind-down at .Lcj6.
182 1.1 mrg .Lb10: ld8 r19 = [up], 8 C M
183 1.1 mrg ld8 r23 = [vp], 8 C M
184 1.1 mrg (p15) br.cond.dpnt .grt2 C B
185 1.1 mrg
186 1.1 mrg logop( r14, r10, r11) C M I
187 1.1 mrg ;;
188 1.1 mrg logop( r15, r19, r23) C M I
189 1.1 mrg notormov( r8, r14) C M I
190 1.1 mrg br .Lcj2 C B
191 1.1 mrg
192 1.1 mrg .grt2: ld8 r16 = [up], 8 C M
193 1.1 mrg ld8 r20 = [vp], 8 C M
194 1.1 mrg add n = -1, n C M I
195 1.1 mrg ;;
196 1.1 mrg ld8 r17 = [up], 8 C M
197 1.1 mrg ld8 r21 = [vp], 8 C M
198 1.1 mrg logop( r14, r10, r11) C M I
199 1.1 mrg ;;
200 1.1 mrg ld8 r18 = [up], 8 C M
201 1.1 mrg ld8 r22 = [vp], 8 C M
202 1.1 mrg mov.i ar.lc = n C I0
203 1.1 mrg ;;
204 1.1 mrg logop( r15, r19, r23) C M I
205 1.1 mrg ld8 r19 = [up], 8 C M
206 1.1 mrg notormov( r8, r14) C M I
207 1.1 mrg ld8 r23 = [vp], 8 C M
208 1.1 mrg br.cloop.dptk .Loop C B
209 1.1 mrg br .Lcj6 C B
210 1.1 mrg
C n mod 4 = 3: with p15 clear the three loaded elements are finished through
C .Lcj3.  Otherwise .grt3 loads further pairs, sets ar.lc and joins the loop
C at .LL11.
211 1.1 mrg .Lb11: ld8 r18 = [up], 8 C M
212 1.1 mrg ld8 r22 = [vp], 8 C M
213 1.1 mrg add n = -1, n C M I
214 1.1 mrg ;;
215 1.1 mrg ld8 r19 = [up], 8 C M
216 1.1 mrg ld8 r23 = [vp], 8 C M
217 1.1 mrg logop( r15, r10, r11) C M I
218 1.1 mrg (p15) br.cond.dpnt .grt3 C B
219 1.1 mrg ;;
220 1.1 mrg
221 1.1 mrg logop( r14, r18, r22) C M I
222 1.1 mrg notormov( r9, r15) C M I
223 1.1 mrg br .Lcj3 C B
224 1.1 mrg
225 1.1 mrg .grt3: ld8 r16 = [up], 8 C M
226 1.1 mrg ld8 r20 = [vp], 8 C M
227 1.1 mrg ;;
228 1.1 mrg ld8 r17 = [up], 8 C M
229 1.1 mrg ld8 r21 = [vp], 8 C M
230 1.1 mrg mov.i ar.lc = n C I0
231 1.1 mrg ;;
232 1.1 mrg logop( r14, r18, r22) C M I
233 1.1 mrg ld8 r18 = [up], 8 C M
234 1.1 mrg notormov( r9, r15) C M I
235 1.1 mrg ld8 r22 = [vp], 8 C M
236 1.1 mrg br .LL11 C B
237 1.1 mrg
238 1.1 mrg C *** MAIN LOOP START ***
C Each pass through the loop performs four stores, four logop steps, four
C notormov steps and four pairs of loads.  Every stage stores the notormov
C result prepared by the stage before it, so results alternate between
C r8 and r9.  br.cloop repeats the loop under control of ar.lc.
C .LL01, .LL00 and .LL11 are the mid-loop entry points used by the feed-in
C paths above.
239 1.1 mrg ALIGN(32)
240 1.1 mrg .Loop: st8 [rp] = r8, 8 C M
241 1.1 mrg logop( r14, r16, r20) C M I
242 1.1 mrg notormov( r9, r15) C M I
243 1.1 mrg ld8 r16 = [up], 8 C M
244 1.1 mrg ld8 r20 = [vp], 8 C M
245 1.1 mrg nop.b 0
246 1.1 mrg ;;
247 1.1 mrg .LL01: st8 [rp] = r9, 8 C M
248 1.1 mrg logop( r15, r17, r21) C M I
249 1.1 mrg notormov( r8, r14) C M I
250 1.1 mrg ld8 r17 = [up], 8 C M
251 1.1 mrg ld8 r21 = [vp], 8 C M
252 1.1 mrg nop.b 0
253 1.1 mrg ;;
254 1.1 mrg .LL00: st8 [rp] = r8, 8 C M
255 1.1 mrg logop( r14, r18, r22) C M I
256 1.1 mrg notormov( r9, r15) C M I
257 1.1 mrg ld8 r18 = [up], 8 C M
258 1.1 mrg ld8 r22 = [vp], 8 C M
259 1.1 mrg nop.b 0
260 1.1 mrg ;;
261 1.1 mrg .LL11: st8 [rp] = r9, 8 C M
262 1.1 mrg logop( r15, r19, r23) C M I
263 1.1 mrg notormov( r8, r14) C M I
264 1.1 mrg ld8 r19 = [up], 8 C M
265 1.1 mrg ld8 r23 = [vp], 8 C M
266 1.1 mrg br.cloop.dptk .Loop ;; C B
267 1.1 mrg C *** MAIN LOOP END ***
268 1.1 mrg
C Wind-down: a fall-through chain with no further loads.  Entering at .LcjK
C performs the last K stores, finishing the elements already held in
C registers.
269 1.1 mrg .Lcj6: st8 [rp] = r8, 8 C M
270 1.1 mrg logop( r14, r16, r20) C M I
271 1.1 mrg notormov( r9, r15) C M I
272 1.1 mrg ;;
273 1.1 mrg .Lcj5: st8 [rp] = r9, 8 C M
274 1.1 mrg logop( r15, r17, r21) C M I
275 1.1 mrg notormov( r8, r14) C M I
276 1.1 mrg ;;
277 1.1 mrg .Lcj4: st8 [rp] = r8, 8 C M
278 1.1 mrg logop( r14, r18, r22) C M I
279 1.1 mrg notormov( r9, r15) C M I
280 1.1 mrg ;;
281 1.1 mrg .Lcj3: st8 [rp] = r9, 8 C M
282 1.1 mrg logop( r15, r19, r23) C M I
283 1.1 mrg notormov( r8, r14) C M I
284 1.1 mrg ;;
285 1.1 mrg .Lcj2: st8 [rp] = r8, 8 C M
286 1.1 mrg notormov( r9, r15) C M I
287 1.1 mrg ;;
C Final store, then put back the ar.lc value saved in r2 at entry and return.
288 1.1 mrg .Lcj1: st8 [rp] = r9, 8 C M
289 1.1 mrg mov.i ar.lc = r2 C I0
290 1.1 mrg br.ret.sptk.many b0 C B
291 1.1 mrg EPILOGUE()
292 1.1 mrg ASM_END()
293