addmul_1.asm revision 1.1.1.2 1 1.1 mrg dnl IA-64 mpn_addmul_1 -- Multiply a limb vector with a limb and add the
2 1.1 mrg dnl result to a second limb vector.
3 1.1 mrg
4 1.1.1.2 mrg dnl Contributed to the GNU project by Torbjorn Granlund.
5 1.1.1.2 mrg
6 1.1 mrg dnl Copyright 2000, 2001, 2002, 2003, 2004, 2005, 2007 Free Software
7 1.1 mrg dnl Foundation, Inc.
8 1.1 mrg
9 1.1 mrg dnl This file is part of the GNU MP Library.
10 1.1 mrg
11 1.1 mrg dnl The GNU MP Library is free software; you can redistribute it and/or modify
12 1.1 mrg dnl it under the terms of the GNU Lesser General Public License as published
13 1.1 mrg dnl by the Free Software Foundation; either version 3 of the License, or (at
14 1.1 mrg dnl your option) any later version.
15 1.1 mrg
16 1.1 mrg dnl The GNU MP Library is distributed in the hope that it will be useful, but
17 1.1 mrg dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
18 1.1 mrg dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
19 1.1 mrg dnl License for more details.
20 1.1 mrg
21 1.1 mrg dnl You should have received a copy of the GNU Lesser General Public License
22 1.1 mrg dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
23 1.1 mrg
24 1.1 mrg include(`../config.m4')
25 1.1 mrg
26 1.1 mrg C cycles/limb
27 1.1 mrg C Itanium: 3.0
28 1.1 mrg C Itanium 2: 2.0
29 1.1 mrg
30 1.1 mrg C TODO
31 1.1 mrg C * Further optimize feed-in and wind-down code, both for speed and code size.
32 1.1 mrg C * Handle low limb input and results specially, using a common stf8 in the
33 1.1 mrg C epilogue.
34 1.1 mrg C * Use 1 c/l carry propagation scheme in wind-down code.
35 1.1 mrg C * Use extra pointer registers for `up' and rp to speed up feed-in loads.
36 1.1 mrg C * Work out final differences with mul_1.asm. That function is 300 bytes
37 1.1 mrg C smaller than this due to better loop scheduling and thus simpler feed-in
38 1.1 mrg C code.
39 1.1 mrg
40 1.1 mrg C INPUT PARAMETERS
C rp is r32: limb vector that is read as the addend and written with the result.
41 1.1 mrg define(`rp', `r32')
C up is r33: limb vector whose limbs are multiplied by vl.
42 1.1 mrg define(`up', `r33')
C n is r34: limb count.  The code loads the first limb of both vectors
C unconditionally, so n is presumably required to be at least 1 (confirm
C against the mpn interface documentation).
43 1.1 mrg define(`n', `r34')
C vl is r35: the single limb multiplier, moved into f6 at entry.
44 1.1 mrg define(`vl', `r35')
45 1.1 mrg
46 1.1 mrg ASM_START()
C mpn_addmul_1 entry: load the first limb pair, set up the loop counter and
C the carry predicates, then dispatch on n mod 4.
C
C Register roles established here:
C   r20   store pointer, a copy of rp taken before the loads advance rp
C   r2    saved copy of ar.lc, restored before every return
C   f6    the multiplier vl
C   f7    first limb loaded through up
C   f8    first limb loaded through rp
C   r14   n mod 4, used only for the dispatch below
C   ar.lc (n-1) >> 2, consumed by the br.cloop instructions
47 1.1 mrg PROLOGUE(mpn_addmul_1)
48 1.1 mrg .prologue
C Unwind annotation: ar.lc is preserved in r2.
49 1.1 mrg .save ar.lc, r2
50 1.1 mrg .body
51 1.1 mrg
C With the 32-bit ABI, widen the two pointers and zero extend the count.
52 1.1 mrg ifdef(`HAVE_ABI_32',
53 1.1 mrg ` addp4 rp = 0, rp C M I
54 1.1 mrg addp4 up = 0, up C M I
55 1.1 mrg zxt4 n = n C I
56 1.1 mrg ;;
57 1.1 mrg ')
C r15 = n - 1, r20 = result store pointer, r2 = caller ar.lc.
58 1.1 mrg {.mmi
59 1.1 mrg adds r15 = -1, n C M I
60 1.1 mrg mov r20 = rp C M I
61 1.1 mrg mov.i r2 = ar.lc C I0
62 1.1 mrg }
C Load the first limb of each vector (both pointers post-increment by 8)
C and compute n mod 4.
63 1.1 mrg {.mmi
64 1.1 mrg ldf8 f7 = [up], 8 C M
65 1.1 mrg ldf8 f8 = [rp], 8 C M
66 1.1 mrg and r14 = 3, n C M I
67 1.1 mrg ;;
68 1.1 mrg }
C Move vl to the FP side for xma; p10 = (n mod 4 == 0); r31 = (n-1) >> 2.
69 1.1 mrg {.mmi
70 1.1 mrg setf.sig f6 = vl C M2 M3
71 1.1 mrg cmp.eq p10, p0 = 0, r14 C M I
72 1.1 mrg shr.u r31 = r15, 2 C I0
73 1.1 mrg }
C p11 = (n mod 4 == 2), p12 = (n mod 4 == 3).
74 1.1 mrg {.mmi
75 1.1 mrg cmp.eq p11, p0 = 2, r14 C M I
76 1.1 mrg cmp.eq p12, p0 = 3, r14 C M I
77 1.1 mrg nop.i 0 C I
78 1.1 mrg ;;
79 1.1 mrg }
C r0 != r0 is always false, so this clears p6 and p8 and sets p7 and p9.
C p6 and p8 are the carry-set predicates of the add chain, p7 and p9 the
C carry-clear ones, so the chain starts with no carry.  ar.lc gets the
C loop count computed above.
80 1.1 mrg {.mii
81 1.1 mrg cmp.ne p6, p7 = r0, r0 C M I
82 1.1 mrg mov.i ar.lc = r31 C I0
83 1.1 mrg cmp.ne p8, p9 = r0, r0 C M I
84 1.1 mrg }
C Dispatch on n mod 4.  When none of the three branches is taken
C (n mod 4 == 1) execution falls through to .Lb01.
85 1.1 mrg {.bbb
86 1.1 mrg (p10) br.dptk .Lb00 C B
87 1.1 mrg (p11) br.dptk .Lb10 C B
88 1.1 mrg (p12) br.dptk .Lb11 C B
89 1.1 mrg ;;
90 1.1 mrg }
91 1.1 mrg
C Feed-in code for n mod 4 == 1.
C br.cloop branches and decrements ar.lc while ar.lc is nonzero.  Since
C ar.lc was set to (n-1) >> 2, the first br.cloop is taken when n >= 5 and
C the fall-through below handles n == 1.
92 1.1 mrg .Lb01: br.cloop.dptk .grt1 C B
93 1.1 mrg
C n == 1: form the single product plus addend.  The low limb is stored
C through r20, the high limb is left in r8 for the return, ar.lc is restored.
94 1.1 mrg xma.l f39 = f7, f6, f8 C F
95 1.1 mrg xma.hu f43 = f7, f6, f8 C F
96 1.1 mrg ;;
97 1.1 mrg getf.sig r8 = f43 C M2
98 1.1 mrg stf8 [r20] = f39 C M2 M3
99 1.1 mrg mov.i ar.lc = r2 C I0
100 1.1 mrg br.ret.sptk.many b0 C B
101 1.1 mrg
C n >= 5: load four more limb pairs while forming the first product
C (low half in f39, high half in f43).
102 1.1 mrg .grt1:
103 1.1 mrg ldf8 f32 = [up], 8
104 1.1 mrg ldf8 f44 = [rp], 8
105 1.1 mrg ;;
106 1.1 mrg ldf8 f33 = [up], 8
107 1.1 mrg ldf8 f45 = [rp], 8
108 1.1 mrg ;;
109 1.1 mrg ldf8 f34 = [up], 8
110 1.1 mrg xma.l f39 = f7, f6, f8
111 1.1 mrg ldf8 f46 = [rp], 8
112 1.1 mrg xma.hu f43 = f7, f6, f8
113 1.1 mrg ;;
114 1.1 mrg ldf8 f35 = [up], 8
115 1.1 mrg ldf8 f47 = [rp], 8
C Second br.cloop: taken when n >= 9.
116 1.1 mrg br.cloop.dptk .grt5
117 1.1 mrg
C n == 5: no more loads.  The first low limb has no incoming high limb to
C add, so it is stored directly from f39.  The remaining products are
C formed and moved to general registers, then the carry chain is joined
C at .Lcj5.
118 1.1 mrg xma.l f36 = f32, f6, f44
119 1.1 mrg xma.hu f40 = f32, f6, f44
120 1.1 mrg ;;
121 1.1 mrg stf8 [r20] = f39, 8
122 1.1 mrg xma.l f37 = f33, f6, f45
123 1.1 mrg xma.hu f41 = f33, f6, f45
124 1.1 mrg ;;
125 1.1 mrg getf.sig r31 = f43
126 1.1 mrg getf.sig r24 = f36
127 1.1 mrg xma.l f38 = f34, f6, f46
128 1.1 mrg xma.hu f42 = f34, f6, f46
129 1.1 mrg ;;
130 1.1 mrg getf.sig r28 = f40
131 1.1 mrg getf.sig r25 = f37
132 1.1 mrg xma.l f39 = f35, f6, f47
133 1.1 mrg xma.hu f43 = f35, f6, f47
134 1.1 mrg ;;
135 1.1 mrg getf.sig r29 = f41
136 1.1 mrg getf.sig r26 = f38
137 1.1 mrg br .Lcj5
138 1.1 mrg
C n >= 9: prime the software pipeline for the main loop.
C r30 is the high-limb term that the first loop step adds to the first low
C limb (r27); there is no earlier product, so it is zero.
139 1.1 mrg .grt5:
140 1.1 mrg mov r30 = 0
141 1.1 mrg xma.l f36 = f32, f6, f44
142 1.1 mrg xma.hu f40 = f32, f6, f44
143 1.1 mrg ;;
144 1.1 mrg ldf8 f32 = [up], 8
145 1.1 mrg xma.l f37 = f33, f6, f45
146 1.1 mrg ldf8 f44 = [rp], 8
147 1.1 mrg xma.hu f41 = f33, f6, f45
148 1.1 mrg ;;
149 1.1 mrg ldf8 f33 = [up], 8
150 1.1 mrg getf.sig r27 = f39
151 1.1 mrg ;;
152 1.1 mrg getf.sig r31 = f43
153 1.1 mrg xma.l f38 = f34, f6, f46
154 1.1 mrg ldf8 f45 = [rp], 8
155 1.1 mrg xma.hu f42 = f34, f6, f46
156 1.1 mrg ;;
157 1.1 mrg ldf8 f34 = [up], 8
158 1.1 mrg getf.sig r24 = f36
159 1.1 mrg ;;
160 1.1 mrg getf.sig r28 = f40
161 1.1 mrg xma.l f39 = f35, f6, f47
162 1.1 mrg ldf8 f46 = [rp], 8
163 1.1 mrg xma.hu f43 = f35, f6, f47
164 1.1 mrg ;;
165 1.1 mrg ldf8 f35 = [up], 8
166 1.1 mrg getf.sig r25 = f37
C Third br.cloop: enter the main loop at its top when n >= 13, otherwise
C (n == 9) go straight to the wind-down code at .Le0.
167 1.1 mrg br.cloop.dptk .Loop
168 1.1 mrg br .Le0
169 1.1 mrg
170 1.1 mrg
C Feed-in code for n mod 4 == 2.
C The second limb pair is loaded right away.  The br.cloop is taken when
C ar.lc is nonzero, that is when n >= 6; the fall-through handles n == 2.
171 1.1 mrg .Lb10: ldf8 f35 = [up], 8
172 1.1 mrg ldf8 f47 = [rp], 8
173 1.1 mrg br.cloop.dptk .grt2
174 1.1 mrg
C n == 2: form both products.  The first low limb is stored directly from
C f38.  r30 (first high limb) and r27 (second low limb) are summed at
C .Lcj2, and r8 holds the second high limb for the return value.
175 1.1 mrg xma.l f38 = f7, f6, f8
176 1.1 mrg xma.hu f42 = f7, f6, f8
177 1.1 mrg ;;
178 1.1 mrg xma.l f39 = f35, f6, f47
179 1.1 mrg xma.hu f43 = f35, f6, f47
180 1.1 mrg ;;
181 1.1 mrg getf.sig r30 = f42
182 1.1 mrg stf8 [r20] = f38, 8
183 1.1 mrg getf.sig r27 = f39
184 1.1 mrg getf.sig r8 = f43
185 1.1 mrg br .Lcj2
186 1.1 mrg
C n >= 6: load four more limb pairs while forming the first two products.
187 1.1 mrg .grt2:
188 1.1 mrg ldf8 f32 = [up], 8
189 1.1 mrg ldf8 f44 = [rp], 8
190 1.1 mrg ;;
191 1.1 mrg ldf8 f33 = [up], 8
192 1.1 mrg xma.l f38 = f7, f6, f8
193 1.1 mrg ldf8 f45 = [rp], 8
194 1.1 mrg xma.hu f42 = f7, f6, f8
195 1.1 mrg ;;
196 1.1 mrg ldf8 f34 = [up], 8
197 1.1 mrg xma.l f39 = f35, f6, f47
198 1.1 mrg ldf8 f46 = [rp], 8
199 1.1 mrg xma.hu f43 = f35, f6, f47
200 1.1 mrg ;;
201 1.1 mrg ldf8 f35 = [up], 8
202 1.1 mrg ldf8 f47 = [rp], 8
C Second br.cloop: taken when n >= 10.
203 1.1 mrg br.cloop.dptk .grt6
204 1.1 mrg
C n == 6: no more loads.  Store the first low limb directly from f38, form
C the remaining products and join the carry chain at .Lcj6.
205 1.1 mrg stf8 [r20] = f38, 8
206 1.1 mrg xma.l f36 = f32, f6, f44
207 1.1 mrg xma.hu f40 = f32, f6, f44
208 1.1 mrg ;;
209 1.1 mrg getf.sig r30 = f42
210 1.1 mrg getf.sig r27 = f39
211 1.1 mrg xma.l f37 = f33, f6, f45
212 1.1 mrg xma.hu f41 = f33, f6, f45
213 1.1 mrg ;;
214 1.1 mrg getf.sig r31 = f43
215 1.1 mrg getf.sig r24 = f36
216 1.1 mrg xma.l f38 = f34, f6, f46
217 1.1 mrg xma.hu f42 = f34, f6, f46
218 1.1 mrg ;;
219 1.1 mrg getf.sig r28 = f40
220 1.1 mrg getf.sig r25 = f37
221 1.1 mrg xma.l f39 = f35, f6, f47
222 1.1 mrg xma.hu f43 = f35, f6, f47
223 1.1 mrg br .Lcj6
224 1.1 mrg
C n >= 10: prime the pipeline and enter the main loop at .LL10.
C r29 is the high-limb term that .LL10 adds to the first low limb (r26);
C there is no earlier product, so it is zero.
225 1.1 mrg .grt6:
226 1.1 mrg mov r29 = 0
227 1.1 mrg xma.l f36 = f32, f6, f44
228 1.1 mrg xma.hu f40 = f32, f6, f44
229 1.1 mrg ;;
230 1.1 mrg ldf8 f32 = [up], 8
231 1.1 mrg getf.sig r26 = f38
232 1.1 mrg ;;
233 1.1 mrg getf.sig r30 = f42
234 1.1 mrg xma.l f37 = f33, f6, f45
235 1.1 mrg ldf8 f44 = [rp], 8
236 1.1 mrg xma.hu f41 = f33, f6, f45
237 1.1 mrg ;;
238 1.1 mrg ldf8 f33 = [up], 8
239 1.1 mrg getf.sig r27 = f39
240 1.1 mrg ;;
241 1.1 mrg getf.sig r31 = f43
242 1.1 mrg xma.l f38 = f34, f6, f46
243 1.1 mrg ldf8 f45 = [rp], 8
244 1.1 mrg xma.hu f42 = f34, f6, f46
245 1.1 mrg ;;
246 1.1 mrg ldf8 f34 = [up], 8
247 1.1 mrg getf.sig r24 = f36
248 1.1 mrg br .LL10
249 1.1 mrg
250 1.1 mrg
C Feed-in code for n mod 4 == 3.
C The second and third limb pairs are loaded right away.  The br.cloop is
C taken when ar.lc is nonzero, that is when n >= 7; the fall-through
C handles n == 3.
251 1.1 mrg .Lb11: ldf8 f34 = [up], 8
252 1.1 mrg ldf8 f46 = [rp], 8
253 1.1 mrg ;;
254 1.1 mrg ldf8 f35 = [up], 8
255 1.1 mrg ldf8 f47 = [rp], 8
256 1.1 mrg br.cloop.dptk .grt3
257 1.1 mrg ;;
258 1.1 mrg
C n == 3: form all three products.  The first low limb is stored directly
C from f37, the other halves go to general registers for the carry chain
C at .Lcj3, and r8 holds the last high limb for the return value.
259 1.1 mrg xma.l f37 = f7, f6, f8
260 1.1 mrg xma.hu f41 = f7, f6, f8
261 1.1 mrg xma.l f38 = f34, f6, f46
262 1.1 mrg xma.hu f42 = f34, f6, f46
263 1.1 mrg xma.l f39 = f35, f6, f47
264 1.1 mrg xma.hu f43 = f35, f6, f47
265 1.1 mrg ;;
266 1.1 mrg getf.sig r29 = f41
267 1.1 mrg stf8 [r20] = f37, 8
268 1.1 mrg getf.sig r26 = f38
269 1.1 mrg getf.sig r30 = f42
270 1.1 mrg getf.sig r27 = f39
271 1.1 mrg getf.sig r8 = f43
272 1.1 mrg br .Lcj3
273 1.1 mrg
C n >= 7: load four more limb pairs while forming the first three products.
274 1.1 mrg .grt3:
275 1.1 mrg ldf8 f32 = [up], 8
276 1.1 mrg xma.l f37 = f7, f6, f8
277 1.1 mrg ldf8 f44 = [rp], 8
278 1.1 mrg xma.hu f41 = f7, f6, f8
279 1.1 mrg ;;
280 1.1 mrg ldf8 f33 = [up], 8
281 1.1 mrg xma.l f38 = f34, f6, f46
282 1.1 mrg ldf8 f45 = [rp], 8
283 1.1 mrg xma.hu f42 = f34, f6, f46
284 1.1 mrg ;;
285 1.1 mrg ldf8 f34 = [up], 8
286 1.1 mrg xma.l f39 = f35, f6, f47
287 1.1 mrg ldf8 f46 = [rp], 8
288 1.1 mrg xma.hu f43 = f35, f6, f47
289 1.1 mrg ;;
290 1.1 mrg ldf8 f35 = [up], 8
C NOTE(review): this r25 is consumed only on the .grt7 path (by the add at
C .LL11).  On the fall-through path f37 is stored directly below and r25 is
C reloaded at .Lcj7 before its next use, which is presumably what the FIXME
C marks refer to.
291 1.1 mrg getf.sig r25 = f37 C FIXME
292 1.1 mrg ldf8 f47 = [rp], 8
C Second br.cloop: taken when n >= 11.
293 1.1 mrg br.cloop.dptk .grt7
294 1.1 mrg
C n == 7: no more loads.  Store the first low limb directly from f37, form
C the remaining products and join the carry chain at .Lcj7.
295 1.1 mrg getf.sig r29 = f41
296 1.1 mrg stf8 [r20] = f37, 8 C FIXME
297 1.1 mrg xma.l f36 = f32, f6, f44
298 1.1 mrg getf.sig r26 = f38
299 1.1 mrg xma.hu f40 = f32, f6, f44
300 1.1 mrg ;;
301 1.1 mrg getf.sig r30 = f42
302 1.1 mrg xma.l f37 = f33, f6, f45
303 1.1 mrg getf.sig r27 = f39
304 1.1 mrg xma.hu f41 = f33, f6, f45
305 1.1 mrg ;;
306 1.1 mrg getf.sig r31 = f43
307 1.1 mrg xma.l f38 = f34, f6, f46
308 1.1 mrg getf.sig r24 = f36
309 1.1 mrg xma.hu f42 = f34, f6, f46
310 1.1 mrg br .Lcj7
311 1.1 mrg
C n >= 11: prime the pipeline and enter the main loop at .LL11.
C r28 is the high-limb term that .LL11 adds to the first low limb (r25);
C there is no earlier product, so it is zero.
312 1.1 mrg .grt7:
313 1.1 mrg getf.sig r29 = f41
314 1.1 mrg xma.l f36 = f32, f6, f44
315 1.1 mrg mov r28 = 0
316 1.1 mrg xma.hu f40 = f32, f6, f44
317 1.1 mrg ;;
318 1.1 mrg ldf8 f32 = [up], 8
319 1.1 mrg getf.sig r26 = f38
320 1.1 mrg ;;
321 1.1 mrg getf.sig r30 = f42
322 1.1 mrg xma.l f37 = f33, f6, f45
323 1.1 mrg ldf8 f44 = [rp], 8
324 1.1 mrg xma.hu f41 = f33, f6, f45
325 1.1 mrg ;;
326 1.1 mrg ldf8 f33 = [up], 8
327 1.1 mrg getf.sig r27 = f39
328 1.1 mrg br .LL11
329 1.1 mrg
330 1.1 mrg
C Feed-in code for n mod 4 == 0.
C Three more limb pairs are loaded while the first product is formed.  The
C br.cloop is taken when ar.lc is nonzero, that is when n >= 8; the
C fall-through handles n == 4.
331 1.1 mrg .Lb00: ldf8 f33 = [up], 8
332 1.1 mrg ldf8 f45 = [rp], 8
333 1.1 mrg ;;
334 1.1 mrg ldf8 f34 = [up], 8
335 1.1 mrg ldf8 f46 = [rp], 8
336 1.1 mrg ;;
337 1.1 mrg ldf8 f35 = [up], 8
338 1.1 mrg xma.l f36 = f7, f6, f8
339 1.1 mrg ldf8 f47 = [rp], 8
340 1.1 mrg xma.hu f40 = f7, f6, f8
341 1.1 mrg br.cloop.dptk .grt4
342 1.1 mrg
C n == 4: form the remaining three products.  The first low limb is stored
C directly from f36 and the carry chain is joined at .Lcj4, which also
C fetches the last high limb into r8.
343 1.1 mrg xma.l f37 = f33, f6, f45
344 1.1 mrg xma.hu f41 = f33, f6, f45
345 1.1 mrg xma.l f38 = f34, f6, f46
346 1.1 mrg xma.hu f42 = f34, f6, f46
347 1.1 mrg ;;
348 1.1 mrg getf.sig r28 = f40
349 1.1 mrg stf8 [r20] = f36, 8
350 1.1 mrg xma.l f39 = f35, f6, f47
351 1.1 mrg getf.sig r25 = f37
352 1.1 mrg xma.hu f43 = f35, f6, f47
353 1.1 mrg ;;
354 1.1 mrg getf.sig r29 = f41
355 1.1 mrg getf.sig r26 = f38
356 1.1 mrg getf.sig r30 = f42
357 1.1 mrg getf.sig r27 = f39
358 1.1 mrg br .Lcj4
359 1.1 mrg
C n >= 8: load four more limb pairs while forming the next three products.
360 1.1 mrg .grt4:
361 1.1 mrg ldf8 f32 = [up], 8
362 1.1 mrg xma.l f37 = f33, f6, f45
363 1.1 mrg ldf8 f44 = [rp], 8
364 1.1 mrg xma.hu f41 = f33, f6, f45
365 1.1 mrg ;;
366 1.1 mrg ldf8 f33 = [up], 8
367 1.1 mrg xma.l f38 = f34, f6, f46
368 1.1 mrg ldf8 f45 = [rp], 8
369 1.1 mrg xma.hu f42 = f34, f6, f46
370 1.1 mrg ;;
371 1.1 mrg ldf8 f34 = [up], 8
C NOTE(review): this r24 is consumed only on the .grt8 path (by the add at
C .LL00).  On the fall-through path f36 is stored directly below and r24 is
C reloaded at .Lcj8 before its next use, which is presumably what the FIXME
C marks refer to.
372 1.1 mrg getf.sig r24 = f36 C FIXME
373 1.1 mrg xma.l f39 = f35, f6, f47
374 1.1 mrg ldf8 f46 = [rp], 8
375 1.1 mrg getf.sig r28 = f40
376 1.1 mrg xma.hu f43 = f35, f6, f47
377 1.1 mrg ;;
378 1.1 mrg ldf8 f35 = [up], 8
379 1.1 mrg getf.sig r25 = f37
380 1.1 mrg ldf8 f47 = [rp], 8
C Second br.cloop: taken when n >= 12.
381 1.1 mrg br.cloop.dptk .grt8
382 1.1 mrg
C n == 8: no more loads.  Store the first low limb directly from f36, form
C two more products and join the carry chain at .Lcj8.
383 1.1 mrg getf.sig r29 = f41
384 1.1 mrg stf8 [r20] = f36, 8 C FIXME
385 1.1 mrg xma.l f36 = f32, f6, f44
386 1.1 mrg getf.sig r26 = f38
387 1.1 mrg getf.sig r30 = f42
388 1.1 mrg xma.hu f40 = f32, f6, f44
389 1.1 mrg ;;
390 1.1 mrg xma.l f37 = f33, f6, f45
391 1.1 mrg getf.sig r27 = f39
392 1.1 mrg xma.hu f41 = f33, f6, f45
393 1.1 mrg br .Lcj8
394 1.1 mrg
C n >= 12: prime the pipeline and enter the main loop at .LL00.
C r31 is the high-limb term that .LL00 adds to the first low limb (r24);
C there is no earlier product, so it is zero.
395 1.1 mrg .grt8:
396 1.1 mrg getf.sig r29 = f41
397 1.1 mrg xma.l f36 = f32, f6, f44
398 1.1 mrg mov r31 = 0
399 1.1 mrg xma.hu f40 = f32, f6, f44
400 1.1 mrg ;;
401 1.1 mrg ldf8 f32 = [up], 8
402 1.1 mrg getf.sig r26 = f38
403 1.1 mrg br .LL00
404 1.1 mrg
405 1.1 mrg
406 1.1 mrg C *** MAIN LOOP START ***
C Software-pipelined main loop, four limbs per iteration.
C
C Each of the four steps does the following, using rotating register sets:
C   xma.l / xma.hu   form the low and high halves of up limb times f6 plus
C                    the rp limb (f36..f39 low, f40..f43 high)
C   getf.sig         move earlier halves to general registers
C                    (r24..r27 low, r28..r31 high)
C   add              sum a low half with the high half of the previous
C                    product, plus one when the carry predicate is set
C   cmp.leu/cmp.ltu  derive the carry out: sum <= low when a carry came
C                    in, sum < low when none did
C   st8              store the sum through r20, advancing it by 8
C   ldf8             fetch limbs for later steps through up and rp
C
C The carry alternates between the pairs p6/p7 and p8/p9, where p6 and p8
C mean carry set and p7 and p9 mean carry clear.  The .pred.rel mutex
C directives declare each pair as mutually exclusive to the assembler, so
C the two predicated writes to one register within an instruction group are
C accepted.
C
C .LL00, .LL11 and .LL10 are mid-loop entry points used by the feed-in code
C for n mod 4 == 0, 3 and 2; the n mod 4 == 1 feed-in enters at .Loop.
C
C The trailing comment columns appear to be: instruction number, the
C numbers of the instructions it feeds or is fed by, and the issue cycle on
C Itanium (i1) and Itanium 2 (i2) -- TODO confirm against upstream.
407 1.1 mrg ALIGN(32) C insn fed cycle #
408 1.1 mrg .Loop:
409 1.1 mrg .pred.rel "mutex", p6, p7 C num by i1 i2
410 1.1 mrg getf.sig r29 = f41 C 00 16 0 0
411 1.1 mrg xma.l f36 = f32, f6, f44 C 01 06,15 0 0
412 1.1 mrg (p6) add r14 = r30, r27, 1 C 02 0 0
413 1.1 mrg ldf8 f47 = [rp], 8 C 03 0 0
414 1.1 mrg xma.hu f40 = f32, f6, f44 C 04 06,15 0 0
415 1.1 mrg (p7) add r14 = r30, r27 C 05 0 0
416 1.1 mrg ;;
417 1.1 mrg .pred.rel "mutex", p6, p7
418 1.1 mrg ldf8 f32 = [up], 8 C 06 1 1
419 1.1 mrg (p6) cmp.leu p8, p9 = r14, r27 C 07 1 1
420 1.1 mrg (p7) cmp.ltu p8, p9 = r14, r27 C 08 1 1
421 1.1 mrg getf.sig r26 = f38 C 09 25 2 1
422 1.1 mrg st8 [r20] = r14, 8 C 10 2 1
423 1.1 mrg nop.b 0 C 11 2 1
424 1.1 mrg ;;
425 1.1 mrg .LL00:
426 1.1 mrg .pred.rel "mutex", p8, p9
427 1.1 mrg getf.sig r30 = f42 C 12 28 3 2
428 1.1 mrg xma.l f37 = f33, f6, f45 C 13 18,27 3 2
429 1.1 mrg (p8) add r16 = r31, r24, 1 C 14 3 2
430 1.1 mrg ldf8 f44 = [rp], 8 C 15 3 2
431 1.1 mrg xma.hu f41 = f33, f6, f45 C 16 18,27 3 2
432 1.1 mrg (p9) add r16 = r31, r24 C 17 3 2
433 1.1 mrg ;;
434 1.1 mrg .pred.rel "mutex", p8, p9
435 1.1 mrg ldf8 f33 = [up], 8 C 18 4 3
436 1.1 mrg (p8) cmp.leu p6, p7 = r16, r24 C 19 4 3
437 1.1 mrg (p9) cmp.ltu p6, p7 = r16, r24 C 20 4 3
438 1.1 mrg getf.sig r27 = f39 C 21 37 5 3
439 1.1 mrg st8 [r20] = r16, 8 C 22 5 3
440 1.1 mrg nop.b 0 C 23 5 3
441 1.1 mrg ;;
442 1.1 mrg .LL11:
443 1.1 mrg .pred.rel "mutex", p6, p7
444 1.1 mrg getf.sig r31 = f43 C 24 40 6 4
445 1.1 mrg xma.l f38 = f34, f6, f46 C 25 30,39 6 4
446 1.1 mrg (p6) add r14 = r28, r25, 1 C 26 6 4
447 1.1 mrg ldf8 f45 = [rp], 8 C 27 6 4
448 1.1 mrg xma.hu f42 = f34, f6, f46 C 28 30,39 6 4
449 1.1 mrg (p7) add r14 = r28, r25 C 29 6 4
450 1.1 mrg ;;
451 1.1 mrg .pred.rel "mutex", p6, p7
452 1.1 mrg ldf8 f34 = [up], 8 C 30 7 5
453 1.1 mrg (p6) cmp.leu p8, p9 = r14, r25 C 31 7 5
454 1.1 mrg (p7) cmp.ltu p8, p9 = r14, r25 C 32 7 5
455 1.1 mrg getf.sig r24 = f36 C 33 01 8 5
456 1.1 mrg st8 [r20] = r14, 8 C 34 8 5
457 1.1 mrg nop.b 0 C 35 8 5
458 1.1 mrg ;;
459 1.1 mrg .LL10:
460 1.1 mrg .pred.rel "mutex", p8, p9
461 1.1 mrg getf.sig r28 = f40 C 36 04 9 6
462 1.1 mrg xma.l f39 = f35, f6, f47 C 37 42,03 9 6
463 1.1 mrg (p8) add r16 = r29, r26, 1 C 38 9 6
464 1.1 mrg ldf8 f46 = [rp], 8 C 39 9 6
465 1.1 mrg xma.hu f43 = f35, f6, f47 C 40 42,03 9 6
466 1.1 mrg (p9) add r16 = r29, r26 C 41 9 6
467 1.1 mrg ;;
468 1.1 mrg .pred.rel "mutex", p8, p9
469 1.1 mrg ldf8 f35 = [up], 8 C 42 10 7
470 1.1 mrg (p8) cmp.leu p6, p7 = r16, r26 C 43 10 7
471 1.1 mrg (p9) cmp.ltu p6, p7 = r16, r26 C 44 10 7
472 1.1 mrg getf.sig r25 = f37 C 45 13 11 7
473 1.1 mrg st8 [r20] = r16, 8 C 46 11 7
C Loop back while ar.lc is nonzero; otherwise fall through to .Le0.
474 1.1 mrg br.cloop.dptk .Loop C 47 11 7
475 1.1 mrg C *** MAIN LOOP END ***
476 1.1 mrg ;;
C Wind-down code: drain the software pipeline.
C
C .Le0 is reached by falling out of the main loop or directly from .grt5.
C It repeats the loop steps without any further loads through up; the only
C remaining load is the last rp limb into f47.  The steps then shed the
C xma and getf.sig work as the last products complete, leaving only the
C add, compare and store chain.
C
C .LcjN (N from 8 down to 2) is the join point for the short path that
C handles n == N.  That path has already stored the first limb directly,
C so N-1 sums remain to be stored from .LcjN onward.
C
C The carry predicates alternate exactly as in the main loop: p6 and p8
C mean carry set, p7 and p9 mean carry clear.
477 1.1 mrg .Le0:
478 1.1 mrg .pred.rel "mutex", p6, p7
479 1.1 mrg getf.sig r29 = f41 C
480 1.1 mrg xma.l f36 = f32, f6, f44 C
481 1.1 mrg (p6) add r14 = r30, r27, 1 C
482 1.1 mrg ldf8 f47 = [rp], 8 C
483 1.1 mrg xma.hu f40 = f32, f6, f44 C
484 1.1 mrg (p7) add r14 = r30, r27 C
485 1.1 mrg ;;
486 1.1 mrg .pred.rel "mutex", p6, p7
487 1.1 mrg (p6) cmp.leu p8, p9 = r14, r27 C
488 1.1 mrg (p7) cmp.ltu p8, p9 = r14, r27 C
489 1.1 mrg getf.sig r26 = f38 C
490 1.1 mrg st8 [r20] = r14, 8 C
491 1.1 mrg ;;
492 1.1 mrg .pred.rel "mutex", p8, p9
493 1.1 mrg getf.sig r30 = f42 C
494 1.1 mrg xma.l f37 = f33, f6, f45 C
495 1.1 mrg (p8) add r16 = r31, r24, 1 C
496 1.1 mrg xma.hu f41 = f33, f6, f45 C
497 1.1 mrg (p9) add r16 = r31, r24 C
498 1.1 mrg ;;
499 1.1 mrg .pred.rel "mutex", p8, p9
500 1.1 mrg (p8) cmp.leu p6, p7 = r16, r24 C
501 1.1 mrg (p9) cmp.ltu p6, p7 = r16, r24 C
502 1.1 mrg getf.sig r27 = f39 C
503 1.1 mrg st8 [r20] = r16, 8 C
504 1.1 mrg ;;
505 1.1 mrg .Lcj8:
506 1.1 mrg .pred.rel "mutex", p6, p7
507 1.1 mrg getf.sig r31 = f43 C
508 1.1 mrg xma.l f38 = f34, f6, f46 C
509 1.1 mrg (p6) add r14 = r28, r25, 1 C
510 1.1 mrg xma.hu f42 = f34, f6, f46 C
511 1.1 mrg (p7) add r14 = r28, r25 C
512 1.1 mrg ;;
513 1.1 mrg .pred.rel "mutex", p6, p7
514 1.1 mrg (p6) cmp.leu p8, p9 = r14, r25 C
515 1.1 mrg (p7) cmp.ltu p8, p9 = r14, r25 C
516 1.1 mrg getf.sig r24 = f36 C
517 1.1 mrg st8 [r20] = r14, 8 C
518 1.1 mrg ;;
519 1.1 mrg .Lcj7:
520 1.1 mrg .pred.rel "mutex", p8, p9
521 1.1 mrg getf.sig r28 = f40 C
C Last product of the whole operation: low half in f39, high half in f43.
522 1.1 mrg xma.l f39 = f35, f6, f47 C
523 1.1 mrg (p8) add r16 = r29, r26, 1 C
524 1.1 mrg xma.hu f43 = f35, f6, f47 C
525 1.1 mrg (p9) add r16 = r29, r26 C
526 1.1 mrg ;;
527 1.1 mrg .pred.rel "mutex", p8, p9
528 1.1 mrg (p8) cmp.leu p6, p7 = r16, r26 C
529 1.1 mrg (p9) cmp.ltu p6, p7 = r16, r26 C
530 1.1 mrg getf.sig r25 = f37 C
531 1.1 mrg st8 [r20] = r16, 8 C
532 1.1 mrg ;;
C From here on no products remain to be formed; only transfers to general
C registers and the add, compare and store chain are left.
533 1.1 mrg .Lcj6:
534 1.1 mrg .pred.rel "mutex", p6, p7
535 1.1 mrg getf.sig r29 = f41 C
536 1.1 mrg (p6) add r14 = r30, r27, 1 C
537 1.1 mrg (p7) add r14 = r30, r27 C
538 1.1 mrg ;;
539 1.1 mrg .pred.rel "mutex", p6, p7
540 1.1 mrg (p6) cmp.leu p8, p9 = r14, r27 C
541 1.1 mrg (p7) cmp.ltu p8, p9 = r14, r27 C
542 1.1 mrg getf.sig r26 = f38 C
543 1.1 mrg st8 [r20] = r14, 8 C
544 1.1 mrg ;;
545 1.1 mrg .Lcj5:
546 1.1 mrg .pred.rel "mutex", p8, p9
547 1.1 mrg getf.sig r30 = f42 C
548 1.1 mrg (p8) add r16 = r31, r24, 1 C
549 1.1 mrg (p9) add r16 = r31, r24 C
550 1.1 mrg ;;
551 1.1 mrg .pred.rel "mutex", p8, p9
552 1.1 mrg (p8) cmp.leu p6, p7 = r16, r24 C
553 1.1 mrg (p9) cmp.ltu p6, p7 = r16, r24 C
554 1.1 mrg getf.sig r27 = f39 C
555 1.1 mrg st8 [r20] = r16, 8 C
556 1.1 mrg ;;
557 1.1 mrg .Lcj4:
558 1.1 mrg .pred.rel "mutex", p6, p7
C r8 receives the high half of the last product; it becomes the return
C value once the final carry has been added below.
559 1.1 mrg getf.sig r8 = f43 C
560 1.1 mrg (p6) add r14 = r28, r25, 1 C
561 1.1 mrg (p7) add r14 = r28, r25 C
562 1.1 mrg ;;
563 1.1 mrg .pred.rel "mutex", p6, p7
564 1.1 mrg st8 [r20] = r14, 8 C
565 1.1 mrg (p6) cmp.leu p8, p9 = r14, r25 C
566 1.1 mrg (p7) cmp.ltu p8, p9 = r14, r25 C
567 1.1 mrg ;;
C The n == 3 and n == 2 short paths join at .Lcj3 and .Lcj2 with r8
C already loaded by their own feed-in code.
568 1.1 mrg .Lcj3:
569 1.1 mrg .pred.rel "mutex", p8, p9
570 1.1 mrg (p8) add r16 = r29, r26, 1 C
571 1.1 mrg (p9) add r16 = r29, r26 C
572 1.1 mrg ;;
573 1.1 mrg .pred.rel "mutex", p8, p9
574 1.1 mrg st8 [r20] = r16, 8 C
575 1.1 mrg (p8) cmp.leu p6, p7 = r16, r26 C
576 1.1 mrg (p9) cmp.ltu p6, p7 = r16, r26 C
577 1.1 mrg ;;
578 1.1 mrg .Lcj2:
579 1.1 mrg .pred.rel "mutex", p6, p7
580 1.1 mrg (p6) add r14 = r30, r27, 1 C
581 1.1 mrg (p7) add r14 = r30, r27 C
582 1.1 mrg ;;
583 1.1 mrg .pred.rel "mutex", p6, p7
C Final store; r20 is not advanced any further.
584 1.1 mrg st8 [r20] = r14 C
585 1.1 mrg (p6) cmp.leu p8, p9 = r14, r27 C
586 1.1 mrg (p7) cmp.ltu p8, p9 = r14, r27 C
587 1.1 mrg ;;
C Fold the last carry into the returned high limb, restore the caller
C ar.lc from r2 and return.
588 1.1 mrg (p8) add r8 = 1, r8 C M I
589 1.1 mrg mov.i ar.lc = r2 C I0
590 1.1 mrg br.ret.sptk.many b0 C B
591 1.1 mrg EPILOGUE()
592 1.1 mrg ASM_END()
593