Home | History | Annotate | Line # | Download | only in ia64
addmul_1.asm revision 1.1.1.2
      1      1.1  mrg dnl  IA-64 mpn_addmul_1 -- Multiply a limb vector with a limb and add the
      2      1.1  mrg dnl  result to a second limb vector.
      3      1.1  mrg 
      4  1.1.1.2  mrg dnl  Contributed to the GNU project by Torbjorn Granlund.
      5  1.1.1.2  mrg 
      6      1.1  mrg dnl  Copyright 2000, 2001, 2002, 2003, 2004, 2005, 2007 Free Software
      7      1.1  mrg dnl  Foundation, Inc.
      8      1.1  mrg 
      9      1.1  mrg dnl  This file is part of the GNU MP Library.
     10      1.1  mrg 
     11      1.1  mrg dnl  The GNU MP Library is free software; you can redistribute it and/or modify
     12      1.1  mrg dnl  it under the terms of the GNU Lesser General Public License as published
     13      1.1  mrg dnl  by the Free Software Foundation; either version 3 of the License, or (at
     14      1.1  mrg dnl  your option) any later version.
     15      1.1  mrg 
     16      1.1  mrg dnl  The GNU MP Library is distributed in the hope that it will be useful, but
     17      1.1  mrg dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
     18      1.1  mrg dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
     19      1.1  mrg dnl  License for more details.
     20      1.1  mrg 
     21      1.1  mrg dnl  You should have received a copy of the GNU Lesser General Public License
     22      1.1  mrg dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
     23      1.1  mrg 
     24      1.1  mrg include(`../config.m4')
     25      1.1  mrg 
     26      1.1  mrg C         cycles/limb
     27      1.1  mrg C Itanium:    3.0
     28      1.1  mrg C Itanium 2:  2.0
     29      1.1  mrg 
     30      1.1  mrg C TODO
     31      1.1  mrg C  * Further optimize feed-in and wind-down code, both for speed and code size.
     32      1.1  mrg C  * Handle low limb input and results specially, using a common stf8 in the
     33      1.1  mrg C    epilogue.
     34      1.1  mrg C  * Use 1 c/l carry propagation scheme in wind-down code.
     35      1.1  mrg C  * Use extra pointer registers for `up' and rp to speed up feed-in loads.
     36      1.1  mrg C  * Work out final differences with mul_1.asm.  That function is 300 bytes
     37      1.1  mrg C    smaller than this due to better loop scheduling and thus simpler feed-in
     38      1.1  mrg C    code.
     39      1.1  mrg 
     40      1.1  mrg C INPUT PARAMETERS
                      C Symbolic names for the four argument registers r32-r35, as used by the
                      C code below:
                      C   rp  limb vector that is read, added to and written back in place
                      C   up  limb vector that is multiplied by vl
                      C   n   limb count; the code decodes it as n mod 4 and (n - 1) / 4
                      C   vl  the single multiplier limb; moved to f6 with setf.sig
     41      1.1  mrg define(`rp', `r32')
     42      1.1  mrg define(`up', `r33')
     43      1.1  mrg define(`n', `r34')
     44      1.1  mrg define(`vl', `r35')
     45      1.1  mrg 
     46      1.1  mrg ASM_START()
                      C mpn_addmul_1 (rp, up, n, vl)
                      C
                      C For each of the n limbs this forms up[i] * vl + rp[i] as a 128-bit value
                      C (xma.l gives the low 64 bits, xma.hu the high 64 bits), adds the high
                      C half of the previous product and the running carry to the low half, and
                      C stores the sum back over rp[i].  On return r8 holds the high half of the
                      C last product plus the final carry.
                      C
                      C NOTE(review): the first limbs are loaded unconditionally and the loop
                      C count is derived from n - 1, so this assumes n >= 1 -- TODO confirm
                      C against the callers.
                      C
                      C Register roles:
                      C   r20              store pointer, a copy of rp taken on entry
                      C   rp, up           load pointers, post-incremented by 8 per limb
                      C   r2               saved ar.lc, restored before each br.ret
                      C   r15, r31         n - 1 and (n - 1) / 4 during set-up
                      C   r14, r16         n mod 4 during set-up, then sums about to be stored
                      C   f6               vl
                      C   f7, f8           first limb of up and of rp
                      C   f32-f35          later limbs loaded from up
                      C   f44-f47          later limbs loaded from rp
                      C   f36-f39          low product halves,  copied to r24-r27
                      C   f40-f43          high product halves, copied to r28-r31
                      C   p6/p7, p8/p9     two complementary carry pairs; p6 (or p8) set means
                      C                    a carry of 1 enters the next add
     47      1.1  mrg PROLOGUE(mpn_addmul_1)
     48      1.1  mrg 	.prologue
     49      1.1  mrg 	.save	ar.lc, r2
     50      1.1  mrg 	.body
     51      1.1  mrg 
                      C 32-bit ABI only: widen the two pointer arguments with addp4 and
                      C zero-extend the count with zxt4 before they are used.
     52      1.1  mrg ifdef(`HAVE_ABI_32',
     53      1.1  mrg `	addp4		rp = 0, rp		C M I
     54      1.1  mrg 	addp4		up = 0, up		C M I
     55      1.1  mrg 	zxt4		n = n			C I
     56      1.1  mrg 	;;
     57      1.1  mrg ')
                      C Set-up: r15 = n - 1, r20 = rp, r2 = ar.lc, first limbs into f7 and f8,
                      C r14 = n mod 4, f6 = vl, ar.lc = (n - 1) / 4.  Comparing r0 with itself
                      C for inequality clears p6 and p8 (no carry) and sets p7 and p9.  The
                      C final bundle dispatches on n mod 4: 0 to .Lb00, 2 to .Lb10, 3 to .Lb11,
                      C and 1 falls through to .Lb01.
     58      1.1  mrg {.mmi
     59      1.1  mrg 	adds		r15 = -1, n		C M I
     60      1.1  mrg 	mov		r20 = rp		C M I
     61      1.1  mrg 	mov.i		r2 = ar.lc		C I0
     62      1.1  mrg }
     63      1.1  mrg {.mmi
     64      1.1  mrg 	ldf8		f7 = [up], 8		C M
     65      1.1  mrg 	ldf8		f8 = [rp], 8		C M
     66      1.1  mrg 	and		r14 = 3, n		C M I
     67      1.1  mrg 	;;
     68      1.1  mrg }
     69      1.1  mrg {.mmi
     70      1.1  mrg 	setf.sig	f6 = vl			C M2 M3
     71      1.1  mrg 	cmp.eq		p10, p0 = 0, r14	C M I
     72      1.1  mrg 	shr.u		r31 = r15, 2		C I0
     73      1.1  mrg }
     74      1.1  mrg {.mmi
     75      1.1  mrg 	cmp.eq		p11, p0 = 2, r14	C M I
     76      1.1  mrg 	cmp.eq		p12, p0 = 3, r14	C M I
     77      1.1  mrg 	nop.i		0			C I
     78      1.1  mrg 	;;
     79      1.1  mrg }
     80      1.1  mrg {.mii
     81      1.1  mrg 	cmp.ne		p6, p7 = r0, r0		C M I
     82      1.1  mrg 	mov.i		ar.lc = r31		C I0
     83      1.1  mrg 	cmp.ne		p8, p9 = r0, r0		C M I
     84      1.1  mrg }
     85      1.1  mrg {.bbb
     86      1.1  mrg   (p10)	br.dptk		.Lb00			C B
     87      1.1  mrg   (p11)	br.dptk		.Lb10			C B
     88      1.1  mrg   (p12)	br.dptk		.Lb11			C B
     89      1.1  mrg 	;;
     90      1.1  mrg }
     91      1.1  mrg 
                      C n mod 4 = 1.  br.cloop branches, decrementing ar.lc, while ar.lc is
                      C non-zero.  With ar.lc = 0 (n = 1) control falls through: one product,
                      C low half stored straight from f39, high half returned in r8.
     92      1.1  mrg .Lb01:	br.cloop.dptk	.grt1			C B
     93      1.1  mrg 
     94      1.1  mrg 	xma.l		f39 = f7, f6, f8	C F
     95      1.1  mrg 	xma.hu		f43 = f7, f6, f8	C F
     96      1.1  mrg 	;;
     97      1.1  mrg 	getf.sig	r8 = f43		C M2
     98      1.1  mrg 	stf8		[r20] = f39		C M2 M3
     99      1.1  mrg 	mov.i		ar.lc = r2		C I0
    100      1.1  mrg 	br.ret.sptk.many b0			C B
    101      1.1  mrg 
                      C n = 5, 9, 13, ...  Load four more limb pairs and start the first
                      C product.  For n = 5 the second br.cloop falls through: the first low
                      C limb is stored directly with stf8 and the rest is finished at .Lcj5.
    102      1.1  mrg .grt1:
    103      1.1  mrg 	ldf8		f32 = [up], 8
    104      1.1  mrg 	ldf8		f44 = [rp], 8
    105      1.1  mrg 	;;
    106      1.1  mrg 	ldf8		f33 = [up], 8
    107      1.1  mrg 	ldf8		f45 = [rp], 8
    108      1.1  mrg 	;;
    109      1.1  mrg 	ldf8		f34 = [up], 8
    110      1.1  mrg 	xma.l		f39 = f7, f6, f8
    111      1.1  mrg 	ldf8		f46 = [rp], 8
    112      1.1  mrg 	xma.hu		f43 = f7, f6, f8
    113      1.1  mrg 	;;
    114      1.1  mrg 	ldf8		f35 = [up], 8
    115      1.1  mrg 	ldf8		f47 = [rp], 8
    116      1.1  mrg 	br.cloop.dptk	.grt5
    117      1.1  mrg 
    118      1.1  mrg 	xma.l		f36 = f32, f6, f44
    119      1.1  mrg 	xma.hu		f40 = f32, f6, f44
    120      1.1  mrg 	;;
    121      1.1  mrg 	stf8		[r20] = f39, 8
    122      1.1  mrg 	xma.l		f37 = f33, f6, f45
    123      1.1  mrg 	xma.hu		f41 = f33, f6, f45
    124      1.1  mrg 	;;
    125      1.1  mrg 	getf.sig	r31 = f43
    126      1.1  mrg 	getf.sig	r24 = f36
    127      1.1  mrg 	xma.l		f38 = f34, f6, f46
    128      1.1  mrg 	xma.hu		f42 = f34, f6, f46
    129      1.1  mrg 	;;
    130      1.1  mrg 	getf.sig	r28 = f40
    131      1.1  mrg 	getf.sig	r25 = f37
    132      1.1  mrg 	xma.l		f39 = f35, f6, f47
    133      1.1  mrg 	xma.hu		f43 = f35, f6, f47
    134      1.1  mrg 	;;
    135      1.1  mrg 	getf.sig	r29 = f41
    136      1.1  mrg 	getf.sig	r26 = f38
    137      1.1  mrg 	br		.Lcj5
    138      1.1  mrg 
                      C n = 9, 13, ...  r30 = 0 stands in for a previous high half, so the
                      C first low limb (f39, copied to r27) is written by the integer add and
                      C st8 at the top of .Loop or .Le0.  The closing br.cloop enters .Loop if
                      C ar.lc is still non-zero, otherwise the wind-down at .Le0 is used.
    139      1.1  mrg .grt5:
    140      1.1  mrg 	mov		r30 = 0
    141      1.1  mrg 	xma.l		f36 = f32, f6, f44
    142      1.1  mrg 	xma.hu		f40 = f32, f6, f44
    143      1.1  mrg 	;;
    144      1.1  mrg 	ldf8		f32 = [up], 8
    145      1.1  mrg 	xma.l		f37 = f33, f6, f45
    146      1.1  mrg 	ldf8		f44 = [rp], 8
    147      1.1  mrg 	xma.hu		f41 = f33, f6, f45
    148      1.1  mrg 	;;
    149      1.1  mrg 	ldf8		f33 = [up], 8
    150      1.1  mrg 	getf.sig	r27 = f39
    151      1.1  mrg 	;;
    152      1.1  mrg 	getf.sig	r31 = f43
    153      1.1  mrg 	xma.l		f38 = f34, f6, f46
    154      1.1  mrg 	ldf8		f45 = [rp], 8
    155      1.1  mrg 	xma.hu		f42 = f34, f6, f46
    156      1.1  mrg 	;;
    157      1.1  mrg 	ldf8		f34 = [up], 8
    158      1.1  mrg 	getf.sig	r24 = f36
    159      1.1  mrg 	;;
    160      1.1  mrg 	getf.sig	r28 = f40
    161      1.1  mrg 	xma.l		f39 = f35, f6, f47
    162      1.1  mrg 	ldf8		f46 = [rp], 8
    163      1.1  mrg 	xma.hu		f43 = f35, f6, f47
    164      1.1  mrg 	;;
    165      1.1  mrg 	ldf8		f35 = [up], 8
    166      1.1  mrg 	getf.sig	r25 = f37
    167      1.1  mrg 	br.cloop.dptk	.Loop
    168      1.1  mrg 	br		.Le0
    169      1.1  mrg 
    170      1.1  mrg 
                      C n mod 4 = 2.  Load the second limb pair.  For n = 2 the br.cloop falls
                      C through: both products are formed, the first low limb is stored with
                      C stf8, r8 receives the last high half and .Lcj2 finishes.
    171      1.1  mrg .Lb10:	ldf8		f35 = [up], 8
    172      1.1  mrg 	ldf8		f47 = [rp], 8
    173      1.1  mrg 	br.cloop.dptk	.grt2
    174      1.1  mrg 
    175      1.1  mrg 	xma.l		f38 = f7, f6, f8
    176      1.1  mrg 	xma.hu		f42 = f7, f6, f8
    177      1.1  mrg 	;;
    178      1.1  mrg 	xma.l		f39 = f35, f6, f47
    179      1.1  mrg 	xma.hu		f43 = f35, f6, f47
    180      1.1  mrg 	;;
    181      1.1  mrg 	getf.sig	r30 = f42
    182      1.1  mrg 	stf8		[r20] = f38, 8
    183      1.1  mrg 	getf.sig	r27 = f39
    184      1.1  mrg 	getf.sig	r8 = f43
    185      1.1  mrg 	br		.Lcj2
    186      1.1  mrg 
                      C n = 6, 10, 14, ...  Load four more limb pairs while the first two
                      C products are formed.  For n = 6 the br.cloop falls through and the
                      C remaining work is finished at .Lcj6.
    187      1.1  mrg .grt2:
    188      1.1  mrg 	ldf8		f32 = [up], 8
    189      1.1  mrg 	ldf8		f44 = [rp], 8
    190      1.1  mrg 	;;
    191      1.1  mrg 	ldf8		f33 = [up], 8
    192      1.1  mrg 	xma.l		f38 = f7, f6, f8
    193      1.1  mrg 	ldf8		f45 = [rp], 8
    194      1.1  mrg 	xma.hu		f42 = f7, f6, f8
    195      1.1  mrg 	;;
    196      1.1  mrg 	ldf8		f34 = [up], 8
    197      1.1  mrg 	xma.l		f39 = f35, f6, f47
    198      1.1  mrg 	ldf8		f46 = [rp], 8
    199      1.1  mrg 	xma.hu		f43 = f35, f6, f47
    200      1.1  mrg 	;;
    201      1.1  mrg 	ldf8		f35 = [up], 8
    202      1.1  mrg 	ldf8		f47 = [rp], 8
    203      1.1  mrg 	br.cloop.dptk	.grt6
    204      1.1  mrg 
    205      1.1  mrg 	stf8		[r20] = f38, 8
    206      1.1  mrg 	xma.l		f36 = f32, f6, f44
    207      1.1  mrg 	xma.hu		f40 = f32, f6, f44
    208      1.1  mrg 	;;
    209      1.1  mrg 	getf.sig	r30 = f42
    210      1.1  mrg 	getf.sig	r27 = f39
    211      1.1  mrg 	xma.l		f37 = f33, f6, f45
    212      1.1  mrg 	xma.hu		f41 = f33, f6, f45
    213      1.1  mrg 	;;
    214      1.1  mrg 	getf.sig	r31 = f43
    215      1.1  mrg 	getf.sig	r24 = f36
    216      1.1  mrg 	xma.l		f38 = f34, f6, f46
    217      1.1  mrg 	xma.hu		f42 = f34, f6, f46
    218      1.1  mrg 	;;
    219      1.1  mrg 	getf.sig	r28 = f40
    220      1.1  mrg 	getf.sig	r25 = f37
    221      1.1  mrg 	xma.l		f39 = f35, f6, f47
    222      1.1  mrg 	xma.hu		f43 = f35, f6, f47
    223      1.1  mrg 	br		.Lcj6
    224      1.1  mrg 
                      C n = 10, 14, ...  r29 = 0 stands in for a previous high half, so the
                      C first low limb (f38, copied to r26) is written by the add and st8 at
                      C .LL10, where the main loop is entered.
    225      1.1  mrg .grt6:
    226      1.1  mrg 	mov		r29 = 0
    227      1.1  mrg 	xma.l		f36 = f32, f6, f44
    228      1.1  mrg 	xma.hu		f40 = f32, f6, f44
    229      1.1  mrg 	;;
    230      1.1  mrg 	ldf8		f32 = [up], 8
    231      1.1  mrg 	getf.sig	r26 = f38
    232      1.1  mrg 	;;
    233      1.1  mrg 	getf.sig	r30 = f42
    234      1.1  mrg 	xma.l		f37 = f33, f6, f45
    235      1.1  mrg 	ldf8		f44 = [rp], 8
    236      1.1  mrg 	xma.hu		f41 = f33, f6, f45
    237      1.1  mrg 	;;
    238      1.1  mrg 	ldf8		f33 = [up], 8
    239      1.1  mrg 	getf.sig	r27 = f39
    240      1.1  mrg 	;;
    241      1.1  mrg 	getf.sig	r31 = f43
    242      1.1  mrg 	xma.l		f38 = f34, f6, f46
    243      1.1  mrg 	ldf8		f45 = [rp], 8
    244      1.1  mrg 	xma.hu		f42 = f34, f6, f46
    245      1.1  mrg 	;;
    246      1.1  mrg 	ldf8		f34 = [up], 8
    247      1.1  mrg 	getf.sig	r24 = f36
    248      1.1  mrg 	br		.LL10
    249      1.1  mrg 
    250      1.1  mrg 
                      C n mod 4 = 3.  Load the second and third limb pairs.  For n = 3 the
                      C br.cloop falls through: three products, first low limb stored with
                      C stf8, r8 receives the last high half and .Lcj3 finishes.
    251      1.1  mrg .Lb11:	ldf8		f34 = [up], 8
    252      1.1  mrg 	ldf8		f46 = [rp], 8
    253      1.1  mrg 	;;
    254      1.1  mrg 	ldf8		f35 = [up], 8
    255      1.1  mrg 	ldf8		f47 = [rp], 8
    256      1.1  mrg 	br.cloop.dptk	.grt3
    257      1.1  mrg 	;;
    258      1.1  mrg 
    259      1.1  mrg 	xma.l		f37 = f7, f6, f8
    260      1.1  mrg 	xma.hu		f41 = f7, f6, f8
    261      1.1  mrg 	xma.l		f38 = f34, f6, f46
    262      1.1  mrg 	xma.hu		f42 = f34, f6, f46
    263      1.1  mrg 	xma.l		f39 = f35, f6, f47
    264      1.1  mrg 	xma.hu		f43 = f35, f6, f47
    265      1.1  mrg 	;;
    266      1.1  mrg 	getf.sig	r29 = f41
    267      1.1  mrg 	stf8		[r20] = f37, 8
    268      1.1  mrg 	getf.sig	r26 = f38
    269      1.1  mrg 	getf.sig	r30 = f42
    270      1.1  mrg 	getf.sig	r27 = f39
    271      1.1  mrg 	getf.sig	r8 = f43
    272      1.1  mrg 	br		.Lcj3
    273      1.1  mrg 
                      C n = 7, 11, 15, ...  Load four more limb pairs while the first three
                      C products are formed.  For n = 7 the br.cloop falls through and the
                      C remaining work is finished at .Lcj7.
                      C NOTE(review): the copy of f37 into r25 is only consumed on the .grt7
                      C path (at .LL11); the fall-through path stores f37 with stf8 instead.
                      C Presumably that duplication is what the two FIXME marks refer to --
                      C confirm with the author.
    274      1.1  mrg .grt3:
    275      1.1  mrg 	ldf8		f32 = [up], 8
    276      1.1  mrg 	xma.l		f37 = f7, f6, f8
    277      1.1  mrg 	ldf8		f44 = [rp], 8
    278      1.1  mrg 	xma.hu		f41 = f7, f6, f8
    279      1.1  mrg 	;;
    280      1.1  mrg 	ldf8		f33 = [up], 8
    281      1.1  mrg 	xma.l		f38 = f34, f6, f46
    282      1.1  mrg 	ldf8		f45 = [rp], 8
    283      1.1  mrg 	xma.hu		f42 = f34, f6, f46
    284      1.1  mrg 	;;
    285      1.1  mrg 	ldf8		f34 = [up], 8
    286      1.1  mrg 	xma.l		f39 = f35, f6, f47
    287      1.1  mrg 	ldf8		f46 = [rp], 8
    288      1.1  mrg 	xma.hu		f43 = f35, f6, f47
    289      1.1  mrg 	;;
    290      1.1  mrg 	ldf8		f35 = [up], 8
    291      1.1  mrg 	getf.sig	r25 = f37		C FIXME
    292      1.1  mrg 	ldf8		f47 = [rp], 8
    293      1.1  mrg 	br.cloop.dptk	.grt7
    294      1.1  mrg 
    295      1.1  mrg 	getf.sig	r29 = f41
    296      1.1  mrg 	stf8		[r20] = f37, 8		C FIXME
    297      1.1  mrg 	xma.l		f36 = f32, f6, f44
    298      1.1  mrg 	getf.sig	r26 = f38
    299      1.1  mrg 	xma.hu		f40 = f32, f6, f44
    300      1.1  mrg 	;;
    301      1.1  mrg 	getf.sig	r30 = f42
    302      1.1  mrg 	xma.l		f37 = f33, f6, f45
    303      1.1  mrg 	getf.sig	r27 = f39
    304      1.1  mrg 	xma.hu		f41 = f33, f6, f45
    305      1.1  mrg 	;;
    306      1.1  mrg 	getf.sig	r31 = f43
    307      1.1  mrg 	xma.l		f38 = f34, f6, f46
    308      1.1  mrg 	getf.sig	r24 = f36
    309      1.1  mrg 	xma.hu		f42 = f34, f6, f46
    310      1.1  mrg 	br		.Lcj7
    311      1.1  mrg 
                      C n = 11, 15, ...  r28 = 0 stands in for a previous high half, so the
                      C first low limb (already in r25) is written by the add and st8 at
                      C .LL11, where the main loop is entered.
    312      1.1  mrg .grt7:
    313      1.1  mrg 	getf.sig	r29 = f41
    314      1.1  mrg 	xma.l		f36 = f32, f6, f44
    315      1.1  mrg 	mov		r28 = 0
    316      1.1  mrg 	xma.hu		f40 = f32, f6, f44
    317      1.1  mrg 	;;
    318      1.1  mrg 	ldf8		f32 = [up], 8
    319      1.1  mrg 	getf.sig	r26 = f38
    320      1.1  mrg 	;;
    321      1.1  mrg 	getf.sig	r30 = f42
    322      1.1  mrg 	xma.l		f37 = f33, f6, f45
    323      1.1  mrg 	ldf8		f44 = [rp], 8
    324      1.1  mrg 	xma.hu		f41 = f33, f6, f45
    325      1.1  mrg 	;;
    326      1.1  mrg 	ldf8		f33 = [up], 8
    327      1.1  mrg 	getf.sig	r27 = f39
    328      1.1  mrg 	br		.LL11
    329      1.1  mrg 
    330      1.1  mrg 
                      C n mod 4 = 0.  Load limb pairs two to four and start the first product.
                      C For n = 4 the br.cloop falls through: four products, first low limb
                      C stored with stf8, and .Lcj4 finishes.
    331      1.1  mrg .Lb00:	ldf8		f33 = [up], 8
    332      1.1  mrg 	ldf8		f45 = [rp], 8
    333      1.1  mrg 	;;
    334      1.1  mrg 	ldf8		f34 = [up], 8
    335      1.1  mrg 	ldf8		f46 = [rp], 8
    336      1.1  mrg 	;;
    337      1.1  mrg 	ldf8		f35 = [up], 8
    338      1.1  mrg 	xma.l		f36 = f7, f6, f8
    339      1.1  mrg 	ldf8		f47 = [rp], 8
    340      1.1  mrg 	xma.hu		f40 = f7, f6, f8
    341      1.1  mrg 	br.cloop.dptk	.grt4
    342      1.1  mrg 
    343      1.1  mrg 	xma.l		f37 = f33, f6, f45
    344      1.1  mrg 	xma.hu		f41 = f33, f6, f45
    345      1.1  mrg 	xma.l		f38 = f34, f6, f46
    346      1.1  mrg 	xma.hu		f42 = f34, f6, f46
    347      1.1  mrg 	;;
    348      1.1  mrg 	getf.sig	r28 = f40
    349      1.1  mrg 	stf8		[r20] = f36, 8
    350      1.1  mrg 	xma.l		f39 = f35, f6, f47
    351      1.1  mrg 	getf.sig	r25 = f37
    352      1.1  mrg 	xma.hu		f43 = f35, f6, f47
    353      1.1  mrg 	;;
    354      1.1  mrg 	getf.sig	r29 = f41
    355      1.1  mrg 	getf.sig	r26 = f38
    356      1.1  mrg 	getf.sig	r30 = f42
    357      1.1  mrg 	getf.sig	r27 = f39
    358      1.1  mrg 	br		.Lcj4
    359      1.1  mrg 
                      C n = 8, 12, 16, ...  Load four more limb pairs while products two to
                      C four are formed.  For n = 8 the br.cloop falls through and the
                      C remaining work is finished at .Lcj8.
                      C NOTE(review): as in .grt3, the copy of f36 into r24 is only consumed
                      C on the .grt8 path (at .LL00) while the fall-through stores f36 with
                      C stf8; presumably this is what the FIXME marks refer to -- confirm.
    360      1.1  mrg .grt4:
    361      1.1  mrg 	ldf8		f32 = [up], 8
    362      1.1  mrg 	xma.l		f37 = f33, f6, f45
    363      1.1  mrg 	ldf8		f44 = [rp], 8
    364      1.1  mrg 	xma.hu		f41 = f33, f6, f45
    365      1.1  mrg 	;;
    366      1.1  mrg 	ldf8		f33 = [up], 8
    367      1.1  mrg 	xma.l		f38 = f34, f6, f46
    368      1.1  mrg 	ldf8		f45 = [rp], 8
    369      1.1  mrg 	xma.hu		f42 = f34, f6, f46
    370      1.1  mrg 	;;
    371      1.1  mrg 	ldf8		f34 = [up], 8
    372      1.1  mrg 	getf.sig	r24 = f36		C FIXME
    373      1.1  mrg 	xma.l		f39 = f35, f6, f47
    374      1.1  mrg 	ldf8		f46 = [rp], 8
    375      1.1  mrg 	getf.sig	r28 = f40
    376      1.1  mrg 	xma.hu		f43 = f35, f6, f47
    377      1.1  mrg 	;;
    378      1.1  mrg 	ldf8		f35 = [up], 8
    379      1.1  mrg 	getf.sig	r25 = f37
    380      1.1  mrg 	ldf8		f47 = [rp], 8
    381      1.1  mrg 	br.cloop.dptk	.grt8
    382      1.1  mrg 
    383      1.1  mrg 	getf.sig	r29 = f41
    384      1.1  mrg 	stf8		[r20] = f36, 8		C FIXME
    385      1.1  mrg 	xma.l		f36 = f32, f6, f44
    386      1.1  mrg 	getf.sig	r26 = f38
    387      1.1  mrg 	getf.sig	r30 = f42
    388      1.1  mrg 	xma.hu		f40 = f32, f6, f44
    389      1.1  mrg 	;;
    390      1.1  mrg 	xma.l		f37 = f33, f6, f45
    391      1.1  mrg 	getf.sig	r27 = f39
    392      1.1  mrg 	xma.hu		f41 = f33, f6, f45
    393      1.1  mrg 	br		.Lcj8
    394      1.1  mrg 
                      C n = 12, 16, ...  r31 = 0 stands in for a previous high half, so the
                      C first low limb (already in r24) is written by the add and st8 at
                      C .LL00, where the main loop is entered.
    395      1.1  mrg .grt8:
    396      1.1  mrg 	getf.sig	r29 = f41
    397      1.1  mrg 	xma.l		f36 = f32, f6, f44
    398      1.1  mrg 	mov		r31 = 0
    399      1.1  mrg 	xma.hu		f40 = f32, f6, f44
    400      1.1  mrg 	;;
    401      1.1  mrg 	ldf8		f32 = [up], 8
    402      1.1  mrg 	getf.sig	r26 = f38
    403      1.1  mrg 	br		.LL00
    404      1.1  mrg 
    405      1.1  mrg 
                      C One iteration handles four limbs in four similar quarters, entered at
                      C .Loop, .LL00, .LL11 or .LL10 (from .grt5, .grt8, .grt7 and .grt6).
                      C Each quarter starts the next xma.l / xma.hu pair, adds a low half to
                      C the preceding high half (plus 1 when the incoming carry predicate is
                      C set), stores the sum through r20 and derives the outgoing carry into
                      C the other predicate pair: with a carry in, the sum wrapped if it is
                      C <= the low half (cmp.leu); without one, if it is < (cmp.ltu).
                      C NOTE(review): the i1 / i2 columns presumably give the issue cycle on
                      C Itanium and Itanium 2; 12 and 8 cycles per four limbs would match the
                      C 3.0 and 2.0 cycles/limb quoted at the top of the file -- confirm.
    406      1.1  mrg C *** MAIN LOOP START ***
    407      1.1  mrg 	ALIGN(32)				C insn	fed	cycle #
    408      1.1  mrg .Loop:
    409      1.1  mrg 	.pred.rel "mutex", p6, p7		C num	by	i1 i2
    410      1.1  mrg 	getf.sig	r29 = f41		C 00	16	0   0
    411      1.1  mrg 	xma.l		f36 = f32, f6, f44	C 01	06,15	0   0
    412      1.1  mrg    (p6)	add		r14 = r30, r27, 1	C 02		0   0
    413      1.1  mrg 	ldf8		f47 = [rp], 8		C 03		0   0
    414      1.1  mrg 	xma.hu		f40 = f32, f6, f44	C 04	06,15	0   0
    415      1.1  mrg    (p7)	add		r14 = r30, r27		C 05		0   0
    416      1.1  mrg 	;;
    417      1.1  mrg 	.pred.rel "mutex", p6, p7
    418      1.1  mrg 	ldf8		f32 = [up], 8		C 06		1   1
    419      1.1  mrg    (p6)	cmp.leu		p8, p9 = r14, r27	C 07		1   1
    420      1.1  mrg    (p7)	cmp.ltu		p8, p9 = r14, r27	C 08		1   1
    421      1.1  mrg 	getf.sig	r26 = f38		C 09	25	2   1
    422      1.1  mrg 	st8		[r20] = r14, 8		C 10		2   1
    423      1.1  mrg 	nop.b		0			C 11		2   1
    424      1.1  mrg 	;;
    425      1.1  mrg .LL00:
    426      1.1  mrg 	.pred.rel "mutex", p8, p9
    427      1.1  mrg 	getf.sig	r30 = f42		C 12	28	3   2
    428      1.1  mrg 	xma.l		f37 = f33, f6, f45	C 13	18,27	3   2
    429      1.1  mrg    (p8)	add		r16 = r31, r24, 1	C 14		3   2
    430      1.1  mrg 	ldf8		f44 = [rp], 8		C 15		3   2
    431      1.1  mrg 	xma.hu		f41 = f33, f6, f45	C 16	18,27	3   2
    432      1.1  mrg    (p9)	add		r16 = r31, r24		C 17		3   2
    433      1.1  mrg 	;;
    434      1.1  mrg 	.pred.rel "mutex", p8, p9
    435      1.1  mrg 	ldf8		f33 = [up], 8		C 18		4   3
    436      1.1  mrg    (p8)	cmp.leu		p6, p7 = r16, r24	C 19		4   3
    437      1.1  mrg    (p9)	cmp.ltu		p6, p7 = r16, r24	C 20		4   3
    438      1.1  mrg 	getf.sig	r27 = f39		C 21	37	5   3
    439      1.1  mrg 	st8		[r20] = r16, 8		C 22		5   3
    440      1.1  mrg 	nop.b		0			C 23		5   3
    441      1.1  mrg 	;;
    442      1.1  mrg .LL11:
    443      1.1  mrg 	.pred.rel "mutex", p6, p7
    444      1.1  mrg 	getf.sig	r31 = f43		C 24	40	6   4
    445      1.1  mrg 	xma.l		f38 = f34, f6, f46	C 25	30,39	6   4
    446      1.1  mrg    (p6)	add		r14 = r28, r25, 1	C 26		6   4
    447      1.1  mrg 	ldf8		f45 = [rp], 8		C 27		6   4
    448      1.1  mrg 	xma.hu		f42 = f34, f6, f46	C 28	30,39	6   4
    449      1.1  mrg    (p7)	add		r14 = r28, r25		C 29		6   4
    450      1.1  mrg 	;;
    451      1.1  mrg 	.pred.rel "mutex", p6, p7
    452      1.1  mrg 	ldf8		f34 = [up], 8		C 30		7   5
    453      1.1  mrg    (p6)	cmp.leu		p8, p9 = r14, r25	C 31		7   5
    454      1.1  mrg    (p7)	cmp.ltu		p8, p9 = r14, r25	C 32		7   5
    455      1.1  mrg 	getf.sig	r24 = f36		C 33	01	8   5
    456      1.1  mrg 	st8		[r20] = r14, 8		C 34		8   5
    457      1.1  mrg 	nop.b		0			C 35		8   5
    458      1.1  mrg 	;;
    459      1.1  mrg .LL10:
    460      1.1  mrg 	.pred.rel "mutex", p8, p9
    461      1.1  mrg 	getf.sig	r28 = f40		C 36	04	9   6
    462      1.1  mrg 	xma.l		f39 = f35, f6, f47	C 37	42,03	9   6
    463      1.1  mrg    (p8)	add		r16 = r29, r26, 1	C 38		9   6
    464      1.1  mrg 	ldf8		f46 = [rp], 8		C 39		9   6
    465      1.1  mrg 	xma.hu		f43 = f35, f6, f47	C 40	42,03	9   6
    466      1.1  mrg    (p9)	add		r16 = r29, r26		C 41		9   6
    467      1.1  mrg 	;;
    468      1.1  mrg 	.pred.rel "mutex", p8, p9
    469      1.1  mrg 	ldf8		f35 = [up], 8		C 42	       10   7
    470      1.1  mrg    (p8)	cmp.leu		p6, p7 = r16, r26	C 43	       10   7
    471      1.1  mrg    (p9)	cmp.ltu		p6, p7 = r16, r26	C 44	       10   7
    472      1.1  mrg 	getf.sig	r25 = f37		C 45	13     11   7
    473      1.1  mrg 	st8		[r20] = r16, 8		C 46	       11   7
    474      1.1  mrg 	br.cloop.dptk	.Loop			C 47	       11   7
    475      1.1  mrg C *** MAIN LOOP END ***
    476      1.1  mrg 	;;
                      C Wind-down.  Finishes the limbs still in flight using the same
                      C add / compare / store steps as the loop, with no further loads from
                      C up.  The remaining multiplies end before .Lcj6 and the last high half
                      C is moved to r8 at .Lcj4.  .Le0 is reached from the loop or from
                      C .grt5; .Lcj8 down to .Lcj2 are the entry points for the straight-line
                      C n = 8 down to n = 2 cases above.
    477      1.1  mrg .Le0:
    478      1.1  mrg 	.pred.rel "mutex", p6, p7
    479      1.1  mrg 	getf.sig	r29 = f41		C
    480      1.1  mrg 	xma.l		f36 = f32, f6, f44	C
    481      1.1  mrg    (p6)	add		r14 = r30, r27, 1	C
    482      1.1  mrg 	ldf8		f47 = [rp], 8		C
    483      1.1  mrg 	xma.hu		f40 = f32, f6, f44	C
    484      1.1  mrg    (p7)	add		r14 = r30, r27		C
    485      1.1  mrg 	;;
    486      1.1  mrg 	.pred.rel "mutex", p6, p7
    487      1.1  mrg    (p6)	cmp.leu		p8, p9 = r14, r27	C
    488      1.1  mrg    (p7)	cmp.ltu		p8, p9 = r14, r27	C
    489      1.1  mrg 	getf.sig	r26 = f38		C
    490      1.1  mrg 	st8		[r20] = r14, 8		C
    491      1.1  mrg 	;;
    492      1.1  mrg 	.pred.rel "mutex", p8, p9
    493      1.1  mrg 	getf.sig	r30 = f42		C
    494      1.1  mrg 	xma.l		f37 = f33, f6, f45	C
    495      1.1  mrg    (p8)	add		r16 = r31, r24, 1	C
    496      1.1  mrg 	xma.hu		f41 = f33, f6, f45	C
    497      1.1  mrg    (p9)	add		r16 = r31, r24		C
    498      1.1  mrg 	;;
    499      1.1  mrg 	.pred.rel "mutex", p8, p9
    500      1.1  mrg    (p8)	cmp.leu		p6, p7 = r16, r24	C
    501      1.1  mrg    (p9)	cmp.ltu		p6, p7 = r16, r24	C
    502      1.1  mrg 	getf.sig	r27 = f39		C
    503      1.1  mrg 	st8		[r20] = r16, 8		C
    504      1.1  mrg 	;;
    505      1.1  mrg .Lcj8:
    506      1.1  mrg 	.pred.rel "mutex", p6, p7
    507      1.1  mrg 	getf.sig	r31 = f43		C
    508      1.1  mrg 	xma.l		f38 = f34, f6, f46	C
    509      1.1  mrg    (p6)	add		r14 = r28, r25, 1	C
    510      1.1  mrg 	xma.hu		f42 = f34, f6, f46	C
    511      1.1  mrg    (p7)	add		r14 = r28, r25		C
    512      1.1  mrg 	;;
    513      1.1  mrg 	.pred.rel "mutex", p6, p7
    514      1.1  mrg    (p6)	cmp.leu		p8, p9 = r14, r25	C
    515      1.1  mrg    (p7)	cmp.ltu		p8, p9 = r14, r25	C
    516      1.1  mrg 	getf.sig	r24 = f36		C
    517      1.1  mrg 	st8		[r20] = r14, 8		C
    518      1.1  mrg 	;;
    519      1.1  mrg .Lcj7:
    520      1.1  mrg 	.pred.rel "mutex", p8, p9
    521      1.1  mrg 	getf.sig	r28 = f40		C
    522      1.1  mrg 	xma.l		f39 = f35, f6, f47	C
    523      1.1  mrg    (p8)	add		r16 = r29, r26, 1	C
    524      1.1  mrg 	xma.hu		f43 = f35, f6, f47	C
    525      1.1  mrg    (p9)	add		r16 = r29, r26		C
    526      1.1  mrg 	;;
    527      1.1  mrg 	.pred.rel "mutex", p8, p9
    528      1.1  mrg    (p8)	cmp.leu		p6, p7 = r16, r26	C
    529      1.1  mrg    (p9)	cmp.ltu		p6, p7 = r16, r26	C
    530      1.1  mrg 	getf.sig	r25 = f37		C
    531      1.1  mrg 	st8		[r20] = r16, 8		C
    532      1.1  mrg 	;;
    533      1.1  mrg .Lcj6:
    534      1.1  mrg 	.pred.rel "mutex", p6, p7
    535      1.1  mrg 	getf.sig	r29 = f41		C
    536      1.1  mrg    (p6)	add		r14 = r30, r27, 1	C
    537      1.1  mrg    (p7)	add		r14 = r30, r27		C
    538      1.1  mrg 	;;
    539      1.1  mrg 	.pred.rel "mutex", p6, p7
    540      1.1  mrg    (p6)	cmp.leu		p8, p9 = r14, r27	C
    541      1.1  mrg    (p7)	cmp.ltu		p8, p9 = r14, r27	C
    542      1.1  mrg 	getf.sig	r26 = f38		C
    543      1.1  mrg 	st8		[r20] = r14, 8		C
    544      1.1  mrg 	;;
    545      1.1  mrg .Lcj5:
    546      1.1  mrg 	.pred.rel "mutex", p8, p9
    547      1.1  mrg 	getf.sig	r30 = f42		C
    548      1.1  mrg    (p8)	add		r16 = r31, r24, 1	C
    549      1.1  mrg    (p9)	add		r16 = r31, r24		C
    550      1.1  mrg 	;;
    551      1.1  mrg 	.pred.rel "mutex", p8, p9
    552      1.1  mrg    (p8)	cmp.leu		p6, p7 = r16, r24	C
    553      1.1  mrg    (p9)	cmp.ltu		p6, p7 = r16, r24	C
    554      1.1  mrg 	getf.sig	r27 = f39		C
    555      1.1  mrg 	st8		[r20] = r16, 8		C
    556      1.1  mrg 	;;
    557      1.1  mrg .Lcj4:
    558      1.1  mrg 	.pred.rel "mutex", p6, p7
    559      1.1  mrg 	getf.sig	r8 = f43		C
    560      1.1  mrg    (p6)	add		r14 = r28, r25, 1	C
    561      1.1  mrg    (p7)	add		r14 = r28, r25		C
    562      1.1  mrg 	;;
    563      1.1  mrg 	.pred.rel "mutex", p6, p7
    564      1.1  mrg 	st8		[r20] = r14, 8		C
    565      1.1  mrg    (p6)	cmp.leu		p8, p9 = r14, r25	C
    566      1.1  mrg    (p7)	cmp.ltu		p8, p9 = r14, r25	C
    567      1.1  mrg 	;;
    568      1.1  mrg .Lcj3:
    569      1.1  mrg 	.pred.rel "mutex", p8, p9
    570      1.1  mrg    (p8)	add		r16 = r29, r26, 1	C
    571      1.1  mrg    (p9)	add		r16 = r29, r26		C
    572      1.1  mrg 	;;
    573      1.1  mrg 	.pred.rel "mutex", p8, p9
    574      1.1  mrg 	st8		[r20] = r16, 8		C
    575      1.1  mrg    (p8)	cmp.leu		p6, p7 = r16, r26	C
    576      1.1  mrg    (p9)	cmp.ltu		p6, p7 = r16, r26	C
    577      1.1  mrg 	;;
    578      1.1  mrg .Lcj2:
    579      1.1  mrg 	.pred.rel "mutex", p6, p7
    580      1.1  mrg    (p6)	add		r14 = r30, r27, 1	C
    581      1.1  mrg    (p7)	add		r14 = r30, r27		C
    582      1.1  mrg 	;;
    583      1.1  mrg 	.pred.rel "mutex", p6, p7
    584      1.1  mrg 	st8		[r20] = r14		C
    585      1.1  mrg    (p6)	cmp.leu		p8, p9 = r14, r27	C
    586      1.1  mrg    (p7)	cmp.ltu		p8, p9 = r14, r27	C
    587      1.1  mrg 	;;
                      C Fold the carry out of the last stored limb into the returned high
                      C limb, restore the loop-count register saved on entry, and return.
    588      1.1  mrg    (p8)	add		r8 = 1, r8		C M I
    589      1.1  mrg 	mov.i		ar.lc = r2		C I0
    590      1.1  mrg 	br.ret.sptk.many b0			C B
    591      1.1  mrg EPILOGUE()
    592      1.1  mrg ASM_END()
    593