Home | History | Annotate | Line # | Download | only in ia64
addmul_1.asm revision 1.1
      1  1.1  mrg dnl  IA-64 mpn_addmul_1 -- Multiply a limb vector with a limb and add the
      2  1.1  mrg dnl  result to a second limb vector.
      3  1.1  mrg 
      4  1.1  mrg dnl  Copyright 2000, 2001, 2002, 2003, 2004, 2005, 2007 Free Software
      5  1.1  mrg dnl  Foundation, Inc.
      6  1.1  mrg 
      7  1.1  mrg dnl  This file is part of the GNU MP Library.
      8  1.1  mrg 
      9  1.1  mrg dnl  The GNU MP Library is free software; you can redistribute it and/or modify
     10  1.1  mrg dnl  it under the terms of the GNU Lesser General Public License as published
     11  1.1  mrg dnl  by the Free Software Foundation; either version 3 of the License, or (at
     12  1.1  mrg dnl  your option) any later version.
     13  1.1  mrg 
     14  1.1  mrg dnl  The GNU MP Library is distributed in the hope that it will be useful, but
     15  1.1  mrg dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
     16  1.1  mrg dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
     17  1.1  mrg dnl  License for more details.
     18  1.1  mrg 
     19  1.1  mrg dnl  You should have received a copy of the GNU Lesser General Public License
     20  1.1  mrg dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
     21  1.1  mrg 
     22  1.1  mrg include(`../config.m4')
     23  1.1  mrg 
     24  1.1  mrg C         cycles/limb
     25  1.1  mrg C Itanium:    3.0
     26  1.1  mrg C Itanium 2:  2.0
     27  1.1  mrg 
     28  1.1  mrg C TODO
     29  1.1  mrg C  * Further optimize feed-in and wind-down code, both for speed and code size.
     30  1.1  mrg C  * Handle low limb input and results specially, using a common stf8 in the
     31  1.1  mrg C    epilogue.
     32  1.1  mrg C  * Use 1 c/l carry propagation scheme in wind-down code.
     33  1.1  mrg C  * Use extra pointer registers for `up' and rp to speed up feed-in loads.
     34  1.1  mrg C  * Work out final differences with mul_1.asm.  That function is 300 bytes
     35  1.1  mrg C    smaller than this due to better loop scheduling and thus simpler feed-in
     36  1.1  mrg C    code.
     37  1.1  mrg 
     38  1.1  mrg C INPUT PARAMETERS
     39  1.1  mrg define(`rp', `r32')
     40  1.1  mrg define(`up', `r33')
     41  1.1  mrg define(`n', `r34')
     42  1.1  mrg define(`vl', `r35')
     43  1.1  mrg 
     44  1.1  mrg ASM_START()
     45  1.1  mrg PROLOGUE(mpn_addmul_1)		C {rp,n} += {up,n} * vl; the carry-out limb is left in r8
     46  1.1  mrg 	.prologue
     47  1.1  mrg 	.save	ar.lc, r2			C unwind info: ar.lc is kept in r2
     48  1.1  mrg 	.body
     49  1.1  mrg 
     50  1.1  mrg ifdef(`HAVE_ABI_32',
     51  1.1  mrg `	addp4		rp = 0, rp		C M I	32-bit ABI: widen rp to a 64-bit address
     52  1.1  mrg 	addp4		up = 0, up		C M I	32-bit ABI: widen up to a 64-bit address
     53  1.1  mrg 	zxt4		n = n			C I	32-bit ABI: zero-extend the limb count
     54  1.1  mrg 	;;
     55  1.1  mrg ')
     56  1.1  mrg {.mmi
     57  1.1  mrg 	adds		r15 = -1, n		C M I	r15 = n - 1
     58  1.1  mrg 	mov		r20 = rp		C M I	r20 = store pointer; rp itself is advanced by the loads
     59  1.1  mrg 	mov.i		r2 = ar.lc		C I0	save ar.lc, it is restored before each return
     60  1.1  mrg }
     61  1.1  mrg {.mmi
     62  1.1  mrg 	ldf8		f7 = [up], 8		C M	f7 = first up limb
     63  1.1  mrg 	ldf8		f8 = [rp], 8		C M	f8 = first rp limb
     64  1.1  mrg 	and		r14 = 3, n		C M I	r14 = n mod 4, selects the feed-in path
     65  1.1  mrg 	;;
     66  1.1  mrg }
     67  1.1  mrg {.mmi
     68  1.1  mrg 	setf.sig	f6 = vl			C M2 M3	f6 = multiplier limb
     69  1.1  mrg 	cmp.eq		p10, p0 = 0, r14	C M I	p10: n mod 4 == 0
     70  1.1  mrg 	shr.u		r31 = r15, 2		C I0	r31 = (n - 1) / 4
     71  1.1  mrg }
     72  1.1  mrg {.mmi
     73  1.1  mrg 	cmp.eq		p11, p0 = 2, r14	C M I	p11: n mod 4 == 2
     74  1.1  mrg 	cmp.eq		p12, p0 = 3, r14	C M I	p12: n mod 4 == 3
     75  1.1  mrg 	nop.i		0			C I
     76  1.1  mrg 	;;
     77  1.1  mrg }
     78  1.1  mrg {.mii
     79  1.1  mrg 	cmp.ne		p6, p7 = r0, r0		C M I	p6 = 0, p7 = 1: no carry pending
     80  1.1  mrg 	mov.i		ar.lc = r31		C I0	loop count consumed by the br.cloop tests
     81  1.1  mrg 	cmp.ne		p8, p9 = r0, r0		C M I	p8 = 0, p9 = 1: no carry pending
     82  1.1  mrg }
     83  1.1  mrg {.bbb
     84  1.1  mrg   (p10)	br.dptk		.Lb00			C B
     85  1.1  mrg   (p11)	br.dptk		.Lb10			C B
     86  1.1  mrg   (p12)	br.dptk		.Lb11			C B	otherwise fall through to .Lb01
     87  1.1  mrg 	;;
     88  1.1  mrg }
     89  1.1  mrg 
     90  1.1  mrg .Lb01:	br.cloop.dptk	.grt1			C B	n mod 4 == 1; taken when ar.lc != 0
     91  1.1  mrg 
     92  1.1  mrg 	xma.l		f39 = f7, f6, f8	C F	n == 1: low half of f7 * f6 + f8
     93  1.1  mrg 	xma.hu		f43 = f7, f6, f8	C F	high half of f7 * f6 + f8
     94  1.1  mrg 	;;
     95  1.1  mrg 	getf.sig	r8 = f43		C M2	high half is left in r8 for the caller
     96  1.1  mrg 	stf8		[r20] = f39		C M2 M3	write the single result limb
     97  1.1  mrg 	mov.i		ar.lc = r2		C I0	restore ar.lc
     98  1.1  mrg 	br.ret.sptk.many b0			C B
     99  1.1  mrg 
    100  1.1  mrg .grt1:	C n >= 5: fetch the next four limb pairs
    101  1.1  mrg 	ldf8		f32 = [up], 8
    102  1.1  mrg 	ldf8		f44 = [rp], 8
    103  1.1  mrg 	;;
    104  1.1  mrg 	ldf8		f33 = [up], 8
    105  1.1  mrg 	ldf8		f45 = [rp], 8
    106  1.1  mrg 	;;
    107  1.1  mrg 	ldf8		f34 = [up], 8
    108  1.1  mrg 	xma.l		f39 = f7, f6, f8	C low half of limb 0
    109  1.1  mrg 	ldf8		f46 = [rp], 8
    110  1.1  mrg 	xma.hu		f43 = f7, f6, f8	C high half of limb 0
    111  1.1  mrg 	;;
    112  1.1  mrg 	ldf8		f35 = [up], 8
    113  1.1  mrg 	ldf8		f47 = [rp], 8
    114  1.1  mrg 	br.cloop.dptk	.grt5			C falls through when n == 5
    115  1.1  mrg 
    116  1.1  mrg 	xma.l		f36 = f32, f6, f44
    117  1.1  mrg 	xma.hu		f40 = f32, f6, f44
    118  1.1  mrg 	;;
    119  1.1  mrg 	stf8		[r20] = f39, 8		C limb 0 takes no carry-in, store it directly
    120  1.1  mrg 	xma.l		f37 = f33, f6, f45
    121  1.1  mrg 	xma.hu		f41 = f33, f6, f45
    122  1.1  mrg 	;;
    123  1.1  mrg 	getf.sig	r31 = f43
    124  1.1  mrg 	getf.sig	r24 = f36
    125  1.1  mrg 	xma.l		f38 = f34, f6, f46
    126  1.1  mrg 	xma.hu		f42 = f34, f6, f46
    127  1.1  mrg 	;;
    128  1.1  mrg 	getf.sig	r28 = f40
    129  1.1  mrg 	getf.sig	r25 = f37
    130  1.1  mrg 	xma.l		f39 = f35, f6, f47
    131  1.1  mrg 	xma.hu		f43 = f35, f6, f47
    132  1.1  mrg 	;;
    133  1.1  mrg 	getf.sig	r29 = f41
    134  1.1  mrg 	getf.sig	r26 = f38
    135  1.1  mrg 	br		.Lcj5			C finish in the shared wind-down code
    136  1.1  mrg 
    137  1.1  mrg .grt5:	C n >= 9: prime the pipeline before entering .Loop
    138  1.1  mrg 	mov		r30 = 0			C limb 0 has no preceding high half to add in .Loop
    139  1.1  mrg 	xma.l		f36 = f32, f6, f44
    140  1.1  mrg 	xma.hu		f40 = f32, f6, f44
    141  1.1  mrg 	;;
    142  1.1  mrg 	ldf8		f32 = [up], 8
    143  1.1  mrg 	xma.l		f37 = f33, f6, f45
    144  1.1  mrg 	ldf8		f44 = [rp], 8
    145  1.1  mrg 	xma.hu		f41 = f33, f6, f45
    146  1.1  mrg 	;;
    147  1.1  mrg 	ldf8		f33 = [up], 8
    148  1.1  mrg 	getf.sig	r27 = f39
    149  1.1  mrg 	;;
    150  1.1  mrg 	getf.sig	r31 = f43
    151  1.1  mrg 	xma.l		f38 = f34, f6, f46
    152  1.1  mrg 	ldf8		f45 = [rp], 8
    153  1.1  mrg 	xma.hu		f42 = f34, f6, f46
    154  1.1  mrg 	;;
    155  1.1  mrg 	ldf8		f34 = [up], 8
    156  1.1  mrg 	getf.sig	r24 = f36
    157  1.1  mrg 	;;
    158  1.1  mrg 	getf.sig	r28 = f40
    159  1.1  mrg 	xma.l		f39 = f35, f6, f47
    160  1.1  mrg 	ldf8		f46 = [rp], 8
    161  1.1  mrg 	xma.hu		f43 = f35, f6, f47
    162  1.1  mrg 	;;
    163  1.1  mrg 	ldf8		f35 = [up], 8
    164  1.1  mrg 	getf.sig	r25 = f37
    165  1.1  mrg 	br.cloop.dptk	.Loop			C ar.lc != 0: run the main loop
    166  1.1  mrg 	br		.Le0			C ar.lc == 0: go straight to the wind-down
    167  1.1  mrg 
    168  1.1  mrg 
    169  1.1  mrg .Lb10:	ldf8		f35 = [up], 8		C n mod 4 == 2: second up limb
    170  1.1  mrg 	ldf8		f47 = [rp], 8
    171  1.1  mrg 	br.cloop.dptk	.grt2			C falls through when n == 2
    172  1.1  mrg 
    173  1.1  mrg 	xma.l		f38 = f7, f6, f8
    174  1.1  mrg 	xma.hu		f42 = f7, f6, f8
    175  1.1  mrg 	;;
    176  1.1  mrg 	xma.l		f39 = f35, f6, f47
    177  1.1  mrg 	xma.hu		f43 = f35, f6, f47
    178  1.1  mrg 	;;
    179  1.1  mrg 	getf.sig	r30 = f42		C high half of limb 0
    180  1.1  mrg 	stf8		[r20] = f38, 8		C limb 0 takes no carry-in, store it directly
    181  1.1  mrg 	getf.sig	r27 = f39		C low half of limb 1
    182  1.1  mrg 	getf.sig	r8 = f43		C high half of limb 1; the final carry is added at the end
    183  1.1  mrg 	br		.Lcj2
    184  1.1  mrg 
    185  1.1  mrg .grt2:	C n >= 6: fetch four more limb pairs
    186  1.1  mrg 	ldf8		f32 = [up], 8
    187  1.1  mrg 	ldf8		f44 = [rp], 8
    188  1.1  mrg 	;;
    189  1.1  mrg 	ldf8		f33 = [up], 8
    190  1.1  mrg 	xma.l		f38 = f7, f6, f8
    191  1.1  mrg 	ldf8		f45 = [rp], 8
    192  1.1  mrg 	xma.hu		f42 = f7, f6, f8
    193  1.1  mrg 	;;
    194  1.1  mrg 	ldf8		f34 = [up], 8
    195  1.1  mrg 	xma.l		f39 = f35, f6, f47
    196  1.1  mrg 	ldf8		f46 = [rp], 8
    197  1.1  mrg 	xma.hu		f43 = f35, f6, f47
    198  1.1  mrg 	;;
    199  1.1  mrg 	ldf8		f35 = [up], 8
    200  1.1  mrg 	ldf8		f47 = [rp], 8
    201  1.1  mrg 	br.cloop.dptk	.grt6			C falls through when n == 6
    202  1.1  mrg 
    203  1.1  mrg 	stf8		[r20] = f38, 8		C limb 0 takes no carry-in, store it directly
    204  1.1  mrg 	xma.l		f36 = f32, f6, f44
    205  1.1  mrg 	xma.hu		f40 = f32, f6, f44
    206  1.1  mrg 	;;
    207  1.1  mrg 	getf.sig	r30 = f42
    208  1.1  mrg 	getf.sig	r27 = f39
    209  1.1  mrg 	xma.l		f37 = f33, f6, f45
    210  1.1  mrg 	xma.hu		f41 = f33, f6, f45
    211  1.1  mrg 	;;
    212  1.1  mrg 	getf.sig	r31 = f43
    213  1.1  mrg 	getf.sig	r24 = f36
    214  1.1  mrg 	xma.l		f38 = f34, f6, f46
    215  1.1  mrg 	xma.hu		f42 = f34, f6, f46
    216  1.1  mrg 	;;
    217  1.1  mrg 	getf.sig	r28 = f40
    218  1.1  mrg 	getf.sig	r25 = f37
    219  1.1  mrg 	xma.l		f39 = f35, f6, f47
    220  1.1  mrg 	xma.hu		f43 = f35, f6, f47
    221  1.1  mrg 	br		.Lcj6			C finish in the shared wind-down code
    222  1.1  mrg 
    223  1.1  mrg .grt6:	C n >= 10: prime the pipeline, then join the loop at .LL10
    224  1.1  mrg 	mov		r29 = 0			C limb 0 has no preceding high half to add at .LL10
    225  1.1  mrg 	xma.l		f36 = f32, f6, f44
    226  1.1  mrg 	xma.hu		f40 = f32, f6, f44
    227  1.1  mrg 	;;
    228  1.1  mrg 	ldf8		f32 = [up], 8
    229  1.1  mrg 	getf.sig	r26 = f38
    230  1.1  mrg 	;;
    231  1.1  mrg 	getf.sig	r30 = f42
    232  1.1  mrg 	xma.l		f37 = f33, f6, f45
    233  1.1  mrg 	ldf8		f44 = [rp], 8
    234  1.1  mrg 	xma.hu		f41 = f33, f6, f45
    235  1.1  mrg 	;;
    236  1.1  mrg 	ldf8		f33 = [up], 8
    237  1.1  mrg 	getf.sig	r27 = f39
    238  1.1  mrg 	;;
    239  1.1  mrg 	getf.sig	r31 = f43
    240  1.1  mrg 	xma.l		f38 = f34, f6, f46
    241  1.1  mrg 	ldf8		f45 = [rp], 8
    242  1.1  mrg 	xma.hu		f42 = f34, f6, f46
    243  1.1  mrg 	;;
    244  1.1  mrg 	ldf8		f34 = [up], 8
    245  1.1  mrg 	getf.sig	r24 = f36
    246  1.1  mrg 	br		.LL10			C enter the main loop part-way through
    247  1.1  mrg 
    248  1.1  mrg 
    249  1.1  mrg .Lb11:	ldf8		f34 = [up], 8		C n mod 4 == 3: second up limb
    250  1.1  mrg 	ldf8		f46 = [rp], 8
    251  1.1  mrg 	;;
    252  1.1  mrg 	ldf8		f35 = [up], 8
    253  1.1  mrg 	ldf8		f47 = [rp], 8
    254  1.1  mrg 	br.cloop.dptk	.grt3			C falls through when n == 3
    255  1.1  mrg 	;;
    256  1.1  mrg 
    257  1.1  mrg 	xma.l		f37 = f7, f6, f8
    258  1.1  mrg 	xma.hu		f41 = f7, f6, f8
    259  1.1  mrg 	xma.l		f38 = f34, f6, f46
    260  1.1  mrg 	xma.hu		f42 = f34, f6, f46
    261  1.1  mrg 	xma.l		f39 = f35, f6, f47
    262  1.1  mrg 	xma.hu		f43 = f35, f6, f47
    263  1.1  mrg 	;;
    264  1.1  mrg 	getf.sig	r29 = f41		C high half of limb 0
    265  1.1  mrg 	stf8		[r20] = f37, 8		C limb 0 takes no carry-in, store it directly
    266  1.1  mrg 	getf.sig	r26 = f38		C low half of limb 1
    267  1.1  mrg 	getf.sig	r30 = f42		C high half of limb 1
    268  1.1  mrg 	getf.sig	r27 = f39		C low half of limb 2
    269  1.1  mrg 	getf.sig	r8 = f43		C high half of limb 2; the final carry is added at the end
    270  1.1  mrg 	br		.Lcj3
    271  1.1  mrg 
    272  1.1  mrg .grt3:	C n >= 7: fetch four more limb pairs
    273  1.1  mrg 	ldf8		f32 = [up], 8
    274  1.1  mrg 	xma.l		f37 = f7, f6, f8
    275  1.1  mrg 	ldf8		f44 = [rp], 8
    276  1.1  mrg 	xma.hu		f41 = f7, f6, f8
    277  1.1  mrg 	;;
    278  1.1  mrg 	ldf8		f33 = [up], 8
    279  1.1  mrg 	xma.l		f38 = f34, f6, f46
    280  1.1  mrg 	ldf8		f45 = [rp], 8
    281  1.1  mrg 	xma.hu		f42 = f34, f6, f46
    282  1.1  mrg 	;;
    283  1.1  mrg 	ldf8		f34 = [up], 8
    284  1.1  mrg 	xma.l		f39 = f35, f6, f47
    285  1.1  mrg 	ldf8		f46 = [rp], 8
    286  1.1  mrg 	xma.hu		f43 = f35, f6, f47
    287  1.1  mrg 	;;
    288  1.1  mrg 	ldf8		f35 = [up], 8
    289  1.1  mrg 	getf.sig	r25 = f37		C FIXME
    290  1.1  mrg 	ldf8		f47 = [rp], 8
    291  1.1  mrg 	br.cloop.dptk	.grt7			C falls through when n == 7
    292  1.1  mrg 
    293  1.1  mrg 	getf.sig	r29 = f41
    294  1.1  mrg 	stf8		[r20] = f37, 8		C FIXME
    295  1.1  mrg 	xma.l		f36 = f32, f6, f44
    296  1.1  mrg 	getf.sig	r26 = f38
    297  1.1  mrg 	xma.hu		f40 = f32, f6, f44
    298  1.1  mrg 	;;
    299  1.1  mrg 	getf.sig	r30 = f42
    300  1.1  mrg 	xma.l		f37 = f33, f6, f45
    301  1.1  mrg 	getf.sig	r27 = f39
    302  1.1  mrg 	xma.hu		f41 = f33, f6, f45
    303  1.1  mrg 	;;
    304  1.1  mrg 	getf.sig	r31 = f43
    305  1.1  mrg 	xma.l		f38 = f34, f6, f46
    306  1.1  mrg 	getf.sig	r24 = f36
    307  1.1  mrg 	xma.hu		f42 = f34, f6, f46
    308  1.1  mrg 	br		.Lcj7			C finish in the shared wind-down code
    309  1.1  mrg 
    310  1.1  mrg .grt7:	C n >= 11: prime the pipeline, then join the loop at .LL11
    311  1.1  mrg 	getf.sig	r29 = f41
    312  1.1  mrg 	xma.l		f36 = f32, f6, f44
    313  1.1  mrg 	mov		r28 = 0			C limb 0 has no preceding high half to add at .LL11
    314  1.1  mrg 	xma.hu		f40 = f32, f6, f44
    315  1.1  mrg 	;;
    316  1.1  mrg 	ldf8		f32 = [up], 8
    317  1.1  mrg 	getf.sig	r26 = f38
    318  1.1  mrg 	;;
    319  1.1  mrg 	getf.sig	r30 = f42
    320  1.1  mrg 	xma.l		f37 = f33, f6, f45
    321  1.1  mrg 	ldf8		f44 = [rp], 8
    322  1.1  mrg 	xma.hu		f41 = f33, f6, f45
    323  1.1  mrg 	;;
    324  1.1  mrg 	ldf8		f33 = [up], 8
    325  1.1  mrg 	getf.sig	r27 = f39
    326  1.1  mrg 	br		.LL11			C enter the main loop part-way through
    327  1.1  mrg 
    328  1.1  mrg 
    329  1.1  mrg .Lb00:	ldf8		f33 = [up], 8		C n mod 4 == 0: second up limb
    330  1.1  mrg 	ldf8		f45 = [rp], 8
    331  1.1  mrg 	;;
    332  1.1  mrg 	ldf8		f34 = [up], 8
    333  1.1  mrg 	ldf8		f46 = [rp], 8
    334  1.1  mrg 	;;
    335  1.1  mrg 	ldf8		f35 = [up], 8
    336  1.1  mrg 	xma.l		f36 = f7, f6, f8	C low half of limb 0
    337  1.1  mrg 	ldf8		f47 = [rp], 8
    338  1.1  mrg 	xma.hu		f40 = f7, f6, f8	C high half of limb 0
    339  1.1  mrg 	br.cloop.dptk	.grt4			C falls through when ar.lc == 0
    340  1.1  mrg 
    341  1.1  mrg 	xma.l		f37 = f33, f6, f45
    342  1.1  mrg 	xma.hu		f41 = f33, f6, f45
    343  1.1  mrg 	xma.l		f38 = f34, f6, f46
    344  1.1  mrg 	xma.hu		f42 = f34, f6, f46
    345  1.1  mrg 	;;
    346  1.1  mrg 	getf.sig	r28 = f40
    347  1.1  mrg 	stf8		[r20] = f36, 8		C limb 0 takes no carry-in, store it directly
    348  1.1  mrg 	xma.l		f39 = f35, f6, f47
    349  1.1  mrg 	getf.sig	r25 = f37
    350  1.1  mrg 	xma.hu		f43 = f35, f6, f47
    351  1.1  mrg 	;;
    352  1.1  mrg 	getf.sig	r29 = f41
    353  1.1  mrg 	getf.sig	r26 = f38
    354  1.1  mrg 	getf.sig	r30 = f42
    355  1.1  mrg 	getf.sig	r27 = f39
    356  1.1  mrg 	br		.Lcj4			C finish in the shared wind-down code
    357  1.1  mrg 
    358  1.1  mrg .grt4:	C more than one group of four: fetch four more limb pairs
    359  1.1  mrg 	ldf8		f32 = [up], 8
    360  1.1  mrg 	xma.l		f37 = f33, f6, f45
    361  1.1  mrg 	ldf8		f44 = [rp], 8
    362  1.1  mrg 	xma.hu		f41 = f33, f6, f45
    363  1.1  mrg 	;;
    364  1.1  mrg 	ldf8		f33 = [up], 8
    365  1.1  mrg 	xma.l		f38 = f34, f6, f46
    366  1.1  mrg 	ldf8		f45 = [rp], 8
    367  1.1  mrg 	xma.hu		f42 = f34, f6, f46
    368  1.1  mrg 	;;
    369  1.1  mrg 	ldf8		f34 = [up], 8
    370  1.1  mrg 	getf.sig	r24 = f36		C FIXME
    371  1.1  mrg 	xma.l		f39 = f35, f6, f47
    372  1.1  mrg 	ldf8		f46 = [rp], 8
    373  1.1  mrg 	getf.sig	r28 = f40
    374  1.1  mrg 	xma.hu		f43 = f35, f6, f47
    375  1.1  mrg 	;;
    376  1.1  mrg 	ldf8		f35 = [up], 8
    377  1.1  mrg 	getf.sig	r25 = f37
    378  1.1  mrg 	ldf8		f47 = [rp], 8
    379  1.1  mrg 	br.cloop.dptk	.grt8			C falls through when ar.lc == 0
    380  1.1  mrg 
    381  1.1  mrg 	getf.sig	r29 = f41
    382  1.1  mrg 	stf8		[r20] = f36, 8		C FIXME
    383  1.1  mrg 	xma.l		f36 = f32, f6, f44
    384  1.1  mrg 	getf.sig	r26 = f38
    385  1.1  mrg 	getf.sig	r30 = f42
    386  1.1  mrg 	xma.hu		f40 = f32, f6, f44
    387  1.1  mrg 	;;
    388  1.1  mrg 	xma.l		f37 = f33, f6, f45
    389  1.1  mrg 	getf.sig	r27 = f39
    390  1.1  mrg 	xma.hu		f41 = f33, f6, f45
    391  1.1  mrg 	br		.Lcj8			C finish in the shared wind-down code
    392  1.1  mrg 
    393  1.1  mrg .grt8:	C prime the pipeline, then join the loop at .LL00
    394  1.1  mrg 	getf.sig	r29 = f41
    395  1.1  mrg 	xma.l		f36 = f32, f6, f44
    396  1.1  mrg 	mov		r31 = 0			C limb 0 has no preceding high half to add at .LL00
    397  1.1  mrg 	xma.hu		f40 = f32, f6, f44
    398  1.1  mrg 	;;
    399  1.1  mrg 	ldf8		f32 = [up], 8
    400  1.1  mrg 	getf.sig	r26 = f38
    401  1.1  mrg 	br		.LL00			C enter the main loop part-way through
    402  1.1  mrg 
    403  1.1  mrg 
    404  1.1  mrg C *** MAIN LOOP START ***  four limbs are stored per iteration; .LL00, .LL11 and .LL10 are mid-loop entry points
    405  1.1  mrg 	ALIGN(32)				C insn	fed	cycle #
    406  1.1  mrg .Loop:
    407  1.1  mrg 	.pred.rel "mutex", p6, p7		C num	by	i1 i2
    408  1.1  mrg 	getf.sig	r29 = f41		C 00	16	0   0
    409  1.1  mrg 	xma.l		f36 = f32, f6, f44	C 01	06,15	0   0
    410  1.1  mrg    (p6)	add		r14 = r30, r27, 1	C 02		0   0
    411  1.1  mrg 	ldf8		f47 = [rp], 8		C 03		0   0
    412  1.1  mrg 	xma.hu		f40 = f32, f6, f44	C 04	06,15	0   0
    413  1.1  mrg    (p7)	add		r14 = r30, r27		C 05		0   0
    414  1.1  mrg 	;;
    415  1.1  mrg 	.pred.rel "mutex", p6, p7		C carry out of the r14 sum goes to p8: leu after a +1, ltu otherwise
    416  1.1  mrg 	ldf8		f32 = [up], 8		C 06		1   1
    417  1.1  mrg    (p6)	cmp.leu		p8, p9 = r14, r27	C 07		1   1
    418  1.1  mrg    (p7)	cmp.ltu		p8, p9 = r14, r27	C 08		1   1
    419  1.1  mrg 	getf.sig	r26 = f38		C 09	25	2   1
    420  1.1  mrg 	st8		[r20] = r14, 8		C 10		2   1
    421  1.1  mrg 	nop.b		0			C 11		2   1
    422  1.1  mrg 	;;
    423  1.1  mrg .LL00:	C entry point used by the n mod 4 == 0 feed-in
    424  1.1  mrg 	.pred.rel "mutex", p8, p9		C p8 = carry into the r16 sum, p9 = no carry
    425  1.1  mrg 	getf.sig	r30 = f42		C 12	28	3   2
    426  1.1  mrg 	xma.l		f37 = f33, f6, f45	C 13	18,27	3   2
    427  1.1  mrg    (p8)	add		r16 = r31, r24, 1	C 14		3   2
    428  1.1  mrg 	ldf8		f44 = [rp], 8		C 15		3   2
    429  1.1  mrg 	xma.hu		f41 = f33, f6, f45	C 16	18,27	3   2
    430  1.1  mrg    (p9)	add		r16 = r31, r24		C 17		3   2
    431  1.1  mrg 	;;
    432  1.1  mrg 	.pred.rel "mutex", p8, p9		C carry out of the r16 sum goes to p6
    433  1.1  mrg 	ldf8		f33 = [up], 8		C 18		4   3
    434  1.1  mrg    (p8)	cmp.leu		p6, p7 = r16, r24	C 19		4   3
    435  1.1  mrg    (p9)	cmp.ltu		p6, p7 = r16, r24	C 20		4   3
    436  1.1  mrg 	getf.sig	r27 = f39		C 21	37	5   3
    437  1.1  mrg 	st8		[r20] = r16, 8		C 22		5   3
    438  1.1  mrg 	nop.b		0			C 23		5   3
    439  1.1  mrg 	;;
    440  1.1  mrg .LL11:	C entry point used by the n mod 4 == 3 feed-in
    441  1.1  mrg 	.pred.rel "mutex", p6, p7
    442  1.1  mrg 	getf.sig	r31 = f43		C 24	40	6   4
    443  1.1  mrg 	xma.l		f38 = f34, f6, f46	C 25	30,39	6   4
    444  1.1  mrg    (p6)	add		r14 = r28, r25, 1	C 26		6   4
    445  1.1  mrg 	ldf8		f45 = [rp], 8		C 27		6   4
    446  1.1  mrg 	xma.hu		f42 = f34, f6, f46	C 28	30,39	6   4
    447  1.1  mrg    (p7)	add		r14 = r28, r25		C 29		6   4
    448  1.1  mrg 	;;
    449  1.1  mrg 	.pred.rel "mutex", p6, p7
    450  1.1  mrg 	ldf8		f34 = [up], 8		C 30		7   5
    451  1.1  mrg    (p6)	cmp.leu		p8, p9 = r14, r25	C 31		7   5
    452  1.1  mrg    (p7)	cmp.ltu		p8, p9 = r14, r25	C 32		7   5
    453  1.1  mrg 	getf.sig	r24 = f36		C 33	01	8   5
    454  1.1  mrg 	st8		[r20] = r14, 8		C 34		8   5
    455  1.1  mrg 	nop.b		0			C 35		8   5
    456  1.1  mrg 	;;
    457  1.1  mrg .LL10:	C entry point used by the n mod 4 == 2 feed-in
    458  1.1  mrg 	.pred.rel "mutex", p8, p9
    459  1.1  mrg 	getf.sig	r28 = f40		C 36	04	9   6
    460  1.1  mrg 	xma.l		f39 = f35, f6, f47	C 37	42,03	9   6
    461  1.1  mrg    (p8)	add		r16 = r29, r26, 1	C 38		9   6
    462  1.1  mrg 	ldf8		f46 = [rp], 8		C 39		9   6
    463  1.1  mrg 	xma.hu		f43 = f35, f6, f47	C 40	42,03	9   6
    464  1.1  mrg    (p9)	add		r16 = r29, r26		C 41		9   6
    465  1.1  mrg 	;;
    466  1.1  mrg 	.pred.rel "mutex", p8, p9
    467  1.1  mrg 	ldf8		f35 = [up], 8		C 42	       10   7
    468  1.1  mrg    (p8)	cmp.leu		p6, p7 = r16, r26	C 43	       10   7
    469  1.1  mrg    (p9)	cmp.ltu		p6, p7 = r16, r26	C 44	       10   7
    470  1.1  mrg 	getf.sig	r25 = f37		C 45	13     11   7
    471  1.1  mrg 	st8		[r20] = r16, 8		C 46	       11   7
    472  1.1  mrg 	br.cloop.dptk	.Loop			C 47	       11   7
    473  1.1  mrg C *** MAIN LOOP END ***
    474  1.1  mrg 	;;
    475  1.1  mrg .Le0:	C wind-down: same steps as .Loop, with the loads and multiplies tapering off
    476  1.1  mrg 	.pred.rel "mutex", p6, p7
    477  1.1  mrg 	getf.sig	r29 = f41		C
    478  1.1  mrg 	xma.l		f36 = f32, f6, f44	C
    479  1.1  mrg    (p6)	add		r14 = r30, r27, 1	C high half + next low half + carry
    480  1.1  mrg 	ldf8		f47 = [rp], 8		C last load from rp
    481  1.1  mrg 	xma.hu		f40 = f32, f6, f44	C
    482  1.1  mrg    (p7)	add		r14 = r30, r27		C same sum without carry
    483  1.1  mrg 	;;
    484  1.1  mrg 	.pred.rel "mutex", p6, p7
    485  1.1  mrg    (p6)	cmp.leu		p8, p9 = r14, r27	C carry out after a +1
    486  1.1  mrg    (p7)	cmp.ltu		p8, p9 = r14, r27	C carry out of the plain sum
    487  1.1  mrg 	getf.sig	r26 = f38		C
    488  1.1  mrg 	st8		[r20] = r14, 8		C
    489  1.1  mrg 	;;
    490  1.1  mrg 	.pred.rel "mutex", p8, p9
    491  1.1  mrg 	getf.sig	r30 = f42		C
    492  1.1  mrg 	xma.l		f37 = f33, f6, f45	C
    493  1.1  mrg    (p8)	add		r16 = r31, r24, 1	C
    494  1.1  mrg 	xma.hu		f41 = f33, f6, f45	C
    495  1.1  mrg    (p9)	add		r16 = r31, r24		C
    496  1.1  mrg 	;;
    497  1.1  mrg 	.pred.rel "mutex", p8, p9
    498  1.1  mrg    (p8)	cmp.leu		p6, p7 = r16, r24	C
    499  1.1  mrg    (p9)	cmp.ltu		p6, p7 = r16, r24	C
    500  1.1  mrg 	getf.sig	r27 = f39		C
    501  1.1  mrg 	st8		[r20] = r16, 8		C
    502  1.1  mrg 	;;
    503  1.1  mrg .Lcj8:	C also entered directly from the .Lb00 feed-in
    504  1.1  mrg 	.pred.rel "mutex", p6, p7
    505  1.1  mrg 	getf.sig	r31 = f43		C
    506  1.1  mrg 	xma.l		f38 = f34, f6, f46	C
    507  1.1  mrg    (p6)	add		r14 = r28, r25, 1	C
    508  1.1  mrg 	xma.hu		f42 = f34, f6, f46	C
    509  1.1  mrg    (p7)	add		r14 = r28, r25		C
    510  1.1  mrg 	;;
    511  1.1  mrg 	.pred.rel "mutex", p6, p7
    512  1.1  mrg    (p6)	cmp.leu		p8, p9 = r14, r25	C
    513  1.1  mrg    (p7)	cmp.ltu		p8, p9 = r14, r25	C
    514  1.1  mrg 	getf.sig	r24 = f36		C
    515  1.1  mrg 	st8		[r20] = r14, 8		C
    516  1.1  mrg 	;;
    517  1.1  mrg .Lcj7:	C also entered directly from the .Lb11 feed-in when n == 7
    518  1.1  mrg 	.pred.rel "mutex", p8, p9
    519  1.1  mrg 	getf.sig	r28 = f40		C
    520  1.1  mrg 	xma.l		f39 = f35, f6, f47	C last multiply
    521  1.1  mrg    (p8)	add		r16 = r29, r26, 1	C
    522  1.1  mrg 	xma.hu		f43 = f35, f6, f47	C
    523  1.1  mrg    (p9)	add		r16 = r29, r26		C
    524  1.1  mrg 	;;
    525  1.1  mrg 	.pred.rel "mutex", p8, p9
    526  1.1  mrg    (p8)	cmp.leu		p6, p7 = r16, r26	C
    527  1.1  mrg    (p9)	cmp.ltu		p6, p7 = r16, r26	C
    528  1.1  mrg 	getf.sig	r25 = f37		C
    529  1.1  mrg 	st8		[r20] = r16, 8		C
    530  1.1  mrg 	;;
    531  1.1  mrg .Lcj6:	C also entered directly from the .Lb10 feed-in when n == 6
    532  1.1  mrg 	.pred.rel "mutex", p6, p7
    533  1.1  mrg 	getf.sig	r29 = f41		C
    534  1.1  mrg    (p6)	add		r14 = r30, r27, 1	C
    535  1.1  mrg    (p7)	add		r14 = r30, r27		C
    536  1.1  mrg 	;;
    537  1.1  mrg 	.pred.rel "mutex", p6, p7
    538  1.1  mrg    (p6)	cmp.leu		p8, p9 = r14, r27	C
    539  1.1  mrg    (p7)	cmp.ltu		p8, p9 = r14, r27	C
    540  1.1  mrg 	getf.sig	r26 = f38		C
    541  1.1  mrg 	st8		[r20] = r14, 8		C
    542  1.1  mrg 	;;
    543  1.1  mrg .Lcj5:	C also entered directly from the .Lb01 feed-in when n == 5
    544  1.1  mrg 	.pred.rel "mutex", p8, p9
    545  1.1  mrg 	getf.sig	r30 = f42		C
    546  1.1  mrg    (p8)	add		r16 = r31, r24, 1	C
    547  1.1  mrg    (p9)	add		r16 = r31, r24		C
    548  1.1  mrg 	;;
    549  1.1  mrg 	.pred.rel "mutex", p8, p9
    550  1.1  mrg    (p8)	cmp.leu		p6, p7 = r16, r24	C
    551  1.1  mrg    (p9)	cmp.ltu		p6, p7 = r16, r24	C
    552  1.1  mrg 	getf.sig	r27 = f39		C
    553  1.1  mrg 	st8		[r20] = r16, 8		C
    554  1.1  mrg 	;;
    555  1.1  mrg .Lcj4:	C also entered directly from the .Lb00 feed-in
    556  1.1  mrg 	.pred.rel "mutex", p6, p7
    557  1.1  mrg 	getf.sig	r8 = f43		C high half of the last limb, before the final carry
    558  1.1  mrg    (p6)	add		r14 = r28, r25, 1	C
    559  1.1  mrg    (p7)	add		r14 = r28, r25		C
    560  1.1  mrg 	;;
    561  1.1  mrg 	.pred.rel "mutex", p6, p7
    562  1.1  mrg 	st8		[r20] = r14, 8		C
    563  1.1  mrg    (p6)	cmp.leu		p8, p9 = r14, r25	C
    564  1.1  mrg    (p7)	cmp.ltu		p8, p9 = r14, r25	C
    565  1.1  mrg 	;;
    566  1.1  mrg .Lcj3:	C also entered directly from the .Lb11 feed-in when n == 3
    567  1.1  mrg 	.pred.rel "mutex", p8, p9
    568  1.1  mrg    (p8)	add		r16 = r29, r26, 1	C
    569  1.1  mrg    (p9)	add		r16 = r29, r26		C
    570  1.1  mrg 	;;
    571  1.1  mrg 	.pred.rel "mutex", p8, p9
    572  1.1  mrg 	st8		[r20] = r16, 8		C
    573  1.1  mrg    (p8)	cmp.leu		p6, p7 = r16, r26	C
    574  1.1  mrg    (p9)	cmp.ltu		p6, p7 = r16, r26	C
    575  1.1  mrg 	;;
    576  1.1  mrg .Lcj2:	C also entered directly from the .Lb10 feed-in when n == 2
    577  1.1  mrg 	.pred.rel "mutex", p6, p7
    578  1.1  mrg    (p6)	add		r14 = r30, r27, 1	C
    579  1.1  mrg    (p7)	add		r14 = r30, r27		C
    580  1.1  mrg 	;;
    581  1.1  mrg 	.pred.rel "mutex", p6, p7
    582  1.1  mrg 	st8		[r20] = r14		C final result limb, no post-increment
    583  1.1  mrg    (p6)	cmp.leu		p8, p9 = r14, r27	C
    584  1.1  mrg    (p7)	cmp.ltu		p8, p9 = r14, r27	C
    585  1.1  mrg 	;;
    586  1.1  mrg    (p8)	add		r8 = 1, r8		C M I	fold the last carry into the value left in r8
    587  1.1  mrg 	mov.i		ar.lc = r2		C I0	restore ar.lc
    588  1.1  mrg 	br.ret.sptk.many b0			C B
    589  1.1  mrg EPILOGUE()
    590  1.1  mrg ASM_END()
    591