Home | History | Annotate | Line # | Download | only in ia64
      1      1.1  mrg dnl  IA-64 mpn_addmul_1 -- Multiply a limb vector with a limb and add the
      2      1.1  mrg dnl  result to a second limb vector.
      3      1.1  mrg 
      4  1.1.1.2  mrg dnl  Contributed to the GNU project by Torbjorn Granlund.
      5  1.1.1.2  mrg 
      6  1.1.1.3  mrg dnl  Copyright 2000-2005, 2007 Free Software Foundation, Inc.
      7      1.1  mrg 
      8      1.1  mrg dnl  This file is part of the GNU MP Library.
      9  1.1.1.3  mrg dnl
     10      1.1  mrg dnl  The GNU MP Library is free software; you can redistribute it and/or modify
     11  1.1.1.3  mrg dnl  it under the terms of either:
     12  1.1.1.3  mrg dnl
     13  1.1.1.3  mrg dnl    * the GNU Lesser General Public License as published by the Free
     14  1.1.1.3  mrg dnl      Software Foundation; either version 3 of the License, or (at your
     15  1.1.1.3  mrg dnl      option) any later version.
     16  1.1.1.3  mrg dnl
     17  1.1.1.3  mrg dnl  or
     18  1.1.1.3  mrg dnl
     19  1.1.1.3  mrg dnl    * the GNU General Public License as published by the Free Software
     20  1.1.1.3  mrg dnl      Foundation; either version 2 of the License, or (at your option) any
     21  1.1.1.3  mrg dnl      later version.
     22  1.1.1.3  mrg dnl
     23  1.1.1.3  mrg dnl  or both in parallel, as here.
     24  1.1.1.3  mrg dnl
     25      1.1  mrg dnl  The GNU MP Library is distributed in the hope that it will be useful, but
     26      1.1  mrg dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
     27  1.1.1.3  mrg dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
     28  1.1.1.3  mrg dnl  for more details.
     29  1.1.1.3  mrg dnl
     30  1.1.1.3  mrg dnl  You should have received copies of the GNU General Public License and the
     31  1.1.1.3  mrg dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
     32  1.1.1.3  mrg dnl  see https://www.gnu.org/licenses/.
     33      1.1  mrg 
     34      1.1  mrg include(`../config.m4')
     35      1.1  mrg 
     36      1.1  mrg C         cycles/limb
     37      1.1  mrg C Itanium:    3.0
     38      1.1  mrg C Itanium 2:  2.0
     39      1.1  mrg 
     40      1.1  mrg C TODO
     41      1.1  mrg C  * Further optimize feed-in and wind-down code, both for speed and code size.
     42      1.1  mrg C  * Handle low limb input and results specially, using a common stf8 in the
     43      1.1  mrg C    epilogue.
     44      1.1  mrg C  * Use 1 c/l carry propagation scheme in wind-down code.
     45      1.1  mrg C  * Use extra pointer registers for `up' and rp to speed up feed-in loads.
     46      1.1  mrg C  * Work out final differences with mul_1.asm.  That function is 300 bytes
     47      1.1  mrg C    smaller than this due to better loop scheduling and thus simpler feed-in
     48      1.1  mrg C    code.
     49      1.1  mrg 
     50      1.1  mrg C INPUT PARAMETERS
     51      1.1  mrg define(`rp', `r32')	C result vector pointer; its limbs are loaded, added to, and stored back
     52      1.1  mrg define(`up', `r33')	C source vector pointer; its limbs are only loaded
     53      1.1  mrg define(`n', `r34')	C limb count; n mod 4 picks the feed-in path, (n-1)/4 is the loop count
     54      1.1  mrg define(`vl', `r35')	C multiplier limb; moved into f6 with setf.sig
     55      1.1  mrg 
     56      1.1  mrg ASM_START()
     57      1.1  mrg PROLOGUE(mpn_addmul_1)		C {rp,n} += {up,n} * vl; the carry-out limb is left in r8
     58      1.1  mrg 	.prologue
     59      1.1  mrg 	.save	ar.lc, r2		C unwind note: ar.lc is kept in r2 (copied below)
     60      1.1  mrg 	.body
     61      1.1  mrg C With HAVE_ABI_32 defined, widen the 32-bit pointers (addp4) and the count (zxt4) first.
     62      1.1  mrg ifdef(`HAVE_ABI_32',
     63      1.1  mrg `	addp4		rp = 0, rp		C M I
     64      1.1  mrg 	addp4		up = 0, up		C M I
     65      1.1  mrg 	zxt4		n = n			C I
     66      1.1  mrg 	;;
     67      1.1  mrg ')
     68      1.1  mrg {.mmi
     69      1.1  mrg 	adds		r15 = -1, n		C M I  r15 = n - 1
     70      1.1  mrg 	mov		r20 = rp		C M I  r20 = store pointer; rp itself is post-incremented by the loads
     71      1.1  mrg 	mov.i		r2 = ar.lc		C I0  save incoming ar.lc
     72      1.1  mrg }
     73      1.1  mrg {.mmi
     74      1.1  mrg 	ldf8		f7 = [up], 8		C M  f7 = up[0]
     75      1.1  mrg 	ldf8		f8 = [rp], 8		C M  f8 = rp[0]
     76      1.1  mrg 	and		r14 = 3, n		C M I  r14 = n mod 4, selects the feed-in path
     77      1.1  mrg 	;;
     78      1.1  mrg }
     79      1.1  mrg {.mmi
     80      1.1  mrg 	setf.sig	f6 = vl			C M2 M3  f6 = multiplier limb
     81      1.1  mrg 	cmp.eq		p10, p0 = 0, r14	C M I  p10: n mod 4 == 0
     82      1.1  mrg 	shr.u		r31 = r15, 2		C I0  r31 = (n-1)/4, loaded into ar.lc below
     83      1.1  mrg }
     84      1.1  mrg {.mmi
     85      1.1  mrg 	cmp.eq		p11, p0 = 2, r14	C M I  p11: n mod 4 == 2
     86      1.1  mrg 	cmp.eq		p12, p0 = 3, r14	C M I  p12: n mod 4 == 3
     87      1.1  mrg 	nop.i		0			C I
     88      1.1  mrg 	;;
     89      1.1  mrg }
     90      1.1  mrg {.mii
     91      1.1  mrg 	cmp.ne		p6, p7 = r0, r0		C M I  p6 = 0, p7 = 1: no carry pending
     92      1.1  mrg 	mov.i		ar.lc = r31		C I0
     93      1.1  mrg 	cmp.ne		p8, p9 = r0, r0		C M I  p8 = 0, p9 = 1: no carry pending
     94      1.1  mrg }
     95      1.1  mrg {.bbb
     96      1.1  mrg   (p10)	br.dptk		.Lb00			C B
     97      1.1  mrg   (p11)	br.dptk		.Lb10			C B
     98      1.1  mrg   (p12)	br.dptk		.Lb11			C B
     99      1.1  mrg 	;;
    100      1.1  mrg }
    101      1.1  mrg C n mod 4 == 1
    102      1.1  mrg .Lb01:	br.cloop.dptk	.grt1			C B  taken unless n == 1 (ar.lc == 0)
    103      1.1  mrg C n == 1: one multiply-add; low limb stored, high limb returned in r8
    104      1.1  mrg 	xma.l		f39 = f7, f6, f8	C F
    105      1.1  mrg 	xma.hu		f43 = f7, f6, f8	C F
    106      1.1  mrg 	;;
    107      1.1  mrg 	getf.sig	r8 = f43		C M2
    108      1.1  mrg 	stf8		[r20] = f39		C M2 M3
    109      1.1  mrg 	mov.i		ar.lc = r2		C I0
    110      1.1  mrg 	br.ret.sptk.many b0			C B
    111      1.1  mrg C n mod 4 == 1 and n >= 5
    112      1.1  mrg .grt1:
    113      1.1  mrg 	ldf8		f32 = [up], 8
    114      1.1  mrg 	ldf8		f44 = [rp], 8
    115      1.1  mrg 	;;
    116      1.1  mrg 	ldf8		f33 = [up], 8
    117      1.1  mrg 	ldf8		f45 = [rp], 8
    118      1.1  mrg 	;;
    119      1.1  mrg 	ldf8		f34 = [up], 8
    120      1.1  mrg 	xma.l		f39 = f7, f6, f8
    121      1.1  mrg 	ldf8		f46 = [rp], 8
    122      1.1  mrg 	xma.hu		f43 = f7, f6, f8
    123      1.1  mrg 	;;
    124      1.1  mrg 	ldf8		f35 = [up], 8
    125      1.1  mrg 	ldf8		f47 = [rp], 8
    126      1.1  mrg 	br.cloop.dptk	.grt5
    127      1.1  mrg C n == 5: finish the remaining products, then join the wind-down chain
    128      1.1  mrg 	xma.l		f36 = f32, f6, f44
    129      1.1  mrg 	xma.hu		f40 = f32, f6, f44
    130      1.1  mrg 	;;
    131      1.1  mrg 	stf8		[r20] = f39, 8
    132      1.1  mrg 	xma.l		f37 = f33, f6, f45
    133      1.1  mrg 	xma.hu		f41 = f33, f6, f45
    134      1.1  mrg 	;;
    135      1.1  mrg 	getf.sig	r31 = f43
    136      1.1  mrg 	getf.sig	r24 = f36
    137      1.1  mrg 	xma.l		f38 = f34, f6, f46
    138      1.1  mrg 	xma.hu		f42 = f34, f6, f46
    139      1.1  mrg 	;;
    140      1.1  mrg 	getf.sig	r28 = f40
    141      1.1  mrg 	getf.sig	r25 = f37
    142      1.1  mrg 	xma.l		f39 = f35, f6, f47
    143      1.1  mrg 	xma.hu		f43 = f35, f6, f47
    144      1.1  mrg 	;;
    145      1.1  mrg 	getf.sig	r29 = f41
    146      1.1  mrg 	getf.sig	r26 = f38
    147      1.1  mrg 	br		.Lcj5
    148      1.1  mrg C n mod 4 == 1 and n >= 9
    149      1.1  mrg .grt5:
    150      1.1  mrg 	mov		r30 = 0			C first sum has no incoming high limb
    151      1.1  mrg 	xma.l		f36 = f32, f6, f44
    152      1.1  mrg 	xma.hu		f40 = f32, f6, f44
    153      1.1  mrg 	;;
    154      1.1  mrg 	ldf8		f32 = [up], 8
    155      1.1  mrg 	xma.l		f37 = f33, f6, f45
    156      1.1  mrg 	ldf8		f44 = [rp], 8
    157      1.1  mrg 	xma.hu		f41 = f33, f6, f45
    158      1.1  mrg 	;;
    159      1.1  mrg 	ldf8		f33 = [up], 8
    160      1.1  mrg 	getf.sig	r27 = f39
    161      1.1  mrg 	;;
    162      1.1  mrg 	getf.sig	r31 = f43
    163      1.1  mrg 	xma.l		f38 = f34, f6, f46
    164      1.1  mrg 	ldf8		f45 = [rp], 8
    165      1.1  mrg 	xma.hu		f42 = f34, f6, f46
    166      1.1  mrg 	;;
    167      1.1  mrg 	ldf8		f34 = [up], 8
    168      1.1  mrg 	getf.sig	r24 = f36
    169      1.1  mrg 	;;
    170      1.1  mrg 	getf.sig	r28 = f40
    171      1.1  mrg 	xma.l		f39 = f35, f6, f47
    172      1.1  mrg 	ldf8		f46 = [rp], 8
    173      1.1  mrg 	xma.hu		f43 = f35, f6, f47
    174      1.1  mrg 	;;
    175      1.1  mrg 	ldf8		f35 = [up], 8
    176      1.1  mrg 	getf.sig	r25 = f37
    177      1.1  mrg 	br.cloop.dptk	.Loop			C enter the main loop if iterations remain
    178      1.1  mrg 	br		.Le0			C otherwise go straight to the wind-down code
    179      1.1  mrg 
    180      1.1  mrg C n mod 4 == 2
    181      1.1  mrg .Lb10:	ldf8		f35 = [up], 8
    182      1.1  mrg 	ldf8		f47 = [rp], 8
    183      1.1  mrg 	br.cloop.dptk	.grt2
    184      1.1  mrg C n == 2
    185      1.1  mrg 	xma.l		f38 = f7, f6, f8
    186      1.1  mrg 	xma.hu		f42 = f7, f6, f8
    187      1.1  mrg 	;;
    188      1.1  mrg 	xma.l		f39 = f35, f6, f47
    189      1.1  mrg 	xma.hu		f43 = f35, f6, f47
    190      1.1  mrg 	;;
    191      1.1  mrg 	getf.sig	r30 = f42
    192      1.1  mrg 	stf8		[r20] = f38, 8
    193      1.1  mrg 	getf.sig	r27 = f39
    194      1.1  mrg 	getf.sig	r8 = f43
    195      1.1  mrg 	br		.Lcj2
    196      1.1  mrg C n mod 4 == 2 and n >= 6
    197      1.1  mrg .grt2:
    198      1.1  mrg 	ldf8		f32 = [up], 8
    199      1.1  mrg 	ldf8		f44 = [rp], 8
    200      1.1  mrg 	;;
    201      1.1  mrg 	ldf8		f33 = [up], 8
    202      1.1  mrg 	xma.l		f38 = f7, f6, f8
    203      1.1  mrg 	ldf8		f45 = [rp], 8
    204      1.1  mrg 	xma.hu		f42 = f7, f6, f8
    205      1.1  mrg 	;;
    206      1.1  mrg 	ldf8		f34 = [up], 8
    207      1.1  mrg 	xma.l		f39 = f35, f6, f47
    208      1.1  mrg 	ldf8		f46 = [rp], 8
    209      1.1  mrg 	xma.hu		f43 = f35, f6, f47
    210      1.1  mrg 	;;
    211      1.1  mrg 	ldf8		f35 = [up], 8
    212      1.1  mrg 	ldf8		f47 = [rp], 8
    213      1.1  mrg 	br.cloop.dptk	.grt6
    214      1.1  mrg C n == 6
    215      1.1  mrg 	stf8		[r20] = f38, 8
    216      1.1  mrg 	xma.l		f36 = f32, f6, f44
    217      1.1  mrg 	xma.hu		f40 = f32, f6, f44
    218      1.1  mrg 	;;
    219      1.1  mrg 	getf.sig	r30 = f42
    220      1.1  mrg 	getf.sig	r27 = f39
    221      1.1  mrg 	xma.l		f37 = f33, f6, f45
    222      1.1  mrg 	xma.hu		f41 = f33, f6, f45
    223      1.1  mrg 	;;
    224      1.1  mrg 	getf.sig	r31 = f43
    225      1.1  mrg 	getf.sig	r24 = f36
    226      1.1  mrg 	xma.l		f38 = f34, f6, f46
    227      1.1  mrg 	xma.hu		f42 = f34, f6, f46
    228      1.1  mrg 	;;
    229      1.1  mrg 	getf.sig	r28 = f40
    230      1.1  mrg 	getf.sig	r25 = f37
    231      1.1  mrg 	xma.l		f39 = f35, f6, f47
    232      1.1  mrg 	xma.hu		f43 = f35, f6, f47
    233      1.1  mrg 	br		.Lcj6
    234      1.1  mrg C n mod 4 == 2 and n >= 10; joins the main loop at .LL10
    235      1.1  mrg .grt6:
    236      1.1  mrg 	mov		r29 = 0			C first sum has no incoming high limb
    237      1.1  mrg 	xma.l		f36 = f32, f6, f44
    238      1.1  mrg 	xma.hu		f40 = f32, f6, f44
    239      1.1  mrg 	;;
    240      1.1  mrg 	ldf8		f32 = [up], 8
    241      1.1  mrg 	getf.sig	r26 = f38
    242      1.1  mrg 	;;
    243      1.1  mrg 	getf.sig	r30 = f42
    244      1.1  mrg 	xma.l		f37 = f33, f6, f45
    245      1.1  mrg 	ldf8		f44 = [rp], 8
    246      1.1  mrg 	xma.hu		f41 = f33, f6, f45
    247      1.1  mrg 	;;
    248      1.1  mrg 	ldf8		f33 = [up], 8
    249      1.1  mrg 	getf.sig	r27 = f39
    250      1.1  mrg 	;;
    251      1.1  mrg 	getf.sig	r31 = f43
    252      1.1  mrg 	xma.l		f38 = f34, f6, f46
    253      1.1  mrg 	ldf8		f45 = [rp], 8
    254      1.1  mrg 	xma.hu		f42 = f34, f6, f46
    255      1.1  mrg 	;;
    256      1.1  mrg 	ldf8		f34 = [up], 8
    257      1.1  mrg 	getf.sig	r24 = f36
    258      1.1  mrg 	br		.LL10
    259      1.1  mrg 
    260      1.1  mrg C n mod 4 == 3
    261      1.1  mrg .Lb11:	ldf8		f34 = [up], 8
    262      1.1  mrg 	ldf8		f46 = [rp], 8
    263      1.1  mrg 	;;
    264      1.1  mrg 	ldf8		f35 = [up], 8
    265      1.1  mrg 	ldf8		f47 = [rp], 8
    266      1.1  mrg 	br.cloop.dptk	.grt3
    267      1.1  mrg 	;;
    268      1.1  mrg C n == 3
    269      1.1  mrg 	xma.l		f37 = f7, f6, f8
    270      1.1  mrg 	xma.hu		f41 = f7, f6, f8
    271      1.1  mrg 	xma.l		f38 = f34, f6, f46
    272      1.1  mrg 	xma.hu		f42 = f34, f6, f46
    273      1.1  mrg 	xma.l		f39 = f35, f6, f47
    274      1.1  mrg 	xma.hu		f43 = f35, f6, f47
    275      1.1  mrg 	;;
    276      1.1  mrg 	getf.sig	r29 = f41
    277      1.1  mrg 	stf8		[r20] = f37, 8
    278      1.1  mrg 	getf.sig	r26 = f38
    279      1.1  mrg 	getf.sig	r30 = f42
    280      1.1  mrg 	getf.sig	r27 = f39
    281      1.1  mrg 	getf.sig	r8 = f43
    282      1.1  mrg 	br		.Lcj3
    283      1.1  mrg C n mod 4 == 3 and n >= 7
    284      1.1  mrg .grt3:
    285      1.1  mrg 	ldf8		f32 = [up], 8
    286      1.1  mrg 	xma.l		f37 = f7, f6, f8
    287      1.1  mrg 	ldf8		f44 = [rp], 8
    288      1.1  mrg 	xma.hu		f41 = f7, f6, f8
    289      1.1  mrg 	;;
    290      1.1  mrg 	ldf8		f33 = [up], 8
    291      1.1  mrg 	xma.l		f38 = f34, f6, f46
    292      1.1  mrg 	ldf8		f45 = [rp], 8
    293      1.1  mrg 	xma.hu		f42 = f34, f6, f46
    294      1.1  mrg 	;;
    295      1.1  mrg 	ldf8		f34 = [up], 8
    296      1.1  mrg 	xma.l		f39 = f35, f6, f47
    297      1.1  mrg 	ldf8		f46 = [rp], 8
    298      1.1  mrg 	xma.hu		f43 = f35, f6, f47
    299      1.1  mrg 	;;
    300      1.1  mrg 	ldf8		f35 = [up], 8
    301      1.1  mrg 	getf.sig	r25 = f37		C FIXME
    302      1.1  mrg 	ldf8		f47 = [rp], 8
    303      1.1  mrg 	br.cloop.dptk	.grt7
    304      1.1  mrg C n == 7
    305      1.1  mrg 	getf.sig	r29 = f41
    306      1.1  mrg 	stf8		[r20] = f37, 8		C FIXME
    307      1.1  mrg 	xma.l		f36 = f32, f6, f44
    308      1.1  mrg 	getf.sig	r26 = f38
    309      1.1  mrg 	xma.hu		f40 = f32, f6, f44
    310      1.1  mrg 	;;
    311      1.1  mrg 	getf.sig	r30 = f42
    312      1.1  mrg 	xma.l		f37 = f33, f6, f45
    313      1.1  mrg 	getf.sig	r27 = f39
    314      1.1  mrg 	xma.hu		f41 = f33, f6, f45
    315      1.1  mrg 	;;
    316      1.1  mrg 	getf.sig	r31 = f43
    317      1.1  mrg 	xma.l		f38 = f34, f6, f46
    318      1.1  mrg 	getf.sig	r24 = f36
    319      1.1  mrg 	xma.hu		f42 = f34, f6, f46
    320      1.1  mrg 	br		.Lcj7
    321      1.1  mrg C n mod 4 == 3 and n >= 11; joins the main loop at .LL11
    322      1.1  mrg .grt7:
    323      1.1  mrg 	getf.sig	r29 = f41
    324      1.1  mrg 	xma.l		f36 = f32, f6, f44
    325      1.1  mrg 	mov		r28 = 0			C first sum has no incoming high limb
    326      1.1  mrg 	xma.hu		f40 = f32, f6, f44
    327      1.1  mrg 	;;
    328      1.1  mrg 	ldf8		f32 = [up], 8
    329      1.1  mrg 	getf.sig	r26 = f38
    330      1.1  mrg 	;;
    331      1.1  mrg 	getf.sig	r30 = f42
    332      1.1  mrg 	xma.l		f37 = f33, f6, f45
    333      1.1  mrg 	ldf8		f44 = [rp], 8
    334      1.1  mrg 	xma.hu		f41 = f33, f6, f45
    335      1.1  mrg 	;;
    336      1.1  mrg 	ldf8		f33 = [up], 8
    337      1.1  mrg 	getf.sig	r27 = f39
    338      1.1  mrg 	br		.LL11
    339      1.1  mrg 
    340      1.1  mrg C n mod 4 == 0
    341      1.1  mrg .Lb00:	ldf8		f33 = [up], 8
    342      1.1  mrg 	ldf8		f45 = [rp], 8
    343      1.1  mrg 	;;
    344      1.1  mrg 	ldf8		f34 = [up], 8
    345      1.1  mrg 	ldf8		f46 = [rp], 8
    346      1.1  mrg 	;;
    347      1.1  mrg 	ldf8		f35 = [up], 8
    348      1.1  mrg 	xma.l		f36 = f7, f6, f8
    349      1.1  mrg 	ldf8		f47 = [rp], 8
    350      1.1  mrg 	xma.hu		f40 = f7, f6, f8
    351      1.1  mrg 	br.cloop.dptk	.grt4
    352      1.1  mrg C n == 4
    353      1.1  mrg 	xma.l		f37 = f33, f6, f45
    354      1.1  mrg 	xma.hu		f41 = f33, f6, f45
    355      1.1  mrg 	xma.l		f38 = f34, f6, f46
    356      1.1  mrg 	xma.hu		f42 = f34, f6, f46
    357      1.1  mrg 	;;
    358      1.1  mrg 	getf.sig	r28 = f40
    359      1.1  mrg 	stf8		[r20] = f36, 8
    360      1.1  mrg 	xma.l		f39 = f35, f6, f47
    361      1.1  mrg 	getf.sig	r25 = f37
    362      1.1  mrg 	xma.hu		f43 = f35, f6, f47
    363      1.1  mrg 	;;
    364      1.1  mrg 	getf.sig	r29 = f41
    365      1.1  mrg 	getf.sig	r26 = f38
    366      1.1  mrg 	getf.sig	r30 = f42
    367      1.1  mrg 	getf.sig	r27 = f39
    368      1.1  mrg 	br		.Lcj4
    369      1.1  mrg C n mod 4 == 0 and n >= 8
    370      1.1  mrg .grt4:
    371      1.1  mrg 	ldf8		f32 = [up], 8
    372      1.1  mrg 	xma.l		f37 = f33, f6, f45
    373      1.1  mrg 	ldf8		f44 = [rp], 8
    374      1.1  mrg 	xma.hu		f41 = f33, f6, f45
    375      1.1  mrg 	;;
    376      1.1  mrg 	ldf8		f33 = [up], 8
    377      1.1  mrg 	xma.l		f38 = f34, f6, f46
    378      1.1  mrg 	ldf8		f45 = [rp], 8
    379      1.1  mrg 	xma.hu		f42 = f34, f6, f46
    380      1.1  mrg 	;;
    381      1.1  mrg 	ldf8		f34 = [up], 8
    382      1.1  mrg 	getf.sig	r24 = f36		C FIXME
    383      1.1  mrg 	xma.l		f39 = f35, f6, f47
    384      1.1  mrg 	ldf8		f46 = [rp], 8
    385      1.1  mrg 	getf.sig	r28 = f40
    386      1.1  mrg 	xma.hu		f43 = f35, f6, f47
    387      1.1  mrg 	;;
    388      1.1  mrg 	ldf8		f35 = [up], 8
    389      1.1  mrg 	getf.sig	r25 = f37
    390      1.1  mrg 	ldf8		f47 = [rp], 8
    391      1.1  mrg 	br.cloop.dptk	.grt8
    392      1.1  mrg C n == 8
    393      1.1  mrg 	getf.sig	r29 = f41
    394      1.1  mrg 	stf8		[r20] = f36, 8		C FIXME
    395      1.1  mrg 	xma.l		f36 = f32, f6, f44
    396      1.1  mrg 	getf.sig	r26 = f38
    397      1.1  mrg 	getf.sig	r30 = f42
    398      1.1  mrg 	xma.hu		f40 = f32, f6, f44
    399      1.1  mrg 	;;
    400      1.1  mrg 	xma.l		f37 = f33, f6, f45
    401      1.1  mrg 	getf.sig	r27 = f39
    402      1.1  mrg 	xma.hu		f41 = f33, f6, f45
    403      1.1  mrg 	br		.Lcj8
    404      1.1  mrg C n mod 4 == 0 and n >= 12; joins the main loop at .LL00
    405      1.1  mrg .grt8:
    406      1.1  mrg 	getf.sig	r29 = f41
    407      1.1  mrg 	xma.l		f36 = f32, f6, f44
    408      1.1  mrg 	mov		r31 = 0			C first sum has no incoming high limb
    409      1.1  mrg 	xma.hu		f40 = f32, f6, f44
    410      1.1  mrg 	;;
    411      1.1  mrg 	ldf8		f32 = [up], 8
    412      1.1  mrg 	getf.sig	r26 = f38
    413      1.1  mrg 	br		.LL00
    414      1.1  mrg C Carry scheme: p6/p7 and p8/p9 are complementary carry/no-carry pairs; with a carry in, the add includes +1 and cmp.leu detects the carry out, otherwise cmp.ltu does.
    415      1.1  mrg C Each pass of the loop stores four result limbs; .LL00, .LL11 and .LL10 are mid-loop entry points used by the feed-in code.
    416      1.1  mrg C *** MAIN LOOP START ***
    417      1.1  mrg 	ALIGN(32)				C insn	fed	cycle #
    418      1.1  mrg .Loop:
    419      1.1  mrg 	.pred.rel "mutex", p6, p7		C num	by	i1 i2
    420      1.1  mrg 	getf.sig	r29 = f41		C 00	16	0   0
    421      1.1  mrg 	xma.l		f36 = f32, f6, f44	C 01	06,15	0   0
    422      1.1  mrg    (p6)	add		r14 = r30, r27, 1	C 02		0   0
    423      1.1  mrg 	ldf8		f47 = [rp], 8		C 03		0   0
    424      1.1  mrg 	xma.hu		f40 = f32, f6, f44	C 04	06,15	0   0
    425      1.1  mrg    (p7)	add		r14 = r30, r27		C 05		0   0
    426      1.1  mrg 	;;
    427      1.1  mrg 	.pred.rel "mutex", p6, p7
    428      1.1  mrg 	ldf8		f32 = [up], 8		C 06		1   1
    429      1.1  mrg    (p6)	cmp.leu		p8, p9 = r14, r27	C 07		1   1
    430      1.1  mrg    (p7)	cmp.ltu		p8, p9 = r14, r27	C 08		1   1
    431      1.1  mrg 	getf.sig	r26 = f38		C 09	25	2   1
    432      1.1  mrg 	st8		[r20] = r14, 8		C 10		2   1
    433      1.1  mrg 	nop.b		0			C 11		2   1
    434      1.1  mrg 	;;
    435      1.1  mrg .LL00:
    436      1.1  mrg 	.pred.rel "mutex", p8, p9
    437      1.1  mrg 	getf.sig	r30 = f42		C 12	28	3   2
    438      1.1  mrg 	xma.l		f37 = f33, f6, f45	C 13	18,27	3   2
    439      1.1  mrg    (p8)	add		r16 = r31, r24, 1	C 14		3   2
    440      1.1  mrg 	ldf8		f44 = [rp], 8		C 15		3   2
    441      1.1  mrg 	xma.hu		f41 = f33, f6, f45	C 16	18,27	3   2
    442      1.1  mrg    (p9)	add		r16 = r31, r24		C 17		3   2
    443      1.1  mrg 	;;
    444      1.1  mrg 	.pred.rel "mutex", p8, p9
    445      1.1  mrg 	ldf8		f33 = [up], 8		C 18		4   3
    446      1.1  mrg    (p8)	cmp.leu		p6, p7 = r16, r24	C 19		4   3
    447      1.1  mrg    (p9)	cmp.ltu		p6, p7 = r16, r24	C 20		4   3
    448      1.1  mrg 	getf.sig	r27 = f39		C 21	37	5   3
    449      1.1  mrg 	st8		[r20] = r16, 8		C 22		5   3
    450      1.1  mrg 	nop.b		0			C 23		5   3
    451      1.1  mrg 	;;
    452      1.1  mrg .LL11:
    453      1.1  mrg 	.pred.rel "mutex", p6, p7
    454      1.1  mrg 	getf.sig	r31 = f43		C 24	40	6   4
    455      1.1  mrg 	xma.l		f38 = f34, f6, f46	C 25	30,39	6   4
    456      1.1  mrg    (p6)	add		r14 = r28, r25, 1	C 26		6   4
    457      1.1  mrg 	ldf8		f45 = [rp], 8		C 27		6   4
    458      1.1  mrg 	xma.hu		f42 = f34, f6, f46	C 28	30,39	6   4
    459      1.1  mrg    (p7)	add		r14 = r28, r25		C 29		6   4
    460      1.1  mrg 	;;
    461      1.1  mrg 	.pred.rel "mutex", p6, p7
    462      1.1  mrg 	ldf8		f34 = [up], 8		C 30		7   5
    463      1.1  mrg    (p6)	cmp.leu		p8, p9 = r14, r25	C 31		7   5
    464      1.1  mrg    (p7)	cmp.ltu		p8, p9 = r14, r25	C 32		7   5
    465      1.1  mrg 	getf.sig	r24 = f36		C 33	01	8   5
    466      1.1  mrg 	st8		[r20] = r14, 8		C 34		8   5
    467      1.1  mrg 	nop.b		0			C 35		8   5
    468      1.1  mrg 	;;
    469      1.1  mrg .LL10:
    470      1.1  mrg 	.pred.rel "mutex", p8, p9
    471      1.1  mrg 	getf.sig	r28 = f40		C 36	04	9   6
    472      1.1  mrg 	xma.l		f39 = f35, f6, f47	C 37	42,03	9   6
    473      1.1  mrg    (p8)	add		r16 = r29, r26, 1	C 38		9   6
    474      1.1  mrg 	ldf8		f46 = [rp], 8		C 39		9   6
    475      1.1  mrg 	xma.hu		f43 = f35, f6, f47	C 40	42,03	9   6
    476      1.1  mrg    (p9)	add		r16 = r29, r26		C 41		9   6
    477      1.1  mrg 	;;
    478      1.1  mrg 	.pred.rel "mutex", p8, p9
    479      1.1  mrg 	ldf8		f35 = [up], 8		C 42	       10   7
    480      1.1  mrg    (p8)	cmp.leu		p6, p7 = r16, r26	C 43	       10   7
    481      1.1  mrg    (p9)	cmp.ltu		p6, p7 = r16, r26	C 44	       10   7
    482      1.1  mrg 	getf.sig	r25 = f37		C 45	13     11   7
    483      1.1  mrg 	st8		[r20] = r16, 8		C 46	       11   7
    484      1.1  mrg 	br.cloop.dptk	.Loop			C 47	       11   7
    485      1.1  mrg C *** MAIN LOOP END ***
    486      1.1  mrg 	;;
    487      1.1  mrg .Le0:	C wind-down: no further loads from up; each .LcjN below is entered directly by the n == N feed-in path
    488      1.1  mrg 	.pred.rel "mutex", p6, p7
    489      1.1  mrg 	getf.sig	r29 = f41		C
    490      1.1  mrg 	xma.l		f36 = f32, f6, f44	C
    491      1.1  mrg    (p6)	add		r14 = r30, r27, 1	C
    492      1.1  mrg 	ldf8		f47 = [rp], 8		C
    493      1.1  mrg 	xma.hu		f40 = f32, f6, f44	C
    494      1.1  mrg    (p7)	add		r14 = r30, r27		C
    495      1.1  mrg 	;;
    496      1.1  mrg 	.pred.rel "mutex", p6, p7
    497      1.1  mrg    (p6)	cmp.leu		p8, p9 = r14, r27	C
    498      1.1  mrg    (p7)	cmp.ltu		p8, p9 = r14, r27	C
    499      1.1  mrg 	getf.sig	r26 = f38		C
    500      1.1  mrg 	st8		[r20] = r14, 8		C
    501      1.1  mrg 	;;
    502      1.1  mrg 	.pred.rel "mutex", p8, p9
    503      1.1  mrg 	getf.sig	r30 = f42		C
    504      1.1  mrg 	xma.l		f37 = f33, f6, f45	C
    505      1.1  mrg    (p8)	add		r16 = r31, r24, 1	C
    506      1.1  mrg 	xma.hu		f41 = f33, f6, f45	C
    507      1.1  mrg    (p9)	add		r16 = r31, r24		C
    508      1.1  mrg 	;;
    509      1.1  mrg 	.pred.rel "mutex", p8, p9
    510      1.1  mrg    (p8)	cmp.leu		p6, p7 = r16, r24	C
    511      1.1  mrg    (p9)	cmp.ltu		p6, p7 = r16, r24	C
    512      1.1  mrg 	getf.sig	r27 = f39		C
    513      1.1  mrg 	st8		[r20] = r16, 8		C
    514      1.1  mrg 	;;
    515      1.1  mrg .Lcj8:
    516      1.1  mrg 	.pred.rel "mutex", p6, p7
    517      1.1  mrg 	getf.sig	r31 = f43		C
    518      1.1  mrg 	xma.l		f38 = f34, f6, f46	C
    519      1.1  mrg    (p6)	add		r14 = r28, r25, 1	C
    520      1.1  mrg 	xma.hu		f42 = f34, f6, f46	C
    521      1.1  mrg    (p7)	add		r14 = r28, r25		C
    522      1.1  mrg 	;;
    523      1.1  mrg 	.pred.rel "mutex", p6, p7
    524      1.1  mrg    (p6)	cmp.leu		p8, p9 = r14, r25	C
    525      1.1  mrg    (p7)	cmp.ltu		p8, p9 = r14, r25	C
    526      1.1  mrg 	getf.sig	r24 = f36		C
    527      1.1  mrg 	st8		[r20] = r14, 8		C
    528      1.1  mrg 	;;
    529      1.1  mrg .Lcj7:
    530      1.1  mrg 	.pred.rel "mutex", p8, p9
    531      1.1  mrg 	getf.sig	r28 = f40		C
    532      1.1  mrg 	xma.l		f39 = f35, f6, f47	C
    533      1.1  mrg    (p8)	add		r16 = r29, r26, 1	C
    534      1.1  mrg 	xma.hu		f43 = f35, f6, f47	C
    535      1.1  mrg    (p9)	add		r16 = r29, r26		C
    536      1.1  mrg 	;;
    537      1.1  mrg 	.pred.rel "mutex", p8, p9
    538      1.1  mrg    (p8)	cmp.leu		p6, p7 = r16, r26	C
    539      1.1  mrg    (p9)	cmp.ltu		p6, p7 = r16, r26	C
    540      1.1  mrg 	getf.sig	r25 = f37		C
    541      1.1  mrg 	st8		[r20] = r16, 8		C
    542      1.1  mrg 	;;
    543      1.1  mrg .Lcj6:
    544      1.1  mrg 	.pred.rel "mutex", p6, p7
    545      1.1  mrg 	getf.sig	r29 = f41		C
    546      1.1  mrg    (p6)	add		r14 = r30, r27, 1	C
    547      1.1  mrg    (p7)	add		r14 = r30, r27		C
    548      1.1  mrg 	;;
    549      1.1  mrg 	.pred.rel "mutex", p6, p7
    550      1.1  mrg    (p6)	cmp.leu		p8, p9 = r14, r27	C
    551      1.1  mrg    (p7)	cmp.ltu		p8, p9 = r14, r27	C
    552      1.1  mrg 	getf.sig	r26 = f38		C
    553      1.1  mrg 	st8		[r20] = r14, 8		C
    554      1.1  mrg 	;;
    555      1.1  mrg .Lcj5:
    556      1.1  mrg 	.pred.rel "mutex", p8, p9
    557      1.1  mrg 	getf.sig	r30 = f42		C
    558      1.1  mrg    (p8)	add		r16 = r31, r24, 1	C
    559      1.1  mrg    (p9)	add		r16 = r31, r24		C
    560      1.1  mrg 	;;
    561      1.1  mrg 	.pred.rel "mutex", p8, p9
    562      1.1  mrg    (p8)	cmp.leu		p6, p7 = r16, r24	C
    563      1.1  mrg    (p9)	cmp.ltu		p6, p7 = r16, r24	C
    564      1.1  mrg 	getf.sig	r27 = f39		C
    565      1.1  mrg 	st8		[r20] = r16, 8		C
    566      1.1  mrg 	;;
    567      1.1  mrg .Lcj4:
    568      1.1  mrg 	.pred.rel "mutex", p6, p7
    569      1.1  mrg 	getf.sig	r8 = f43		C high part of the last product; becomes the carry-out limb
    570      1.1  mrg    (p6)	add		r14 = r28, r25, 1	C
    571      1.1  mrg    (p7)	add		r14 = r28, r25		C
    572      1.1  mrg 	;;
    573      1.1  mrg 	.pred.rel "mutex", p6, p7
    574      1.1  mrg 	st8		[r20] = r14, 8		C
    575      1.1  mrg    (p6)	cmp.leu		p8, p9 = r14, r25	C
    576      1.1  mrg    (p7)	cmp.ltu		p8, p9 = r14, r25	C
    577      1.1  mrg 	;;
    578      1.1  mrg .Lcj3:
    579      1.1  mrg 	.pred.rel "mutex", p8, p9
    580      1.1  mrg    (p8)	add		r16 = r29, r26, 1	C
    581      1.1  mrg    (p9)	add		r16 = r29, r26		C
    582      1.1  mrg 	;;
    583      1.1  mrg 	.pred.rel "mutex", p8, p9
    584      1.1  mrg 	st8		[r20] = r16, 8		C
    585      1.1  mrg    (p8)	cmp.leu		p6, p7 = r16, r26	C
    586      1.1  mrg    (p9)	cmp.ltu		p6, p7 = r16, r26	C
    587      1.1  mrg 	;;
    588      1.1  mrg .Lcj2:
    589      1.1  mrg 	.pred.rel "mutex", p6, p7
    590      1.1  mrg    (p6)	add		r14 = r30, r27, 1	C
    591      1.1  mrg    (p7)	add		r14 = r30, r27		C
    592      1.1  mrg 	;;
    593      1.1  mrg 	.pred.rel "mutex", p6, p7
    594      1.1  mrg 	st8		[r20] = r14		C last result limb; no post-increment needed
    595      1.1  mrg    (p6)	cmp.leu		p8, p9 = r14, r27	C
    596      1.1  mrg    (p7)	cmp.ltu		p8, p9 = r14, r27	C
    597      1.1  mrg 	;;
    598      1.1  mrg    (p8)	add		r8 = 1, r8		C M I  add the final carry to the high limb in r8
    599      1.1  mrg 	mov.i		ar.lc = r2		C I0  restore the saved ar.lc
    600      1.1  mrg 	br.ret.sptk.many b0			C B
    601      1.1  mrg EPILOGUE()
    602      1.1  mrg ASM_END()
    603