Home | History | Annotate | Line # | Download | only in ia64
       1      1.1  mrg dnl  IA-64 mpn_bdiv_dbm1c.
      2      1.1  mrg 
      3  1.1.1.2  mrg dnl  Contributed to the GNU project by Torbjorn Granlund.
      4  1.1.1.2  mrg 
      5      1.1  mrg dnl  Copyright 2008, 2009 Free Software Foundation, Inc.
      6      1.1  mrg 
      7      1.1  mrg dnl  This file is part of the GNU MP Library.
      8  1.1.1.3  mrg dnl
      9      1.1  mrg dnl  The GNU MP Library is free software; you can redistribute it and/or modify
     10  1.1.1.3  mrg dnl  it under the terms of either:
     11  1.1.1.3  mrg dnl
     12  1.1.1.3  mrg dnl    * the GNU Lesser General Public License as published by the Free
     13  1.1.1.3  mrg dnl      Software Foundation; either version 3 of the License, or (at your
     14  1.1.1.3  mrg dnl      option) any later version.
     15  1.1.1.3  mrg dnl
     16  1.1.1.3  mrg dnl  or
     17  1.1.1.3  mrg dnl
     18  1.1.1.3  mrg dnl    * the GNU General Public License as published by the Free Software
     19  1.1.1.3  mrg dnl      Foundation; either version 2 of the License, or (at your option) any
     20  1.1.1.3  mrg dnl      later version.
     21  1.1.1.3  mrg dnl
     22  1.1.1.3  mrg dnl  or both in parallel, as here.
     23  1.1.1.3  mrg dnl
     24      1.1  mrg dnl  The GNU MP Library is distributed in the hope that it will be useful, but
     25      1.1  mrg dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
     26  1.1.1.3  mrg dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
     27  1.1.1.3  mrg dnl  for more details.
     28  1.1.1.3  mrg dnl
     29  1.1.1.3  mrg dnl  You should have received copies of the GNU General Public License and the
     30  1.1.1.3  mrg dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
     31  1.1.1.3  mrg dnl  see https://www.gnu.org/licenses/.
     32      1.1  mrg 
     33      1.1  mrg include(`../config.m4')
     34      1.1  mrg 
     35      1.1  mrg C         cycles/limb
     36      1.1  mrg C Itanium:    4
     37      1.1  mrg C Itanium 2:  2
     38      1.1  mrg 
     39      1.1  mrg C TODO
     40      1.1  mrg C  * Optimize feed-in and wind-down code, both for speed and code size.
     41      1.1  mrg 
     42      1.1  mrg C INPUT PARAMETERS
      43      1.1  mrg define(`rp', `r32')	C r32: pointer the result limbs are stored through (st8)
      44      1.1  mrg define(`up', `r33')	C r33: pointer the source limbs are loaded from (ldf8, post-increment 8)
      45      1.1  mrg define(`n', `r34')	C r34: limb count; n mod 4 picks the feed-in path, (n-1)/4 seeds ar.lc
      46      1.1  mrg define(`bd', `r35')	C r35: multiplier, copied to f6 and multiplied with every source limb
     47      1.1  mrg 
      48      1.1  mrg ASM_START()
                       C mpn_bdiv_dbm1c
                       C
                       C For each limb loaded through up, in order:
                       C   lo, hi = low and high 64 bits of limb * bd  (xma.l / xma.hu with f6)
                       C   d      = r15 - lo            (d is stored through rp)
                       C   r15    = d - hi - borrow     (borrow = 1 when r15 < lo, unsigned)
                       C r15 starts as a copy of r36 and its final value is written to r8 just
                       C before br.ret.  r36 is presumably the fifth incoming argument (TODO:
                       C confirm against the C prototype); it has no m4 name in the INPUT
                       C PARAMETERS list.
                       C
                       C The work is unrolled four limbs deep.  n mod 4 selects one of four
                       C feed-in sequences (.Lb01, .Lb10, .Lb11, .Lb00) and ar.lc is seeded with
                       C (n - 1) / 4.
                       C NOTE(review): n = 0 would make n - 1 wrap around; no guard is visible
                       C here, so the caller presumably guarantees n >= 1.
      49      1.1  mrg PROLOGUE(mpn_bdiv_dbm1c)
      50      1.1  mrg 	.prologue
                       C Unwind annotation: ar.lc is preserved in r2 (see mov r2 = ar.lc below).
      51      1.1  mrg 	.save		ar.lc, r2
      52      1.1  mrg 	.body
      53      1.1  mrg 
                       C 32-bit ABI only: widen both pointers with addp4 and zero-extend n.
      54      1.1  mrg ifdef(`HAVE_ABI_32',
      55      1.1  mrg `	addp4		rp = 0, rp		C M I
      56      1.1  mrg 	addp4		up = 0, up		C M I
      57      1.1  mrg 	zxt4		n = n			C I
      58      1.1  mrg 	;;
      59      1.1  mrg ')
                       C r15 = initial running value (from r36); f9 = first source limb, and up
                       C advances by 8.
      60      1.1  mrg {.mmb
      61      1.1  mrg 	mov		r15 = r36		C M I
      62      1.1  mrg 	ldf8		f9 = [up], 8		C M
      63      1.1  mrg 	nop.b		0			C B
      64      1.1  mrg }
                       C r16 = n - 1; r2 = caller ar.lc (restored before the return);
                       C r14 = n mod 4.
      65      1.1  mrg .Lcommon:
      66      1.1  mrg {.mii
      67      1.1  mrg 	adds		r16 = -1, n		C M I
      68      1.1  mrg 	mov		r2 = ar.lc		C I0
      69      1.1  mrg 	and		r14 = 3, n		C M I
      70      1.1  mrg 	;;
      71      1.1  mrg }
                       C f6 = bd as an FP significand; r31 = (n - 1) / 4; p10 = (n mod 4 == 0).
      72      1.1  mrg {.mii
      73      1.1  mrg 	setf.sig	f6 = bd			C M2 M3
      74      1.1  mrg 	shr.u		r31 = r16, 2		C I0
      75      1.1  mrg 	cmp.eq		p10, p0 = 0, r14	C M I
      76      1.1  mrg }
                       C p11 = (n mod 4 == 2); p12 = (n mod 4 == 3).
      77      1.1  mrg {.mii
      78      1.1  mrg 	nop.m		0			C M
      79      1.1  mrg 	cmp.eq		p11, p0 = 2, r14	C M I
      80      1.1  mrg 	cmp.eq		p12, p0 = 3, r14	C M I
      81      1.1  mrg 	;;
      82      1.1  mrg }
                       C cmp.ne r0, r0 is always false, so p6 = p8 = 0 and p7 = p9 = 1.
                       C p8 and p9 are not referenced again in this function.
                       C ar.lc = (n - 1) / 4.
      83      1.1  mrg {.mii
      84      1.1  mrg 	cmp.ne		p6, p7 = r0, r0		C M I
      85      1.1  mrg 	mov.i		ar.lc = r31		C I0
      86      1.1  mrg 	cmp.ne		p8, p9 = r0, r0		C M I
      87      1.1  mrg }
                       C Dispatch on n mod 4.  When none of p10-p12 is set, n mod 4 == 1 and
                       C execution falls through to .Lb01.
      88      1.1  mrg {.bbb
      89      1.1  mrg   (p10)	br.dptk		.Lb00			C B
      90      1.1  mrg   (p11)	br.dptk		.Lb10			C B
      91      1.1  mrg   (p12)	br.dptk		.Lb11			C B
      92      1.1  mrg 	;;
      93      1.1  mrg }
      94      1.1  mrg 
                       C Feed-in for n mod 4 == 1 (reached by falling through the dispatch).
                       C br.cloop branches while ar.lc != 0, which here means n >= 5.
                       C n == 1: the single product goes to f38/f39 and r26/r27, then the last
                       C wind-down step at .Lcj1 finishes the job.
      95      1.1  mrg .Lb01:	br.cloop.dptk	.grt1
      96      1.1  mrg 	;;
      97      1.1  mrg 	xma.l		f38 = f9, f6, f0
      98      1.1  mrg 	xma.hu		f39 = f9, f6, f0
      99      1.1  mrg 	;;
     100      1.1  mrg 	getf.sig	r26 = f38
     101      1.1  mrg 	getf.sig	r27 = f39
     102      1.1  mrg 	br		.Lcj1
     103      1.1  mrg 
                       C n >= 5: load four more limbs into f10-f13 and start the products of
                       C the first two limbs (f9 -> f38/f39, f10 -> f32/f33).  The second
                       C br.cloop separates n == 5 (wind-down at .Lcj5) from n >= 9 (.grt5).
     104      1.1  mrg .grt1:	ldf8		f10 = [r33], 8
     105      1.1  mrg 	;;
     106      1.1  mrg 	ldf8		f11 = [r33], 8
     107      1.1  mrg 	;;
     108      1.1  mrg 	ldf8		f12 = [r33], 8
     109      1.1  mrg 	;;
     110      1.1  mrg 	xma.l		f38 = f9, f6, f0
     111      1.1  mrg 	xma.hu		f39 = f9, f6, f0
     112      1.1  mrg 	;;
     113      1.1  mrg 	ldf8		f13 = [r33], 8
     114      1.1  mrg 	;;
     115      1.1  mrg 	xma.l		f32 = f10, f6, f0
     116      1.1  mrg 	xma.hu		f33 = f10, f6, f0
     117      1.1  mrg 	br.cloop.dptk	.grt5
     118      1.1  mrg 
                       C n == 5: no more loads.  r26/r27 are fetched before f38/f39 are reused
                       C for the product of f13.
     119      1.1  mrg 	;;
     120      1.1  mrg 	getf.sig	r26 = f38
     121      1.1  mrg 	xma.l		f34 = f11, f6, f0
     122      1.1  mrg 	xma.hu		f35 = f11, f6, f0
     123      1.1  mrg 	;;
     124      1.1  mrg 	getf.sig	r27 = f39
     125      1.1  mrg 	;;
     126      1.1  mrg 	getf.sig	r20 = f32
     127      1.1  mrg 	xma.l		f36 = f12, f6, f0
     128      1.1  mrg 	xma.hu		f37 = f12, f6, f0
     129      1.1  mrg 	;;
     130      1.1  mrg 	getf.sig	r21 = f33
     131      1.1  mrg 	;;
     132      1.1  mrg 	getf.sig	r22 = f34
     133      1.1  mrg 	xma.l		f38 = f13, f6, f0
     134      1.1  mrg 	xma.hu		f39 = f13, f6, f0
     135      1.1  mrg 	br		.Lcj5
     136      1.1  mrg 
                       C n >= 9: same schedule as the n == 5 path but with the next loads
                       C interleaved; joins the main loop at .LL01.
     137      1.1  mrg .grt5:	ldf8		f10 = [r33], 8
     138      1.1  mrg 	;;
     139      1.1  mrg 	getf.sig	r26 = f38
     140      1.1  mrg 	xma.l		f34 = f11, f6, f0
     141      1.1  mrg 	xma.hu		f35 = f11, f6, f0
     142      1.1  mrg 	;;
     143      1.1  mrg 	getf.sig	r27 = f39
     144      1.1  mrg 	ldf8		f11 = [r33], 8
     145      1.1  mrg 	;;
     146      1.1  mrg 	getf.sig	r20 = f32
     147      1.1  mrg 	xma.l		f36 = f12, f6, f0
     148      1.1  mrg 	xma.hu		f37 = f12, f6, f0
     149      1.1  mrg 	;;
     150      1.1  mrg 	getf.sig	r21 = f33
     151      1.1  mrg 	ldf8		f12 = [r33], 8
     152      1.1  mrg 	;;
     153      1.1  mrg 	getf.sig	r22 = f34
     154      1.1  mrg 	xma.l		f38 = f13, f6, f0
     155      1.1  mrg 	xma.hu		f39 = f13, f6, f0
     156      1.1  mrg 	br		.LL01
     157      1.1  mrg 
                       C Feed-in for n mod 4 == 2 (branched to when p11 is set).
                       C The second limb is loaded into f13; br.cloop branches when n >= 6.
                       C n == 2: products f9 -> f36/f37 (r24/r25) and f13 -> f38/f39 (r26/r27),
                       C then the last two wind-down steps starting at .Lcj2.
     158      1.1  mrg .Lb10:	ldf8		f13 = [r33], 8
     159      1.1  mrg 	br.cloop.dptk	.grt2
     160      1.1  mrg 	;;
     161      1.1  mrg 
     162      1.1  mrg 	xma.l		f36 = f9, f6, f0
     163      1.1  mrg 	xma.hu		f37 = f9, f6, f0
     164      1.1  mrg 	;;
     165      1.1  mrg 	xma.l		f38 = f13, f6, f0
     166      1.1  mrg 	xma.hu		f39 = f13, f6, f0
     167      1.1  mrg 	;;
     168      1.1  mrg 	getf.sig	r24 = f36
     169      1.1  mrg 	;;
     170      1.1  mrg 	getf.sig	r25 = f37
     171      1.1  mrg 	;;
     172      1.1  mrg 	getf.sig	r26 = f38
     173      1.1  mrg 	;;
     174      1.1  mrg 	getf.sig	r27 = f39
     175      1.1  mrg 	br		.Lcj2
     176      1.1  mrg 
                       C n >= 6: load four more limbs (f10, f11, f12, then f13 again once its
                       C old value has been multiplied).  The second br.cloop separates n == 6
                       C (wind-down at .Lcj6) from n >= 10 (.grt6).
     177      1.1  mrg .grt2:	ldf8		f10 = [r33], 8
     178      1.1  mrg 	;;
     179      1.1  mrg 	ldf8		f11 = [r33], 8
     180      1.1  mrg 	;;
     181      1.1  mrg 	xma.l		f36 = f9, f6, f0
     182      1.1  mrg 	xma.hu		f37 = f9, f6, f0
     183      1.1  mrg 	;;
     184      1.1  mrg 	ldf8		f12 = [r33], 8
     185      1.1  mrg 	;;
     186      1.1  mrg 	xma.l		f38 = f13, f6, f0
     187      1.1  mrg 	xma.hu		f39 = f13, f6, f0
     188      1.1  mrg 	;;
     189      1.1  mrg 	ldf8		f13 = [r33], 8
     190      1.1  mrg 	;;
     191      1.1  mrg 	getf.sig	r24 = f36
     192      1.1  mrg 	xma.l		f32 = f10, f6, f0
     193      1.1  mrg 	xma.hu		f33 = f10, f6, f0
     194      1.1  mrg 	br.cloop.dptk	.grt6
     195      1.1  mrg 
                       C n == 6: no more loads; r25 is fetched before f37 is reused.
     196      1.1  mrg 	getf.sig	r25 = f37
     197      1.1  mrg 	;;
     198      1.1  mrg 	getf.sig	r26 = f38
     199      1.1  mrg 	xma.l		f34 = f11, f6, f0
     200      1.1  mrg 	xma.hu		f35 = f11, f6, f0
     201      1.1  mrg 	;;
     202      1.1  mrg 	getf.sig	r27 = f39
     203      1.1  mrg 	;;
     204      1.1  mrg 	getf.sig	r20 = f32
     205      1.1  mrg 	xma.l		f36 = f12, f6, f0
     206      1.1  mrg 	xma.hu		f37 = f12, f6, f0
     207      1.1  mrg 	br		.Lcj6
     208      1.1  mrg 
                       C n >= 10: same schedule with the next loads interleaved; joins the
                       C main loop at .LL10.
     209      1.1  mrg .grt6:	getf.sig	r25 = f37
     210      1.1  mrg 	ldf8		f10 = [r33], 8
     211      1.1  mrg 	;;
     212      1.1  mrg 	getf.sig	r26 = f38
     213      1.1  mrg 	xma.l		f34 = f11, f6, f0
     214      1.1  mrg 	xma.hu		f35 = f11, f6, f0
     215      1.1  mrg 	;;
     216      1.1  mrg 	getf.sig	r27 = f39
     217      1.1  mrg 	ldf8		f11 = [r33], 8
     218      1.1  mrg 	;;
     219      1.1  mrg 	getf.sig	r20 = f32
     220      1.1  mrg 	xma.l		f36 = f12, f6, f0
     221      1.1  mrg 	xma.hu		f37 = f12, f6, f0
     222      1.1  mrg 	br		.LL10
     223      1.1  mrg 
     224      1.1  mrg 
                       C Feed-in for n mod 4 == 3 (branched to when p12 is set).
                       C Limbs two and three are loaded into f12 and f13; br.cloop branches
                       C when n >= 7.
                       C n == 3: products f9 -> f34/f35, f12 -> f36/f37, f13 -> f38/f39, then
                       C the last three wind-down steps starting at .Lcj3 (which fetches r27).
     225      1.1  mrg .Lb11:	ldf8		f12 = [r33], 8
     226      1.1  mrg 	;;
     227      1.1  mrg 	ldf8		f13 = [r33], 8
     228      1.1  mrg 	br.cloop.dptk	.grt3
     229      1.1  mrg 	;;
     230      1.1  mrg 
     231      1.1  mrg 	xma.l		f34 = f9, f6, f0
     232      1.1  mrg 	xma.hu		f35 = f9, f6, f0
     233      1.1  mrg 	;;
     234      1.1  mrg 	xma.l		f36 = f12, f6, f0
     235      1.1  mrg 	xma.hu		f37 = f12, f6, f0
     236      1.1  mrg 	;;
     237      1.1  mrg 	getf.sig	r22 = f34
     238      1.1  mrg 	xma.l		f38 = f13, f6, f0
     239      1.1  mrg 	xma.hu		f39 = f13, f6, f0
     240      1.1  mrg 	;;
     241      1.1  mrg 	getf.sig	r23 = f35
     242      1.1  mrg 	;;
     243      1.1  mrg 	getf.sig	r24 = f36
     244      1.1  mrg 	;;
     245      1.1  mrg 	getf.sig	r25 = f37
     246      1.1  mrg 	;;
     247      1.1  mrg 	getf.sig	r26 = f38
     248      1.1  mrg 	br		.Lcj3
     249      1.1  mrg 
                       C n >= 7: load four more limbs, reloading f12 and f13 once their old
                       C values have been multiplied.  The second br.cloop separates n == 7
                       C (wind-down at .Lcj7) from n >= 11 (.grt7).
     250      1.1  mrg .grt3:	ldf8		f10 = [r33], 8
     251      1.1  mrg 	;;
     252      1.1  mrg 	xma.l		f34 = f9, f6, f0
     253      1.1  mrg 	xma.hu		f35 = f9, f6, f0
     254      1.1  mrg 	;;
     255      1.1  mrg 	ldf8		f11 = [r33], 8
     256      1.1  mrg 	;;
     257      1.1  mrg 	xma.l		f36 = f12, f6, f0
     258      1.1  mrg 	xma.hu		f37 = f12, f6, f0
     259      1.1  mrg 	;;
     260      1.1  mrg 	ldf8		f12 = [r33], 8
     261      1.1  mrg 	;;
     262      1.1  mrg 	getf.sig	r22 = f34
     263      1.1  mrg 	xma.l		f38 = f13, f6, f0
     264      1.1  mrg 	xma.hu		f39 = f13, f6, f0
     265      1.1  mrg 	;;
     266      1.1  mrg 	getf.sig	r23 = f35
     267      1.1  mrg 	ldf8		f13 = [r33], 8
     268      1.1  mrg 	;;
     269      1.1  mrg 	getf.sig	r24 = f36
     270      1.1  mrg 	xma.l		f32 = f10, f6, f0
     271      1.1  mrg 	xma.hu		f33 = f10, f6, f0
     272      1.1  mrg 	br.cloop.dptk	.grt7
     273      1.1  mrg 
                       C n == 7: no more loads; r22/r23 were fetched above, before f34/f35
                       C are reused here.
     274      1.1  mrg 	getf.sig	r25 = f37
     275      1.1  mrg 	;;
     276      1.1  mrg 	getf.sig	r26 = f38
     277      1.1  mrg 	xma.l		f34 = f11, f6, f0
     278      1.1  mrg 	xma.hu		f35 = f11, f6, f0
     279      1.1  mrg 	br		.Lcj7
     280      1.1  mrg 
                       C n >= 11: same schedule with one more load; joins the main loop at
                       C .LL11.
     281      1.1  mrg .grt7:	getf.sig	r25 = f37
     282      1.1  mrg 	ldf8		f10 = [r33], 8
     283      1.1  mrg 	;;
     284      1.1  mrg 	getf.sig	r26 = f38
     285      1.1  mrg 	xma.l		f34 = f11, f6, f0
     286      1.1  mrg 	xma.hu		f35 = f11, f6, f0
     287      1.1  mrg 	br		.LL11
     288      1.1  mrg 
     289      1.1  mrg 
                       C Feed-in for n mod 4 == 0 (branched to when p10 is set).
                       C Limbs two to four are loaded into f11-f13; br.cloop branches when
                       C n >= 8.
                       C n == 4: products f9 -> f32/f33, f11 -> f34/f35, f12 -> f36/f37,
                       C f13 -> f38/f39, then the last four wind-down steps starting at .Lcj4.
     290      1.1  mrg .Lb00:	ldf8		f11 = [r33], 8
     291      1.1  mrg 	;;
     292      1.1  mrg 	ldf8		f12 = [r33], 8
     293      1.1  mrg 	;;
     294      1.1  mrg 	ldf8		f13 = [r33], 8
     295      1.1  mrg 	br.cloop.dptk	.grt4
     296      1.1  mrg 	;;
     297      1.1  mrg 
     298      1.1  mrg 	xma.l		f32 = f9, f6, f0
     299      1.1  mrg 	xma.hu		f33 = f9, f6, f0
     300      1.1  mrg 	;;
     301      1.1  mrg 	xma.l		f34 = f11, f6, f0
     302      1.1  mrg 	xma.hu		f35 = f11, f6, f0
     303      1.1  mrg 	;;
     304      1.1  mrg 	getf.sig	r20 = f32
     305      1.1  mrg 	xma.l		f36 = f12, f6, f0
     306      1.1  mrg 	xma.hu		f37 = f12, f6, f0
     307      1.1  mrg 	;;
     308      1.1  mrg 	getf.sig	r21 = f33
     309      1.1  mrg 	;;
     310      1.1  mrg 	getf.sig	r22 = f34
     311      1.1  mrg 	xma.l		f38 = f13, f6, f0
     312      1.1  mrg 	xma.hu		f39 = f13, f6, f0
     313      1.1  mrg 	;;
     314      1.1  mrg 	getf.sig	r23 = f35
     315      1.1  mrg 	;;
     316      1.1  mrg 	getf.sig	r24 = f36
     317      1.1  mrg 	br		.Lcj4
     318      1.1  mrg 
                       C n >= 8: multiply the first four limbs while loading the next four
                       C into f10-f13.  The closing br.cloop enters the main loop at .LL00
                       C when n >= 12; otherwise (n == 8) the wind-down is entered at .Lcj8.
     319      1.1  mrg .grt4:	xma.l		f32 = f9, f6, f0
     320      1.1  mrg 	xma.hu		f33 = f9, f6, f0
     321      1.1  mrg 	;;
     322      1.1  mrg 	ldf8		f10 = [r33], 8
     323      1.1  mrg 	;;
     324      1.1  mrg 	xma.l		f34 = f11, f6, f0
     325      1.1  mrg 	xma.hu		f35 = f11, f6, f0
     326      1.1  mrg 	;;
     327      1.1  mrg 	ldf8		f11 = [r33], 8
     328      1.1  mrg 	;;
     329      1.1  mrg 	getf.sig	r20 = f32
     330      1.1  mrg 	xma.l		f36 = f12, f6, f0
     331      1.1  mrg 	xma.hu		f37 = f12, f6, f0
     332      1.1  mrg 	;;
     333      1.1  mrg 	getf.sig	r21 = f33
     334      1.1  mrg 	ldf8		f12 = [r33], 8
     335      1.1  mrg 	;;
     336      1.1  mrg 	getf.sig	r22 = f34
     337      1.1  mrg 	xma.l		f38 = f13, f6, f0
     338      1.1  mrg 	xma.hu		f39 = f13, f6, f0
     339      1.1  mrg 	;;
     340      1.1  mrg 	getf.sig	r23 = f35
     341      1.1  mrg 	ldf8		f13 = [r33], 8
     342      1.1  mrg 	;;
     343      1.1  mrg 	getf.sig	r24 = f36
     344      1.1  mrg 	xma.l		f32 = f10, f6, f0
     345      1.1  mrg 	xma.hu		f33 = f10, f6, f0
     346      1.1  mrg 	br.cloop.dptk	.LL00
     347      1.1  mrg 	br		.Lcj8
     348      1.1  mrg 
     349      1.1  mrg C *** MAIN LOOP START ***
                       C Four limbs per iteration.  Each quarter of the loop does, for one limb:
                       C   cmp.ltu p6, p7 = r15, lo    (p6 = borrow, p7 = no borrow)
                       C   diff = r15 - lo             then st8 diff through r32, post-increment 8
                       C   (p6) r15 = diff - hi - 1    (p7) r15 = diff - hi
                       C while getf.sig, xma and ldf8 prepare later limbs.  (lo, hi, diff) rotate
                       C through
                       C   (r20, r21, r16)  (r22, r23, r17)  (r24, r25, r18)  (r26, r27, r19)
                       C fed from the product pairs f32/f33, f34/f35, f36/f37, f38/f39, and the
                       C source limbs rotate through f10, f11, f12, f13.
                       C .LL00, .LL11, .LL10 and .LL01 are the entry points branched to by the
                       C feed-in code.  The commented-out .mfi / .mib lines appear to record the
                       C intended bundle templates.
     350      1.1  mrg 	ALIGN(32)
     351      1.1  mrg .Ltop:
                       C p6 and p7 are written as a complementary pair by each cmp.ltu, so the
                       C two predicated subs that target r15 never both execute.
     352      1.1  mrg 	.pred.rel "mutex",p6,p7
     353      1.1  mrg C	.mfi
     354      1.1  mrg 	getf.sig	r24 = f36
     355      1.1  mrg 	xma.l		f32 = f10, f6, f0
     356      1.1  mrg   (p6)	sub		r15 = r19, r27, 1
     357      1.1  mrg C	.mfi
     358      1.1  mrg 	st8		[r32] = r19, 8
     359      1.1  mrg 	xma.hu		f33 = f10, f6, f0
     360      1.1  mrg   (p7)	sub		r15 = r19, r27
     361      1.1  mrg 	;;
     362      1.1  mrg .LL00:
     363      1.1  mrg C	.mfi
     364      1.1  mrg 	getf.sig	r25 = f37
     365      1.1  mrg 	nop.f 0
     366      1.1  mrg 	cmp.ltu		p6, p7 = r15, r20
     367      1.1  mrg C	.mib
     368      1.1  mrg 	ldf8		f10 = [r33], 8
     369      1.1  mrg 	sub		r16 = r15, r20
     370      1.1  mrg 	nop.b 0
     371      1.1  mrg 	;;
     372      1.1  mrg 
     373      1.1  mrg C	.mfi
     374      1.1  mrg 	getf.sig	r26 = f38
     375      1.1  mrg 	xma.l		f34 = f11, f6, f0
     376      1.1  mrg   (p6)	sub		r15 = r16, r21, 1
     377      1.1  mrg C	.mfi
     378      1.1  mrg 	st8		[r32] = r16, 8
     379      1.1  mrg 	xma.hu		f35 = f11, f6, f0
     380      1.1  mrg   (p7)	sub		r15 = r16, r21
     381      1.1  mrg 	;;
     382      1.1  mrg .LL11:
     383      1.1  mrg C	.mfi
     384      1.1  mrg 	getf.sig	r27 = f39
     385      1.1  mrg 	nop.f 0
     386      1.1  mrg 	cmp.ltu		p6, p7 = r15, r22
     387      1.1  mrg C	.mib
     388      1.1  mrg 	ldf8		f11 = [r33], 8
     389      1.1  mrg 	sub		r17 = r15, r22
     390      1.1  mrg 	nop.b 0
     391      1.1  mrg 	;;
     392      1.1  mrg 
     393      1.1  mrg C	.mfi
     394      1.1  mrg 	getf.sig	r20 = f32
     395      1.1  mrg 	xma.l		f36 = f12, f6, f0
     396      1.1  mrg   (p6)	sub		r15 = r17, r23, 1
     397      1.1  mrg C	.mfi
     398      1.1  mrg 	st8		[r32] = r17, 8
     399      1.1  mrg 	xma.hu		f37 = f12, f6, f0
     400      1.1  mrg   (p7)	sub		r15 = r17, r23
     401      1.1  mrg 	;;
     402      1.1  mrg .LL10:
     403      1.1  mrg C	.mfi
     404      1.1  mrg 	getf.sig	r21 = f33
     405      1.1  mrg 	nop.f 0
     406      1.1  mrg 	cmp.ltu		p6, p7 = r15, r24
     407      1.1  mrg C	.mib
     408      1.1  mrg 	ldf8		f12 = [r33], 8
     409      1.1  mrg 	sub		r18 = r15, r24
     410      1.1  mrg 	nop.b 0
     411      1.1  mrg 	;;
     412      1.1  mrg 
     413      1.1  mrg C	.mfi
     414      1.1  mrg 	getf.sig	r22 = f34
     415      1.1  mrg 	xma.l		f38 = f13, f6, f0
     416      1.1  mrg   (p6)	sub		r15 = r18, r25, 1
     417      1.1  mrg C	.mfi
     418      1.1  mrg 	st8		[r32] = r18, 8
     419      1.1  mrg 	xma.hu		f39 = f13, f6, f0
     420      1.1  mrg   (p7)	sub		r15 = r18, r25
     421      1.1  mrg 	;;
     422      1.1  mrg .LL01:
     423      1.1  mrg C	.mfi
     424      1.1  mrg 	getf.sig	r23 = f35
     425      1.1  mrg 	nop.f 0
     426      1.1  mrg 	cmp.ltu		p6, p7 = r15, r26
     427      1.1  mrg C	.mib
     428      1.1  mrg 	ldf8		f13 = [r33], 8
     429      1.1  mrg 	sub		r19 = r15, r26
                       C Loop back while ar.lc != 0; otherwise fall into the wind-down code.
     430      1.1  mrg 	br.cloop.sptk.few .Ltop
     431      1.1  mrg C *** MAIN LOOP END ***
     432      1.1  mrg 	;;
     433      1.1  mrg 
                       C Wind-down: the same per-limb steps as the main loop (compare, subtract
                       C low half, store, subtract high half and borrow) with no further loads.
                       C Branching to .LcjK from the feed-in code leaves exactly K st8
                       C instructions to execute (K = 8 down to 1).  Falling in from the loop
                       C first stores r19, the difference computed in the last loop quarter.
     434      1.1  mrg 	getf.sig	r24 = f36
     435      1.1  mrg 	xma.l		f32 = f10, f6, f0
     436      1.1  mrg   (p6)	sub		r15 = r19, r27, 1
     437      1.1  mrg 	st8		[r32] = r19, 8
     438      1.1  mrg 	xma.hu		f33 = f10, f6, f0
     439      1.1  mrg   (p7)	sub		r15 = r19, r27
     440      1.1  mrg 	;;
     441      1.1  mrg .Lcj8:	getf.sig	r25 = f37
     442      1.1  mrg 	cmp.ltu		p6, p7 = r15, r20
     443      1.1  mrg 	sub		r16 = r15, r20
     444      1.1  mrg 	;;
     445      1.1  mrg 	getf.sig	r26 = f38
     446      1.1  mrg 	xma.l		f34 = f11, f6, f0
     447      1.1  mrg   (p6)	sub		r15 = r16, r21, 1
     448      1.1  mrg 	st8		[r32] = r16, 8
     449      1.1  mrg 	xma.hu		f35 = f11, f6, f0
     450      1.1  mrg   (p7)	sub		r15 = r16, r21
     451      1.1  mrg 	;;
     452      1.1  mrg .Lcj7:	getf.sig	r27 = f39
     453      1.1  mrg 	cmp.ltu		p6, p7 = r15, r22
     454      1.1  mrg 	sub		r17 = r15, r22
     455      1.1  mrg 	;;
     456      1.1  mrg 	getf.sig	r20 = f32
     457      1.1  mrg 	xma.l		f36 = f12, f6, f0
     458      1.1  mrg   (p6)	sub		r15 = r17, r23, 1
     459      1.1  mrg 	st8		[r32] = r17, 8
     460      1.1  mrg 	xma.hu		f37 = f12, f6, f0
     461      1.1  mrg   (p7)	sub		r15 = r17, r23
     462      1.1  mrg 	;;
     463      1.1  mrg .Lcj6:	getf.sig	r21 = f33
     464      1.1  mrg 	cmp.ltu		p6, p7 = r15, r24
     465      1.1  mrg 	sub		r18 = r15, r24
     466      1.1  mrg 	;;
     467      1.1  mrg 	getf.sig	r22 = f34
     468      1.1  mrg 	xma.l		f38 = f13, f6, f0
     469      1.1  mrg   (p6)	sub		r15 = r18, r25, 1
     470      1.1  mrg 	st8		[r32] = r18, 8
     471      1.1  mrg 	xma.hu		f39 = f13, f6, f0
     472      1.1  mrg   (p7)	sub		r15 = r18, r25
     473      1.1  mrg 	;;
                       C From here on every product has been issued; only getf.sig transfers
                       C and the integer subtract / store chain remain.
     474      1.1  mrg .Lcj5:	getf.sig	r23 = f35
     475      1.1  mrg 	cmp.ltu		p6, p7 = r15, r26
     476      1.1  mrg 	sub		r19 = r15, r26
     477      1.1  mrg 	;;
     478      1.1  mrg 	getf.sig	r24 = f36
     479      1.1  mrg   (p6)	sub		r15 = r19, r27, 1
     480      1.1  mrg 	st8		[r32] = r19, 8
     481      1.1  mrg   (p7)	sub		r15 = r19, r27
     482      1.1  mrg 	;;
     483      1.1  mrg .Lcj4:	getf.sig	r25 = f37
     484      1.1  mrg 	cmp.ltu		p6, p7 = r15, r20
     485      1.1  mrg 	sub		r16 = r15, r20
     486      1.1  mrg 	;;
     487      1.1  mrg 	getf.sig	r26 = f38
     488      1.1  mrg   (p6)	sub		r15 = r16, r21, 1
     489      1.1  mrg 	st8		[r32] = r16, 8
     490      1.1  mrg   (p7)	sub		r15 = r16, r21
     491      1.1  mrg 	;;
     492      1.1  mrg .Lcj3:	getf.sig	r27 = f39
     493      1.1  mrg 	cmp.ltu		p6, p7 = r15, r22
     494      1.1  mrg 	sub		r17 = r15, r22
     495      1.1  mrg 	;;
     496      1.1  mrg   (p6)	sub		r15 = r17, r23, 1
     497      1.1  mrg 	st8		[r32] = r17, 8
     498      1.1  mrg   (p7)	sub		r15 = r17, r23
     499      1.1  mrg 	;;
     500      1.1  mrg .Lcj2:	cmp.ltu		p6, p7 = r15, r24
     501      1.1  mrg 	sub		r18 = r15, r24
     502      1.1  mrg 	;;
     503      1.1  mrg   (p6)	sub		r15 = r18, r25, 1
     504      1.1  mrg 	st8		[r32] = r18, 8
     505      1.1  mrg   (p7)	sub		r15 = r18, r25
     506      1.1  mrg 	;;
                       C Last limb: the final running value goes to r8 instead of r15, the
                       C store has no post-increment, and the ar.lc value saved in r2 at entry
                       C is put back before returning through b0.
     507      1.1  mrg .Lcj1:	cmp.ltu		p6, p7 = r15, r26
     508      1.1  mrg 	sub		r19 = r15, r26
     509      1.1  mrg 	;;
     510      1.1  mrg   (p6)	sub		r8 = r19, r27, 1
     511      1.1  mrg 	st8		[r32] = r19
     512      1.1  mrg   (p7)	sub		r8 = r19, r27
     513      1.1  mrg 	mov ar.lc = r2
     514      1.1  mrg 	br.ret.sptk.many b0
     515      1.1  mrg EPILOGUE()
     516      1.1  mrg ASM_END()
    517