Home | History | Annotate | Line # | Download | only in pa64
      1      1.1  mrg dnl  HP-PA 2.0 64-bit mpn_addmul_1 -- Multiply a limb vector with a limb and
      2      1.1  mrg dnl  add the result to a second limb vector.
      3      1.1  mrg 
      4  1.1.1.2  mrg dnl  Copyright 1998-2000, 2002, 2003 Free Software Foundation, Inc.
      5      1.1  mrg 
      6      1.1  mrg dnl  This file is part of the GNU MP Library.
      7  1.1.1.2  mrg dnl
      8      1.1  mrg dnl  The GNU MP Library is free software; you can redistribute it and/or modify
      9  1.1.1.2  mrg dnl  it under the terms of either:
     10  1.1.1.2  mrg dnl
     11  1.1.1.2  mrg dnl    * the GNU Lesser General Public License as published by the Free
     12  1.1.1.2  mrg dnl      Software Foundation; either version 3 of the License, or (at your
     13  1.1.1.2  mrg dnl      option) any later version.
     14  1.1.1.2  mrg dnl
     15  1.1.1.2  mrg dnl  or
     16  1.1.1.2  mrg dnl
     17  1.1.1.2  mrg dnl    * the GNU General Public License as published by the Free Software
     18  1.1.1.2  mrg dnl      Foundation; either version 2 of the License, or (at your option) any
     19  1.1.1.2  mrg dnl      later version.
     20  1.1.1.2  mrg dnl
     21  1.1.1.2  mrg dnl  or both in parallel, as here.
     22  1.1.1.2  mrg dnl
     23      1.1  mrg dnl  The GNU MP Library is distributed in the hope that it will be useful, but
     24      1.1  mrg dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
     25  1.1.1.2  mrg dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
     26  1.1.1.2  mrg dnl  for more details.
     27  1.1.1.2  mrg dnl
     28  1.1.1.2  mrg dnl  You should have received copies of the GNU General Public License and the
     29  1.1.1.2  mrg dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
     30  1.1.1.2  mrg dnl  see https://www.gnu.org/licenses/.
     31      1.1  mrg 
     32      1.1  mrg include(`../config.m4')
     33      1.1  mrg 
     34      1.1  mrg C		    cycles/limb
     35      1.1  mrg C 8000,8200:		7
     36      1.1  mrg C 8500,8600,8700:	6.375
     37      1.1  mrg 
     38      1.1  mrg C  The feed-in and wind-down code has not yet been scheduled.  Many cycles
     39      1.1  mrg C  could be saved there per call.
     40      1.1  mrg 
     41      1.1  mrg C  DESCRIPTION:
     42      1.1  mrg C  The main loop "BIG" is 4-way unrolled, mainly to allow
     43      1.1  mrg C  effective use of ADD,DC.  Delays in moving data via the cache from the FP
     44      1.1  mrg C  registers to the IU registers, have demanded a deep software pipeline, and
     45      1.1  mrg C  a lot of stack slots for partial products in flight.
     46      1.1  mrg C
     47      1.1  mrg C  CODE STRUCTURE:
     48      1.1  mrg C  save-some-registers
     49      1.1  mrg C  do 0, 1, 2, or 3 limbs
     50      1.1  mrg C  if done, restore-some-regs and return
     51      1.1  mrg C  save-many-regs
     52      1.1  mrg C  do 4, 8, ... limbs
     53      1.1  mrg C  restore-all-regs
     54      1.1  mrg 
     55      1.1  mrg C  STACK LAYOUT:
     56      1.1  mrg C  HP-PA stack grows upwards.  We could allocate 8 fewer slots by using the
     57      1.1  mrg C  slots marked FREE, as well as some slots in the caller's "frame marker".
     58      1.1  mrg C
     59      1.1  mrg C -00 <- r30
     60      1.1  mrg C -08  FREE
     61      1.1  mrg C -10  tmp
     62      1.1  mrg C -18  tmp
     63      1.1  mrg C -20  tmp
     64      1.1  mrg C -28  tmp
     65      1.1  mrg C -30  tmp
     66      1.1  mrg C -38  tmp
     67      1.1  mrg C -40  tmp
     68      1.1  mrg C -48  tmp
     69      1.1  mrg C -50  tmp
     70      1.1  mrg C -58  tmp
     71      1.1  mrg C -60  tmp
     72      1.1  mrg C -68  tmp
     73      1.1  mrg C -70  tmp
     74      1.1  mrg C -78  tmp
     75      1.1  mrg C -80  tmp
     76      1.1  mrg C -88  tmp
     77      1.1  mrg C -90  FREE
     78      1.1  mrg C -98  FREE
     79      1.1  mrg C -a0  FREE
     80      1.1  mrg C -a8  FREE
     81      1.1  mrg C -b0  r13
     82      1.1  mrg C -b8  r12
     83      1.1  mrg C -c0  r11
     84      1.1  mrg C -c8  r10
     85      1.1  mrg C -d0  r9
     86      1.1  mrg C -d8  r8
     87      1.1  mrg C -e0  r7
     88      1.1  mrg C -e8  r6
     89      1.1  mrg C -f0  r5
     90      1.1  mrg C -f8  r4
     91      1.1  mrg C -100 r3
     92      1.1  mrg C  Previous frame:
     93      1.1  mrg C  [unused area]
     94      1.1  mrg C -38/-138 vlimb home slot.  For 2.0N, the vlimb arg will arrive here.
     95      1.1  mrg 
     96      1.1  mrg 
     97      1.1  mrg include(`../config.m4')
     98      1.1  mrg C  NOTE(review): config.m4 is already included earlier in this file; this second include looks redundant -- confirm.
     99      1.1  mrg C INPUT PARAMETERS:
    100      1.1  mrg define(`rp',`%r26')	C limb vector that is read, added to and stored back
    101      1.1  mrg define(`up',`%r25')	C source limb vector that is multiplied by vlimb
    102      1.1  mrg define(`n',`%r24')	C limb count
    103      1.1  mrg define(`vlimb',`%r23')	C the single multiplier limb
    104      1.1  mrg 
    105      1.1  mrg define(`climb',`%r23')	C carry limb; shares %r23 with vlimb, which is only read back from its stack home slot
    106      1.1  mrg 
    107      1.1  mrg ifdef(`HAVE_ABI_2_0w',
    108      1.1  mrg `	.level	2.0w
    109      1.1  mrg ',`	.level	2.0
    110      1.1  mrg ')
    111      1.1  mrg PROLOGUE(mpn_addmul_1)
    112      1.1  mrg C  Entry: open the frame, get vlimb into %fr8, then handle the n mod 4 leftover limbs one at a time.
    113      1.1  mrg ifdef(`HAVE_ABI_2_0w',
    114      1.1  mrg `	std		vlimb, -0x38(%r30)	C store vlimb into "home" slot
    115      1.1  mrg ')
    116      1.1  mrg 	std,ma		%r3, 0x100(%r30)	C save r3 at the old r30, then advance r30 by 0x100
    117      1.1  mrg 	std		%r4, -0xf8(%r30)	C save r4
    118      1.1  mrg 	std		%r5, -0xf0(%r30)	C save r5
    119      1.1  mrg 	ldo		0(%r0), climb		C clear climb
    120      1.1  mrg 	fldd		-0x138(%r30), %fr8	C put vlimb in fp register
    121      1.1  mrg 
    122      1.1  mrg define(`p032a1',`%r1')	C first mid product (32x32 cross term)
    123      1.1  mrg define(`p032a2',`%r19')	C second mid product (32x32 cross term)
    124      1.1  mrg 
    125      1.1  mrg define(`m032',`%r20')	C sum of the two mid products
    126      1.1  mrg define(`m096',`%r21')	C carry out of that sum
    127      1.1  mrg 
    128      1.1  mrg define(`p000a',`%r22')	C low product
    129      1.1  mrg define(`p064a',`%r29')	C high product
    130      1.1  mrg 
    131      1.1  mrg define(`s000',`%r31')	C result limb being accumulated
    132      1.1  mrg 
    133      1.1  mrg define(`ma000',`%r4')	C mid sum shifted left 32 bits, added into the result limb
    134      1.1  mrg define(`ma064',`%r20')	C mid sum shifted right 32 bits plus the mid carry, added into climb
    135      1.1  mrg 
    136      1.1  mrg define(`r000',`%r3')	C limb loaded from rp
    137      1.1  mrg 
    138      1.1  mrg 	extrd,u		n, 63, 2, %r5	C r5 = low two bits of n = n mod 4
    139      1.1  mrg 	cmpb,=		%r5, %r0, L(BIG)	C no leftover limbs: go straight to the 4-way code
    140      1.1  mrg 	nop	C branch delay slot
    141      1.1  mrg 
    142      1.1  mrg 	fldd		0(up), %fr4	C first leftover up limb
    143      1.1  mrg 	ldo		8(up), up
    144      1.1  mrg 	xmpyu		%fr8R, %fr4L, %fr22
    145      1.1  mrg 	xmpyu		%fr8L, %fr4R, %fr23
    146      1.1  mrg 	fstd		%fr22, -0x78(%r30)	C mid product to  -0x78..-0x71
    147      1.1  mrg 	xmpyu		%fr8R, %fr4R, %fr24
    148      1.1  mrg 	xmpyu		%fr8L, %fr4L, %fr25
    149      1.1  mrg 	fstd		%fr23, -0x70(%r30)	C mid product to  -0x70..-0x69
    150      1.1  mrg 	fstd		%fr24, -0x80(%r30)	C low product to  -0x80..-0x79
    151      1.1  mrg 	addib,<>	-1, %r5, L(two_or_more)	C decrement the leftover count; taken when more than one leftover limb
    152      1.1  mrg 	fstd		%fr25, -0x68(%r30)	C high product to -0x68..-0x61
    153      1.1  mrg LDEF(one)		C exactly one leftover limb: reload its four partial products into integer registers
    154      1.1  mrg 	ldd		-0x78(%r30), p032a1
    155      1.1  mrg 	ldd		-0x70(%r30), p032a2
    156      1.1  mrg 	ldd		-0x80(%r30), p000a
    157      1.1  mrg 	b		L(0_one_out)
    158      1.1  mrg 	ldd		-0x68(%r30), p064a	C executed in the branch delay slot
    159      1.1  mrg 
    160      1.1  mrg LDEF(two_or_more)		C multiply the second leftover limb; each ldd picks up a product of the first limb before its slot is overwritten
    161      1.1  mrg 	fldd		0(up), %fr4
    162      1.1  mrg 	ldo		8(up), up
    163      1.1  mrg 	xmpyu		%fr8R, %fr4L, %fr22
    164      1.1  mrg 	xmpyu		%fr8L, %fr4R, %fr23
    165      1.1  mrg 	ldd		-0x78(%r30), p032a1
    166      1.1  mrg 	fstd		%fr22, -0x78(%r30)	C mid product to  -0x78..-0x71
    167      1.1  mrg 	xmpyu		%fr8R, %fr4R, %fr24
    168      1.1  mrg 	xmpyu		%fr8L, %fr4L, %fr25
    169      1.1  mrg 	ldd		-0x70(%r30), p032a2
    170      1.1  mrg 	fstd		%fr23, -0x70(%r30)	C mid product to  -0x70..-0x69
    171      1.1  mrg 	ldd		-0x80(%r30), p000a
    172      1.1  mrg 	fstd		%fr24, -0x80(%r30)	C low product to  -0x80..-0x79
    173      1.1  mrg 	ldd		-0x68(%r30), p064a
    174      1.1  mrg 	addib,<>	-1, %r5, L(three_or_more)	C taken when a third leftover limb remains
    175      1.1  mrg 	fstd		%fr25, -0x68(%r30)	C high product to -0x68..-0x61
    176      1.1  mrg LDEF(two)		C exactly two leftover limbs: prepare the first limb for 0_two_out
    177      1.1  mrg 	add		p032a1, p032a2, m032	C sum the two mid products
    178      1.1  mrg 	add,dc		%r0, %r0, m096	C m096 = carry out of that sum
    179      1.1  mrg 	depd,z		m032, 31, 32, ma000	C ma000 = m032 shifted left 32
    180      1.1  mrg 	extrd,u		m032, 31, 32, ma064	C ma064 = m032 shifted right 32
    181      1.1  mrg 	ldd		0(rp), r000
    182      1.1  mrg 	b		L(0_two_out)
    183      1.1  mrg 	depd		m096, 31, 32, ma064	C delay slot: place the mid carry in the upper half of ma064
    184      1.1  mrg 
    185      1.1  mrg LDEF(three_or_more)		C load the third leftover limb and prepare the mid sum of the first one
    186      1.1  mrg 	fldd		0(up), %fr4
    187      1.1  mrg 	add		p032a1, p032a2, m032	C sum the two mid products
    188      1.1  mrg 	add,dc		%r0, %r0, m096	C m096 = carry out of that sum
    189      1.1  mrg 	depd,z		m032, 31, 32, ma000	C ma000 = m032 shifted left 32
    190      1.1  mrg 	extrd,u		m032, 31, 32, ma064	C ma064 = m032 shifted right 32
    191      1.1  mrg 	ldd		0(rp), r000
    192      1.1  mrg C	addib,=		-1, %r5, L(0_out)
    193      1.1  mrg 	depd		m096, 31, 32, ma064	C place the mid carry in the upper half of ma064
    194      1.1  mrg LDEF(loop0)		C the loop body below is commented out, so control falls through to 0_out
    195      1.1  mrg C	xmpyu		%fr8R, %fr4L, %fr22
    196      1.1  mrg C	xmpyu		%fr8L, %fr4R, %fr23
    197      1.1  mrg C	ldd		-0x78(%r30), p032a1
    198      1.1  mrg C	fstd		%fr22, -0x78(%r30)	C mid product to  -0x78..-0x71
    199      1.1  mrg C
    200      1.1  mrg C	xmpyu		%fr8R, %fr4R, %fr24
    201      1.1  mrg C	xmpyu		%fr8L, %fr4L, %fr25
    202      1.1  mrg C	ldd		-0x70(%r30), p032a2
    203      1.1  mrg C	fstd		%fr23, -0x70(%r30)	C mid product to  -0x70..-0x69
    204      1.1  mrg C
    205      1.1  mrg C	ldo		8(rp), rp
    206      1.1  mrg C	add		climb, p000a, s000
    207      1.1  mrg C	ldd		-0x80(%r30), p000a
    208      1.1  mrg C	fstd		%fr24, -0x80(%r30)	C low product to  -0x80..-0x79
    209      1.1  mrg C
    210      1.1  mrg C	add,dc		p064a, %r0, climb
    211      1.1  mrg C	ldo		8(up), up
    212      1.1  mrg C	ldd		-0x68(%r30), p064a
    213      1.1  mrg C	fstd		%fr25, -0x68(%r30)	C high product to -0x68..-0x61
    214      1.1  mrg C
    215      1.1  mrg C	add		ma000, s000, s000
    216      1.1  mrg C	add,dc		ma064, climb, climb
    217      1.1  mrg C	fldd		0(up), %fr4
    218      1.1  mrg C
    219      1.1  mrg C	add		r000, s000, s000
    220      1.1  mrg C	add,dc		%r0, climb, climb
    221      1.1  mrg C	std		s000, -8(rp)
    222      1.1  mrg C
    223      1.1  mrg C	add		p032a1, p032a2, m032
    224      1.1  mrg C	add,dc		%r0, %r0, m096
    225      1.1  mrg C
    226      1.1  mrg C	depd,z		m032, 31, 32, ma000
    227      1.1  mrg C	extrd,u		m032, 31, 32, ma064
    228      1.1  mrg C	ldd		0(rp), r000
    229      1.1  mrg C	addib,<>	-1, %r5, L(loop0)
    230      1.1  mrg C	depd		m096, 31, 32, ma064
    231      1.1  mrg LDEF(0_out)		C multiply the last leftover limb while finishing and storing the oldest one
    232      1.1  mrg 	ldo		8(up), up
    233      1.1  mrg 	xmpyu		%fr8R, %fr4L, %fr22
    234      1.1  mrg 	xmpyu		%fr8L, %fr4R, %fr23
    235      1.1  mrg 	ldd		-0x78(%r30), p032a1
    236      1.1  mrg 	fstd		%fr22, -0x78(%r30)	C mid product to  -0x78..-0x71
    237      1.1  mrg 	xmpyu		%fr8R, %fr4R, %fr24
    238      1.1  mrg 	xmpyu		%fr8L, %fr4L, %fr25
    239      1.1  mrg 	ldd		-0x70(%r30), p032a2
    240      1.1  mrg 	fstd		%fr23, -0x70(%r30)	C mid product to  -0x70..-0x69
    241      1.1  mrg 	ldo		8(rp), rp
    242      1.1  mrg 	add		climb, p000a, s000
    243      1.1  mrg 	ldd		-0x80(%r30), p000a
    244      1.1  mrg 	fstd		%fr24, -0x80(%r30)	C low product to  -0x80..-0x79
    245      1.1  mrg 	add,dc		p064a, %r0, climb
    246      1.1  mrg 	ldd		-0x68(%r30), p064a
    247      1.1  mrg 	fstd		%fr25, -0x68(%r30)	C high product to -0x68..-0x61
    248      1.1  mrg 	add		ma000, s000, s000
    249      1.1  mrg 	add,dc		ma064, climb, climb
    250      1.1  mrg 	add		r000, s000, s000
    251      1.1  mrg 	add,dc		%r0, climb, climb
    252      1.1  mrg 	std		s000, -8(rp)	C rp was already advanced, so -8 addresses the limb just finished
    253      1.1  mrg 	add		p032a1, p032a2, m032
    254      1.1  mrg 	add,dc		%r0, %r0, m096
    255      1.1  mrg 	depd,z		m032, 31, 32, ma000
    256      1.1  mrg 	extrd,u		m032, 31, 32, ma064
    257      1.1  mrg 	ldd		0(rp), r000
    258      1.1  mrg 	depd		m096, 31, 32, ma064
    259      1.1  mrg LDEF(0_two_out)		C no more multiplies: finish the second-to-last leftover limb and reload the last products
    260      1.1  mrg 	ldd		-0x78(%r30), p032a1
    261      1.1  mrg 	ldd		-0x70(%r30), p032a2
    262      1.1  mrg 	ldo		8(rp), rp
    263      1.1  mrg 	add		climb, p000a, s000
    264      1.1  mrg 	ldd		-0x80(%r30), p000a
    265      1.1  mrg 	add,dc		p064a, %r0, climb
    266      1.1  mrg 	ldd		-0x68(%r30), p064a
    267      1.1  mrg 	add		ma000, s000, s000
    268      1.1  mrg 	add,dc		ma064, climb, climb
    269      1.1  mrg 	add		r000, s000, s000
    270      1.1  mrg 	add,dc		%r0, climb, climb
    271      1.1  mrg 	std		s000, -8(rp)
    272      1.1  mrg LDEF(0_one_out)		C finish the final leftover limb
    273      1.1  mrg 	add		p032a1, p032a2, m032	C sum the two mid products
    274      1.1  mrg 	add,dc		%r0, %r0, m096	C m096 = carry out of that sum
    275      1.1  mrg 	depd,z		m032, 31, 32, ma000	C ma000 = m032 shifted left 32
    276      1.1  mrg 	extrd,u		m032, 31, 32, ma064	C ma064 = m032 shifted right 32
    277      1.1  mrg 	ldd		0(rp), r000
    278      1.1  mrg 	depd		m096, 31, 32, ma064	C place the mid carry in the upper half of ma064
    279      1.1  mrg 
    280      1.1  mrg 	add		climb, p000a, s000	C incoming carry plus low product
    281      1.1  mrg 	add,dc		p064a, %r0, climb	C new carry limb = high product plus carry
    282      1.1  mrg 	add		ma000, s000, s000	C add the low part of the mid sum
    283      1.1  mrg 	add,dc		ma064, climb, climb	C add the high part of the mid sum plus carry
    284      1.1  mrg 	add		r000, s000, s000	C add the limb loaded from rp
    285      1.1  mrg 	add,dc		%r0, climb, climb	C fold the last carry into climb
    286      1.1  mrg 	std		s000, 0(rp)
    287      1.1  mrg 
    288      1.1  mrg 	cmpib,>=	4, n, L(done)	C branch to done when 4 >= n, i.e. nothing is left for the 4-way code
    289      1.1  mrg 	ldo		8(rp), rp	C delay slot: step rp past the limb just stored
    290      1.1  mrg 
    291      1.1  mrg C 4-way unrolled code.
    292      1.1  mrg 
    293      1.1  mrg LDEF(BIG)
    294      1.1  mrg C  Register names are redefined for the 4-way code; several names share one physical register.
    295      1.1  mrg define(`p032a1',`%r1')	C pNNNx1 and pNNNx2: the two mid products of limb a, b, c, d
    296      1.1  mrg define(`p032a2',`%r19')	C
    297      1.1  mrg define(`p096b1',`%r20')	C
    298      1.1  mrg define(`p096b2',`%r21')	C
    299      1.1  mrg define(`p160c1',`%r22')	C
    300      1.1  mrg define(`p160c2',`%r29')	C
    301      1.1  mrg define(`p224d1',`%r31')	C
    302      1.1  mrg define(`p224d2',`%r3')	C
    303      1.1  mrg 			C
    304      1.1  mrg define(`m032',`%r4')	C mNNN: sum of one mid product pair; m288 receives the final carry
    305      1.1  mrg define(`m096',`%r5')	C
    306      1.1  mrg define(`m160',`%r6')	C
    307      1.1  mrg define(`m224',`%r7')	C
    308      1.1  mrg define(`m288',`%r8')	C
    309      1.1  mrg 			C
    310      1.1  mrg define(`p000a',`%r1')	C low and high products of limbs a..d
    311      1.1  mrg define(`p064a',`%r19')	C
    312      1.1  mrg define(`p064b',`%r20')	C
    313      1.1  mrg define(`p128b',`%r21')	C
    314      1.1  mrg define(`p128c',`%r22')	C
    315      1.1  mrg define(`p192c',`%r29')	C
    316      1.1  mrg define(`p192d',`%r31')	C
    317      1.1  mrg define(`p256d',`%r3')	C
    318      1.1  mrg 			C
    319      1.1  mrg define(`s000',`%r10')	C the four result limbs being accumulated
    320      1.1  mrg define(`s064',`%r11')	C
    321      1.1  mrg define(`s128',`%r12')	C
    322      1.1  mrg define(`s192',`%r13')	C
    323      1.1  mrg 			C
    324      1.1  mrg define(`ma000',`%r9')	C mid sums realigned by 32 bits before being added to the result limbs
    325      1.1  mrg define(`ma064',`%r4')	C
    326      1.1  mrg define(`ma128',`%r5')	C
    327      1.1  mrg define(`ma192',`%r6')	C
    328      1.1  mrg define(`ma256',`%r7')	C
    329      1.1  mrg 			C
    330      1.1  mrg define(`r000',`%r1')	C the four limbs loaded from rp
    331      1.1  mrg define(`r064',`%r19')	C
    332      1.1  mrg define(`r128',`%r20')	C
    333      1.1  mrg define(`r192',`%r21')	C
    334      1.1  mrg C  Save r6..r13, which the 4-way code uses, into the frame.
    335      1.1  mrg 	std		%r6, -0xe8(%r30)
    336      1.1  mrg 	std		%r7, -0xe0(%r30)
    337      1.1  mrg 	std		%r8, -0xd8(%r30)
    338      1.1  mrg 	std		%r9, -0xd0(%r30)
    339      1.1  mrg 	std		%r10, -0xc8(%r30)
    340      1.1  mrg 	std		%r11, -0xc0(%r30)
    341      1.1  mrg 	std		%r12, -0xb8(%r30)
    342      1.1  mrg 	std		%r13, -0xb0(%r30)
    343      1.1  mrg C  From here on n counts groups of four limbs.
    344      1.1  mrg ifdef(`HAVE_ABI_2_0w',
    345      1.1  mrg `	extrd,u		n, 61, 62, n		C right shift 2
    346      1.1  mrg ',`	extrd,u		n, 61, 30, n		C right shift 2, zero extend
    347      1.1  mrg ')
    348      1.1  mrg 
    349      1.1  mrg LDEF(4_or_more)		C feed-in: load the first group of four up limbs and form their 16 partial products
    350      1.1  mrg 	fldd		0(up), %fr4
    351      1.1  mrg 	fldd		8(up), %fr5
    352      1.1  mrg 	fldd		16(up), %fr6
    353      1.1  mrg 	fldd		24(up), %fr7
    354      1.1  mrg 	xmpyu		%fr8R, %fr4L, %fr22
    355      1.1  mrg 	xmpyu		%fr8L, %fr4R, %fr23
    356      1.1  mrg 	xmpyu		%fr8R, %fr5L, %fr24
    357      1.1  mrg 	xmpyu		%fr8L, %fr5R, %fr25
    358      1.1  mrg 	xmpyu		%fr8R, %fr6L, %fr26
    359      1.1  mrg 	xmpyu		%fr8L, %fr6R, %fr27
    360      1.1  mrg 	fstd		%fr22, -0x78(%r30)	C mid product to  -0x78..-0x71
    361      1.1  mrg 	xmpyu		%fr8R, %fr7L, %fr28
    362      1.1  mrg 	xmpyu		%fr8L, %fr7R, %fr29
    363      1.1  mrg 	fstd		%fr23, -0x70(%r30)	C mid product to  -0x70..-0x69
    364      1.1  mrg 	xmpyu		%fr8R, %fr4R, %fr30
    365      1.1  mrg 	xmpyu		%fr8L, %fr4L, %fr31
    366      1.1  mrg 	fstd		%fr24, -0x38(%r30)	C mid product to  -0x38..-0x31
    367      1.1  mrg 	xmpyu		%fr8R, %fr5R, %fr22
    368      1.1  mrg 	xmpyu		%fr8L, %fr5L, %fr23
    369      1.1  mrg 	fstd		%fr25, -0x30(%r30)	C mid product to  -0x30..-0x29
    370      1.1  mrg 	xmpyu		%fr8R, %fr6R, %fr24
    371      1.1  mrg 	xmpyu		%fr8L, %fr6L, %fr25
    372      1.1  mrg 	fstd		%fr26, -0x58(%r30)	C mid product to  -0x58..-0x51
    373      1.1  mrg 	xmpyu		%fr8R, %fr7R, %fr26
    374      1.1  mrg 	fstd		%fr27, -0x50(%r30)	C mid product to  -0x50..-0x49
    375      1.1  mrg 	addib,<>	-1, n, L(8_or_more)	C decrement the group count; taken when another group of four follows
    376      1.1  mrg 	xmpyu		%fr8L, %fr7L, %fr27
    377      1.1  mrg 	fstd		%fr28, -0x18(%r30)	C mid product to  -0x18..-0x11
    378      1.1  mrg 	fstd		%fr29, -0x10(%r30)	C mid product to  -0x10..-0x09
    379      1.1  mrg 	fstd		%fr30, -0x80(%r30)	C low product to  -0x80..-0x79
    380      1.1  mrg 	fstd		%fr31, -0x68(%r30)	C high product to -0x68..-0x61
    381      1.1  mrg 	fstd		%fr22, -0x40(%r30)	C low product to  -0x40..-0x39
    382      1.1  mrg 	fstd		%fr23, -0x28(%r30)	C high product to -0x28..-0x21
    383      1.1  mrg 	fstd		%fr24, -0x60(%r30)	C low product to  -0x60..-0x59
    384      1.1  mrg 	fstd		%fr25, -0x48(%r30)	C high product to -0x48..-0x41
    385      1.1  mrg 	fstd		%fr26, -0x20(%r30)	C low product to  -0x20..-0x19
    386      1.1  mrg 	fstd		%fr27, -0x88(%r30)	C high product to -0x88..-0x81
    387      1.1  mrg 	ldd		-0x78(%r30), p032a1	C single group only: reload the eight mid products for end1
    388      1.1  mrg 	ldd		-0x70(%r30), p032a2
    389      1.1  mrg 	ldd		-0x38(%r30), p096b1
    390      1.1  mrg 	ldd		-0x30(%r30), p096b2
    391      1.1  mrg 	ldd		-0x58(%r30), p160c1
    392      1.1  mrg 	ldd		-0x50(%r30), p160c2
    393      1.1  mrg 	ldd		-0x18(%r30), p224d1
    394      1.1  mrg 	ldd		-0x10(%r30), p224d2
    395      1.1  mrg 	b		L(end1)
    396      1.1  mrg 	nop	C branch delay slot
    397      1.1  mrg 
    398      1.1  mrg LDEF(8_or_more)		C spill the rest of group one, then multiply group two while reloading the mid products of group one
    399      1.1  mrg 	fstd		%fr28, -0x18(%r30)	C mid product to  -0x18..-0x11
    400      1.1  mrg 	fstd		%fr29, -0x10(%r30)	C mid product to  -0x10..-0x09
    401      1.1  mrg 	ldo		32(up), up	C advance up to the next four limbs
    402      1.1  mrg 	fstd		%fr30, -0x80(%r30)	C low product to  -0x80..-0x79
    403      1.1  mrg 	fstd		%fr31, -0x68(%r30)	C high product to -0x68..-0x61
    404      1.1  mrg 	fstd		%fr22, -0x40(%r30)	C low product to  -0x40..-0x39
    405      1.1  mrg 	fstd		%fr23, -0x28(%r30)	C high product to -0x28..-0x21
    406      1.1  mrg 	fstd		%fr24, -0x60(%r30)	C low product to  -0x60..-0x59
    407      1.1  mrg 	fstd		%fr25, -0x48(%r30)	C high product to -0x48..-0x41
    408      1.1  mrg 	fstd		%fr26, -0x20(%r30)	C low product to  -0x20..-0x19
    409      1.1  mrg 	fstd		%fr27, -0x88(%r30)	C high product to -0x88..-0x81
    410      1.1  mrg 	fldd		0(up), %fr4
    411      1.1  mrg 	fldd		8(up), %fr5
    412      1.1  mrg 	fldd		16(up), %fr6
    413      1.1  mrg 	fldd		24(up), %fr7
    414      1.1  mrg 	xmpyu		%fr8R, %fr4L, %fr22
    415      1.1  mrg 	ldd		-0x78(%r30), p032a1
    416      1.1  mrg 	xmpyu		%fr8L, %fr4R, %fr23
    417      1.1  mrg 	xmpyu		%fr8R, %fr5L, %fr24
    418      1.1  mrg 	ldd		-0x70(%r30), p032a2
    419      1.1  mrg 	xmpyu		%fr8L, %fr5R, %fr25
    420      1.1  mrg 	xmpyu		%fr8R, %fr6L, %fr26
    421      1.1  mrg 	ldd		-0x38(%r30), p096b1
    422      1.1  mrg 	xmpyu		%fr8L, %fr6R, %fr27
    423      1.1  mrg 	fstd		%fr22, -0x78(%r30)	C mid product to  -0x78..-0x71
    424      1.1  mrg 	xmpyu		%fr8R, %fr7L, %fr28
    425      1.1  mrg 	ldd		-0x30(%r30), p096b2
    426      1.1  mrg 	xmpyu		%fr8L, %fr7R, %fr29
    427      1.1  mrg 	fstd		%fr23, -0x70(%r30)	C mid product to  -0x70..-0x69
    428      1.1  mrg 	xmpyu		%fr8R, %fr4R, %fr30
    429      1.1  mrg 	ldd		-0x58(%r30), p160c1
    430      1.1  mrg 	xmpyu		%fr8L, %fr4L, %fr31
    431      1.1  mrg 	fstd		%fr24, -0x38(%r30)	C mid product to  -0x38..-0x31
    432      1.1  mrg 	xmpyu		%fr8R, %fr5R, %fr22
    433      1.1  mrg 	ldd		-0x50(%r30), p160c2
    434      1.1  mrg 	xmpyu		%fr8L, %fr5L, %fr23
    435      1.1  mrg 	fstd		%fr25, -0x30(%r30)	C mid product to  -0x30..-0x29
    436      1.1  mrg 	xmpyu		%fr8R, %fr6R, %fr24
    437      1.1  mrg 	ldd		-0x18(%r30), p224d1
    438      1.1  mrg 	xmpyu		%fr8L, %fr6L, %fr25
    439      1.1  mrg 	fstd		%fr26, -0x58(%r30)	C mid product to  -0x58..-0x51
    440      1.1  mrg 	xmpyu		%fr8R, %fr7R, %fr26
    441      1.1  mrg 	ldd		-0x10(%r30), p224d2
    442      1.1  mrg 	fstd		%fr27, -0x50(%r30)	C mid product to  -0x50..-0x49
    443      1.1  mrg 	addib,=		-1, n, L(end2)	C decrement the group count; taken when no further group remains
    444      1.1  mrg 	xmpyu		%fr8L, %fr7L, %fr27
    445      1.1  mrg LDEF(loop)		C steady state: finish and store one group of four limbs while multiplying the next group
    446      1.1  mrg 	add		p032a1, p032a2, m032	C start the carry chain over the four mid product pairs
    447      1.1  mrg 	ldd		-0x80(%r30), p000a
    448      1.1  mrg 	add,dc		p096b1, p096b2, m096
    449      1.1  mrg 	fstd		%fr28, -0x18(%r30)	C mid product to  -0x18..-0x11
    450      1.1  mrg 
    451      1.1  mrg 	add,dc		p160c1, p160c2, m160
    452      1.1  mrg 	ldd		-0x68(%r30), p064a
    453      1.1  mrg 	add,dc		p224d1, p224d2, m224
    454      1.1  mrg 	fstd		%fr29, -0x10(%r30)	C mid product to  -0x10..-0x09
    455      1.1  mrg 
    456      1.1  mrg 	add,dc		%r0, %r0, m288	C m288 = carry out of the mid sums
    457      1.1  mrg 	ldd		-0x40(%r30), p064b
    458      1.1  mrg 	ldo		32(up), up	C advance up to the next four limbs
    459      1.1  mrg 	fstd		%fr30, -0x80(%r30)	C low product to  -0x80..-0x79
    460      1.1  mrg 
    461      1.1  mrg 	depd,z		m032, 31, 32, ma000
    462      1.1  mrg 	ldd		-0x28(%r30), p128b
    463      1.1  mrg 	extrd,u		m032, 31, 32, ma064
    464      1.1  mrg 	fstd		%fr31, -0x68(%r30)	C high product to -0x68..-0x61
    465      1.1  mrg 
    466      1.1  mrg 	depd		m096, 31, 32, ma064
    467      1.1  mrg 	ldd		-0x60(%r30), p128c
    468      1.1  mrg 	extrd,u		m096, 31, 32, ma128
    469      1.1  mrg 	fstd		%fr22, -0x40(%r30)	C low product to  -0x40..-0x39
    470      1.1  mrg 
    471      1.1  mrg 	depd		m160, 31, 32, ma128
    472      1.1  mrg 	ldd		-0x48(%r30), p192c
    473      1.1  mrg 	extrd,u		m160, 31, 32, ma192
    474      1.1  mrg 	fstd		%fr23, -0x28(%r30)	C high product to -0x28..-0x21
    475      1.1  mrg 
    476      1.1  mrg 	depd		m224, 31, 32, ma192
    477      1.1  mrg 	ldd		-0x20(%r30), p192d
    478      1.1  mrg 	extrd,u		m224, 31, 32, ma256
    479      1.1  mrg 	fstd		%fr24, -0x60(%r30)	C low product to  -0x60..-0x59
    480      1.1  mrg 
    481      1.1  mrg 	depd		m288, 31, 32, ma256
    482      1.1  mrg 	ldd		-0x88(%r30), p256d
    483      1.1  mrg 	add		climb, p000a, s000	C start the low/high product carry chain with the incoming carry limb
    484      1.1  mrg 	fstd		%fr25, -0x48(%r30)	C high product to -0x48..-0x41
    485      1.1  mrg 
    486      1.1  mrg 	add,dc		p064a, p064b, s064
    487      1.1  mrg 	ldd		0(rp), r000
    488      1.1  mrg 	add,dc		p128b, p128c, s128
    489      1.1  mrg 	fstd		%fr26, -0x20(%r30)	C low product to  -0x20..-0x19
    490      1.1  mrg 
    491      1.1  mrg 	add,dc		p192c, p192d, s192
    492      1.1  mrg 	ldd		8(rp), r064
    493      1.1  mrg 	add,dc		p256d, %r0, climb	C new carry limb starts as the top high product plus carry
    494      1.1  mrg 	fstd		%fr27, -0x88(%r30)	C high product to -0x88..-0x81
    495      1.1  mrg 
    496      1.1  mrg 	ldd		16(rp), r128
    497      1.1  mrg 	add		ma000, s000, s000	C accum mid 0
    498      1.1  mrg 	ldd		24(rp), r192
    499      1.1  mrg 	add,dc		ma064, s064, s064	C accum mid 1
    500      1.1  mrg 
    501      1.1  mrg 	add,dc		ma128, s128, s128	C accum mid 2
    502      1.1  mrg 	fldd		0(up), %fr4
    503      1.1  mrg 	add,dc		ma192, s192, s192	C accum mid 3
    504      1.1  mrg 	fldd		8(up), %fr5
    505      1.1  mrg 
    506      1.1  mrg 	add,dc		ma256, climb, climb
    507      1.1  mrg 	fldd		16(up), %fr6
    508      1.1  mrg 	add		r000, s000, s000	C accum rlimb 0
    509      1.1  mrg 	fldd		24(up), %fr7
    510      1.1  mrg 
    511      1.1  mrg 	add,dc		r064, s064, s064	C accum rlimb 1
    512      1.1  mrg 	add,dc		r128, s128, s128	C accum rlimb 2
    513      1.1  mrg 	std		s000, 0(rp)
    514      1.1  mrg 
    515      1.1  mrg 	add,dc		r192, s192, s192	C accum rlimb 3
    516      1.1  mrg 	add,dc		%r0, climb, climb	C fold the last carry into climb
    517      1.1  mrg 	std		s064, 8(rp)
    518      1.1  mrg 
    519      1.1  mrg 	xmpyu		%fr8R, %fr4L, %fr22
    520      1.1  mrg 	ldd		-0x78(%r30), p032a1
    521      1.1  mrg 	xmpyu		%fr8L, %fr4R, %fr23
    522      1.1  mrg 	std		s128, 16(rp)
    523      1.1  mrg 
    524      1.1  mrg 	xmpyu		%fr8R, %fr5L, %fr24
    525      1.1  mrg 	ldd		-0x70(%r30), p032a2
    526      1.1  mrg 	xmpyu		%fr8L, %fr5R, %fr25
    527      1.1  mrg 	std		s192, 24(rp)
    528      1.1  mrg 
    529      1.1  mrg 	xmpyu		%fr8R, %fr6L, %fr26
    530      1.1  mrg 	ldd		-0x38(%r30), p096b1
    531      1.1  mrg 	xmpyu		%fr8L, %fr6R, %fr27
    532      1.1  mrg 	fstd		%fr22, -0x78(%r30)	C mid product to  -0x78..-0x71
    533      1.1  mrg 
    534      1.1  mrg 	xmpyu		%fr8R, %fr7L, %fr28
    535      1.1  mrg 	ldd		-0x30(%r30), p096b2
    536      1.1  mrg 	xmpyu		%fr8L, %fr7R, %fr29
    537      1.1  mrg 	fstd		%fr23, -0x70(%r30)	C mid product to  -0x70..-0x69
    538      1.1  mrg 
    539      1.1  mrg 	xmpyu		%fr8R, %fr4R, %fr30
    540      1.1  mrg 	ldd		-0x58(%r30), p160c1
    541      1.1  mrg 	xmpyu		%fr8L, %fr4L, %fr31
    542      1.1  mrg 	fstd		%fr24, -0x38(%r30)	C mid product to  -0x38..-0x31
    543      1.1  mrg 
    544      1.1  mrg 	xmpyu		%fr8R, %fr5R, %fr22
    545      1.1  mrg 	ldd		-0x50(%r30), p160c2
    546      1.1  mrg 	xmpyu		%fr8L, %fr5L, %fr23
    547      1.1  mrg 	fstd		%fr25, -0x30(%r30)	C mid product to  -0x30..-0x29
    548      1.1  mrg 
    549      1.1  mrg 	xmpyu		%fr8R, %fr6R, %fr24
    550      1.1  mrg 	ldd		-0x18(%r30), p224d1
    551      1.1  mrg 	xmpyu		%fr8L, %fr6L, %fr25
    552      1.1  mrg 	fstd		%fr26, -0x58(%r30)	C mid product to  -0x58..-0x51
    553      1.1  mrg 
    554      1.1  mrg 	xmpyu		%fr8R, %fr7R, %fr26
    555      1.1  mrg 	ldd		-0x10(%r30), p224d2
    556      1.1  mrg 	fstd		%fr27, -0x50(%r30)	C mid product to  -0x50..-0x49
    557      1.1  mrg 	xmpyu		%fr8L, %fr7L, %fr27
    558      1.1  mrg 
    559      1.1  mrg 	addib,<>	-1, n, L(loop)	C decrement the group count; loop while groups remain
    560      1.1  mrg 	ldo		32(rp), rp	C delay slot: advance rp by four limbs
    561      1.1  mrg 
    562      1.1  mrg LDEF(end2)		C wind-down: finish and store one group, spill the last products, start no new multiplies
    563      1.1  mrg 	add		p032a1, p032a2, m032
    564      1.1  mrg 	ldd		-0x80(%r30), p000a
    565      1.1  mrg 	add,dc		p096b1, p096b2, m096
    566      1.1  mrg 	fstd		%fr28, -0x18(%r30)	C mid product to  -0x18..-0x11
    567      1.1  mrg 	add,dc		p160c1, p160c2, m160
    568      1.1  mrg 	ldd		-0x68(%r30), p064a
    569      1.1  mrg 	add,dc		p224d1, p224d2, m224
    570      1.1  mrg 	fstd		%fr29, -0x10(%r30)	C mid product to  -0x10..-0x09
    571      1.1  mrg 	add,dc		%r0, %r0, m288
    572      1.1  mrg 	ldd		-0x40(%r30), p064b
    573      1.1  mrg 	fstd		%fr30, -0x80(%r30)	C low product to  -0x80..-0x79
    574      1.1  mrg 	depd,z		m032, 31, 32, ma000
    575      1.1  mrg 	ldd		-0x28(%r30), p128b
    576      1.1  mrg 	extrd,u		m032, 31, 32, ma064
    577      1.1  mrg 	fstd		%fr31, -0x68(%r30)	C high product to -0x68..-0x61
    578      1.1  mrg 	depd		m096, 31, 32, ma064
    579      1.1  mrg 	ldd		-0x60(%r30), p128c
    580      1.1  mrg 	extrd,u		m096, 31, 32, ma128
    581      1.1  mrg 	fstd		%fr22, -0x40(%r30)	C low product to  -0x40..-0x39
    582      1.1  mrg 	depd		m160, 31, 32, ma128
    583      1.1  mrg 	ldd		-0x48(%r30), p192c
    584      1.1  mrg 	extrd,u		m160, 31, 32, ma192
    585      1.1  mrg 	fstd		%fr23, -0x28(%r30)	C high product to -0x28..-0x21
    586      1.1  mrg 	depd		m224, 31, 32, ma192
    587      1.1  mrg 	ldd		-0x20(%r30), p192d
    588      1.1  mrg 	extrd,u		m224, 31, 32, ma256
    589      1.1  mrg 	fstd		%fr24, -0x60(%r30)	C low product to  -0x60..-0x59
    590      1.1  mrg 	depd		m288, 31, 32, ma256
    591      1.1  mrg 	ldd		-0x88(%r30), p256d
    592      1.1  mrg 	add		climb, p000a, s000
    593      1.1  mrg 	fstd		%fr25, -0x48(%r30)	C high product to -0x48..-0x41
    594      1.1  mrg 	add,dc		p064a, p064b, s064
    595      1.1  mrg 	ldd		0(rp), r000
    596      1.1  mrg 	add,dc		p128b, p128c, s128
    597      1.1  mrg 	fstd		%fr26, -0x20(%r30)	C low product to  -0x20..-0x19
    598      1.1  mrg 	add,dc		p192c, p192d, s192
    599      1.1  mrg 	ldd		8(rp), r064
    600      1.1  mrg 	add,dc		p256d, %r0, climb
    601      1.1  mrg 	fstd		%fr27, -0x88(%r30)	C high product to -0x88..-0x81
    602      1.1  mrg 	ldd		16(rp), r128
    603      1.1  mrg 	add		ma000, s000, s000	C accum mid 0
    604      1.1  mrg 	ldd		24(rp), r192
    605      1.1  mrg 	add,dc		ma064, s064, s064	C accum mid 1
    606      1.1  mrg 	add,dc		ma128, s128, s128	C accum mid 2
    607      1.1  mrg 	add,dc		ma192, s192, s192	C accum mid 3
    608      1.1  mrg 	add,dc		ma256, climb, climb
    609      1.1  mrg 	add		r000, s000, s000	C accum rlimb 0
    610      1.1  mrg 	add,dc		r064, s064, s064	C accum rlimb 1
    611      1.1  mrg 	add,dc		r128, s128, s128	C accum rlimb 2
    612      1.1  mrg 	std		s000, 0(rp)
    613      1.1  mrg 	add,dc		r192, s192, s192	C accum rlimb 3
    614      1.1  mrg 	add,dc		%r0, climb, climb
    615      1.1  mrg 	std		s064, 8(rp)
    616      1.1  mrg 	ldd		-0x78(%r30), p032a1	C reload the mid products of the final group for end1
    617      1.1  mrg 	std		s128, 16(rp)
    618      1.1  mrg 	ldd		-0x70(%r30), p032a2
    619      1.1  mrg 	std		s192, 24(rp)
    620      1.1  mrg 	ldd		-0x38(%r30), p096b1
    621      1.1  mrg 	ldd		-0x30(%r30), p096b2
    622      1.1  mrg 	ldd		-0x58(%r30), p160c1
    623      1.1  mrg 	ldd		-0x50(%r30), p160c2
    624      1.1  mrg 	ldd		-0x18(%r30), p224d1
    625      1.1  mrg 	ldd		-0x10(%r30), p224d2
    626      1.1  mrg 	ldo		32(rp), rp	C advance rp by four limbs
    627      1.1  mrg 
    628      1.1  mrg LDEF(end1)		C wind-down: finish and store the final group of four limbs
    629      1.1  mrg 	add		p032a1, p032a2, m032
    630      1.1  mrg 	ldd		-0x80(%r30), p000a
    631      1.1  mrg 	add,dc		p096b1, p096b2, m096
    632      1.1  mrg 	add,dc		p160c1, p160c2, m160
    633      1.1  mrg 	ldd		-0x68(%r30), p064a
    634      1.1  mrg 	add,dc		p224d1, p224d2, m224
    635      1.1  mrg 	add,dc		%r0, %r0, m288
    636      1.1  mrg 	ldd		-0x40(%r30), p064b
    637      1.1  mrg 	depd,z		m032, 31, 32, ma000
    638      1.1  mrg 	ldd		-0x28(%r30), p128b
    639      1.1  mrg 	extrd,u		m032, 31, 32, ma064
    640      1.1  mrg 	depd		m096, 31, 32, ma064
    641      1.1  mrg 	ldd		-0x60(%r30), p128c
    642      1.1  mrg 	extrd,u		m096, 31, 32, ma128
    643      1.1  mrg 	depd		m160, 31, 32, ma128
    644      1.1  mrg 	ldd		-0x48(%r30), p192c
    645      1.1  mrg 	extrd,u		m160, 31, 32, ma192
    646      1.1  mrg 	depd		m224, 31, 32, ma192
    647      1.1  mrg 	ldd		-0x20(%r30), p192d
    648      1.1  mrg 	extrd,u		m224, 31, 32, ma256
    649      1.1  mrg 	depd		m288, 31, 32, ma256
    650      1.1  mrg 	ldd		-0x88(%r30), p256d
    651      1.1  mrg 	add		climb, p000a, s000
    652      1.1  mrg 	add,dc		p064a, p064b, s064
    653      1.1  mrg 	ldd		0(rp), r000
    654      1.1  mrg 	add,dc		p128b, p128c, s128
    655      1.1  mrg 	add,dc		p192c, p192d, s192
    656      1.1  mrg 	ldd		8(rp), r064
    657      1.1  mrg 	add,dc		p256d, %r0, climb
    658      1.1  mrg 	ldd		16(rp), r128
    659      1.1  mrg 	add		ma000, s000, s000	C accum mid 0
    660      1.1  mrg 	ldd		24(rp), r192
    661      1.1  mrg 	add,dc		ma064, s064, s064	C accum mid 1
    662      1.1  mrg 	add,dc		ma128, s128, s128	C accum mid 2
    663      1.1  mrg 	add,dc		ma192, s192, s192	C accum mid 3
    664      1.1  mrg 	add,dc		ma256, climb, climb
    665      1.1  mrg 	add		r000, s000, s000	C accum rlimb 0
    666      1.1  mrg 	add,dc		r064, s064, s064	C accum rlimb 1
    667      1.1  mrg 	add,dc		r128, s128, s128	C accum rlimb 2
    668      1.1  mrg 	std		s000, 0(rp)
    669      1.1  mrg 	add,dc		r192, s192, s192	C accum rlimb 3
    670      1.1  mrg 	add,dc		%r0, climb, climb
    671      1.1  mrg 	std		s064, 8(rp)
    672      1.1  mrg 	std		s128, 16(rp)
    673      1.1  mrg 	std		s192, 24(rp)
    674      1.1  mrg C  Restore r13..r6 from the slots they were saved to at BIG.
    675      1.1  mrg 	ldd		-0xb0(%r30), %r13
    676      1.1  mrg 	ldd		-0xb8(%r30), %r12
    677      1.1  mrg 	ldd		-0xc0(%r30), %r11
    678      1.1  mrg 	ldd		-0xc8(%r30), %r10
    679      1.1  mrg 	ldd		-0xd0(%r30), %r9
    680      1.1  mrg 	ldd		-0xd8(%r30), %r8
    681      1.1  mrg 	ldd		-0xe0(%r30), %r7
    682      1.1  mrg 	ldd		-0xe8(%r30), %r6
    683      1.1  mrg LDEF(done)		C hand back climb: whole in r28 for 2.0w, otherwise high 32 bits in r28 and low 32 bits in r29
    684      1.1  mrg ifdef(`HAVE_ABI_2_0w',
    685      1.1  mrg `	copy		climb, %r28
    686      1.1  mrg ',`	extrd,u		climb, 63, 32, %r29
    687      1.1  mrg 	extrd,u		climb, 31, 32, %r28
    688      1.1  mrg ')
    689      1.1  mrg 	ldd		-0xf0(%r30), %r5	C restore r5
    690      1.1  mrg 	ldd		-0xf8(%r30), %r4	C restore r4
    691      1.1  mrg 	bve		(%r2)	C return through r2
    692      1.1  mrg 	ldd,mb		-0x100(%r30), %r3	C delay slot: step r30 back by 0x100 and restore r3
    693      1.1  mrg EPILOGUE(mpn_addmul_1)
    694