Home | History | Annotate | Line # | Download | only in nails
      1      1.1  mrg dnl  Alpha ev6 nails mpn_addmul_3.
      2      1.1  mrg 
      3      1.1  mrg dnl  Copyright 2002, 2006 Free Software Foundation, Inc.
      4  1.1.1.2  mrg 
      5      1.1  mrg dnl  This file is part of the GNU MP Library.
      6      1.1  mrg dnl
      7  1.1.1.2  mrg dnl  The GNU MP Library is free software; you can redistribute it and/or modify
      8  1.1.1.2  mrg dnl  it under the terms of either:
      9  1.1.1.2  mrg dnl
     10  1.1.1.2  mrg dnl    * the GNU Lesser General Public License as published by the Free
     11  1.1.1.2  mrg dnl      Software Foundation; either version 3 of the License, or (at your
     12  1.1.1.2  mrg dnl      option) any later version.
     13  1.1.1.2  mrg dnl
     14  1.1.1.2  mrg dnl  or
     15  1.1.1.2  mrg dnl
     16  1.1.1.2  mrg dnl    * the GNU General Public License as published by the Free Software
     17  1.1.1.2  mrg dnl      Foundation; either version 2 of the License, or (at your option) any
     18  1.1.1.2  mrg dnl      later version.
     19  1.1.1.2  mrg dnl
     20  1.1.1.2  mrg dnl  or both in parallel, as here.
     21  1.1.1.2  mrg dnl
     22  1.1.1.2  mrg dnl  The GNU MP Library is distributed in the hope that it will be useful, but
     23  1.1.1.2  mrg dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
     24  1.1.1.2  mrg dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
     25  1.1.1.2  mrg dnl  for more details.
     26      1.1  mrg dnl
     27  1.1.1.2  mrg dnl  You should have received copies of the GNU General Public License and the
     28  1.1.1.2  mrg dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
     29  1.1.1.2  mrg dnl  see https://www.gnu.org/licenses/.
     30      1.1  mrg 
     31      1.1  mrg include(`../config.m4')
     32      1.1  mrg 
     33      1.1  mrg C Runs at 3.0 cycles/limb.
     34      1.1  mrg 
     35      1.1  mrg C With 2-way unrolling, we could probably reach 2.25 c/l (3.33 i/c).
     36      1.1  mrg 
     37      1.1  mrg 
     38      1.1  mrg C  INPUT PARAMETERS
                      C  Registers the routine takes its four arguments in.  The roles noted
                      C  on each line are as used by the code in this file.
     39      1.1  mrg define(`rp',`r16')		C limbs that are loaded, added to and stored back
     40      1.1  mrg define(`up',`r17')		C limbs loaded one at a time into ulimb
     41      1.1  mrg define(`n',`r18')		C count, decremented once per limb of up
     42      1.1  mrg define(`vp',`r19')		C three limbs loaded into v0, v1, v2
     43      1.1  mrg 
     44      1.1  mrg C  Useful register aliases
     45      1.1  mrg define(`numb_mask',`r24')	C all ones shifted right by NAIL_BITS
     46      1.1  mrg define(`ulimb',`r25')		C limb most recently loaded from up
     47      1.1  mrg define(`rlimb',`r27')		C limb most recently loaded from rp
     48      1.1  mrg 
                      C  mNa / mNb hold the mulq (low 64 bits) and umulh (high 64 bits)
                      C  results of vN times ulimb.
     49      1.1  mrg define(`m0a',`r0')
     50      1.1  mrg define(`m0b',`r1')
     51      1.1  mrg define(`m1a',`r2')
     52      1.1  mrg define(`m1b',`r3')
     53      1.1  mrg define(`m2a',`r20')
     54      1.1  mrg define(`m2b',`r21')
     55      1.1  mrg 
                      C  Running sums of product parts that belong to the next three result
                      C  limbs; acc0 is the one folded into the limb stored next.
     56      1.1  mrg define(`acc0',`r4')
     57      1.1  mrg define(`acc1',`r5')
     58      1.1  mrg define(`acc2',`r22')
     59      1.1  mrg 
                      C  The three limbs loaded from vp, kept shifted left by NAIL_BITS.
     60      1.1  mrg define(`v0',`r6')
     61      1.1  mrg define(`v1',`r7')
     62      1.1  mrg define(`v2',`r23')
     63      1.1  mrg 
     64      1.1  mrg C Used for temps: r8 r19 r28
                      C r19 is also vp: the code zeroes it and reuses it as the nail carry
                      C once v0, v1 and v2 have been loaded.  r8 receives the shifted low
                      C product parts and r28 the masked limb about to be stored.
     65      1.1  mrg 
                      C Short names for GMP_NAIL_BITS and GMP_NUMB_BITS, which are not defined
                      C in this file (presumably they come in through config.m4 -- confirm).
     66      1.1  mrg define(`NAIL_BITS',`GMP_NAIL_BITS')
     67      1.1  mrg define(`NUMB_BITS',`GMP_NUMB_BITS')
     68      1.1  mrg 
     69      1.1  mrg C  This declaration is munged by configure
                      C  NOTE(review): presumably states that this code supports nail sizes
                      C  of 3 to 63 bits -- confirm against the configure script.
     70      1.1  mrg NAILS_SUPPORT(3-63)
     71      1.1  mrg 
     72      1.1  mrg ASM_START()
                      C  mpn_addmul_3 for Alpha ev6, nails build.
                      C
                      C  Each of the n limbs at up is multiplied by the three limbs loaded
                      C  from vp, and the partial products are added into the limbs at rp.
                      C  Limbs rp[0] .. rp[n-1] are read, added to and stored back; rp[n] and
                      C  rp[n+1] are stored without being read first.  Every stored limb is
                      C  masked with numb_mask, so its top NAIL_BITS bits are zero.  The
                      C  final carry-out is left in m0a (r0), presumably as the return value.
                      C
                      C  n is decremented before it is first tested, so the code as written
                      C  needs n >= 1 on entry.
                      C
                      C  The loop is software pipelined: the products for the next limb of up
                      C  are started while the products of the current limb are still being
                      C  summed, so the instruction order matters.  The U0/U1/L0/L1 tags on
                      C  the instructions are presumably the intended ev6 issue slots.
                      C
                      C  Registers written: r0-r8, r16-r25, r27, r28.  Stores go only through
                      C  rp, and the stack pointer is not used.
     73      1.1  mrg PROLOGUE(mpn_addmul_3)
     74      1.1  mrg 	lda	numb_mask,-1(r31)		C numb_mask = -1 (r31 reads as zero)
     75      1.1  mrg 	srl	numb_mask,NAIL_BITS,numb_mask	C clear the top NAIL_BITS bits
     76      1.1  mrg 
     77      1.1  mrg 	ldq	v0,	0(vp)			C v0 = vp[0]
     78      1.1  mrg 	ldq	v1,	8(vp)			C v1 = vp[1]
     79      1.1  mrg 	ldq	v2,	16(vp)			C v2 = vp[2]
     80      1.1  mrg 
                      C  The multipliers are pre-shifted left by NAIL_BITS.  Assuming
                      C  NAIL_BITS + NUMB_BITS = 64 (TODO confirm), umulh then yields the
                      C  product shifted right by NUMB_BITS, and mulq followed by a right
                      C  shift of NAIL_BITS yields the low NUMB_BITS bits of the product.
     81      1.1  mrg 	bis	r31,	r31,	acc0		C	zero acc0
     82      1.1  mrg 	sll	v0,NAIL_BITS,	v0		C v0 <<= NAIL_BITS
     83      1.1  mrg 	bis	r31,	r31,	acc1		C	zero acc1
     84      1.1  mrg 	sll	v1,NAIL_BITS,	v1		C v1 <<= NAIL_BITS
     85      1.1  mrg 	bis	r31,	r31,	acc2		C	zero acc2
     86      1.1  mrg 	sll	v2,NAIL_BITS,	v2		C v2 <<= NAIL_BITS
     87      1.1  mrg 	bis	r31,	r31,	r19		C zero the nail carry; r19 was vp, now free
     88      1.1  mrg 
                      C  Pipeline prime: start the three products for the first limb of up.
     89      1.1  mrg 	ldq	ulimb,	0(up)			C first limb of up
     90      1.1  mrg 	lda	up,	8(up)			C advance up by one limb
     91      1.1  mrg 	mulq	v0,	ulimb,	m0a		C U1	low 64 bits of v0 * ulimb
     92      1.1  mrg 	umulh	v0,	ulimb,	m0b		C U1	high 64 bits of v0 * ulimb
     93      1.1  mrg 	mulq	v1,	ulimb,	m1a		C U1	low 64 bits of v1 * ulimb
     94      1.1  mrg 	umulh	v1,	ulimb,	m1b		C U1	high 64 bits of v1 * ulimb
     95      1.1  mrg 	lda	n,	-1(n)			C one limb of up consumed
     96      1.1  mrg 	mulq	v2,	ulimb,	m2a		C U1	low 64 bits of v2 * ulimb
     97      1.1  mrg 	umulh	v2,	ulimb,	m2b		C U1	high 64 bits of v2 * ulimb
     98      1.1  mrg 	beq	n,	L(end)			C U0	single limb: go straight to the tail
     99      1.1  mrg 
                      C  Main loop.  On entry to each pass m0a..m2b hold the products of the
                      C  previous limb of up; each is read and then overwritten by the
                      C  product for the limb loaded in this pass.
    100      1.1  mrg 	ALIGN(16)
    101      1.1  mrg L(top):	ldq	rlimb,	0(rp)			C L1	limb of rp to add into
    102      1.1  mrg 	ldq	ulimb,	0(up)			C L0	next limb of up
    103      1.1  mrg 	bis	r31,	r31,	r31		C U0	nop
    104      1.1  mrg 	addq	r19,	acc0,	acc0		C U1	propagate nail
    105      1.1  mrg 
    106      1.1  mrg 	lda	rp,	8(rp)			C L1	advance rp; the store below uses -8
    107      1.1  mrg 	srl	m0a,NAIL_BITS,	r8		C U0	low part of previous v0 product
    108      1.1  mrg 	lda	up,	8(up)			C L0	advance up by one limb
    109      1.1  mrg 	mulq	v0,	ulimb,	m0a		C U1	start next v0 product, low half
    110      1.1  mrg 
    111      1.1  mrg 	addq	r8,	acc0,	r19		C U0	r19 = low v0 part + acc0
    112      1.1  mrg 	addq	m0b,	acc1,	acc0		C L1	acc0 = previous v0 high part + acc1
    113      1.1  mrg 	umulh	v0,	ulimb,	m0b		C U1	next v0 product, high half
    114      1.1  mrg 	bis	r31,	r31,	r31		C L0	nop
    115      1.1  mrg 
    116      1.1  mrg 	addq	rlimb,	r19,	r19		C L1	add in the limb loaded from rp
    117      1.1  mrg 	srl	m1a,NAIL_BITS,	r8		C U0	low part of previous v1 product
    118      1.1  mrg 	bis	r31,	r31,	r31		C L0	nop
    119      1.1  mrg 	mulq	v1,	ulimb,	m1a		C U1	start next v1 product, low half
    120      1.1  mrg 
    121      1.1  mrg 	addq	r8,	acc0,	acc0		C U0	acc0 += low v1 part
    122      1.1  mrg 	addq	m1b,	acc2,	acc1		C L1	acc1 = previous v1 high part + acc2
    123      1.1  mrg 	umulh	v1,	ulimb,	m1b		C U1	next v1 product, high half
    124      1.1  mrg 	and	r19,numb_mask,	r28		C L0	extract numb part
    125      1.1  mrg 
    126      1.1  mrg 	bis	r31,	r31,	r31		C L1	nop
    127      1.1  mrg 	srl	m2a,NAIL_BITS,	r8		C U0	low part of previous v2 product
    128      1.1  mrg 	lda	n,	-1(n)			C L0	one more limb of up consumed
    129      1.1  mrg 	mulq	v2,	ulimb,	m2a		C U1	start next v2 product, low half
    130      1.1  mrg 
    131      1.1  mrg 	addq	r8,	acc1,	acc1		C L0	acc1 += low v2 part
    132      1.1  mrg 	bis	r31,	m2b,	acc2		C L1	acc2 = previous v2 high part
    133      1.1  mrg 	umulh	v2,	ulimb,	m2b		C U1	next v2 product, high half
    134      1.1  mrg 	srl	r19,NUMB_BITS,	r19		C U0	extract nail part
    135      1.1  mrg 
    136      1.1  mrg 	stq	r28,	-8(rp)			C L	store result limb (rp already advanced)
    137      1.1  mrg 	bne	n,	L(top)			C U0	loop while limbs of up remain
    138      1.1  mrg 
                      C  Tail: the same summation as one loop pass for the products of the
                      C  last limb of up, but with no further loads from up and no new
                      C  multiplies.
    139      1.1  mrg L(end):	ldq	rlimb,	0(rp)			C last limb of rp that is added into
    140      1.1  mrg 	addq	r19,	acc0,	acc0		C	propagate nail
    141      1.1  mrg 	lda	rp,	8(rp)			C advance rp; stores below are relative to this
    142      1.1  mrg 	srl	m0a,NAIL_BITS,	r8		C U0	low part of last v0 product
    143      1.1  mrg 	addq	r8,	acc0,	r19		C r19 = low v0 part + acc0
    144      1.1  mrg 	addq	m0b,	acc1,	acc0		C acc0 = last v0 high part + acc1
    145      1.1  mrg 	addq	rlimb,	r19,	r19		C add in the limb loaded from rp
    146      1.1  mrg 	srl	m1a,NAIL_BITS,	r8		C U0	low part of last v1 product
    147      1.1  mrg 	addq	r8,	acc0,	acc0		C acc0 += low v1 part
    148      1.1  mrg 	addq	m1b,	acc2,	acc1		C acc1 = last v1 high part + acc2
    149      1.1  mrg 	and	r19,numb_mask,	r28		C extract limb
    150      1.1  mrg 	srl	m2a,NAIL_BITS,	r8		C U0	low part of last v2 product
    151      1.1  mrg 	addq	r8,	acc1,	acc1		C acc1 += low v2 part
    152      1.1  mrg 	bis	r31,	m2b,	acc2		C acc2 = last v2 high part
    153      1.1  mrg 	srl	r19,NUMB_BITS,	r19		C extract nail
    154      1.1  mrg 	stq	r28,	-8(rp)			C store the last added-into limb
    155      1.1  mrg 
                      C  Flush acc0 and acc1 into the two limbs past the added-into range.
    156      1.1  mrg 	addq	r19,	acc0,	acc0		C propagate nail
    157      1.1  mrg 	and	acc0,numb_mask,	r28		C numb part of acc0
    158      1.1  mrg 	stq	r28,	0(rp)			C first extra limb, written without a prior load
    159      1.1  mrg 	srl	acc0,NUMB_BITS,	r19		C nail part of acc0 is the carry
    160      1.1  mrg 	addq	r19,	acc1,	acc1		C propagate it into acc1
    161      1.1  mrg 
    162      1.1  mrg 	and	acc1,numb_mask,	r28		C numb part of acc1
    163      1.1  mrg 	stq	r28,	8(rp)			C second extra limb, written without a prior load
    164      1.1  mrg 	srl	acc1,NUMB_BITS,	r19		C nail part of acc1 is the carry
    165      1.1  mrg 	addq	r19,	acc2,	m0a		C m0a (r0) = carry + acc2, left unmasked
    166      1.1  mrg 
    167      1.1  mrg 	ret	r31,	(r26),	1		C return to the address in r26
    168      1.1  mrg EPILOGUE()
    169      1.1  mrg ASM_END()
    169      1.1  mrg ASM_END()
    170