Home | History | Annotate | Line # | Download | only in nails
      1 dnl  Alpha ev6 nails mpn_addmul_3.
      2 
      3 dnl  Copyright 2002, 2006 Free Software Foundation, Inc.
      4 
      5 dnl  This file is part of the GNU MP Library.
      6 dnl
      7 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
      8 dnl  it under the terms of either:
      9 dnl
     10 dnl    * the GNU Lesser General Public License as published by the Free
     11 dnl      Software Foundation; either version 3 of the License, or (at your
     12 dnl      option) any later version.
     13 dnl
     14 dnl  or
     15 dnl
     16 dnl    * the GNU General Public License as published by the Free Software
     17 dnl      Foundation; either version 2 of the License, or (at your option) any
     18 dnl      later version.
     19 dnl
     20 dnl  or both in parallel, as here.
     21 dnl
     22 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
     23 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
     24 dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
     25 dnl  for more details.
     26 dnl
     27 dnl  You should have received copies of the GNU General Public License and the
     28 dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
     29 dnl  see https://www.gnu.org/licenses/.
     30 
     31 include(`../config.m4')
     32 
     33 C Runs at 3.0 cycles/limb.
     34 
     35 C With 2-way unrolling, we could probably reach 2.25 c/l (3.33 i/c).
     36 
     37 
     38 C  INPUT PARAMETERS
     39 define(`rp',`r16')		C limbs here are loaded, added to, and stored back
     40 define(`up',`r17')		C limbs here are loaded and multiplied by v0, v1, v2
     41 define(`n',`r18')		C counter, decremented once per limb loaded from up
     42 define(`vp',`r19')		C three limbs are loaded from here into v0, v1, v2
     43 
     44 C  Useful register aliases
     45 define(`numb_mask',`r24')	C all ones shifted right by NAIL_BITS
     46 define(`ulimb',`r25')		C limb most recently loaded from up
     47 define(`rlimb',`r27')		C limb most recently loaded from rp
     48 
        C Product registers: for X in 0..2, mXa receives the mulq result and mXb
        C the umulh result of vX times ulimb.
     49 define(`m0a',`r0')
     50 define(`m0b',`r1')
     51 define(`m1a',`r2')
     52 define(`m1b',`r3')
     53 define(`m2a',`r20')
     54 define(`m2b',`r21')
     55 
        C Accumulators: acc0 is the pending sum for the next limb to be stored,
        C acc1 for the limb after that, and acc2 for the one after acc1.
     56 define(`acc0',`r4')
     57 define(`acc1',`r5')
     58 define(`acc2',`r22')
     59 
        C The three limbs loaded from vp, each shifted left by NAIL_BITS.
     60 define(`v0',`r6')
     61 define(`v1',`r7')
     62 define(`v2',`r23')
     63 
     64 C Used for temps: r8 r19 r28
        C   r8  = product low part after shifting right by NAIL_BITS
        C   r19 = same register as vp; zeroed once v0-v2 are loaded, then holds
        C         the limb sum and afterwards the nail bits carried onward
        C   r28 = numb part of the sum, the value that gets stored
     65 
        C Local shorthands; GMP_NAIL_BITS and GMP_NUMB_BITS are not defined in this
        C file (presumably they come from config.m4 -- TODO confirm).
     66 define(`NAIL_BITS',`GMP_NAIL_BITS')
     67 define(`NUMB_BITS',`GMP_NUMB_BITS')
     68 
     69 C  This declaration is munged by configure
        C  NOTE(review): presumably declares nail widths 3 through 63 as supported.
     70 NAILS_SUPPORT(3-63)
     71 
     72 ASM_START()
        C mpn_addmul_3 -- multiply the n limbs at up by the three limbs at vp and
        C add the product into the limbs at rp (nails variant for Alpha ev6).
        C
        C In:   rp (r16), up (r17), n (r18), vp (r19).
        C       n is decremented before it is first tested, so it must be at
        C       least 1 on entry; a zero n is not checked for.
        C Out:  n+2 limbs are stored at rp: one per loop pass, one in the tail,
        C       then two more flushed from acc0 and acc1.  The remaining top
        C       limb is left in r0 (m0a), presumably the function return value.
        C
        C Nail handling: each v limb is pre-shifted left by NAIL_BITS.  For such
        C a shifted v limb times a u limb, the mulq result shifted right by
        C NAIL_BITS is used as the low part and the umulh result as the high
        C part of the product.  This split assumes NAIL_BITS + NUMB_BITS = 64
        C -- TODO confirm.  Stored limbs are masked with numb_mask, and the bits
        C at and above NUMB_BITS of each sum are carried into the next limb.
        C
        C Pipelining: the products for the first u limb are started before the
        C loop.  Each loop pass consumes the products started by the previous
        C pass while starting the products for the u limb it has just loaded.
        C L(end) consumes the final set of products without starting new ones.
        C
        C NOTE(review): the U0/U1/L0/L1 tags in the comments appear to name the
        C ev6 issue slot intended for each instruction, and the nops appear to
        C be there to keep that slotting -- confirm before reordering anything.
     73 PROLOGUE(mpn_addmul_3)
     74 	lda	numb_mask,-1(r31)		C numb_mask = all ones
     75 	srl	numb_mask,NAIL_BITS,numb_mask	C clear the top NAIL_BITS bits
     76 
     77 	ldq	v0,	0(vp)			C load the three multiplier limbs
     78 	ldq	v1,	8(vp)
     79 	ldq	v2,	16(vp)
     80 
     81 	bis	r31,	r31,	acc0		C	zero acc0
     82 	sll	v0,NAIL_BITS,	v0		C	pre-shift v0 by the nail width
     83 	bis	r31,	r31,	acc1		C	zero acc1
     84 	sll	v1,NAIL_BITS,	v1		C	pre-shift v1 by the nail width
     85 	bis	r31,	r31,	acc2		C	zero acc2
     86 	sll	v2,NAIL_BITS,	v2		C	pre-shift v2 by the nail width
     87 	bis	r31,	r31,	r19		C	zero nail carry; r19 was vp, no longer needed
     88 
        C Start the three products for the first u limb.
     89 	ldq	ulimb,	0(up)
     90 	lda	up,	8(up)
     91 	mulq	v0,	ulimb,	m0a		C U1
     92 	umulh	v0,	ulimb,	m0b		C U1
     93 	mulq	v1,	ulimb,	m1a		C U1
     94 	umulh	v1,	ulimb,	m1b		C U1
     95 	lda	n,	-1(n)			C	one u limb consumed
     96 	mulq	v2,	ulimb,	m2a		C U1
     97 	umulh	v2,	ulimb,	m2b		C U1
     98 	beq	n,	L(end)			C U0	n was 1: skip the loop
     99 
        C Main loop: one stored limb per pass.  Each mXa/mXb register is read
        C before the multiply that overwrites it with the next product.
    100 	ALIGN(16)
    101 L(top):	ldq	rlimb,	0(rp)			C L1	rp limb to be updated
    102 	ldq	ulimb,	0(up)			C L0	next u limb
    103 	bis	r31,	r31,	r31		C U0	nop
    104 	addq	r19,	acc0,	acc0		C U1	propagate nail
    105 
    106 	lda	rp,	8(rp)			C L1	advance rp; the store below uses -8(rp)
    107 	srl	m0a,NAIL_BITS,	r8		C U0	low part of pending v0 product
    108 	lda	up,	8(up)			C L0	advance up
    109 	mulq	v0,	ulimb,	m0a		C U1	start next v0 product, low half
    110 
    111 	addq	r8,	acc0,	r19		C U0	r19 = sum for this limb position
    112 	addq	m0b,	acc1,	acc0		C L1	acc0 = v0 high part + old acc1
    113 	umulh	v0,	ulimb,	m0b		C U1	start next v0 product, high half
    114 	bis	r31,	r31,	r31		C L0	nop
    115 
    116 	addq	rlimb,	r19,	r19		C L1	add in the rp limb
    117 	srl	m1a,NAIL_BITS,	r8		C U0	low part of pending v1 product
    118 	bis	r31,	r31,	r31		C L0	nop
    119 	mulq	v1,	ulimb,	m1a		C U1	start next v1 product, low half
    120 
    121 	addq	r8,	acc0,	acc0		C U0	acc0 += v1 low part
    122 	addq	m1b,	acc2,	acc1		C L1	acc1 = v1 high part + old acc2
    123 	umulh	v1,	ulimb,	m1b		C U1	start next v1 product, high half
    124 	and	r19,numb_mask,	r28		C L0	extract numb part
    125 
    126 	bis	r31,	r31,	r31		C L1	nop
    127 	srl	m2a,NAIL_BITS,	r8		C U0	low part of pending v2 product
    128 	lda	n,	-1(n)			C L0	one more u limb consumed
    129 	mulq	v2,	ulimb,	m2a		C U1	start next v2 product, low half
    130 
    131 	addq	r8,	acc1,	acc1		C L0	acc1 += v2 low part
    132 	bis	r31,	m2b,	acc2		C L1	acc2 = v2 high part
    133 	umulh	v2,	ulimb,	m2b		C U1	start next v2 product, high half
    134 	srl	r19,NUMB_BITS,	r19		C U0	extract nail part
    135 
    136 	stq	r28,	-8(rp)			C L	store the finished limb
    137 	bne	n,	L(top)			C U0
    138 
        C Tail: same accumulation as one loop pass, but no new u limb is loaded
        C and no new products are started.
    139 L(end):	ldq	rlimb,	0(rp)			C	last rp limb that has a u limb
    140 	addq	r19,	acc0,	acc0		C	propagate nail
    141 	lda	rp,	8(rp)
    142 	srl	m0a,NAIL_BITS,	r8		C U0
    143 	addq	r8,	acc0,	r19		C	r19 = sum for this limb position
    144 	addq	m0b,	acc1,	acc0		C	acc0 = v0 high part + old acc1
    145 	addq	rlimb,	r19,	r19		C	add in the rp limb
    146 	srl	m1a,NAIL_BITS,	r8		C U0
    147 	addq	r8,	acc0,	acc0		C	acc0 += v1 low part
    148 	addq	m1b,	acc2,	acc1		C	acc1 = v1 high part + old acc2
    149 	and	r19,numb_mask,	r28		C extract limb
    150 	srl	m2a,NAIL_BITS,	r8		C U0
    151 	addq	r8,	acc1,	acc1		C	acc1 += v2 low part
    152 	bis	r31,	m2b,	acc2		C	acc2 = v2 high part
    153 	srl	r19,NUMB_BITS,	r19		C extract nail
    154 	stq	r28,	-8(rp)
    155 
        C Flush acc0 and acc1 as two further limbs.  These are stored without
        C first loading the old contents of rp at those positions.
    156 	addq	r19,	acc0,	acc0		C propagate nail
    157 	and	acc0,numb_mask,	r28
    158 	stq	r28,	0(rp)
    159 	srl	acc0,NUMB_BITS,	r19		C	nail part of acc0 carries into acc1
    160 	addq	r19,	acc1,	acc1
    161 
    162 	and	acc1,numb_mask,	r28
    163 	stq	r28,	8(rp)
    164 	srl	acc1,NUMB_BITS,	r19		C	nail part of acc1 carries into the top limb
    165 	addq	r19,	acc2,	m0a		C	top limb left in r0; it is not masked here
    166 
    167 	ret	r31,	(r26),	1
    168 EPILOGUE()
    169 ASM_END()
    170