Home | History | Annotate | Line # | Download | only in ultrasparct3
      1 dnl  SPARC v9 mpn_addmul_1 for T3/T4/T5.
      2 
      3 dnl  Contributed to the GNU project by David Miller and Torbjörn Granlund.
      4 
      5 dnl  Copyright 2013 Free Software Foundation, Inc.
      6 
      7 dnl  This file is part of the GNU MP Library.
      8 dnl
      9 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
     10 dnl  it under the terms of either:
     11 dnl
     12 dnl    * the GNU Lesser General Public License as published by the Free
     13 dnl      Software Foundation; either version 3 of the License, or (at your
     14 dnl      option) any later version.
     15 dnl
     16 dnl  or
     17 dnl
     18 dnl    * the GNU General Public License as published by the Free Software
     19 dnl      Foundation; either version 2 of the License, or (at your option) any
     20 dnl      later version.
     21 dnl
     22 dnl  or both in parallel, as here.
     23 dnl
     24 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
     25 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
     26 dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
     27 dnl  for more details.
     28 dnl
     29 dnl  You should have received copies of the GNU General Public License and the
     30 dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
     31 dnl  see https://www.gnu.org/licenses/.
     32 
     33 include(`../config.m4')
     34 
     35 C		   cycles/limb
     36 C UltraSPARC T3:	26
     37 C UltraSPARC T4:	4.5
     38 
     39 C INPUT PARAMETERS
     40 define(`rp', `%i0')		C limbs that are loaded, added to, and stored back
     41 define(`up', `%i1')		C limbs that are multiplied by v0 (only read)
     42 define(`n',  `%i2')		C limb count; decremented as 8-byte limbs are consumed
     43 define(`v0', `%i3')		C the single multiplier limb
     44 C Working registers for the 4-limb loop body (locals of the register window).
     45 define(`u0',  `%l0')		C up limbs; u0 is only loaded at L(top), the
     46 define(`u1',  `%l1')		C loop entry path keeps up[0] in %g1 instead
     47 define(`u2',  `%l2')
     48 define(`u3',  `%l3')
     49 define(`r0',  `%l4')		C rp limbs loaded at L(lo0) and stored back after
     50 define(`r1',  `%l5')		C the product limbs have been added to them
     51 define(`r2',  `%l6')
     52 define(`r3',  `%l7')
     53 
     54 ASM_START()
     55 	REGISTER(%g2,#scratch)
     56 	REGISTER(%g3,#scratch)
     57 PROLOGUE(mpn_addmul_1)	C rp[i] += up[i] * v0 for the n limbs; the carry-out limb is returned via %i0
     58 	save	%sp, -176, %sp	C open a new register window with a 176-byte frame
     59 	ldx	[up+0], %g1	C %g1 = up[0], loaded before n is examined
     60 C Dispatch on n mod 4: 0 -> L(b0), 1 -> L(b01), 2 -> L(b10), 3 -> L(b11).
     61 	and	n, 3, %g3	C %g3 = n mod 4. NOTE(review): no n = 0 guard is visible; presumably callers pass n >= 1
     62 	brz	%g3, L(b0)	C n mod 4 = 0: go straight to the 4-limb loop
     63 	 addcc	%g0, %g0, %g5			C clear carry limb, flag (delay slot: runs on both paths)
     64 	cmp	%g3, 2
     65 	bcs	%xcc, L(b01)	C %g3 below 2, i.e. n mod 4 = 1
     66 	 nop
     67 	be	%xcc, L(b10)	C n mod 4 = 2
     68 	 ldx	[up+8], %g5	C delay slot: %g5 = up[1], used by both L(b10) and L(b11)
     69 C n mod 4 = 3: fold in three leading limbs so that a multiple of 4 remains.
     70 L(b11):	ldx	[up+16], u3	C u3 = up[2]
     71 	mulx	%g1, v0, %o2	C %o2 = low half of up[0] * v0
     72 	umulxhi(%g1, v0, %o3)	C %o3 = high half of up[0] * v0
     73 	ldx	[rp+0], r1
     74 	mulx	%g5, v0, %o4	C %o4 = low half of up[1] * v0
     75 	ldx	[rp+8], r2
     76 	umulxhi(%g5, v0, %o5)	C %o5 = high half of up[1] * v0
     77 	ldx	[rp+16], r3
     78 	mulx	u3, v0, %g4	C %g4 = low half of up[2] * v0
     79 	umulxhi(u3, v0, %g5)	C %g5 = high half of up[2] * v0, overwriting up[1]
     80 	addcc	%o3, %o4, %o4	C add each high half into the next low half ...
     81 	addxccc(%o5, %g4, %g4)
     82 	addxc(	%g0, %g5, %g5)	C ... and the final carry bit into the carry limb
     83 	addcc	r1, %o2, r1	C rp[0..2] += the three product limbs
     84 	stx	r1, [rp+0]
     85 	addxccc(r2, %o4, r2)
     86 	stx	r2, [rp+8]
     87 	addxccc(r3, %g4, r3)	C the carry flag set here is consumed at L(lo0) or L(xit)
     88 	stx	r3, [rp+16]
     89 	add	n, -3, n	C the add forms without cc leave the carry flag alone
     90 	add	up, 24, up
     91 	brz	n, L(xit)	C n was exactly 3: finished
     92 	 add	rp, 24, rp	C delay slot: executed on both paths
     93 	b	L(com)	C otherwise continue with the 4-limb loop
     94 	 nop
     95 C n mod 4 = 2: fold in two leading limbs so that a multiple of 4 remains.
     96 L(b10):	mulx	%g1, v0, %o4	C %o4 = low half of up[0] * v0
     97 	ldx	[rp+0], r2
     98 	umulxhi(%g1, v0, %o5)	C %o5 = high half of up[0] * v0
     99 	ldx	[rp+8], r3
    100 	mulx	%g5, v0, %g4	C %g4 = low half of up[1] * v0
    101 	umulxhi(%g5, v0, %g5)	C %g5 = high half of up[1] * v0, overwriting up[1]
    102 	addcc	%o5, %g4, %g4	C add the first high half into the second low half
    103 	addxc(	%g0, %g5, %g5)	C and the resulting carry bit into the carry limb
    104 	addcc	r2, %o4, r2	C rp[0..1] += the two product limbs
    105 	stx	r2, [rp+0]
    106 	addxccc(r3, %g4, r3)	C the carry flag set here is consumed at L(lo0) or L(xit)
    107 	stx	r3, [rp+8]
    108 	add	n, -2, n
    109 	add	up, 16, up
    110 	brz	n, L(xit)	C n was exactly 2: finished
    111 	 add	rp, 16, rp	C delay slot: executed on both paths
    112 	b	L(com)
    113 	 nop
    114 C n mod 4 = 1: fold in one leading limb so that a multiple of 4 remains.
    115 L(b01):	ldx	[rp+0], r3
    116 	mulx	%g1, v0, %g4	C %g4 = low half of up[0] * v0
    117 	umulxhi(%g1, v0, %g5)	C %g5 = high half, which is the carry limb
    118 	addcc	r3, %g4, r3	C the carry flag set here is consumed at L(lo0) or L(xit)
    119 	stx	r3, [rp+0]
    120 	add	n, -1, n
    121 	add	up, 8, up
    122 	brz	n, L(xit)	C n was exactly 1: finished
    123 	 add	rp, 8, rp	C delay slot; execution falls into L(com) when limbs remain
    124 C Loop entry: L(com) reloads up[0] after a lead-in block; on the L(b0) path %g1 still holds it.
    125 L(com):	ldx	[up+0], %g1
    126 L(b0):	ldx	[up+8], u1
    127 	ldx	[up+16], u2
    128 	ldx	[up+24], u3
    129 	mulx	%g1, v0, %o0	C first product of the group, low half
    130 	umulxhi(%g1, v0, %o1)	C and high half
    131 	b	L(lo0)	C enter the loop in the middle; nothing is pending for the top half yet
    132 	 nop
    133 C Main loop, 4 limbs per pass. At L(lo0): %g5 = pending carry limb, carry flag = pending carry bit.
    134 	ALIGN(16)
    135 L(top):	ldx	[up+0], u0
    136 	addxc(	%g0, %g5, %g5)		C propagate carry into carry limb
    137 	ldx	[up+8], u1
    138 	addcc	r0, %o0, r0	C rp limbs += product limbs formed in the previous pass
    139 	ldx	[up+16], u2
    140 	addxccc(r1, %o2, r1)
    141 	ldx	[up+24], u3
    142 	addxccc(r2, %o4, r2)
    143 	stx	r0, [rp-32]	C rp was already advanced by 32 bytes, hence the negative offsets
    144 	addxccc(r3, %g4, r3)	C carry flag from here is picked up by the addxccc after L(lo0)
    145 	stx	r1, [rp-24]
    146 	mulx	u0, v0, %o0	C start the products for the next four limbs
    147 	stx	r2, [rp-16]
    148 	umulxhi(u0, v0, %o1)
    149 	stx	r3, [rp-8]
    150 L(lo0):	mulx	u1, v0, %o2
    151 	ldx	[rp+0], r0
    152 	umulxhi(u1, v0, %o3)
    153 	ldx	[rp+8], r1
    154 	mulx	u2, v0, %o4
    155 	ldx	[rp+16], r2
    156 	umulxhi(u2, v0, %o5)
    157 	ldx	[rp+24], r3
    158 	mulx	u3, v0, %g4
    159 	addxccc(%g5, %o0, %o0)	C lowest product limb += carry limb + pending carry flag
    160 	umulxhi(u3, v0, %g5)	C %g5 is free again: it now holds the topmost high half
    161 	add	up, 32, up
    162 	addxccc(%o1, %o2, %o2)	C add each high half into the following low half
    163 	add	rp, 32, rp
    164 	addxccc(%o3, %o4, %o4)
    165 	add	n, -4, n
    166 	addxccc(%o5, %g4, %g4)	C carry flag from here goes into %g5 via the next addxc
    167 	brgz	n, L(top)	C repeat while limbs remain
    168 	 nop
    169 C Wind-down: the add and store half of the loop body, with no further loads from up or multiplies.
    170 	addxc(	%g0, %g5, %g5)
    171 	addcc	r0, %o0, r0
    172 	stx	r0, [rp-32]
    173 	addxccc(r1, %o2, r1)
    174 	stx	r1, [rp-24]
    175 	addxccc(r2, %o4, r2)
    176 	stx	r2, [rp-16]
    177 	addxccc(r3, %g4, r3)
    178 	stx	r3, [rp-8]
    179 L(xit):	addxc(	%g0, %g5, %i0)	C return value = carry limb + last carry flag
    180 	ret
    181 	 restore	C delay slot: pop the register window, so this %i0 is the caller %o0
    182 EPILOGUE()
    183