Home | History | Annotate | Line # | Download | only in ultrasparct3
      1 dnl  SPARC v9 mpn_cnd_add_n and mpn_cnd_sub_n for T3/T4/T5.
      2 
      3 dnl  Contributed to the GNU project by David Miller and Torbjörn Granlund.
      4 
      5 dnl  Copyright 2013, 2017 Free Software Foundation, Inc.
      6 
      7 dnl  This file is part of the GNU MP Library.
      8 dnl
      9 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
     10 dnl  it under the terms of either:
     11 dnl
     12 dnl    * the GNU Lesser General Public License as published by the Free
     13 dnl      Software Foundation; either version 3 of the License, or (at your
     14 dnl      option) any later version.
     15 dnl
     16 dnl  or
     17 dnl
     18 dnl    * the GNU General Public License as published by the Free Software
     19 dnl      Foundation; either version 2 of the License, or (at your option) any
     20 dnl      later version.
     21 dnl
     22 dnl  or both in parallel, as here.
     23 dnl
     24 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
     25 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
     26 dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
     27 dnl  for more details.
     28 dnl
     29 dnl  You should have received copies of the GNU General Public License and the
     30 dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
     31 dnl  see https://www.gnu.org/licenses/.
     32 
     33 include(`../config.m4')
     34 
     35 C		   cycles/limb
     36 C UltraSPARC T3:	 8.5
     37 C UltraSPARC T4:	 3
     38 
     39 C We use a double-pointer trick to allow indexed addressing.  Its setup
     40 C cost might be a problem in these functions, since we don't expect huge n
     41 C arguments.
     42 C
     43 C For sub we need ~(a & mask) = (~a | ~mask) but by complementing mask we can
     44 C instead do ~(a & ~mask) = (~a | mask), allowing us to use the orn insn.
     45 
     46 C INPUT PARAMETERS
     47 define(`cnd', `%i0')	C condition word; rewritten in place into the limb mask
     48 define(`rp',  `%i1')	C destination limbs (written with stx)
     49 define(`up',  `%i2')	C first source operand; its limbs are used unmodified
     50 define(`vp',  `%i3')	C second source operand; its limbs pass through LOGOP
     51 define(`n',   `%i4')	C limb count on entry; later reused as a byte index
     52 
     53 define(`mask',   `cnd')	C same register as cnd, so making the mask destroys cnd
     54 define(`up0', `%l0')  define(`up1', `%l1')	C up base pointers for the two limbs of a pair
     55 define(`vp0', `%l2')  define(`vp1', `%l3')	C vp base pointers, likewise
     56 define(`rp0', `%g4')  define(`rp1', `%g5')	C rp base pointers; biased separately in the evn block
     57 define(`u0',  `%l4')  define(`u1',  `%l5')	C limbs loaded from up
     58 define(`v0',  `%l6')  define(`v1',  `%l7')	C limbs loaded from vp
     59 define(`x0',  `%g1')  define(`x1',  `%g3')	C LOGOP results
     60 define(`w0',  `%g1')  define(`w1',  `%g3')	C result limbs; share registers with x0 and x1
     61 
     62 ifdef(`OPERATION_cnd_add_n',`
        C Add variant of the operation-specific macros.  The second addend is
        C v AND mask, where the mask is all ones for nonzero cnd and zero
        C otherwise, so a zero cnd adds 0 to every limb of up.
     63   define(`LOGOP',   `and	$1, $2, $3')	C arguments are v, mask, x: x = v AND mask
     64   define(`MAKEMASK',`cmp	%g0, $1
     65 		     addxc(	%g0, %g0, $2)
     66 		     neg	$2, $2')	C 0 - cnd borrows iff cnd is nonzero; mask = 0 - carry
     67   define(`INITCY',  `addcc	%g0, 0, %g0')	C start the chain with the carry flag clear
     68   define(`RETVAL',  `addxc(	%g0, %g0, %i0)')	C %i0 = final carry flag
     69   define(`func',    `mpn_cnd_add_n')
     70 ')
     71 ifdef(`OPERATION_cnd_sub_n',`
        C Subtract variant of the operation-specific macros.  Each limb is
        C computed as u + x + carry with x = mask OR NOT v.  The mask is
        C complemented here: all ones for zero cnd, zero otherwise.  Nonzero cnd
        C gives x = NOT v, which yields u - v; zero cnd gives x = all ones,
        C which yields u + (-1) + 1 = u.
     72   define(`LOGOP',   `orn	$2, $1, $3')	C arguments are v, mask, x: x = mask OR NOT v
     73   define(`MAKEMASK',`cmp	$1, 1
     74 		     addxc(	%g0, %g0, $2)
     75 		     neg	$2, $2')	C cnd - 1 borrows iff cnd is zero; mask = 0 - carry
     76   define(`INITCY',  `subcc	%g0, 1, %g0')	C 0 - 1 borrows, so the chain starts with carry set
     77   define(`RETVAL',  `addxc(	%g0, %g0, %i0)
     78 		     xor	%i0, 1, %i0')	C %i0 = inverted final carry, i.e. the borrow
     79   define(`func',    `mpn_cnd_sub_n')
     80 ')
     81 
     82 MULFUNC_PROLOGUE(mpn_cnd_add_n mpn_cnd_sub_n)
     83 
     84 ASM_START()
     85 	REGISTER(%g2,#scratch)
     86 	REGISTER(%g3,#scratch)
     87 PROLOGUE(func)
        C func(cnd, rp, up, vp, n), where func is mpn_cnd_add_n or mpn_cnd_sub_n
        C depending on which OPERATION_* symbol is defined.  For each of the n
        C limbs it stores u + LOGOP(v, mask) + carry to rp, chaining the carry
        C flag from limb to limb, and leaves the carry (add) or borrow (sub) out
        C of the last limb in %i0.
        C
        C Limbs are addressed as base + n with an upward-counting byte index n.
        C An odd leading limb is handled alone; the rest are handled in pairs,
        C with the loads for the next pair interleaved with the arithmetic of
        C the current one.
        C
        C A limb count of 0 is not special-cased: the even path always loads
        C and stores two limbs, so callers are presumably required to pass
        C n >= 1 (TODO confirm against the documented interface).
     88 	save	%sp, -176, %sp
     89 
     90 	MAKEMASK(cnd,mask)	C mask is the same register as cnd, so cnd is gone after this
     91 
     92 	andcc	n, 1, %g0	C Z set when the limb count is even; tested by the be below
     93 	sllx	n, 3, n	C limb count to byte count (8 bytes per ldx/stx)
     94 	add	n, -16, n
     95 	add	vp, n, vp0	C vp0, up0, rp0 = 16 bytes before the end of each operand
     96 	add	up, n, up0
     97 	add	rp, n, rp0
     98 	neg	n, n	C base + n now addresses limb 0; n counts upward from here
     99 	be	L(evn)
    100 	 INITCY	C delay slot: set the incoming carry flag for the chain
    101 
    102 L(odd):	ldx	[vp0 + n], v1	C odd count: process limb 0 alone
    103 	ldx	[up0 + n], u1
    104 	LOGOP(	v1, mask, x1)
    105 	addxccc(u1, x1, w1)
    106 	stx	w1, [rp0 + n]
    107 	add	n, 8, n
    108 	brgz	n, L(rtn)	C n > 0 here only if that was the sole limb
    109 	 nop
    110 
    111 L(evn):	add	vp0, 8, vp1	C vp1, up1 address the second limb of each pair
    112 	add	up0, 8, up1
    113 	add	rp0, -24, rp1	C +8 for the second limb, -32 because w1 is stored after n has advanced twice
    114 	ldx	[vp0 + n], v0	C preload the first pair
    115 	ldx	[vp1 + n], v1
    116 	ldx	[up0 + n], u0
    117 	ldx	[up1 + n], u1
    118 	add	n, 16, n
    119 	brgz	n, L(end)	C n > 0: there is no further pair to load
    120 	 add	rp0, -16, rp0	C delay slot, runs on both paths: w0 is stored after n has advanced once
    121 
    122 L(top):	LOGOP(	v0, mask, x0)	C combine the pair already loaded while fetching the next one
    123 	ldx	[vp0 + n], v0
    124 	LOGOP(	v1, mask, x1)
    125 	ldx	[vp1 + n], v1
    126 	addxccc(u0, x0, w0)
    127 	ldx	[up0 + n], u0
    128 	addxccc(u1, x1, w1)
    129 	ldx	[up1 + n], u1
    130 	stx	w0, [rp0 + n]
    131 	add	n, 16, n	C plain add, not addcc: the carry flag must survive into the next pass
    132 	brlez	n, L(top)	C branches on the register value, not on the condition codes
    133 	 stx	w1, [rp1 + n]	C delay slot: uses the already advanced n
    134 
    135 L(end):	LOGOP(	v0, mask, x0)	C final pair, nothing left to load; the index n is 16 here
    136 	LOGOP(	v1, mask, x1)
    137 	addxccc(u0, x0, w0)
    138 	addxccc(u1, x1, w1)
    139 	stx	w0, [rp0 + n]
    140 	stx	w1, [rp1 + 32]	C 32 = n + 16, the value n would have after one more advance
    141 
    142 L(rtn):	RETVAL	C %i0 = carry (add) or borrow (sub) out of the last limb
    143 	ret
    144 	 restore	C delay slot: release the register window opened by save
    145 EPILOGUE()
    146