Home | History | Annotate | Line # | Download | only in ultrasparct3
      1  1.1  mrg dnl  SPARC v9 mpn_addlsh_n and mpn_sublsh_n for T3/T4/T5.
      2  1.1  mrg 
      3  1.1  mrg dnl  Contributed to the GNU project by Torbjörn Granlund.
      4  1.1  mrg 
      5  1.1  mrg dnl  Copyright 2013 Free Software Foundation, Inc.
      6  1.1  mrg 
      7  1.1  mrg dnl  This file is part of the GNU MP Library.
      8  1.1  mrg dnl
      9  1.1  mrg dnl  The GNU MP Library is free software; you can redistribute it and/or modify
     10  1.1  mrg dnl  it under the terms of either:
     11  1.1  mrg dnl
     12  1.1  mrg dnl    * the GNU Lesser General Public License as published by the Free
     13  1.1  mrg dnl      Software Foundation; either version 3 of the License, or (at your
     14  1.1  mrg dnl      option) any later version.
     15  1.1  mrg dnl
     16  1.1  mrg dnl  or
     17  1.1  mrg dnl
     18  1.1  mrg dnl    * the GNU General Public License as published by the Free Software
     19  1.1  mrg dnl      Foundation; either version 2 of the License, or (at your option) any
     20  1.1  mrg dnl      later version.
     21  1.1  mrg dnl
     22  1.1  mrg dnl  or both in parallel, as here.
     23  1.1  mrg dnl
     24  1.1  mrg dnl  The GNU MP Library is distributed in the hope that it will be useful, but
     25  1.1  mrg dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
     26  1.1  mrg dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
     27  1.1  mrg dnl  for more details.
     28  1.1  mrg dnl
     29  1.1  mrg dnl  You should have received copies of the GNU General Public License and the
     30  1.1  mrg dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
     31  1.1  mrg dnl  see https://www.gnu.org/licenses/.
     32  1.1  mrg 
     33  1.1  mrg include(`../config.m4')
     34  1.1  mrg 
     35  1.1  mrg C		   cycles/limb
     36  1.1  mrg C UltraSPARC T3:	11
     37  1.1  mrg C UltraSPARC T4:	 4
     38  1.1  mrg 
     39  1.1  mrg C For sublsh_n we combine the two shifted limbs using xnor, using the identity
     40  1.1  mrg C (a xor not b) = (not (a xor b)) which equals (not (a or b)) when (a and b) =
     41  1.1  mrg C 0 as it is in our usage.  This gives us the ones complement for free.
     42  1.1  mrg C Unfortunately, the same trick will not work for rsblsh_n, which will instead
     43  1.1  mrg C require a separate negation.
     44  1.1  mrg C
     45  1.1  mrg C FIXME: Add rsblsh_n to this file.
     46  1.1  mrg 
     47  1.1  mrg define(`rp', `%i0')	C destination pointer; result limbs are stored through it
     48  1.1  mrg define(`up', `%i1')	C source pointer for the operand used unshifted
     49  1.1  mrg define(`vp', `%i2')	C source pointer for the operand that is shifted left
     50  1.1  mrg define(`n',  `%i3')	C limb count on entry; later reused as a byte index
     51  1.1  mrg define(`cnt',`%i4')	C left shift count
     52  1.1  mrg 
     53  1.1  mrg define(`tnc',`%o5')	C set to 64 - cnt, the complementary right shift count
     54  1.1  mrg 
                  C Operation-specific pieces, selected by whichever OPERATION_* symbol is
                  C defined when this file is preprocessed:
                  C   INITCY  seeds the carry flag before the first add-with-carry
                  C           (0 - 0 leaves carry clear; 0 - 1 borrows and so sets carry)
                  C   MERGE   combines the two shifted pieces of a limb; plain or for
                  C           addlsh_n, xnor for sublsh_n so the limb is also complemented
                  C   func    name of the entry point being built
                  C With carry-in 1 and a complemented operand, the shared add-with-carry
                  C chain computes u + not(v) + 1 = u - v for sublsh_n.
     55  1.1  mrg ifdef(`OPERATION_addlsh_n',`
     56  1.1  mrg   define(`INITCY', `subcc	%g0, 0, %g0')
     57  1.1  mrg   define(`MERGE',  `or')
     58  1.1  mrg   define(`func',   `mpn_addlsh_n')
     59  1.1  mrg ')
     60  1.1  mrg ifdef(`OPERATION_sublsh_n',`
     61  1.1  mrg   define(`INITCY', `subcc	%g0, 1, %g0')
     62  1.1  mrg   define(`MERGE',  `xnor')
     63  1.1  mrg   define(`func',   `mpn_sublsh_n')
     64  1.1  mrg ')
     65  1.1  mrg 
                  C Two sets of base pointers, one per half of the 2-way unrolled loop.
                  C The *0 names alias the incoming argument registers; the *1 names are
                  C separate registers that the function body sets 8 bytes (one limb)
                  C above the corresponding *0 pointer.
     66  1.1  mrg define(`rp0',  `rp')
     67  1.1  mrg define(`rp1',  `%o2')
     68  1.1  mrg define(`up0',  `up')
     69  1.1  mrg define(`up1',  `%o3')
     70  1.1  mrg define(`vp0',  `vp')
     71  1.1  mrg define(`vp1',  `%o4')
     72  1.1  mrg 
     73  1.1  mrg MULFUNC_PROLOGUE(mpn_addlsh_n mpn_sublsh_n)
     74  1.1  mrg ASM_START()
     75  1.1  mrg 	REGISTER(%g2,#scratch)
     76  1.1  mrg 	REGISTER(%g3,#scratch)
                  C func(rp, up, vp, n, cnt), arguments arriving in %i0..%i4.
                  C
                  C   addlsh_n:  {rp,n} = {up,n} + ({vp,n} << cnt)
                  C   sublsh_n:  {rp,n} = {up,n} - ({vp,n} << cnt)
                  C
                  C Returns in %i0 (the caller sees it in %o0 after restore) the bits shifted
                  C out of the most significant vp limb plus the final carry (addlsh_n) or
                  C borrow (sublsh_n).
                  C
                  C The loop handles two limbs per iteration and is entered at L(lo0) or
                  C L(lo1) depending on the parity of n.  Register use inside the loop:
                  C   %l0, %l1   vp limbs (lo0 half, lo1 half)
                  C   %l4, %l5   up limbs (lo0 half, lo1 half)
                  C   %g1, %g3   vp limb << cnt
                  C   %l2, %l3   vp limb >> tnc, i.e. the bits that spill into the next limb
                  C   %l6, %l7   MERGE of spill and shifted limb (complemented for sublsh_n)
                  C   %o0, %o1   result limbs
                  C The carry seeded by INITCY is handed from one addxccc to the next; no
                  C other instruction between INITCY and the final addxc writes the
                  C condition codes (the loop branches test the n register directly).
                  C
                  C NOTE(review): the code assumes n >= 1, and presumably 0 < cnt < 64 --
                  C confirm against the mpn interface documentation.
                  C NOTE(review): addxccc and addxc are macros defined outside this file;
                  C presumably they emit the 64-bit add-with-carry instructions.
                  C NOTE(review): %g2 is declared via REGISTER but not referenced below.
     77  1.1  mrg PROLOGUE(func)
     78  1.1  mrg 	save	%sp, -176, %sp	C new register window and stack frame
     79  1.1  mrg 	mov	64, tnc
     80  1.1  mrg 	sub	tnc, cnt, tnc	C tnc = 64 - cnt
     81  1.1  mrg 
     82  1.1  mrg 	andcc	n, 1, %g0	C Z set iff n is even; consumed by the be below
     83  1.1  mrg 	sllx	n, 3, n	C limb count -> byte count
     84  1.1  mrg 	add	n, -16, n	C n = 8*limbs - 16
     85  1.1  mrg 	add	up, n, up0	C advance the three base pointers by n bytes
     86  1.1  mrg 	add	vp, n, vp0
     87  1.1  mrg 	add	rp, n, rp0
     88  1.1  mrg 	add	up0, 8, up1	C second pointer set is one limb further on
     89  1.1  mrg 	add	vp0, 8, vp1
     90  1.1  mrg 	add	rp0, -8, rp1	C store pointers are biased a further 16 bytes down:
     91  1.1  mrg 	add	rp0, -16, rp0	C stores happen after n has already been stepped
     92  1.1  mrg 	neg	n, n	C n = 16 - 8*limbs, so [up0 + n] is the first limb
     93  1.1  mrg 	be	L(evn)	C even limb count -> L(evn), else fall into L(odd)
     94  1.1  mrg 	 INITCY	C delay slot, runs on both paths: seed the carry flag
     95  1.1  mrg 
     96  1.1  mrg L(odd):	ldx	[vp0 + n], %l1	C first vp limb
     97  1.1  mrg 	mov	0, %l2	C nothing spills into the lowest limb
     98  1.1  mrg 	ldx	[up0 + n], %l5	C first up limb
     99  1.1  mrg 	sllx	%l1, cnt, %g3	C needed by L(wd1) if the branch below is taken
    100  1.1  mrg 	brgez	n, L(wd1)	C n >= 0 here only for a single-limb operand
    101  1.1  mrg 	 add	n, 8, n	C delay slot, runs on both paths: step one limb
    102  1.1  mrg 	ldx	[vp0 + n], %l0	C second vp limb
    103  1.1  mrg 	b	L(lo1)
    104  1.1  mrg 	 sllx	%l1, cnt, %g3	C delay slot; same value as computed above
    105  1.1  mrg 
    106  1.1  mrg L(evn):	ldx	[vp0 + n], %l0	C first vp limb
    107  1.1  mrg 	mov	0, %l3	C nothing spills into the lowest limb
    108  1.1  mrg 	ldx	[up0 + n], %l4	C first up limb
    109  1.1  mrg 	ldx	[vp1 + n], %l1	C second vp limb
    110  1.1  mrg 	b	L(lo0)
    111  1.1  mrg 	 sllx	%l0, cnt, %g1	C delay slot: low part of the first limb
    112  1.1  mrg 
    113  1.1  mrg L(top):	addxccc(%l6, %l4, %o0)	C result limb, lo0 half (carry in and out)
    114  1.1  mrg 	ldx	[vp0 + n], %l0	C next vp limb for the lo0 half
    115  1.1  mrg 	sllx	%l1, cnt, %g3	C low part of the lo1 limb
    116  1.1  mrg 	stx	%o0, [rp0 + n]
    117  1.1  mrg L(lo1):	srlx	%l1, tnc, %l3	C bits of this limb that spill into the next
    118  1.1  mrg 	MERGE	%l2, %g3, %l7	C combine with the spill from the previous limb
    119  1.1  mrg 	ldx	[up0 + n], %l4	C next up limb for the lo0 half
    120  1.1  mrg 	addxccc(%l7, %l5, %o1)	C result limb, lo1 half (carry in and out)
    121  1.1  mrg 	ldx	[vp1 + n], %l1	C next vp limb for the lo1 half
    122  1.1  mrg 	sllx	%l0, cnt, %g1	C low part of the lo0 limb
    123  1.1  mrg 	stx	%o1, [rp1 + n]
    124  1.1  mrg L(lo0):	srlx	%l0, tnc, %l2	C bits of this limb that spill into the next
    125  1.1  mrg 	MERGE	%l3, %g1, %l6	C combine with the spill from the previous limb
    126  1.1  mrg 	ldx	[up1 + n], %l5	C up limb for the lo1 half
    127  1.1  mrg 	brlz,pt	n, L(top)	C loop while n < 0 (tested before the increment)
    128  1.1  mrg 	 add	n, 16, n	C delay slot: step two limbs, also on loop exit
    129  1.1  mrg 
                  C Wind-down: finish the last two limbs without issuing further loads.
    130  1.1  mrg 	addxccc(%l6, %l4, %o0)
    131  1.1  mrg 	sllx	%l1, cnt, %g3
    132  1.1  mrg 	stx	%o0, [rp0 + n]
    133  1.1  mrg L(wd1):	srlx	%l1, tnc, %l3	C bits shifted out of the most significant vp limb
    134  1.1  mrg 	MERGE	%l2, %g3, %l7
    135  1.1  mrg 	addxccc(%l7, %l5, %o1)	C most significant result limb
    136  1.1  mrg 	stx	%o1, [rp1 + n]
    137  1.1  mrg 
                  C Return value for addlsh_n: shifted-out bits plus the carry out.
    138  1.1  mrg ifdef(`OPERATION_addlsh_n',
    139  1.1  mrg `	addxc(	%l3, %g0, %i0)')
                  C Return value for sublsh_n: carry set means no borrow in this scheme, so
                  C the result is %l3 - (carry - 1) = shifted-out bits plus the borrow.
    140  1.1  mrg ifdef(`OPERATION_sublsh_n',
    141  1.1  mrg `	addxc(	%g0, %g0, %g1)
    142  1.1  mrg 	add	%g1, -1, %g1
    143  1.1  mrg 	sub	%l3, %g1, %i0')
    144  1.1  mrg 
    145  1.1  mrg 	ret
    146  1.1  mrg 	 restore	C delay slot: pop the window; %i0 becomes the caller %o0
    147  1.1  mrg EPILOGUE()
    148