Home | History | Annotate | Line # | Download | only in ultrasparc1234
      1      1.1  mrg dnl  SPARC v9 mpn_sub_n -- Subtract two limb vectors of the same length > 0 and
      2      1.1  mrg dnl  store difference in a third limb vector.
      3      1.1  mrg 
      4  1.1.1.2  mrg dnl  Copyright 2001-2003, 2011 Free Software Foundation, Inc.
      5      1.1  mrg 
      6      1.1  mrg dnl  This file is part of the GNU MP Library.
      7  1.1.1.2  mrg dnl
      8      1.1  mrg dnl  The GNU MP Library is free software; you can redistribute it and/or modify
      9  1.1.1.2  mrg dnl  it under the terms of either:
     10  1.1.1.2  mrg dnl
     11  1.1.1.2  mrg dnl    * the GNU Lesser General Public License as published by the Free
     12  1.1.1.2  mrg dnl      Software Foundation; either version 3 of the License, or (at your
     13  1.1.1.2  mrg dnl      option) any later version.
     14  1.1.1.2  mrg dnl
     15  1.1.1.2  mrg dnl  or
     16  1.1.1.2  mrg dnl
     17  1.1.1.2  mrg dnl    * the GNU General Public License as published by the Free Software
     18  1.1.1.2  mrg dnl      Foundation; either version 2 of the License, or (at your option) any
     19  1.1.1.2  mrg dnl      later version.
     20  1.1.1.2  mrg dnl
     21  1.1.1.2  mrg dnl  or both in parallel, as here.
     22  1.1.1.2  mrg dnl
     23      1.1  mrg dnl  The GNU MP Library is distributed in the hope that it will be useful, but
     24      1.1  mrg dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
     25  1.1.1.2  mrg dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
     26  1.1.1.2  mrg dnl  for more details.
     27  1.1.1.2  mrg dnl
     28  1.1.1.2  mrg dnl  You should have received copies of the GNU General Public License and the
     29  1.1.1.2  mrg dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
     30  1.1.1.2  mrg dnl  see https://www.gnu.org/licenses/.
     31      1.1  mrg 
     32      1.1  mrg include(`../config.m4')
     33      1.1  mrg 
     34      1.1  mrg C		   cycles/limb
     35      1.1  mrg C UltraSPARC 1&2:     4
     36      1.1  mrg C UltraSPARC 3:	      4.5
     37      1.1  mrg 
     38      1.1  mrg C Compute carry-out from the most significant bits of u,v, and r, where
     39      1.1  mrg C r=u-v-carry_in, using logic operations.
     40      1.1  mrg 
     41      1.1  mrg C This code runs at 4 cycles/limb on UltraSPARC 1 and 2.  It has a 4 insn
     42      1.1  mrg C recurrency, and on the UltraSPARC 1 and 2 the IE units are 100% saturated.
     43      1.1  mrg C Therefore, it seems futile to try to optimize this any further...
     44      1.1  mrg 
     45      1.1  mrg C INPUT PARAMETERS
     46      1.1  mrg define(`rp',`%i0')	dnl  base register of the result stores
     47      1.1  mrg define(`up',`%i1')	dnl  base register of the u (minuend) loads
     48      1.1  mrg define(`vp',`%i2')	dnl  base register of the v (subtrahend) loads
     49      1.1  mrg define(`n',`%i3')	dnl  limb count, counted down as limbs are consumed
     50      1.1  mrg 
                      C Registers holding the four u limbs and the four v limbs of one
                      C unrolled group.
     51      1.1  mrg define(`u0',`%l0')
     52      1.1  mrg define(`u1',`%l2')
     53      1.1  mrg define(`u2',`%l4')
     54      1.1  mrg define(`u3',`%l6')
     55      1.1  mrg define(`v0',`%l1')
     56      1.1  mrg define(`v1',`%l3')
     57      1.1  mrg define(`v2',`%l5')
     58      1.1  mrg define(`v3',`%l7')
     59      1.1  mrg 
                      C Borrow handed from one limb to the next (0 or 1).  mpn_sub_n clears it
                      C on entry; mpn_sub_nc uses whatever value %i4 holds on entry.
     60      1.1  mrg define(`cy',`%i4')
     61      1.1  mrg 
                      C Floating-point filler instructions mixed into the integer code.  They
                      C only read %f0, and their results (%f2, %f4) are never read in this file.
     62      1.1  mrg define(`fanop',`fitod %f0,%f2')		dnl  A quasi nop running in the FA pipe
     63      1.1  mrg define(`fmnop',`fmuld %f0,%f0,%f4')	dnl  A quasi nop running in the FM pipe
     64      1.1  mrg 
     65      1.1  mrg ASM_START()
     66      1.1  mrg 	REGISTER(%g2,#scratch)
     67      1.1  mrg 	REGISTER(%g3,#scratch)
                      C mpn_sub_nc -- same subtraction as mpn_sub_n, except that cy is not
                      C cleared here: the value already in %i4 is used as the borrow into the
                      C first limb.  This entry point contains no arithmetic of its own; both
                      C branch targets (.Loop0 and L(com)) are labels inside mpn_sub_n, which
                      C also performs the return.
     68      1.1  mrg PROLOGUE(mpn_sub_nc)
     69      1.1  mrg 	save	%sp,-160,%sp
     70      1.1  mrg 
     71      1.1  mrg 	fitod	%f0,%f0		C make sure f0 contains small, quiet number
     72      1.1  mrg 	subcc	n,4,%g0	C set condition codes from n - 4, result discarded
     73      1.1  mrg 	bl,pn	%xcc,.Loop0	C n < 4: go to the one-limb-at-a-time loop
     74      1.1  mrg 	nop	C delay slot; mpn_sub_n clears cy in this slot, here cy is kept
     75      1.1  mrg 	b,a	L(com)	C n >= 4: join the unrolled code; the annul bit skips the delay slot
     76      1.1  mrg EPILOGUE()
     77      1.1  mrg 
                      C mpn_sub_n -- subtract the n-limb vector at vp from the n-limb vector at
                      C up, store the n-limb difference at rp, and return the borrow out of the
                      C last limb (0 or 1) in %i0.  The borrow into the first limb is zero.
                      C
                      C Layout:
                      C   n < 4    only .Loop0 runs, one limb per iteration.
                      C   n >= 4   L(com) loads four limb pairs; .Loop then handles four limbs per
                      C            iteration while loading the next four; .Lend4567 finishes the
                      C            last loaded group; .Loop0 handles the remaining 0 to 3 limbs.
                      C
                      C Borrow: with r = u - v - cy, the borrow out of a limb is bit 63 of
                      C   (r | ~(u | ~v)) & ~(u & ~v)
                      C built with orn, orn, andn, andn and extracted with srlx.
     78      1.1  mrg PROLOGUE(mpn_sub_n)
     79      1.1  mrg 	save	%sp,-160,%sp
     80      1.1  mrg 
     81      1.1  mrg 	fitod	%f0,%f0		C make sure f0 contains small, quiet number
     82      1.1  mrg 	subcc	n,4,%g0	C set condition codes from n - 4, result discarded
     83      1.1  mrg 	bl,pn	%xcc,.Loop0	C n < 4: skip the unrolled code
     84      1.1  mrg 	mov	0,cy	C delay slot, executed on both paths: no borrow in
                      C mpn_sub_nc joins here with its own cy.  Load the first four limb pairs;
                      C up and vp are advanced part-way through, hence the negative offsets.
     85      1.1  mrg L(com):
     86      1.1  mrg 	ldx	[up+0],u0
     87      1.1  mrg 	ldx	[vp+0],v0
     88      1.1  mrg 	add	up,32,up
     89      1.1  mrg 	ldx	[up-24],u1
     90      1.1  mrg 	ldx	[vp+8],v1
     91      1.1  mrg 	add	vp,32,vp
     92      1.1  mrg 	ldx	[up-16],u2
     93      1.1  mrg 	ldx	[vp-16],v2
     94      1.1  mrg 	ldx	[up-8],u3
     95      1.1  mrg 	ldx	[vp-8],v3
     96      1.1  mrg 	subcc	n,8,n	C n -= 8; n >= 0 means four more limbs can be loaded
     97      1.1  mrg 	sub	u0,v0,%g1	C main sub
     98  1.1.1.2  mrg 	sub	%g1,cy,%g5	C carry sub
     99      1.1  mrg 	orn	u0,v0,%g2	C g2 = u0 | ~v0
    100      1.1  mrg 	bl,pn	%xcc,.Lend4567	C flags from the subcc above; n < 0: nothing more to load
    101      1.1  mrg 	fanop	C delay slot
    102      1.1  mrg 	b,a	.Loop	C annulled branch over the alignment padding
    103      1.1  mrg 
    104      1.1  mrg 	.align	16
    105      1.1  mrg C START MAIN LOOP
                      C State at the top of every iteration: u0-u3 and v0-v3 hold the current
                      C group, %g5 is the difference for limb 0, %g2 is u0 | ~v0, up and vp
                      C point just past the current group and rp points at its first result.
                      C The C -- lines split the body into groups of four instructions, padded
                      C with fanop and fmnop.
    106  1.1.1.2  mrg .Loop:	orn	%g5,%g2,%g2	C g2 = r0 | ~(u0 | ~v0)
    107      1.1  mrg 	andn	u0,v0,%g3	C g3 = u0 & ~v0
    108      1.1  mrg 	ldx	[up+0],u0	C old u0 no longer needed: fetch u0 of the next group
    109      1.1  mrg 	fanop
    110      1.1  mrg C --
    111      1.1  mrg 	andn	%g2,%g3,%g2	C bit 63 of g2 = borrow out of limb 0
    112      1.1  mrg 	ldx	[vp+0],v0	C v0 of the next group
    113      1.1  mrg 	add	up,32,up
    114      1.1  mrg 	fanop
    115      1.1  mrg C --
    116      1.1  mrg 	srlx	%g2,63,cy	C cy = borrow out of limb 0
    117      1.1  mrg 	sub	u1,v1,%g1	C limb 1: main sub
    118  1.1.1.2  mrg 	stx	%g5,[rp+0]	C store limb 0 result
    119      1.1  mrg 	fanop
    120      1.1  mrg C --
    121  1.1.1.2  mrg 	sub	%g1,cy,%g5	C limb 1: borrow sub
    122      1.1  mrg 	orn	u1,v1,%g2
    123      1.1  mrg 	fmnop
    124      1.1  mrg 	fanop
    125      1.1  mrg C --
    126  1.1.1.2  mrg 	orn	%g5,%g2,%g2
    127      1.1  mrg 	andn	u1,v1,%g3
    128      1.1  mrg 	ldx	[up-24],u1	C u1 of the next group (up was advanced above)
    129      1.1  mrg 	fanop
    130      1.1  mrg C --
    131      1.1  mrg 	andn	%g2,%g3,%g2
    132      1.1  mrg 	ldx	[vp+8],v1	C v1 of the next group (vp not yet advanced)
    133      1.1  mrg 	add	vp,32,vp
    134      1.1  mrg 	fanop
    135      1.1  mrg C --
    136      1.1  mrg 	srlx	%g2,63,cy	C cy = borrow out of limb 1
    137      1.1  mrg 	sub	u2,v2,%g1	C limb 2: main sub
    138  1.1.1.2  mrg 	stx	%g5,[rp+8]	C store limb 1 result
    139      1.1  mrg 	fanop
    140      1.1  mrg C --
    141  1.1.1.2  mrg 	sub	%g1,cy,%g5	C limb 2: borrow sub
    142      1.1  mrg 	orn	u2,v2,%g2
    143      1.1  mrg 	fmnop
    144      1.1  mrg 	fanop
    145      1.1  mrg C --
    146  1.1.1.2  mrg 	orn	%g5,%g2,%g2
    147      1.1  mrg 	andn	u2,v2,%g3
    148      1.1  mrg 	ldx	[up-16],u2	C u2 of the next group
    149      1.1  mrg 	fanop
    150      1.1  mrg C --
    151      1.1  mrg 	andn	%g2,%g3,%g2
    152      1.1  mrg 	ldx	[vp-16],v2	C v2 of the next group
    153      1.1  mrg 	add	rp,32,rp	C rp advanced here, so limbs 2 and 3 go to rp-16 and rp-8
    154      1.1  mrg 	fanop
    155      1.1  mrg C --
    156      1.1  mrg 	srlx	%g2,63,cy	C cy = borrow out of limb 2
    157      1.1  mrg 	sub	u3,v3,%g1	C limb 3: main sub
    158  1.1.1.2  mrg 	stx	%g5,[rp-16]	C store limb 2 result
    159      1.1  mrg 	fanop
    160      1.1  mrg C --
    161  1.1.1.2  mrg 	sub	%g1,cy,%g5	C limb 3: borrow sub
    162      1.1  mrg 	orn	u3,v3,%g2
    163      1.1  mrg 	fmnop
    164      1.1  mrg 	fanop
    165      1.1  mrg C --
    166  1.1.1.2  mrg 	orn	%g5,%g2,%g2
    167      1.1  mrg 	andn	u3,v3,%g3
    168      1.1  mrg 	ldx	[up-8],u3	C u3 of the next group
    169      1.1  mrg 	fanop
    170      1.1  mrg C --
    171      1.1  mrg 	andn	%g2,%g3,%g2
    172      1.1  mrg 	subcc	n,4,n	C account for the group just loaded; flags feed the bge below
    173      1.1  mrg 	ldx	[vp-8],v3	C v3 of the next group
    174      1.1  mrg 	fanop
    175      1.1  mrg C --
    176      1.1  mrg 	srlx	%g2,63,cy	C cy = borrow out of limb 3
    177      1.1  mrg 	sub	u0,v0,%g1	C limb 0 of the next group: main sub
    178  1.1.1.2  mrg 	stx	%g5,[rp-8]	C store limb 3 result
    179      1.1  mrg 	fanop
    180      1.1  mrg C --
    181  1.1.1.2  mrg 	sub	%g1,cy,%g5	C limb 0 of the next group: borrow sub
    182      1.1  mrg 	orn	u0,v0,%g2
    183      1.1  mrg 	bge,pt	%xcc,.Loop	C n >= 0: a further group can still be loaded
    184      1.1  mrg 	fanop	C delay slot
    185      1.1  mrg C END MAIN LOOP
                      C Finish the group that is already in registers.  Same arithmetic as the
                      C loop body, but with no loads and no filler instructions.
    186      1.1  mrg .Lend4567:
    187  1.1.1.2  mrg 	orn	%g5,%g2,%g2
    188      1.1  mrg 	andn	u0,v0,%g3
    189      1.1  mrg 	andn	%g2,%g3,%g2
    190      1.1  mrg 	srlx	%g2,63,cy	C cy = borrow out of limb 0
    191      1.1  mrg 	sub	u1,v1,%g1
    192  1.1.1.2  mrg 	stx	%g5,[rp+0]
    193  1.1.1.2  mrg 	sub	%g1,cy,%g5
    194      1.1  mrg 	orn	u1,v1,%g2
    195  1.1.1.2  mrg 	orn	%g5,%g2,%g2
    196      1.1  mrg 	andn	u1,v1,%g3
    197      1.1  mrg 	andn	%g2,%g3,%g2
    198      1.1  mrg 	srlx	%g2,63,cy	C cy = borrow out of limb 1
    199      1.1  mrg 	sub	u2,v2,%g1
    200  1.1.1.2  mrg 	stx	%g5,[rp+8]
    201  1.1.1.2  mrg 	sub	%g1,cy,%g5
    202      1.1  mrg 	orn	u2,v2,%g2
    203  1.1.1.2  mrg 	orn	%g5,%g2,%g2
    204      1.1  mrg 	andn	u2,v2,%g3
    205      1.1  mrg 	andn	%g2,%g3,%g2
    206      1.1  mrg 	add	rp,32,rp	C rp now points past the group
    207      1.1  mrg 	srlx	%g2,63,cy	C cy = borrow out of limb 2
    208      1.1  mrg 	sub	u3,v3,%g1
    209  1.1.1.2  mrg 	stx	%g5,[rp-16]
    210  1.1.1.2  mrg 	sub	%g1,cy,%g5
    211      1.1  mrg 	orn	u3,v3,%g2
    212  1.1.1.2  mrg 	orn	%g5,%g2,%g2
    213      1.1  mrg 	andn	u3,v3,%g3
    214      1.1  mrg 	andn	%g2,%g3,%g2
    215      1.1  mrg 	srlx	%g2,63,cy	C cy = borrow out of limb 3
    216  1.1.1.2  mrg 	stx	%g5,[rp-8]
    217      1.1  mrg 
    218      1.1  mrg 	addcc	n,4,n	C n = number of limbs still to do, 0 to 3
    219      1.1  mrg 	bz,pn	%xcc,.Lret	C none left: return cy
    220      1.1  mrg 	fanop	C delay slot
    221      1.1  mrg 
                      C One limb per iteration.  Entered directly when n < 4, or by falling
                      C through from above for the leftover limbs.
    222      1.1  mrg .Loop0:	ldx	[up],u0
    223      1.1  mrg 	add	up,8,up
    224      1.1  mrg 	ldx	[vp],v0
    225      1.1  mrg 	add	vp,8,vp
    226      1.1  mrg 	add	rp,8,rp	C advanced before the store, hence rp-8 below
    227      1.1  mrg 	subcc	n,1,n	C flags feed the bnz below
    228      1.1  mrg 	sub	u0,v0,%g1
    229      1.1  mrg 	orn	u0,v0,%g2
    230  1.1.1.2  mrg 	sub	%g1,cy,%g5	C g5 = u0 - v0 - cy
    231      1.1  mrg 	andn	u0,v0,%g3
    232  1.1.1.2  mrg 	orn	%g5,%g2,%g2
    233  1.1.1.2  mrg 	stx	%g5,[rp-8]
    234      1.1  mrg 	andn	%g2,%g3,%g2	C bit 63 of g2 = borrow out
    235      1.1  mrg 	bnz,pt	%xcc,.Loop0	C more limbs to do
    236      1.1  mrg 	srlx	%g2,63,cy	C delay slot, executed on both paths: cy = borrow out
    237      1.1  mrg 
    238      1.1  mrg .Lret:	mov	cy,%i0	C return value = final borrow
    239      1.1  mrg 	ret
    240      1.1  mrg 	restore	C delay slot of ret
    241      1.1  mrg EPILOGUE(mpn_sub_n)
    242