Home | History | Annotate | Line # | Download | only in ultrasparct3
      1 dnl  SPARC v9 mpn_mod_34lsub1 for T3/T4/T5.
      2 
      3 dnl  Copyright 2005, 2013 Free Software Foundation, Inc.
      4 
      5 dnl  This file is part of the GNU MP Library.
      6 dnl
      7 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
      8 dnl  it under the terms of either:
      9 dnl
     10 dnl    * the GNU Lesser General Public License as published by the Free
     11 dnl      Software Foundation; either version 3 of the License, or (at your
     12 dnl      option) any later version.
     13 dnl
     14 dnl  or
     15 dnl
     16 dnl    * the GNU General Public License as published by the Free Software
     17 dnl      Foundation; either version 2 of the License, or (at your option) any
     18 dnl      later version.
     19 dnl
     20 dnl  or both in parallel, as here.
     21 dnl
     22 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
     23 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
     24 dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
     25 dnl  for more details.
     26 dnl
     27 dnl  You should have received copies of the GNU General Public License and the
     28 dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
     29 dnl  see https://www.gnu.org/licenses/.
     30 
     31 include(`../config.m4')
     32 
     33 C		    cycles/limb
     34 C UltraSPARC T1:	 -
     35 C UltraSPARC T3:	 5
     36 C UltraSPARC T4:	 1.57
     37 
     38 C This is based on the powerpc64/mode64 code.
     39 
     40 C INPUT PARAMETERS
        C up is the base register of every ldx limb load in the function body.
        C n is the limb counter that the function body steps down by 3.
        C Both are %i registers, i.e. the incoming argument registers as seen
        C after the save instruction at the start of the function.
     41 define(`up', `%i0')
     42 define(`n',  `%i1')
     43 
     44 ASM_START()
        C REGISTER() is defined outside this file; presumably it emits the
        C assembler directive that declares use of these global registers as
        C scratch -- confirm against its definition.  The function body uses
        C %g3; %g2 is not referenced anywhere else in this file.
     45 	REGISTER(%g2,#scratch)
     46 	REGISTER(%g3,#scratch)
     47 PROLOGUE(mpn_mod_34lsub1)
        C Adds the n 64-bit limbs at up into three interleaved sums and folds
        C them into a single word that is congruent to the whole operand
        C modulo 2^48-1.  The result is only congruent; no final reduction to
        C the range below 2^48-1 is performed.
        C
        C Register use:
        C   up (%i0)      address of the next limb to load
        C   n  (%i1)      limb count, biased downwards in steps of 3 (see below)
        C   %g1 %g3 %g4   sums of the limbs with index 0, 1, 2 (mod 3)
        C   %g5           carry left over from the top of the %g4 sum
        C   %l5 %l6 %l7   limbs loaded ahead of the additions that consume them
        C   %i0           result, written just before the restore
        C
        C addxccc() and addxc() are macros defined outside this file.  They are
        C presumably the add-with-carry forms that use the 64-bit carry flag,
        C with addxc() being the variant that leaves the flags unchanged --
        C confirm against their definitions.
     48 	save	%sp, -176, %sp		C new register window and 176-byte frame
     49 
        C Zero the accumulators.
     50 	mov	0, %g1
     51 	mov	0, %g3
     52 	mov	0, %g4
     53 	addcc	%g0, 0, %g5		C %g5 = 0, and clears the carry flag
     54 
     55 	add	n, -3, n		C n = count - 3
     56 	brlz	n, L(lt3)		C fewer than 3 limbs: only the tail code runs
     57 	 nop
     58 
     59 	add	n, -3, n		C n = count - 6
     60 	ldx	[up+0], %l5		C preload the first group of three limbs
     61 	ldx	[up+8], %l6
     62 	ldx	[up+16], %l7
     63 	brlz	n, L(end)		C no second full group: skip the loop
     64 	 add	up, 24, up		C (delay slot) step past the preloaded limbs
     65 
        C Main loop: add the three limbs loaded previously while loading the
        C next three.  The additions form one carry chain %g1 -> %g3 -> %g4,
        C and the carry out of %g4 is consumed by the %g1 addition of the next
        C iteration (or at L(end)).  This relies on ldx, add and brgez leaving
        C the condition codes untouched, so the instruction order matters.
     66 	ALIGN(16)
     67 L(top):	addxccc(%g1, %l5, %g1)
     68 	ldx	[up+0], %l5
     69 	addxccc(%g3, %l6, %g3)
     70 	ldx	[up+8], %l6
     71 	addxccc(%g4, %l7, %g4)
     72 	ldx	[up+16], %l7
     73 	add	n, -3, n		C one more group of three has been loaded
     74 	brgez	n, L(top)		C n >= 0: another full group is available
     75 	 add	up, 24, up		C (delay slot) advance to the next group
     76 
        C Add the last group that was loaded but not yet summed, then park the
        C final carry out of %g4 in %g5.
     77 L(end):	addxccc(	%g1, %l5, %g1)
     78 	addxccc(%g3, %l6, %g3)
     79 	addxccc(%g4, %l7, %g4)
     80 	addxc(	%g5, %g0, %g5)
     81 
        C Tail.  For a non-negative incoming count, n is -3, -2 or -1 here on
        C every path and n + 3 is the number of limbs still to be added.
        C blt and beq are written without %xcc, so they test the 32-bit
        C condition codes; with n in -3..-1 the outcome is the same as for the
        C 64-bit comparison.
     82 L(lt3):	cmp	n, -2
     83 	blt	L(2)			C n = -3: nothing left to add
     84 	 nop
     85 
     86 	ldx	[up+0], %l5		C first leftover limb
     87 	mov	0, %l6			C stands in for an absent second limb
     88 	beq	L(1)			C n = -2: exactly one limb left
     89 	 addcc	%g1, %l5, %g1		C (delay slot, both paths) starts a fresh carry chain
     90 
     91 	ldx	[up+8], %l6		C n = -1: second leftover limb
        C Propagate the tail carry through %g3 and %g4 and into %g5.
     92 L(1):	addxccc(%g3, %l6, %g3)
     93 	addxccc(%g4, %g0, %g4)
     94 	addxc(	%g5, %g0, %g5)
     95 
        C Fold the sums.  Since 2^48 == 1 (mod 2^48-1), the limb weights reduce
        C to 2^64 == 2^16, 2^128 == 2^32 and 2^192 == 1.  Each sum is split at
        C the bit where its weight reaches 2^48: the low part keeps the reduced
        C weight (a left shift by 0, 16 or 32) and the high part wraps around
        C to weight 1.
     96 L(2):	sllx	%g1, 16, %l0
     97 	srlx	%l0, 16, %l0		C %l0 = %g1 mod 2^48
     98 	srlx	%g1, 48, %l3		C %l3 = %g1 div 2^48
     99 	srl	%g3, 0, %g1		C %g1 reused as scratch: low 32 bits of %g3
    100 	sllx	%g1, 16, %l4		C %l4 = (%g3 mod 2^32) << 16
    101 	srlx	%g3, 32, %l5		C %l5 = %g3 div 2^32
    102 	sethi	%hi(0xffff0000), %g1	C mask for bits 16..31
    103 	andn	%g4, %g1, %g1		C clear bits 16..31; bits 32..63 fall off in the shift below
    104 	sllx	%g1, 32, %l6		C %l6 = (%g4 mod 2^16) << 32
    105 	srlx	%g4, 16, %l7		C %l7 = %g4 div 2^16
    106 
        C Sum all the parts; the saved carries in %g5 count at weight 1.
    107 	add	%l0, %l3, %l0
    108 	add	%l4, %l5, %l4
    109 	add	%l6, %l7, %l6
    110 
    111 	add	%l0, %l4, %l0
    112 	add	%l6, %g5, %l6
    113 
    114 	add	%l0, %l6, %i0		C result goes in %i0, which the caller sees as %o0
    115 	ret
    116 	 restore			C (delay slot) pop the register window
    117 EPILOGUE()
    118