Home | History | Annotate | Line # | Download | only in ultrasparct3
      1  1.1  mrg dnl  SPARC T3/T4/T5 mpn_bdiv_q_1.
      2  1.1  mrg 
      3  1.1  mrg dnl  Contributed to the GNU project by Torbjörn Granlund.
      4  1.1  mrg 
      5  1.1  mrg dnl  Copyright 2013, 2017 Free Software Foundation, Inc.
      6  1.1  mrg 
      7  1.1  mrg dnl  This file is part of the GNU MP Library.
      8  1.1  mrg dnl
      9  1.1  mrg dnl  The GNU MP Library is free software; you can redistribute it and/or modify
     10  1.1  mrg dnl  it under the terms of either:
     11  1.1  mrg dnl
     12  1.1  mrg dnl    * the GNU Lesser General Public License as published by the Free
     13  1.1  mrg dnl      Software Foundation; either version 3 of the License, or (at your
     14  1.1  mrg dnl      option) any later version.
     15  1.1  mrg dnl
     16  1.1  mrg dnl  or
     17  1.1  mrg dnl
     18  1.1  mrg dnl    * the GNU General Public License as published by the Free Software
     19  1.1  mrg dnl      Foundation; either version 2 of the License, or (at your option) any
     20  1.1  mrg dnl      later version.
     21  1.1  mrg dnl
     22  1.1  mrg dnl  or both in parallel, as here.
     23  1.1  mrg dnl
     24  1.1  mrg dnl  The GNU MP Library is distributed in the hope that it will be useful, but
     25  1.1  mrg dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
     26  1.1  mrg dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
     27  1.1  mrg dnl  for more details.
     28  1.1  mrg dnl
     29  1.1  mrg dnl  You should have received copies of the GNU General Public License and the
     30  1.1  mrg dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
     31  1.1  mrg dnl  see https://www.gnu.org/licenses/.
     32  1.1  mrg 
     33  1.1  mrg include(`../config.m4')
     34  1.1  mrg 
     35  1.1  mrg C                  cycles/limb
     36  1.1  mrg C UltraSPARC T3:	31
     37  1.1  mrg C UltraSPARC T4/T5:	20-26  hits 20 early, then sharply drops
     38  1.1  mrg 
     39  1.1  mrg C INPUT PARAMETERS
                  C Symbolic names for the registers used below.  The %i names refer to the
                  C register window that each entry point sets up with its own save.
                  C   qp    pointer through which every result limb is stored
                  C   ap    pointer from which the source limbs are loaded
                  C   n     limb count; both entry points bias it by -2 and use it as the
                  C         loop counter
                  C   d     divisor limb (mpn_bdiv_q_1 shifts its trailing zero bits out)
                  C   dinv  multiplier applied to form each result limb; read as passed in
                  C         by mpn_pi1_bdiv_q_1, computed from d by mpn_bdiv_q_1
                  C   cnt   right-shift count applied to the source limbs; read as passed
                  C         in by mpn_pi1_bdiv_q_1, computed from d by mpn_bdiv_q_1
     40  1.1  mrg define(`qp',  `%i0')
     41  1.1  mrg define(`ap',  `%i1')
     42  1.1  mrg define(`n',   `%i2')
     43  1.1  mrg define(`d',   `%i3')
     44  1.1  mrg define(`dinv',`%i4')
     45  1.1  mrg define(`cnt', `%i5')
     46  1.1  mrg 
                  C   tnc   scratch register set to -cnt and used as the left-shift count
                  C         when two neighbouring source limbs are joined (sllx only looks
                  C         at the low 6 bits of the count, so this acts as 64-cnt)
     47  1.1  mrg define(`tnc', `%o2')
     48  1.1  mrg 
     49  1.1  mrg ASM_START()
     50  1.1  mrg 	REGISTER(%g2,#scratch)
     51  1.1  mrg 	REGISTER(%g3,#scratch)
                  C mpn_bdiv_q_1 -- entry point that derives cnt and dinv from d and then
                  C continues in the body of mpn_pi1_bdiv_q_1 (labels L(norm), L(edu) and
                  C L(eee) are defined there).
                  C
                  C   cnt  = number of trailing zero bits of d
                  C   d    = d >> cnt
                  C   dinv = three Newton steps x <- 2*x - x*x*d, seeded with a byte read
                  C          from binvert_limb_table (presumably an 8-bit inverse of d, so
                  C          that the result is the inverse of d modulo 2^64 -- the table
                  C          itself is defined elsewhere)
     52  1.1  mrg PROLOGUE(mpn_bdiv_q_1)
                  C New register window and a 176-byte stack frame.
     53  1.1  mrg 	save	%sp, -176, %sp
                  C %o5 = first source limb.
     54  1.1  mrg 	ldx	[ap], %o5
                  C (d-1) & ~d has a one bit exactly where d has a trailing zero bit, so its
                  C population count is the number of trailing zeros of d.
     55  1.1  mrg 	add	d, -1, %g1
     56  1.1  mrg 	andn	%g1, d, %g1
     57  1.1  mrg 	popc	%g1, cnt
     58  1.1  mrg 
                  C Shift the trailing zeros out of d.
     59  1.1  mrg 	srlx	d, cnt, d
                  C Table index = bits 1..7 of the shifted d.
     60  1.1  mrg 	srlx	d, 1, %g1
     61  1.1  mrg 	and	%g1, 127, %g1
                  C %g2 = address of binvert_limb_table (LEA64 is a macro defined elsewhere;
                  C presumably g4 names a scratch register for it).
     62  1.1  mrg 	LEA64(binvert_limb_table, g2, g4)
                  C %g1 = seed byte x.
     63  1.1  mrg 	ldub	[%g2+%g1], %g1
                  C Newton step 1: %g2 = 2*x - x*x*d.
     64  1.1  mrg 	add	%g1, %g1, %g2
     65  1.1  mrg 	mulx	%g1, %g1, %g1
     66  1.1  mrg 	mulx	%g1, d, %g1
     67  1.1  mrg 	sub	%g2, %g1, %g2
                  C Newton step 2: %g1 = 2*x - x*x*d with x = %g2.
     68  1.1  mrg 	add	%g2, %g2, %g1
     69  1.1  mrg 	mulx	%g2, %g2, %g2
     70  1.1  mrg 	mulx	%g2, d, %g2
     71  1.1  mrg 	sub	%g1, %g2, %g1
                  C Newton step 3: %o7 = 2*x and %g1 = x*x*d with x = %g1; the final
                  C subtraction into dinv sits in the delay slot of the brz below.
     72  1.1  mrg 	add	%g1, %g1, %o7
     73  1.1  mrg 	mulx	%g1, %g1, %g1
     74  1.1  mrg 	mulx	%g1, d, %g1
                  C Bias the limb count by -2, as mpn_pi1_bdiv_q_1 does at its own entry.
     75  1.1  mrg 	add	n, -2, n
                  C cnt == 0: no limb shifting needed, use the L(norm) path.  The delay slot
                  C executes on both paths and completes dinv.
     76  1.1  mrg 	brz,pt	cnt, L(norm)
     77  1.1  mrg 	 sub	%o7, %g1, dinv
     78  1.1  mrg 
                  C Biased n < 0: only one limb to produce, go straight to L(edu).  The
                  C delay slot shifts the first source limb right by cnt on both paths.
     79  1.1  mrg 	brlz,pt	n, L(edu)
     80  1.1  mrg 	 srlx	%o5, cnt, %o5
                  C Otherwise enter the shifting loop at L(eee) with %g4 (the value
                  C subtracted from the next limb) cleared in the delay slot.
     81  1.1  mrg 	b	L(eee)
     82  1.1  mrg 	 mov	0, %g4
     83  1.1  mrg EPILOGUE()
     84  1.1  mrg 
     85  1.1  mrg PROLOGUE(mpn_pi1_bdiv_q_1)
                  C mpn_pi1_bdiv_q_1 -- entry point that uses dinv and cnt exactly as they
                  C are passed in.  Each result limb is formed as
                  C     q = (source limb, right-shifted by cnt across limb boundaries,
                  C          minus the value carried from the previous limb) * dinv
                  C and the value carried to the next limb is the high 64 bits of d*q plus
                  C the borrow of that subtraction.  Two loops exist: L(tpn) for cnt == 0
                  C and L(tpu) for cnt != 0.  mpn_bdiv_q_1 branches to L(norm), L(edu) and
                  C L(eee) below.
                  C umulxhi() and addxc() are macros defined elsewhere; presumably they emit
                  C the instructions of the same names (high half of a 64x64 multiply, and
                  C add with the carry flag).
                  C
                  C New register window and a 176-byte stack frame.
     86  1.1  mrg 	save	%sp, -176, %sp
                  C %o5 = first source limb.
     87  1.1  mrg 	ldx	[ap], %o5
     88  1.1  mrg 
                  C cnt == 0 selects the non-shifting path.  The delay slot biases n by -2
                  C on both paths.
     89  1.1  mrg 	brz,pt	cnt, L(norm)
     90  1.1  mrg 	 add	n, -2, n
     91  1.1  mrg 
     92  1.1  mrg L(unorm):
                  C Biased n < 0: only one limb to produce.  The delay slot shifts the first
                  C source limb right by cnt on both paths.
     93  1.1  mrg 	brlz,pt	n, L(edu)
     94  1.1  mrg 	 srlx	%o5, cnt, %o5
                  C %g4 = value subtracted from the next limb, initially zero.
     95  1.1  mrg 	mov	0, %g4
                  C tnc = -cnt, the left-shift count used to join neighbouring limbs.
     96  1.1  mrg L(eee):	sub	%g0, cnt, tnc
     97  1.1  mrg 
                  C Shifting loop, one pass per remaining source limb.
                  C %o5 holds the low part left over from the previous limb (already
                  C shifted right by cnt).
     98  1.1  mrg L(tpu):	ldx	[ap+8], %g3
     99  1.1  mrg 	add	ap, 8, ap
                  C %g5 = (next limb << tnc) | %o5: the current shifted limb.
    100  1.1  mrg 	sllx	%g3, tnc, %g5
    101  1.1  mrg 	or	%g5, %o5, %g5
                  C %o5 = next limb >> cnt, kept for the following pass.
    102  1.1  mrg 	srlx	%g3, cnt, %o5
                  C Subtract the carried value; the borrow stays in the carry flag until
                  C addxc below.
    103  1.1  mrg 	subcc	%g5, %g4, %g4
                  C q = difference * dinv; store it and advance qp.
    104  1.1  mrg 	mulx	%g4, dinv, %g1
    105  1.1  mrg 	stx	%g1, [qp]
    106  1.1  mrg 	add	qp, 8, qp
                  C %g4 = high half of d*q, plus the borrow from subcc.
    107  1.1  mrg 	umulxhi(d, %g1, %g1)
    108  1.1  mrg 	addxc(	%g1, %g0, %g4)
                  C Loop while n > 0; the delay slot decrements n.
    109  1.1  mrg 	brgz,pt	n, L(tpu)
    110  1.1  mrg 	 add	n, -1, n
    111  1.1  mrg 
                  C Last limb: only the leftover high part remains; subtract the carried
                  C value from it.
    112  1.1  mrg 	sub	%o5, %g4, %o5
                  C L(edu) is also the single-limb entry: %o5 is the shifted limb.
    113  1.1  mrg L(edu):	mulx	%o5, dinv, %g1
                  C return restores the caller's register window before the delay slot
                  C executes, so %o0 there names the register called qp (%i0) here.
    114  1.1  mrg 	return	%i7+8
    115  1.1  mrg 	 stx	%g1, [%o0]
    116  1.1  mrg 
                  C Non-shifting path (cnt == 0).
    117  1.1  mrg L(norm):
                  C First result limb = first source limb * dinv.
    118  1.1  mrg 	mulx	dinv, %o5, %g1
                  C Biased n < 0: that was the only limb.  The delay slot stores it on both
                  C paths.
    119  1.1  mrg 	brlz,pt	n, L(edn)
    120  1.1  mrg 	 stx	%g1, [qp]
    121  1.1  mrg 	add	qp, 8, qp
                  C Clear the carry flag (and %g4) before the first addxc in the loop.
    122  1.1  mrg 	addcc	%g0, 0, %g4
    123  1.1  mrg 
                  C Non-shifting loop, one pass per remaining source limb.  On entry %g1 is
                  C the previous result limb q, and the carry flag holds the borrow of the
                  C previous pass's subcc (cleared above for the first pass).
    124  1.1  mrg L(tpn):	umulxhi(d, %g1, %g1)
    125  1.1  mrg 	ldx	[ap+8], %g5
    126  1.1  mrg 	add	ap, 8, ap
                  C %g1 = high half of d*q, plus the pending borrow.
    127  1.1  mrg 	addxc(	%g1, %g0, %g1)
                  C Subtract it from the next source limb; the new borrow is consumed by
                  C addxc in the next pass.
    128  1.1  mrg 	subcc	%g5, %g1, %g1
                  C q = difference * dinv; store it and advance qp.
    129  1.1  mrg 	mulx	%g1, dinv, %g1
    130  1.1  mrg 	stx	%g1, [qp]
    131  1.1  mrg 	add	qp, 8, qp
                  C Loop while n > 0; the delay slot decrements n.
    132  1.1  mrg 	brgz,pt	n, L(tpn)
    133  1.1  mrg 	 add	n, -1, n
    134  1.1  mrg 
                  C Every result limb has already been stored; just return.
    135  1.1  mrg L(edn):	return	%i7+8
    136  1.1  mrg 	 nop
    137  1.1  mrg EPILOGUE()
    138