Home | History | Annotate | Line # | Download | only in p6
      1      1.1  mrg dnl  PowerPC-64 mpn_rshift -- rp[] = up[] >> cnt
      2      1.1  mrg 
      3      1.1  mrg dnl  Copyright 2003, 2005, 2010, 2013 Free Software Foundation, Inc.
      4      1.1  mrg 
      5      1.1  mrg dnl  This file is part of the GNU MP Library.
      6  1.1.1.2  mrg dnl
      7      1.1  mrg dnl  The GNU MP Library is free software; you can redistribute it and/or modify
      8  1.1.1.2  mrg dnl  it under the terms of either:
      9  1.1.1.2  mrg dnl
     10  1.1.1.2  mrg dnl    * the GNU Lesser General Public License as published by the Free
     11  1.1.1.2  mrg dnl      Software Foundation; either version 3 of the License, or (at your
     12  1.1.1.2  mrg dnl      option) any later version.
     13  1.1.1.2  mrg dnl
     14  1.1.1.2  mrg dnl  or
     15  1.1.1.2  mrg dnl
     16  1.1.1.2  mrg dnl    * the GNU General Public License as published by the Free Software
     17  1.1.1.2  mrg dnl      Foundation; either version 2 of the License, or (at your option) any
     18  1.1.1.2  mrg dnl      later version.
     19  1.1.1.2  mrg dnl
     20  1.1.1.2  mrg dnl  or both in parallel, as here.
     21  1.1.1.2  mrg dnl
     22      1.1  mrg dnl  The GNU MP Library is distributed in the hope that it will be useful, but
     23      1.1  mrg dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
     24  1.1.1.2  mrg dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
     25  1.1.1.2  mrg dnl  for more details.
     26  1.1.1.2  mrg dnl
     27  1.1.1.2  mrg dnl  You should have received copies of the GNU General Public License and the
     28  1.1.1.2  mrg dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
     29  1.1.1.2  mrg dnl  see https://www.gnu.org/licenses/.
     30      1.1  mrg 
     31      1.1  mrg include(`../config.m4')
     32      1.1  mrg 
     33      1.1  mrg C		    cycles/limb
     34      1.1  mrg C POWER3/PPC630		 ?
     35      1.1  mrg C POWER4/PPC970		 ?
     36      1.1  mrg C POWER5		 2
     37      1.1  mrg C POWER6		 3.5  (mysteriously 3.0 for cnt=1)
     38      1.1  mrg 
     39      1.1  mrg C TODO
     40      1.1  mrg C  * Micro-optimise header code
     41      1.1  mrg C  * Perhaps do 4-way unrolling, for 2.5 c/l on POWER6.  The code is 4248
     42      1.1  mrg C    bytes, 4-way code would become about 50% larger.
     43      1.1  mrg 
     44      1.1  mrg C INPUT PARAMETERS
     45      1.1  mrg define(`rp_param',  `r3')	C destination pointer as passed in; copied to rp at entry
     46      1.1  mrg define(`up',  `r4')		C source limb pointer, advanced as limbs are loaded
     47      1.1  mrg define(`n',   `r5')		C number of 64-bit limbs to process
     48      1.1  mrg define(`cnt', `r6')		C shift count in bits
     49      1.1  mrg C WORKING REGISTERS
     50      1.1  mrg define(`tnc',`r0')		C set to 64 - cnt at entry; left-shift amount for the return value
     51      1.1  mrg define(`retval',`r3')	C same register as rp_param; the code below writes r3 directly
     52      1.1  mrg define(`rp',  `r7')		C working copy of the destination pointer
     53      1.1  mrg 
     54      1.1  mrg ASM_START()
     55  1.1.1.2  mrg PROLOGUE(mpn_rshift,toc)
     56      1.1  mrg C Right-shift the n limbs at up by cnt bits into rp; r3 returns limb 0 << (64 - cnt).
     57      1.1  mrg ifdef(`HAVE_ABI_mode32',`
     58      1.1  mrg 	rldicl	n, n, 0,32		C FIXME: avoid this zero extend
     59      1.1  mrg ')
     60      1.1  mrg 	mflr	r12			C save LR; LR is reused below for the computed branch
     61      1.1  mrg 	LEAL(	r11, L(e1))		C address of L(e1) label in SHIFT(1)
     62      1.1  mrg 	sldi	r10, cnt, 6		C multiply cnt by size of a SHIFT block (64 bytes)
     63      1.1  mrg 	add	r11, r11, r10		C r11 = L(e1) + 64*cnt; rebased to L(eN) or L(oN) below
     64      1.1  mrg 	srdi	r10, n, 1		C r10 = n / 2
     65      1.1  mrg 	mr	rp, rp_param		C move the destination pointer out of r3, which will hold the result
     66      1.1  mrg 	subfic	tnc, cnt, 64		C tnc = 64 - cnt
     67      1.1  mrg 	rlwinm.	r8, n, 0,31,31		C extract bit 0
     68      1.1  mrg 	mtctr	r10			C loop counter = n / 2
     69      1.1  mrg 	beq	L(evn)			C bit 0 clear: n is even
     70      1.1  mrg C n odd: r9 = limb 0, r8 = limb 1, then enter the unrolled code at L(oN).
     71      1.1  mrg L(odd):	ld	r9, 0(up)
     72      1.1  mrg 	cmpdi	cr0, n, 1		C n = 1?
     73      1.1  mrg 	beq	L(1)
     74      1.1  mrg 	ld	r8, 8(up)
     75      1.1  mrg 	addi	r11, r11, -84		C L(o1) - L(e1) - 64
     76      1.1  mrg 	mtlr	r11
     77      1.1  mrg 	sld	r3, r9, tnc		C retval = limb 0 << tnc
     78      1.1  mrg 	addi	up, up, 8		C step past limb 0; L(oN) loads its next limb from 8(up)
     79      1.1  mrg 	addi	rp, rp, 8		C bias rp so the -8(rp) store at L(oN) writes the first result limb
     80      1.1  mrg 	blr				C branch to L(oN)
     81      1.1  mrg C n even: r8 = limb 0, r9 = limb 1, then enter the unrolled code at L(eN).
     82      1.1  mrg L(evn):	ld	r8, 0(up)
     83      1.1  mrg 	ld	r9, 8(up)
     84      1.1  mrg 	addi	r11, r11, -64		C r11 = L(eN) for N = cnt
     85      1.1  mrg 	mtlr	r11
     86      1.1  mrg 	sld	r3, r8, tnc		C retval = limb 0 << tnc
     87      1.1  mrg 	addi	up, up, 16		C both preloaded limbs consumed
     88      1.1  mrg 	blr				C branch to L(eN)
     89      1.1  mrg C n = 1: a single limb, so there is no higher limb to merge bits from.
     90      1.1  mrg L(1):	sld	r3, r9, tnc		C retval
     91      1.1  mrg 	srd	r8, r9, cnt		C the only result limb: limb 0 >> cnt
     92      1.1  mrg 	std	r8, 0(rp)
     93      1.1  mrg 	mtlr	r12			C restore the return address saved at entry
     94      1.1  mrg ifdef(`HAVE_ABI_mode32',
     95      1.1  mrg `	mr	r4, r3			C mode32: r4 gets a copy of the 64-bit result
     96      1.1  mrg 	srdi	r3, r3, 32		C mode32: r3 keeps only its upper 32 bits
     97      1.1  mrg ')
     98      1.1  mrg 	blr
     99      1.1  mrg C SHIFT(N): 16-instruction loop body specialised for shift count N, two limbs per pass.
    100      1.1  mrg C L(oN) and L(eN) are the odd-n and even-n entry points; r8 and r9 hold preloaded limbs.
    101      1.1  mrg define(SHIFT,`
    102      1.1  mrg L(lo$1):ld	r8, 0(up)		C next source limb
    103      1.1  mrg 	std	r11, 0(rp)		C store the limb assembled at L(eN) on the previous pass
    104      1.1  mrg 	addi	rp, rp, 16
    105      1.1  mrg L(o$1):	srdi	r10, r9, $1		C r10 = r9 >> N
    106      1.1  mrg 	rldimi	r10, r8, eval(64-$1), 0	C insert the low N bits of r8 into the top N bits of r10
    107      1.1  mrg 	ld	r9, 8(up)		C next source limb
    108      1.1  mrg 	addi	up, up, 16
    109      1.1  mrg 	std	r10, -8(rp)
    110      1.1  mrg L(e$1):	srdi	r11, r8, $1		C r11 = r8 >> N
    111      1.1  mrg 	rldimi	r11, r9, eval(64-$1), 0	C insert the low N bits of r9 into the top N bits of r11
    112      1.1  mrg 	bdnz	L(lo$1)			C decrement ctr and loop while it is nonzero
    113      1.1  mrg 	std	r11, 0(rp)		C wind-down: store the last merged limb
    114      1.1  mrg 	srdi	r10, r9, $1		C top result limb: nothing above it to merge in
    115      1.1  mrg 	b	L(com)			C shared exit stores r10 and returns
    116      1.1  mrg 	nop				C padding: keeps every block at 16 instructions
    117      1.1  mrg 	nop				C so the cnt*64 stride computed at entry stays valid
    118      1.1  mrg ')
    119      1.1  mrg C Emit SHIFT(1) .. SHIFT(63) back to back; the entry code indexes them by cnt.
    120      1.1  mrg 	ALIGN(64)			C presumably aligns SHIFT(1) to 64 bytes -- confirm the ALIGN macro definition
    121      1.1  mrg forloop(`i',1,63,`SHIFT(i)')	C a cnt outside 1..63 has no block to branch to
    122      1.1  mrg C Common exit: r10 holds the top result limb and r3 already holds the return value.
    123      1.1  mrg L(com):	std	r10, 8(rp)		C store the final result limb
    124      1.1  mrg 	mtlr	r12			C restore the return address saved in r12 at entry
    125      1.1  mrg ifdef(`HAVE_ABI_mode32',
    126      1.1  mrg `	mr	r4, r3			C mode32: r4 gets a copy of the 64-bit result
    127      1.1  mrg 	srdi	r3, r3, 32		C mode32: r3 keeps only its upper 32 bits -- presumably an r3:r4 return pair
    128      1.1  mrg ')
    129      1.1  mrg 	blr
    130      1.1  mrg EPILOGUE()
    131      1.1  mrg ASM_END()
    132