Home | History | Annotate | Line # | Download | only in p6
      1 dnl  PowerPC-64 mpn_rshift -- rp[] = up[] >> cnt
      2 
      3 dnl  Copyright 2003, 2005, 2010, 2013 Free Software Foundation, Inc.
      4 
      5 dnl  This file is part of the GNU MP Library.
      6 dnl
      7 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
      8 dnl  it under the terms of either:
      9 dnl
     10 dnl    * the GNU Lesser General Public License as published by the Free
     11 dnl      Software Foundation; either version 3 of the License, or (at your
     12 dnl      option) any later version.
     13 dnl
     14 dnl  or
     15 dnl
     16 dnl    * the GNU General Public License as published by the Free Software
     17 dnl      Foundation; either version 2 of the License, or (at your option) any
     18 dnl      later version.
     19 dnl
     20 dnl  or both in parallel, as here.
     21 dnl
     22 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
     23 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
     24 dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
     25 dnl  for more details.
     26 dnl
     27 dnl  You should have received copies of the GNU General Public License and the
     28 dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
     29 dnl  see https://www.gnu.org/licenses/.
     30 
     31 include(`../config.m4')
     32 
     33 C		    cycles/limb
     34 C POWER3/PPC630		 ?
     35 C POWER4/PPC970		 ?
     36 C POWER5		 2
     37 C POWER6		 3.5  (mysteriously 3.0 for cnt=1)
     38 
     39 C TODO
     40 C  * Micro-optimise header code
     40 C  * Perhaps do 4-way unrolling, for 2.5 c/l on POWER6.  The code is 4248
     41 C    bytes; 4-way code would become about 50% larger.
     43 
     44 C INPUT PARAMETERS
        C  rp_param (r3)  destination limb pointer on entry; copied to rp (r7) at
        C                 function start because r3 also carries the return value
        C  up (r4)        source limb pointer, advanced as limbs are loaded
        C  n (r5)         number of limbs; its low bit selects the odd or even entry
        C  cnt (r6)       shift count in bits
     45 define(`rp_param',  `r3')
     46 define(`up',  `r4')
     47 define(`n',   `r5')
     48 define(`cnt', `r6')
     49 
        C WORKING REGISTERS
        C  tnc (r0)       64 - cnt, the complementary shift count
        C  retval (r3)    name for the return register; the code below writes r3
        C                 directly and only mentions retval in comments
        C  rp (r7)        working destination pointer
     50 define(`tnc',`r0')
     51 define(`retval',`r3')
     52 define(`rp',  `r7')
     53 
     54 ASM_START()
        C mpn_rshift entry and dispatch.
        C
        C Sets rp = rp_param, tnc = 64 - cnt and CTR = n / 2, then jumps through LR
        C into the SHIFT block for this cnt: at L(oN) when n is odd, at L(eN) when
        C n is even.  n = 1 is handled separately at L(1).  The caller LR is kept
        C in r12 and restored before the final blr.  r3 returns up[0] << tnc.
        C
        C The jump target is computed as L(e1) + 64*cnt plus a fixed offset.  This
        C relies on every SHIFT block being exactly 64 bytes (16 instructions) and
        C on cnt being in 1..63, the only counts for which SHIFT blocks exist.
     55 PROLOGUE(mpn_rshift,toc)
     56 
     57 ifdef(`HAVE_ABI_mode32',`
     58 	rldicl	n, n, 0,32		C FIXME: avoid this zero extend
     59 ')
     60 	mflr	r12			C save caller LR; LR is reused for the dispatch
     61 	LEAL(	r11, L(e1))		C address of L(e1) label in SHIFT(1)
     62 	sldi	r10, cnt, 6		C multiply cnt by size of a SHIFT block
     63 	add	r11, r11, r10		C L(e1) + 64*cnt; adjusted to L(oN) or L(eN) below
     64 	srdi	r10, n, 1		C r10 = n / 2
     65 	mr	rp, rp_param		C free r3 for the return value
     66 	subfic	tnc, cnt, 64		C tnc = 64 - cnt
     67 	rlwinm.	r8, n, 0,31,31		C extract bit 0 of n, setting cr0
     68 	mtctr	r10			C CTR = n / 2, counted down by bdnz
     69 	beq	L(evn)			C bit 0 clear: n is even
     70 
     71 L(odd):	ld	r9, 0(up)		C r9 = up[0]
     72 	cmpdi	cr0, n, 1		C n = 1?
     73 	beq	L(1)
     74 	ld	r8, 8(up)		C r8 = up[1]
     75 	addi	r11, r11, -84		C L(o1) - L(e1) - 64, i.e. -20 - 64
     76 	mtlr	r11
     77 	sld	r3, r9, tnc		C retval
     78 	addi	up, up, 8		C so that 8(up) in L(oN) loads up[2]
     79 	addi	rp, rp, 8		C so that -8(rp) in L(oN) stores rp[0]
     80 	blr				C branch to L(oN)
     81 
     82 L(evn):	ld	r8, 0(up)		C r8 = up[0]
     83 	ld	r9, 8(up)		C r9 = up[1]
     84 	addi	r11, r11, -64		C back one block: r11 = L(eN)
     85 	mtlr	r11
     86 	sld	r3, r8, tnc		C retval
     87 	addi	up, up, 16		C two limbs consumed
     88 	blr				C branch to L(eN)
     89 
     90 L(1):	sld	r3, r9, tnc		C retval
     91 	srd	r8, r9, cnt		C the single result limb
     92 	std	r8, 0(rp)
     93 	mtlr	r12			C restore caller LR
     94 ifdef(`HAVE_ABI_mode32',
     95 `	mr	r4, r3			C r4 = copy of result, r3 = its upper 32 bits
     96 	srdi	r3, r3, 32
     97 ')
     98 	blr
     99 
    100 
        C SHIFT(N) emits the 64-byte (16 instruction) code block for shift count N.
        C Each pass of the loop produces two result limbs.  r8 and r9 hold the two
        C most recently loaded source limbs; r10 and r11 hold result limbs waiting
        C to be stored.  The dispatch code enters at L(oN) or L(eN), never at L(loN).
        C The two trailing nops pad the block to 64 bytes.  The dispatch arithmetic
        C depends on that size and on L(oN) being 20 bytes before L(eN), so
        C instructions must not be added, removed or moved across those labels.
    101 define(SHIFT,`
    102 L(lo$1):ld	r8, 0(up)		C next source limb
    103 	std	r11, 0(rp)		C store the limb formed at L(eN)
    104 	addi	rp, rp, 16
    105 L(o$1):	srdi	r10, r9, $1		C r10 = r9 >> N
    106 	rldimi	r10, r8, eval(64-$1), 0	C insert r8 << (64-N) into the top N bits
    107 	ld	r9, 8(up)		C next source limb
    108 	addi	up, up, 16
    109 	std	r10, -8(rp)
    110 L(e$1):	srdi	r11, r8, $1		C r11 = r8 >> N
    111 	rldimi	r11, r9, eval(64-$1), 0	C insert r9 << (64-N) into the top N bits
    112 	bdnz	L(lo$1)			C decrement CTR, loop while it is nonzero
    113 	std	r11, 0(rp)		C wind down: second to last result limb
    114 	srdi	r10, r9, $1		C last result limb, stored at L(com)
    115 	b	L(com)
    116 	nop				C padding up to 64 bytes
    117 	nop
    118 ')
    119 
    120 	ALIGN(64)
        C NOTE(review): ALIGN(64) presumably requests 64-byte alignment, matching
        C the SHIFT block size -- confirm against the ALIGN definition.
        C Emit SHIFT(1) through SHIFT(63) back to back; the dispatch code indexes
        C into this table by cnt.
    121 forloop(`i',1,63,`SHIFT(i)')
    122 
        C Common exit for every SHIFT block: r10 holds the last result limb.
    123 L(com):	std	r10, 8(rp)		C store the final result limb
    124 	mtlr	r12			C restore caller LR
    125 ifdef(`HAVE_ABI_mode32',
    126 `	mr	r4, r3			C r4 = copy of result, r3 = its upper 32 bits
    127 	srdi	r3, r3, 32
    128 ')
    129 	blr
    130 EPILOGUE()
    131 ASM_END()
    132