Home | History | Annotate | Line # | Download | only in powerpc64
rshift.asm revision 1.1.1.3
      1 dnl  PowerPC-64 mpn_rshift -- rp[] = up[] >> cnt
      2 
      3 dnl  Copyright 2003, 2005, 2010, 2011 Free Software Foundation, Inc.
      4 
      5 dnl  This file is part of the GNU MP Library.
      6 dnl
      7 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
      8 dnl  it under the terms of either:
      9 dnl
     10 dnl    * the GNU Lesser General Public License as published by the Free
     11 dnl      Software Foundation; either version 3 of the License, or (at your
     12 dnl      option) any later version.
     13 dnl
     14 dnl  or
     15 dnl
     16 dnl    * the GNU General Public License as published by the Free Software
     17 dnl      Foundation; either version 2 of the License, or (at your option) any
     18 dnl      later version.
     19 dnl
     20 dnl  or both in parallel, as here.
     21 dnl
     22 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
     23 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
     24 dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
     25 dnl  for more details.
     26 dnl
     27 dnl  You should have received copies of the GNU General Public License and the
     28 dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
     29 dnl  see https://www.gnu.org/licenses/.
     30 
     31 include(`../config.m4')
     32 
     33 C                   cycles/limb
     34 C POWER3/PPC630          ?
     35 C POWER4/PPC970          ?
     36 C POWER5                 2.25
     37 C POWER6                 9.75
     38 C POWER7                 2.15
     39 
     40 C TODO
     41 C  * Try to reduce the number of needed live registers
     42 C  * Micro-optimise header code
     43 C  * Keep in synch with lshift.asm and lshiftc.asm
     44 
     45 C INPUT PARAMETERS
     46 define(`rp',  `r3')	C destination limb pointer, base register for every std
     47 define(`up',  `r4')	C source limb pointer, base register for every ld
     48 define(`n',   `r5')	C limb count
     49 define(`cnt', `r6')	C right-shift bit count
     50 
     51 define(`tnc',`r0')	C complementary left-shift count, set to 64 - cnt
     52 define(`u0',`r30')	C source limb scratch, r30 is saved and restored by the routine
     53 define(`u1',`r31')	C source limb scratch, r31 is saved and restored by the routine
     54 define(`retval',`r5')	C return value, shares r5 with n
     55 
     56 ASM_START()
        C mpn_rshift -- shift the n-limb operand at up right by cnt bits and
        C store the n result limbs at rp.
        C
        C In:   rp in r3, up in r4, n in r5, cnt in r6.
        C Out:  retval = up[0] << tnc with tnc = 64 - cnt, that is the bits
        C       shifted out of the lowest limb.  It is returned in r3, or as
        C       r3 = retval >> 32 and r4 = retval under HAVE_ABI_mode32.
        C Uses: r0, r7-r12 and ctr as scratch.  r30 and r31 are stored below the
        C       stack pointer on entry and reloaded before returning.
        C
        C Structure: a 4-way unrolled, software-pipelined loop.  Every result
        C limb is formed as an srd of one source limb ORed with an sld of the
        C next one.  n mod 4 selects one of four entry sequences b00, b01, b10,
        C b11.  Each primes the pipeline and then either joins the loop body or,
        C for short operands, jumps straight into the wind-down code.
        C ctr holds the integer part of the quotient of n + 3 by 4.
        C NOTE(review): the first limb is loaded unconditionally, so the code
        C appears to require n >= 1, and cnt is presumably in 1..63 -- confirm
        C against the documented mpn_rshift contract.
     57 PROLOGUE(mpn_rshift)
     58 	std	r31, -8(r1)	C save r31, used as u1
     59 	std	r30, -16(r1)	C save r30, used as u0
     60 	subfic	tnc, cnt, 64	C tnc = 64 - cnt
     61 C	sldi	r30, n, 3	C byte count corresponding to n
     62 C	add	rp, rp, r30	C rp = rp + n
     63 C	add	up, up, r30	C up = up + n
     64 	rldicl.	r30, n, 0,62	C r30 = n & 3, set cr0
     65 	cmpdi	cr6, r30, 2	C cr6: lt when n & 3 < 2, eq when n & 3 = 2
     66 	addi	r31, n, 3	C compute count...
     67 	ld	r10, 0(up)	C load 1st limb for b00...b11
     68 	sld	retval, r10, tnc	C retval = up[0] << tnc, overwrites n which is not read again
     69 ifdef(`HAVE_ABI_mode32',
     70 `	rldicl	r31, r31, 62,34',	C ...branch count
     71 `	srdi	r31, r31, 2')	C ...for ctr
     72 	mtctr	r31		C copy count into ctr
     73 	beq	cr0, L(b00)	C n mod 4 = 0
     74 	blt	cr6, L(b01)	C n mod 4 = 1
     75 	ld	r11, 8(up)	C load 2nd limb for b10 and b11
     76 	beq	cr6, L(b10)	C n mod 4 = 2, otherwise fall through to b11
     77 
        C Entry for n mod 4 = 3.  r10 = up[0] and r11 = up[1] are already loaded.
     78 	ALIGN(16)
     79 L(b11):	srd	r8, r10, cnt	C r8 = up[0] >> cnt
     80 	sld	r9, r11, tnc	C r9 = up[1] << tnc
     81 	ld	u1, 16(up)	C u1 = up[2]
     82 	addi	up, up, 24	C up now addresses up[3]
     83 	srd	r12, r11, cnt
     84 	sld	r7, u1, tnc
     85 	addi	rp, rp, -16	C bias rp to match the store offsets at L11 and cj3
     86 	bdnz	L(gt3)		C taken when n > 3
     87 
     88 	or	r11, r8, r9	C n = 3: result limb 0
     89 	srd	r8, u1, cnt	C top result limb, nothing is shifted in
     90 	b	L(cj3)
     91 
     92 	ALIGN(16)
     93 L(gt3):	ld	u0, 0(up)
     94 	or	r11, r8, r9
     95 	srd	r8, u1, cnt
     96 	sld	r9, u0, tnc
     97 	ld	u1, 8(up)
     98 	or	r10, r12, r7
     99 	b	L(L11)		C join the loop body at L11
    100 
        C Entry for n mod 4 = 2.  r10 = up[0] and r11 = up[1] are already loaded.
    101 	ALIGN(32)
    102 L(b10):	srd	r12, r10, cnt
    103 	addi	rp, rp, -24	C bias rp to match the store offsets at L10 and cj2
    104 	sld	r7, r11, tnc
    105 	bdnz	L(gt2)		C taken when n > 2
    106 
    107 	srd	r8, r11, cnt	C n = 2: top result limb, nothing is shifted in
    108 	or	r10, r12, r7	C result limb 0
    109 	b	L(cj2)
    110 
    111 L(gt2):	ld	u0, 16(up)
    112 	srd	r8, r11, cnt
    113 	sld	r9, u0, tnc
    114 	ld	u1, 24(up)
    115 	or	r10, r12, r7
    116 	srd	r12, u0, cnt
    117 	sld	r7, u1, tnc
    118 	ld	u0, 32(up)
    119 	or	r11, r8, r9
    120 	addi	up, up, 16	C bias up to match the load offset at L10
    121 	b	L(L10)		C join the loop body at L10
    122 
        C Entry for n mod 4 = 0.  r10 = up[0] is already loaded.
    123 	ALIGN(16)
    124 L(b00):	ld	u1, 8(up)
    125 	srd	r12, r10, cnt
    126 	sld	r7, u1, tnc
    127 	ld	u0, 16(up)
    128 	srd	r8, u1, cnt
    129 	sld	r9, u0, tnc
    130 	ld	u1, 24(up)
    131 	or	r10, r12, r7
    132 	srd	r12, u0, cnt
    133 	sld	r7, u1, tnc
    134 	addi	rp, rp, -8	C bias rp to match the store offsets at L00 and cj4
    135 	bdz	L(cj4)		C taken when ctr reaches zero, the n = 4 case
    136 
    137 L(gt4):	addi	up, up, 32	C step up past the four limbs already loaded
    138 	ld	u0, 0(up)
    139 	or	r11, r8, r9
    140 	b	L(L00)		C join the loop body at L00
    141 
        C Entry for n mod 4 = 1.  r10 = up[0] is already loaded.
    142 	ALIGN(16)
    143 L(b01):	bdnz	L(gt1)		C taken when n > 1
    144 	srd	r8, r10, cnt	C n = 1: the only result limb is up[0] >> cnt
    145 	std	r8, 0(rp)
    146 	b	L(ret)
    147 
    148 L(gt1):	ld	u0, 8(up)
    149 	srd	r8, r10, cnt
    150 	sld	r9, u0, tnc
    151 	ld	u1, 16(up)
    152 	srd	r12, u0, cnt
    153 	sld	r7, u1, tnc
    154 	ld	u0, 24(up)
    155 	or	r11, r8, r9
    156 	srd	r8, u1, cnt
    157 	sld	r9, u0, tnc
    158 	ld	u1, 32(up)
    159 	addi	up, up, 40	C step up past the five limbs already loaded
    160 	or	r10, r12, r7
    161 	bdz	L(end)		C taken when ctr reaches zero, the n = 5 case
    162 
        C Main loop.  Each pass loads four source limbs and stores four result
        C limbs.  r8 and r12 hold srd halves, r9 and r7 hold sld halves, and
        C r11 = r8 | r9 and r10 = r12 | r7 are the completed result limbs.
        C L00, L11 and L10 are the join points used by the b00, b11 and b10
        C entry sequences.  The b01 sequence falls in at the top.
    163 	ALIGN(32)
    164 L(top):	srd	r12, u0, cnt
    165 	sld	r7, u1, tnc
    166 	ld	u0, 0(up)
    167 	std	r11, 0(rp)
    168 	or	r11, r8, r9
    169 L(L00):	srd	r8, u1, cnt
    170 	sld	r9, u0, tnc
    171 	ld	u1, 8(up)
    172 	std	r10, 8(rp)
    173 	or	r10, r12, r7
    174 L(L11):	srd	r12, u0, cnt
    175 	sld	r7, u1, tnc
    176 	ld	u0, 16(up)
    177 	std	r11, 16(rp)
    178 	or	r11, r8, r9
    179 L(L10):	srd	r8, u1, cnt
    180 	sld	r9, u0, tnc
    181 	ld	u1, 24(up)
    182 	addi	up, up, 32	C advance the source pointer by four limbs
    183 	std	r10, 24(rp)
    184 	addi	rp, rp, 32	C advance the destination pointer by four limbs
    185 	or	r10, r12, r7
    186 	bdnz	L(top)		C loop until ctr reaches zero
    187 
        C Wind-down.  No further loads are issued.  Starting from end, the five
        C pending result limbs are combined and stored.  cj4, cj3 and cj2 are
        C the join points for operands too short to enter the loop.
    188 	ALIGN(32)
    189 L(end):	srd	r12, u0, cnt
    190 	sld	r7, u1, tnc
    191 	std	r11, 0(rp)
    192 L(cj4):	or	r11, r8, r9
    193 	srd	r8, u1, cnt	C top result limb, nothing is shifted in
    194 	std	r10, 8(rp)
    195 L(cj3):	or	r10, r12, r7
    196 	std	r11, 16(rp)
    197 L(cj2):	std	r10, 24(rp)
    198 	std	r8, 32(rp)	C store the top result limb
    199 
    200 L(ret):	ld	r31, -8(r1)	C restore r31
    201 	ld	r30, -16(r1)	C restore r30
        C Move retval into the return register or registers.
    202 ifdef(`HAVE_ABI_mode32',
    203 `	srdi	r3, retval, 32
    204 	mr	r4, retval
    205 ',`	mr	r3, retval')
    206 	blr
    207 EPILOGUE()
    208