Home | History | Annotate | Line # | Download | only in powerpc64
rshift.asm revision 1.1.1.3
      1      1.1  mrg dnl  PowerPC-64 mpn_rshift -- rp[] = up[] >> cnt
      2      1.1  mrg 
      3  1.1.1.2  mrg dnl  Copyright 2003, 2005, 2010, 2011 Free Software Foundation, Inc.
      4      1.1  mrg 
      5      1.1  mrg dnl  This file is part of the GNU MP Library.
      6  1.1.1.3  mrg dnl
      7      1.1  mrg dnl  The GNU MP Library is free software; you can redistribute it and/or modify
      8  1.1.1.3  mrg dnl  it under the terms of either:
      9  1.1.1.3  mrg dnl
     10  1.1.1.3  mrg dnl    * the GNU Lesser General Public License as published by the Free
     11  1.1.1.3  mrg dnl      Software Foundation; either version 3 of the License, or (at your
     12  1.1.1.3  mrg dnl      option) any later version.
     13  1.1.1.3  mrg dnl
     14  1.1.1.3  mrg dnl  or
     15  1.1.1.3  mrg dnl
     16  1.1.1.3  mrg dnl    * the GNU General Public License as published by the Free Software
     17  1.1.1.3  mrg dnl      Foundation; either version 2 of the License, or (at your option) any
     18  1.1.1.3  mrg dnl      later version.
     19  1.1.1.3  mrg dnl
     20  1.1.1.3  mrg dnl  or both in parallel, as here.
     21  1.1.1.3  mrg dnl
     22      1.1  mrg dnl  The GNU MP Library is distributed in the hope that it will be useful, but
     23      1.1  mrg dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
     24  1.1.1.3  mrg dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
     25  1.1.1.3  mrg dnl  for more details.
     26  1.1.1.3  mrg dnl
     27  1.1.1.3  mrg dnl  You should have received copies of the GNU General Public License and the
     28  1.1.1.3  mrg dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
     29  1.1.1.3  mrg dnl  see https://www.gnu.org/licenses/.
     30      1.1  mrg 
     31      1.1  mrg include(`../config.m4')
     32      1.1  mrg 
     33  1.1.1.2  mrg C                   cycles/limb
     34  1.1.1.2  mrg C POWER3/PPC630          ?
     35  1.1.1.2  mrg C POWER4/PPC970          ?
     36  1.1.1.2  mrg C POWER5                 2.25
     37  1.1.1.2  mrg C POWER6                 9.75
     38  1.1.1.2  mrg C POWER7                 2.15
     39  1.1.1.2  mrg 
     40  1.1.1.2  mrg C TODO
     41  1.1.1.2  mrg C  * Try to reduce the number of needed live registers
     42  1.1.1.2  mrg C  * Micro-optimise header code
     43  1.1.1.2  mrg C  * Keep in synch with lshift.asm and lshiftc.asm
     44      1.1  mrg 
     45      1.1  mrg C INPUT PARAMETERS
     46  1.1.1.2  mrg define(`rp',  `r3')	C destination pointer; base register of every store
     47  1.1.1.2  mrg define(`up',  `r4')	C source pointer; base register of every limb load
     48  1.1.1.2  mrg define(`n',   `r5')	C limb count
     49  1.1.1.2  mrg define(`cnt', `r6')	C right-shift count
     50  1.1.1.2  mrg 
     51  1.1.1.2  mrg define(`tnc',`r0')	C complementary left-shift count, set to 64 - cnt
     52  1.1.1.2  mrg define(`u0',`r30')	C holds a loaded source limb; r30 is saved/restored by mpn_rshift
     53  1.1.1.2  mrg define(`u1',`r31')	C holds a loaded source limb; r31 is saved/restored by mpn_rshift
     54  1.1.1.2  mrg define(`retval',`r5')	C return value; deliberately the same register as n
     55      1.1  mrg 
     56      1.1  mrg ASM_START()
     57      1.1  mrg PROLOGUE(mpn_rshift)	C rp[] = up[] >> cnt over n limbs; returns up[0] << (64 - cnt)
     58  1.1.1.2  mrg 	std	r31, -8(r1)	C save r31 (u1) below the stack pointer; reloaded at L(ret)
     59  1.1.1.2  mrg 	std	r30, -16(r1)	C save r30 (u0) likewise
     60  1.1.1.2  mrg 	subfic	tnc, cnt, 64	C tnc = 64 - cnt, the complementary left-shift count
     61  1.1.1.2  mrg C	sldi	r30, n, 3	C byte count corresponding to n
     62  1.1.1.2  mrg C	add	rp, rp, r30	C rp = rp + n
     63  1.1.1.2  mrg C	add	up, up, r30	C up = up + n
     64  1.1.1.2  mrg 	rldicl.	r30, n, 0,62	C r30 = n & 3, set cr0
     65  1.1.1.2  mrg 	cmpdi	cr6, r30, 2	C cr6 = (n & 3) compared with 2
     66  1.1.1.2  mrg 	addi	r31, n, 3	C compute count...
     67  1.1.1.2  mrg 	ld	r10, 0(up)	C load 1st limb for b00...b11
     68  1.1.1.2  mrg 	sld	retval, r10, tnc	C return value = up[0] << tnc; overwrites n (same register), which is not read again
     69      1.1  mrg ifdef(`HAVE_ABI_mode32',
     70  1.1.1.2  mrg `	rldicl	r31, r31, 62,34',	C ...branch count
     71  1.1.1.2  mrg `	srdi	r31, r31, 2')	C ...for ctr
     72  1.1.1.2  mrg 	mtctr	r31		C copy count into ctr
     73  1.1.1.2  mrg 	beq	cr0, L(b00)	C n & 3 == 0
     74  1.1.1.2  mrg 	blt	cr6, L(b01)	C n & 3 == 1 (the zero case has already branched)
     75  1.1.1.2  mrg 	ld	r11, 8(up)	C load 2nd limb for b10 and b11
     76  1.1.1.2  mrg 	beq	cr6, L(b10)	C n & 3 == 2; otherwise fall through with n & 3 == 3
     77  1.1.1.2  mrg 
     78  1.1.1.2  mrg 	ALIGN(16)
     79  1.1.1.2  mrg L(b11):	srd	r8, r10, cnt	C entry for n & 3 == 3: low part of result limb 0
     80  1.1.1.2  mrg 	sld	r9, r11, tnc	C high part of result limb 0, taken from the 2nd limb
     81  1.1.1.2  mrg 	ld	u1, 16(up)	C 3rd limb
     82  1.1.1.2  mrg 	addi	up, up, 24	C up now points at the 4th limb
     83  1.1.1.2  mrg 	srd	r12, r11, cnt
     84  1.1.1.2  mrg 	sld	r7, u1, tnc
     85      1.1  mrg 	addi	rp, rp, -16	C bias rp so the store at offset 16 (L(cj3) or L(L11)) writes result limb 0
     86  1.1.1.2  mrg 	bdnz	L(gt3)		C taken when limbs remain beyond these three
     87      1.1  mrg 
     88  1.1.1.2  mrg 	or	r11, r8, r9	C n == 3: result limb 0
     89  1.1.1.2  mrg 	srd	r8, u1, cnt	C last result limb, stored at L(cj2)
     90  1.1.1.2  mrg 	b	L(cj3)
     91  1.1.1.2  mrg 
     92  1.1.1.2  mrg 	ALIGN(16)
     93  1.1.1.2  mrg L(gt3):	ld	u0, 0(up)	C 4th limb (up was advanced by 24 in L(b11))
     94  1.1.1.2  mrg 	or	r11, r8, r9
     95  1.1.1.2  mrg 	srd	r8, u1, cnt
     96  1.1.1.2  mrg 	sld	r9, u0, tnc
     97      1.1  mrg 	ld	u1, 8(up)
     98  1.1.1.2  mrg 	or	r10, r12, r7
     99  1.1.1.2  mrg 	b	L(L11)		C join the main loop part-way through an iteration
    100      1.1  mrg 
    101  1.1.1.2  mrg 	ALIGN(32)
    102  1.1.1.2  mrg L(b10):	srd	r12, r10, cnt	C entry for n & 3 == 2: low part of result limb 0
    103  1.1.1.2  mrg 	addi	rp, rp, -24	C bias rp so the store at offset 24 (L(cj2) or L(L10)) writes result limb 0
    104  1.1.1.2  mrg 	sld	r7, r11, tnc
    105  1.1.1.2  mrg 	bdnz	L(gt2)		C taken when limbs remain beyond these two
    106  1.1.1.2  mrg 
    107  1.1.1.2  mrg 	srd	r8, r11, cnt	C n == 2: last result limb
    108  1.1.1.2  mrg 	or	r10, r12, r7	C n == 2: result limb 0
    109  1.1.1.2  mrg 	b	L(cj2)
    110  1.1.1.2  mrg 
    111  1.1.1.2  mrg L(gt2):	ld	u0, 16(up)	C 3rd limb
    112  1.1.1.2  mrg 	srd	r8, r11, cnt
    113  1.1.1.2  mrg 	sld	r9, u0, tnc
    114  1.1.1.2  mrg 	ld	u1, 24(up)	C 4th limb
    115  1.1.1.2  mrg 	or	r10, r12, r7
    116  1.1.1.2  mrg 	srd	r12, u0, cnt
    117  1.1.1.2  mrg 	sld	r7, u1, tnc
    118  1.1.1.2  mrg 	ld	u0, 32(up)	C 5th limb
    119  1.1.1.2  mrg 	or	r11, r8, r9
    120  1.1.1.2  mrg 	addi	up, up, 16	C so that 24(up) at L(L10) addresses the 6th limb
    121  1.1.1.2  mrg 	b	L(L10)		C join the main loop part-way through an iteration
    122  1.1.1.2  mrg 
    123  1.1.1.2  mrg 	ALIGN(16)
    124  1.1.1.2  mrg L(b00):	ld	u1, 8(up)	C entry for n & 3 == 0: 2nd limb
    125  1.1.1.2  mrg 	srd	r12, r10, cnt
    126  1.1.1.2  mrg 	sld	r7, u1, tnc
    127  1.1.1.2  mrg 	ld	u0, 16(up)	C 3rd limb
    128  1.1.1.2  mrg 	srd	r8, u1, cnt
    129  1.1.1.2  mrg 	sld	r9, u0, tnc
    130  1.1.1.2  mrg 	ld	u1, 24(up)	C 4th limb
    131  1.1.1.2  mrg 	or	r10, r12, r7
    132  1.1.1.2  mrg 	srd	r12, u0, cnt
    133  1.1.1.2  mrg 	sld	r7, u1, tnc
    134  1.1.1.2  mrg 	addi	rp, rp, -8	C bias rp so the store at offset 8 (L(cj4) or L(L00)) writes result limb 0
    135  1.1.1.2  mrg 	bdz	L(cj4)		C ctr reached zero: only these four limbs, go to the wind-down
    136      1.1  mrg 
    137  1.1.1.2  mrg L(gt4):	addi	up, up, 32
    138  1.1.1.2  mrg 	ld	u0, 0(up)	C 5th limb
    139  1.1.1.2  mrg 	or	r11, r8, r9
    140  1.1.1.2  mrg 	b	L(L00)		C join the main loop part-way through an iteration
    141      1.1  mrg 
    142  1.1.1.2  mrg 	ALIGN(16)
    143  1.1.1.2  mrg L(b01):	bdnz	L(gt1)		C entry for n & 3 == 1; taken when limbs remain beyond the first
    144  1.1.1.2  mrg 	srd	r8, r10, cnt	C n == 1: the single result limb
    145  1.1.1.2  mrg 	std	r8, 0(rp)
    146  1.1.1.2  mrg 	b	L(ret)
    147  1.1.1.2  mrg 
    148  1.1.1.2  mrg L(gt1):	ld	u0, 8(up)	C 2nd limb
    149  1.1.1.2  mrg 	srd	r8, r10, cnt
    150  1.1.1.2  mrg 	sld	r9, u0, tnc
    151  1.1.1.2  mrg 	ld	u1, 16(up)	C 3rd limb
    152  1.1.1.2  mrg 	srd	r12, u0, cnt
    153  1.1.1.2  mrg 	sld	r7, u1, tnc
    154  1.1.1.2  mrg 	ld	u0, 24(up)	C 4th limb
    155  1.1.1.2  mrg 	or	r11, r8, r9
    156  1.1.1.2  mrg 	srd	r8, u1, cnt
    157  1.1.1.2  mrg 	sld	r9, u0, tnc
    158  1.1.1.2  mrg 	ld	u1, 32(up)	C 5th limb
    159  1.1.1.2  mrg 	addi	up, up, 40	C up now points past the five limbs loaded so far
    160  1.1.1.2  mrg 	or	r10, r12, r7
    161  1.1.1.2  mrg 	bdz	L(end)		C ctr reached zero: skip the loop, go to the wind-down
    162  1.1.1.2  mrg 
    163  1.1.1.2  mrg 	ALIGN(32)
    164  1.1.1.2  mrg L(top):	srd	r12, u0, cnt	C main loop: stores four result limbs per pass (offsets 0, 8, 16, 24)
    165  1.1.1.2  mrg 	sld	r7, u1, tnc
    166  1.1.1.2  mrg 	ld	u0, 0(up)
    167  1.1.1.2  mrg 	std	r11, 0(rp)
    168  1.1.1.2  mrg 	or	r11, r8, r9
    169  1.1.1.2  mrg L(L00):	srd	r8, u1, cnt	C loop entry point used by L(gt4)
    170  1.1.1.2  mrg 	sld	r9, u0, tnc
    171  1.1.1.2  mrg 	ld	u1, 8(up)
    172  1.1.1.2  mrg 	std	r10, 8(rp)
    173  1.1.1.2  mrg 	or	r10, r12, r7
    174  1.1.1.2  mrg L(L11):	srd	r12, u0, cnt	C loop entry point used by L(gt3)
    175  1.1.1.2  mrg 	sld	r7, u1, tnc
    176  1.1.1.2  mrg 	ld	u0, 16(up)
    177  1.1.1.2  mrg 	std	r11, 16(rp)
    178  1.1.1.2  mrg 	or	r11, r8, r9
    179  1.1.1.2  mrg L(L10):	srd	r8, u1, cnt	C loop entry point used by L(gt2)
    180  1.1.1.2  mrg 	sld	r9, u0, tnc
    181  1.1.1.2  mrg 	ld	u1, 24(up)
    182  1.1.1.2  mrg 	addi	up, up, 32	C advance source by four limbs
    183  1.1.1.2  mrg 	std	r10, 24(rp)
    184  1.1.1.2  mrg 	addi	rp, rp, 32	C advance destination by four limbs
    185  1.1.1.2  mrg 	or	r10, r12, r7
    186  1.1.1.2  mrg 	bdnz	L(top)		C decrement ctr; loop while it is nonzero
    187  1.1.1.2  mrg 
    188  1.1.1.2  mrg 	ALIGN(32)
    189  1.1.1.2  mrg L(end):	srd	r12, u0, cnt	C wind-down: no more loads, flush the limbs still held in registers
    190  1.1.1.2  mrg 	sld	r7, u1, tnc
    191  1.1.1.2  mrg 	std	r11, 0(rp)
    192  1.1.1.2  mrg L(cj4):	or	r11, r8, r9	C entered from L(b00) when n needs no loop pass
    193  1.1.1.2  mrg 	srd	r8, u1, cnt	C last result limb: plain right shift, nothing ORed in
    194  1.1.1.2  mrg 	std	r10, 8(rp)
    195  1.1.1.2  mrg L(cj3):	or	r10, r12, r7	C entered from L(b11) when n == 3
    196  1.1.1.2  mrg 	std	r11, 16(rp)
    197  1.1.1.2  mrg L(cj2):	std	r10, 24(rp)	C entered from L(b10) when n == 2
    198  1.1.1.2  mrg 	std	r8, 32(rp)	C store the last result limb
    199      1.1  mrg 
    200  1.1.1.2  mrg L(ret):	ld	r31, -8(r1)	C restore r31 saved at entry
    201  1.1.1.2  mrg 	ld	r30, -16(r1)	C restore r30 saved at entry
    202      1.1  mrg ifdef(`HAVE_ABI_mode32',
    203  1.1.1.2  mrg `	srdi	r3, retval, 32
    204  1.1.1.2  mrg 	mr	r4, retval
    205  1.1.1.2  mrg ',`	mr	r3, retval')	C mode32: r3 = retval >> 32 and r4 = retval; otherwise r3 = retval
    206      1.1  mrg 	blr
    207      1.1  mrg EPILOGUE()
    208