Home | History | Annotate | Line # | Download | only in powerpc32
lshiftc.asm revision 1.1
      1  1.1  mrg dnl  PowerPC-32 mpn_lshiftc.
      2  1.1  mrg 
      3  1.1  mrg dnl  Copyright 1995, 1998, 2000, 2002, 2003, 2004, 2005, 2010 Free Software
      4  1.1  mrg dnl  Foundation, Inc.
      5  1.1  mrg 
      6  1.1  mrg dnl  This file is part of the GNU MP Library.
      7  1.1  mrg 
      8  1.1  mrg dnl  The GNU MP Library is free software; you can redistribute it and/or modify
      9  1.1  mrg dnl  it under the terms of the GNU Lesser General Public License as published
     10  1.1  mrg dnl  by the Free Software Foundation; either version 3 of the License, or (at
     11  1.1  mrg dnl  your option) any later version.
     12  1.1  mrg 
     13  1.1  mrg dnl  The GNU MP Library is distributed in the hope that it will be useful, but
     14  1.1  mrg dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
     15  1.1  mrg dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
     16  1.1  mrg dnl  License for more details.
     17  1.1  mrg 
     18  1.1  mrg dnl  You should have received a copy of the GNU Lesser General Public License
     19  1.1  mrg dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
     20  1.1  mrg 
     21  1.1  mrg include(`../config.m4')
     22  1.1  mrg 
     23  1.1  mrg C                cycles/limb
     24  1.1  mrg C 603e:            ?
     25  1.1  mrg C 604e:            3.0
     26  1.1  mrg C 75x (G3):        3.0
     27  1.1  mrg C 7400,7410 (G4):  3.0
     28  1.1  mrg C 7445,7455 (G4+): 2.5
     29  1.1  mrg C 7447,7457 (G4+): 2.25
     30  1.1  mrg C power4/ppc970:   2.5
     31  1.1  mrg C power5:          2.5
     32  1.1  mrg 
     33  1.1  mrg C INPUT PARAMETERS
     34  1.1  mrg C rp	r3
     35  1.1  mrg C up	r4
     36  1.1  mrg C n	r5
     37  1.1  mrg C cnt	r6
     38  1.1  mrg 
     39  1.1  mrg ASM_START()
     40  1.1  mrg PROLOGUE(mpn_lshiftc)
                  C mpn_lshiftc: shift the n-limb operand at up left by cnt bits and store
                  C the bitwise complement of the shifted limbs at rp.  The bits shifted out
                  C of the most significant limb are returned in r3, not complemented.
                  C Limbs are processed from the highest address downwards.
                  C NOTE(review): the first limb is loaded unconditionally, so n = 0 is not
                  C handled here; presumably callers guarantee n >= 1 - confirm.
     41  1.1  mrg 	cmpwi	cr0, r5, 30	C more than 30 limbs?
     42  1.1  mrg 	slwi	r0, r5, 2	C r0 = 4*n, the operand size in bytes
     43  1.1  mrg 	add	r4, r4, r0	C make r4 point at end of s1
     44  1.1  mrg 	add	r7, r3, r0	C make r7 point at end of res
     45  1.1  mrg 	bgt	L(BIG)		C branch if more than 30 limbs
     46  1.1  mrg 
                  C Small operands, n <= 30: plain loop handling two limbs per iteration.
     47  1.1  mrg 	mtctr	r5		C copy size into CTR
     48  1.1  mrg 	subfic	r8, r6, 32	C r8 = 32 - cnt, the complementary right shift count
     49  1.1  mrg 	lwzu	r11, -4(r4)	C load first s1 limb
     50  1.1  mrg 	srw	r3, r11, r8	C compute function return value
     51  1.1  mrg 	bdz	L(end1)	C n = 1: only the top limb to store
     52  1.1  mrg 
                  C Each result limb is ~((hi << cnt) | (lo >> (32 - cnt))), where lo is the
                  C next lower source limb.  r10 and r11 alternate in the hi and lo roles.
     53  1.1  mrg L(oop):	lwzu	r10, -4(r4)	C r10 = next lower limb
     54  1.1  mrg 	slw	r9, r11, r6	C bits of r11 that stay in this result limb
     55  1.1  mrg 	srw	r12, r10, r8	C bits of r10 shifted up into this result limb
     56  1.1  mrg 	nor	r9, r9, r12	C merge and complement in one instruction
     57  1.1  mrg 	stwu	r9, -4(r7)	C store result limb, r7 steps down
     58  1.1  mrg 	bdz	L(end2)	C source exhausted, last limb is in r10
     59  1.1  mrg 	lwzu	r11, -4(r4)	C same again with r10 and r11 swapped
     60  1.1  mrg 	slw	r9, r10, r6
     61  1.1  mrg 	srw	r12, r11, r8
     62  1.1  mrg 	nor	r9, r9, r12
     63  1.1  mrg 	stwu	r9, -4(r7)
     64  1.1  mrg 	bdnz	L(oop)	C falls through with the last limb in r11
     65  1.1  mrg 
     66  1.1  mrg L(end1):
     67  1.1  mrg 	slw	r0, r11, r6	C lowest result limb has no lower source limb
     68  1.1  mrg 	nor	r0, r0, r0	C complement; the vacated low bits become ones
     69  1.1  mrg 	stw	r0, -4(r7)
     70  1.1  mrg 	blr
     71  1.1  mrg L(end2):
     72  1.1  mrg 	slw	r0, r10, r6	C as end1 but the last limb is in r10
     73  1.1  mrg 	nor	r0, r0, r0
     74  1.1  mrg 	stw	r0, -4(r7)
     75  1.1  mrg 	blr
     76  1.1  mrg 
                  C Large operands, n > 30: four-way unrolled loop using r24-r31.
     77  1.1  mrg L(BIG):
                  C NOTE(review): r24-r31 are stored in the 32 bytes below r1 and r1 is not
                  C adjusted; confirm that every target ABI permits stores below the stack pointer.
     78  1.1  mrg 	stmw	r24, -32(r1)	C save registers we are supposed to preserve
     79  1.1  mrg 	lwzu	r9, -4(r4)	C r9 = most significant source limb
     80  1.1  mrg 	subfic	r8, r6, 32	C r8 = 32 - cnt
     81  1.1  mrg 	srw	r3, r9, r8	C compute function return value
     82  1.1  mrg 	slw	r0, r9, r6	C r0 = pending high part of the next result limb
     83  1.1  mrg 	addi	r5, r5, -1	C r5 = n - 1 limbs still to load
     84  1.1  mrg 
     85  1.1  mrg 	andi.	r10, r5, 3	C count for spill loop
     86  1.1  mrg 	beq	L(e)	C n - 1 is a multiple of 4: nothing to peel off
     87  1.1  mrg 	mtctr	r10
     88  1.1  mrg 	lwzu	r28, -4(r4)
     89  1.1  mrg 	bdz	L(xe0)	C a single leftover limb
     90  1.1  mrg 
     91  1.1  mrg L(loop0):
     92  1.1  mrg 	slw	r12, r28, r6	C high part for the following result limb
     93  1.1  mrg 	srw	r24, r28, r8	C low part completing the pending result limb
     94  1.1  mrg 	lwzu	r28, -4(r4)
     95  1.1  mrg 	nor	r24, r0, r24	C ~(pending high part | low part)
     96  1.1  mrg 	stwu	r24, -4(r7)
     97  1.1  mrg 	mr	r0, r12	C carry the high part forward
     98  1.1  mrg 	bdnz	L(loop0)	C taken at most once!
     99  1.1  mrg 
    100  1.1  mrg L(xe0):	slw	r12, r28, r6	C last leftover limb, nothing more to load here
    101  1.1  mrg 	srw	r24, r28, r8
    102  1.1  mrg 	nor	r24, r0, r24
    103  1.1  mrg 	stwu	r24, -4(r7)
    104  1.1  mrg 	mr	r0, r12
    105  1.1  mrg 
    106  1.1  mrg L(e):	srwi	r5, r5, 2	C count for unrolled loop
    107  1.1  mrg 	addi	r5, r5, -1	C the last group of 4 is finished after the loop
    108  1.1  mrg 	mtctr	r5
    109  1.1  mrg 	lwz	r28, -4(r4)	C preload 4 limbs for the first iteration
    110  1.1  mrg 	lwz	r29, -8(r4)
    111  1.1  mrg 	lwz	r30, -12(r4)
    112  1.1  mrg 	lwzu	r31, -16(r4)	C r4 steps down by 4 limbs
    113  1.1  mrg 
                  C Per iteration: shift the 4 limbs already held in r28-r31, load the next 4
                  C into the same registers, then store 4 result limbs.
    114  1.1  mrg L(loopU):
    115  1.1  mrg 	slw	r9, r28, r6
    116  1.1  mrg 	srw	r24, r28, r8
    117  1.1  mrg 	lwz	r28, -4(r4)
    118  1.1  mrg 	slw	r10, r29, r6
    119  1.1  mrg 	srw	r25, r29, r8
    120  1.1  mrg 	lwz	r29, -8(r4)
    121  1.1  mrg 	slw	r11, r30, r6
    122  1.1  mrg 	srw	r26, r30, r8
    123  1.1  mrg 	lwz	r30, -12(r4)
    124  1.1  mrg 	slw	r12, r31, r6
    125  1.1  mrg 	srw	r27, r31, r8
    126  1.1  mrg 	lwzu	r31, -16(r4)
    127  1.1  mrg 	nor	r24, r0, r24	C r0 = high part carried from the previous limb
    128  1.1  mrg 	stw	r24, -4(r7)
    129  1.1  mrg 	nor	r25, r9, r25
    130  1.1  mrg 	stw	r25, -8(r7)
    131  1.1  mrg 	nor	r26, r10, r26
    132  1.1  mrg 	stw	r26, -12(r7)
    133  1.1  mrg 	nor	r27, r11, r27
    134  1.1  mrg 	stwu	r27, -16(r7)
    135  1.1  mrg 	mr	r0, r12	C carry into the next iteration
    136  1.1  mrg 	bdnz	L(loopU)
    137  1.1  mrg 
                  C Wind-down: finish the last 4 loaded limbs with no further loads.  This
                  C takes 5 stores, the fifth being the lowest result limb.
    138  1.1  mrg 	slw	r9, r28, r6
    139  1.1  mrg 	srw	r24, r28, r8
    140  1.1  mrg 	slw	r10, r29, r6
    141  1.1  mrg 	srw	r25, r29, r8
    142  1.1  mrg 	slw	r11, r30, r6
    143  1.1  mrg 	srw	r26, r30, r8
    144  1.1  mrg 	slw	r12, r31, r6
    145  1.1  mrg 	srw	r27, r31, r8
    146  1.1  mrg 	nor	r24, r0, r24
    147  1.1  mrg 	stw	r24, -4(r7)
    148  1.1  mrg 	nor	r25, r9, r25
    149  1.1  mrg 	stw	r25, -8(r7)
    150  1.1  mrg 	nor	r26, r10, r26
    151  1.1  mrg 	stw	r26, -12(r7)
    152  1.1  mrg 	nor	r27, r11, r27
    153  1.1  mrg 	stw	r27, -16(r7)
    154  1.1  mrg 	nor	r12, r12, r12	C lowest limb: complement of r31 << cnt
    155  1.1  mrg 	stw	r12, -20(r7)
    156  1.1  mrg 	lmw	r24, -32(r1)	C restore registers
    157  1.1  mrg 	blr
    158  1.1  mrg EPILOGUE()
    159