Home | History | Annotate | Line # | Download | only in powerpc32
lshiftc.asm revision 1.1.1.1.4.2
      1 dnl  PowerPC-32 mpn_lshiftc.
      2 
      3 dnl  Copyright 1995, 1998, 2000, 2002, 2003, 2004, 2005, 2010 Free Software
      4 dnl  Foundation, Inc.
      5 
      6 dnl  This file is part of the GNU MP Library.
      7 
      8 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
      9 dnl  it under the terms of the GNU Lesser General Public License as published
     10 dnl  by the Free Software Foundation; either version 3 of the License, or (at
     11 dnl  your option) any later version.
     12 
     13 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
     14 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
     15 dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
     16 dnl  License for more details.
     17 
     18 dnl  You should have received a copy of the GNU Lesser General Public License
     19 dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
     20 
     21 include(`../config.m4')
     22 
     23 C                cycles/limb
     24 C 603e:            ?
     25 C 604e:            3.0
     26 C 75x (G3):        3.0
     27 C 7400,7410 (G4):  3.0
     28 C 7445,7455 (G4+): 2.5
     29 C 7447,7457 (G4+): 2.25
     30 C power4/ppc970:   2.5
     31 C power5:          2.5
     32 
     33 C INPUT PARAMETERS
     34 C rp	r3
     35 C up	r4
     36 C n	r5
     37 C cnt	r6
     38 
     39 ASM_START()
     40 PROLOGUE(mpn_lshiftc)	C rp[] = complement of (up[] << cnt), n limbs; r3 returns the bits shifted out
     41 	cmpwi	cr0, r5, 30	C more than 30 limbs?
     42 	slwi	r0, r5, 2	C r0 = 4*n, operand size in bytes
     43 	add	r4, r4, r0	C make r4 point at end of s1 (up)
     44 	add	r7, r3, r0	C make r7 point at end of res (rp)
     45 	bgt	L(BIG)		C branch if more than 30 limbs
     46 C Simple path, n <= 30: walk from the highest address down, two limbs per loop pass.
     47 	mtctr	r5		C copy size into CTR
     48 	subfic	r8, r6, 32	C r8 = 32 - cnt, the complementary shift count
     49 	lwzu	r11, -4(r4)	C load first s1 limb (highest address)
     50 	srw	r3, r11, r8	C compute function return value
     51 	bdz	L(end1)		C n = 1: only the final store remains
     52 
     53 L(oop):	lwzu	r10, -4(r4)	C r10 = next lower limb
     54 	slw	r9, r11, r6	C upper bits come from the current limb
     55 	srw	r12, r10, r8	C lower bits come from the next limb
     56 	nor	r9, r9, r12	C combine the two parts and complement
     57 	stwu	r9, -4(r7)
     58 	bdz	L(end2)		C source exhausted, last limb is in r10
     59 	lwzu	r11, -4(r4)	C same step again with r10/r11 roles swapped
     60 	slw	r9, r10, r6
     61 	srw	r12, r11, r8
     62 	nor	r9, r9, r12
     63 	stwu	r9, -4(r7)
     64 	bdnz	L(oop)		C falls through with the last limb in r11
     65 
     66 L(end1):		C final limb is in r11
     67 	slw	r0, r11, r6
     68 	nor	r0, r0, r0	C complement only, no lower limb to merge in
     69 	stw	r0, -4(r7)
     70 	blr
     71 L(end2):		C final limb is in r10
     72 	slw	r0, r10, r6
     73 	nor	r0, r0, r0	C complement only, no lower limb to merge in
     74 	stw	r0, -4(r7)
     75 	blr
     76 C Unrolled path, n > 30.  NOTE(review): r24-r31 are spilled below r1 without moving r1; confirm the target ABI permits this.
     77 L(BIG):
     78 	stmw	r24, -32(r1)	C save registers we are supposed to preserve (r24-r31)
     79 	lwzu	r9, -4(r4)	C load first s1 limb (highest address)
     80 	subfic	r8, r6, 32	C r8 = 32 - cnt, the complementary shift count
     81 	srw	r3, r9, r8	C compute function return value
     82 	slw	r0, r9, r6	C r0 = upper part waiting to be merged into the next store
     83 	addi	r5, r5, -1	C r5 = n - 1 limbs left to load
     84 
     85 	andi.	r10, r5, 3	C count for spill loop, (n-1) mod 4
     86 	beq	L(e)		C remaining count is already a multiple of 4
     87 	mtctr	r10
     88 	lwzu	r28, -4(r4)
     89 	bdz	L(xe0)		C exactly one spill limb
     90 
     91 L(loop0):
     92 	slw	r12, r28, r6	C upper part for the following store
     93 	srw	r24, r28, r8	C lower part for this store
     94 	lwzu	r28, -4(r4)
     95 	nor	r24, r0, r24	C merge with pending upper part and complement
     96 	stwu	r24, -4(r7)
     97 	mr	r0, r12		C new pending upper part
     98 	bdnz	L(loop0)	C taken at most once!
     99 
    100 L(xe0):	slw	r12, r28, r6	C last spill limb, no further load
    101 	srw	r24, r28, r8
    102 	nor	r24, r0, r24
    103 	stwu	r24, -4(r7)
    104 	mr	r0, r12
    105 
    106 L(e):	srwi	r5, r5, 2	C count for unrolled loop
    107 	addi	r5, r5, -1	C last group of four is done after the loop; n > 30 keeps this count >= 1
    108 	mtctr	r5
    109 	lwz	r28, -4(r4)	C preload four limbs for the first pass
    110 	lwz	r29, -8(r4)
    111 	lwz	r30, -12(r4)
    112 	lwzu	r31, -16(r4)
    113 
    114 L(loopU):		C each pass stores four limbs and preloads the next four
    115 	slw	r9, r28, r6
    116 	srw	r24, r28, r8
    117 	lwz	r28, -4(r4)
    118 	slw	r10, r29, r6
    119 	srw	r25, r29, r8
    120 	lwz	r29, -8(r4)
    121 	slw	r11, r30, r6
    122 	srw	r26, r30, r8
    123 	lwz	r30, -12(r4)
    124 	slw	r12, r31, r6
    125 	srw	r27, r31, r8
    126 	lwzu	r31, -16(r4)
    127 	nor	r24, r0, r24	C r0 is the upper part carried in from the previous limb
    128 	stw	r24, -4(r7)
    129 	nor	r25, r9, r25
    130 	stw	r25, -8(r7)
    131 	nor	r26, r10, r26
    132 	stw	r26, -12(r7)
    133 	nor	r27, r11, r27
    134 	stwu	r27, -16(r7)
    135 	mr	r0, r12		C carry upper part of the fourth limb into the next pass
    136 	bdnz	L(loopU)
    137 C Wind-down: finish the four limbs already loaded, then store the lowest result limb.
    138 	slw	r9, r28, r6
    139 	srw	r24, r28, r8
    140 	slw	r10, r29, r6
    141 	srw	r25, r29, r8
    142 	slw	r11, r30, r6
    143 	srw	r26, r30, r8
    144 	slw	r12, r31, r6
    145 	srw	r27, r31, r8
    146 	nor	r24, r0, r24
    147 	stw	r24, -4(r7)
    148 	nor	r25, r9, r25
    149 	stw	r25, -8(r7)
    150 	nor	r26, r10, r26
    151 	stw	r26, -12(r7)
    152 	nor	r27, r11, r27
    153 	stw	r27, -16(r7)
    154 	nor	r12, r12, r12	C lowest limb: complement only, no lower limb to merge in
    155 	stw	r12, -20(r7)
    156 	lmw	r24, -32(r1)	C restore registers
    157 	blr
    158 EPILOGUE()
    159