Home | History | Annotate | Line # | Download | only in powerpc32
lshiftc.asm revision 1.1.1.2
      1 dnl  PowerPC-32 mpn_lshiftc.
      2 
      3 dnl  Copyright 1995, 1998, 2000, 2002-2005, 2010 Free Software Foundation, Inc.
      4 
      5 dnl  This file is part of the GNU MP Library.
      6 dnl
      7 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
      8 dnl  it under the terms of either:
      9 dnl
     10 dnl    * the GNU Lesser General Public License as published by the Free
     11 dnl      Software Foundation; either version 3 of the License, or (at your
     12 dnl      option) any later version.
     13 dnl
     14 dnl  or
     15 dnl
     16 dnl    * the GNU General Public License as published by the Free Software
     17 dnl      Foundation; either version 2 of the License, or (at your option) any
     18 dnl      later version.
     19 dnl
     20 dnl  or both in parallel, as here.
     21 dnl
     22 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
     23 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
     24 dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
     25 dnl  for more details.
     26 dnl
     27 dnl  You should have received copies of the GNU General Public License and the
     28 dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
     29 dnl  see https://www.gnu.org/licenses/.
     30 
     31 include(`../config.m4')
     32 
     33 C                cycles/limb
     34 C 603e:            ?
     35 C 604e:            3.0
     36 C 75x (G3):        3.0
     37 C 7400,7410 (G4):  3.0
     38 C 7445,7455 (G4+): 2.5
     39 C 7447,7457 (G4+): 2.25
     40 C power4/ppc970:   2.5
     41 C power5:          2.5
     42 
     43 C INPUT PARAMETERS
     44 C rp	r3
     45 C up	r4
     46 C n	r5
     47 C cnt	r6
     48 
     49 ASM_START()
     50 PROLOGUE(mpn_lshiftc)
     51 	cmpwi	cr0, r5, 30	C more than 30 limbs?
     52 	slwi	r0, r5, 2	C r0 = n * 4 (operand size in bytes)
     53 	add	r4, r4, r0	C make r4 point at end of s1
     54 	add	r7, r3, r0	C make r7 point at end of res
     55 	bgt	L(BIG)		C branch if more than 30 limbs
     56 C Small case (n <= 30): walk from the high limb down, two limbs per loop pass
     57 	mtctr	r5		C copy size into CTR
     58 	subfic	r8, r6, 32	C r8 = 32 - cnt
     59 	lwzu	r11, -4(r4)	C load first s1 limb
     60 	srw	r3, r11, r8	C compute function return value
     61 	bdz	L(end1)		C n = 1: only the lowest limb is left
     62 
     63 L(oop):	lwzu	r10, -4(r4)	C fetch next lower limb
     64 	slw	r9, r11, r6	C current limb << cnt
     65 	srw	r12, r10, r8	C next lower limb >> (32 - cnt)
     66 	nor	r9, r9, r12	C merge both parts and complement
     67 	stwu	r9, -4(r7)
     68 	bdz	L(end2)		C CTR exhausted; lowest limb is in r10
     69 	lwzu	r11, -4(r4)	C same step with r10/r11 swapped
     70 	slw	r9, r10, r6
     71 	srw	r12, r11, r8
     72 	nor	r9, r9, r12
     73 	stwu	r9, -4(r7)
     74 	bdnz	L(oop)		C fall through when the lowest limb is in r11
     75 
     76 L(end1):			C lowest limb is in r11
     77 	slw	r0, r11, r6
     78 	nor	r0, r0, r0	C complement (nothing below to merge in)
     79 	stw	r0, -4(r7)
     80 	blr
     81 L(end2):			C lowest limb is in r10
     82 	slw	r0, r10, r6
     83 	nor	r0, r0, r0	C complement (nothing below to merge in)
     84 	stw	r0, -4(r7)
     85 	blr
     86 C Large case (n > 30): spill loop for (n-1) mod 4 limbs, then 4-way unrolled loop
     87 L(BIG):
     88 	stwu	r1, -48(r1)	C allocate a 48-byte stack frame
     89 	stmw	r24, 8(r1)	C save registers we are supposed to preserve
     90 	lwzu	r9, -4(r4)	C load first (highest) s1 limb
     91 	subfic	r8, r6, 32	C r8 = 32 - cnt
     92 	srw	r3, r9, r8	C compute function return value
     93 	slw	r0, r9, r6	C r0 = high part carried into the next result limb
     94 	addi	r5, r5, -1	C r5 = n - 1 limbs still to load
     95 
     96 	andi.	r10, r5, 3	C count for spill loop
     97 	beq	L(e)		C (n - 1) is a multiple of 4: no spill limbs
     98 	mtctr	r10
     99 	lwzu	r28, -4(r4)
    100 	bdz	L(xe0)		C exactly one spill limb
    101 
    102 L(loop0):
    103 	slw	r12, r28, r6	C high part for the following result limb
    104 	srw	r24, r28, r8	C low part for the current result limb
    105 	lwzu	r28, -4(r4)
    106 	nor	r24, r0, r24	C merge with carried high part and complement
    107 	stwu	r24, -4(r7)
    108 	mr	r0, r12		C carry high part forward
    109 	bdnz	L(loop0)	C taken at most once!
    110 
    111 L(xe0):	slw	r12, r28, r6	C last spill limb (already loaded in r28)
    112 	srw	r24, r28, r8
    113 	nor	r24, r0, r24
    114 	stwu	r24, -4(r7)
    115 	mr	r0, r12
    116 
    117 L(e):	srwi	r5, r5, 2	C count for unrolled loop
    118 	addi	r5, r5, -1	C final group of 4 is finished after the loop
    119 	mtctr	r5
    120 	lwz	r28, -4(r4)	C preload 4 limbs into r28-r31
    121 	lwz	r29, -8(r4)
    122 	lwz	r30, -12(r4)
    123 	lwzu	r31, -16(r4)
    124 
    125 L(loopU):
    126 	slw	r9, r28, r6	C shift the 4 loaded limbs while reloading r28-r31
    127 	srw	r24, r28, r8
    128 	lwz	r28, -4(r4)
    129 	slw	r10, r29, r6
    130 	srw	r25, r29, r8
    131 	lwz	r29, -8(r4)
    132 	slw	r11, r30, r6
    133 	srw	r26, r30, r8
    134 	lwz	r30, -12(r4)
    135 	slw	r12, r31, r6
    136 	srw	r27, r31, r8
    137 	lwzu	r31, -16(r4)
    138 	nor	r24, r0, r24	C r0 holds the high part from the previous group
    139 	stw	r24, -4(r7)
    140 	nor	r25, r9, r25
    141 	stw	r25, -8(r7)
    142 	nor	r26, r10, r26
    143 	stw	r26, -12(r7)
    144 	nor	r27, r11, r27
    145 	stwu	r27, -16(r7)
    146 	mr	r0, r12		C carry high part into the next group
    147 	bdnz	L(loopU)
    148 C Wind-down: finish the 4 limbs already in r28-r31, then store the lowest limb
    149 	slw	r9, r28, r6
    150 	srw	r24, r28, r8
    151 	slw	r10, r29, r6
    152 	srw	r25, r29, r8
    153 	slw	r11, r30, r6
    154 	srw	r26, r30, r8
    155 	slw	r12, r31, r6
    156 	srw	r27, r31, r8
    157 	nor	r24, r0, r24
    158 	stw	r24, -4(r7)
    159 	nor	r25, r9, r25
    160 	stw	r25, -8(r7)
    161 	nor	r26, r10, r26
    162 	stw	r26, -12(r7)
    163 	nor	r27, r11, r27
    164 	stw	r27, -16(r7)
    165 	nor	r12, r12, r12	C lowest result limb: complement only
    166 	stw	r12, -20(r7)
    167 	lmw	r24, 8(r1)	C restore registers
    168 	addi	r1, r1, 48	C release the stack frame
    169 	blr
    170 EPILOGUE()
    171