1 1.1 mrg dnl PowerPC-32 mpn_lshiftc. 2 1.1 mrg 3 1.1.1.2 mrg dnl Copyright 1995, 1998, 2000, 2002-2005, 2010 Free Software Foundation, Inc. 4 1.1 mrg 5 1.1 mrg dnl This file is part of the GNU MP Library. 6 1.1.1.2 mrg dnl 7 1.1 mrg dnl The GNU MP Library is free software; you can redistribute it and/or modify 8 1.1.1.2 mrg dnl it under the terms of either: 9 1.1.1.2 mrg dnl 10 1.1.1.2 mrg dnl * the GNU Lesser General Public License as published by the Free 11 1.1.1.2 mrg dnl Software Foundation; either version 3 of the License, or (at your 12 1.1.1.2 mrg dnl option) any later version. 13 1.1.1.2 mrg dnl 14 1.1.1.2 mrg dnl or 15 1.1.1.2 mrg dnl 16 1.1.1.2 mrg dnl * the GNU General Public License as published by the Free Software 17 1.1.1.2 mrg dnl Foundation; either version 2 of the License, or (at your option) any 18 1.1.1.2 mrg dnl later version. 19 1.1.1.2 mrg dnl 20 1.1.1.2 mrg dnl or both in parallel, as here. 21 1.1.1.2 mrg dnl 22 1.1 mrg dnl The GNU MP Library is distributed in the hope that it will be useful, but 23 1.1 mrg dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 24 1.1.1.2 mrg dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 25 1.1.1.2 mrg dnl for more details. 26 1.1.1.2 mrg dnl 27 1.1.1.2 mrg dnl You should have received copies of the GNU General Public License and the 28 1.1.1.2 mrg dnl GNU Lesser General Public License along with the GNU MP Library. If not, 29 1.1.1.2 mrg dnl see https://www.gnu.org/licenses/. 30 1.1 mrg 31 1.1 mrg include(`../config.m4') 32 1.1 mrg 33 1.1 mrg C cycles/limb 34 1.1 mrg C 603e: ? 
35 1.1 mrg C 604e: 3.0
36 1.1 mrg C 75x (G3): 3.0
37 1.1 mrg C 7400,7410 (G4): 3.0
38 1.1 mrg C 7445,7455 (G4+): 2.5
39 1.1 mrg C 7447,7457 (G4+): 2.25
40 1.1 mrg C power4/ppc970: 2.5
41 1.1 mrg C power5: 2.5
42 1.1 mrg
43 1.1 mrg C INPUT PARAMETERS
44 1.1 mrg C rp r3	(destination limb pointer; also receives the return value)
45 1.1 mrg C up r4	(source limb pointer, called s1 in the comments below)
46 1.1 mrg C n r5	(number of 32-bit limbs)
47 1.1 mrg C cnt r6	(shift count in bits)
48 1.1 mrg C Stores the one's complement of ({up,n} << cnt) at {rp,n}, working from the most significant limb down, and returns in r3 the (uncomplemented) bits shifted out of the top limb.
49 1.1 mrg ASM_START()
50 1.1 mrg PROLOGUE(mpn_lshiftc)
51 1.1 mrg 	cmpwi	cr0, r5, 30	C more than 30 limbs?
52 1.1 mrg 	slwi	r0, r5, 2	C r0 = n * 4, the operand size in bytes
53 1.1 mrg 	add	r4, r4, r0	C make r4 point at end of s1
54 1.1 mrg 	add	r7, r3, r0	C make r7 point at end of res
55 1.1 mrg 	bgt	L(BIG)		C branch if more than 30 limbs
56 1.1 mrg
57 1.1 mrg 	mtctr	r5		C copy size into CTR
58 1.1 mrg 	subfic	r8, r6, 32	C r8 = 32 - cnt, the complementary right-shift count
59 1.1 mrg 	lwzu	r11, -4(r4)	C load first s1 limb (loaded unconditionally, so n >= 1 is assumed)
60 1.1 mrg 	srw	r3, r11, r8	C compute function return value
61 1.1 mrg 	bdz	L(end1)		C only one limb: just store its complemented low part
62 1.1 mrg
63 1.1 mrg L(oop):	lwzu	r10, -4(r4)	C 2x unrolled loop; r11 and r10 alternate as the higher limb
64 1.1 mrg 	slw	r9, r11, r6	C high part: upper limb << cnt
65 1.1 mrg 	srw	r12, r10, r8	C low part: next lower limb >> (32 - cnt)
66 1.1 mrg 	nor	r9, r9, r12	C combine the two parts and complement in one instruction
67 1.1 mrg 	stwu	r9, -4(r7)
68 1.1 mrg 	bdz	L(end2)		C r10 holds the last limb when the count runs out here
69 1.1 mrg 	lwzu	r11, -4(r4)
70 1.1 mrg 	slw	r9, r10, r6
71 1.1 mrg 	srw	r12, r11, r8
72 1.1 mrg 	nor	r9, r9, r12
73 1.1 mrg 	stwu	r9, -4(r7)
74 1.1 mrg 	bdnz	L(oop)
75 1.1 mrg
76 1.1 mrg L(end1):			C lowest result limb when the last limb loaded is in r11
77 1.1 mrg 	slw	r0, r11, r6
78 1.1 mrg 	nor	r0, r0, r0	C nor of a register with itself is a bitwise NOT
79 1.1 mrg 	stw	r0, -4(r7)
80 1.1 mrg 	blr
81 1.1 mrg L(end2):			C lowest result limb when the last limb loaded is in r10
82 1.1 mrg 	slw	r0, r10, r6
83 1.1 mrg 	nor	r0, r0, r0	C bitwise NOT
84 1.1 mrg 	stw	r0, -4(r7)
85 1.1 mrg 	blr
86 1.1 mrg
87 1.1 mrg L(BIG):
88 1.1.1.2 mrg 	stwu	r1, -48(r1)	C allocate a 48-byte stack frame
89 1.1.1.2 mrg 	stmw	r24, 8(r1)	C save registers we are supposed to preserve
90 1.1 mrg 	lwzu	r9, -4(r4)	C load most significant s1 limb
91 1.1 mrg 	subfic	r8, r6, 32	C r8 = 32 - cnt
92 1.1 mrg 	srw	r3, r9, r8	C compute function return value
93 1.1 mrg 	slw	r0, r9, r6	C r0 carries the pending high part into the next result limb
94 1.1 mrg 	addi	r5, r5, -1	C n - 1 limbs remain to be loaded
95 1.1 mrg
96 1.1 mrg 	andi.	r10, r5, 3	C count for spill loop, (n - 1) mod 4
97 1.1 mrg 	beq	L(e)		C remaining count already a multiple of 4
98 1.1 mrg 	mtctr	r10
99 1.1 mrg 	lwzu	r28, -4(r4)
100 1.1 mrg 	bdz	L(xe0)		C a single spill limb: skip straight to the loop exit code
101 1.1 mrg
102 1.1 mrg L(loop0):
103 1.1 mrg 	slw	r12, r28, r6	C high part for the following result limb
104 1.1 mrg 	srw	r24, r28, r8	C low part for the current result limb
105 1.1 mrg 	lwzu	r28, -4(r4)
106 1.1 mrg 	nor	r24, r0, r24	C complement of (pending high part | low part)
107 1.1 mrg 	stwu	r24, -4(r7)
108 1.1 mrg 	mr	r0, r12
109 1.1 mrg 	bdnz	L(loop0)	C taken at most once!
110 1.1 mrg
111 1.1 mrg L(xe0):	slw	r12, r28, r6	C last spill limb: same step without loading a further limb
112 1.1 mrg 	srw	r24, r28, r8
113 1.1 mrg 	nor	r24, r0, r24
114 1.1 mrg 	stwu	r24, -4(r7)
115 1.1 mrg 	mr	r0, r12
116 1.1 mrg
117 1.1 mrg L(e):	srwi	r5, r5, 2	C count for unrolled loop
118 1.1 mrg 	addi	r5, r5, -1	C one group of four is handled by the code after the loop
119 1.1 mrg 	mtctr	r5
120 1.1 mrg 	lwz	r28, -4(r4)	C preload the first group of four limbs
121 1.1 mrg 	lwz	r29, -8(r4)
122 1.1 mrg 	lwz	r30, -12(r4)
123 1.1 mrg 	lwzu	r31, -16(r4)
124 1.1 mrg
125 1.1 mrg L(loopU):			C 4x unrolled: shift the current group while loading the next
126 1.1 mrg 	slw	r9, r28, r6
127 1.1 mrg 	srw	r24, r28, r8
128 1.1 mrg 	lwz	r28, -4(r4)
129 1.1 mrg 	slw	r10, r29, r6
130 1.1 mrg 	srw	r25, r29, r8
131 1.1 mrg 	lwz	r29, -8(r4)
132 1.1 mrg 	slw	r11, r30, r6
133 1.1 mrg 	srw	r26, r30, r8
134 1.1 mrg 	lwz	r30, -12(r4)
135 1.1 mrg 	slw	r12, r31, r6
136 1.1 mrg 	srw	r27, r31, r8
137 1.1 mrg 	lwzu	r31, -16(r4)
138 1.1 mrg 	nor	r24, r0, r24	C r0 is the high part left over from the previous group
139 1.1 mrg 	stw	r24, -4(r7)
140 1.1 mrg 	nor	r25, r9, r25
141 1.1 mrg 	stw	r25, -8(r7)
142 1.1 mrg 	nor	r26, r10, r26
143 1.1 mrg 	stw	r26, -12(r7)
144 1.1 mrg 	nor	r27, r11, r27
145 1.1 mrg 	stwu	r27, -16(r7)
146 1.1 mrg 	mr	r0, r12		C carry this group's last high part into the next group
147 1.1 mrg 	bdnz	L(loopU)
148 1.1 mrg
149 1.1 mrg 	slw	r9, r28, r6	C wind-down: process the final group of four, no further loads
150 1.1 mrg 	srw	r24, r28, r8
151 1.1 mrg 	slw	r10, r29, r6
152 1.1 mrg 	srw	r25, r29, r8
153 1.1 mrg 	slw	r11, r30, r6
154 1.1 mrg 	srw	r26, r30, r8
155 1.1 mrg 	slw	r12, r31, r6
156 1.1 mrg 	srw	r27, r31, r8
157 1.1 mrg 	nor	r24, r0, r24
158 1.1 mrg 	stw	r24, -4(r7)
159 1.1 mrg 	nor	r25, r9, r25
160 1.1 mrg 	stw	r25, -8(r7)
161 1.1 mrg 	nor	r26, r10, r26
162 1.1 mrg 	stw	r26, -12(r7)
163 1.1 mrg 	nor	r27, r11, r27
164 1.1 mrg 	stw	r27, -16(r7)
165 1.1 mrg 	nor	r12, r12, r12	C lowest result limb: complement of (lowest s1 limb << cnt)
166 1.1 mrg 	stw	r12, -20(r7)
167 1.1.1.2 mrg 	lmw	r24, 8(r1)	C restore registers
168 1.1.1.2 mrg 	addi	r1, r1, 48	C release the stack frame
169 1.1 mrg 	blr
170 1.1 mrg EPILOGUE()
171