1 1.1 mrg dnl PowerPC-64 mpn_rshift -- rp[] = up[] >> cnt 2 1.1 mrg 3 1.1 mrg dnl Copyright 2003, 2005, 2010, 2013 Free Software Foundation, Inc. 4 1.1 mrg 5 1.1 mrg dnl This file is part of the GNU MP Library. 6 1.1.1.2 mrg dnl 7 1.1 mrg dnl The GNU MP Library is free software; you can redistribute it and/or modify 8 1.1.1.2 mrg dnl it under the terms of either: 9 1.1.1.2 mrg dnl 10 1.1.1.2 mrg dnl * the GNU Lesser General Public License as published by the Free 11 1.1.1.2 mrg dnl Software Foundation; either version 3 of the License, or (at your 12 1.1.1.2 mrg dnl option) any later version. 13 1.1.1.2 mrg dnl 14 1.1.1.2 mrg dnl or 15 1.1.1.2 mrg dnl 16 1.1.1.2 mrg dnl * the GNU General Public License as published by the Free Software 17 1.1.1.2 mrg dnl Foundation; either version 2 of the License, or (at your option) any 18 1.1.1.2 mrg dnl later version. 19 1.1.1.2 mrg dnl 20 1.1.1.2 mrg dnl or both in parallel, as here. 21 1.1.1.2 mrg dnl 22 1.1 mrg dnl The GNU MP Library is distributed in the hope that it will be useful, but 23 1.1 mrg dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 24 1.1.1.2 mrg dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 25 1.1.1.2 mrg dnl for more details. 26 1.1.1.2 mrg dnl 27 1.1.1.2 mrg dnl You should have received copies of the GNU General Public License and the 28 1.1.1.2 mrg dnl GNU Lesser General Public License along with the GNU MP Library. If not, 29 1.1.1.2 mrg dnl see https://www.gnu.org/licenses/. 30 1.1 mrg 31 1.1 mrg include(`../config.m4') 32 1.1 mrg 33 1.1 mrg C cycles/limb 34 1.1 mrg C POWER3/PPC630 ? 35 1.1 mrg C POWER4/PPC970 ? 36 1.1 mrg C POWER5 2 37 1.1 mrg C POWER6 3.5 (mysteriously 3.0 for cnt=1) 38 1.1 mrg 39 1.1 mrg C TODO 40 1.1 mrg C * Micro-optimise header code 41 1.1 mrg C * Perhaps do 4-way unrolling, for 2.5 c/l on POWER6. The code is 4248 42 1.1 mrg C bytes, 4-way code would become about 50% larger. 
43 1.1 mrg C mpn_rshift: shift the n-limb vector at up right by cnt bits into rp; r3 returns the lowest limb shifted left by 64 - cnt, i.e. the bits shifted out
44 1.1 mrg C INPUT PARAMETERS
45 1.1 mrg define(`rp_param', `r3') C destination pointer as passed in; copied to rp because r3 is reused for the return value
46 1.1 mrg define(`up', `r4') C source pointer, advanced as limbs are loaded
47 1.1 mrg define(`n', `r5') C limb count
48 1.1 mrg define(`cnt', `r6') C shift count in bits
49 1.1 mrg
50 1.1 mrg define(`tnc',`r0') C 64 - cnt, set by the subfic below
51 1.1 mrg define(`retval',`r3') C return value register; the code below writes r3 directly
52 1.1 mrg define(`rp', `r7') C working destination pointer
53 1.1 mrg C NOTE(review): SHIFT blocks exist only for counts 1..63 and both entry paths load at least one limb, so callers presumably pass 1 <= cnt <= 63 and n >= 1 -- confirm
54 1.1 mrg ASM_START()
55 1.1.1.2 mrg PROLOGUE(mpn_rshift,toc)
56 1.1 mrg
57 1.1 mrg ifdef(`HAVE_ABI_mode32',`
58 1.1 mrg rldicl n, n, 0,32 C FIXME: avoid this zero extend
59 1.1 mrg ')
60 1.1 mrg mflr r12 C save caller LR; LR is reused below as a computed-branch target
61 1.1 mrg LEAL( r11, L(e1)) C address of L(e1) label in SHIFT(1)
62 1.1 mrg sldi r10, cnt, 6 C multiply cnt by size of a SHIFT block
63 1.1 mrg add r11, r11, r10 C r11 = L(e1) + 64*cnt; biased by -64 or -84 below to reach L(eN) or L(oN) for N = cnt
64 1.1 mrg srdi r10, n, 1 C r10 = n/2, the loop count
65 1.1 mrg mr rp, rp_param
66 1.1 mrg subfic tnc, cnt, 64 C tnc = 64 - cnt
67 1.1 mrg rlwinm. r8, n, 0,31,31 C extract bit 0
68 1.1 mrg mtctr r10 C CTR = n/2
69 1.1 mrg beq L(evn) C bit 0 clear: n is even
70 1.1 mrg
71 1.1 mrg L(odd): ld r9, 0(up) C r9 = lowest limb
72 1.1 mrg cmpdi cr0, n, 1 C n = 1?
73 1.1 mrg beq L(1)
74 1.1 mrg ld r8, 8(up) C r8 = second limb
75 1.1 mrg addi r11, r11, -84 C L(o1) - L(e1) - 64: L(oN) lies 5 instructions (20 bytes) before L(eN)
76 1.1 mrg mtlr r11
77 1.1 mrg sld r3, r9, tnc C retval
78 1.1 mrg addi up, up, 8 C bias up and rp by one limb to match the offsets used at L(oN)
79 1.1 mrg addi rp, rp, 8
80 1.1 mrg blr C branch to L(oN)
81 1.1 mrg
82 1.1 mrg L(evn): ld r8, 0(up) C r8 = lowest limb, r9 = second limb
83 1.1 mrg ld r9, 8(up)
84 1.1 mrg addi r11, r11, -64 C step back one block: r11 = L(eN) for N = cnt
85 1.1 mrg mtlr r11
86 1.1 mrg sld r3, r8, tnc C retval
87 1.1 mrg addi up, up, 16
88 1.1 mrg blr C branch to L(eN)
89 1.1 mrg
90 1.1 mrg L(1): sld r3, r9, tnc C retval
91 1.1 mrg srd r8, r9, cnt C single limb case: result is the only limb shifted right by cnt
92 1.1 mrg std r8, 0(rp)
93 1.1 mrg mtlr r12 C restore caller LR
94 1.1 mrg ifdef(`HAVE_ABI_mode32',
95 1.1 mrg ` mr r4, r3 C mode32: copy retval to r4 and leave its upper 32 bits in r3
96 1.1 mrg srdi r3, r3, 32
97 1.1 mrg ')
98 1.1 mrg blr
99 1.1 mrg
100 1.1 mrg C SHIFT(k): one 16-instruction (64-byte) loop specialised for shift count k; r8 and r9 hold the two most recently loaded limbs and each pass stores two result limbs
101 1.1 mrg define(SHIFT,`
102 1.1 mrg L(lo$1):ld r8, 0(up)
103 1.1 mrg std r11, 0(rp) C store the limb computed at the e entry on the previous pass
104 1.1 mrg addi rp, rp, 16
105 1.1 mrg L(o$1): srdi r10, r9, $1 C r10 = r9 shifted right by the block shift count
106 1.1 mrg rldimi r10, r8, eval(64-$1), 0 C insert the low bits of r8 into the vacated high bits of r10
107 1.1 mrg ld r9, 8(up)
108 1.1 mrg addi up, up, 16
109 1.1 mrg std r10, -8(rp)
110 1.1 mrg L(e$1): srdi r11, r8, $1 C same combine for the pair r8 and r9
111 1.1 mrg rldimi r11, r9, eval(64-$1), 0
112 1.1 mrg bdnz L(lo$1) C decrement CTR and loop while it is nonzero
113 1.1 mrg std r11, 0(rp) C tail: store the pending limb
114 1.1 mrg srdi r10, r9, $1 C top result limb; stored at the common exit
115 1.1 mrg b L(com)
116 1.1 mrg nop C pad the block to 16 instructions
117 1.1 mrg nop
118 1.1 mrg ')
119 1.1 mrg
120 1.1 mrg ALIGN(64)
121 1.1 mrg forloop(`i',1,63,`SHIFT(i)') C emit the blocks for shift counts 1 through 63 back to back
122 1.1 mrg
123 1.1 mrg L(com): std r10, 8(rp) C store the top result limb
124 1.1 mrg mtlr r12 C restore caller LR
125 1.1 mrg ifdef(`HAVE_ABI_mode32',
126 1.1 mrg ` mr r4, r3 C mode32: copy retval to r4 and leave its upper 32 bits in r3
127 1.1 mrg srdi r3, r3, 32
128 1.1 mrg ')
129 1.1 mrg blr
130 1.1 mrg EPILOGUE()
131 1.1 mrg ASM_END()
132