dnl  SPARC v9 mpn_addlsh_n and mpn_sublsh_n for T3/T4/T5.

dnl  Contributed to the GNU project by Torbjörn Granlund.

dnl  Copyright 2013 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.

include(`../config.m4')

C		   cycles/limb
C UltraSPARC T3:	11
C UltraSPARC T4:	 4

C For sublsh_n we combine the two shifted limbs using xnor, using the identity
C (a xor not b) = (not (a xor b)) which equals (not (a or b)) when (a and b) =
C 0 as it is in our usage.  This gives us the ones complement for free.
C Unfortunately, the same trick will not work for rsblsh_n, which will instead
C require a separate negation.
C
C FIXME: Add rsblsh_n to this file.

C Interface, as far as this file shows it.  The arguments are read from the
C in-registers after the save:
C
C   rp   (%i0)  destination, n limbs are stored here
C   up   (%i1)  source that is added to / subtracted from
C   vp   (%i2)  source whose limbs are shifted left by cnt bits first
C   n    (%i3)  number of 64-bit limbs.  There is no check in this file, the
C               code reads at least one limb, so n >= 1 is assumed (TODO
C               confirm against callers)
C   cnt  (%i4)  shift count.  tnc = 64 - cnt is used as the complementary
C               right shift, so presumably 0 < cnt < 64 (TODO confirm)
C
C Result:  {rp,n} = {up,n} + ({vp,n} << cnt)   for addlsh_n
C          {rp,n} = {up,n} - ({vp,n} << cnt)   for sublsh_n
C
C The value left in %i0 before ret/restore is the return value: the bits
C shifted out of the top limb of vp plus the final carry (addlsh_n) or plus
C the final borrow (sublsh_n).
C
C NOTE(review): addxccc(), addxc() and REGISTER() are macros defined outside
C this file.  The comments below assume addxccc/addxc expand to the 64-bit
C add-with-carry instructions (addxccc also updating the flags) -- confirm in
C the included m4 definitions.

define(`rp', `%i0')
define(`up', `%i1')
define(`vp', `%i2')
define(`n',  `%i3')
define(`cnt',`%i4')

define(`tnc',`%o5')

C Per-operation pieces.  INITCY seeds the carry flag: 0 - 0 leaves it clear
C for add, 0 - 1 sets it for sub so that u + not(v) + 1 = u - v.  MERGE joins
C the two non-overlapping halves of a shifted limb (xnor also complements the
C result, see the note above).
ifdef(`OPERATION_addlsh_n',`
  define(`INITCY', `subcc	%g0, 0, %g0')
  define(`MERGE',  `or')
  define(`func',   `mpn_addlsh_n')
')
ifdef(`OPERATION_sublsh_n',`
  define(`INITCY', `subcc	%g0, 1, %g0')
  define(`MERGE',  `xnor')
  define(`func',   `mpn_sublsh_n')
')

C Two pointers per operand, 8 bytes apart, so that the loop can process two
C limbs per iteration with a single shared index register.
define(`rp0',  `rp')
define(`rp1',  `%o2')
define(`up0',  `up')
define(`up1',  `%o3')
define(`vp0',  `vp')
define(`vp1',  `%o4')

MULFUNC_PROLOGUE(mpn_addlsh_n mpn_sublsh_n)
ASM_START()
	REGISTER(%g2,#scratch)
	REGISTER(%g3,#scratch)
PROLOGUE(func)
	save	%sp, -176, %sp		C new register window, 176-byte frame
	mov	64, tnc
	sub	tnc, cnt, tnc		C tnc = 64 - cnt

C Bias the pointers towards the end of the operands and turn n into a
C negative byte offset that counts upwards.  None of these instructions
C touch the condition codes, so the parity test is still valid at the be.
	andcc	n, 1, %g0		C Z set when n is even
	sllx	n, 3, n			C limbs -> bytes
	add	n, -16, n
	add	up, n, up0
	add	vp, n, vp0
	add	rp, n, rp0
	add	up0, 8, up1
	add	vp0, 8, vp1
	add	rp0, -8, rp1
	add	rp0, -16, rp0
	neg	n, n			C n = 16 - 8*(limb count)
	be	L(evn)
	 INITCY				C delay slot, runs on both paths

C Odd limb count: handle one limb before entering the two-limb loop.
L(odd):	ldx	[vp0 + n], %l1		C lowest limb of vp
	mov	0, %l2			C nothing shifted in from below
	ldx	[up0 + n], %l5		C lowest limb of up
	sllx	%l1, cnt, %g3
	brgez	n, L(wd1)		C single limb: go straight to wind-down
	 add	n, 8, n
	ldx	[vp0 + n], %l0
	b	L(lo1)
	 sllx	%l1, cnt, %g3		C delay slot, recomputes the same %g3

C Even limb count: preload the first pair and enter the loop half way.
L(evn):	ldx	[vp0 + n], %l0		C lowest limb of vp
	mov	0, %l3			C nothing shifted in from below
	ldx	[up0 + n], %l4		C lowest limb of up
	ldx	[vp1 + n], %l1
	b	L(lo0)
	 sllx	%l0, cnt, %g1

C Main loop, two limbs per iteration.  Only the addxccc steps write the
C condition codes in here; loads, shifts, MERGE, stores, the register branch
C and the index update all leave the carry untouched between them.
L(top):	addxccc(%l6, %l4, %o0)		C up limb + merged vp limb + carry
	ldx	[vp0 + n], %l0
	sllx	%l1, cnt, %g3
	stx	%o0, [rp0 + n]
L(lo1):	srlx	%l1, tnc, %l3		C bits spilling into the next limb
	MERGE	%l2, %g3, %l7		C spill from the limb below + shifted limb
	ldx	[up0 + n], %l4
	addxccc(%l7, %l5, %o1)		C up limb + merged vp limb + carry
	ldx	[vp1 + n], %l1
	sllx	%l0, cnt, %g1
	stx	%o1, [rp1 + n]
L(lo0):	srlx	%l0, tnc, %l2		C bits spilling into the next limb
	MERGE	%l3, %g1, %l6		C spill from the limb below + shifted limb
	ldx	[up1 + n], %l5
	brlz,pt	n, L(top)		C tests n before the delay-slot update
	 add	n, 16, n

C Wind-down: finish the limbs that are already loaded.
	addxccc(%l6, %l4, %o0)
	sllx	%l1, cnt, %g3
	stx	%o0, [rp0 + n]
L(wd1):	srlx	%l1, tnc, %l3		C bits shifted out of the top limb
	MERGE	%l2, %g3, %l7
	addxccc(%l7, %l5, %o1)
	stx	%o1, [rp1 + n]

C Return value.  add: shifted-out top bits + final carry.  sub: the carry
C flag is the inverse of the borrow, so form carry - 1 (0 or -1) and subtract
C it, which yields shifted-out top bits + borrow.
ifdef(`OPERATION_addlsh_n',
`	addxc(	%l3, %g0, %i0)')
ifdef(`OPERATION_sublsh_n',
`	addxc(	%g0, %g0, %g1)
	add	%g1, -1, %g1
	sub	%l3, %g1, %i0')

	ret
	 restore
EPILOGUE()