dnl  SPARC T3/T4/T5 mpn_bdiv_dbm1c.

dnl  Contributed to the GNU project by Torbjörn Granlund.

dnl  Copyright 2013 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.
include(`../config.m4')

C		   cycles/limb
C UltraSPARC T3:	25
C UltraSPARC T4/T5:	 4

C mpn_bdiv_dbm1c (qp, ap, n, bd, h)
C
C For each of the n 64-bit limbs a read from ap, lowest address first:
C
C	p = bd * a		128-bit product; mulx gives the low half and
C				umulxhi the high half
C	h = h - lo(p)		subcc, which also records the borrow in the
C				condition codes
C	store h to qp		same limb index as the a just consumed
C	h = h - hi(p) - borrow	addxc adds the borrow into hi(p), then sub
C
C The last h is written to %i0 immediately before ret/restore, which makes it
C the value handed back to the caller.
C
C Structure:
C   * Entry code dispatches on n mod 4 to one of L(b0)..L(b3).  Each feed-in
C     path computes the product of the first limb, preloads the next limb(s)
C     and biases qp/ap so that the fixed offsets used inside the loop line up.
C   * The loop body is unrolled four times (entry points L(lo2), L(lo1),
C     L(lo0), L(lo3)) and software pipelined: the product of limb i+1 is
C     started before the subtract/store for limb i, and loads run further
C     ahead still.  Two register sets (plo0/phi0 and plo1/phi1, a0 and a1)
C     alternate between consecutive limbs.
C   * L(end)/L(wd2)/L(wd1) drain the last two limbs still in the pipeline.
C
C No branch in this file uses the annul bit, so every delay-slot instruction
C (marked by one extra leading space) executes whether or not the branch is
C taken.
C
C umulxhi(...) and addxc(...) are written as m4 macro calls; their definitions
C are not in this file.  They are taken here to behave like the SPARC T3
C instructions of the same names -- TODO confirm against the m4 definitions.
C
C NOTE(review): there is no separate path for n = 0; such a call would be
C dispatched to L(b0), which loads and stores unconditionally.  Callers are
C presumably required to pass n >= 1 -- confirm against the mpn interface.

C INPUT PARAMETERS
define(`qp',  `%i0')
define(`ap',  `%i1')
define(`n',   `%i2')
define(`bd',  `%i3')
define(`h',   `%i4')

C Working registers: low/high product halves and the two limb buffers.
define(`plo0',`%g4')  define(`plo1',`%g5')
define(`phi0',`%l0')  define(`phi1',`%l1')
define(`a0',  `%g1')  define(`a1',  `%g3')

ASM_START()
	REGISTER(%g2,#scratch)
	REGISTER(%g3,#scratch)
PROLOGUE(mpn_bdiv_dbm1c)
	save	%sp, -176, %sp		C new register window, 176-byte frame

	and	n, 3, %g5		C %g5 = n mod 4 selects the feed-in path
	ldx	[ap + 0], %g2		C first limb, needed by every path
	add	n, -5, n		C bias the count for the loop test below
	brz	%g5, L(b0)		C n mod 4 = 0
	 cmp	%g5, 2			C delay slot: flags for the next two tests
	bcs	%xcc, L(b1)		C %g5 < 2, i.e. n mod 4 = 1
	 nop
	be	%xcc, L(b2)		C n mod 4 = 2
	 nop
					C fall through: n mod 4 = 3

L(b3):	ldx	[ap + 8], a0
	mulx	bd, %g2, plo1
	umulxhi(bd, %g2, phi1)
	ldx	[ap + 16], a1
	add	qp, -24, qp		C first store at L(lo3) uses [qp + 24]
	b	L(lo3)
	 add	ap, -8, ap		C delay slot: L(lo3) adds 32 to ap

L(b2):	ldx	[ap + 8], a1
	mulx	bd, %g2, plo0
	umulxhi(bd, %g2, phi0)
	brlz,pt n, L(wd2)		C biased n < 0: only two limbs, go to tail
	 nop
L(gt2):	ldx	[ap + 16], a0
	add	ap, 16, ap
	b	L(lo2)
	 add	n, -1, n		C delay slot

L(b1):	mulx	bd, %g2, plo1
	umulxhi(bd, %g2, phi1)
	brlz,pn n, L(wd1)		C biased n < 0: single limb, go to tail
	 add	qp, -8, qp		C delay slot: L(wd1)/L(lo1) store at [qp + 8]
L(gt1):	ldx	[ap + 8], a0
	ldx	[ap + 16], a1
	b	L(lo1)
	 add	ap, 8, ap		C delay slot

L(b0):	ldx	[ap + 8], a1
	mulx	bd, %g2, plo0
	umulxhi(bd, %g2, phi0)
	ldx	[ap + 16], a0
	b	L(lo0)
	 add	qp, -16, qp		C delay slot: L(lo0) stores at [qp + 16]

C Main loop: four limbs per iteration.  Each quarter starts the product of the
C next limb, then finishes the pending one: subtract its low half, fold the
C borrow into its high half, store, and subtract the high half.
L(top):	ldx	[ap + 0], a0
	sub	h, phi1, h		C finish limb stored at [qp + 24] last pass
L(lo2):	mulx	bd, a1, plo1
	umulxhi(bd, a1, phi1)
	subcc	h, plo0, h
	addxc(	phi0, %g0, phi0)	C phi0 += borrow from the subcc
	stx	h, [qp + 0]
	ldx	[ap + 8], a1
	sub	h, phi0, h
L(lo1):	mulx	bd, a0, plo0
	umulxhi(bd, a0, phi0)
	subcc	h, plo1, h
	addxc(	phi1, %g0, phi1)	C phi1 += borrow from the subcc
	stx	h, [qp + 8]
	ldx	[ap + 16], a0
	sub	h, phi1, h
L(lo0):	mulx	bd, a1, plo1
	umulxhi(bd, a1, phi1)
	subcc	h, plo0, h
	addxc(	phi0, %g0, phi0)	C phi0 += borrow from the subcc
	stx	h, [qp + 16]
	ldx	[ap + 24], a1
	sub	h, phi0, h
L(lo3):	mulx	bd, a0, plo0
	umulxhi(bd, a0, phi0)
	subcc	h, plo1, h
	addxc(	phi1, %g0, phi1)	C phi1 += borrow from the subcc
	stx	h, [qp + 24]
	add	ap, 32, ap
	add	qp, 32, qp
	brgz,pt	n, L(top)		C repeat while the biased count is positive
	 add	n, -4, n		C delay slot: four limbs consumed

C Wind-down: a1 holds the last limb (not yet multiplied) and plo0/phi0 hold
C the product of the limb before it.
L(end):	sub	h, phi1, h		C finish limb stored at [qp + 24] above
L(wd2):	mulx	bd, a1, plo1
	umulxhi(bd, a1, phi1)
	subcc	h, plo0, h
	addxc(	phi0, %g0, phi0)
	stx	h, [qp + 0]
	sub	h, phi0, h
L(wd1):	subcc	h, plo1, h
	addxc(	phi1, %g0, phi1)
	stx	h, [qp + 8]
	sub	h, phi1, %i0		C final h goes to %i0 as the return value

	ret
	 restore			C delay slot: pop the register window
EPILOGUE()