dnl  SPARC T3/T4/T5 mpn_bdiv_q_1.

dnl  Contributed to the GNU project by Torbjörn Granlund.

dnl  Copyright 2013, 2017 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.

include(`../config.m4')

C                  cycles/limb
C UltraSPARC T3:	31
C UltraSPARC T4/T5:	20-26  hits 20 early, then sharply drops

C Two entry points that share one body:
C
C   mpn_bdiv_q_1      arguments qp, ap, n, d.  Computes cnt and dinv from d
C                     and then joins the code of mpn_pi1_bdiv_q_1.
C   mpn_pi1_bdiv_q_1  arguments qp, ap, n, d, dinv, cnt, all supplied by the
C                     caller.
C
C Both store n 64-bit limbs at qp.  Each stored limb is
C (source limb - carry) * dinv, where the carry is the high half of
C d * (previous stored limb) plus the borrow of the previous subtraction.
C When cnt is nonzero the source limbs are the limbs at ap shifted right by
C cnt bits across limb boundaries.
C
C NOTE(review): the code assumes d is nonzero and n >= 1; neither is checked.

C INPUT PARAMETERS
define(`qp',  `%i0')
define(`ap',  `%i1')
define(`n',   `%i2')
define(`d',   `%i3')
define(`dinv',`%i4')		C computed below in mpn_bdiv_q_1, an input in mpn_pi1_bdiv_q_1
define(`cnt', `%i5')		C computed below in mpn_bdiv_q_1, an input in mpn_pi1_bdiv_q_1

define(`tnc', `%o2')		C negated shift count, used for the left shifts

ASM_START()
	REGISTER(%g2,#scratch)
	REGISTER(%g3,#scratch)
PROLOGUE(mpn_bdiv_q_1)
	save	%sp, -176, %sp
	ldx	[ap], %o5		C first source limb
	add	d, -1, %g1
	andn	%g1, d, %g1		C (d-1) & ~d: ones below the lowest set bit of d
	popc	%g1, cnt		C cnt = number of trailing zero bits of d

	srlx	d, cnt, d		C drop the trailing zeros of d
	srlx	d, 1, %g1
	and	%g1, 127, %g1		C table index = (d >> 1) & 127
	LEA64(binvert_limb_table, g2, g4)	C macro defined elsewhere; %g2 is used as the table base below
	ldub	[%g2+%g1], %g1		C byte-sized starting value x (presumably an inverse of d mod 2^8)
C Three rounds of x = 2*x - x*x*d.  Assuming the table value is correct to 8
C bits, each round doubles the number of correct low bits (16, 32, 64).
	add	%g1, %g1, %g2
	mulx	%g1, %g1, %g1
	mulx	%g1, d, %g1
	sub	%g2, %g1, %g2
	add	%g2, %g2, %g1
	mulx	%g2, %g2, %g2
	mulx	%g2, d, %g2
	sub	%g1, %g2, %g1
	add	%g1, %g1, %o7
	mulx	%g1, %g1, %g1
	mulx	%g1, d, %g1
	add	n, -2, n		C bias the counter the same way mpn_pi1_bdiv_q_1 does
	brz,pt	cnt, L(norm)		C no shifting needed when cnt is zero
	 sub	%o7, %g1, dinv		C delay slot: last subtraction of the third round

	brlz,pt	n, L(edu)		C n was 1: only the final limb remains
	 srlx	%o5, cnt, %o5		C delay slot, always executed
	b	L(eee)			C join the shifting loop of mpn_pi1_bdiv_q_1
	 mov	0, %g4			C delay slot: initial carry is zero
EPILOGUE()

PROLOGUE(mpn_pi1_bdiv_q_1)
	save	%sp, -176, %sp
	ldx	[ap], %o5		C first source limb

	brz,pt	cnt, L(norm)
	 add	n, -2, n		C delay slot: loops below run (n-1) times in total

C Path for nonzero cnt.
L(unorm):
	brlz,pt	n, L(edu)		C single-limb operand
	 srlx	%o5, cnt, %o5		C delay slot: low part of the first shifted limb
	mov	0, %g4			C carry limb
L(eee):	sub	%g0, cnt, tnc		C tnc = -cnt; sllx uses only the low 6 bits, giving 64-cnt

L(tpu):	ldx	[ap+8], %g3		C next source limb
	add	ap, 8, ap
	sllx	%g3, tnc, %g5
	or	%g5, %o5, %g5		C current limb of the shifted operand
	srlx	%g3, cnt, %o5		C keep the remaining bits for the next round
	subcc	%g5, %g4, %g4		C subtract carry limb; the borrow goes to the carry flag
	mulx	%g4, dinv, %g1		C quotient limb
	stx	%g1, [qp]
	add	qp, 8, qp
	umulxhi(d, %g1, %g1)		C high half of d * quotient limb
	addxc(	%g1, %g0, %g4)		C new carry limb = high half + borrow from the subcc above
	brgz,pt	n, L(tpu)
	 add	n, -1, n		C delay slot: decrement after the test

	sub	%o5, %g4, %o5		C last shifted limb minus carry
L(edu):	mulx	%o5, dinv, %g1		C final quotient limb
	return	%i7+8
	 stx	%g1, [%o0]		C delay slot runs in the restored window, where qp is %o0

C Path for cnt equal to zero.
L(norm):
	mulx	dinv, %o5, %g1		C first quotient limb
	brlz,pt	n, L(edn)		C single-limb operand: done after the store
	 stx	%g1, [qp]		C delay slot, always executed
	add	qp, 8, qp
	addcc	%g0, 0, %g4		C clear the carry flag (and %g4) before the first addxc

L(tpn):	umulxhi(d, %g1, %g1)		C high half of d * previous quotient limb
	ldx	[ap+8], %g5		C next source limb
	add	ap, 8, ap
	addxc(	%g1, %g0, %g1)		C add the borrow left by the previous subcc
	subcc	%g5, %g1, %g1		C source limb minus carry; the borrow goes to the carry flag
	mulx	%g1, dinv, %g1		C quotient limb
	stx	%g1, [qp]
	add	qp, 8, qp
	brgz,pt	n, L(tpn)
	 add	n, -1, n		C delay slot: decrement after the test

L(edn):	return	%i7+8
	 nop
EPILOGUE()