dnl  SPARC T3/T4/T5 mpn_divexact_1.

dnl  Contributed to the GNU project by Torbjörn Granlund.

dnl  Copyright 2013 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.
include(`../config.m4')

C		    cycles/limb
C UltraSPARC T3:	 31
C UltraSPARC T4/T5:	 20-26  hits 20 early, then sharply drops

C mpn_divexact_1 -- divide the n-limb operand at ap by the single limb d and
C store n quotient limbs at qp.  For n > 1 the quotient is formed by
C multiplying with the inverse of the odd part of d modulo 2^64, so the result
C is only meaningful when the division leaves no remainder (as the routine
C name indicates).  NOTE(review): n >= 1 appears to be assumed -- confirm
C against callers.

C INPUT PARAMETERS
define(`qp',  `%i0')		C quotient limbs are written here
define(`ap',  `%i1')		C dividend limbs are read from here
define(`n',   `%i2')		C limb count, reused as the loop counter
define(`d',   `%i3')		C divisor, shifted down to its odd part below

define(`dinv',`%o4')		C inverse of the odd part of d modulo 2^64

ASM_START()
	REGISTER(%g2,#scratch)
	REGISTER(%g3,#scratch)
PROLOGUE(mpn_divexact_1)
	save	%sp, -176, %sp		C new register window, 176 byte frame
	cmp	n, 1
	bne,pt	%xcc, L(gt1)
	 ldx	[ap], %o5		C delay slot, runs on both paths: o5 = ap[0]

C Single limb operand: one hardware divide produces the quotient.
	udivx	%o5, d, %g1
	stx	%g1, [qp]
	return	%i7+8
	 nop

C Two or more limbs.  First split d into 2^cnt times an odd value.
L(gt1):	add	d, -1, %g1
	andn	%g1, d, %g1		C ones exactly in the trailing zero bits of d
	popc	%g1, %i4		C i4 = count_trailing_zeros(d)

	srlx	d, %i4, d		C d is odd from here on
	srlx	d, 1, %g1
	and	%g1, 127, %g1		C table index = bits 1..7 of the odd d

C Fetch a one byte seed from binvert_limb_table, then refine it with three
C rounds of  x = 2*x - x*x*d.  Each round doubles the number of correct low
C bits.  Assumes LEA64 leaves the table address in g2 (the ldub indexes off
C g2) and uses g4 as scratch -- TODO confirm against the macro definition.
	LEA64(binvert_limb_table, g2, g4)
	ldub	[%g2+%g1], %g1
	add	%g1, %g1, %g2		C round 1: 2*x
	mulx	%g1, %g1, %g1		C          x*x
	mulx	%g1, d, %g1		C          x*x*d
	sub	%g2, %g1, %g2
	add	%g2, %g2, %g1		C round 2
	mulx	%g2, %g2, %g2
	mulx	%g2, d, %g2
	sub	%g1, %g2, %g1
	add	%g1, %g1, %o7		C round 3, finished in the delay slot below
	mulx	%g1, %g1, %g1
	mulx	%g1, d, %g1
	add	n, -2, n		C bias the counter: each loop below runs n-1 times
	brz,pt	%i4, L(odd)		C no trailing zeros: no shifting needed
	 sub	%o7, %g1, dinv		C delay slot, runs on both paths

C Divisor was even.  Dividend limbs are shifted right by cnt bits on the fly,
C each shifted limb being assembled from two adjacent source limbs.
L(even):
	mov	0, %g4			C g4 = limb to subtract, initially zero
	sub	%g0, %i4, %o2		C o2 = -cnt; sllx uses only the low 6 bits,
					C so this shifts left by 64-cnt
	srlx	%o5, %i4, %o5		C low part of the first shifted limb
L(even_loop):
	ldx	[ap+8], %g3		C next source limb
	add	ap, 8, ap
	sllx	%g3, %o2, %g5		C its low cnt bits, moved to the top
	or	%g5, %o5, %g5		C g5 = complete shifted limb
	srlx	%g3, %i4, %o5		C low part of the following shifted limb
	subcc	%g5, %g4, %g4		C subtract pending limb, borrow goes to carry
	mulx	%g4, dinv, %g1		C quotient limb
	stx	%g1, [qp]
	add	qp, 8, qp
	umulxhi(d, %g1, %g1)		C high half of d * quotient limb
	addxc(	%g1, %g0, %g4)		C plus the borrow = next limb to subtract
	brgz,pt	n, L(even_loop)
	 add	n, -1, n		C delay slot: count down

C Most significant quotient limb: only the leftover high bits remain.
	sub	%o5, %g4, %g4
	mulx	%g4, dinv, %g1
	stx	%g1, [qp]
	return	%i7+8
	 nop

C Divisor was odd.  The first quotient limb is produced ahead of the loop.
L(odd):
	mulx	dinv, %o5, %g1		C q[0] = ap[0] * dinv
	stx	%g1, [qp]
	add	qp, 8, qp
	addcc	%g0, 0, %g4		C zero g4 and clear carry for the first addxc
L(odd_loop):
	umulxhi(d, %g1, %g1)		C high half of d * previous quotient limb
	ldx	[ap+8], %g5		C next dividend limb
	add	ap, 8, ap
	addxc(	%g1, %g0, %g1)		C add borrow left by the previous subcc
	subcc	%g5, %g1, %g1		C subtract, borrow goes to carry
	mulx	%g1, dinv, %g1		C quotient limb
	stx	%g1, [qp]
	add	qp, 8, qp
	brgz,pt	n, L(odd_loop)
	 add	n, -1, n		C delay slot: count down

	return	%i7+8
	 nop
EPILOGUE()