dnl  SPARC v9 mpn_addmul_1 for T3/T4/T5.

dnl  Contributed to the GNU project by David Miller and Torbjörn Granlund.

dnl  Copyright 2013 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.
include(`../config.m4')

C		   cycles/limb
C UltraSPARC T3:	26
C UltraSPARC T4:	4.5

C INPUT PARAMETERS
define(`rp', `%i0')
define(`up', `%i1')
define(`n',  `%i2')
define(`v0', `%i3')

C Source limbs (u0..u3) and destination limbs (r0..r3) of the 4-way loop.
define(`u0',  `%l0')
define(`u1',  `%l1')
define(`u2',  `%l2')
define(`u3',  `%l3')
define(`r0',  `%l4')
define(`r1',  `%l5')
define(`r2',  `%l6')
define(`r3',  `%l7')

C mpn_addmul_1 (rp, up, n, v0)
C
C Multiplies the n limbs at up by the single limb v0, adds the products into
C the n limbs at rp in place, and leaves the resulting carry limb in %i0 just
C before the register window is restored.
C
C Structure:
C   * n mod 4 selects one of the entry sequences b0/b01/b10/b11, which handle
C     0, 1, 2 or 3 leading limbs.
C   * The remaining limbs (a multiple of 4) go through the unrolled loop at
C     top/lo0, followed by a wind-down that stores the last four limbs.
C
C State carried between the stages and between loop passes:
C   * %g5 holds the high product limb not yet added into rp.
C   * The integer carry flag holds the pending carry bit of the last addition.
C
C umulxhi, addxc and addxccc are written as macro calls; their definitions are
C not in this file (presumably they expand to the SPARC instructions of the
C same names).  As used here, mulx/umulxhi give the low/high product limb,
C addxccc adds with carry-in and updates the flags, and addxc adds with
C carry-in without updating them.
C
C NOTE(review): the first source limb is loaded before n is examined, so
C callers are presumably required to pass n >= 1 -- confirm against the mpn
C calling contract.

ASM_START()
	REGISTER(%g2,#scratch)
	REGISTER(%g3,#scratch)
PROLOGUE(mpn_addmul_1)
	save	%sp, -176, %sp
	ldx	[up+0], %g1			C first source limb, needed on every path

	and	n, 3, %g3			C dispatch on n mod 4
	brz	%g3, L(b0)
	 addcc	%g0, %g0, %g5			C clear carry limb, flag
	cmp	%g3, 2
	bcs	%xcc, L(b01)			C n mod 4 = 1
	 nop
	be	%xcc, L(b10)			C n mod 4 = 2
	 ldx	[up+8], %g5			C delay slot: second limb, used by b10 and b11

C n mod 4 = 3: process three limbs, then join the loop or exit.
L(b11):	ldx	[up+16], u3
	mulx	%g1, v0, %o2
	umulxhi(%g1, v0, %o3)
	ldx	[rp+0], r1
	mulx	%g5, v0, %o4
	ldx	[rp+8], r2
	umulxhi(%g5, v0, %o5)
	ldx	[rp+16], r3
	mulx	u3, v0, %g4
	umulxhi(u3, v0, %g5)
	addcc	%o3, %o4, %o4			C chain high limbs into next low limbs
	addxccc(%o5, %g4, %g4)
	addxc(	%g0, %g5, %g5)			C fold chain carry into carry limb
	addcc	r1, %o2, r1			C add products into destination
	stx	r1, [rp+0]
	addxccc(r2, %o4, r2)
	stx	r2, [rp+8]
	addxccc(r3, %g4, r3)
	stx	r3, [rp+16]
	add	n, -3, n
	add	up, 24, up
	brz	n, L(xit)
	 add	rp, 24, rp
	b	L(com)
	 nop

C n mod 4 = 2: process two limbs, then join the loop or exit.
L(b10):	mulx	%g1, v0, %o4
	ldx	[rp+0], r2
	umulxhi(%g1, v0, %o5)
	ldx	[rp+8], r3
	mulx	%g5, v0, %g4
	umulxhi(%g5, v0, %g5)
	addcc	%o5, %g4, %g4
	addxc(	%g0, %g5, %g5)			C fold chain carry into carry limb
	addcc	r2, %o4, r2
	stx	r2, [rp+0]
	addxccc(r3, %g4, r3)
	stx	r3, [rp+8]
	add	n, -2, n
	add	up, 16, up
	brz	n, L(xit)
	 add	rp, 16, rp
	b	L(com)
	 nop

C n mod 4 = 1: process one limb, then fall into the loop setup or exit.
L(b01):	ldx	[rp+0], r3
	mulx	%g1, v0, %g4
	umulxhi(%g1, v0, %g5)
	addcc	r3, %g4, r3
	stx	r3, [rp+0]
	add	n, -1, n
	add	up, 8, up
	brz	n, L(xit)
	 add	rp, 8, rp

C Loop setup.  com reloads the first limb after up has been advanced; b0 is
C entered directly when n mod 4 = 0, with %g1 still holding the limb loaded at
C function entry.
L(com):	ldx	[up+0], %g1
L(b0):	ldx	[up+8], u1
	ldx	[up+16], u2
	ldx	[up+24], u3
	mulx	%g1, v0, %o0
	umulxhi(%g1, v0, %o1)
	b	L(lo0)
	 nop

C Main loop, four limbs per pass.  The first half finishes the previous group
C of four (adds into r0..r3 and stores them at rp-32..rp-8); the second half,
C from lo0 on, forms the products for the current group.
	ALIGN(16)
L(top):	ldx	[up+0], u0
	addxc(	%g0, %g5, %g5)		C propagate carry into carry limb
	ldx	[up+8], u1
	addcc	r0, %o0, r0
	ldx	[up+16], u2
	addxccc(r1, %o2, r1)
	ldx	[up+24], u3
	addxccc(r2, %o4, r2)
	stx	r0, [rp-32]
	addxccc(r3, %g4, r3)
	stx	r1, [rp-24]
	mulx	u0, v0, %o0
	stx	r2, [rp-16]
	umulxhi(u0, v0, %o1)
	stx	r3, [rp-8]
L(lo0):	mulx	u1, v0, %o2
	ldx	[rp+0], r0
	umulxhi(u1, v0, %o3)
	ldx	[rp+8], r1
	mulx	u2, v0, %o4
	ldx	[rp+16], r2
	umulxhi(u2, v0, %o5)
	ldx	[rp+24], r3
	mulx	u3, v0, %g4
	addxccc(%g5, %o0, %o0)		C carry limb and pending carry flag enter here
	umulxhi(u3, v0, %g5)
	add	up, 32, up
	addxccc(%o1, %o2, %o2)
	add	rp, 32, rp
	addxccc(%o3, %o4, %o4)
	add	n, -4, n
	addxccc(%o5, %g4, %g4)
	brgz	n, L(top)
	 nop

C Wind-down: add the last group of products into rp and store it.
	addxc(	%g0, %g5, %g5)
	addcc	r0, %o0, r0
	stx	r0, [rp-32]
	addxccc(r1, %o2, r1)
	stx	r1, [rp-24]
	addxccc(r2, %o4, r2)
	stx	r2, [rp-16]
	addxccc(r3, %g4, r3)
	stx	r3, [rp-8]
L(xit):	addxc(	%g0, %g5, %i0)		C result = carry limb + final carry flag
	ret
	 restore
EPILOGUE()