1 1.1.1.2 mrg dnl SPARC v9 mpn_sqr_diag_addlsh1 for T3/T4/T5. 2 1.1 mrg 3 1.1 mrg dnl Contributed to the GNU project by Torbjörn Granlund. 4 1.1 mrg 5 1.1 mrg dnl Copyright 2013 Free Software Foundation, Inc. 6 1.1 mrg 7 1.1 mrg dnl This file is part of the GNU MP Library. 8 1.1 mrg dnl 9 1.1 mrg dnl The GNU MP Library is free software; you can redistribute it and/or modify 10 1.1 mrg dnl it under the terms of either: 11 1.1 mrg dnl 12 1.1 mrg dnl * the GNU Lesser General Public License as published by the Free 13 1.1 mrg dnl Software Foundation; either version 3 of the License, or (at your 14 1.1 mrg dnl option) any later version. 15 1.1 mrg dnl 16 1.1 mrg dnl or 17 1.1 mrg dnl 18 1.1 mrg dnl * the GNU General Public License as published by the Free Software 19 1.1 mrg dnl Foundation; either version 2 of the License, or (at your option) any 20 1.1 mrg dnl later version. 21 1.1 mrg dnl 22 1.1 mrg dnl or both in parallel, as here. 23 1.1 mrg dnl 24 1.1 mrg dnl The GNU MP Library is distributed in the hope that it will be useful, but 25 1.1 mrg dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 26 1.1 mrg dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 27 1.1 mrg dnl for more details. 28 1.1 mrg dnl 29 1.1 mrg dnl You should have received copies of the GNU General Public License and the 30 1.1 mrg dnl GNU Lesser General Public License along with the GNU MP Library. If not, 31 1.1 mrg dnl see https://www.gnu.org/licenses/. 32 1.1 mrg 33 1.1 mrg include(`../config.m4') 34 1.1 mrg 35 1.1 mrg C cycles/limb 36 1.1 mrg C UltraSPARC T3: ? 
37 1.1 mrg C UltraSPARC T4: >= 4.5
C NOTE(review): the leading "number revision author" columns on each entry
C look like CVS annotate residue rather than assembler input - confirm and
C strip before building.
38 1.1 mrg
39 1.1 mrg
C Argument aliases.  After the save below the four incoming arguments are
C visible in the in-registers %i0..%i3.
C   rp  destination limb pointer
C   tp  limb pointer whose value is doubled and added in
C   up  limb pointer whose limbs are squared
C   n   limb count of up
40 1.1 mrg define(`rp', `%i0')
41 1.1 mrg define(`tp', `%i1')
42 1.1 mrg define(`up', `%i2')
43 1.1 mrg define(`n', `%i3')
44 1.1 mrg
C mpn_sqr_diag_addlsh1 (rp, tp, up, n)
C
C As coded below the routine stores 2n limbs at rp.  Limb rp[0] is the low
C half of up[0]^2.  Every later pair rp[2i-1], rp[2i] receives the high half
C of up[i-1]^2 and the low half of up[i]^2, each added to one doubled limb of
C tp, and the final limb rp[2n-1] receives the high half of the last square
C plus the outstanding carries.  In total 2n-2 limbs are read from tp and n
C limbs from up.
C
C The code reads up[1], tp[0] and tp[1] before it ever tests n, and the loop
C counter starts at n-2, so callers must pass n >= 2.
C
C Register use inside the routine:
C   %g1       current up limb
C   %o0       low half of the current square
C   %g2       high half of a square
C   %g4, %g5  current pair of tp limbs, doubled in place
C   %o5       carry out of the doubling of the previous pair
C   %o2, %g3  sums waiting to be stored
C
C NOTE(review): umulxhi, addxccc and addxc are written as m4 macros whose
C definitions are not in this file.  The comments below assume they behave
C like the SPARC instructions of the same names - high 64 bits of an unsigned
C product, add with carry-in setting the condition codes, and add with
C carry-in leaving the condition codes alone.  Confirm against config.m4.
45 1.1 mrg ASM_START()
C NOTE(review): presumably tells the assembler that %g2 and %g3 are used as
C scratch registers - confirm against the REGISTER macro definition.
46 1.1 mrg REGISTER(%g2,#scratch)
47 1.1 mrg REGISTER(%g3,#scratch)
48 1.1 mrg PROLOGUE(mpn_sqr_diag_addlsh1)
C Open a new register window with a 176 byte stack frame.
49 1.1 mrg save %sp, -176, %sp
50 1.1 mrg
C Square up[0].  The low half goes straight to rp[0], the high half stays in
C %g2 for the next output limb.
51 1.1 mrg ldx [up+0], %g1
52 1.1 mrg mulx %g1, %g1, %o0
53 1.1 mrg umulxhi(%g1, %g1, %g2)
54 1.1 mrg stx %o0, [rp+0]
55 1.1 mrg
C Prime the pipeline: fetch up[1] and the first two tp limbs and start the
C second square.
56 1.1 mrg ldx [up+8], %g1
57 1.1 mrg ldx [tp+0], %g4
58 1.1 mrg ldx [tp+8], %g5
59 1.1 mrg mulx %g1, %g1, %o0
C Zero %o5 and clear the carry flag in one instruction so the first doubling
C at L(dm) starts with no carry-in.
60 1.1 mrg orcc %g0, %g0, %o5
C Enter the loop in the middle.  The add sits in the branch delay slot and
C sets the loop counter to n-2.
61 1.1 mrg b L(dm)
62 1.1 mrg add n, -2, n
63 1.1 mrg
64 1.1 mrg ALIGN(16)
C Loop top.  up was already advanced at L(dm), so [up+8] is the limb after
C the one whose square is currently held in %o0 and %g2.
65 1.1 mrg L(top): ldx [up+8], %g1
C Add the doubled tp pair to the pending square halves.  The carry out of
C this pair is left in the condition codes and becomes the carry-in of the
C next doubling at L(dm).
66 1.1 mrg addcc %g4, %o2, %o2
67 1.1 mrg addxccc(%g5, %o0, %g3)
C Fetch the next tp pair.  tp itself is advanced a few lines further down,
C hence the offsets 16 and 24.
68 1.1 mrg ldx [tp+16], %g4
69 1.1 mrg ldx [tp+24], %g5
C Start the next square.  %o0 may be overwritten because the sum above has
C already consumed it.
70 1.1 mrg mulx %g1, %g1, %o0
71 1.1 mrg stx %o2, [rp+8]
72 1.1 mrg stx %g3, [rp+16]
73 1.1 mrg add rp, 16, rp
74 1.1 mrg add tp, 16, tp
C Loop middle and initial entry point.  Combine the high half of the previous
C square with the carry saved from the previous doubling.  Plain add is used
C so the condition codes are not disturbed.
75 1.1 mrg L(dm): add %g2, %o5, %o2
76 1.1 mrg umulxhi(%g1, %g1, %g2)
C Double the tp pair as one 128 bit value, taking the carry currently in the
C condition codes as carry-in.
77 1.1 mrg addxccc(%g4, %g4, %g4)
78 1.1 mrg addxccc(%g5, %g5, %g5)
79 1.1 mrg add up, 8, up
C Save the carry out of the doubling in %o5 for the next pass.
80 1.1 mrg addxc( %g0, %g0, %o5)
C The branch tests n before the delay slot add decrements it, so the loop
C body at L(top) runs n-2 times in total.
81 1.1 mrg brnz n, L(top)
82 1.1 mrg add n, -1, n
83 1.1 mrg
C Wind down.  Store the last pair of sums, then the top limb made of the
C final high half, the saved doubling carry and the carry from the last add.
84 1.1 mrg addcc %o2, %g4, %g4
85 1.1 mrg addxccc(%o0, %g5, %g5)
86 1.1 mrg stx %g4, [rp+8]
87 1.1 mrg stx %g5, [rp+16]
88 1.1 mrg addxc( %o5, %g2, %g2)
89 1.1 mrg stx %g2, [rp+24]
90 1.1 mrg
C Return to the caller.  The restore in the delay slot pops the register
C window opened by the save at entry.
91 1.1 mrg ret
92 1.1 mrg restore
93 1.1 mrg EPILOGUE()
94