dnl  Alpha ev6 nails mpn_addmul_3.

dnl  Copyright 2002, 2006 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.

include(`../config.m4')

C Runs at 3.0 cycles/limb.

C With 2-way unrolling, we could probably reach 2.25 c/l (3.33 i/c).

C Overview
C
C Three multiplier limbs are read from vp.  For each of the n limbs at up
C the three products are formed and summed, together with the limb already
C at rp, into a running three-limb accumulator.  n limbs at rp are read,
C updated and written back; two further limbs, at rp[n] and rp[n+1], are
C written without being read first; the remaining top word is returned
C in r0.
C
C Each multiplier limb is shifted left by NAIL_BITS up front.  With that
C pre-shift, umulh delivers the part of the product above NUMB_BITS, and
C the mulq result shifted right by NAIL_BITS delivers the part below it.
C
C The code decrements n before testing it, so it relies on n >= 1.


C Arguments
define(`rp',`r16')
define(`up',`r17')
define(`n',`r18')
define(`vp',`r19')

C Constant mask and the limbs most recently loaded from up and rp
define(`numb_mask',`r24')
define(`ulimb',`r25')
define(`rlimb',`r27')

C Multiplier limbs, pre-shifted by NAIL_BITS
define(`v0',`r6')
define(`v1',`r7')
define(`v2',`r23')

C Product halves: mXa is the mulq result, mXb the umulh result
define(`m0a',`r0')
define(`m0b',`r1')
define(`m1a',`r2')
define(`m1b',`r3')
define(`m2a',`r20')
define(`m2b',`r21')

C Running accumulator, least significant word first
define(`acc0',`r4')
define(`acc1',`r5')
define(`acc2',`r22')

C Temporaries.  nailcy shares r19 with vp and is therefore only usable once
C the last load through vp has been issued.
define(`shtmp',`r8')
define(`nailcy',`r19')
define(`numb_out',`r28')

define(`NAIL_BITS',`GMP_NAIL_BITS')
define(`NUMB_BITS',`GMP_NUMB_BITS')

C This declaration is munged by configure
NAILS_SUPPORT(3-63)

ASM_START()
PROLOGUE(mpn_addmul_3)
C Fetch the three multiplier limbs and pre-shift them.
	ldq	v0, 0(vp)
	ldq	v1, 8(vp)
	ldq	v2, 16(vp)
	sll	v0, NAIL_BITS, v0
	sll	v1, NAIL_BITS, v1
	sll	v2, NAIL_BITS, v2

C numb_mask = all ones shifted right by NAIL_BITS.
	lda	numb_mask, -1(r31)
	srl	numb_mask, NAIL_BITS, numb_mask

C Clear the accumulator and the carried nail (this overwrites vp).
	bis	r31, r31, acc0
	bis	r31, r31, acc1
	bis	r31, r31, acc2
	bis	r31, r31, nailcy

C Start the products for the first limb of up.
	ldq	ulimb, 0(up)
	lda	up, 8(up)
	lda	n, -1(n)
	mulq	v0, ulimb, m0a			C U1
	umulh	v0, ulimb, m0b			C U1
	mulq	v1, ulimb, m1a			C U1
	umulh	v1, ulimb, m1b			C U1
	mulq	v2, ulimb, m2a			C U1
	umulh	v2, ulimb, m2b			C U1
	beq	n, L(end)			C U0

C Main loop.  Each pass consumes the products started on the previous pass
C (or in the prologue) and starts the products for the next limb of up.
C The instruction order below is kept exactly as scheduled for ev6.
	ALIGN(16)
L(top):
	ldq	rlimb, 0(rp)			C L1
	ldq	ulimb, 0(up)			C L0
	bis	r31, r31, r31			C U0 nop
	addq	nailcy, acc0, acc0		C U1 fold in the carried nail

	lda	rp, 8(rp)			C L1
	srl	m0a, NAIL_BITS, shtmp		C U0
	lda	up, 8(up)			C L0
	mulq	v0, ulimb, m0a			C U1

	addq	shtmp, acc0, nailcy		C U0
	addq	m0b, acc1, acc0			C L1
	umulh	v0, ulimb, m0b			C U1
	bis	r31, r31, r31			C L0 nop

	addq	rlimb, nailcy, nailcy		C L1
	srl	m1a, NAIL_BITS, shtmp		C U0
	bis	r31, r31, r31			C L0 nop
	mulq	v1, ulimb, m1a			C U1

	addq	shtmp, acc0, acc0		C U0
	addq	m1b, acc2, acc1			C L1
	umulh	v1, ulimb, m1b			C U1
	and	nailcy, numb_mask, numb_out	C L0 numb part to store

	bis	r31, r31, r31			C L1 nop
	srl	m2a, NAIL_BITS, shtmp		C U0
	lda	n, -1(n)			C L0
	mulq	v2, ulimb, m2a			C U1

	addq	shtmp, acc1, acc1		C L0
	bis	r31, m2b, acc2			C L1
	umulh	v2, ulimb, m2b			C U1
	srl	nailcy, NUMB_BITS, nailcy	C U0 nail part to carry

	stq	numb_out, -8(rp)		C L
	bne	n, L(top)			C U0

C Wind down: consume the last set of products, with no new ones started.
L(end):
C Last limb that is read from rp, updated and written back.
	ldq	rlimb, 0(rp)
	lda	rp, 8(rp)
	addq	nailcy, acc0, acc0
	srl	m0a, NAIL_BITS, shtmp
	addq	shtmp, acc0, nailcy
	addq	rlimb, nailcy, nailcy
	and	nailcy, numb_mask, numb_out
	stq	numb_out, -8(rp)
	srl	nailcy, NUMB_BITS, nailcy

C Shift the accumulator down by one word, adding the remaining halves.
	addq	m0b, acc1, acc0
	srl	m1a, NAIL_BITS, shtmp
	addq	shtmp, acc0, acc0
	addq	m1b, acc2, acc1
	srl	m2a, NAIL_BITS, shtmp
	addq	shtmp, acc1, acc1
	bis	r31, m2b, acc2

C First limb above the updated ones.
	addq	nailcy, acc0, acc0
	and	acc0, numb_mask, numb_out
	stq	numb_out, 0(rp)
	srl	acc0, NUMB_BITS, nailcy

C Second limb above the updated ones.
	addq	nailcy, acc1, acc1
	and	acc1, numb_mask, numb_out
	stq	numb_out, 8(rp)
	srl	acc1, NUMB_BITS, nailcy

C The return value goes in r0, which is what m0a names.
	addq	nailcy, acc2, m0a

	ret	r31, (r26), 1
EPILOGUE()
ASM_END()