dnl  Alpha mpn_addlsh2_n/mpn_sublsh2_n -- rp[] = up[] +- (vp[] << 2).

dnl  Copyright 2003, 2013 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.

include(`../config.m4')

C      cycles/limb
C EV4:     ?
C EV5:     6
C EV6:     3.75

C TODO
C  * Tune to reach 3.5 c/l on ev6 and 5.75 c/l on ev5.

C Computes rp[i] = up[i] +- (vp[i] << 2) over n 64-bit limbs, least
C significant limb first.  The result left in r0 is the carry or borrow out
C of the top limb plus the two bits shifted out of the top v limb.

C Incoming arguments.
define(`rp',`r16')
define(`up',`r17')
define(`vp',`r18')
define(`n', `r19')

C Limbs in flight.  The 0 and 1 variants alternate between pipeline stages.
define(`u0', `r8')
define(`u1', `r1')
define(`v0', `r4')
define(`v1', `r5')

C cy0 and cy1 alternate as the carry passed from one stage to the next, cy
C is a scratch carry, ps is the sum before the carry in is applied, rr is
C the limb stored, and sl is the shifted v limb.  r2 and r3 are used
C unnamed: they hold the top two bits of the previous v limb.
define(`cy0', `r0')
define(`cy1', `r20')
define(`cy',  `r22')
define(`rr',  `r24')
define(`ps',  `r25')
define(`sl',  `r28')

ifdef(`OPERATION_addlsh2_n',`
  define(ADDSUB,       addq)
  define(CARRY,       `cmpult $1,$2,$3')
  define(func, mpn_addlsh2_n)
')
ifdef(`OPERATION_sublsh2_n',`
  define(ADDSUB,       subq)
  define(CARRY,       `cmpult $2,$1,$3')
  define(func, mpn_sublsh2_n)
')

MULFUNC_PROLOGUE(mpn_addlsh2_n mpn_sublsh2_n)

ASM_START()
PROLOGUE(func)
C Dispatch on n mod 4 so that the 4-way unrolled loop is entered at the
C stage that leaves exactly one limb for the code at L(end).
	and	n, 2, cy0		C bit 1 of n, tested below
	blbs	n, L(bx1)		C odd n
L(bx0):	ldq	v1, 0(vp)
	ldq	u1, 0(up)
	bis	r31, r31, r2		C no bits shifted in from below
	bne	cy0, L(b10)

C n mod 4 = 0.  cy0 is already zero here and serves as the carry in.
L(b00):	lda	vp, 48(vp)
	lda	up, -16(up)
	lda	rp, -8(rp)
	s4addq	v1, r31, sl		C first limb, nothing shifted in
	br	r31, L(lo0)

C n mod 4 = 2.
L(b10):	lda	vp, 32(vp)
	lda	rp, 8(rp)
	lda	cy0, 0(r31)		C clear the carry in
	br	r31, L(lo2)

L(bx1):	ldq	v0, 0(vp)
	ldq	u0, 0(up)
	lda	cy1, 0(r31)		C clear the carry in
	bis	r31, r31, r3		C no bits shifted in from below
	nop
	beq	cy0, L(b01)

C n mod 4 = 3.
L(b11):	lda	vp, 40(vp)
	lda	up, -24(up)
	lda	rp, 16(rp)
	br	r31, L(lo3)

C n mod 4 = 1.  A single limb goes straight to L(end).
L(b01):	lda	n, -4(n)
	ble	n, L(end)
	lda	vp, 24(vp)
	lda	up, -8(up)

C Main loop, four limbs per iteration.  Each stage consumes the u and v
C limbs loaded by the previous stage and loads the pair for the next one.
	ALIGN(16)
L(top):	s4addq	v0, r3, sl		C combined vlimb
	ldq	v1, -16(vp)
	ADDSUB	u0, sl, ps		C ulimb +- (vlimb << 2)
	ldq	u1, 16(up)
	srl	v0, 62, r2		C high v bits
	ADDSUB	ps, cy1, rr		C consume carry from previous operation
	CARRY(	ps, u0, cy0)		C carry out #2
	stq	rr, 0(rp)
	CARRY(	rr, ps, cy)		C carry out #3
	lda	vp, 32(vp)		C bookkeeping
	addq	cy, cy0, cy0		C final carry out
	s4addq	v1, r2, sl
L(lo0):	ldq	v0, -40(vp)
	ADDSUB	u1, sl, ps
	ldq	u0, 24(up)
	srl	v1, 62, r3
	ADDSUB	ps, cy0, rr
	CARRY(	ps, u1, cy1)
	stq	rr, 8(rp)
	CARRY(	rr, ps, cy)
	lda	rp, 32(rp)		C bookkeeping
	addq	cy, cy1, cy1
L(lo3):	s4addq	v0, r3, sl
	ldq	v1, -32(vp)
	ADDSUB	u0, sl, ps
	ldq	u1, 32(up)
	srl	v0, 62, r2
	ADDSUB	ps, cy1, rr
	CARRY(	ps, u0, cy0)
	stq	rr, -16(rp)
	CARRY(	rr, ps, cy)
	lda	up, 32(up)		C bookkeeping
	addq	cy, cy0, cy0
L(lo2):	s4addq	v1, r2, sl
	ldq	v0, -24(vp)
	ADDSUB	u1, sl, ps
	ldq	u0, 8(up)
	srl	v1, 62, r3
	ADDSUB	ps, cy0, rr
	CARRY(	ps, u1, cy1)
	stq	rr, -8(rp)
	CARRY(	rr, ps, cy)
	lda	n, -4(n)		C bookkeeping
	addq	cy, cy1, cy1
	bgt	n, L(top)

C Last limb.  Nothing further is loaded here.
L(end):	s4addq	v0, r3, sl
	ADDSUB	u0, sl, ps
	srl	v0, 62, r2		C bits shifted out of the top limb
	ADDSUB	ps, cy1, rr
	CARRY(	ps, u0, cy0)
	stq	rr, 0(rp)
	CARRY(	rr, ps, cy)
	addq	cy, cy0, cy0
	addq	cy0, r2, r0		C result: carry plus shifted out bits

	ret	r31,(r26),1
EPILOGUE()
ASM_END()