1 dnl POWER9 mpn_addmul_1 and mpn_submul_1. 2 3 dnl Copyright 2018 Free Software Foundation, Inc. 4 5 dnl This file is part of the GNU MP Library. 6 dnl 7 dnl The GNU MP Library is free software; you can redistribute it and/or modify 8 dnl it under the terms of either: 9 dnl 10 dnl * the GNU Lesser General Public License as published by the Free 11 dnl Software Foundation; either version 3 of the License, or (at your 12 dnl option) any later version. 13 dnl 14 dnl or 15 dnl 16 dnl * the GNU General Public License as published by the Free Software 17 dnl Foundation; either version 2 of the License, or (at your option) any 18 dnl later version. 19 dnl 20 dnl or both in parallel, as here. 21 dnl 22 dnl The GNU MP Library is distributed in the hope that it will be useful, but 23 dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 24 dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 25 dnl for more details. 26 dnl 27 dnl You should have received copies of the GNU General Public License and the 28 dnl GNU Lesser General Public License along with the GNU MP Library. If not, 29 dnl see https://www.gnu.org/licenses/. 
include(`../config.m4')

C                   mpn_addmul_1    mpn_submul_1
C                   cycles/limb     cycles/limb
C POWER3/PPC630		   -		   -
C POWER4/PPC970		   -		   -
C POWER5		   -		   -
C POWER6		   -		   -
C POWER7		   -		   -
C POWER8		   -		   -
C POWER9		  2.63		  2.63

C INPUT PARAMETERS
C rp is loaded from and stored to, up is only loaded from, n is the limb
C count and v0 is the single limb multiplier.  Note that n lives in r5, which
C is reused as a product register once n has been decoded into cr7, cr0 and
C ctr.
define(`rp', `r3')
define(`up', `r4')
define(`n',  `r5')
define(`v0', `r6')


C Select the add or the subtract flavour.  ADDSUB produces CA without
C consuming it, ADDSUBC both consumes and produces CA.  The argument of AM is
C emitted only in the addmul_1 build and the argument of SM only in the
C submul_1 build.
ifdef(`OPERATION_addmul_1',`
  define(`ADDSUBC',	adde)
  define(`ADDSUB',	addc)
  define(`func',	mpn_addmul_1)
  define(`AM',		`$1')
  define(`SM',		`')
')
ifdef(`OPERATION_submul_1',`
  define(`ADDSUBC',	subfe)
  define(`ADDSUB',	subfc)
  define(`func',	mpn_submul_1)
  define(`AM',		`')
  define(`SM',		`$1')
')

MULFUNC_PROLOGUE(mpn_addmul_1 mpn_submul_1)

C Multiply the n limbs at up by v0 and add the products to, or subtract them
C from, the n limbs at rp, storing the result back at rp.  The value left in
C r3 at blr is the most significant product limb plus the outstanding carry
C or borrow.
C
C Two carry chains run at the same time:
C   OV  links the high half of one product to the low half of the next,
C       through addex with a final operand of 0
C   CA  links the additions to, or subtractions from, the rp limbs, through
C       ADDSUBC
C The loop is unrolled four ways and software pipelined.  Low product halves
C alternate between r0 and r7, high halves between r5 and r12, and source
C limbs between r8 and r9.  The loop is entered at lo0, lo1, lo2 or top for
C n mod 4 equal to 0, 1, 2 or 3 respectively, with rp and up pre-biased so
C that the fixed offsets at the entry point address the first limbs.
C NOTE(review): the small-n dispatch below presumes n >= 1; confirm against
C the caller contract.

ASM_START()
PROLOGUE(func)
	cmpdi	cr7, n, 3		C cr7 = n versus 3, tested further down
	srdi	r10, n, 2
	mtctr	r10			C ctr = n / 4, consumed by bdnz
	rldicl.	r9, n, 0, 63		C cr0 reflects bit 0 of n
	ld	r11, 0(up)		C first source limb
	bne	cr0, L(bx1)		C n is odd

C n is even
L(bx0):	rldicl. r9, n, 63, 63		C cr0 reflects bit 1 of n
AM(`	subfzeo	r12, n		')	C ov = 0, ca = 0
AM(`	li	r12, 0		')
SM(`	subfco	r12, r12, r12	')	C r12 = 0, ov = 0, ca = 1
	ld	r9, 8(up)		C second source limb
	mulld	r0, r11, v0		C low half of first product
	mulhdu	r5, r11, v0		C high half, overwrites n
	blt	cr7, L(2)		C n < 3 and even, so n = 2
	ld	r8, 16(up)		C third source limb
	bne	cr0, L(b10)		C n mod 4 = 2

C n mod 4 = 0, lo0 addresses its rp limb at offset 24
L(b00):	addi	rp, rp, -24
	b	L(lo0)
C n mod 4 = 2, lo2 addresses its rp limb at offset 8
L(b10):	addi	rp, rp, -8
	addi	up, up, 16
	b	L(lo2)

C n = 2, go straight to the two limb tail
L(2):	addi	rp, rp, -8
	b	L(cj2)

C n is odd
L(bx1):	rldicl. r9, n, 63, 63		C cr0 reflects bit 1 of n
AM(`	subfzeo	r5, n		')	C ov = 0, ca = 0
AM(`	li	r5, 0		')
SM(`	subfco	r5, r5, r5	')	C r5 = 0, ov = 0, ca = 1
	blt	cr7, L(1)		C n < 3 and odd, so n = 1
	ld	r8, 8(up)		C second source limb
	mulld	r7, r11, v0		C low half of first product
	mulhdu	r12, r11, v0		C high half of first product
	ld	r9, 16(up)		C third source limb
	bne	cr0, L(b11)		C n mod 4 = 3

C n mod 4 = 1, lo1 addresses its rp limb at offset 16
L(b01):	addi	rp, rp, -16
	addi	up, up, 8
	b	L(lo1)

C n = 1, a single product with no loop and no OV chain
L(1):	mulld	r7, r11, v0
	mulhdu	r12, r11, v0
	ld	r11, 0(rp)
	ADDSUB	r10, r7, r11		C combine low half with the rp limb
	std	r10, 0(rp)
AM(`	addze	r3, r12		')	C r3 = high half + CA
SM(`	subfe	r0, r0, r0	')	C r0 = CA - 1, that is 0 or -1
SM(`	sub	r3, r12, r0	')	C r3 = high half + borrow
	blr

C n mod 4 = 3, rp needs no bias since top uses offset 0
L(b11):	addi	up, up, 24
	ble	cr7, L(end)		C n = 3, the tail alone handles it

	ALIGN(16)
C Each of the four stages below loads an rp limb, starts the next product,
C folds the previous high half into the current low half via OV, loads the
C next source limb, then combines with the rp limb via CA and stores.
L(top):	ld	r11, 0(rp)
	mulld	r0, r8, v0
	addex(	r7, r7, r5, 0)
	mulhdu	r5, r8, v0
	ld	r8, 0(up)
	ADDSUBC	r10, r7, r11
	std	r10, 0(rp)
L(lo2):	ld	r11, 8(rp)
	mulld	r7, r9, v0
	addex(	r0, r0, r12, 0)
	mulhdu	r12, r9, v0
	ld	r9, 8(up)
	ADDSUBC	r10, r0, r11
	std	r10, 8(rp)
L(lo1):	ld	r11, 16(rp)
	mulld	r0, r8, v0
	addex(	r7, r7, r5, 0)
	mulhdu	r5, r8, v0
	ld	r8, 16(up)
	ADDSUBC	r10, r7, r11
	std	r10, 16(rp)
L(lo0):	ld	r11, 24(rp)
	mulld	r7, r9, v0
	addex(	r0, r0, r12, 0)
	mulhdu	r12, r9, v0
	ld	r9, 24(up)
	ADDSUBC	r10, r0, r11
	std	r10, 24(rp)
	addi	up, up, 32
	addi	rp, rp, 32
	bdnz	L(top)

C Wind down.  The last three result limbs are stored here without loading
C any further source limbs, using the values already held in r8 and r9.
L(end):	ld	r11, 0(rp)
	mulld	r0, r8, v0
	addex(	r7, r7, r5, 0)
	mulhdu	r5, r8, v0
	ADDSUBC	r10, r7, r11
	std	r10, 0(rp)
C Entry point for n = 2, and the last two limbs otherwise
L(cj2):	ld	r11, 8(rp)
	mulld	r7, r9, v0
	addex(	r0, r0, r12, 0)
	mulhdu	r12, r9, v0
	ADDSUBC	r10, r0, r11
	std	r10, 8(rp)
	ld	r11, 16(rp)
	addex(	r7, r7, r5, 0)
	ADDSUBC	r10, r7, r11
	std	r10, 16(rp)
	li	r0, 0
	addex(	r3, r12, r0, 0)		C r3 = last high half + OV
AM(`	addze	r3, r3		')	C fold in CA
SM(`	subfe	r0, r0, r0	')	C r0 = CA - 1, that is 0 or -1
SM(`	sub	r3, r3, r0	')	C fold in the borrow
	blr
EPILOGUE()