1 1.1 mrg /* Cray PVP/IEEE mpn_mul_basecase. 2 1.1 mrg 3 1.1 mrg Copyright 2000, 2001 Free Software Foundation, Inc. 4 1.1 mrg 5 1.1 mrg This file is part of the GNU MP Library. 6 1.1 mrg 7 1.1 mrg The GNU MP Library is free software; you can redistribute it and/or modify 8 1.1.1.2 mrg it under the terms of either: 9 1.1.1.2 mrg 10 1.1.1.2 mrg * the GNU Lesser General Public License as published by the Free 11 1.1.1.2 mrg Software Foundation; either version 3 of the License, or (at your 12 1.1.1.2 mrg option) any later version. 13 1.1.1.2 mrg 14 1.1.1.2 mrg or 15 1.1.1.2 mrg 16 1.1.1.2 mrg * the GNU General Public License as published by the Free Software 17 1.1.1.2 mrg Foundation; either version 2 of the License, or (at your option) any 18 1.1.1.2 mrg later version. 19 1.1.1.2 mrg 20 1.1.1.2 mrg or both in parallel, as here. 21 1.1 mrg 22 1.1 mrg The GNU MP Library is distributed in the hope that it will be useful, but 23 1.1 mrg WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 24 1.1.1.2 mrg or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 25 1.1.1.2 mrg for more details. 26 1.1 mrg 27 1.1.1.2 mrg You should have received copies of the GNU General Public License and the 28 1.1.1.2 mrg GNU Lesser General Public License along with the GNU MP Library. If not, 29 1.1.1.2 mrg see https://www.gnu.org/licenses/. */ 30 1.1 mrg 31 1.1 mrg /* The most critical loop of this code runs at about 5 cycles/limb on a T90. 32 1.1 mrg That is not perfect, mainly due to vector register shortage. */ 33 1.1 mrg 34 1.1 mrg #include <intrinsics.h> 35 1.1 mrg #include "gmp-impl.h" 36 1.1 mrg 37 1.1 mrg void 38 1.1 mrg mpn_mul_basecase (mp_ptr rp, 39 1.1 mrg mp_srcptr up, mp_size_t un, 40 1.1 mrg mp_srcptr vp, mp_size_t vn) 41 1.1 mrg { 42 1.1 mrg mp_limb_t cy[un + vn]; 43 1.1 mrg mp_limb_t vl; 44 1.1 mrg mp_limb_t a, b, r, s0, s1, c0, c1; 45 1.1 mrg mp_size_t i, j; 46 1.1 mrg int more_carries; 47 1.1 mrg 48 1.1 mrg for (i = 0; i < un + vn; i++) 49 1.1 mrg { 50 1.1 mrg rp[i] = 0; 51 1.1 mrg cy[i] = 0; 52 1.1 mrg } 53 1.1 mrg 54 1.1 mrg #pragma _CRI novector 55 1.1 mrg for (j = 0; j < vn; j++) 56 1.1 mrg { 57 1.1 mrg vl = vp[j]; 58 1.1 mrg 59 1.1 mrg a = up[0] * vl; 60 1.1 mrg r = rp[j]; 61 1.1 mrg s0 = a + r; 62 1.1 mrg rp[j] = s0; 63 1.1 mrg c0 = ((a & r) | ((a | r) & ~s0)) >> 63; 64 1.1 mrg cy[j] += c0; 65 1.1 mrg 66 1.1 mrg #pragma _CRI ivdep 67 1.1 mrg for (i = 1; i < un; i++) 68 1.1 mrg { 69 1.1 mrg a = up[i] * vl; 70 1.1 mrg b = _int_mult_upper (up[i - 1], vl); 71 1.1 mrg s0 = a + b; 72 1.1 mrg c0 = ((a & b) | ((a | b) & ~s0)) >> 63; 73 1.1 mrg r = rp[j + i]; 74 1.1 mrg s1 = s0 + r; 75 1.1 mrg rp[j + i] = s1; 76 1.1 mrg c1 = ((s0 & r) | ((s0 | r) & ~s1)) >> 63; 77 1.1 mrg cy[j + i] += c0 + c1; 78 1.1 mrg } 79 1.1 mrg rp[j + un] = _int_mult_upper (up[un - 1], vl); 80 1.1 mrg } 81 1.1 mrg 82 1.1 mrg more_carries = 0; 83 1.1 mrg #pragma _CRI ivdep 84 1.1 mrg for (i = 1; i < un + vn; i++) 85 1.1 mrg { 86 1.1 mrg r = rp[i]; 87 1.1 mrg c0 = cy[i - 1]; 88 1.1 mrg s0 = r + c0; 89 1.1 mrg rp[i] = s0; 90 1.1 mrg c0 = (r & ~s0) >> 63; 91 1.1 mrg more_carries += c0; 92 1.1 mrg } 93 1.1 mrg /* If that second loop generated carry, handle that in scalar loop. */ 94 1.1 mrg if (more_carries) 95 1.1 mrg { 96 1.1 mrg mp_limb_t cyrec = 0; 97 1.1 mrg for (i = 1; i < un + vn; i++) 98 1.1 mrg { 99 1.1 mrg r = rp[i]; 100 1.1 mrg c0 = (r < cy[i - 1]); 101 1.1 mrg s0 = r + cyrec; 102 1.1 mrg rp[i] = s0; 103 1.1 mrg c1 = (r & ~s0) >> 63; 104 1.1 mrg cyrec = c0 | c1; 105 1.1 mrg } 106 1.1 mrg } 107 1.1 mrg } 108