1 1.1 mrg /* Cray PVP/IEEE mpn_mul_1 -- multiply a limb vector with a limb and store the 2 1.1 mrg result in a second limb vector. 3 1.1 mrg 4 1.1 mrg Copyright 2000, 2001 Free Software Foundation, Inc. 5 1.1 mrg 6 1.1 mrg This file is part of the GNU MP Library. 7 1.1 mrg 8 1.1 mrg The GNU MP Library is free software; you can redistribute it and/or modify 9 1.1.1.2 mrg it under the terms of either: 10 1.1.1.2 mrg 11 1.1.1.2 mrg * the GNU Lesser General Public License as published by the Free 12 1.1.1.2 mrg Software Foundation; either version 3 of the License, or (at your 13 1.1.1.2 mrg option) any later version. 14 1.1.1.2 mrg 15 1.1.1.2 mrg or 16 1.1.1.2 mrg 17 1.1.1.2 mrg * the GNU General Public License as published by the Free Software 18 1.1.1.2 mrg Foundation; either version 2 of the License, or (at your option) any 19 1.1.1.2 mrg later version. 20 1.1.1.2 mrg 21 1.1.1.2 mrg or both in parallel, as here. 22 1.1 mrg 23 1.1 mrg The GNU MP Library is distributed in the hope that it will be useful, but 24 1.1 mrg WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 25 1.1.1.2 mrg or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 26 1.1.1.2 mrg for more details. 27 1.1 mrg 28 1.1.1.2 mrg You should have received copies of the GNU General Public License and the 29 1.1.1.2 mrg GNU Lesser General Public License along with the GNU MP Library. If not, 30 1.1.1.2 mrg see https://www.gnu.org/licenses/. */ 31 1.1 mrg 32 1.1 mrg /* This code runs at 5 cycles/limb on a T90. That would probably 33 1.1 mrg be hard to improve upon, even with assembly code. */ 34 1.1 mrg 35 1.1 mrg #include <intrinsics.h> 36 1.1 mrg #include "gmp-impl.h" 37 1.1 mrg 38 1.1 mrg mp_limb_t 39 1.1 mrg mpn_mul_1 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t vl) 40 1.1 mrg { 41 1.1 mrg mp_limb_t cy[n]; 42 1.1 mrg mp_limb_t a, b, r, s0, s1, c0, c1; 43 1.1 mrg mp_size_t i; 44 1.1 mrg int more_carries; 45 1.1 mrg 46 1.1 mrg if (up == rp) 47 1.1 mrg { 48 1.1 mrg /* The algorithm used below cannot handle overlap. Handle it here by 49 1.1 mrg making a temporary copy of the source vector, then call ourselves. */ 50 1.1 mrg mp_limb_t xp[n]; 51 1.1 mrg MPN_COPY (xp, up, n); 52 1.1 mrg return mpn_mul_1 (rp, xp, n, vl); 53 1.1 mrg } 54 1.1 mrg 55 1.1 mrg a = up[0] * vl; 56 1.1 mrg rp[0] = a; 57 1.1 mrg cy[0] = 0; 58 1.1 mrg 59 1.1 mrg /* Main multiply loop. Generate a raw accumulated output product in rp[] 60 1.1 mrg and a carry vector in cy[]. */ 61 1.1 mrg #pragma _CRI ivdep 62 1.1 mrg for (i = 1; i < n; i++) 63 1.1 mrg { 64 1.1 mrg a = up[i] * vl; 65 1.1 mrg b = _int_mult_upper (up[i - 1], vl); 66 1.1 mrg s0 = a + b; 67 1.1 mrg c0 = ((a & b) | ((a | b) & ~s0)) >> 63; 68 1.1 mrg rp[i] = s0; 69 1.1 mrg cy[i] = c0; 70 1.1 mrg } 71 1.1 mrg /* Carry add loop. Add the carry vector cy[] to the raw sum rp[] and 72 1.1 mrg store the new sum back to rp[0]. */ 73 1.1 mrg more_carries = 0; 74 1.1 mrg #pragma _CRI ivdep 75 1.1 mrg for (i = 2; i < n; i++) 76 1.1 mrg { 77 1.1 mrg r = rp[i]; 78 1.1 mrg c0 = cy[i - 1]; 79 1.1 mrg s0 = r + c0; 80 1.1 mrg rp[i] = s0; 81 1.1 mrg c0 = (r & ~s0) >> 63; 82 1.1 mrg more_carries += c0; 83 1.1 mrg } 84 1.1 mrg /* If that second loop generated carry, handle that in scalar loop. */ 85 1.1 mrg if (more_carries) 86 1.1 mrg { 87 1.1 mrg mp_limb_t cyrec = 0; 88 1.1 mrg /* Look for places where rp[k] is zero and cy[k-1] is non-zero. 89 1.1 mrg These are where we got a recurrency carry. */ 90 1.1 mrg for (i = 2; i < n; i++) 91 1.1 mrg { 92 1.1 mrg r = rp[i]; 93 1.1 mrg c0 = (r == 0 && cy[i - 1] != 0); 94 1.1 mrg s0 = r + cyrec; 95 1.1 mrg rp[i] = s0; 96 1.1 mrg c1 = (r & ~s0) >> 63; 97 1.1 mrg cyrec = c0 | c1; 98 1.1 mrg } 99 1.1 mrg return _int_mult_upper (up[n - 1], vl) + cyrec + cy[n - 1]; 100 1.1 mrg } 101 1.1 mrg 102 1.1 mrg return _int_mult_upper (up[n - 1], vl) + cy[n - 1]; 103 1.1 mrg } 104