Home | History | Annotate | Line # | Download | only in ieee
      1      1.1  mrg /* Cray PVP/IEEE mpn_mul_basecase.
      2      1.1  mrg 
      3      1.1  mrg Copyright 2000, 2001 Free Software Foundation, Inc.
      4      1.1  mrg 
      5      1.1  mrg This file is part of the GNU MP Library.
      6      1.1  mrg 
      7      1.1  mrg The GNU MP Library is free software; you can redistribute it and/or modify
      8  1.1.1.2  mrg it under the terms of either:
      9  1.1.1.2  mrg 
     10  1.1.1.2  mrg   * the GNU Lesser General Public License as published by the Free
     11  1.1.1.2  mrg     Software Foundation; either version 3 of the License, or (at your
     12  1.1.1.2  mrg     option) any later version.
     13  1.1.1.2  mrg 
     14  1.1.1.2  mrg or
     15  1.1.1.2  mrg 
     16  1.1.1.2  mrg   * the GNU General Public License as published by the Free Software
     17  1.1.1.2  mrg     Foundation; either version 2 of the License, or (at your option) any
     18  1.1.1.2  mrg     later version.
     19  1.1.1.2  mrg 
     20  1.1.1.2  mrg or both in parallel, as here.
     21      1.1  mrg 
     22      1.1  mrg The GNU MP Library is distributed in the hope that it will be useful, but
     23      1.1  mrg WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
     24  1.1.1.2  mrg or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
     25  1.1.1.2  mrg for more details.
     26      1.1  mrg 
     27  1.1.1.2  mrg You should have received copies of the GNU General Public License and the
     28  1.1.1.2  mrg GNU Lesser General Public License along with the GNU MP Library.  If not,
     29  1.1.1.2  mrg see https://www.gnu.org/licenses/.  */
     30      1.1  mrg 
     31      1.1  mrg /* The most critical loop of this code runs at about 5 cycles/limb on a T90.
     32      1.1  mrg    That is not perfect, mainly due to vector register shortage.  */
     33      1.1  mrg 
     34      1.1  mrg #include <intrinsics.h>
     35      1.1  mrg #include "gmp-impl.h"
     36      1.1  mrg 
     /* Multiply the un-limb operand at up by the vn-limb operand at vp and
        store the full (un + vn)-limb product at rp, least significant limb
        first.

        rp      destination; all un + vn limbs are cleared and then written.
                Because rp is cleared before up and vp are read, the
                destination must not overlap either source.
        up, un  first operand and its limb count.  up[0] and up[un - 1] are
                read on every pass of the vp loop, so un must be at least 1
                whenever vn is at least 1.
        vp, vn  second operand and its limb count.

        Method: the partial products are accumulated into rp without
        propagating carries from limb to limb.  Each carry-out is instead
        counted in cy[] at the position where it was produced.  After all
        rows are done, cy[i - 1] is added into rp[i]; if that addition itself
        overflows anywhere, a final scalar pass ripples those carries upward.

        The shifts by 63 extract bit 63 of a limb, i.e. the code is written
        for 64-bit limbs.

        NOTE(review): _int_mult_upper is not defined in this file (it comes
        from <intrinsics.h>).  It is used here as the high limb of the
        double-limb product whose low limb is given by the plain `*' --
        confirm against the Cray intrinsics documentation.
        NOTE(review): the `#pragma _CRI' lines are Cray compiler directives
        (novector: do not vectorize the next loop; ivdep: ignore apparent
        vector dependencies in the next loop) -- confirm against the Cray C
        reference manual.  */
     37      1.1  mrg void
     38      1.1  mrg mpn_mul_basecase (mp_ptr rp,
     39      1.1  mrg 		  mp_srcptr up, mp_size_t un,
     40      1.1  mrg 		  mp_srcptr vp, mp_size_t vn)
     41      1.1  mrg {
                        /* cy[k] counts the carries produced while accumulating into
                           rp[k]; they are owed to rp[k + 1].  Automatic variable-length
                           array with one entry per result limb.  */
     42      1.1  mrg   mp_limb_t cy[un + vn];
     43      1.1  mrg   mp_limb_t vl;
     44      1.1  mrg   mp_limb_t a, b, r, s0, s1, c0, c1;
     45      1.1  mrg   mp_size_t i, j;
     46      1.1  mrg   int more_carries;
     47      1.1  mrg 
                        /* Start from a zero product and zero pending carries.  */
     48      1.1  mrg   for (i = 0; i < un + vn; i++)
     49      1.1  mrg     {
     50      1.1  mrg       rp[i] = 0;
     51      1.1  mrg       cy[i] = 0;
     52      1.1  mrg     }
     53      1.1  mrg 
                        /* Outer loop: one row per limb of vp.  Row j adds up * vp[j] into
                           rp[j .. j + un].  */
     54      1.1  mrg #pragma _CRI novector
     55      1.1  mrg   for (j = 0; j < vn; j++)
     56      1.1  mrg     {
     57      1.1  mrg       vl = vp[j];
     58      1.1  mrg 
                            /* Lowest limb of the row: only the low half of up[0] * vl
                               lands here.  c0 is the carry-out of a + r, computed from
                               the top bits: both operands have it set, or one has it set
                               and the sum does not.  */
     59      1.1  mrg       a = up[0] * vl;
     60      1.1  mrg       r = rp[j];
     61      1.1  mrg       s0 = a + r;
     62      1.1  mrg       rp[j] = s0;
     63      1.1  mrg       c0 = ((a & r) | ((a | r) & ~s0)) >> 63;
     64      1.1  mrg       cy[j] += c0;
     65      1.1  mrg 
                            /* Middle limbs: position j + i receives the low half of
                               up[i] * vl plus the high half of up[i - 1] * vl, plus what
                               is already in rp[j + i].  The two additions can each carry
                               out once (c0, c1); both are recorded in cy[j + i] rather
                               than propagated, so iterations do not depend on each
                               other.  */
     66      1.1  mrg #pragma _CRI ivdep
     67      1.1  mrg       for (i = 1; i < un; i++)
     68      1.1  mrg 	{
     69      1.1  mrg 	  a = up[i] * vl;
     70      1.1  mrg 	  b = _int_mult_upper (up[i - 1], vl);
     71      1.1  mrg 	  s0 = a + b;
     72      1.1  mrg 	  c0 = ((a & b) | ((a | b) & ~s0)) >> 63;
     73      1.1  mrg 	  r = rp[j + i];
     74      1.1  mrg 	  s1 = s0 + r;
     75      1.1  mrg 	  rp[j + i] = s1;
     76      1.1  mrg 	  c1 = ((s0 & r) | ((s0 | r) & ~s1)) >> 63;
     77      1.1  mrg 	  cy[j + i] += c0 + c1;
     78      1.1  mrg 	}
                            /* Top limb of the row: rp[j + un] has not been written since
                               it was cleared, so the high half of the last product is
                               stored rather than added.  */
     79      1.1  mrg       rp[j + un] = _int_mult_upper (up[un - 1], vl);
     80      1.1  mrg     }
     81      1.1  mrg 
                        /* Fold the recorded carries in: the carries counted at limb i - 1
                           are added to limb i.  A carry-out of this addition is detected
                           as "top bit set before, clear after" and merely counted; it is
                           not propagated here.  */
     82      1.1  mrg   more_carries = 0;
     83      1.1  mrg #pragma _CRI ivdep
     84      1.1  mrg   for (i = 1; i < un + vn; i++)
     85      1.1  mrg     {
     86      1.1  mrg       r = rp[i];
     87      1.1  mrg       c0 = cy[i - 1];
     88      1.1  mrg       s0 = r + c0;
     89      1.1  mrg       rp[i] = s0;
     90      1.1  mrg       c0 = (r & ~s0) >> 63;
     91      1.1  mrg       more_carries += c0;
     92      1.1  mrg     }
     93      1.1  mrg   /* If that second loop generated carry, handle that in scalar loop.  */
     94      1.1  mrg   if (more_carries)
     95      1.1  mrg     {
                            /* cyrec is the carry entering limb i.  rp[i] already holds
                               the wrapped sum from the loop above, so rp[i] < cy[i - 1]
                               means that earlier addition overflowed (c0).  c1 is the
                               carry-out of adding the incoming cyrec itself.  Either one
                               becomes the carry into limb i + 1.  */
     96      1.1  mrg       mp_limb_t cyrec = 0;
     97      1.1  mrg       for (i = 1; i < un + vn; i++)
     98      1.1  mrg 	{
     99      1.1  mrg 	  r = rp[i];
    100      1.1  mrg 	  c0 = (r < cy[i - 1]);
    101      1.1  mrg 	  s0 = r + cyrec;
    102      1.1  mrg 	  rp[i] = s0;
    103      1.1  mrg 	  c1 = (r & ~s0) >> 63;
    104      1.1  mrg 	  cyrec = c0 | c1;
    105      1.1  mrg 	}
    106      1.1  mrg     }
    107      1.1  mrg }
    108