Home | History | Annotate | Line # | Download | only in generic
mulmid.c revision 1.1.1.3
      1 /* mpn_mulmid -- middle product
      2 
      3    Contributed by David Harvey.
      4 
      5    THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
      6    SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
      7    GUARANTEED THAT IT'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
      8 
      9 Copyright 2011 Free Software Foundation, Inc.
     10 
     11 This file is part of the GNU MP Library.
     12 
     13 The GNU MP Library is free software; you can redistribute it and/or modify
     14 it under the terms of either:
     15 
     16   * the GNU Lesser General Public License as published by the Free
     17     Software Foundation; either version 3 of the License, or (at your
     18     option) any later version.
     19 
     20 or
     21 
     22   * the GNU General Public License as published by the Free Software
     23     Foundation; either version 2 of the License, or (at your option) any
     24     later version.
     25 
     26 or both in parallel, as here.
     27 
     28 The GNU MP Library is distributed in the hope that it will be useful, but
     29 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
     30 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
     31 for more details.
     32 
     33 You should have received copies of the GNU General Public License and the
     34 GNU Lesser General Public License along with the GNU MP Library.  If not,
     35 see https://www.gnu.org/licenses/.  */
     36 
     37 
     38 #include "gmp-impl.h"
     39 
     40 
     41 #define CHUNK (200 + MULMID_TOOM42_THRESHOLD)
     42 
     43 
     44 void
     45 mpn_mulmid (mp_ptr rp,
     46             mp_srcptr ap, mp_size_t an,
     47             mp_srcptr bp, mp_size_t bn)
     48 {
     49   mp_size_t rn, k;
     50   mp_ptr scratch, temp;
     51 
     52   ASSERT (an >= bn);
     53   ASSERT (bn >= 1);
     54   ASSERT (! MPN_OVERLAP_P (rp, an - bn + 3, ap, an));
     55   ASSERT (! MPN_OVERLAP_P (rp, an - bn + 3, bp, bn));
     56 
     57   if (bn < MULMID_TOOM42_THRESHOLD)
     58     {
     59       /* region not tall enough to make toom42 worthwhile for any portion */
     60 
     61       if (an < CHUNK)
     62 	{
     63 	  /* region not too wide either, just call basecase directly */
     64 	  mpn_mulmid_basecase (rp, ap, an, bp, bn);
     65 	  return;
     66 	}
     67 
     68       /* Region quite wide. For better locality, use basecase on chunks:
     69 
     70 	 AAABBBCC..
     71 	 .AAABBBCC.
     72 	 ..AAABBBCC
     73       */
     74 
     75       k = CHUNK - bn + 1;    /* number of diagonals per chunk */
     76 
     77       /* first chunk (marked A in the above diagram) */
     78       mpn_mulmid_basecase (rp, ap, CHUNK, bp, bn);
     79 
     80       /* remaining chunks (B, C, etc) */
     81       an -= k;
     82 
     83       while (an >= CHUNK)
     84 	{
     85 	  mp_limb_t t0, t1, cy;
     86 	  ap += k, rp += k;
     87 	  t0 = rp[0], t1 = rp[1];
     88 	  mpn_mulmid_basecase (rp, ap, CHUNK, bp, bn);
     89 	  ADDC_LIMB (cy, rp[0], rp[0], t0);    /* add back saved limbs */
     90 	  MPN_INCR_U (rp + 1, k + 1, t1 + cy);
     91 	  an -= k;
     92 	}
     93 
     94       if (an >= bn)
     95 	{
     96 	  /* last remaining chunk */
     97 	  mp_limb_t t0, t1, cy;
     98 	  ap += k, rp += k;
     99 	  t0 = rp[0], t1 = rp[1];
    100 	  mpn_mulmid_basecase (rp, ap, an, bp, bn);
    101 	  ADDC_LIMB (cy, rp[0], rp[0], t0);
    102 	  MPN_INCR_U (rp + 1, an - bn + 2, t1 + cy);
    103 	}
    104 
    105       return;
    106     }
    107 
    108   /* region is tall enough for toom42 */
    109 
    110   rn = an - bn + 1;
    111 
    112   if (rn < MULMID_TOOM42_THRESHOLD)
    113     {
    114       /* region not wide enough to make toom42 worthwhile for any portion */
    115 
    116       TMP_DECL;
    117 
    118       if (bn < CHUNK)
    119 	{
    120 	  /* region not too tall either, just call basecase directly */
    121 	  mpn_mulmid_basecase (rp, ap, an, bp, bn);
    122 	  return;
    123 	}
    124 
    125       /* Region quite tall. For better locality, use basecase on chunks:
    126 
    127 	 AAAAA....
    128 	 .AAAAA...
    129 	 ..BBBBB..
    130 	 ...BBBBB.
    131 	 ....CCCCC
    132       */
    133 
    134       TMP_MARK;
    135 
    136       temp = TMP_ALLOC_LIMBS (rn + 2);
    137 
    138       /* first chunk (marked A in the above diagram) */
    139       bp += bn - CHUNK, an -= bn - CHUNK;
    140       mpn_mulmid_basecase (rp, ap, an, bp, CHUNK);
    141 
    142       /* remaining chunks (B, C, etc) */
    143       bn -= CHUNK;
    144 
    145       while (bn >= CHUNK)
    146 	{
    147 	  ap += CHUNK, bp -= CHUNK;
    148 	  mpn_mulmid_basecase (temp, ap, an, bp, CHUNK);
    149 	  mpn_add_n (rp, rp, temp, rn + 2);
    150 	  bn -= CHUNK;
    151 	}
    152 
    153       if (bn)
    154 	{
    155 	  /* last remaining chunk */
    156 	  ap += CHUNK, bp -= bn;
    157 	  mpn_mulmid_basecase (temp, ap, rn + bn - 1, bp, bn);
    158 	  mpn_add_n (rp, rp, temp, rn + 2);
    159 	}
    160 
    161       TMP_FREE;
    162       return;
    163     }
    164 
    165   /* we're definitely going to use toom42 somewhere */
    166 
    167   if (bn > rn)
    168     {
    169       /* slice region into chunks, use toom42 on all chunks except possibly
    170 	 the last:
    171 
    172          AA....
    173          .AA...
    174          ..BB..
    175          ...BB.
    176          ....CC
    177       */
    178 
    179       TMP_DECL;
    180       TMP_MARK;
    181 
    182       temp = TMP_ALLOC_LIMBS (rn + 2 + mpn_toom42_mulmid_itch (rn));
    183       scratch = temp + rn + 2;
    184 
    185       /* first chunk (marked A in the above diagram) */
    186       bp += bn - rn;
    187       mpn_toom42_mulmid (rp, ap, bp, rn, scratch);
    188 
    189       /* remaining chunks (B, C, etc) */
    190       bn -= rn;
    191 
    192       while (bn >= rn)
    193         {
    194           ap += rn, bp -= rn;
    195 	  mpn_toom42_mulmid (temp, ap, bp, rn, scratch);
    196           mpn_add_n (rp, rp, temp, rn + 2);
    197           bn -= rn;
    198         }
    199 
    200       if (bn)
    201         {
    202           /* last remaining chunk */
    203           ap += rn, bp -= bn;
    204 	  mpn_mulmid (temp, ap, rn + bn - 1, bp, bn);
    205           mpn_add_n (rp, rp, temp, rn + 2);
    206         }
    207 
    208       TMP_FREE;
    209     }
    210   else
    211     {
    212       /* slice region into chunks, use toom42 on all chunks except possibly
    213 	 the last:
    214 
    215          AAABBBCC..
    216          .AAABBBCC.
    217          ..AAABBBCC
    218       */
    219 
    220       TMP_DECL;
    221       TMP_MARK;
    222 
    223       scratch = TMP_ALLOC_LIMBS (mpn_toom42_mulmid_itch (bn));
    224 
    225       /* first chunk (marked A in the above diagram) */
    226       mpn_toom42_mulmid (rp, ap, bp, bn, scratch);
    227 
    228       /* remaining chunks (B, C, etc) */
    229       rn -= bn;
    230 
    231       while (rn >= bn)
    232         {
    233 	  mp_limb_t t0, t1, cy;
    234           ap += bn, rp += bn;
    235           t0 = rp[0], t1 = rp[1];
    236           mpn_toom42_mulmid (rp, ap, bp, bn, scratch);
    237 	  ADDC_LIMB (cy, rp[0], rp[0], t0);     /* add back saved limbs */
    238 	  MPN_INCR_U (rp + 1, bn + 1, t1 + cy);
    239 	  rn -= bn;
    240         }
    241 
    242       TMP_FREE;
    243 
    244       if (rn)
    245         {
    246           /* last remaining chunk */
    247 	  mp_limb_t t0, t1, cy;
    248           ap += bn, rp += bn;
    249           t0 = rp[0], t1 = rp[1];
    250           mpn_mulmid (rp, ap, rn + bn - 1, bp, bn);
    251 	  ADDC_LIMB (cy, rp[0], rp[0], t0);
    252 	  MPN_INCR_U (rp + 1, rn + 1, t1 + cy);
    253         }
    254     }
    255 }
    256