Home | History | Annotate | Line # | Download | only in generic
mulmid.c revision 1.1.1.2
      1 /* mpn_mulmid -- middle product
      2 
      3    Contributed by David Harvey.
      4 
      5    THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
      6    SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
      7    GUARANTEED THAT IT'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
      8 
      9 Copyright 2011 Free Software Foundation, Inc.
     10 
     11 This file is part of the GNU MP Library.
     12 
     13 The GNU MP Library is free software; you can redistribute it and/or modify
     14 it under the terms of either:
     15 
     16   * the GNU Lesser General Public License as published by the Free
     17     Software Foundation; either version 3 of the License, or (at your
     18     option) any later version.
     19 
     20 or
     21 
     22   * the GNU General Public License as published by the Free Software
     23     Foundation; either version 2 of the License, or (at your option) any
     24     later version.
     25 
     26 or both in parallel, as here.
     27 
     28 The GNU MP Library is distributed in the hope that it will be useful, but
     29 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
     30 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
     31 for more details.
     32 
     33 You should have received copies of the GNU General Public License and the
     34 GNU Lesser General Public License along with the GNU MP Library.  If not,
     35 see https://www.gnu.org/licenses/.  */
     36 
     37 
     38 #include "gmp.h"
     39 #include "gmp-impl.h"
     40 
     41 
     42 #define CHUNK (200 + MULMID_TOOM42_THRESHOLD)
     43 
     44 
     45 void
     46 mpn_mulmid (mp_ptr rp,
     47             mp_srcptr ap, mp_size_t an,
     48             mp_srcptr bp, mp_size_t bn)
     49 {
     50   mp_size_t rn, k;
     51   mp_ptr scratch, temp;
     52 
     53   ASSERT (an >= bn);
     54   ASSERT (bn >= 1);
     55   ASSERT (! MPN_OVERLAP_P (rp, an - bn + 3, ap, an));
     56   ASSERT (! MPN_OVERLAP_P (rp, an - bn + 3, bp, bn));
     57 
     58   if (bn < MULMID_TOOM42_THRESHOLD)
     59     {
     60       /* region not tall enough to make toom42 worthwhile for any portion */
     61 
     62       if (an < CHUNK)
     63 	{
     64 	  /* region not too wide either, just call basecase directly */
     65 	  mpn_mulmid_basecase (rp, ap, an, bp, bn);
     66 	  return;
     67 	}
     68 
     69       /* Region quite wide. For better locality, use basecase on chunks:
     70 
     71 	 AAABBBCC..
     72 	 .AAABBBCC.
     73 	 ..AAABBBCC
     74       */
     75 
     76       k = CHUNK - bn + 1;    /* number of diagonals per chunk */
     77 
     78       /* first chunk (marked A in the above diagram) */
     79       mpn_mulmid_basecase (rp, ap, CHUNK, bp, bn);
     80 
     81       /* remaining chunks (B, C, etc) */
     82       an -= k;
     83 
     84       while (an >= CHUNK)
     85 	{
     86 	  mp_limb_t t0, t1, cy;
     87 	  ap += k, rp += k;
     88 	  t0 = rp[0], t1 = rp[1];
     89 	  mpn_mulmid_basecase (rp, ap, CHUNK, bp, bn);
     90 	  ADDC_LIMB (cy, rp[0], rp[0], t0);    /* add back saved limbs */
     91 	  MPN_INCR_U (rp + 1, k + 1, t1 + cy);
     92 	  an -= k;
     93 	}
     94 
     95       if (an >= bn)
     96 	{
     97 	  /* last remaining chunk */
     98 	  mp_limb_t t0, t1, cy;
     99 	  ap += k, rp += k;
    100 	  t0 = rp[0], t1 = rp[1];
    101 	  mpn_mulmid_basecase (rp, ap, an, bp, bn);
    102 	  ADDC_LIMB (cy, rp[0], rp[0], t0);
    103 	  MPN_INCR_U (rp + 1, an - bn + 2, t1 + cy);
    104 	}
    105 
    106       return;
    107     }
    108 
    109   /* region is tall enough for toom42 */
    110 
    111   rn = an - bn + 1;
    112 
    113   if (rn < MULMID_TOOM42_THRESHOLD)
    114     {
    115       /* region not wide enough to make toom42 worthwhile for any portion */
    116 
    117       TMP_DECL;
    118 
    119       if (bn < CHUNK)
    120 	{
    121 	  /* region not too tall either, just call basecase directly */
    122 	  mpn_mulmid_basecase (rp, ap, an, bp, bn);
    123 	  return;
    124 	}
    125 
    126       /* Region quite tall. For better locality, use basecase on chunks:
    127 
    128 	 AAAAA....
    129 	 .AAAAA...
    130 	 ..BBBBB..
    131 	 ...BBBBB.
    132 	 ....CCCCC
    133       */
    134 
    135       TMP_MARK;
    136 
    137       temp = TMP_ALLOC_LIMBS (rn + 2);
    138 
    139       /* first chunk (marked A in the above diagram) */
    140       bp += bn - CHUNK, an -= bn - CHUNK;
    141       mpn_mulmid_basecase (rp, ap, an, bp, CHUNK);
    142 
    143       /* remaining chunks (B, C, etc) */
    144       bn -= CHUNK;
    145 
    146       while (bn >= CHUNK)
    147 	{
    148 	  ap += CHUNK, bp -= CHUNK;
    149 	  mpn_mulmid_basecase (temp, ap, an, bp, CHUNK);
    150 	  mpn_add_n (rp, rp, temp, rn + 2);
    151 	  bn -= CHUNK;
    152 	}
    153 
    154       if (bn)
    155 	{
    156 	  /* last remaining chunk */
    157 	  ap += CHUNK, bp -= bn;
    158 	  mpn_mulmid_basecase (temp, ap, rn + bn - 1, bp, bn);
    159 	  mpn_add_n (rp, rp, temp, rn + 2);
    160 	}
    161 
    162       TMP_FREE;
    163       return;
    164     }
    165 
    166   /* we're definitely going to use toom42 somewhere */
    167 
    168   if (bn > rn)
    169     {
    170       /* slice region into chunks, use toom42 on all chunks except possibly
    171 	 the last:
    172 
    173          AA....
    174          .AA...
    175          ..BB..
    176          ...BB.
    177          ....CC
    178       */
    179 
    180       TMP_DECL;
    181       TMP_MARK;
    182 
    183       temp = TMP_ALLOC_LIMBS (rn + 2 + mpn_toom42_mulmid_itch (rn));
    184       scratch = temp + rn + 2;
    185 
    186       /* first chunk (marked A in the above diagram) */
    187       bp += bn - rn;
    188       mpn_toom42_mulmid (rp, ap, bp, rn, scratch);
    189 
    190       /* remaining chunks (B, C, etc) */
    191       bn -= rn;
    192 
    193       while (bn >= rn)
    194         {
    195           ap += rn, bp -= rn;
    196 	  mpn_toom42_mulmid (temp, ap, bp, rn, scratch);
    197           mpn_add_n (rp, rp, temp, rn + 2);
    198           bn -= rn;
    199         }
    200 
    201       if (bn)
    202         {
    203           /* last remaining chunk */
    204           ap += rn, bp -= bn;
    205 	  mpn_mulmid (temp, ap, rn + bn - 1, bp, bn);
    206           mpn_add_n (rp, rp, temp, rn + 2);
    207         }
    208 
    209       TMP_FREE;
    210     }
    211   else
    212     {
    213       /* slice region into chunks, use toom42 on all chunks except possibly
    214 	 the last:
    215 
    216          AAABBBCC..
    217          .AAABBBCC.
    218          ..AAABBBCC
    219       */
    220 
    221       TMP_DECL;
    222       TMP_MARK;
    223 
    224       scratch = TMP_ALLOC_LIMBS (mpn_toom42_mulmid_itch (bn));
    225 
    226       /* first chunk (marked A in the above diagram) */
    227       mpn_toom42_mulmid (rp, ap, bp, bn, scratch);
    228 
    229       /* remaining chunks (B, C, etc) */
    230       rn -= bn;
    231 
    232       while (rn >= bn)
    233         {
    234 	  mp_limb_t t0, t1, cy;
    235           ap += bn, rp += bn;
    236           t0 = rp[0], t1 = rp[1];
    237           mpn_toom42_mulmid (rp, ap, bp, bn, scratch);
    238 	  ADDC_LIMB (cy, rp[0], rp[0], t0);     /* add back saved limbs */
    239 	  MPN_INCR_U (rp + 1, bn + 1, t1 + cy);
    240 	  rn -= bn;
    241         }
    242 
    243       TMP_FREE;
    244 
    245       if (rn)
    246         {
    247           /* last remaining chunk */
    248 	  mp_limb_t t0, t1, cy;
    249           ap += bn, rp += bn;
    250           t0 = rp[0], t1 = rp[1];
    251           mpn_mulmid (rp, ap, rn + bn - 1, bp, bn);
    252 	  ADDC_LIMB (cy, rp[0], rp[0], t0);
    253 	  MPN_INCR_U (rp + 1, rn + 1, t1 + cy);
    254         }
    255     }
    256 }
    257