Home | History | Annotate | Line # | Download | only in generic
mulmid.c revision 1.1.1.2
      1      1.1  mrg /* mpn_mulmid -- middle product
      2      1.1  mrg 
      3      1.1  mrg    Contributed by David Harvey.
      4      1.1  mrg 
      5      1.1  mrg    THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
      6      1.1  mrg    SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
      7      1.1  mrg    GUARANTEED THAT IT'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
      8      1.1  mrg 
      9      1.1  mrg Copyright 2011 Free Software Foundation, Inc.
     10      1.1  mrg 
     11      1.1  mrg This file is part of the GNU MP Library.
     12      1.1  mrg 
     13      1.1  mrg The GNU MP Library is free software; you can redistribute it and/or modify
     14  1.1.1.2  mrg it under the terms of either:
     15  1.1.1.2  mrg 
     16  1.1.1.2  mrg   * the GNU Lesser General Public License as published by the Free
     17  1.1.1.2  mrg     Software Foundation; either version 3 of the License, or (at your
     18  1.1.1.2  mrg     option) any later version.
     19  1.1.1.2  mrg 
     20  1.1.1.2  mrg or
     21  1.1.1.2  mrg 
     22  1.1.1.2  mrg   * the GNU General Public License as published by the Free Software
     23  1.1.1.2  mrg     Foundation; either version 2 of the License, or (at your option) any
     24  1.1.1.2  mrg     later version.
     25  1.1.1.2  mrg 
     26  1.1.1.2  mrg or both in parallel, as here.
     27      1.1  mrg 
     28      1.1  mrg The GNU MP Library is distributed in the hope that it will be useful, but
     29      1.1  mrg WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
     30  1.1.1.2  mrg or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
     31  1.1.1.2  mrg for more details.
     32      1.1  mrg 
     33  1.1.1.2  mrg You should have received copies of the GNU General Public License and the
     34  1.1.1.2  mrg GNU Lesser General Public License along with the GNU MP Library.  If not,
     35  1.1.1.2  mrg see https://www.gnu.org/licenses/.  */
     36      1.1  mrg 
     37      1.1  mrg 
     38      1.1  mrg #include "gmp.h"
     39      1.1  mrg #include "gmp-impl.h"
     40      1.1  mrg 
     41      1.1  mrg 
     42      1.1  mrg #define CHUNK (200 + MULMID_TOOM42_THRESHOLD)
     43      1.1  mrg 
     44      1.1  mrg 
     45      1.1  mrg void
     46      1.1  mrg mpn_mulmid (mp_ptr rp,
     47      1.1  mrg             mp_srcptr ap, mp_size_t an,
     48      1.1  mrg             mp_srcptr bp, mp_size_t bn)
     49      1.1  mrg {
     50      1.1  mrg   mp_size_t rn, k;
     51      1.1  mrg   mp_ptr scratch, temp;
     52      1.1  mrg 
     53      1.1  mrg   ASSERT (an >= bn);
     54      1.1  mrg   ASSERT (bn >= 1);
     55      1.1  mrg   ASSERT (! MPN_OVERLAP_P (rp, an - bn + 3, ap, an));
     56      1.1  mrg   ASSERT (! MPN_OVERLAP_P (rp, an - bn + 3, bp, bn));
     57      1.1  mrg 
     58      1.1  mrg   if (bn < MULMID_TOOM42_THRESHOLD)
     59      1.1  mrg     {
     60      1.1  mrg       /* region not tall enough to make toom42 worthwhile for any portion */
     61      1.1  mrg 
     62      1.1  mrg       if (an < CHUNK)
     63      1.1  mrg 	{
     64      1.1  mrg 	  /* region not too wide either, just call basecase directly */
     65      1.1  mrg 	  mpn_mulmid_basecase (rp, ap, an, bp, bn);
     66      1.1  mrg 	  return;
     67      1.1  mrg 	}
     68      1.1  mrg 
     69      1.1  mrg       /* Region quite wide. For better locality, use basecase on chunks:
     70      1.1  mrg 
     71      1.1  mrg 	 AAABBBCC..
     72      1.1  mrg 	 .AAABBBCC.
     73      1.1  mrg 	 ..AAABBBCC
     74      1.1  mrg       */
     75      1.1  mrg 
     76      1.1  mrg       k = CHUNK - bn + 1;    /* number of diagonals per chunk */
     77      1.1  mrg 
     78      1.1  mrg       /* first chunk (marked A in the above diagram) */
     79      1.1  mrg       mpn_mulmid_basecase (rp, ap, CHUNK, bp, bn);
     80      1.1  mrg 
     81      1.1  mrg       /* remaining chunks (B, C, etc) */
     82      1.1  mrg       an -= k;
     83      1.1  mrg 
     84      1.1  mrg       while (an >= CHUNK)
     85      1.1  mrg 	{
     86      1.1  mrg 	  mp_limb_t t0, t1, cy;
     87      1.1  mrg 	  ap += k, rp += k;
     88      1.1  mrg 	  t0 = rp[0], t1 = rp[1];
     89      1.1  mrg 	  mpn_mulmid_basecase (rp, ap, CHUNK, bp, bn);
     90      1.1  mrg 	  ADDC_LIMB (cy, rp[0], rp[0], t0);    /* add back saved limbs */
     91      1.1  mrg 	  MPN_INCR_U (rp + 1, k + 1, t1 + cy);
     92      1.1  mrg 	  an -= k;
     93      1.1  mrg 	}
     94      1.1  mrg 
     95      1.1  mrg       if (an >= bn)
     96      1.1  mrg 	{
     97      1.1  mrg 	  /* last remaining chunk */
     98      1.1  mrg 	  mp_limb_t t0, t1, cy;
     99      1.1  mrg 	  ap += k, rp += k;
    100      1.1  mrg 	  t0 = rp[0], t1 = rp[1];
    101      1.1  mrg 	  mpn_mulmid_basecase (rp, ap, an, bp, bn);
    102      1.1  mrg 	  ADDC_LIMB (cy, rp[0], rp[0], t0);
    103      1.1  mrg 	  MPN_INCR_U (rp + 1, an - bn + 2, t1 + cy);
    104      1.1  mrg 	}
    105      1.1  mrg 
    106      1.1  mrg       return;
    107      1.1  mrg     }
    108      1.1  mrg 
    109      1.1  mrg   /* region is tall enough for toom42 */
    110      1.1  mrg 
    111      1.1  mrg   rn = an - bn + 1;
    112      1.1  mrg 
    113      1.1  mrg   if (rn < MULMID_TOOM42_THRESHOLD)
    114      1.1  mrg     {
    115      1.1  mrg       /* region not wide enough to make toom42 worthwhile for any portion */
    116      1.1  mrg 
    117      1.1  mrg       TMP_DECL;
    118      1.1  mrg 
    119      1.1  mrg       if (bn < CHUNK)
    120      1.1  mrg 	{
    121      1.1  mrg 	  /* region not too tall either, just call basecase directly */
    122      1.1  mrg 	  mpn_mulmid_basecase (rp, ap, an, bp, bn);
    123      1.1  mrg 	  return;
    124      1.1  mrg 	}
    125      1.1  mrg 
    126      1.1  mrg       /* Region quite tall. For better locality, use basecase on chunks:
    127      1.1  mrg 
    128      1.1  mrg 	 AAAAA....
    129      1.1  mrg 	 .AAAAA...
    130      1.1  mrg 	 ..BBBBB..
    131      1.1  mrg 	 ...BBBBB.
    132      1.1  mrg 	 ....CCCCC
    133      1.1  mrg       */
    134      1.1  mrg 
    135      1.1  mrg       TMP_MARK;
    136      1.1  mrg 
    137      1.1  mrg       temp = TMP_ALLOC_LIMBS (rn + 2);
    138      1.1  mrg 
    139      1.1  mrg       /* first chunk (marked A in the above diagram) */
    140      1.1  mrg       bp += bn - CHUNK, an -= bn - CHUNK;
    141      1.1  mrg       mpn_mulmid_basecase (rp, ap, an, bp, CHUNK);
    142      1.1  mrg 
    143      1.1  mrg       /* remaining chunks (B, C, etc) */
    144      1.1  mrg       bn -= CHUNK;
    145      1.1  mrg 
    146      1.1  mrg       while (bn >= CHUNK)
    147      1.1  mrg 	{
    148      1.1  mrg 	  ap += CHUNK, bp -= CHUNK;
    149      1.1  mrg 	  mpn_mulmid_basecase (temp, ap, an, bp, CHUNK);
    150      1.1  mrg 	  mpn_add_n (rp, rp, temp, rn + 2);
    151      1.1  mrg 	  bn -= CHUNK;
    152      1.1  mrg 	}
    153      1.1  mrg 
    154      1.1  mrg       if (bn)
    155      1.1  mrg 	{
    156      1.1  mrg 	  /* last remaining chunk */
    157      1.1  mrg 	  ap += CHUNK, bp -= bn;
    158      1.1  mrg 	  mpn_mulmid_basecase (temp, ap, rn + bn - 1, bp, bn);
    159      1.1  mrg 	  mpn_add_n (rp, rp, temp, rn + 2);
    160      1.1  mrg 	}
    161      1.1  mrg 
    162      1.1  mrg       TMP_FREE;
    163      1.1  mrg       return;
    164      1.1  mrg     }
    165      1.1  mrg 
    166      1.1  mrg   /* we're definitely going to use toom42 somewhere */
    167      1.1  mrg 
    168      1.1  mrg   if (bn > rn)
    169      1.1  mrg     {
    170      1.1  mrg       /* slice region into chunks, use toom42 on all chunks except possibly
    171      1.1  mrg 	 the last:
    172      1.1  mrg 
    173      1.1  mrg          AA....
    174      1.1  mrg          .AA...
    175      1.1  mrg          ..BB..
    176      1.1  mrg          ...BB.
    177      1.1  mrg          ....CC
    178      1.1  mrg       */
    179      1.1  mrg 
    180      1.1  mrg       TMP_DECL;
    181      1.1  mrg       TMP_MARK;
    182      1.1  mrg 
    183      1.1  mrg       temp = TMP_ALLOC_LIMBS (rn + 2 + mpn_toom42_mulmid_itch (rn));
    184      1.1  mrg       scratch = temp + rn + 2;
    185      1.1  mrg 
    186      1.1  mrg       /* first chunk (marked A in the above diagram) */
    187      1.1  mrg       bp += bn - rn;
    188      1.1  mrg       mpn_toom42_mulmid (rp, ap, bp, rn, scratch);
    189      1.1  mrg 
    190      1.1  mrg       /* remaining chunks (B, C, etc) */
    191      1.1  mrg       bn -= rn;
    192      1.1  mrg 
    193      1.1  mrg       while (bn >= rn)
    194      1.1  mrg         {
    195      1.1  mrg           ap += rn, bp -= rn;
    196      1.1  mrg 	  mpn_toom42_mulmid (temp, ap, bp, rn, scratch);
    197      1.1  mrg           mpn_add_n (rp, rp, temp, rn + 2);
    198      1.1  mrg           bn -= rn;
    199      1.1  mrg         }
    200      1.1  mrg 
    201      1.1  mrg       if (bn)
    202      1.1  mrg         {
    203      1.1  mrg           /* last remaining chunk */
    204      1.1  mrg           ap += rn, bp -= bn;
    205      1.1  mrg 	  mpn_mulmid (temp, ap, rn + bn - 1, bp, bn);
    206      1.1  mrg           mpn_add_n (rp, rp, temp, rn + 2);
    207      1.1  mrg         }
    208      1.1  mrg 
    209      1.1  mrg       TMP_FREE;
    210      1.1  mrg     }
    211      1.1  mrg   else
    212      1.1  mrg     {
    213      1.1  mrg       /* slice region into chunks, use toom42 on all chunks except possibly
    214      1.1  mrg 	 the last:
    215      1.1  mrg 
    216      1.1  mrg          AAABBBCC..
    217      1.1  mrg          .AAABBBCC.
    218      1.1  mrg          ..AAABBBCC
    219      1.1  mrg       */
    220      1.1  mrg 
    221      1.1  mrg       TMP_DECL;
    222      1.1  mrg       TMP_MARK;
    223      1.1  mrg 
    224      1.1  mrg       scratch = TMP_ALLOC_LIMBS (mpn_toom42_mulmid_itch (bn));
    225      1.1  mrg 
    226      1.1  mrg       /* first chunk (marked A in the above diagram) */
    227      1.1  mrg       mpn_toom42_mulmid (rp, ap, bp, bn, scratch);
    228      1.1  mrg 
    229      1.1  mrg       /* remaining chunks (B, C, etc) */
    230      1.1  mrg       rn -= bn;
    231      1.1  mrg 
    232      1.1  mrg       while (rn >= bn)
    233      1.1  mrg         {
    234      1.1  mrg 	  mp_limb_t t0, t1, cy;
    235      1.1  mrg           ap += bn, rp += bn;
    236      1.1  mrg           t0 = rp[0], t1 = rp[1];
    237      1.1  mrg           mpn_toom42_mulmid (rp, ap, bp, bn, scratch);
    238      1.1  mrg 	  ADDC_LIMB (cy, rp[0], rp[0], t0);     /* add back saved limbs */
    239      1.1  mrg 	  MPN_INCR_U (rp + 1, bn + 1, t1 + cy);
    240      1.1  mrg 	  rn -= bn;
    241      1.1  mrg         }
    242      1.1  mrg 
    243      1.1  mrg       TMP_FREE;
    244      1.1  mrg 
    245      1.1  mrg       if (rn)
    246      1.1  mrg         {
    247      1.1  mrg           /* last remaining chunk */
    248      1.1  mrg 	  mp_limb_t t0, t1, cy;
    249      1.1  mrg           ap += bn, rp += bn;
    250      1.1  mrg           t0 = rp[0], t1 = rp[1];
    251      1.1  mrg           mpn_mulmid (rp, ap, rn + bn - 1, bp, bn);
    252      1.1  mrg 	  ADDC_LIMB (cy, rp[0], rp[0], t0);
    253      1.1  mrg 	  MPN_INCR_U (rp + 1, rn + 1, t1 + cy);
    254      1.1  mrg         }
    255      1.1  mrg     }
    256      1.1  mrg }
    257