Home | History | Annotate | Line # | Download | only in src
      1 /* Sum -- efficiently sum a list of floating-point numbers
      2 
      3 Copyright 2014-2023 Free Software Foundation, Inc.
      4 Contributed by the AriC and Caramba projects, INRIA.
      5 
      6 This file is part of the GNU MPFR Library.
      7 
      8 The GNU MPFR Library is free software; you can redistribute it and/or modify
      9 it under the terms of the GNU Lesser General Public License as published by
     10 the Free Software Foundation; either version 3 of the License, or (at your
     11 option) any later version.
     12 
     13 The GNU MPFR Library is distributed in the hope that it will be useful, but
     14 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
     15 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
     16 License for more details.
     17 
     18 You should have received a copy of the GNU Lesser General Public License
     19 along with the GNU MPFR Library; see the file COPYING.LESSER.  If not, see
     20 https://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
     21 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */
     22 
     23 #define MPFR_NEED_LONGLONG_H
     24 #include "mpfr-impl.h"
     25 
     26 /* Note: In the prototypes, one uses
     27  *
     28  *   const mpfr_ptr *x      i.e.:  __mpfr_struct *const *x
     29  *
     30  * instead of
     31  *
     32  *   const mpfr_srcptr *x   i.e.:  const __mpfr_struct *const *x
     33  *
     34  * because here one has a double indirection and the type matching rules
     35  * from the C standard in such a case are stricter and they would yield
     36  * annoying errors for the user in practice. See:
     37  *
     38  *   Why can't I pass a char ** to a function which expects a const char **?
     39  *
     40  * in the comp.lang.c FAQ:
     41  *
     42  *   https://c-faq.com/ansi/constmismatch.html
     43  */
     44 
     45 /* See the doc/sum.txt file for the algorithm and a part of its proof
     46 (this will later go into algorithms.tex).
     47 
     48 TODO [VL, after a discussion with James Demmel]: Compared to
     49   James Demmel and Yozo Hida, Fast and accurate floating-point summation
     50   with application to computational geometry, Numerical Algorithms,
     51   volume 37, number 1-4, pages 101--112, 2004.
     52 sorting is not necessary here. It is not done because in the most common
     53 cases (where big cancellations are rare), it would take time and be
     54 useless. However, the lack of sorting increases the worst case complexity.
     55 For instance, consider many inputs that cancel one another (two by two).
     56 One would need n/2 iterations, where each iteration reads the exponent
     57 of each input, therefore n*n/2 read operations. Using a worst-case sort
     58 in O(n log n) could give a O(n log n) worst-case complexity. As we don't
     59 want to slow down the most common cases, this could be done at the 3rd
     60 iteration. But are there practical applications which would be used as
     61 tests?
     62 
     63 Note: see the following paper and its references:
     64   http://www.acsel-lab.com/arithmetic/arith21/papers/p54.pdf
     65   (J. Demmel and H. D. Nguyen, Fast Reproducible Floating-Point Summation)
     66 VL: This is very different:
     67           In MPFR             In the paper & references
     68     arbitrary precision            fixed precision
     69      correct rounding        just reproducible rounding
     70     integer operations        floating-point operations
     71         sequential             parallel (& sequential)
     72 */
     73 
     74 #ifdef MPFR_COV_CHECK
     75 int __gmpfr_cov_sum_tmd[MPFR_RND_MAX][2][2][3][2][2] = { 0 };
     76 #endif
     77 
     78 /* Update minexp (V) after detecting a potential integer overflow in
     79    extreme cases (only a 32-bit ABI may be concerned in practice).
     80    Instead of an assertion failure below, we could
     81    1. check that the ulp of each regular input has an exponent >= MPFR_EXP_MIN
     82       (with an assertion failure if this is not the case);
     83    2. set minexp to MPFR_EXP_MIN and shift the accumulator accordingly
     84       (the sum will then be exact).
     85    However, such cases, which involve huge precisions, will probably
     86    never occur in practice (at least with a 64-bit ABI) and are not
     87    easily testable due to these huge precisions. Moreover, switching
     88    to a 64-bit ABI would be a better solution for such computations.
     89    So, let's leave this unimplemented. */
     90 #define SAFE_SUB(V,E,SH)                        \
     91   do                                            \
     92     {                                           \
     93       mpfr_prec_t sh = (SH);                    \
     94       MPFR_ASSERTN ((E) >= MPFR_EXP_MIN + sh);  \
     95       V = (E) - sh;                             \
     96     }                                           \
     97   while (0)
     98 
     99 /* Function sum_raw
    100  * ================
    101  *
    102  * Accumulate a new [minexp,maxexp[ block into (wp,ws). If e and err denote
    103  * the exponents of the computed result and of the error bound respectively,
    104  * while e - err is less than some given bound (due to cancellation), shift
    105  * the accumulator and reiterate.
    106  *
    107  * Inputs:
    108  *   wp: pointer to the accumulator (least significant limb first).
    109  *   ws: size of the accumulator (in limbs).
    110  *   wq: precision of the accumulator (ws * GMP_NUMB_BITS).
    111  *   x: array of the input numbers.
    112  *   n: size of this array (number of inputs, regular or not).
    113  *   minexp: exponent of the least significant bit of the first block.
    114  *   maxexp: exponent of the first block (exponent of its MSB + 1).
    115  *   tp: pointer to a temporary area (pre-allocated).
    116  *   ts: size of this temporary area.
    117  *   logn: ceil(log2(rn)), where rn is the number of regular inputs.
    118  *   prec: lower bound for e - err (as described above).
    119  *   ep: pointer to mpfr_exp_t (see below), or a null pointer.
    120  *   minexpp: pointer to mpfr_exp_t (see below), or a null pointer.
    121  *   maxexpp: pointer to mpfr_exp_t (see below), or a null pointer.
    122  *
    123  * Preconditions:
    124  *   prec >= 1
    125  *   wq >= logn + prec + 2
    126  *
    127  * This function returns 0 if the accumulator is 0 (which implies that
    128  * the exact sum for this sum_raw invocation is 0), otherwise the number
    129  * of cancelled bits (>= 1), defined as the number of identical bits on
    130  * the most significant part of the accumulator. In the latter case, it
    131  * also returns the following data in variables passed by reference, if
    132  * the pointers are not NULL:
    133  * - in ep: the exponent e of the computed result;
    134  * - in minexpp: the last value of minexp;
    135  * - in maxexpp: the new value of maxexp (for the next iteration after
    136  *   the first invocation of sum_raw in the main code).
    137  *
    138  * Notes:
    139  * - minexp is also the exponent of the least significant bit of the
    140  *   accumulator;
    141  * - the temporary area must be large enough to hold a shifted input
    142  *   block, and the value of ts is used only when the full assertions
    143  *   are checked (i.e. with the --enable-assert configure option), to
    144  *   check that a buffer overflow doesn't occur;
    145  * - contrary to the returned value of minexp (the value in the last
    146  *   iteration), the returned value of maxexp is the one for the next
    147  *   iteration (= maxexp2 of the last iteration).
    148  */
    149 static mpfr_prec_t
    150 sum_raw (mp_limb_t *wp, mp_size_t ws, mpfr_prec_t wq, const mpfr_ptr *x,
    151          unsigned long n, mpfr_exp_t minexp, mpfr_exp_t maxexp,
    152          mp_limb_t *tp, mp_size_t ts, int logn, mpfr_prec_t prec,
    153          mpfr_exp_t *ep, mpfr_exp_t *minexpp, mpfr_exp_t *maxexpp)
    154 {
    155   MPFR_LOG_FUNC
    156     (("ws=%Pd ts=%Pd prec=%Pd", (mpfr_prec_t) ws, (mpfr_prec_t) ts, prec),
    157      ("", 0));
    158 
    159   /* The C code below requires prec >= 0 due to the use of unsigned
    160      integer arithmetic on it. Actually the computation makes sense
    161      only with prec >= 1 (otherwise one can't even know the sign of
    162      the result), hence the following assertion. */
    163   MPFR_ASSERTD (prec >= 1);
    164 
    165   /* Consistency check. */
    166   MPFR_ASSERTD (wq == (mpfr_prec_t) ws * GMP_NUMB_BITS);
    167 
    168   /* The following precondition together with prec >= 1 will imply:
    169      minexp - shiftq < maxexp2, as required by the algorithm. */
    170   MPFR_ASSERTD (wq >= logn + prec + 2);
    171 
    172   while (1)
    173     {
    174       mpfr_exp_t maxexp2 = MPFR_EXP_MIN;
    175       unsigned long i;
    176 
    177       MPFR_LOG_MSG (("sum_raw loop: "
    178                      "maxexp=%" MPFR_EXP_FSPEC "d "
    179                      "minexp=%" MPFR_EXP_FSPEC "d\n",
    180                      (mpfr_eexp_t) maxexp, (mpfr_eexp_t) minexp));
    181 
    182       MPFR_ASSERTD (maxexp > minexp);
    183 
    184       for (i = 0; i < n; i++)
    185         if (! MPFR_IS_SINGULAR (x[i]))  /* Step 1 (see sum_raw in sum.txt) */
    186           {
    187             mp_limb_t *dp, *vp;
    188             mp_size_t ds, vs, vds;
    189             mpfr_exp_t xe, vd;
    190             mpfr_prec_t xq;
    191             int tr;
    192 
    193             xe = MPFR_GET_EXP (x[i]);
    194             xq = MPFR_GET_PREC (x[i]);
    195 
    196             vp = MPFR_MANT (x[i]);
    197             vs = MPFR_PREC2LIMBS (xq);
    198             vd = xe - vs * GMP_NUMB_BITS - minexp;
    199             /* vd is the exponent of the least significant represented bit of
    200                x[i] (including the trailing bits, whose value is 0) minus the
    201                exponent of the least significant bit of the accumulator. To
    202                make the code simpler, we won't try to filter out the trailing
    203                bits of x[i]. */
    204 
    205             /* Steps 2, 3, 4 (see sum_raw in sum.txt) */
    206 
    207             if (vd < 0)
    208               {
    209                 /* This covers the following cases:
    210                  *     [-+- accumulator ---]
    211                  *   [---|----- x[i] ------|--]
    212                  *       |   [----- x[i] --|--]
    213                  *       |                 |[----- x[i] -----]
    214                  *       |                 |    [----- x[i] -----]
    215                  *     maxexp           minexp
    216                  */
    217 
    218                 /* Step 2 for subcase vd < 0 */
    219 
    220                 if (xe <= minexp)
    221                   {
    222                     /* x[i] is entirely after the LSB of the accumulator,
    223                        so that it will be ignored at this iteration. */
    224                     if (xe > maxexp2)
    225                       {
    226                         maxexp2 = xe;
    227                         /* And since the exponent of x[i] is valid... */
    228                         MPFR_ASSERTD (maxexp2 >= MPFR_EMIN_MIN);
    229                       }
    230                     continue;
    231                   }
    232 
    233                 /* Step 3 for subcase vd < 0 */
    234 
    235                 /* If some significant bits of x[i] are after the LSB of the
    236                    accumulator, then maxexp2 will necessarily be minexp. */
    237                 if (MPFR_LIKELY (xe - xq < minexp))
    238                   maxexp2 = minexp;
    239 
    240                 /* Step 4 for subcase vd < 0 */
    241 
    242                 /* We need to ignore the least |vd| significant bits of x[i].
    243                    First, let's ignore the least vds = |vd| / GMP_NUMB_BITS
    244                    limbs. */
    245                 vd = - vd;
    246                 vds = vd / GMP_NUMB_BITS;
    247                 vs -= vds;
    248                 MPFR_ASSERTD (vs > 0);  /* see xe <= minexp test above */
    249                 vp += vds;
    250                 vd -= vds * GMP_NUMB_BITS;
    251                 MPFR_ASSERTD (vd >= 0 && vd < GMP_NUMB_BITS);
    252 
    253                 if (xe > maxexp)
    254                   {
    255                     vs -= (xe - maxexp) / GMP_NUMB_BITS;
    256                     MPFR_ASSERTD (vs > 0);
    257                     tr = (xe - maxexp) % GMP_NUMB_BITS;
    258                   }
    259                 else
    260                   tr = 0;
    261 
    262                 if (vd != 0)
    263                   {
    264                     MPFR_ASSERTD (vs <= ts);
    265                     mpn_rshift (tp, vp, vs, vd);
    266                     vp = tp;
    267                     tr += vd;
    268                     if (tr >= GMP_NUMB_BITS)
    269                       {
    270                         vs--;
    271                         tr -= GMP_NUMB_BITS;
    272                       }
    273                     MPFR_ASSERTD (vs >= 1);
    274                     MPFR_ASSERTD (tr >= 0 && tr < GMP_NUMB_BITS);
    275                     if (tr != 0)
    276                       {
    277                         tp[vs-1] &= MPFR_LIMB_MASK (GMP_NUMB_BITS - tr);
    278                         tr = 0;
    279                       }
    280                     /* Truncation has now been taken into account. */
    281                     MPFR_ASSERTD (tr == 0);
    282                   }
    283 
    284                 dp = wp;
    285                 ds = ws;
    286               }
    287             else  /* vd >= 0 */
    288               {
    289                 /* This covers the following cases:
    290                  *               [-+- accumulator ---]
    291                  *   [- x[i] -]    |                 |
    292                  *             [---|-- x[i] ------]  |
    293                  *          [------|-- x[i] ---------]
    294                  *                 |   [- x[i] -]    |
    295                  *               maxexp           minexp
    296                  */
    297 
    298                 /* Steps 2 and 3 for subcase vd >= 0 */
    299 
    300                 MPFR_ASSERTD (xe - xq >= minexp);  /* see definition of vd */
    301 
    302                 /* Step 4 for subcase vd >= 0 */
    303 
    304                 /* We need to ignore the least vd significant bits
    305                    of the accumulator. First, let's ignore the least
    306                    vds = vd / GMP_NUMB_BITS limbs. -> (dp,ds) */
    307                 vds = vd / GMP_NUMB_BITS;
    308                 ds = ws - vds;
    309                 if (ds <= 0)
    310                   continue;
    311                 dp = wp + vds;
    312                 vd -= vds * GMP_NUMB_BITS;
    313                 MPFR_ASSERTD (vd >= 0 && vd < GMP_NUMB_BITS);
    314 
    315                 /* The low part of x[i] (to be determined) will have to be
    316                    shifted vd bits to the left if vd != 0. */
    317 
    318                 if (xe > maxexp)
    319                   {
    320                     vs -= (xe - maxexp) / GMP_NUMB_BITS;
    321                     if (vs <= 0)
    322                       continue;
    323                     tr = (xe - maxexp) % GMP_NUMB_BITS;
    324                   }
    325                 else
    326                   tr = 0;
    327 
    328                 MPFR_ASSERTD (tr >= 0 && tr < GMP_NUMB_BITS && vs > 0);
    329 
    330                 /* We need to consider the least significant vs limbs of x[i]
    331                    except the most significant tr bits. */
    332 
    333                 if (vd != 0)
    334                   {
    335                     mp_limb_t carry;
    336 
    337                     MPFR_ASSERTD (vs <= ts);
    338                     carry = mpn_lshift (tp, vp, vs, vd);
    339                     tr -= vd;
    340                     if (tr < 0)
    341                       {
    342                         tr += GMP_NUMB_BITS;
    343                         MPFR_ASSERTD (vs + 1 <= ts);
    344                         tp[vs++] = carry;
    345                       }
    346                     MPFR_ASSERTD (tr >= 0 && tr < GMP_NUMB_BITS);
    347                     vp = tp;
    348                   }
    349               }  /* vd >= 0 */
    350 
    351             MPFR_ASSERTD (vs > 0 && vs <= ds);
    352 
    353             /* We can't truncate the most significant limb of the input
    354                (in case it hasn't been shifted to the temporary area).
    355                So, let's ignore it now. It will be taken into account
    356                via carry propagation after the addition. */
    357             if (tr != 0)
    358               vs--;
    359 
    360             /* Step 5 (see sum_raw in sum.txt) */
    361 
    362             if (MPFR_IS_POS (x[i]))
    363               {
    364                 mp_limb_t carry;
    365 
    366                 carry = vs > 0 ? mpn_add_n (dp, dp, vp, vs) : 0;
    367                 MPFR_ASSERTD (carry <= 1);
    368                 if (tr != 0)
    369                   carry += vp[vs] & MPFR_LIMB_MASK (GMP_NUMB_BITS - tr);
    370                 if (ds > vs)
    371                   mpn_add_1 (dp + vs, dp + vs, ds - vs, carry);
    372               }
    373             else
    374               {
    375                 mp_limb_t borrow;
    376 
    377                 borrow = vs > 0 ? mpn_sub_n (dp, dp, vp, vs) : 0;
    378                 MPFR_ASSERTD (borrow <= 1);
    379                 if (tr != 0)
    380                   borrow += vp[vs] & MPFR_LIMB_MASK (GMP_NUMB_BITS - tr);
    381                 if (ds > vs)
    382                   mpn_sub_1 (dp + vs, dp + vs, ds - vs, borrow);
    383               }
    384           }
    385 
    386       {
    387         mpfr_prec_t cancel;  /* number of cancelled bits */
    388         mp_size_t wi;        /* index in the accumulator */
    389         mp_limb_t a, b;
    390         int cnt;
    391 
    392         cancel = 0;
    393         wi = ws - 1;
    394         MPFR_ASSERTD (wi >= 0);
    395         a = wp[wi] >> (GMP_NUMB_BITS - 1) ? MPFR_LIMB_MAX : MPFR_LIMB_ZERO;
    396 
    397         while (wi >= 0)
    398           if ((b = wp[wi]) == a)
    399             {
    400               cancel += GMP_NUMB_BITS;
    401               wi--;
    402             }
    403           else
    404             {
    405               b ^= a;
    406               MPFR_ASSERTD (b != 0);
    407               count_leading_zeros (cnt, b);
    408               cancel += cnt;
    409               break;
    410             }
    411 
    412         if (wi >= 0 || a != MPFR_LIMB_ZERO)  /* accumulator != 0 */
    413           {
    414             mpfr_exp_t e;        /* exponent of the computed result */
    415             mpfr_exp_t err;      /* exponent of the error bound */
    416 
    417             MPFR_LOG_MSG (("accumulator %s 0, cancel=%Pd\n",
    418                            a != MPFR_LIMB_ZERO ? "<" : ">", cancel));
    419 
    420             MPFR_ASSERTD (cancel > 0);
    421             e = minexp + wq - cancel;
    422             MPFR_ASSERTD (e >= minexp);
    423             err = maxexp2 + logn;  /* OK even if maxexp2 == MPFR_EXP_MIN */
    424 
    425             /* The absolute value of the truncated sum is in the binade
    426                [2^(e-1),2^e] (closed on both ends due to two's complement).
    427                The error is strictly less than 2^err (and is 0 if
    428                maxexp2 == MPFR_EXP_MIN). */
    429 
    430             /* This basically tests whether err <= e - prec without
    431                potential integer overflow (since prec >= 0)...
    432                Note that the maxexp2 == MPFR_EXP_MIN test is there just for
    433                the potential corner case e - prec < MPFR_EXP_MIN + logn.
    434                Such corner cases, involving specific huge-precision numbers,
    435                are probably not supported in many places in MPFR, but this
    436                test doesn't hurt... */
    437             if (maxexp2 == MPFR_EXP_MIN ||
    438                 (err <= e && SAFE_DIFF (mpfr_uexp_t, e, err) >= prec))
    439               {
    440                 MPFR_LOG_MSG (("(err=%" MPFR_EXP_FSPEC "d) <= (e=%"
    441                                MPFR_EXP_FSPEC "d) - (prec=%Pd)\n",
    442                                (mpfr_eexp_t) err, (mpfr_eexp_t) e, prec));
    443                 /* To avoid tests or copies, we consider the only two cases
    444                    that will occur in sum_aux. */
    445                 MPFR_ASSERTD ((ep != NULL &&
    446                                minexpp != NULL &&
    447                                maxexpp != NULL) ||
    448                               (ep == NULL &&
    449                                minexpp == NULL &&
    450                                maxexpp == NULL));
    451                 if (ep != NULL)
    452                   {
    453                     *ep = e;
    454                     *minexpp = minexp;
    455                     *maxexpp = maxexp2;
    456                   }
    457                 MPFR_LOG_MSG (("return with minexp=%" MPFR_EXP_FSPEC
    458                                "d maxexp2=%" MPFR_EXP_FSPEC "d%s\n",
    459                                (mpfr_eexp_t) minexp, (mpfr_eexp_t) maxexp2,
    460                                maxexp2 == MPFR_EXP_MIN ?
    461                                " (MPFR_EXP_MIN)" : ""));
    462                 return cancel;
    463               }
    464             else
    465               {
    466                 mpfr_exp_t diffexp;
    467                 mpfr_prec_t shiftq;
    468                 mpfr_size_t shifts;
    469                 int shiftc;
    470 
    471                 MPFR_LOG_MSG (("e=%" MPFR_EXP_FSPEC "d err=%" MPFR_EXP_FSPEC
    472                                "d maxexp2=%" MPFR_EXP_FSPEC "d%s\n",
    473                                (mpfr_eexp_t) e, (mpfr_eexp_t) err,
    474                                (mpfr_eexp_t) maxexp2,
    475                                maxexp2 == MPFR_EXP_MIN ?
    476                                " (MPFR_EXP_MIN)" : ""));
    477 
    478                 diffexp = err - e;
    479                 if (diffexp < 0)
    480                   diffexp = 0;
    481                 /* diffexp = max(0, err - e) */
    482 
    483                 MPFR_LOG_MSG (("diffexp=%" MPFR_EXP_FSPEC "d\n",
    484                                 (mpfr_eexp_t) diffexp));
    485 
    486                 MPFR_ASSERTD (diffexp < cancel - 2);
    487                 shiftq = cancel - 2 - (mpfr_prec_t) diffexp;
    488                 /* equivalent to: minexp + wq - 2 - max(e,err) */
    489                 MPFR_ASSERTD (shiftq > 0);
    490                 shifts = shiftq / GMP_NUMB_BITS;
    491                 shiftc = shiftq % GMP_NUMB_BITS;
    492                 MPFR_LOG_MSG (("shiftq = %Pd = %Pd * GMP_NUMB_BITS + %d\n",
    493                                shiftq, (mpfr_prec_t) shifts, shiftc));
    494                 if (MPFR_LIKELY (shiftc != 0))
    495                   mpn_lshift (wp + shifts, wp, ws - shifts, shiftc);
    496                 else
    497                   mpn_copyd (wp + shifts, wp, ws - shifts);
    498                 MPN_ZERO (wp, shifts);
    499                 /* Compute minexp = minexp - shiftq safely. */
    500                 SAFE_SUB (minexp, minexp, shiftq);
    501                 MPFR_ASSERTD (minexp < maxexp2);
    502               }
    503           }
    504         else if (maxexp2 == MPFR_EXP_MIN)
    505           {
    506             MPFR_LOG_MSG (("accumulator = 0, maxexp2 = MPFR_EXP_MIN\n", 0));
    507             return 0;
    508           }
    509         else
    510           {
    511             MPFR_LOG_MSG (("accumulator = 0, reiterate\n", 0));
    512             /* Compute minexp = maxexp2 - (wq - (logn + 1)) safely. */
    513             SAFE_SUB (minexp, maxexp2, wq - (logn + 1));
    514             /* Note: the logn + 1 corresponds to cq in the main code. */
    515           }
    516       }
    517 
    518       maxexp = maxexp2;
    519     }
    520 }
    521 
    522 /**********************************************************************/
    523 
    524 /* Generic case: all the inputs are finite numbers,
    525    with at least 3 regular numbers. */
    526 static int
    527 sum_aux (mpfr_ptr sum, const mpfr_ptr *x, unsigned long n, mpfr_rnd_t rnd,
    528          mpfr_exp_t maxexp, unsigned long rn)
    529 {
    530   mp_limb_t *sump;
    531   mp_limb_t *tp;  /* pointer to a temporary area */
    532   mp_limb_t *wp;  /* pointer to the accumulator */
    533   mp_size_t ts;   /* size of the temporary area, in limbs */
    534   mp_size_t ws;   /* size of the accumulator, in limbs */
    535   mp_size_t zs;   /* size of the TMD accumulator, in limbs */
    536   mpfr_prec_t wq; /* size of the accumulator, in bits */
    537   int logn;       /* ceil(log2(rn)) */
    538   int cq;
    539   mpfr_prec_t sq;
    540   int inex;
    541   MPFR_TMP_DECL (marker);
    542 
    543   MPFR_LOG_FUNC
    544     (("n=%lu rnd=%d maxexp=%" MPFR_EXP_FSPEC "d rn=%lu",
    545       n, rnd, (mpfr_eexp_t) maxexp, rn),
    546      ("sum[%Pd]=%.*Rg", mpfr_get_prec (sum), mpfr_log_prec, sum));
    547 
    548   MPFR_ASSERTD (rn >= 3 && rn <= n);
    549 
    550   /* In practice, no integer overflow on the exponent. */
    551   MPFR_STAT_STATIC_ASSERT (MPFR_EXP_MAX - MPFR_EMAX_MAX >=
    552                            sizeof (unsigned long) * CHAR_BIT);
    553 
    554   /* Set up some variables and the accumulator. */
    555 
    556   sump = MPFR_MANT (sum);
    557 
    558   /* rn is the number of regular inputs (the singular ones will be
    559      ignored). Compute logn = ceil(log2(rn)). */
    560   logn = MPFR_INT_CEIL_LOG2 (rn);
    561   MPFR_ASSERTD (logn >= 2);
    562 
    563   MPFR_LOG_MSG (("logn=%d maxexp=%" MPFR_EXP_FSPEC "d\n",
    564                  logn, (mpfr_eexp_t) maxexp));
    565 
    566   sq = MPFR_GET_PREC (sum);
    567   cq = logn + 1;
    568 
    569   /* First determine the size of the accumulator.
    570    * cq + sq + logn + 2 >= logn + sq + 5, which will be used later.
    571    * The assertion wq - cq - sq >= 4 is another way to check that.
    572    */
    573   ws = MPFR_PREC2LIMBS (cq + sq + logn + 2);
    574   wq = (mpfr_prec_t) ws * GMP_NUMB_BITS;
    575   MPFR_ASSERTD (wq - cq - sq >= 4);
    576 
    577   /* TODO: timings, comparing with a larger zs. */
    578   zs = MPFR_PREC2LIMBS (wq - sq);
    579 
    580   MPFR_LOG_MSG (("cq=%d sq=%Pd logn=%d wq=%Pd\n", cq, sq, logn, wq));
    581 
    582   /* An input block will have up to wq - cq bits, and its shifted value
    583      (to be correctly aligned) may take GMP_NUMB_BITS - 1 additional bits. */
    584   ts = MPFR_PREC2LIMBS (wq - cq + GMP_NUMB_BITS - 1);
    585 
    586   MPFR_TMP_MARK (marker);
    587 
    588   /* Note: If the TMD does not occur, which should be the case for most
    589      sums, allocating zs limbs is not necessary. However, we choose to
    590      do this now (thus in all cases) because zs is very small, so that
    591      the difference on the memory footprint will not be noticeable.
    592      More precisely, zs is at most 2 in practice with the current code;
    593      we may want to increase it in order to avoid performance issues in
    594      some unlikely corner cases, but even in this case, it will remain
    595      small.
    596      One will have:
    597        [------ ts ------][------ ws ------][- zs -]
    598      The following would probably be better:
    599        [------ ts ------]  [------ ws ------]
    600                    [- zs -]
    601      i.e. where the TMD accumulator (partially or completely) takes
    602      some unneeded part of the temporary area in order to improve
    603      data locality. But
    604        * in low precision, data locality is regarded as ensured even
    605          with the actual choice;
    606        * in high precision, data locality for TMD resolution may not
    607          be that important.
    608   */
    609   tp = MPFR_TMP_LIMBS_ALLOC (ts + ws + zs);
    610   wp = tp + ts;
    611 
    612   MPN_ZERO (wp, ws);  /* zero the accumulator */
    613 
    614   {
    615     mpfr_exp_t minexp;   /* exponent of the LSB of the block for sum_raw */
    616     mpfr_prec_t cancel;  /* number of cancelled bits */
    617     mpfr_exp_t e;        /* temporary exponent of the result */
    618     mpfr_exp_t u;        /* temporary exponent of the ulp (quantum) */
    619     mp_limb_t lbit;      /* last bit (useful if even rounding) */
    620     mp_limb_t rbit;      /* rounding bit (corrected in halfway case) */
    621     int corr;            /* correction term (from -1 to 2) */
    622     int sd, sh;          /* shift counts */
    623     mp_size_t sn;        /* size of the output number */
    624     int tmd;             /* 0: the TMD does not occur
    625                             1: the TMD occurs on a machine number
    626                             2: the TMD occurs on a midpoint */
    627     int neg;             /* 0 if positive sum, 1 if negative */
    628     int sgn;             /* +1 if positive sum, -1 if negative */
    629 
    630     MPFR_LOG_MSG (("Compute an approximation with sum_raw...\n", 0));
    631 
    632     /* Compute minexp = maxexp - (wq - cq) safely. */
    633     SAFE_SUB (minexp, maxexp, wq - cq);
    634     MPFR_ASSERTD (wq >= logn + sq + 5);
    635     cancel = sum_raw (wp, ws, wq, x, n, minexp, maxexp, tp, ts,
    636                       logn, sq + 3, &e, &minexp, &maxexp);
    637 
    638     if (MPFR_UNLIKELY (cancel == 0))
    639       {
    640         /* The exact sum is zero. Since not all inputs are 0, the sum
    641          * is +0 except in MPFR_RNDD, as specified according to the
    642          * IEEE 754 rules for the addition of two numbers.
    643          */
    644         MPFR_SET_SIGN (sum, (rnd != MPFR_RNDD ?
    645                              MPFR_SIGN_POS : MPFR_SIGN_NEG));
    646         MPFR_SET_ZERO (sum);
    647         MPFR_TMP_FREE (marker);
    648         MPFR_RET (0);
    649       }
    650 
    651     /* The absolute value of the truncated sum is in the binade
    652        [2^(e-1),2^e] (closed on both ends due to two's complement).
    653        The error is strictly less than 2^(maxexp + logn) (and is 0
    654        if maxexp == MPFR_EXP_MIN). */
    655 
    656     u = e - sq;  /* e being the exponent, u is the ulp of the target */
    657 
    658     /* neg = 1 if negative, 0 if positive. */
    659     neg = wp[ws-1] >> (GMP_NUMB_BITS - 1);
    660     MPFR_ASSERTD (neg == 0 || neg == 1);
    661 
    662     sgn = neg ? -1 : 1;
    663     MPFR_ASSERTN (sgn == (neg ? MPFR_SIGN_NEG : MPFR_SIGN_POS));
    664 
    665     MPFR_LOG_MSG (("neg=%d sgn=%d cancel=%Pd"
    666                    " e=%" MPFR_EXP_FSPEC "d"
    667                    " u=%" MPFR_EXP_FSPEC "d"
    668                    " maxexp=%" MPFR_EXP_FSPEC "d%s\n",
    669                    neg, sgn, cancel, (mpfr_eexp_t) e, (mpfr_eexp_t) u,
    670                    (mpfr_eexp_t) maxexp,
    671                    maxexp == MPFR_EXP_MIN ? " (MPFR_EXP_MIN)" : ""));
    672 
    673     if (rnd == MPFR_RNDF)
    674       {
    675         /* Rounding the approximate value to nearest (ties don't matter) is
    676            sufficient. We need to get the rounding bit; the code is similar
    677            to a part from the generic code (here, corr = rbit). */
    678         if (MPFR_LIKELY (u > minexp))
    679           {
    680             mpfr_prec_t tq;
    681             mp_size_t wi;
    682             int td;
    683 
    684             tq = u - minexp;
    685             MPFR_ASSERTD (tq > 0); /* number of trailing bits */
    686             MPFR_LOG_MSG (("tq=%Pd\n", tq));
    687 
    688             wi = tq / GMP_NUMB_BITS;
    689             td = tq % GMP_NUMB_BITS;
    690             corr = td >= 1 ? ((wp[wi] >> (td - 1)) & MPFR_LIMB_ONE) :
    691               (MPFR_ASSERTD (wi >= 1), wp[wi-1] >> (GMP_NUMB_BITS - 1));
    692           }
    693         else
    694           corr = 0;
    695         inex = 0;  /* not meaningful, but needs to have a value */
    696       }
    697     else  /* rnd != MPFR_RNDF */
    698       {
    699         if (MPFR_LIKELY (u > minexp))
    700           {
    701             mpfr_prec_t tq;
    702             mp_size_t wi;
    703             int td;
    704 
    705             tq = u - minexp;
    706             MPFR_ASSERTD (tq > 0); /* number of trailing bits */
    707             MPFR_LOG_MSG (("tq=%Pd\n", tq));
    708 
    709             wi = tq / GMP_NUMB_BITS;
    710 
    711             /* Determine the rounding bit, which is represented. */
    712             td = tq % GMP_NUMB_BITS;
    713             lbit = (wp[wi] >> td) & MPFR_LIMB_ONE;
    714             rbit = td >= 1 ? ((wp[wi] >> (td - 1)) & MPFR_LIMB_ONE) :
    715               (MPFR_ASSERTD (wi >= 1), wp[wi-1] >> (GMP_NUMB_BITS - 1));
    716             MPFR_ASSERTD (rbit == 0 || rbit == 1);
    717             MPFR_LOG_MSG (("rbit=%d\n", (int) rbit));
    718 
    719             if (maxexp == MPFR_EXP_MIN)
    720               {
    721                 /* The sum in the accumulator is exact. Determine inex:
    722                    inex = 0 if the final sum is exact, else 1, i.e.
    723                    inex = rounding bit || sticky bit. In round to nearest,
    724                    also determine the rounding direction: obtained from
    725                    the rounding bit possibly except in halfway cases.
    726                    Halfway cases are rounded toward -inf iff the last bit
    727                    of the truncated significand in two's complement is 0
    728                    (in precision > 1, because the parity after rounding is
    729                    the same in two's complement and sign + magnitude; in
    730                    precision 1, one checks that the rule works for both
    731                    positive (lbit == 1) and negative (lbit == 0) numbers,
    732                    rounding halfway cases away from zero). */
    733                 if (MPFR_LIKELY (rbit == 0 || (rnd == MPFR_RNDN && lbit == 0)))
    734                   {
    735                     /* We need to determine the sticky bit, either to set inex
    736                        (if the rounding bit is 0) or to possibly "correct" rbit
    737                        (round to nearest, halfway case rounded downward) from
    738                        which the rounding direction will be determined. */
    739                     MPFR_LOG_MSG (("Determine the sticky bit...\n", 0));
    740 
    741                     inex = td >= 2 ? (wp[wi] & MPFR_LIMB_MASK (td - 1)) != 0
    742                       : td == 0 ?
    743                       (MPFR_ASSERTD (wi >= 1),
    744                        (wp[--wi] & MPFR_LIMB_MASK (GMP_NUMB_BITS - 1)) != 0)
    745                       : 0;
    746 
    747                     if (!inex)
    748                       {
    749                         while (!inex && wi > 0)
    750                           inex = wp[--wi] != 0;
    751                         if (!inex && rbit != 0)
    752                           {
    753                             /* sticky bit = 0, rounding bit = 1,
    754                                i.e. halfway case, which will be
    755                                rounded downward (see earlier if). */
    756                             MPFR_ASSERTD (rnd == MPFR_RNDN);
    757                             inex = 1;
    758                             rbit = 0;  /* even rounding downward */
    759                             MPFR_LOG_MSG (("Halfway case rounded downward;"
    760                                            " set inex=1 rbit=0\n", 0));
    761                           }
    762                       }
    763                   }
    764                 else
    765                   inex = 1;
    766                 tmd = 0;  /* We can round correctly -> no TMD. */
    767               }
    768             else  /* maxexp > MPFR_EXP_MIN */
    769               {
    770                 mpfr_exp_t d;
    771                 mp_limb_t limb, mask;
    772                 int nbits;
    773 
    774                 /* Since maxexp was set to either the exponent of a x[i] or
    775                    to minexp... */
    776                 MPFR_ASSERTD (maxexp >= MPFR_EMIN_MIN || maxexp == minexp);
    777 
    778                 inex = 1;  /* We do not know whether the sum is exact. */
    779 
    780                 MPFR_ASSERTD (u <= MPFR_EMAX_MAX && u <= minexp + wq);
    781                 d = u - (maxexp + logn);  /* representable */
    782                 MPFR_ASSERTD (d >= 3);  /* due to prec = sq + 3 in sum_raw */
    783 
    784                 /* Let's see whether the TMD occurs by looking at the d bits
    785                    following the ulp bit, or the d-1 bits after the rounding
    786                    bit. */
    787 
    788                 /* First chunk after the rounding bit... It starts at:
    789                    (wi,td-2) if td >= 2,
    790                    (wi-1,td-2+GMP_NUMB_BITS) if td < 2. */
    791                 if (td == 0)
    792                   {
    793                     MPFR_ASSERTD (wi >= 1);
    794                     limb = wp[--wi];
    795                     mask = MPFR_LIMB_MASK (GMP_NUMB_BITS - 1);
    796                     nbits = GMP_NUMB_BITS;
    797                   }
    798                 else if (td == 1)
    799                   {
    800                     limb = wi >= 1 ? wp[--wi] : MPFR_LIMB_ZERO;
    801                     mask = MPFR_LIMB_MAX;
    802                     nbits = GMP_NUMB_BITS + 1;
    803                   }
    804                 else  /* td >= 2 */
    805                   {
    806                     MPFR_ASSERTD (td >= 2);
    807                     limb = wp[wi];
    808                     mask = MPFR_LIMB_MASK (td - 1);
    809                     nbits = td;
    810                   }
    811 
    812                 /* nbits: number of bits of the first chunk + 1
    813                    (the +1 is for the rounding bit). */
    814 
    815                 if (nbits > d)
    816                   {
    817                     /* Some low significant bits must be ignored. */
    818                     limb >>= nbits - d;
    819                     mask >>= nbits - d;
    820                     d = 0;
    821                   }
    822                 else
    823                   {
    824                     d -= nbits;
    825                     MPFR_ASSERTD (d >= 0);
    826                   }
    827 
    828                 limb &= mask;
    829                 tmd =
    830                   limb == MPFR_LIMB_ZERO ?
    831                     (rbit == 0 ? 1 : rnd == MPFR_RNDN ? 2 : 0) :
    832                   limb == mask ?
    833                     (limb = MPFR_LIMB_MAX,
    834                      rbit != 0 ? 1 : rnd == MPFR_RNDN ? 2 : 0) : 0;
    835 
    836                 while (tmd != 0 && d != 0)
    837                   {
    838                     mp_limb_t limb2;
    839 
    840                     MPFR_ASSERTD (d > 0);
    841                     if (wi == 0)
    842                       {
    843                         /* The non-represented bits are 0's. */
    844                         if (limb != MPFR_LIMB_ZERO)
    845                           tmd = 0;
    846                         break;
    847                       }
    848                     MPFR_ASSERTD (wi > 0);
    849                     limb2 = wp[--wi];
    850                     if (d < GMP_NUMB_BITS)
    851                       {
    852                         int c = GMP_NUMB_BITS - d;
    853                         MPFR_ASSERTD (c > 0 && c < GMP_NUMB_BITS);
    854                         if ((limb2 >> c) != (limb >> c))
    855                           tmd = 0;
    856                         break;
    857                       }
    858                     if (limb2 != limb)
    859                       tmd = 0;
    860                     d -= GMP_NUMB_BITS;
    861                   }
    862               }
    863           }
    864         else  /* u <= minexp */
    865           {
    866             /* The exact value of the accumulator will be copied.
    867              * The TMD occurs if and only if there are bits still
    868              * not taken into account, and if it occurs, this is
    869              * necessarily on a machine number (-> tmd = 1).
    870              */
    871             lbit = u == minexp ? wp[0] & MPFR_LIMB_ONE : 0;
    872             rbit = 0;
    873             inex = tmd = maxexp != MPFR_EXP_MIN;
    874           }
    875 
    876         MPFR_ASSERTD (rbit == 0 || rbit == 1);
    877 
    878         MPFR_LOG_MSG (("tmd=%d lbit=%d rbit=%d inex=%d neg=%d\n",
    879                        tmd, (int) lbit, (int) rbit, inex, neg));
    880 
    881         /* Here, if the final sum is known to be exact, inex = 0, otherwise
    882          * inex = 1. We have a truncated significand, a trailing term t such
    883          * that 0 <= t < 1 ulp, and an error on the trailing term bounded by
    884          * t' in absolute value. Thus the error e on the truncated significand
    885          * satisfies -t' <= e < 1 ulp + t'. Thus one has 4 correction cases
    886          * denoted by a corr value between -1 and 2 depending on e, neg, rbit,
    887          * and the rounding mode:
    888          *   -1: equivalent to nextbelow;
    889          *    0: the truncated significand is not corrected;
    890          *    1: add 1 ulp;
    891          *    2: add 1 ulp, then nextabove.
    892          * The nextbelow and nextabove are used here since there may be a
    893          * change of the binade.
    894          */
    895 
    896         if (tmd == 0)  /* no TMD */
    897           {
    898             switch (rnd)
    899               {
    900               case MPFR_RNDD:
    901                 corr = 0;
    902                 break;
    903               case MPFR_RNDU:
    904                 corr = inex;
    905                 break;
    906               case MPFR_RNDZ:
    907                 corr = inex && neg;
    908                 break;
    909               case MPFR_RNDA:
    910                 corr = inex && !neg;
    911                 break;
    912               default:
    913                 MPFR_ASSERTN (rnd == MPFR_RNDN);
    914                 /* Note: for halfway cases (maxexp == MPFR_EXP_MIN) that are
    915                    rounded downward, rbit has been changed to 0 so that corr
    916                    is set correctly. */
    917                 corr = rbit;
    918               }
    919             MPFR_ASSERTD (corr == 0 || corr == 1);
    920             if (inex &&
    921                 corr == 0)  /* two's complement significand decreased */
    922               inex = -1;
    923           }
    924         else  /* tmd */
    925           {
    926             mpfr_exp_t minexp2;
    927             mpfr_prec_t cancel2;
    928             mpfr_exp_t err;  /* exponent of the error bound */
    929             mp_size_t zz;    /* nb of limbs to zero in the TMD accumulator */
    930             mp_limb_t *zp;   /* pointer to the TMD accumulator */
    931             mpfr_prec_t zq;  /* size of the TMD accumulator, in bits */
    932             int sst;         /* sign of the secondary term */
    933 
    934             /* TMD case. Here we use a new variable minexp2, with the same
    935                meaning as minexp, as we want to keep the minexp value for
    936                the copy to the destination. */
    937 
    938             MPFR_ASSERTD (maxexp > MPFR_EXP_MIN);
    939             MPFR_ASSERTD (tmd == 1 || tmd == 2);
    940 
    941             /* TMD accumulator */
    942             zp = wp + ws;
    943             zq = (mpfr_prec_t) zs * GMP_NUMB_BITS;
    944 
    945             err = maxexp + logn;
    946 
    947             MPFR_LOG_MSG (("TMD with"
    948                            " maxexp=%" MPFR_EXP_FSPEC "d"
    949                            " err=%" MPFR_EXP_FSPEC "d"
    950                            " zs=%Pd"
    951                            " zq=%Pd\n",
    952                            (mpfr_eexp_t) maxexp, (mpfr_eexp_t) err,
    953                            (mpfr_prec_t) zs, zq));
    954 
    955             /* The d-1 bits from u-2 to u-d (= err) are identical. */
    956 
    957             if (err >= minexp)
    958               {
    959                 mpfr_prec_t tq;
    960                 mp_size_t wi;
    961                 int td;
    962 
    963                 /* Let's keep the last 2 over the d-1 identical bits and the
    964                    following bits, i.e. the bits from err+1 to minexp. */
    965                 tq = err - minexp + 2;  /* tq = number of such bits */
    966                 MPFR_LOG_MSG (("[TMD] tq=%Pd\n", tq));
    967                 MPFR_ASSERTD (tq >= 2);
    968 
    969                 wi = tq / GMP_NUMB_BITS;
    970                 td = tq % GMP_NUMB_BITS;
    971 
    972                 /* Note: The "else" (td == 0) branch below can be executed
    973                    only if tq >= GMP_NUMB_BITS, which is possible only when
    974                    logn is large enough. Indeed, if tq > logn + some constant,
    975                    this means that the TMD did not occur.
    976                    TODO: Find an upper bound on tq, and add a corresponding
    977                    MPFR_ASSERTD assertion / hint. On some platforms, this
    978                    branch might be dead code, and such information would
    979                    allow the compiler to remove it.
    980                    It seems that this branch is never tested (r12754). */
    981 
    982                 if (td != 0)
    983                   {
    984                     wi++;  /* number of words with represented bits */
    985                     td = GMP_NUMB_BITS - td;
    986                     zz = zs - wi;
    987                     MPFR_ASSERTD (zz >= 0 && zz < zs);
    988                     mpn_lshift (zp + zz, wp, wi, td);
    989                   }
    990                 else
    991                   {
    992                     MPFR_ASSERTD (wi > 0);
    993                     zz = zs - wi;
    994                     MPFR_ASSERTD (zz >= 0 && zz < zs);
    995                     if (zz > 0)
    996                       MPN_COPY (zp + zz, wp, wi);
    997                   }
    998 
    999                 /* Compute minexp2 = minexp - (zs * GMP_NUMB_BITS + td)
   1000                    safely. */
   1001                 SAFE_SUB (minexp2, minexp, zz * GMP_NUMB_BITS + td);
   1002                 MPFR_ASSERTD (minexp2 == err + 2 - zq);
   1003               }
   1004             else  /* err < minexp */
   1005               {
   1006                 /* At least one of the identical bits is not represented,
   1007                    meaning that it is 0 and all these bits are 0's. Thus
   1008                    the accumulator will be 0. The new minexp is determined
   1009                    from maxexp, with cq bits reserved to avoid an overflow
   1010                    (as in the early steps). */
   1011                 MPFR_LOG_MSG (("[TMD] err < minexp\n", 0));
   1012                 zz = zs;
   1013 
   1014                 /* Compute minexp2 = maxexp - (zq - cq) safely. */
   1015                 SAFE_SUB (minexp2, maxexp, zq - cq);
   1016                 MPFR_ASSERTD (minexp2 == err + 1 - zq);
   1017               }
   1018 
   1019             MPN_ZERO (zp, zz);
   1020 
   1021             /* We need to determine the sign sst of the secondary term.
   1022                In sum_raw, since the truncated sum corresponding to this
   1023                secondary term will be in [2^(e-1),2^e] and the error
   1024                strictly less than 2^err, we can stop the iterations when
   1025                e - err >= 1 (this bound is the 11th argument of sum_raw). */
   1026             cancel2 = sum_raw (zp, zs, zq, x, n, minexp2, maxexp, tp, ts,
   1027                                logn, 1, NULL, NULL, NULL);
   1028 
   1029             if (cancel2 != 0)
   1030               sst = MPFR_LIMB_MSB (zp[zs-1]) == 0 ? 1 : -1;
   1031             else if (tmd == 1)
   1032               sst = 0;
   1033             else
   1034               {
   1035                 /* For halfway cases, let's virtually eliminate them
   1036                    by setting a sst equivalent to a non-halfway case,
   1037                    which depends on the last bit of the pre-rounded
   1038                    result. */
   1039                 MPFR_ASSERTD (rnd == MPFR_RNDN && tmd == 2);
   1040                 sst = lbit != 0 ? 1 : -1;
   1041               }
   1042 
   1043             MPFR_LOG_MSG (("[TMD] tmd=%d rbit=%d sst=%d\n",
   1044                            tmd, (int) rbit, sst));
   1045 
   1046             /* Do not consider the corrected sst for MPFR_COV_SET */
   1047             MPFR_COV_SET (sum_tmd[(int) rnd][tmd-1][rbit]
   1048                           [cancel2 == 0 ? 1 : sst+1][neg][sq > MPFR_PREC_MIN]);
   1049 
   1050             inex =
   1051               MPFR_IS_LIKE_RNDD (rnd, sgn) ? (sst ? -1 : 0) :
   1052               MPFR_IS_LIKE_RNDU (rnd, sgn) ? (sst ?  1 : 0) :
   1053               (MPFR_ASSERTD (rnd == MPFR_RNDN),
   1054                tmd == 1 ? - sst : sst);
   1055 
   1056             if (tmd == 2 && sst == (rbit != 0 ? -1 : 1))
   1057               corr = 1 - (int) rbit;
   1058             else if (MPFR_IS_LIKE_RNDD (rnd, sgn) && sst == -1)
   1059               corr = (int) rbit - 1;
   1060             else if (MPFR_IS_LIKE_RNDU (rnd, sgn) && sst == +1)
   1061               corr = (int) rbit + 1;
   1062             else
   1063               corr = (int) rbit;
   1064           }  /* tmd */
   1065       }  /* rnd != MPFR_RNDF */
   1066 
   1067     MPFR_LOG_MSG (("neg=%d corr=%d inex=%d\n", neg, corr, inex));
   1068 
   1069     /* Sign handling (-> absolute value and sign), together with
   1070        rounding. The most common cases are corr = 0 and corr = 1
   1071        as this is necessarily the case when the TMD did not occur. */
   1072 
   1073     MPFR_ASSERTD (corr >= -1 && corr <= 2);
   1074 
   1075     MPFR_SIGN (sum) = sgn;
   1076 
   1077     /* Let's copy/shift the bits [max(u,minexp),e) to the
   1078        most significant part of the destination, and zero
   1079        the least significant part (there can be one only if
   1080        u < minexp). The trailing bits of the destination may
   1081        contain garbage at this point. */
   1082 
   1083     sn = MPFR_PREC2LIMBS (sq);
   1084     sd = (mpfr_prec_t) sn * GMP_NUMB_BITS - sq;
   1085     sh = cancel % GMP_NUMB_BITS;
   1086 
   1087     MPFR_ASSERTD (sd >= 0 && sd < GMP_NUMB_BITS);
   1088 
   1089     if (MPFR_LIKELY (u > minexp))
   1090       {
   1091         mp_size_t wi;
   1092 
   1093         /* Recompute the initial value of wi. */
   1094         wi = (u - minexp) / GMP_NUMB_BITS;
   1095         if (MPFR_LIKELY (sh != 0))
   1096           {
   1097             mp_size_t fi;
   1098 
   1099             fi = (e - minexp) / GMP_NUMB_BITS - (sn - 1);
   1100             MPFR_ASSERTD (fi == wi || fi == wi + 1);
   1101             mpn_lshift (sump, wp + fi, sn, sh);
   1102             if (fi != wi)
   1103               sump[0] |= wp[wi] >> (GMP_NUMB_BITS - sh);
   1104           }
   1105         else
   1106           {
   1107             MPFR_ASSERTD ((mpfr_prec_t) (ws - (wi + sn)) * GMP_NUMB_BITS
   1108                           == cancel);
   1109             MPN_COPY (sump, wp + wi, sn);
   1110           }
   1111       }
   1112     else  /* u <= minexp */
   1113       {
   1114         mp_size_t en;
   1115 
   1116         en = (e - minexp + (GMP_NUMB_BITS - 1)) / GMP_NUMB_BITS;
   1117         if (MPFR_LIKELY (sh != 0))
   1118           mpn_lshift (sump + sn - en, wp, en, sh);
   1119         else if (MPFR_UNLIKELY (en > 0))
   1120           MPN_COPY (sump + sn - en, wp, en);
   1121         if (sn > en)
   1122           MPN_ZERO (sump, sn - en);
   1123       }
   1124 
   1125     /* Let's take the complement if the result is negative, and at
   1126        the same time, do the rounding and zero the trailing bits.
   1127        As this is valid only for precisions >= 2, there is special
   1128        code for precision 1 first. */
   1129 
   1130     if (MPFR_UNLIKELY (sq == 1))  /* precision 1 */
   1131       {
   1132         sump[0] = MPFR_LIMB_HIGHBIT;
   1133         e += neg ? 1 - corr : corr;
   1134       }
   1135     else if (neg)  /* negative result with sq > 1 */
   1136       {
   1137         MPFR_ASSERTD (MPFR_LIMB_MSB (sump[sn-1]) == 0);
   1138 
   1139         /* abs(x + corr) = - (x + corr) = com(x) + (1 - corr) */
   1140 
   1141         /* We want to avoid separate mpn_com (or mpn_neg) and mpn_add_1
   1142            (or mpn_sub_1) operations, as they could yield two loops in
   1143            some particular cases involving a long sequence of 0's in
   1144            the low significant bits (except the least significant bit,
   1145            which doesn't matter). */
   1146 
   1147         if (corr <= 1)
   1148           {
   1149             mp_limb_t corr2;
   1150 
   1151             /* Here we can just do the correction operation on the
   1152                least significant limb, then do either a mpn_com or
   1153                a mpn_neg on the remaining limbs, depending on the
   1154                carry (BTW, mpn_neg is just a mpn_com with an initial
   1155                carry propagation: after some point, mpn_neg does a
   1156                complement). */
   1157 
   1158             corr2 = (mp_limb_t) (1 - corr) << sd;
   1159             /* Note: If corr = -1, this can overflow to corr2 = 0.
   1160                This case is taken into account below. */
   1161 
   1162             sump[0] = (~ (sump[0] | MPFR_LIMB_MASK (sd))) + corr2;
   1163 
   1164             if (sump[0] < corr2 || (corr2 == 0 && corr < 0))
   1165               {
   1166                 if (sn == 1 || ! mpn_neg (sump + 1, sump + 1, sn - 1))
   1167                   {
   1168                     /* Note: The | is important when sump[sn-1] is not 0
   1169                        (this can occur with sn = 1 and corr = -1). TODO:
   1170                        Add something to make sure that this is tested. */
   1171                     sump[sn-1] |= MPFR_LIMB_HIGHBIT;
   1172                     e++;
   1173                   }
   1174               }
   1175             else if (sn > 1)
   1176               mpn_com (sump + 1, sump + 1, sn - 1);
   1177           }
   1178         else  /* corr == 2 */
   1179           {
   1180             mp_limb_t corr2, c;
   1181             mp_size_t i = 1;
   1182 
   1183             /* We want to compute com(x) - 1, but GMP doesn't have an
   1184                operation for that. The fact is that a sequence of low
   1185                significant bits 1 is invariant. Starting at the first
   1186                low significant bit 0, we can do the complement with
   1187                mpn_com. */
   1188 
   1189             corr2 = MPFR_LIMB_ONE << sd;
   1190             c = ~ (sump[0] | MPFR_LIMB_MASK (sd));
   1191             sump[0] = c - corr2;
   1192 
   1193             if (c == 0)
   1194               {
   1195                 while (MPFR_ASSERTD (i < sn), sump[i] == MPFR_LIMB_MAX)
   1196                   i++;
   1197                 sump[i] = (~ sump[i]) - 1;
   1198                 i++;
   1199               }
   1200 
   1201             if (i < sn)
   1202               mpn_com (sump + i, sump + i, sn - i);
   1203             else if (MPFR_UNLIKELY (MPFR_LIMB_MSB (sump[sn-1]) == 0))
   1204               {
   1205                 /* Happens on 01111...111, whose complement is
   1206                    10000...000, and com(x) - 1 is 01111...111. */
   1207                 sump[sn-1] |= MPFR_LIMB_HIGHBIT;
   1208                 e--;
   1209               }
   1210           }
   1211       }
   1212     else  /* positive result with sq > 1 */
   1213       {
   1214         MPFR_ASSERTD (MPFR_LIMB_MSB (sump[sn-1]) != 0);
   1215         sump[0] &= ~ MPFR_LIMB_MASK (sd);
   1216 
   1217         if (corr > 0)
   1218           {
   1219             mp_limb_t corr2, carry_out;
   1220 
   1221             corr2 = (mp_limb_t) corr << sd;
   1222             /* If corr == 2 && sd == GMP_NUMB_BITS - 1, this overflows
   1223                to corr2 = 0. This case is taken into account below. */
   1224 
   1225             carry_out = corr2 != 0 ? mpn_add_1 (sump, sump, sn, corr2) :
   1226               (MPFR_ASSERTD (sn > 1),
   1227                mpn_add_1 (sump + 1, sump + 1, sn - 1, MPFR_LIMB_ONE));
   1228 
   1229             MPFR_ASSERTD (sump[sn-1] >> (GMP_NUMB_BITS - 1) == !carry_out);
   1230 
   1231             if (MPFR_UNLIKELY (carry_out))
   1232               {
   1233                 /* Note: The | is important when sump[sn-1] is not 0
   1234                    (this can occur with sn = 1 and corr = 2). TODO:
   1235                    Add something to make sure that this is tested. */
   1236                 sump[sn-1] |= MPFR_LIMB_HIGHBIT;
   1237                 e++;
   1238               }
   1239           }
   1240 
   1241         if (corr < 0)
   1242           {
   1243             mpn_sub_1 (sump, sump, sn, MPFR_LIMB_ONE << sd);
   1244 
   1245             if (MPFR_UNLIKELY (MPFR_LIMB_MSB (sump[sn-1]) == 0))
   1246               {
   1247                 sump[sn-1] |= MPFR_LIMB_HIGHBIT;
   1248                 e--;
   1249               }
   1250           }
   1251       }
   1252 
   1253     MPFR_ASSERTD (MPFR_LIMB_MSB (sump[sn-1]) != 0);
   1254     MPFR_LOG_MSG (("Set exponent e=%" MPFR_EXP_FSPEC "d\n", (mpfr_eexp_t) e));
   1255     /* e may be outside the current exponent range, but this will be checked
   1256        with mpfr_check_range below. */
   1257     MPFR_EXP (sum) = e;
   1258   }  /* main block */
   1259 
   1260   MPFR_TMP_FREE (marker);
   1261   return mpfr_check_range (sum, inex, rnd);
   1262 }
   1263 
   1264 /**********************************************************************/
   1265 
   1266 int
   1267 mpfr_sum (mpfr_ptr sum, const mpfr_ptr *x, unsigned long n, mpfr_rnd_t rnd)
   1268 {
   1269   MPFR_LOG_FUNC
   1270     (("n=%lu rnd=%d", n, rnd),
   1271      ("sum[%Pd]=%.*Rg", mpfr_get_prec (sum), mpfr_log_prec, sum));
   1272 
   1273   if (MPFR_UNLIKELY (n <= 2))
   1274     {
   1275       if (n == 0)
   1276         {
   1277           MPFR_SET_ZERO (sum);
   1278           MPFR_SET_POS (sum);
   1279           MPFR_RET (0);
   1280         }
   1281       else if (n == 1)
   1282         return mpfr_set (sum, x[0], rnd);
   1283       else
   1284         return mpfr_add (sum, x[0], x[1], rnd);
   1285     }
   1286   else
   1287     {
   1288       mpfr_exp_t maxexp = MPFR_EXP_MIN;  /* max(Empty) */
   1289       unsigned long i;
   1290       unsigned long rn = 0;  /* will be the number of regular inputs */
   1291       /* sign of infinities and zeros (0: currently unknown) */
   1292       int sign_inf = 0, sign_zero = 0;
   1293 
   1294       MPFR_LOG_MSG (("Check for special inputs (n = %lu >= 3)\n", n));
   1295 
   1296       for (i = 0; i < n; i++)
   1297         {
   1298           if (MPFR_UNLIKELY (MPFR_IS_SINGULAR (x[i])))
   1299             {
   1300               if (MPFR_IS_NAN (x[i]))
   1301                 {
   1302                   /* The current value x[i] is NaN. Then the sum is NaN. */
   1303                 nan:
   1304                   MPFR_SET_NAN (sum);
   1305                   MPFR_RET_NAN;
   1306                 }
   1307               else if (MPFR_IS_INF (x[i]))
   1308                 {
   1309                   /* The current value x[i] is an infinity.
   1310                      There are two cases:
   1311                      1. This is the first infinity value (sign_inf == 0).
   1312                         Then set sign_inf to its sign, and go on.
   1313                      2. All the infinities found until now have the same
   1314                         sign sign_inf. If this new infinity has a different
   1315                         sign, then return NaN immediately, else go on. */
   1316                   if (sign_inf == 0)
   1317                     sign_inf = MPFR_SIGN (x[i]);
   1318                   else if (MPFR_SIGN (x[i]) != sign_inf)
   1319                     goto nan;
   1320                 }
   1321               else if (MPFR_UNLIKELY (rn == 0))
   1322                 {
   1323                   /* The current value x[i] is a zero. The code below matters
   1324                      only when all values found until now are zeros, otherwise
   1325                      it is harmless (the test rn == 0 above is just a minor
   1326                      optimization).
   1327                      Here we track the sign of the zero result when all inputs
   1328                      are zeros: if all zeros have the same sign, the result
   1329                      will have this sign, otherwise (i.e. if there is at least
   1330                      a zero of each sign), the sign of the zero result depends
   1331                      only on the rounding mode (note that this choice is
   1332                      sticky when new zeros are considered). */
   1333                   MPFR_ASSERTD (MPFR_IS_ZERO (x[i]));
   1334                   if (sign_zero == 0)
   1335                     sign_zero = MPFR_SIGN (x[i]);
   1336                   else if (MPFR_SIGN (x[i]) != sign_zero)
   1337                     sign_zero = rnd == MPFR_RNDD ? -1 : 1;
   1338                 }
   1339             }
   1340           else
   1341             {
   1342               /* The current value x[i] is a regular number. */
   1343               mpfr_exp_t e = MPFR_GET_EXP (x[i]);
   1344               if (e > maxexp)
   1345                 maxexp = e;  /* maximum exponent found until now */
   1346               rn++;  /* current number of regular inputs */
   1347             }
   1348         }
   1349 
   1350       MPFR_LOG_MSG (("rn=%lu sign_inf=%d sign_zero=%d\n",
   1351                      rn, sign_inf, sign_zero));
   1352 
   1353       /* At this point the result cannot be NaN (this case has already
   1354          been filtered out). */
   1355 
   1356       if (MPFR_UNLIKELY (sign_inf != 0))
   1357         {
   1358           /* At least one infinity, and all of them have the same sign
   1359              sign_inf. The sum is the infinity of this sign. */
   1360           MPFR_SET_INF (sum);
   1361           MPFR_SET_SIGN (sum, sign_inf);
   1362           MPFR_RET (0);
   1363         }
   1364 
   1365       /* At this point, all the inputs are finite numbers. */
   1366 
   1367       if (MPFR_UNLIKELY (rn == 0))
   1368         {
   1369           /* All the numbers were zeros (and there is at least one).
   1370              The sum is zero with sign sign_zero. */
   1371           MPFR_ASSERTD (sign_zero != 0);
   1372           MPFR_SET_ZERO (sum);
   1373           MPFR_SET_SIGN (sum, sign_zero);
   1374           MPFR_RET (0);
   1375         }
   1376 
   1377       /* Optimize the case where there are only two regular numbers. */
   1378       if (MPFR_UNLIKELY (rn <= 2))
   1379         {
   1380           unsigned long h = ULONG_MAX;
   1381 
   1382           for (i = 0; i < n; i++)
   1383             if (! MPFR_IS_SINGULAR (x[i]))
   1384               {
   1385                 if (rn == 1)
   1386                   return mpfr_set (sum, x[i], rnd);
   1387                 if (h != ULONG_MAX)
   1388                   return mpfr_add (sum, x[h], x[i], rnd);
   1389                 h = i;
   1390               }
   1391           MPFR_RET_NEVER_GO_HERE();
   1392         }
   1393 
   1394       return sum_aux (sum, x, n, rnd, maxexp, rn);
   1395     }
   1396 }
   1397