      1 /* mpfr_fma -- Floating multiply-add
      2 
      3 Copyright 2001-2002, 2004, 2006-2023 Free Software Foundation, Inc.
      4 Contributed by the AriC and Caramba projects, INRIA.
      5 
      6 This file is part of the GNU MPFR Library.
      7 
      8 The GNU MPFR Library is free software; you can redistribute it and/or modify
      9 it under the terms of the GNU Lesser General Public License as published by
     10 the Free Software Foundation; either version 3 of the License, or (at your
     11 option) any later version.
     12 
     13 The GNU MPFR Library is distributed in the hope that it will be useful, but
     14 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
     15 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
     16 License for more details.
     17 
     18 You should have received a copy of the GNU Lesser General Public License
     19 along with the GNU MPFR Library; see the file COPYING.LESSER.  If not, see
     20 https://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
     21 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */
     22 
     23 #define MPFR_NEED_LONGLONG_H
     24 #include "mpfr-impl.h"
     25 
     26 /* The fused-multiply-add (fma) of x, y and z is defined by:
     27    fma(x,y,z)= x*y + z
     28 */
     29 
/* this function deals with all cases where inputs are singular, i.e.,
   either NaN, Inf or zero.
   Sets s = x*y + z and returns the ternary (inexact) value, which is
   always 0 here except for the NaN cases (MPFR_RET_NAN) and the
   "s <- z" / "s <- x*y" fallbacks, which forward the callee's result. */
static int
mpfr_fma_singular (mpfr_ptr s, mpfr_srcptr x, mpfr_srcptr y, mpfr_srcptr z,
                   mpfr_rnd_t rnd_mode)
{
  /* A NaN in any operand propagates to the result. */
  if (MPFR_IS_NAN(x) || MPFR_IS_NAN(y) || MPFR_IS_NAN(z))
    {
      MPFR_SET_NAN(s);
      MPFR_RET_NAN;
    }
  /* now neither x, y or z is NaN */
  else if (MPFR_IS_INF(x) || MPFR_IS_INF(y))
    {
      /* cases Inf*0+z, 0*Inf+z, Inf-Inf */
      if ((MPFR_IS_ZERO(y)) ||
          (MPFR_IS_ZERO(x)) ||
          (MPFR_IS_INF(z) &&
           ((MPFR_MULT_SIGN(MPFR_SIGN(x), MPFR_SIGN(y))) != MPFR_SIGN(z))))
        {
          MPFR_SET_NAN(s);
          MPFR_RET_NAN;
        }
      else if (MPFR_IS_INF(z)) /* case Inf-Inf already checked above */
        {
          /* x*y and z are infinities of the same sign: s = z, exact. */
          MPFR_SET_INF(s);
          MPFR_SET_SAME_SIGN(s, z);
          MPFR_RET(0);
        }
      else /* z is finite */
        {
          /* s is the infinite product x*y, of sign sign(x)*sign(y);
             adding the finite z does not change it. Exact. */
          MPFR_SET_INF(s);
          MPFR_SET_SIGN(s, MPFR_MULT_SIGN(MPFR_SIGN(x), MPFR_SIGN(y)));
          MPFR_RET(0);
        }
    }
  /* now x and y are finite */
  else if (MPFR_IS_INF(z))
    {
      /* finite*finite + Inf = Inf of the sign of z, exact. */
      MPFR_SET_INF(s);
      MPFR_SET_SAME_SIGN(s, z);
      MPFR_RET(0);
    }
  else if (MPFR_IS_ZERO(x) || MPFR_IS_ZERO(y))
    {
      if (MPFR_IS_ZERO(z))
        {
          /* Exact zero + zero: the sign of the zero result follows the
             rule for an exact zero sum (as in IEEE 754): in all modes
             but MPFR_RNDD the result is -0 only when both addends
             (x*y and z) are -0; in MPFR_RNDD it is +0 only when both
             are +0, and -0 otherwise.
             Note: the sign must be set before MPFR_SET_ZERO. */
          int sign_p;
          sign_p = MPFR_MULT_SIGN(MPFR_SIGN(x), MPFR_SIGN(y));
          MPFR_SET_SIGN(s, (rnd_mode != MPFR_RNDD ?
                            (MPFR_IS_NEG_SIGN(sign_p) && MPFR_IS_NEG(z) ?
                             MPFR_SIGN_NEG : MPFR_SIGN_POS) :
                            (MPFR_IS_POS_SIGN(sign_p) && MPFR_IS_POS(z) ?
                             MPFR_SIGN_POS : MPFR_SIGN_NEG)));
          MPFR_SET_ZERO(s);
          MPFR_RET(0);
        }
      else
        /* x*y is an exact zero, so s is z rounded to the target
           precision. */
        return mpfr_set (s, z, rnd_mode);
    }
  else /* necessarily z is zero here */
    {
      /* x*y + 0 reduces to the rounded product; use the dedicated
         squaring routine when x and y are the same object. */
      MPFR_ASSERTD(MPFR_IS_ZERO(z));
      return (x == y) ? mpfr_sqr (s, x, rnd_mode)
        : mpfr_mul (s, x, y, rnd_mode);
    }
}
     97 
/* s <- x*y + z
   Returns the usual MPFR ternary value (sign of the rounding error).
   Strategy: compute the product x*y EXACTLY (so that the fma is a
   single correctly-rounded addition), via one of three paths:
     1. a one-limb fast path when all four precisions are equal and
        fit in a limb;
     2. a direct mpn multiplication for small equal-precision operands;
     3. a generic mpfr_mul at precision prec(x)+prec(y), with UBF
        (unbounded floats) as a fallback when that product would
        overflow or underflow. */
int
mpfr_fma (mpfr_ptr s, mpfr_srcptr x, mpfr_srcptr y, mpfr_srcptr z,
          mpfr_rnd_t rnd_mode)
{
  int inexact;
  mpfr_t u;                   /* temporary holding the exact product x*y */
  mp_size_t n;
  mpfr_exp_t e;               /* EXP(x) + EXP(y): exponent of x*y before
                                 normalization (actual one is e or e-1) */
  mpfr_prec_t precx, precy;
  MPFR_SAVE_EXPO_DECL (expo);
  MPFR_GROUP_DECL(group);

  MPFR_LOG_FUNC
    (("x[%Pd]=%.*Rg y[%Pd]=%.*Rg  z[%Pd]=%.*Rg rnd=%d",
      mpfr_get_prec (x), mpfr_log_prec, x,
      mpfr_get_prec (y), mpfr_log_prec, y,
      mpfr_get_prec (z), mpfr_log_prec, z, rnd_mode),
     ("s[%Pd]=%.*Rg inexact=%d",
      mpfr_get_prec (s), mpfr_log_prec, s, inexact));

  /* particular cases */
  if (MPFR_UNLIKELY( MPFR_IS_SINGULAR(x) || MPFR_IS_SINGULAR(y) ||
                     MPFR_IS_SINGULAR(z) ))
    return mpfr_fma_singular (s, x, y, z, rnd_mode);

  e = MPFR_GET_EXP (x) + MPFR_GET_EXP (y);

  precx = MPFR_PREC (x);
  precy = MPFR_PREC (y);

  /* First deal with special case where prec(x) = prec(y) and x*y does
     not overflow nor underflow. Do it only for small sizes since for large
     sizes x*y is faster using Mulders' algorithm (as a rule of thumb,
     we assume mpn_mul_n is faster up to 4*MPFR_MUL_THRESHOLD).
     Since |EXP(x)|, |EXP(y)| < 2^(k-2) on a k-bit computer,
     |EXP(x)+EXP(y)| < 2^(k-1), thus cannot overflow nor underflow.
     Note: the strict inequality e > __gmpfr_emin guarantees that the
     normalized product exponent (e or e-1) stays >= emin. */
  if (precx == precy && e <= __gmpfr_emax && e > __gmpfr_emin)
    {
      if (precx < GMP_NUMB_BITS &&
          MPFR_PREC(z) == precx &&
          MPFR_PREC(s) == precx)
        {
          /* One-limb fast path: all four precisions are equal and fit
             in a single limb. Build two stack-backed mpfr_t temporaries:
             u, the exact 2*precx-bit product, and zz, an alias of z at
             precision 2*precx (so that mpfr_add sees equal precisions). */
          mp_limb_t umant[2], zmant[2];
          mpfr_t zz;
          int inex;

          /* Exact double-limb product of the two significands. */
          umul_ppmm (umant[1], umant[0], MPFR_MANT(x)[0], MPFR_MANT(y)[0]);
          MPFR_PREC(u) = MPFR_PREC(zz) = 2 * precx;
          MPFR_MANT(u) = umant;
          MPFR_MANT(zz) = zmant;
          MPFR_SIGN(u) = MPFR_MULT_SIGN(MPFR_SIGN(x), MPFR_SIGN(y));
          MPFR_SIGN(zz) = MPFR_SIGN(z);
          MPFR_EXP(zz) = MPFR_EXP(z);
          if (MPFR_PREC(zz) <= GMP_NUMB_BITS) /* zz fits in one limb */
            {
              /* Normalize the product into one limb: if the top bit is
                 not set, shift left by one and decrement the exponent. */
              if ((umant[1] & MPFR_LIMB_HIGHBIT) == 0)
                {
                  umant[0] = umant[1] << 1;
                  MPFR_EXP(u) = e - 1;
                }
              else
                {
                  umant[0] = umant[1];
                  MPFR_EXP(u) = e;
                }
              zmant[0] = MPFR_MANT(z)[0];
            }
          else
            {
              /* Two-limb case: zero-pad z's significand in the low limb
                 and normalize the two-limb product in place. */
              zmant[1] = MPFR_MANT(z)[0];
              zmant[0] = MPFR_LIMB_ZERO;
              if ((umant[1] & MPFR_LIMB_HIGHBIT) == 0)
                {
                  umant[1] = (umant[1] << 1) |
                    (umant[0] >> (GMP_NUMB_BITS - 1));
                  umant[0] = umant[0] << 1;
                  MPFR_EXP(u) = e - 1;
                }
              else
                MPFR_EXP(u) = e;
            }
          /* u <- u + zz exactly fits in 2*precx bits (or the addition
             rounds); then round u to the precx-bit target s. */
          inex = mpfr_add (u, u, zz, rnd_mode);
          /* mpfr_set_1_2 requires PREC(u) = 2*PREC(s),
             thus we need PREC(s) = PREC(x) = PREC(y) = PREC(z) */
          return mpfr_set_1_2 (s, u, rnd_mode, inex);
        }
      else if ((n = MPFR_LIMB_SIZE(x)) <= 4 * MPFR_MUL_THRESHOLD)
        {
          /* Small equal-precision operands: compute the exact product
             with mpn_mul_n/mpn_sqr into a temporary of 2n limbs. */
          mpfr_limb_ptr up;
          mp_size_t un = n + n;
          MPFR_TMP_DECL(marker);

          MPFR_TMP_MARK(marker);
          MPFR_TMP_INIT (up, u, un * GMP_NUMB_BITS, un);
          up = MPFR_MANT(u);
          /* multiply x*y exactly into u */
          if (x == y)
            mpn_sqr (up, MPFR_MANT(x), n);
          else
            mpn_mul_n (up, MPFR_MANT(x), MPFR_MANT(y), n);
          /* Normalize: the product of two normalized significands has
             its most significant bit in one of the top two positions. */
          if (MPFR_LIMB_MSB (up[un - 1]) == 0)
            {
              mpn_lshift (up, up, un, 1);
              MPFR_EXP(u) = e - 1;
            }
          else
            MPFR_EXP(u) = e;
          MPFR_SIGN(u) = MPFR_MULT_SIGN(MPFR_SIGN(x), MPFR_SIGN(y));
          /* The above code does not generate any exception.
             The exceptions will come only from mpfr_add. */
          inexact = mpfr_add (s, u, z, rnd_mode);
          MPFR_TMP_FREE(marker);
          return inexact;
        }
    }

  /* If we take prec(u) >= prec(x) + prec(y), the product u <- x*y
     is exact, except in case of overflow or underflow. */
  MPFR_ASSERTN (precx + precy <= MPFR_PREC_MAX);
  MPFR_GROUP_INIT_1 (group, precx + precy, u);
  MPFR_SAVE_EXPO_MARK (expo);

  if (MPFR_UNLIKELY (mpfr_mul (u, x, y, MPFR_RNDN)))
    {
      /* overflow or underflow - this case is regarded as rare, thus
         does not need to be very efficient (even if some tests below
         could have been done earlier).
         It is an overflow iff u is an infinity (since MPFR_RNDN was used).
         Alternatively, we could test the overflow flag, but in this case,
         mpfr_clear_flags would have been necessary. */

      if (MPFR_IS_INF (u))  /* overflow */
        {
          int sign_u = MPFR_SIGN (u);

          MPFR_LOG_MSG (("Overflow on x*y\n", 0));
          MPFR_GROUP_CLEAR (group);  /* we no longer need u */

          /* Let's eliminate the obvious case where x*y and z have the
             same sign. No possible cancellation -> real overflow.
             Also, we know that |z| < 2^emax. If E(x) + E(y) >= emax+3,
             then |x*y| >= 2^(emax+1), and |x*y + z| > 2^emax. This case
             is also an overflow. */
          if (sign_u == MPFR_SIGN (z) || e >= __gmpfr_emax + 3)
            {
              MPFR_SAVE_EXPO_FREE (expo);
              return mpfr_overflow (s, rnd_mode, sign_u);
            }
        }
      else  /* underflow: one has |x*y| < 2^(emin-1). */
        {
          MPFR_LOG_MSG (("Underflow on x*y\n", 0));

          /* Easy cases: when 2^(emin-1) <= 1/2 * min(ulp(z),ulp(s)),
             one can replace x*y by sign(x*y) * 2^(emin-1). Note that
             this is even true in case of equality for MPFR_RNDN thanks
             to the even-rounding rule.
             The + 1 on MPFR_PREC (s) is necessary because the exponent
             of the result can be EXP(z) - 1. */
          if (MPFR_GET_EXP (z) - __gmpfr_emin >=
              MAX (MPFR_PREC (z), MPFR_PREC (s) + 1))
            {
              /* Replace u (still allocated in the group) by the
                 smallest positive number 2^(emin-1), with the sign of
                 x*y, then fall through to the normal addition. The
                 goto jumps into the "else" branch below; this is legal
                 C and keeps the MPFR_GROUP_CLEAR there responsible for
                 freeing u. */
              MPFR_PREC (u) = MPFR_PREC_MIN;
              mpfr_setmin (u, __gmpfr_emin);
              MPFR_SET_SIGN (u, MPFR_MULT_SIGN (MPFR_SIGN (x),
                                                MPFR_SIGN (y)));
              mpfr_clear_flags ();
              goto add;
            }

          MPFR_GROUP_CLEAR (group);  /* we no longer need u */
        }

      /* Let's use UBF to resolve the overflow/underflow issues. */
      {
        /* Unbounded floats: uu holds the exact product with an
           exponent outside [emin,emax]; mpfr_add handles UBF inputs. */
        mpfr_ubf_t uu;
        mp_size_t un;
        mpfr_limb_ptr up;
        MPFR_TMP_DECL(marker);

        MPFR_LOG_MSG (("Use UBF\n", 0));

        MPFR_TMP_MARK (marker);
        un = MPFR_LIMB_SIZE (x) + MPFR_LIMB_SIZE (y);
        MPFR_TMP_INIT (up, uu, (mpfr_prec_t) un * GMP_NUMB_BITS, un);
        mpfr_ubf_mul_exact (uu, x, y);
        mpfr_clear_flags ();
        inexact = mpfr_add (s, (mpfr_srcptr) uu, z, rnd_mode);
        MPFR_UBF_CLEAR_EXP (uu);
        MPFR_TMP_FREE (marker);
      }
    }
  else
    {
      /* u = x*y exactly; a single rounded addition finishes the fma. */
    add:
      inexact = mpfr_add (s, u, z, rnd_mode);
      MPFR_GROUP_CLEAR (group);
    }

  /* Merge the flags raised under the saved exponent range into the
     caller's flags, then check s against the real [emin,emax]. */
  MPFR_SAVE_EXPO_UPDATE_FLAGS (expo, __gmpfr_flags);
  MPFR_SAVE_EXPO_FREE (expo);
  return mpfr_check_range (s, inexact, rnd_mode);
}
    302