Home | History | Annotate | Line # | Download | only in libbid
      1 /* Copyright (C) 2007-2024 Free Software Foundation, Inc.
      2 
      3 This file is part of GCC.
      4 
      5 GCC is free software; you can redistribute it and/or modify it under
      6 the terms of the GNU General Public License as published by the Free
      7 Software Foundation; either version 3, or (at your option) any later
      8 version.
      9 
     10 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
     11 WARRANTY; without even the implied warranty of MERCHANTABILITY or
     12 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
     13 for more details.
     14 
     15 Under Section 7 of GPL version 3, you are granted additional
     16 permissions described in the GCC Runtime Library Exception, version
     17 3.1, as published by the Free Software Foundation.
     18 
     19 You should have received a copy of the GNU General Public License and
     20 a copy of the GCC Runtime Library Exception along with this program;
     21 see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
     22 <http://www.gnu.org/licenses/>.  */
     23 
     24 /*****************************************************************************
     25  *
     26  *  BID128 fma   x * y + z
     27  *
     28  ****************************************************************************/
     29 
     30 #include "bid_internal.h"
     31 
     32 static void
     33 rounding_correction (unsigned int rnd_mode,
     34         	     unsigned int is_inexact_lt_midpoint,
     35         	     unsigned int is_inexact_gt_midpoint,
     36         	     unsigned int is_midpoint_lt_even,
     37         	     unsigned int is_midpoint_gt_even,
     38         	     int unbexp,
     39         	     UINT128 * ptrres, _IDEC_flags * ptrfpsf) {
     40   // unbiased true exponent unbexp may be larger than emax
     41 
     42   UINT128 res = *ptrres; // expected to have the correct sign and coefficient
     43   // (the exponent field is ignored, as unbexp is used instead)
     44   UINT64 sign, exp;
     45   UINT64 C_hi, C_lo;
     46 
     47   // general correction from RN to RA, RM, RP, RZ
     48   // Note: if the result is negative, then is_inexact_lt_midpoint,
     49   // is_inexact_gt_midpoint, is_midpoint_lt_even, and is_midpoint_gt_even
     50   // have to be considered as if determined for the absolute value of the
     51   // result (so they seem to be reversed)
     52 
     53   if (is_inexact_lt_midpoint || is_inexact_gt_midpoint ||
     54       is_midpoint_lt_even || is_midpoint_gt_even) {
     55     *ptrfpsf |= INEXACT_EXCEPTION;
     56   }
     57   // apply correction to result calculated with unbounded exponent
     58   sign = res.w[1] & MASK_SIGN;
     59   exp = (UINT64) (unbexp + 6176) << 49; // valid only if expmin<=unbexp<=expmax
     60   C_hi = res.w[1] & MASK_COEFF;
     61   C_lo = res.w[0];
     62   if ((!sign && ((rnd_mode == ROUNDING_UP && is_inexact_lt_midpoint) ||
     63       ((rnd_mode == ROUNDING_TIES_AWAY || rnd_mode == ROUNDING_UP) &&
     64       is_midpoint_gt_even))) ||
     65       (sign && ((rnd_mode == ROUNDING_DOWN && is_inexact_lt_midpoint) ||
     66       ((rnd_mode == ROUNDING_TIES_AWAY || rnd_mode == ROUNDING_DOWN) &&
     67       is_midpoint_gt_even)))) {
     68     // C = C + 1
     69     C_lo = C_lo + 1;
     70     if (C_lo == 0)
     71       C_hi = C_hi + 1;
     72     if (C_hi == 0x0001ed09bead87c0ull && C_lo == 0x378d8e6400000000ull) {
     73       // C = 10^34 => rounding overflow
     74       C_hi = 0x0000314dc6448d93ull;
     75       C_lo = 0x38c15b0a00000000ull; // 10^33
     76       // exp = exp + EXP_P1;
     77       unbexp = unbexp + 1;
     78       exp = (UINT64) (unbexp + 6176) << 49;
     79     }
     80   } else if ((is_midpoint_lt_even || is_inexact_gt_midpoint) &&
     81       ((sign && (rnd_mode == ROUNDING_UP || rnd_mode == ROUNDING_TO_ZERO)) ||
     82       (!sign && (rnd_mode == ROUNDING_DOWN || rnd_mode == ROUNDING_TO_ZERO)))) {
     83     // C = C - 1
     84     C_lo = C_lo - 1;
     85     if (C_lo == 0xffffffffffffffffull)
     86       C_hi--;
     87     // check if we crossed into the lower decade
     88     if (C_hi == 0x0000314dc6448d93ull && C_lo == 0x38c15b09ffffffffull) {
     89       // C = 10^33 - 1
     90       if (exp > 0) {
     91         C_hi = 0x0001ed09bead87c0ull; // 10^34 - 1
     92         C_lo = 0x378d8e63ffffffffull;
     93         // exp = exp - EXP_P1;
     94         unbexp = unbexp - 1;
     95         exp = (UINT64) (unbexp + 6176) << 49;
     96       } else { // if exp = 0 the result is tiny & inexact
     97         *ptrfpsf |= UNDERFLOW_EXCEPTION;
     98       }
     99     }
    100   } else {
    101     ; // the result is already correct
    102   }
    103   if (unbexp > expmax) { // 6111
    104     *ptrfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION);
    105     exp = 0;
    106     if (!sign) { // result is positive
    107       if (rnd_mode == ROUNDING_UP || rnd_mode == ROUNDING_TIES_AWAY) { // +inf
    108         C_hi = 0x7800000000000000ull;
    109         C_lo = 0x0000000000000000ull;
    110       } else { // res = +MAXFP = (10^34-1) * 10^emax
    111         C_hi = 0x5fffed09bead87c0ull;
    112         C_lo = 0x378d8e63ffffffffull;
    113       }
    114     } else { // result is negative
    115       if (rnd_mode == ROUNDING_DOWN || rnd_mode == ROUNDING_TIES_AWAY) { // -inf
    116         C_hi = 0xf800000000000000ull;
    117         C_lo = 0x0000000000000000ull;
    118       } else { // res = -MAXFP = -(10^34-1) * 10^emax
    119         C_hi = 0xdfffed09bead87c0ull;
    120         C_lo = 0x378d8e63ffffffffull;
    121       }
    122     }
    123   }
    124   // assemble the result
    125   res.w[1] = sign | exp | C_hi;
    126   res.w[0] = C_lo;
    127   *ptrres = res;
    128 }
    129 
    130 static void
    131 add256 (UINT256 x, UINT256 y, UINT256 * pz) {
    132   // *z = x + yl assume the sum fits in 256 bits
    133   UINT256 z;
    134   z.w[0] = x.w[0] + y.w[0];
    135   if (z.w[0] < x.w[0]) {
    136     x.w[1]++;
    137     if (x.w[1] == 0x0000000000000000ull) {
    138       x.w[2]++;
    139       if (x.w[2] == 0x0000000000000000ull) {
    140         x.w[3]++;
    141       }
    142     }
    143   }
    144   z.w[1] = x.w[1] + y.w[1];
    145   if (z.w[1] < x.w[1]) {
    146     x.w[2]++;
    147     if (x.w[2] == 0x0000000000000000ull) {
    148       x.w[3]++;
    149     }
    150   }
    151   z.w[2] = x.w[2] + y.w[2];
    152   if (z.w[2] < x.w[2]) {
    153     x.w[3]++;
    154   }
    155   z.w[3] = x.w[3] + y.w[3]; // it was assumed that no carry is possible
    156   *pz = z;
    157 }
    158 
    159 static void
    160 sub256 (UINT256 x, UINT256 y, UINT256 * pz) {
    161   // *z = x - y; assume x >= y
    162   UINT256 z;
    163   z.w[0] = x.w[0] - y.w[0];
    164   if (z.w[0] > x.w[0]) {
    165     x.w[1]--;
    166     if (x.w[1] == 0xffffffffffffffffull) {
    167       x.w[2]--;
    168       if (x.w[2] == 0xffffffffffffffffull) {
    169         x.w[3]--;
    170       }
    171     }
    172   }
    173   z.w[1] = x.w[1] - y.w[1];
    174   if (z.w[1] > x.w[1]) {
    175     x.w[2]--;
    176     if (x.w[2] == 0xffffffffffffffffull) {
    177       x.w[3]--;
    178     }
    179   }
    180   z.w[2] = x.w[2] - y.w[2];
    181   if (z.w[2] > x.w[2]) {
    182     x.w[3]--;
    183   }
    184   z.w[3] = x.w[3] - y.w[3]; // no borrow possible, because x >= y
    185   *pz = z;
    186 }
    187 
    188 
    189 static int
    190 nr_digits256 (UINT256 R256) {
    191   int ind;
    192   // determine the number of decimal digits in R256
    193   if (R256.w[3] == 0x0 && R256.w[2] == 0x0 && R256.w[1] == 0x0) {
    194     // between 1 and 19 digits
    195     for (ind = 1; ind <= 19; ind++) {
    196       if (R256.w[0] < ten2k64[ind]) {
    197         break;
    198       }
    199     }
    200     // ind digits
    201   } else if (R256.w[3] == 0x0 && R256.w[2] == 0x0 &&
    202              (R256.w[1] < ten2k128[0].w[1] ||
    203               (R256.w[1] == ten2k128[0].w[1]
    204                && R256.w[0] < ten2k128[0].w[0]))) {
    205     // 20 digits
    206     ind = 20;
    207   } else if (R256.w[3] == 0x0 && R256.w[2] == 0x0) {
    208     // between 21 and 38 digits
    209     for (ind = 1; ind <= 18; ind++) {
    210       if (R256.w[1] < ten2k128[ind].w[1] ||
    211           (R256.w[1] == ten2k128[ind].w[1] &&
    212            R256.w[0] < ten2k128[ind].w[0])) {
    213         break;
    214       }
    215     }
    216     // ind + 20 digits
    217     ind = ind + 20;
    218   } else if (R256.w[3] == 0x0 &&
    219              (R256.w[2] < ten2k256[0].w[2] ||
    220               (R256.w[2] == ten2k256[0].w[2] &&
    221                R256.w[1] < ten2k256[0].w[1]) ||
    222               (R256.w[2] == ten2k256[0].w[2] &&
    223                R256.w[1] == ten2k256[0].w[1] &&
    224                R256.w[0] < ten2k256[0].w[0]))) {
    225     // 39 digits
    226     ind = 39;
    227   } else {
    228     // between 40 and 68 digits
    229     for (ind = 1; ind <= 29; ind++) {
    230       if (R256.w[3] < ten2k256[ind].w[3] ||
    231           (R256.w[3] == ten2k256[ind].w[3] &&
    232            R256.w[2] < ten2k256[ind].w[2]) ||
    233           (R256.w[3] == ten2k256[ind].w[3] &&
    234            R256.w[2] == ten2k256[ind].w[2] &&
    235            R256.w[1] < ten2k256[ind].w[1]) ||
    236           (R256.w[3] == ten2k256[ind].w[3] &&
    237            R256.w[2] == ten2k256[ind].w[2] &&
    238            R256.w[1] == ten2k256[ind].w[1] &&
    239            R256.w[0] < ten2k256[ind].w[0])) {
    240         break;
    241       }
    242     }
    243     // ind + 39 digits
    244     ind = ind + 39;
    245   }
    246   return (ind);
    247 }
    248 
    249 // add/subtract C4 and C3 * 10^scale; this may follow a previous rounding, so
    250 // use the rounding information from ptr_is_* to avoid a double rounding error
    251 static void
    252 add_and_round (int q3,
    253                int q4,
    254                int e4,
    255                int delta,
    256                int p34,
    257                UINT64 z_sign,
    258                UINT64 p_sign,
    259                UINT128 C3,
    260                UINT256 C4,
    261                int rnd_mode,
    262                int *ptr_is_midpoint_lt_even,
    263                int *ptr_is_midpoint_gt_even,
    264                int *ptr_is_inexact_lt_midpoint,
    265                int *ptr_is_inexact_gt_midpoint,
    266                _IDEC_flags * ptrfpsf, UINT128 * ptrres) {
    267 
    268   int scale;
    269   int x0;
    270   int ind;
    271   UINT64 R64;
    272   UINT128 P128, R128;
    273   UINT192 P192, R192;
    274   UINT256 R256;
    275   int is_midpoint_lt_even = 0;
    276   int is_midpoint_gt_even = 0;
    277   int is_inexact_lt_midpoint = 0;
    278   int is_inexact_gt_midpoint = 0;
    279   int is_midpoint_lt_even0 = 0;
    280   int is_midpoint_gt_even0 = 0;
    281   int is_inexact_lt_midpoint0 = 0;
    282   int is_inexact_gt_midpoint0 = 0;
    283   int incr_exp = 0;
    284   int is_tiny = 0;
    285   int lt_half_ulp = 0;
    286   int eq_half_ulp = 0;
    287   // int gt_half_ulp = 0;
    288   UINT128 res = *ptrres;
    289 
    290   // scale C3 up by 10^(q4-delta-q3), 0 <= q4-delta-q3 <= 2*P34-2 = 66
    291   scale = q4 - delta - q3; // 0 <= scale <= 66 (or 0 <= scale <= 68 if this
    292   // comes from Cases (2), (3), (4), (5), (6), with 0 <= |delta| <= 1
    293 
    294   // calculate C3 * 10^scale in R256 (it has at most 67 decimal digits for
    295   // Cases (15),(16),(17) and at most 69 for Cases (2),(3),(4),(5),(6))
    296   if (scale == 0) {
    297     R256.w[3] = 0x0ull;
    298     R256.w[2] = 0x0ull;
    299     R256.w[1] = C3.w[1];
    300     R256.w[0] = C3.w[0];
    301   } else if (scale <= 19) { // 10^scale fits in 64 bits
    302     P128.w[1] = 0;
    303     P128.w[0] = ten2k64[scale];
    304     __mul_128x128_to_256 (R256, P128, C3);
    305   } else if (scale <= 38) { // 10^scale fits in 128 bits
    306     __mul_128x128_to_256 (R256, ten2k128[scale - 20], C3);
    307   } else if (scale <= 57) { // 39 <= scale <= 57
    308     // 10^scale fits in 192 bits but C3 * 10^scale fits in 223 or 230 bits
    309     // (10^67 has 223 bits; 10^69 has 230 bits);
    310     // must split the computation:
    311     // 10^scale * C3 = 10*38 * 10^(scale-38) * C3 where 10^38 takes 127
    312     // bits and so 10^(scale-38) * C3 fits in 128 bits with certainty
    313     // Note that 1 <= scale - 38 <= 19 => 10^(scale-38) fits in 64 bits
    314     __mul_64x128_to_128 (R128, ten2k64[scale - 38], C3);
    315     // now multiply R128 by 10^38
    316     __mul_128x128_to_256 (R256, R128, ten2k128[18]);
    317   } else { // 58 <= scale <= 66
    318     // 10^scale takes between 193 and 220 bits,
    319     // and C3 * 10^scale fits in 223 bits (10^67/10^69 has 223/230 bits)
    320     // must split the computation:
    321     // 10^scale * C3 = 10*38 * 10^(scale-38) * C3 where 10^38 takes 127
    322     // bits and so 10^(scale-38) * C3 fits in 128 bits with certainty
    323     // Note that 20 <= scale - 38 <= 30 => 10^(scale-38) fits in 128 bits
    324     // Calculate first 10^(scale-38) * C3, which fits in 128 bits; because
    325     // 10^(scale-38) takes more than 64 bits, C3 will take less than 64
    326     __mul_64x128_to_128 (R128, C3.w[0], ten2k128[scale - 58]);
    327     // now calculate 10*38 * 10^(scale-38) * C3
    328     __mul_128x128_to_256 (R256, R128, ten2k128[18]);
    329   }
    330   // C3 * 10^scale is now in R256
    331 
    332   // for Cases (15), (16), (17) C4 > C3 * 10^scale because C4 has at least
    333   // one extra digit; for Cases (2), (3), (4), (5), or (6) any order is
    334   // possible
    335   // add/subtract C4 and C3 * 10^scale; the exponent is e4
    336   if (p_sign == z_sign) { // R256 = C4 + R256
    337     // calculate R256 = C4 + C3 * 10^scale = C4 + R256 which is exact,
    338     // but may require rounding
    339     add256 (C4, R256, &R256);
    340   } else { // if (p_sign != z_sign) { // R256 = C4 - R256
    341     // calculate R256 = C4 - C3 * 10^scale = C4 - R256 or
    342     // R256 = C3 * 10^scale - C4 = R256 - C4 which is exact,
    343     // but may require rounding
    344 
    345     // compare first R256 = C3 * 10^scale and C4
    346     if (R256.w[3] > C4.w[3] || (R256.w[3] == C4.w[3] && R256.w[2] > C4.w[2]) ||
    347         (R256.w[3] == C4.w[3] && R256.w[2] == C4.w[2] && R256.w[1] > C4.w[1]) ||
    348         (R256.w[3] == C4.w[3] && R256.w[2] == C4.w[2] && R256.w[1] == C4.w[1] &&
    349         R256.w[0] >= C4.w[0])) { // C3 * 10^scale >= C4
    350       // calculate R256 = C3 * 10^scale - C4 = R256 - C4, which is exact,
    351       // but may require rounding
    352       sub256 (R256, C4, &R256);
    353       // flip p_sign too, because the result has the sign of z
    354       p_sign = z_sign;
    355     } else { // if C4 > C3 * 10^scale
    356       // calculate R256 = C4 - C3 * 10^scale = C4 - R256, which is exact,
    357       // but may require rounding
    358       sub256 (C4, R256, &R256);
    359     }
    360     // if the result is pure zero, the sign depends on the rounding mode
    361     // (x*y and z had opposite signs)
    362     if (R256.w[3] == 0x0ull && R256.w[2] == 0x0ull &&
    363         R256.w[1] == 0x0ull && R256.w[0] == 0x0ull) {
    364       if (rnd_mode != ROUNDING_DOWN)
    365         p_sign = 0x0000000000000000ull;
    366       else
    367         p_sign = 0x8000000000000000ull;
    368       // the exponent is max (e4, expmin)
    369       if (e4 < -6176)
    370         e4 = expmin;
    371       // assemble result
    372       res.w[1] = p_sign | ((UINT64) (e4 + 6176) << 49);
    373       res.w[0] = 0x0;
    374       *ptrres = res;
    375       return;
    376     }
    377   }
    378 
    379   // determine the number of decimal digits in R256
    380   ind = nr_digits256 (R256);
    381 
    382   // the exact result is (-1)^p_sign * R256 * 10^e4 where q (R256) = ind;
    383   // round to the destination precision, with unbounded exponent
    384 
    385   if (ind <= p34) {
    386     // result rounded to the destination precision with unbounded exponent
    387     // is exact
    388     if (ind + e4 < p34 + expmin) {
    389       is_tiny = 1; // applies to all rounding modes
    390     }
    391     res.w[1] = p_sign | ((UINT64) (e4 + 6176) << 49) | R256.w[1];
    392     res.w[0] = R256.w[0];
    393     // Note: res is correct only if expmin <= e4 <= expmax
    394   } else { // if (ind > p34)
    395     // if more than P digits, round to nearest to P digits
    396     // round R256 to p34 digits
    397     x0 = ind - p34; // 1 <= x0 <= 34 as 35 <= ind <= 68
    398     if (ind <= 38) {
    399       P128.w[1] = R256.w[1];
    400       P128.w[0] = R256.w[0];
    401       round128_19_38 (ind, x0, P128, &R128, &incr_exp,
    402         	      &is_midpoint_lt_even, &is_midpoint_gt_even,
    403         	      &is_inexact_lt_midpoint, &is_inexact_gt_midpoint);
    404     } else if (ind <= 57) {
    405       P192.w[2] = R256.w[2];
    406       P192.w[1] = R256.w[1];
    407       P192.w[0] = R256.w[0];
    408       round192_39_57 (ind, x0, P192, &R192, &incr_exp,
    409         	      &is_midpoint_lt_even, &is_midpoint_gt_even,
    410         	      &is_inexact_lt_midpoint, &is_inexact_gt_midpoint);
    411       R128.w[1] = R192.w[1];
    412       R128.w[0] = R192.w[0];
    413     } else { // if (ind <= 68)
    414       round256_58_76 (ind, x0, R256, &R256, &incr_exp,
    415         	      &is_midpoint_lt_even, &is_midpoint_gt_even,
    416         	      &is_inexact_lt_midpoint, &is_inexact_gt_midpoint);
    417       R128.w[1] = R256.w[1];
    418       R128.w[0] = R256.w[0];
    419     }
    420     // the rounded result has p34 = 34 digits
    421     e4 = e4 + x0 + incr_exp;
    422     if (rnd_mode == ROUNDING_TO_NEAREST) {
    423       if (e4 < expmin) {
    424         is_tiny = 1; // for other rounding modes apply correction
    425       }
    426     } else {
    427       // for RM, RP, RZ, RA apply correction in order to determine tininess
    428       // but do not save the result; apply the correction to
    429       // (-1)^p_sign * significand * 10^0
    430       P128.w[1] = p_sign | 0x3040000000000000ull | R128.w[1];
    431       P128.w[0] = R128.w[0];
    432       rounding_correction (rnd_mode,
    433         		   is_inexact_lt_midpoint,
    434         		   is_inexact_gt_midpoint, is_midpoint_lt_even,
    435         		   is_midpoint_gt_even, 0, &P128, ptrfpsf);
    436       scale = ((P128.w[1] & MASK_EXP) >> 49) - 6176; // -1, 0, or +1
    437       // the number of digits in the significand is p34 = 34
    438       if (e4 + scale < expmin) {
    439         is_tiny = 1;
    440       }
    441     }
    442     ind = p34; // the number of decimal digits in the signifcand of res
    443     res.w[1] = p_sign | ((UINT64) (e4 + 6176) << 49) | R128.w[1]; // RN
    444     res.w[0] = R128.w[0];
    445     // Note: res is correct only if expmin <= e4 <= expmax
    446     // set the inexact flag after rounding with bounded exponent, if any
    447   }
    448   // at this point we have the result rounded with unbounded exponent in
    449   // res and we know its tininess:
    450   // res = (-1)^p_sign * significand * 10^e4,
    451   // where q (significand) = ind <= p34
    452   // Note: res is correct only if expmin <= e4 <= expmax
    453 
    454   // check for overflow if RN
    455   if (rnd_mode == ROUNDING_TO_NEAREST && (ind + e4) > (p34 + expmax)) {
    456     res.w[1] = p_sign | 0x7800000000000000ull;
    457     res.w[0] = 0x0000000000000000ull;
    458     *ptrres = res;
    459     *ptrfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION);
    460     return; // BID_RETURN (res)
    461   } // else not overflow or not RN, so continue
    462 
    463   // if (e4 >= expmin) we have the result rounded with bounded exponent
    464   if (e4 < expmin) {
    465     x0 = expmin - e4; // x0 >= 1; the number of digits to chop off of res
    466     // where the result rounded [at most] once is
    467     //   (-1)^p_sign * significand_res * 10^e4
    468 
    469     // avoid double rounding error
    470     is_inexact_lt_midpoint0 = is_inexact_lt_midpoint;
    471     is_inexact_gt_midpoint0 = is_inexact_gt_midpoint;
    472     is_midpoint_lt_even0 = is_midpoint_lt_even;
    473     is_midpoint_gt_even0 = is_midpoint_gt_even;
    474     is_inexact_lt_midpoint = 0;
    475     is_inexact_gt_midpoint = 0;
    476     is_midpoint_lt_even = 0;
    477     is_midpoint_gt_even = 0;
    478 
    479     if (x0 > ind) {
    480       // nothing is left of res when moving the decimal point left x0 digits
    481       is_inexact_lt_midpoint = 1;
    482       res.w[1] = p_sign | 0x0000000000000000ull;
    483       res.w[0] = 0x0000000000000000ull;
    484       e4 = expmin;
    485     } else if (x0 == ind) { // 1 <= x0 = ind <= p34 = 34
    486       // this is <, =, or > 1/2 ulp
    487       // compare the ind-digit value in the significand of res with
    488       // 1/2 ulp = 5*10^(ind-1), i.e. determine whether it is
    489       // less than, equal to, or greater than 1/2 ulp (significand of res)
    490       R128.w[1] = res.w[1] & MASK_COEFF;
    491       R128.w[0] = res.w[0];
    492       if (ind <= 19) {
    493         if (R128.w[0] < midpoint64[ind - 1]) { // < 1/2 ulp
    494           lt_half_ulp = 1;
    495           is_inexact_lt_midpoint = 1;
    496         } else if (R128.w[0] == midpoint64[ind - 1]) { // = 1/2 ulp
    497           eq_half_ulp = 1;
    498           is_midpoint_gt_even = 1;
    499         } else { // > 1/2 ulp
    500           // gt_half_ulp = 1;
    501           is_inexact_gt_midpoint = 1;
    502         }
    503       } else { // if (ind <= 38) {
    504         if (R128.w[1] < midpoint128[ind - 20].w[1] ||
    505             (R128.w[1] == midpoint128[ind - 20].w[1] &&
    506             R128.w[0] < midpoint128[ind - 20].w[0])) { // < 1/2 ulp
    507           lt_half_ulp = 1;
    508           is_inexact_lt_midpoint = 1;
    509         } else if (R128.w[1] == midpoint128[ind - 20].w[1] &&
    510             R128.w[0] == midpoint128[ind - 20].w[0]) { // = 1/2 ulp
    511           eq_half_ulp = 1;
    512           is_midpoint_gt_even = 1;
    513         } else { // > 1/2 ulp
    514           // gt_half_ulp = 1;
    515           is_inexact_gt_midpoint = 1;
    516         }
    517       }
    518       if (lt_half_ulp || eq_half_ulp) {
    519         // res = +0.0 * 10^expmin
    520         res.w[1] = 0x0000000000000000ull;
    521         res.w[0] = 0x0000000000000000ull;
    522       } else { // if (gt_half_ulp)
    523         // res = +1 * 10^expmin
    524         res.w[1] = 0x0000000000000000ull;
    525         res.w[0] = 0x0000000000000001ull;
    526       }
    527       res.w[1] = p_sign | res.w[1];
    528       e4 = expmin;
    529     } else { // if (1 <= x0 <= ind - 1 <= 33)
    530       // round the ind-digit result to ind - x0 digits
    531 
    532       if (ind <= 18) { // 2 <= ind <= 18
    533         round64_2_18 (ind, x0, res.w[0], &R64, &incr_exp,
    534         	      &is_midpoint_lt_even, &is_midpoint_gt_even,
    535         	      &is_inexact_lt_midpoint, &is_inexact_gt_midpoint);
    536         res.w[1] = 0x0;
    537         res.w[0] = R64;
    538       } else if (ind <= 38) {
    539         P128.w[1] = res.w[1] & MASK_COEFF;
    540         P128.w[0] = res.w[0];
    541         round128_19_38 (ind, x0, P128, &res, &incr_exp,
    542         		&is_midpoint_lt_even, &is_midpoint_gt_even,
    543         		&is_inexact_lt_midpoint,
    544         		&is_inexact_gt_midpoint);
    545       }
    546       e4 = e4 + x0; // expmin
    547       // we want the exponent to be expmin, so if incr_exp = 1 then
    548       // multiply the rounded result by 10 - it will still fit in 113 bits
    549       if (incr_exp) {
    550         // 64 x 128 -> 128
    551         P128.w[1] = res.w[1] & MASK_COEFF;
    552         P128.w[0] = res.w[0];
    553         __mul_64x128_to_128 (res, ten2k64[1], P128);
    554       }
    555       res.w[1] =
    556         p_sign | ((UINT64) (e4 + 6176) << 49) | (res.w[1] & MASK_COEFF);
    557       // avoid a double rounding error
    558       if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) &&
    559           is_midpoint_lt_even) { // double rounding error upward
    560         // res = res - 1
    561         res.w[0]--;
    562         if (res.w[0] == 0xffffffffffffffffull)
    563           res.w[1]--;
    564         // Note: a double rounding error upward is not possible; for this
    565         // the result after the first rounding would have to be 99...95
    566         // (35 digits in all), possibly followed by a number of zeros; this
    567         // is not possible in Cases (2)-(6) or (15)-(17) which may get here
    568         is_midpoint_lt_even = 0;
    569         is_inexact_lt_midpoint = 1;
    570       } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) &&
    571           is_midpoint_gt_even) { // double rounding error downward
    572         // res = res + 1
    573         res.w[0]++;
    574         if (res.w[0] == 0)
    575           res.w[1]++;
    576         is_midpoint_gt_even = 0;
    577         is_inexact_gt_midpoint = 1;
    578       } else if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
    579         	 !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) {
    580         // if this second rounding was exact the result may still be
    581         // inexact because of the first rounding
    582         if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) {
    583           is_inexact_gt_midpoint = 1;
    584         }
    585         if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) {
    586           is_inexact_lt_midpoint = 1;
    587         }
    588       } else if (is_midpoint_gt_even &&
    589         	 (is_inexact_gt_midpoint0 || is_midpoint_lt_even0)) {
    590         // pulled up to a midpoint
    591         is_inexact_lt_midpoint = 1;
    592         is_inexact_gt_midpoint = 0;
    593         is_midpoint_lt_even = 0;
    594         is_midpoint_gt_even = 0;
    595       } else if (is_midpoint_lt_even &&
    596         	 (is_inexact_lt_midpoint0 || is_midpoint_gt_even0)) {
    597         // pulled down to a midpoint
    598         is_inexact_lt_midpoint = 0;
    599         is_inexact_gt_midpoint = 1;
    600         is_midpoint_lt_even = 0;
    601         is_midpoint_gt_even = 0;
    602       } else {
    603         ;
    604       }
    605     }
    606   }
    607   // res contains the correct result
    608   // apply correction if not rounding to nearest
    609   if (rnd_mode != ROUNDING_TO_NEAREST) {
    610     rounding_correction (rnd_mode,
    611         		 is_inexact_lt_midpoint, is_inexact_gt_midpoint,
    612         		 is_midpoint_lt_even, is_midpoint_gt_even,
    613         		 e4, &res, ptrfpsf);
    614   }
    615   if (is_midpoint_lt_even || is_midpoint_gt_even ||
    616       is_inexact_lt_midpoint || is_inexact_gt_midpoint) {
    617     // set the inexact flag
    618     *ptrfpsf |= INEXACT_EXCEPTION;
    619     if (is_tiny)
    620       *ptrfpsf |= UNDERFLOW_EXCEPTION;
    621   }
    622 
    623   *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
    624   *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
    625   *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
    626   *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
    627   *ptrres = res;
    628   return;
    629 }
    630 
    631 
    632 #if DECIMAL_CALL_BY_REFERENCE
    633 static void
    634 bid128_ext_fma (int *ptr_is_midpoint_lt_even,
    635         	int *ptr_is_midpoint_gt_even,
    636         	int *ptr_is_inexact_lt_midpoint,
    637         	int *ptr_is_inexact_gt_midpoint, UINT128 * pres,
    638         	UINT128 * px, UINT128 * py,
    639         	UINT128 *
    640         	pz _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
    641         	_EXC_INFO_PARAM) {
    642   UINT128 x = *px, y = *py, z = *pz;
    643 #if !DECIMAL_GLOBAL_ROUNDING
    644   unsigned int rnd_mode = *prnd_mode;
    645 #endif
    646 #else
    647 static UINT128
    648 bid128_ext_fma (int *ptr_is_midpoint_lt_even,
    649         	int *ptr_is_midpoint_gt_even,
    650         	int *ptr_is_inexact_lt_midpoint,
    651         	int *ptr_is_inexact_gt_midpoint, UINT128 x, UINT128 y,
    652         	UINT128 z _RND_MODE_PARAM _EXC_FLAGS_PARAM
    653         	_EXC_MASKS_PARAM _EXC_INFO_PARAM) {
    654 #endif
    655 
    656   UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
    657   UINT64 x_sign, y_sign, z_sign, p_sign, tmp_sign;
    658   UINT64 x_exp = 0, y_exp = 0, z_exp = 0, p_exp;
    659   int true_p_exp;
    660   UINT128 C1, C2, C3;
    661   UINT256 C4;
    662   int q1 = 0, q2 = 0, q3 = 0, q4;
    663   int e1, e2, e3, e4;
    664   int scale, ind, delta, x0;
    665   int p34 = P34; // used to modify the limit on the number of digits
    666   BID_UI64DOUBLE tmp;
    667   int x_nr_bits, y_nr_bits, z_nr_bits;
    668   unsigned int save_fpsf;
    669   int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0;
    670   int is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0;
    671   int is_midpoint_lt_even0 = 0, is_midpoint_gt_even0 = 0;
    672   int is_inexact_lt_midpoint0 = 0, is_inexact_gt_midpoint0 = 0;
    673   int incr_exp = 0;
    674   int lsb;
    675   int lt_half_ulp = 0;
    676   int eq_half_ulp = 0;
    677   int gt_half_ulp = 0;
    678   int is_tiny = 0;
    679   UINT64 R64, tmp64;
    680   UINT128 P128, R128;
    681   UINT192 P192, R192;
    682   UINT256 R256;
    683 
    684   // the following are based on the table of special cases for fma; the NaN
    685   // behavior is similar to that of the IA-64 Architecture fma
    686 
    687   // identify cases where at least one operand is NaN
    688 
    689   BID_SWAP128 (x);
    690   BID_SWAP128 (y);
    691   BID_SWAP128 (z);
    692   if ((y.w[1] & MASK_NAN) == MASK_NAN) { // y is NAN
    693     // if x = {0, f, inf, NaN}, y = NaN, z = {0, f, inf, NaN} then res = Q (y)
    694     // check first for non-canonical NaN payload
    695     if (((y.w[1] & 0x00003fffffffffffull) > 0x0000314dc6448d93ull) ||
    696         (((y.w[1] & 0x00003fffffffffffull) == 0x0000314dc6448d93ull) &&
    697          (y.w[0] > 0x38c15b09ffffffffull))) {
    698       y.w[1] = y.w[1] & 0xffffc00000000000ull;
    699       y.w[0] = 0x0ull;
    700     }
    701     if ((y.w[1] & MASK_SNAN) == MASK_SNAN) { // y is SNAN
    702       // set invalid flag
    703       *pfpsf |= INVALID_EXCEPTION;
    704       // return quiet (y)
    705       res.w[1] = y.w[1] & 0xfc003fffffffffffull; // clear out also G[6]-G[16]
    706       res.w[0] = y.w[0];
    707     } else { // y is QNaN
    708       // return y
    709       res.w[1] = y.w[1] & 0xfc003fffffffffffull; // clear out G[6]-G[16]
    710       res.w[0] = y.w[0];
    711       // if z = SNaN or x = SNaN signal invalid exception
    712       if ((z.w[1] & MASK_SNAN) == MASK_SNAN ||
    713           (x.w[1] & MASK_SNAN) == MASK_SNAN) {
    714         // set invalid flag
    715         *pfpsf |= INVALID_EXCEPTION;
    716       }
    717     }
    718     *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
    719     *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
    720     *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
    721     *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
    722     BID_SWAP128 (res);
    723     BID_RETURN (res)
    724   } else if ((z.w[1] & MASK_NAN) == MASK_NAN) { // z is NAN
    725     // if x = {0, f, inf, NaN}, y = {0, f, inf}, z = NaN then res = Q (z)
    726     // check first for non-canonical NaN payload
    727     if (((z.w[1] & 0x00003fffffffffffull) > 0x0000314dc6448d93ull) ||
    728         (((z.w[1] & 0x00003fffffffffffull) == 0x0000314dc6448d93ull) &&
    729          (z.w[0] > 0x38c15b09ffffffffull))) {
    730       z.w[1] = z.w[1] & 0xffffc00000000000ull;
    731       z.w[0] = 0x0ull;
    732     }
    733     if ((z.w[1] & MASK_SNAN) == MASK_SNAN) { // z is SNAN
    734       // set invalid flag
    735       *pfpsf |= INVALID_EXCEPTION;
    736       // return quiet (z)
    737       res.w[1] = z.w[1] & 0xfc003fffffffffffull; // clear out also G[6]-G[16]
    738       res.w[0] = z.w[0];
    739     } else { // z is QNaN
    740       // return z
    741       res.w[1] = z.w[1] & 0xfc003fffffffffffull; // clear out G[6]-G[16]
    742       res.w[0] = z.w[0];
    743       // if x = SNaN signal invalid exception
    744       if ((x.w[1] & MASK_SNAN) == MASK_SNAN) {
    745         // set invalid flag
    746         *pfpsf |= INVALID_EXCEPTION;
    747       }
    748     }
    749     *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
    750     *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
    751     *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
    752     *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
    753     BID_SWAP128 (res);
    754     BID_RETURN (res)
    755   } else if ((x.w[1] & MASK_NAN) == MASK_NAN) { // x is NAN
    756     // if x = NaN, y = {0, f, inf}, z = {0, f, inf} then res = Q (x)
    757     // check first for non-canonical NaN payload
    758     if (((x.w[1] & 0x00003fffffffffffull) > 0x0000314dc6448d93ull) ||
    759         (((x.w[1] & 0x00003fffffffffffull) == 0x0000314dc6448d93ull) &&
    760          (x.w[0] > 0x38c15b09ffffffffull))) {
    761       x.w[1] = x.w[1] & 0xffffc00000000000ull;
    762       x.w[0] = 0x0ull;
    763     }
    764     if ((x.w[1] & MASK_SNAN) == MASK_SNAN) { // x is SNAN
    765       // set invalid flag
    766       *pfpsf |= INVALID_EXCEPTION;
    767       // return quiet (x)
    768       res.w[1] = x.w[1] & 0xfc003fffffffffffull; // clear out also G[6]-G[16]
    769       res.w[0] = x.w[0];
    770     } else { // x is QNaN
    771       // return x
    772       res.w[1] = x.w[1] & 0xfc003fffffffffffull; // clear out G[6]-G[16]
    773       res.w[0] = x.w[0];
    774     }
    775     *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
    776     *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
    777     *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
    778     *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
    779     BID_SWAP128 (res);
    780     BID_RETURN (res)
    781   }
    782   // x, y, z are 0, f, or inf but not NaN => unpack the arguments and check
    783   // for non-canonical values
    784 
    785   x_sign = x.w[1] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative
    786   C1.w[1] = x.w[1] & MASK_COEFF;
    787   C1.w[0] = x.w[0];
    788   if ((x.w[1] & MASK_ANY_INF) != MASK_INF) { // x != inf
    789     // if x is not infinity check for non-canonical values - treated as zero
    790     if ((x.w[1] & 0x6000000000000000ull) == 0x6000000000000000ull) { // G0_G1=11
    791       // non-canonical
    792       x_exp = (x.w[1] << 2) & MASK_EXP; // biased and shifted left 49 bits
    793       C1.w[1] = 0; // significand high
    794       C1.w[0] = 0; // significand low
    795     } else { // G0_G1 != 11
    796       x_exp = x.w[1] & MASK_EXP; // biased and shifted left 49 bits
    797       if (C1.w[1] > 0x0001ed09bead87c0ull ||
    798           (C1.w[1] == 0x0001ed09bead87c0ull &&
    799            C1.w[0] > 0x378d8e63ffffffffull)) {
    800         // x is non-canonical if coefficient is larger than 10^34 -1
    801         C1.w[1] = 0;
    802         C1.w[0] = 0;
    803       } else { // canonical
    804         ;
    805       }
    806     }
    807   }
    808   y_sign = y.w[1] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative
    809   C2.w[1] = y.w[1] & MASK_COEFF;
    810   C2.w[0] = y.w[0];
    811   if ((y.w[1] & MASK_ANY_INF) != MASK_INF) { // y != inf
    812     // if y is not infinity check for non-canonical values - treated as zero
    813     if ((y.w[1] & 0x6000000000000000ull) == 0x6000000000000000ull) { // G0_G1=11
    814       // non-canonical
    815       y_exp = (y.w[1] << 2) & MASK_EXP; // biased and shifted left 49 bits
    816       C2.w[1] = 0; // significand high
    817       C2.w[0] = 0; // significand low
    818     } else { // G0_G1 != 11
    819       y_exp = y.w[1] & MASK_EXP; // biased and shifted left 49 bits
    820       if (C2.w[1] > 0x0001ed09bead87c0ull ||
    821           (C2.w[1] == 0x0001ed09bead87c0ull &&
    822            C2.w[0] > 0x378d8e63ffffffffull)) {
    823         // y is non-canonical if coefficient is larger than 10^34 -1
    824         C2.w[1] = 0;
    825         C2.w[0] = 0;
    826       } else { // canonical
    827         ;
    828       }
    829     }
    830   }
    831   z_sign = z.w[1] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative
    832   C3.w[1] = z.w[1] & MASK_COEFF;
    833   C3.w[0] = z.w[0];
    834   if ((z.w[1] & MASK_ANY_INF) != MASK_INF) { // z != inf
    835     // if z is not infinity check for non-canonical values - treated as zero
    836     if ((z.w[1] & 0x6000000000000000ull) == 0x6000000000000000ull) { // G0_G1=11
    837       // non-canonical
    838       z_exp = (z.w[1] << 2) & MASK_EXP; // biased and shifted left 49 bits
    839       C3.w[1] = 0; // significand high
    840       C3.w[0] = 0; // significand low
    841     } else { // G0_G1 != 11
    842       z_exp = z.w[1] & MASK_EXP; // biased and shifted left 49 bits
    843       if (C3.w[1] > 0x0001ed09bead87c0ull ||
    844           (C3.w[1] == 0x0001ed09bead87c0ull &&
    845            C3.w[0] > 0x378d8e63ffffffffull)) {
    846         // z is non-canonical if coefficient is larger than 10^34 -1
    847         C3.w[1] = 0;
    848         C3.w[0] = 0;
    849       } else { // canonical
    850         ;
    851       }
    852     }
    853   }
    854 
    855   p_sign = x_sign ^ y_sign; // sign of the product
    856 
    857   // identify cases where at least one operand is infinity
    858 
    859   if ((x.w[1] & MASK_ANY_INF) == MASK_INF) { // x = inf
    860     if ((y.w[1] & MASK_ANY_INF) == MASK_INF) { // y = inf
    861       if ((z.w[1] & MASK_ANY_INF) == MASK_INF) { // z = inf
    862         if (p_sign == z_sign) {
    863           res.w[1] = z_sign | MASK_INF;
    864           res.w[0] = 0x0;
    865         } else {
    866           // return QNaN Indefinite
    867           res.w[1] = 0x7c00000000000000ull;
    868           res.w[0] = 0x0000000000000000ull;
    869           // set invalid flag
    870           *pfpsf |= INVALID_EXCEPTION;
    871         }
    872       } else { // z = 0 or z = f
    873         res.w[1] = p_sign | MASK_INF;
    874         res.w[0] = 0x0;
    875       }
    876     } else if (C2.w[1] != 0 || C2.w[0] != 0) { // y = f
    877       if ((z.w[1] & MASK_ANY_INF) == MASK_INF) { // z = inf
    878         if (p_sign == z_sign) {
    879           res.w[1] = z_sign | MASK_INF;
    880           res.w[0] = 0x0;
    881         } else {
    882           // return QNaN Indefinite
    883           res.w[1] = 0x7c00000000000000ull;
    884           res.w[0] = 0x0000000000000000ull;
    885           // set invalid flag
    886           *pfpsf |= INVALID_EXCEPTION;
    887         }
    888       } else { // z = 0 or z = f
    889         res.w[1] = p_sign | MASK_INF;
    890         res.w[0] = 0x0;
    891       }
    892     } else { // y = 0
    893       // return QNaN Indefinite
    894       res.w[1] = 0x7c00000000000000ull;
    895       res.w[0] = 0x0000000000000000ull;
    896       // set invalid flag
    897       *pfpsf |= INVALID_EXCEPTION;
    898     }
    899     *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
    900     *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
    901     *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
    902     *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
    903     BID_SWAP128 (res);
    904     BID_RETURN (res)
    905   } else if ((y.w[1] & MASK_ANY_INF) == MASK_INF) { // y = inf
    906     if ((z.w[1] & MASK_ANY_INF) == MASK_INF) { // z = inf
    907       // x = f, necessarily
    908       if ((p_sign != z_sign)
    909           || (C1.w[1] == 0x0ull && C1.w[0] == 0x0ull)) {
    910         // return QNaN Indefinite
    911         res.w[1] = 0x7c00000000000000ull;
    912         res.w[0] = 0x0000000000000000ull;
    913         // set invalid flag
    914         *pfpsf |= INVALID_EXCEPTION;
    915       } else {
    916         res.w[1] = z_sign | MASK_INF;
    917         res.w[0] = 0x0;
    918       }
    919     } else if (C1.w[1] == 0x0 && C1.w[0] == 0x0) { // x = 0
    920       // z = 0, f, inf
    921       // return QNaN Indefinite
    922       res.w[1] = 0x7c00000000000000ull;
    923       res.w[0] = 0x0000000000000000ull;
    924       // set invalid flag
    925       *pfpsf |= INVALID_EXCEPTION;
    926     } else {
    927       // x = f and z = 0, f, necessarily
    928       res.w[1] = p_sign | MASK_INF;
    929       res.w[0] = 0x0;
    930     }
    931     *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
    932     *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
    933     *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
    934     *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
    935     BID_SWAP128 (res);
    936     BID_RETURN (res)
    937   } else if ((z.w[1] & MASK_ANY_INF) == MASK_INF) { // z = inf
    938     // x = 0, f and y = 0, f, necessarily
    939     res.w[1] = z_sign | MASK_INF;
    940     res.w[0] = 0x0;
    941     *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
    942     *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
    943     *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
    944     *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
    945     BID_SWAP128 (res);
    946     BID_RETURN (res)
    947   }
    948 
    949   true_p_exp = (x_exp >> 49) - 6176 + (y_exp >> 49) - 6176;
    950   if (true_p_exp < -6176)
    951     p_exp = 0; // cannot be less than EXP_MIN
    952   else
    953     p_exp = (UINT64) (true_p_exp + 6176) << 49;
    954 
    955   if (((C1.w[1] == 0x0 && C1.w[0] == 0x0) || (C2.w[1] == 0x0 && C2.w[0] == 0x0)) && C3.w[1] == 0x0 && C3.w[0] == 0x0) { // (x = 0 or y = 0) and z = 0
    956     // the result is 0
    957     if (p_exp < z_exp)
    958       res.w[1] = p_exp; // preferred exponent
    959     else
    960       res.w[1] = z_exp; // preferred exponent
    961     if (p_sign == z_sign) {
    962       res.w[1] |= z_sign;
    963       res.w[0] = 0x0;
    964     } else { // x * y and z have opposite signs
    965       if (rnd_mode == ROUNDING_DOWN) {
    966         // res = -0.0
    967         res.w[1] |= MASK_SIGN;
    968         res.w[0] = 0x0;
    969       } else {
    970         // res = +0.0
    971         // res.w[1] |= 0x0;
    972         res.w[0] = 0x0;
    973       }
    974     }
    975     *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
    976     *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
    977     *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
    978     *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
    979     BID_SWAP128 (res);
    980     BID_RETURN (res)
    981   }
    982   // from this point on, we may need to know the number of decimal digits
    983   // in the significands of x, y, z when x, y, z != 0
    984 
    985   if (C1.w[1] != 0 || C1.w[0] != 0) { // x = f (non-zero finite)
    986     // q1 = nr. of decimal digits in x
    987     // determine first the nr. of bits in x
    988     if (C1.w[1] == 0) {
    989       if (C1.w[0] >= 0x0020000000000000ull) { // x >= 2^53
    990         // split the 64-bit value in two 32-bit halves to avoid rounding errors
    991         if (C1.w[0] >= 0x0000000100000000ull) { // x >= 2^32
    992           tmp.d = (double) (C1.w[0] >> 32); // exact conversion
    993           x_nr_bits =
    994             33 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
    995         } else { // x < 2^32
    996           tmp.d = (double) (C1.w[0]); // exact conversion
    997           x_nr_bits =
    998             1 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
    999         }
   1000       } else { // if x < 2^53
   1001         tmp.d = (double) C1.w[0]; // exact conversion
   1002         x_nr_bits =
   1003           1 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
   1004       }
   1005     } else { // C1.w[1] != 0 => nr. bits = 64 + nr_bits (C1.w[1])
   1006       tmp.d = (double) C1.w[1]; // exact conversion
   1007       x_nr_bits =
   1008         65 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
   1009     }
   1010     q1 = nr_digits[x_nr_bits - 1].digits;
   1011     if (q1 == 0) {
   1012       q1 = nr_digits[x_nr_bits - 1].digits1;
   1013       if (C1.w[1] > nr_digits[x_nr_bits - 1].threshold_hi ||
   1014           (C1.w[1] == nr_digits[x_nr_bits - 1].threshold_hi &&
   1015            C1.w[0] >= nr_digits[x_nr_bits - 1].threshold_lo))
   1016         q1++;
   1017     }
   1018   }
   1019 
   1020   if (C2.w[1] != 0 || C2.w[0] != 0) { // y = f (non-zero finite)
   1021     if (C2.w[1] == 0) {
   1022       if (C2.w[0] >= 0x0020000000000000ull) { // y >= 2^53
   1023         // split the 64-bit value in two 32-bit halves to avoid rounding errors
   1024         if (C2.w[0] >= 0x0000000100000000ull) { // y >= 2^32
   1025           tmp.d = (double) (C2.w[0] >> 32); // exact conversion
   1026           y_nr_bits =
   1027             32 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
   1028         } else { // y < 2^32
   1029           tmp.d = (double) C2.w[0]; // exact conversion
   1030           y_nr_bits =
   1031             ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
   1032         }
   1033       } else { // if y < 2^53
   1034         tmp.d = (double) C2.w[0]; // exact conversion
   1035         y_nr_bits =
   1036           ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
   1037       }
   1038     } else { // C2.w[1] != 0 => nr. bits = 64 + nr_bits (C2.w[1])
   1039       tmp.d = (double) C2.w[1]; // exact conversion
   1040       y_nr_bits =
   1041         64 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
   1042     }
   1043 
   1044     q2 = nr_digits[y_nr_bits].digits;
   1045     if (q2 == 0) {
   1046       q2 = nr_digits[y_nr_bits].digits1;
   1047       if (C2.w[1] > nr_digits[y_nr_bits].threshold_hi ||
   1048           (C2.w[1] == nr_digits[y_nr_bits].threshold_hi &&
   1049            C2.w[0] >= nr_digits[y_nr_bits].threshold_lo))
   1050         q2++;
   1051     }
   1052   }
   1053 
   1054   if (C3.w[1] != 0 || C3.w[0] != 0) { // z = f (non-zero finite)
   1055     if (C3.w[1] == 0) {
   1056       if (C3.w[0] >= 0x0020000000000000ull) { // z >= 2^53
   1057         // split the 64-bit value in two 32-bit halves to avoid rounding errors
   1058         if (C3.w[0] >= 0x0000000100000000ull) { // z >= 2^32
   1059           tmp.d = (double) (C3.w[0] >> 32); // exact conversion
   1060           z_nr_bits =
   1061             32 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
   1062         } else { // z < 2^32
   1063           tmp.d = (double) C3.w[0]; // exact conversion
   1064           z_nr_bits =
   1065             ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
   1066         }
   1067       } else { // if z < 2^53
   1068         tmp.d = (double) C3.w[0]; // exact conversion
   1069         z_nr_bits =
   1070           ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
   1071       }
   1072     } else { // C3.w[1] != 0 => nr. bits = 64 + nr_bits (C3.w[1])
   1073       tmp.d = (double) C3.w[1]; // exact conversion
   1074       z_nr_bits =
   1075         64 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
   1076     }
   1077 
   1078     q3 = nr_digits[z_nr_bits].digits;
   1079     if (q3 == 0) {
   1080       q3 = nr_digits[z_nr_bits].digits1;
   1081       if (C3.w[1] > nr_digits[z_nr_bits].threshold_hi ||
   1082           (C3.w[1] == nr_digits[z_nr_bits].threshold_hi &&
   1083            C3.w[0] >= nr_digits[z_nr_bits].threshold_lo))
   1084         q3++;
   1085     }
   1086   }
   1087 
   1088   if ((C1.w[1] == 0x0 && C1.w[0] == 0x0) ||
   1089       (C2.w[1] == 0x0 && C2.w[0] == 0x0)) {
   1090     // x = 0 or y = 0
   1091     // z = f, necessarily; for 0 + z return z, with the preferred exponent
   1092     // the result is z, but need to get the preferred exponent
   1093     if (z_exp <= p_exp) { // the preferred exponent is z_exp
   1094       res.w[1] = z_sign | (z_exp & MASK_EXP) | C3.w[1];
   1095       res.w[0] = C3.w[0];
   1096     } else { // if (p_exp < z_exp) the preferred exponent is p_exp
   1097       // return (C3 * 10^scale) * 10^(z_exp - scale)
   1098       // where scale = min (p34-q3, (z_exp-p_exp) >> 49)
   1099       scale = p34 - q3;
   1100       ind = (z_exp - p_exp) >> 49;
   1101       if (ind < scale)
   1102         scale = ind;
   1103       if (scale == 0) {
   1104         res.w[1] = z.w[1]; // & MASK_COEFF, which is redundant
   1105         res.w[0] = z.w[0];
   1106       } else if (q3 <= 19) { // z fits in 64 bits
   1107         if (scale <= 19) { // 10^scale fits in 64 bits
   1108           // 64 x 64 C3.w[0] * ten2k64[scale]
   1109           __mul_64x64_to_128MACH (res, C3.w[0], ten2k64[scale]);
   1110         } else { // 10^scale fits in 128 bits
   1111           // 64 x 128 C3.w[0] * ten2k128[scale - 20]
   1112           __mul_128x64_to_128 (res, C3.w[0], ten2k128[scale - 20]);
   1113         }
   1114       } else { // z fits in 128 bits, but 10^scale must fit in 64 bits
   1115         // 64 x 128 ten2k64[scale] * C3
   1116         __mul_128x64_to_128 (res, ten2k64[scale], C3);
   1117       }
   1118       // subtract scale from the exponent
   1119       z_exp = z_exp - ((UINT64) scale << 49);
   1120       res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1];
   1121     }
   1122     *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
   1123     *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
   1124     *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
   1125     *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
   1126     BID_SWAP128 (res);
   1127     BID_RETURN (res)
   1128   } else {
   1129     ; // continue with x = f, y = f, z = 0 or x = f, y = f, z = f
   1130   }
   1131 
   1132   e1 = (x_exp >> 49) - 6176; // unbiased exponent of x
   1133   e2 = (y_exp >> 49) - 6176; // unbiased exponent of y
   1134   e3 = (z_exp >> 49) - 6176; // unbiased exponent of z
   1135   e4 = e1 + e2; // unbiased exponent of the exact x * y
   1136 
   1137   // calculate C1 * C2 and its number of decimal digits, q4
   1138 
   1139   // the exact product has either q1 + q2 - 1 or q1 + q2 decimal digits
   1140   // where 2 <= q1 + q2 <= 68
   1141   // calculate C4 = C1 * C2 and determine q
   1142   C4.w[3] = C4.w[2] = C4.w[1] = C4.w[0] = 0;
   1143   if (q1 + q2 <= 19) { // if 2 <= q1 + q2 <= 19, C4 = C1 * C2 fits in 64 bits
   1144     C4.w[0] = C1.w[0] * C2.w[0];
   1145     // if C4 < 10^(q1+q2-1) then q4 = q1 + q2 - 1 else q4 = q1 + q2
   1146     if (C4.w[0] < ten2k64[q1 + q2 - 1])
   1147       q4 = q1 + q2 - 1; // q4 in [1, 18]
   1148     else
   1149       q4 = q1 + q2; // q4 in [2, 19]
   1150     // length of C1 * C2 rounded up to a multiple of 64 bits is len = 64;
   1151   } else if (q1 + q2 == 20) { // C4 = C1 * C2 fits in 64 or 128 bits
   1152     // q1 <= 19 and q2 <= 19 so both C1 and C2 fit in 64 bits
   1153     __mul_64x64_to_128MACH (C4, C1.w[0], C2.w[0]);
   1154     // if C4 < 10^(q1+q2-1) = 10^19 then q4 = q1+q2-1 = 19 else q4 = q1+q2 = 20
   1155     if (C4.w[1] == 0 && C4.w[0] < ten2k64[19]) { // 19 = q1+q2-1
   1156       // length of C1 * C2 rounded up to a multiple of 64 bits is len = 64;
   1157       q4 = 19; // 19 = q1 + q2 - 1
   1158     } else {
   1159       // if (C4.w[1] == 0)
   1160       //   length of C1 * C2 rounded up to a multiple of 64 bits is len = 64;
   1161       // else
   1162       //   length of C1 * C2 rounded up to a multiple of 64 bits is len = 128;
   1163       q4 = 20; // 20 = q1 + q2
   1164     }
   1165   } else if (q1 + q2 <= 38) { // 21 <= q1 + q2 <= 38
   1166     // C4 = C1 * C2 fits in 64 or 128 bits
   1167     // (64 bits possibly, but only when q1 + q2 = 21 and C4 has 20 digits)
   1168     // at least one of C1, C2 has at most 19 decimal digits & fits in 64 bits
   1169     if (q1 <= 19) {
   1170       __mul_128x64_to_128 (C4, C1.w[0], C2);
   1171     } else { // q2 <= 19
   1172       __mul_128x64_to_128 (C4, C2.w[0], C1);
   1173     }
   1174     // if C4 < 10^(q1+q2-1) then q4 = q1 + q2 - 1 else q4 = q1 + q2
   1175     if (C4.w[1] < ten2k128[q1 + q2 - 21].w[1] ||
   1176         (C4.w[1] == ten2k128[q1 + q2 - 21].w[1] &&
   1177          C4.w[0] < ten2k128[q1 + q2 - 21].w[0])) {
   1178       // if (C4.w[1] == 0) // q4 = 20, necessarily
   1179       //   length of C1 * C2 rounded up to a multiple of 64 bits is len = 64;
   1180       // else
   1181       //   length of C1 * C2 rounded up to a multiple of 64 bits is len = 128;
   1182       q4 = q1 + q2 - 1; // q4 in [20, 37]
   1183     } else {
   1184       // length of C1 * C2 rounded up to a multiple of 64 bits is len = 128;
   1185       q4 = q1 + q2; // q4 in [21, 38]
   1186     }
   1187   } else if (q1 + q2 == 39) { // C4 = C1 * C2 fits in 128 or 192 bits
   1188     // both C1 and C2 fit in 128 bits (actually in 113 bits)
   1189     // may replace this by 128x128_to192
   1190     __mul_128x128_to_256 (C4, C1, C2); // C4.w[3] is 0
   1191     // if C4 < 10^(q1+q2-1) = 10^38 then q4 = q1+q2-1 = 38 else q4 = q1+q2 = 39
   1192     if (C4.w[2] == 0 && (C4.w[1] < ten2k128[18].w[1] ||
   1193         		 (C4.w[1] == ten2k128[18].w[1]
   1194         		  && C4.w[0] < ten2k128[18].w[0]))) {
   1195       // 18 = 38 - 20 = q1+q2-1 - 20
   1196       // length of C1 * C2 rounded up to a multiple of 64 bits is len = 128;
   1197       q4 = 38; // 38 = q1 + q2 - 1
   1198     } else {
   1199       // if (C4.w[2] == 0)
   1200       // length of C1 * C2 rounded up to a multiple of 64 bits is len = 128;
   1201       // else
   1202       //   length of C1 * C2 rounded up to a multiple of 64 bits is len = 192;
   1203       q4 = 39; // 39 = q1 + q2
   1204     }
   1205   } else if (q1 + q2 <= 57) { // 40 <= q1 + q2 <= 57
   1206     // C4 = C1 * C2 fits in 128 or 192 bits
   1207     // (128 bits possibly, but only when q1 + q2 = 40 and C4 has 39 digits)
   1208     // both C1 and C2 fit in 128 bits (actually in 113 bits); at most one
   1209     // may fit in 64 bits
   1210     if (C1.w[1] == 0) { // C1 fits in 64 bits
   1211       // __mul_64x128_full (REShi64, RESlo128, A64, B128)
   1212       __mul_64x128_full (C4.w[2], C4, C1.w[0], C2);
   1213     } else if (C2.w[1] == 0) { // C2 fits in 64 bits
   1214       // __mul_64x128_full (REShi64, RESlo128, A64, B128)
   1215       __mul_64x128_full (C4.w[2], C4, C2.w[0], C1);
   1216     } else { // both C1 and C2 require 128 bits
   1217       // may use __mul_128x128_to_192 (C4.w[2], C4.w[0], C2.w[0], C1);
   1218       __mul_128x128_to_256 (C4, C1, C2); // C4.w[3] = 0
   1219     }
   1220     // if C4 < 10^(q1+q2-1) then q4 = q1 + q2 - 1 else q4 = q1 + q2
   1221     if (C4.w[2] < ten2k256[q1 + q2 - 40].w[2] ||
   1222         (C4.w[2] == ten2k256[q1 + q2 - 40].w[2] &&
   1223          (C4.w[1] < ten2k256[q1 + q2 - 40].w[1] ||
   1224           (C4.w[1] == ten2k256[q1 + q2 - 40].w[1] &&
   1225            C4.w[0] < ten2k256[q1 + q2 - 40].w[0])))) {
   1226       // if (C4.w[2] == 0) // q4 = 39, necessarily
   1227       //   length of C1 * C2 rounded up to a multiple of 64 bits is len = 128;
   1228       // else
   1229       //   length of C1 * C2 rounded up to a multiple of 64 bits is len = 192;
   1230       q4 = q1 + q2 - 1; // q4 in [39, 56]
   1231     } else {
   1232       // length of C1 * C2 rounded up to a multiple of 64 bits is len = 192;
   1233       q4 = q1 + q2; // q4 in [40, 57]
   1234     }
   1235   } else if (q1 + q2 == 58) { // C4 = C1 * C2 fits in 192 or 256 bits
   1236     // both C1 and C2 fit in 128 bits (actually in 113 bits); at most one
   1237     // may fit in 64 bits
   1238     if (C1.w[1] == 0) { // C1 * C2 will fit in 192 bits
   1239       __mul_64x128_full (C4.w[2], C4, C1.w[0], C2); // may use 64x128_to_192
   1240     } else if (C2.w[1] == 0) { // C1 * C2 will fit in 192 bits
   1241       __mul_64x128_full (C4.w[2], C4, C2.w[0], C1); // may use 64x128_to_192
   1242     } else { // C1 * C2 will fit in 192 bits or in 256 bits
   1243       __mul_128x128_to_256 (C4, C1, C2);
   1244     }
   1245     // if C4 < 10^(q1+q2-1) = 10^57 then q4 = q1+q2-1 = 57 else q4 = q1+q2 = 58
   1246     if (C4.w[3] == 0 && (C4.w[2] < ten2k256[18].w[2] ||
   1247         		 (C4.w[2] == ten2k256[18].w[2]
   1248         		  && (C4.w[1] < ten2k256[18].w[1]
   1249         		      || (C4.w[1] == ten2k256[18].w[1]
   1250         			  && C4.w[0] < ten2k256[18].w[0]))))) {
   1251       // 18 = 57 - 39 = q1+q2-1 - 39
   1252       // length of C1 * C2 rounded up to a multiple of 64 bits is len = 192;
   1253       q4 = 57; // 57 = q1 + q2 - 1
   1254     } else {
   1255       // if (C4.w[3] == 0)
   1256       //   length of C1 * C2 rounded up to a multiple of 64 bits is len = 192;
   1257       // else
   1258       //   length of C1 * C2 rounded up to a multiple of 64 bits is len = 256;
   1259       q4 = 58; // 58 = q1 + q2
   1260     }
   1261   } else { // if 59 <= q1 + q2 <= 68
   1262     // C4 = C1 * C2 fits in 192 or 256 bits
   1263     // (192 bits possibly, but only when q1 + q2 = 59 and C4 has 58 digits)
   1264     // both C1 and C2 fit in 128 bits (actually in 113 bits); none fits in
   1265     // 64 bits
   1266     // may use __mul_128x128_to_192 (C4.w[2], C4.w[0], C2.w[0], C1);
   1267     __mul_128x128_to_256 (C4, C1, C2); // C4.w[3] = 0
   1268     // if C4 < 10^(q1+q2-1) then q4 = q1 + q2 - 1 else q4 = q1 + q2
   1269     if (C4.w[3] < ten2k256[q1 + q2 - 40].w[3] ||
   1270         (C4.w[3] == ten2k256[q1 + q2 - 40].w[3] &&
   1271          (C4.w[2] < ten2k256[q1 + q2 - 40].w[2] ||
   1272           (C4.w[2] == ten2k256[q1 + q2 - 40].w[2] &&
   1273            (C4.w[1] < ten2k256[q1 + q2 - 40].w[1] ||
   1274             (C4.w[1] == ten2k256[q1 + q2 - 40].w[1] &&
   1275              C4.w[0] < ten2k256[q1 + q2 - 40].w[0])))))) {
   1276       // if (C4.w[3] == 0) // q4 = 58, necessarily
   1277       //   length of C1 * C2 rounded up to a multiple of 64 bits is len = 192;
   1278       // else
   1279       //   length of C1 * C2 rounded up to a multiple of 64 bits is len = 256;
   1280       q4 = q1 + q2 - 1; // q4 in [58, 67]
   1281     } else {
   1282       // length of C1 * C2 rounded up to a multiple of 64 bits is len = 256;
   1283       q4 = q1 + q2; // q4 in [59, 68]
   1284     }
   1285   }
   1286 
   1287   if (C3.w[1] == 0x0 && C3.w[0] == 0x0) { // x = f, y = f, z = 0
   1288     save_fpsf = *pfpsf; // sticky bits - caller value must be preserved
   1289     *pfpsf = 0;
   1290 
   1291     if (q4 > p34) {
   1292 
   1293       // truncate C4 to p34 digits into res
   1294       // x = q4-p34, 1 <= x <= 34 because 35 <= q4 <= 68
   1295       x0 = q4 - p34;
   1296       if (q4 <= 38) {
   1297         P128.w[1] = C4.w[1];
   1298         P128.w[0] = C4.w[0];
   1299         round128_19_38 (q4, x0, P128, &res, &incr_exp,
   1300         		&is_midpoint_lt_even, &is_midpoint_gt_even,
   1301         		&is_inexact_lt_midpoint,
   1302         		&is_inexact_gt_midpoint);
   1303       } else if (q4 <= 57) { // 35 <= q4 <= 57
   1304         P192.w[2] = C4.w[2];
   1305         P192.w[1] = C4.w[1];
   1306         P192.w[0] = C4.w[0];
   1307         round192_39_57 (q4, x0, P192, &R192, &incr_exp,
   1308         		&is_midpoint_lt_even, &is_midpoint_gt_even,
   1309         		&is_inexact_lt_midpoint,
   1310         		&is_inexact_gt_midpoint);
   1311         res.w[0] = R192.w[0];
   1312         res.w[1] = R192.w[1];
   1313       } else { // if (q4 <= 68)
   1314         round256_58_76 (q4, x0, C4, &R256, &incr_exp,
   1315         		&is_midpoint_lt_even, &is_midpoint_gt_even,
   1316         		&is_inexact_lt_midpoint,
   1317         		&is_inexact_gt_midpoint);
   1318         res.w[0] = R256.w[0];
   1319         res.w[1] = R256.w[1];
   1320       }
   1321       e4 = e4 + x0;
   1322       if (incr_exp) {
   1323         e4 = e4 + 1;
   1324       }
   1325       q4 = p34;
   1326       // res is now the coefficient of the result rounded to the destination
   1327       // precision, with unbounded exponent; the exponent is e4; q4=digits(res)
   1328     } else { // if (q4 <= p34)
   1329       // C4 * 10^e4 is the result rounded to the destination precision, with
   1330       // unbounded exponent (which is exact)
   1331 
   1332       if ((q4 + e4 <= p34 + expmax) && (e4 > expmax)) {
   1333         // e4 is too large, but can be brought within range by scaling up C4
   1334         scale = e4 - expmax; // 1 <= scale < P-q4 <= P-1 => 1 <= scale <= P-2
   1335         // res = (C4 * 10^scale) * 10^expmax
   1336         if (q4 <= 19) { // C4 fits in 64 bits
   1337           if (scale <= 19) { // 10^scale fits in 64 bits
   1338             // 64 x 64 C4.w[0] * ten2k64[scale]
   1339             __mul_64x64_to_128MACH (res, C4.w[0], ten2k64[scale]);
   1340           } else { // 10^scale fits in 128 bits
   1341             // 64 x 128 C4.w[0] * ten2k128[scale - 20]
   1342             __mul_128x64_to_128 (res, C4.w[0], ten2k128[scale - 20]);
   1343           }
   1344         } else { // C4 fits in 128 bits, but 10^scale must fit in 64 bits
   1345           // 64 x 128 ten2k64[scale] * CC43
   1346           __mul_128x64_to_128 (res, ten2k64[scale], C4);
   1347         }
   1348         e4 = e4 - scale; // expmax
   1349         q4 = q4 + scale;
   1350       } else {
   1351         res.w[1] = C4.w[1];
   1352         res.w[0] = C4.w[0];
   1353       }
   1354       // res is the coefficient of the result rounded to the destination
   1355       // precision, with unbounded exponent (it has q4 digits); the exponent
   1356       // is e4 (exact result)
   1357     }
   1358 
   1359     // check for overflow
   1360     if (q4 + e4 > p34 + expmax) {
   1361       if (rnd_mode == ROUNDING_TO_NEAREST) {
   1362         res.w[1] = p_sign | 0x7800000000000000ull; // +/-inf
   1363         res.w[0] = 0x0000000000000000ull;
   1364         *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION);
   1365       } else {
   1366         res.w[1] = p_sign | res.w[1];
   1367         rounding_correction (rnd_mode,
   1368         		     is_inexact_lt_midpoint,
   1369         		     is_inexact_gt_midpoint,
   1370         		     is_midpoint_lt_even, is_midpoint_gt_even,
   1371         		     e4, &res, pfpsf);
   1372       }
   1373       *pfpsf |= save_fpsf;
   1374       *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
   1375       *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
   1376       *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
   1377       *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
   1378       BID_SWAP128 (res);
   1379       BID_RETURN (res)
   1380     }
   1381     // check for underflow
   1382     if (q4 + e4 < expmin + P34) {
   1383       is_tiny = 1; // the result is tiny
   1384       if (e4 < expmin) {
   1385         // if e4 < expmin, we must truncate more of res
   1386         x0 = expmin - e4; // x0 >= 1
   1387         is_inexact_lt_midpoint0 = is_inexact_lt_midpoint;
   1388         is_inexact_gt_midpoint0 = is_inexact_gt_midpoint;
   1389         is_midpoint_lt_even0 = is_midpoint_lt_even;
   1390         is_midpoint_gt_even0 = is_midpoint_gt_even;
   1391         is_inexact_lt_midpoint = 0;
   1392         is_inexact_gt_midpoint = 0;
   1393         is_midpoint_lt_even = 0;
   1394         is_midpoint_gt_even = 0;
   1395         // the number of decimal digits in res is q4
   1396         if (x0 < q4) { // 1 <= x0 <= q4-1 => round res to q4 - x0 digits
   1397           if (q4 <= 18) { // 2 <= q4 <= 18, 1 <= x0 <= 17
   1398             round64_2_18 (q4, x0, res.w[0], &R64, &incr_exp,
   1399         		  &is_midpoint_lt_even, &is_midpoint_gt_even,
   1400         		  &is_inexact_lt_midpoint,
   1401         		  &is_inexact_gt_midpoint);
   1402             if (incr_exp) {
   1403               // R64 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 1, 1 <= q4 - x0 <= 17
   1404               R64 = ten2k64[q4 - x0];
   1405             }
   1406             // res.w[1] = 0; (from above)
   1407             res.w[0] = R64;
   1408           } else { // if (q4 <= 34)
   1409             // 19 <= q4 <= 38
   1410             P128.w[1] = res.w[1];
   1411             P128.w[0] = res.w[0];
   1412             round128_19_38 (q4, x0, P128, &res, &incr_exp,
   1413         		    &is_midpoint_lt_even, &is_midpoint_gt_even,
   1414         		    &is_inexact_lt_midpoint,
   1415         		    &is_inexact_gt_midpoint);
   1416             if (incr_exp) {
   1417               // increase coefficient by a factor of 10; this will be <= 10^33
   1418               // R128 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 1, 1 <= q4 - x0 <= 37
   1419               if (q4 - x0 <= 19) { // 1 <= q4 - x0 <= 19
   1420         	// res.w[1] = 0;
   1421         	res.w[0] = ten2k64[q4 - x0];
   1422               } else { // 20 <= q4 - x0 <= 37
   1423         	res.w[0] = ten2k128[q4 - x0 - 20].w[0];
   1424         	res.w[1] = ten2k128[q4 - x0 - 20].w[1];
   1425               }
   1426             }
   1427           }
   1428           e4 = e4 + x0; // expmin
   1429         } else if (x0 == q4) {
   1430           // the second rounding is for 0.d(0)d(1)...d(q4-1) * 10^emin
   1431           // determine relationship with 1/2 ulp
   1432           if (q4 <= 19) {
   1433             if (res.w[0] < midpoint64[q4 - 1]) { // < 1/2 ulp
   1434               lt_half_ulp = 1;
   1435               is_inexact_lt_midpoint = 1;
   1436             } else if (res.w[0] == midpoint64[q4 - 1]) { // = 1/2 ulp
   1437               eq_half_ulp = 1;
   1438               is_midpoint_gt_even = 1;
   1439             } else { // > 1/2 ulp
   1440               // gt_half_ulp = 1;
   1441               is_inexact_gt_midpoint = 1;
   1442             }
   1443           } else { // if (q4 <= 34)
   1444             if (res.w[1] < midpoint128[q4 - 20].w[1] ||
   1445                 (res.w[1] == midpoint128[q4 - 20].w[1] &&
   1446                 res.w[0] < midpoint128[q4 - 20].w[0])) { // < 1/2 ulp
   1447               lt_half_ulp = 1;
   1448               is_inexact_lt_midpoint = 1;
   1449             } else if (res.w[1] == midpoint128[q4 - 20].w[1] &&
   1450                 res.w[0] == midpoint128[q4 - 20].w[0]) { // = 1/2 ulp
   1451               eq_half_ulp = 1;
   1452               is_midpoint_gt_even = 1;
   1453             } else { // > 1/2 ulp
   1454               // gt_half_ulp = 1;
   1455               is_inexact_gt_midpoint = 1;
   1456             }
   1457           }
   1458           if (lt_half_ulp || eq_half_ulp) {
   1459             // res = +0.0 * 10^expmin
   1460             res.w[1] = 0x0000000000000000ull;
   1461             res.w[0] = 0x0000000000000000ull;
   1462           } else { // if (gt_half_ulp)
   1463             // res = +1 * 10^expmin
   1464             res.w[1] = 0x0000000000000000ull;
   1465             res.w[0] = 0x0000000000000001ull;
   1466           }
   1467           e4 = expmin;
   1468         } else { // if (x0 > q4)
   1469           // the second rounding is for 0.0...d(0)d(1)...d(q4-1) * 10^emin
   1470           res.w[1] = 0;
   1471           res.w[0] = 0;
   1472           e4 = expmin;
   1473           is_inexact_lt_midpoint = 1;
   1474         }
   1475         // avoid a double rounding error
   1476         if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) &&
   1477             is_midpoint_lt_even) { // double rounding error upward
   1478           // res = res - 1
   1479           res.w[0]--;
   1480           if (res.w[0] == 0xffffffffffffffffull)
   1481             res.w[1]--;
   1482           // Note: a double rounding error upward is not possible; for this
   1483           // the result after the first rounding would have to be 99...95
   1484           // (35 digits in all), possibly followed by a number of zeros; this
   1485           // not possible for f * f + 0
   1486           is_midpoint_lt_even = 0;
   1487           is_inexact_lt_midpoint = 1;
   1488         } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) &&
   1489             is_midpoint_gt_even) { // double rounding error downward
   1490           // res = res + 1
   1491           res.w[0]++;
   1492           if (res.w[0] == 0)
   1493             res.w[1]++;
   1494           is_midpoint_gt_even = 0;
   1495           is_inexact_gt_midpoint = 1;
   1496         } else if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
   1497         	   !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) {
   1498           // if this second rounding was exact the result may still be
   1499           // inexact because of the first rounding
   1500           if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) {
   1501             is_inexact_gt_midpoint = 1;
   1502           }
   1503           if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) {
   1504             is_inexact_lt_midpoint = 1;
   1505           }
   1506         } else if (is_midpoint_gt_even &&
   1507         	   (is_inexact_gt_midpoint0 || is_midpoint_lt_even0)) {
   1508           // pulled up to a midpoint
   1509           is_inexact_lt_midpoint = 1;
   1510           is_inexact_gt_midpoint = 0;
   1511           is_midpoint_lt_even = 0;
   1512           is_midpoint_gt_even = 0;
   1513         } else if (is_midpoint_lt_even &&
   1514         	   (is_inexact_lt_midpoint0 || is_midpoint_gt_even0)) {
   1515           // pulled down to a midpoint
   1516           is_inexact_lt_midpoint = 0;
   1517           is_inexact_gt_midpoint = 1;
   1518           is_midpoint_lt_even = 0;
   1519           is_midpoint_gt_even = 0;
   1520         } else {
   1521           ;
   1522         }
   1523       } else { // if e4 >= emin then q4 < P and the result is tiny and exact
   1524         if (e3 < e4) {
   1525           // if (e3 < e4) the preferred exponent is e3
   1526           // return (C4 * 10^scale) * 10^(e4 - scale)
   1527           // where scale = min (p34-q4, (e4 - e3))
   1528           scale = p34 - q4;
   1529           ind = e4 - e3;
   1530           if (ind < scale)
   1531             scale = ind;
   1532           if (scale == 0) {
   1533             ; // res and e4 are unchanged
   1534           } else if (q4 <= 19) { // C4 fits in 64 bits
   1535             if (scale <= 19) { // 10^scale fits in 64 bits
   1536               // 64 x 64 res.w[0] * ten2k64[scale]
   1537               __mul_64x64_to_128MACH (res, res.w[0], ten2k64[scale]);
   1538             } else { // 10^scale fits in 128 bits
   1539               // 64 x 128 res.w[0] * ten2k128[scale - 20]
   1540               __mul_128x64_to_128 (res, res.w[0], ten2k128[scale - 20]);
   1541             }
   1542           } else { // res fits in 128 bits, but 10^scale must fit in 64 bits
   1543             // 64 x 128 ten2k64[scale] * C3
   1544             __mul_128x64_to_128 (res, ten2k64[scale], res);
   1545           }
   1546           // subtract scale from the exponent
   1547           e4 = e4 - scale;
   1548         }
   1549       }
   1550 
   1551       // check for inexact result
   1552       if (is_inexact_lt_midpoint || is_inexact_gt_midpoint ||
   1553           is_midpoint_lt_even || is_midpoint_gt_even) {
   1554         // set the inexact flag and the underflow flag
   1555         *pfpsf |= INEXACT_EXCEPTION;
   1556         *pfpsf |= UNDERFLOW_EXCEPTION;
   1557       }
   1558       res.w[1] = p_sign | ((UINT64) (e4 + 6176) << 49) | res.w[1];
   1559       if (rnd_mode != ROUNDING_TO_NEAREST) {
   1560         rounding_correction (rnd_mode,
   1561         		     is_inexact_lt_midpoint,
   1562         		     is_inexact_gt_midpoint,
   1563         		     is_midpoint_lt_even, is_midpoint_gt_even,
   1564         		     e4, &res, pfpsf);
   1565       }
   1566       *pfpsf |= save_fpsf;
   1567       *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
   1568       *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
   1569       *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
   1570       *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
   1571       BID_SWAP128 (res);
   1572       BID_RETURN (res)
   1573     }
   1574     // no overflow, and no underflow for rounding to nearest
   1575     res.w[1] = p_sign | ((UINT64) (e4 + 6176) << 49) | res.w[1];
   1576 
   1577     if (rnd_mode != ROUNDING_TO_NEAREST) {
   1578       rounding_correction (rnd_mode,
   1579         		   is_inexact_lt_midpoint,
   1580         		   is_inexact_gt_midpoint,
   1581         		   is_midpoint_lt_even, is_midpoint_gt_even,
   1582         		   e4, &res, pfpsf);
   1583       // if e4 = expmin && significand < 10^33 => result is tiny (for RD, RZ)
   1584       if (e4 == expmin) {
   1585         if ((res.w[1] & MASK_COEFF) < 0x0000314dc6448d93ull ||
   1586             ((res.w[1] & MASK_COEFF) == 0x0000314dc6448d93ull &&
   1587              res.w[0] < 0x38c15b0a00000000ull)) {
   1588           is_tiny = 1;
   1589         }
   1590       }
   1591     }
   1592 
   1593     if (is_inexact_lt_midpoint || is_inexact_gt_midpoint ||
   1594         is_midpoint_lt_even || is_midpoint_gt_even) {
   1595       // set the inexact flag
   1596       *pfpsf |= INEXACT_EXCEPTION;
   1597       if (is_tiny)
   1598         *pfpsf |= UNDERFLOW_EXCEPTION;
   1599     }
   1600 
   1601     if ((*pfpsf & INEXACT_EXCEPTION) == 0) { // x * y is exact
   1602       // need to ensure that the result has the preferred exponent
   1603       p_exp = res.w[1] & MASK_EXP;
   1604       if (z_exp < p_exp) { // the preferred exponent is z_exp
   1605         // signficand of res in C3
   1606         C3.w[1] = res.w[1] & MASK_COEFF;
   1607         C3.w[0] = res.w[0];
   1608         // the number of decimal digits of x * y is q4 <= 34
   1609         // Note: the coefficient fits in 128 bits
   1610 
   1611         // return (C3 * 10^scale) * 10^(p_exp - scale)
   1612         // where scale = min (p34-q4, (p_exp-z_exp) >> 49)
   1613         scale = p34 - q4;
   1614         ind = (p_exp - z_exp) >> 49;
   1615         if (ind < scale)
   1616           scale = ind;
   1617         // subtract scale from the exponent
   1618         p_exp = p_exp - ((UINT64) scale << 49);
   1619         if (scale == 0) {
   1620           ; // leave res unchanged
   1621         } else if (q4 <= 19) { // x * y fits in 64 bits
   1622           if (scale <= 19) { // 10^scale fits in 64 bits
   1623             // 64 x 64 C3.w[0] * ten2k64[scale]
   1624             __mul_64x64_to_128MACH (res, C3.w[0], ten2k64[scale]);
   1625           } else { // 10^scale fits in 128 bits
   1626             // 64 x 128 C3.w[0] * ten2k128[scale - 20]
   1627             __mul_128x64_to_128 (res, C3.w[0], ten2k128[scale - 20]);
   1628           }
   1629           res.w[1] = p_sign | (p_exp & MASK_EXP) | res.w[1];
   1630         } else { // x * y fits in 128 bits, but 10^scale must fit in 64 bits
   1631           // 64 x 128 ten2k64[scale] * C3
   1632           __mul_128x64_to_128 (res, ten2k64[scale], C3);
   1633           res.w[1] = p_sign | (p_exp & MASK_EXP) | res.w[1];
   1634         }
   1635       } // else leave the result as it is, because p_exp <= z_exp
   1636     }
   1637     *pfpsf |= save_fpsf;
   1638     *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
   1639     *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
   1640     *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
   1641     *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
   1642     BID_SWAP128 (res);
   1643     BID_RETURN (res)
   1644   } // else we have f * f + f
   1645 
   1646   // continue with x = f, y = f, z = f
   1647 
   1648   delta = q3 + e3 - q4 - e4;
   1649 delta_ge_zero:
   1650   if (delta >= 0) {
   1651 
   1652     if (p34 <= delta - 1 ||	// Case (1')
   1653         (p34 == delta && e3 + 6176 < p34 - q3)) { // Case (1''A)
   1654       // check for overflow, which can occur only in Case (1')
   1655       if ((q3 + e3) > (p34 + expmax) && p34 <= delta - 1) {
   1656         // e3 > expmax implies p34 <= delta-1 and e3 > expmax is a necessary
   1657         // condition for (q3 + e3) > (p34 + expmax)
   1658         if (rnd_mode == ROUNDING_TO_NEAREST) {
   1659           res.w[1] = z_sign | 0x7800000000000000ull; // +/-inf
   1660           res.w[0] = 0x0000000000000000ull;
   1661           *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION);
   1662         } else {
   1663           if (p_sign == z_sign) {
   1664             is_inexact_lt_midpoint = 1;
   1665           } else {
   1666             is_inexact_gt_midpoint = 1;
   1667           }
   1668           // q3 <= p34; if (q3 < p34) scale C3 up by 10^(p34-q3)
   1669           scale = p34 - q3;
   1670           if (scale == 0) {
   1671             res.w[1] = z_sign | C3.w[1];
   1672             res.w[0] = C3.w[0];
   1673           } else {
   1674             if (q3 <= 19) { // C3 fits in 64 bits
   1675               if (scale <= 19) { // 10^scale fits in 64 bits
   1676         	// 64 x 64 C3.w[0] * ten2k64[scale]
   1677         	__mul_64x64_to_128MACH (res, C3.w[0], ten2k64[scale]);
   1678               } else { // 10^scale fits in 128 bits
   1679         	// 64 x 128 C3.w[0] * ten2k128[scale - 20]
   1680         	__mul_128x64_to_128 (res, C3.w[0],
   1681         			     ten2k128[scale - 20]);
   1682               }
   1683             } else { // C3 fits in 128 bits, but 10^scale must fit in 64 bits
   1684               // 64 x 128 ten2k64[scale] * C3
   1685               __mul_128x64_to_128 (res, ten2k64[scale], C3);
   1686             }
   1687             // the coefficient in res has q3 + scale = p34 digits
   1688           }
   1689           e3 = e3 - scale;
   1690           res.w[1] = z_sign | res.w[1];
   1691           rounding_correction (rnd_mode,
   1692         		       is_inexact_lt_midpoint,
   1693         		       is_inexact_gt_midpoint,
   1694         		       is_midpoint_lt_even, is_midpoint_gt_even,
   1695         		       e3, &res, pfpsf);
   1696         }
   1697         *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
   1698         *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
   1699         *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
   1700         *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
   1701         BID_SWAP128 (res);
   1702         BID_RETURN (res)
   1703       }
   1704       // res = z
   1705       if (q3 < p34) { // the preferred exponent is z_exp - (p34 - q3)
   1706         // return (C3 * 10^scale) * 10^(z_exp - scale)
   1707         // where scale = min (p34-q3, z_exp-EMIN)
   1708         scale = p34 - q3;
   1709         ind = e3 + 6176;
   1710         if (ind < scale)
   1711           scale = ind;
   1712         if (scale == 0) {
   1713           res.w[1] = C3.w[1];
   1714           res.w[0] = C3.w[0];
   1715         } else if (q3 <= 19) { // z fits in 64 bits
   1716           if (scale <= 19) { // 10^scale fits in 64 bits
   1717             // 64 x 64 C3.w[0] * ten2k64[scale]
   1718             __mul_64x64_to_128MACH (res, C3.w[0], ten2k64[scale]);
   1719           } else { // 10^scale fits in 128 bits
   1720             // 64 x 128 C3.w[0] * ten2k128[scale - 20]
   1721             __mul_128x64_to_128 (res, C3.w[0], ten2k128[scale - 20]);
   1722           }
   1723         } else { // z fits in 128 bits, but 10^scale must fit in 64 bits
   1724           // 64 x 128 ten2k64[scale] * C3
   1725           __mul_128x64_to_128 (res, ten2k64[scale], C3);
   1726         }
   1727         // the coefficient in res has q3 + scale digits
   1728         // subtract scale from the exponent
   1729         z_exp = z_exp - ((UINT64) scale << 49);
   1730         e3 = e3 - scale;
   1731         res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1];
   1732         if (scale + q3 < p34)
   1733           *pfpsf |= UNDERFLOW_EXCEPTION;
   1734       } else {
   1735         scale = 0;
   1736         res.w[1] = z_sign | ((UINT64) (e3 + 6176) << 49) | C3.w[1];
   1737         res.w[0] = C3.w[0];
   1738       }
   1739 
   1740       // use the following to avoid double rounding errors when operating on
   1741       // mixed formats in rounding to nearest, and for correcting the result
   1742       // if not rounding to nearest
   1743       if ((p_sign != z_sign) && (delta == (q3 + scale + 1))) {
   1744         // there is a gap of exactly one digit between the scaled C3 and C4
   1745         // C3 * 10^ scale = 10^(q3+scale-1) <=> C3 = 10^(q3-1) is special case
   1746         if ((q3 <= 19 && C3.w[0] != ten2k64[q3 - 1]) ||
   1747             (q3 == 20 && (C3.w[1] != 0 || C3.w[0] != ten2k64[19])) ||
   1748             (q3 >= 21 && (C3.w[1] != ten2k128[q3 - 21].w[1] ||
   1749         		  C3.w[0] != ten2k128[q3 - 21].w[0]))) {
   1750           // C3 * 10^ scale != 10^(q3-1)
   1751           // if ((res.w[1] & MASK_COEFF) != 0x0000314dc6448d93ull ||
   1752           // res.w[0] != 0x38c15b0a00000000ull) { // C3 * 10^scale != 10^33
   1753           is_inexact_gt_midpoint = 1; // if (z_sign), set as if for abs. value
   1754         } else { // if C3 * 10^scale = 10^(q3+scale-1)
   1755           // ok from above e3 = (z_exp >> 49) - 6176;
   1756           // the result is always inexact
   1757           if (q4 == 1) {
   1758             R64 = C4.w[0];
   1759           } else {
   1760             // if q4 > 1 then truncate C4 from q4 digits to 1 digit;
   1761             // x = q4-1, 1 <= x <= 67 and check if this operation is exact
   1762             if (q4 <= 18) { // 2 <= q4 <= 18
   1763               round64_2_18 (q4, q4 - 1, C4.w[0], &R64, &incr_exp,
   1764         		    &is_midpoint_lt_even, &is_midpoint_gt_even,
   1765         		    &is_inexact_lt_midpoint,
   1766         		    &is_inexact_gt_midpoint);
   1767             } else if (q4 <= 38) {
   1768               P128.w[1] = C4.w[1];
   1769               P128.w[0] = C4.w[0];
   1770               round128_19_38 (q4, q4 - 1, P128, &R128, &incr_exp,
   1771         		      &is_midpoint_lt_even,
   1772         		      &is_midpoint_gt_even,
   1773         		      &is_inexact_lt_midpoint,
   1774         		      &is_inexact_gt_midpoint);
   1775               R64 = R128.w[0]; // one decimal digit
   1776             } else if (q4 <= 57) {
   1777               P192.w[2] = C4.w[2];
   1778               P192.w[1] = C4.w[1];
   1779               P192.w[0] = C4.w[0];
   1780               round192_39_57 (q4, q4 - 1, P192, &R192, &incr_exp,
   1781         		      &is_midpoint_lt_even,
   1782         		      &is_midpoint_gt_even,
   1783         		      &is_inexact_lt_midpoint,
   1784         		      &is_inexact_gt_midpoint);
   1785               R64 = R192.w[0]; // one decimal digit
   1786             } else { // if (q4 <= 68)
   1787               round256_58_76 (q4, q4 - 1, C4, &R256, &incr_exp,
   1788         		      &is_midpoint_lt_even,
   1789         		      &is_midpoint_gt_even,
   1790         		      &is_inexact_lt_midpoint,
   1791         		      &is_inexact_gt_midpoint);
   1792               R64 = R256.w[0]; // one decimal digit
   1793             }
   1794             if (incr_exp) {
   1795               R64 = 10;
   1796             }
   1797           }
   1798           if (q4 == 1 && C4.w[0] == 5) {
   1799             is_inexact_lt_midpoint = 0;
   1800             is_inexact_gt_midpoint = 0;
   1801             is_midpoint_lt_even = 1;
   1802             is_midpoint_gt_even = 0;
   1803           } else if ((e3 == expmin) ||
   1804         	     R64 < 5 || (R64 == 5 && is_inexact_gt_midpoint)) {
   1805             // result does not change
   1806             is_inexact_lt_midpoint = 0;
   1807             is_inexact_gt_midpoint = 1;
   1808             is_midpoint_lt_even = 0;
   1809             is_midpoint_gt_even = 0;
   1810           } else {
   1811             is_inexact_lt_midpoint = 1;
   1812             is_inexact_gt_midpoint = 0;
   1813             is_midpoint_lt_even = 0;
   1814             is_midpoint_gt_even = 0;
   1815             // result decremented is 10^(q3+scale) - 1
   1816             if ((q3 + scale) <= 19) {
   1817               res.w[1] = 0;
   1818               res.w[0] = ten2k64[q3 + scale];
   1819             } else { // if ((q3 + scale + 1) <= 35)
   1820               res.w[1] = ten2k128[q3 + scale - 20].w[1];
   1821               res.w[0] = ten2k128[q3 + scale - 20].w[0];
   1822             }
   1823             res.w[0] = res.w[0] - 1; // borrow never occurs
   1824             z_exp = z_exp - EXP_P1;
   1825             e3 = e3 - 1;
   1826             res.w[1] = z_sign | ((UINT64) (e3 + 6176) << 49) | res.w[1];
   1827           }
   1828           if (e3 == expmin) {
   1829             if (R64 < 5 || (R64 == 5 && !is_inexact_lt_midpoint)) {
   1830               ; // result not tiny (in round-to-nearest mode)
   1831             } else {
   1832               *pfpsf |= UNDERFLOW_EXCEPTION;
   1833             }
   1834           }
   1835         } // end 10^(q3+scale-1)
   1836         // set the inexact flag
   1837         *pfpsf |= INEXACT_EXCEPTION;
   1838       } else {
   1839         if (p_sign == z_sign) {
   1840           // if (z_sign), set as if for absolute value
   1841           is_inexact_lt_midpoint = 1;
   1842         } else { // if (p_sign != z_sign)
   1843           // if (z_sign), set as if for absolute value
   1844           is_inexact_gt_midpoint = 1;
   1845         }
   1846         *pfpsf |= INEXACT_EXCEPTION;
   1847       }
   1848       // the result is always inexact => set the inexact flag
   1849       // Determine tininess:
   1850       //    if (exp > expmin)
   1851       //      the result is not tiny
   1852       //    else // if exp = emin
   1853       //      if (q3 + scale < p34)
   1854       //        the result is tiny
   1855       //      else // if (q3 + scale = p34)
   1856       //        if (C3 * 10^scale > 10^33)
   1857       //          the result is not tiny
   1858       //        else // if C3 * 10^scale = 10^33
   1859       //          if (xy * z > 0)
   1860       //            the result is not tiny
   1861       //          else // if (xy * z < 0)
   1862       //            if (z > 0)
   1863       //              if rnd_mode != RP
   1864       //                the result is tiny
   1865       //              else // if RP
   1866       //                the result is not tiny
   1867       //            else // if (z < 0)
   1868       //              if rnd_mode != RM
   1869       //                the result is tiny
   1870       //              else // if RM
   1871       //                the result is not tiny
   1872       //              endif
   1873       //            endif
   1874       //          endif
   1875       //        endif
   1876       //      endif
   1877       //    endif
   1878       if ((e3 == expmin && (q3 + scale) < p34) ||
   1879           (e3 == expmin && (q3 + scale) == p34 &&
   1880           (res.w[1] & MASK_COEFF) == 0x0000314dc6448d93ull &&	// 10^33_high
   1881           res.w[0] == 0x38c15b0a00000000ull &&	// 10^33_low
   1882           z_sign != p_sign && ((!z_sign && rnd_mode != ROUNDING_UP) ||
   1883           (z_sign && rnd_mode != ROUNDING_DOWN)))) {
   1884         *pfpsf |= UNDERFLOW_EXCEPTION;
   1885       }
   1886       if (rnd_mode != ROUNDING_TO_NEAREST) {
   1887         rounding_correction (rnd_mode,
   1888         		     is_inexact_lt_midpoint,
   1889         		     is_inexact_gt_midpoint,
   1890         		     is_midpoint_lt_even, is_midpoint_gt_even,
   1891         		     e3, &res, pfpsf);
   1892       }
   1893       *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
   1894       *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
   1895       *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
   1896       *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
   1897       BID_SWAP128 (res);
   1898       BID_RETURN (res)
   1899 
   1900     } else if (p34 == delta) { // Case (1''B)
   1901 
   1902       // because Case (1''A) was treated above, e3 + 6176 >= p34 - q3
   1903       // and C3 can be scaled up to p34 digits if needed
   1904 
   1905       // scale C3 to p34 digits if needed
   1906       scale = p34 - q3; // 0 <= scale <= p34 - 1
   1907       if (scale == 0) {
   1908         res.w[1] = C3.w[1];
   1909         res.w[0] = C3.w[0];
   1910       } else if (q3 <= 19) { // z fits in 64 bits
   1911         if (scale <= 19) { // 10^scale fits in 64 bits
   1912           // 64 x 64 C3.w[0] * ten2k64[scale]
   1913           __mul_64x64_to_128MACH (res, C3.w[0], ten2k64[scale]);
   1914         } else { // 10^scale fits in 128 bits
   1915           // 64 x 128 C3.w[0] * ten2k128[scale - 20]
   1916           __mul_128x64_to_128 (res, C3.w[0], ten2k128[scale - 20]);
   1917         }
   1918       } else { // z fits in 128 bits, but 10^scale must fit in 64 bits
   1919         // 64 x 128 ten2k64[scale] * C3
   1920         __mul_128x64_to_128 (res, ten2k64[scale], C3);
   1921       }
   1922       // subtract scale from the exponent
   1923       z_exp = z_exp - ((UINT64) scale << 49);
   1924       e3 = e3 - scale;
   1925       // now z_sign, z_exp, and res correspond to a z scaled to p34 = 34 digits
   1926 
   1927       // determine whether x * y is less than, equal to, or greater than
   1928       // 1/2 ulp (z)
   1929       if (q4 <= 19) {
   1930         if (C4.w[0] < midpoint64[q4 - 1]) { // < 1/2 ulp
   1931           lt_half_ulp = 1;
   1932         } else if (C4.w[0] == midpoint64[q4 - 1]) { // = 1/2 ulp
   1933           eq_half_ulp = 1;
   1934         } else { // > 1/2 ulp
   1935           gt_half_ulp = 1;
   1936         }
   1937       } else if (q4 <= 38) {
   1938         if (C4.w[2] == 0 && (C4.w[1] < midpoint128[q4 - 20].w[1] ||
   1939             (C4.w[1] == midpoint128[q4 - 20].w[1] &&
   1940             C4.w[0] < midpoint128[q4 - 20].w[0]))) { // < 1/2 ulp
   1941           lt_half_ulp = 1;
   1942         } else if (C4.w[2] == 0 && C4.w[1] == midpoint128[q4 - 20].w[1] &&
   1943             C4.w[0] == midpoint128[q4 - 20].w[0]) { // = 1/2 ulp
   1944           eq_half_ulp = 1;
   1945         } else { // > 1/2 ulp
   1946           gt_half_ulp = 1;
   1947         }
   1948       } else if (q4 <= 58) {
   1949         if (C4.w[3] == 0 && (C4.w[2] < midpoint192[q4 - 39].w[2] ||
   1950             (C4.w[2] == midpoint192[q4 - 39].w[2] &&
   1951             C4.w[1] < midpoint192[q4 - 39].w[1]) ||
   1952             (C4.w[2] == midpoint192[q4 - 39].w[2] &&
   1953             C4.w[1] == midpoint192[q4 - 39].w[1] &&
   1954             C4.w[0] < midpoint192[q4 - 39].w[0]))) { // < 1/2 ulp
   1955           lt_half_ulp = 1;
   1956         } else if (C4.w[3] == 0 && C4.w[2] == midpoint192[q4 - 39].w[2] &&
   1957             C4.w[1] == midpoint192[q4 - 39].w[1] &&
   1958             C4.w[0] == midpoint192[q4 - 39].w[0]) { // = 1/2 ulp
   1959           eq_half_ulp = 1;
   1960         } else { // > 1/2 ulp
   1961           gt_half_ulp = 1;
   1962         }
   1963       } else {
   1964         if (C4.w[3] < midpoint256[q4 - 59].w[3] ||
   1965             (C4.w[3] == midpoint256[q4 - 59].w[3] &&
   1966             C4.w[2] < midpoint256[q4 - 59].w[2]) ||
   1967             (C4.w[3] == midpoint256[q4 - 59].w[3] &&
   1968             C4.w[2] == midpoint256[q4 - 59].w[2] &&
   1969             C4.w[1] < midpoint256[q4 - 59].w[1]) ||
   1970             (C4.w[3] == midpoint256[q4 - 59].w[3] &&
   1971             C4.w[2] == midpoint256[q4 - 59].w[2] &&
   1972             C4.w[1] == midpoint256[q4 - 59].w[1] &&
   1973             C4.w[0] < midpoint256[q4 - 59].w[0])) { // < 1/2 ulp
   1974           lt_half_ulp = 1;
   1975         } else if (C4.w[3] == midpoint256[q4 - 59].w[3] &&
   1976             C4.w[2] == midpoint256[q4 - 59].w[2] &&
   1977             C4.w[1] == midpoint256[q4 - 59].w[1] &&
   1978             C4.w[0] == midpoint256[q4 - 59].w[0]) { // = 1/2 ulp
   1979           eq_half_ulp = 1;
   1980         } else { // > 1/2 ulp
   1981           gt_half_ulp = 1;
   1982         }
   1983       }
   1984 
   1985       if (p_sign == z_sign) {
   1986         if (lt_half_ulp) {
   1987           res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1];
   1988           // use the following to avoid double rounding errors when operating on
   1989           // mixed formats in rounding to nearest
   1990           is_inexact_lt_midpoint = 1; // if (z_sign), as if for absolute value
   1991         } else if ((eq_half_ulp && (res.w[0] & 0x01)) || gt_half_ulp) {
   1992           // add 1 ulp to the significand
   1993           res.w[0]++;
   1994           if (res.w[0] == 0x0ull)
   1995             res.w[1]++;
   1996           // check for rounding overflow, when coeff == 10^34
   1997           if ((res.w[1] & MASK_COEFF) == 0x0001ed09bead87c0ull &&
   1998               res.w[0] == 0x378d8e6400000000ull) { // coefficient = 10^34
   1999             e3 = e3 + 1;
   2000             // coeff = 10^33
   2001             z_exp = ((UINT64) (e3 + 6176) << 49) & MASK_EXP;
   2002             res.w[1] = 0x0000314dc6448d93ull;
   2003             res.w[0] = 0x38c15b0a00000000ull;
   2004           }
   2005           // end add 1 ulp
   2006           res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1];
   2007           if (eq_half_ulp) {
   2008             is_midpoint_lt_even = 1; // if (z_sign), as if for absolute value
   2009           } else {
   2010             is_inexact_gt_midpoint = 1; // if (z_sign), as if for absolute value
   2011           }
   2012         } else { // if (eq_half_ulp && !(res.w[0] & 0x01))
   2013           // leave unchanged
   2014           res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1];
   2015           is_midpoint_gt_even = 1; // if (z_sign), as if for absolute value
   2016         }
   2017         // the result is always inexact, and never tiny
   2018         // set the inexact flag
   2019         *pfpsf |= INEXACT_EXCEPTION;
   2020         // check for overflow
   2021         if (e3 > expmax && rnd_mode == ROUNDING_TO_NEAREST) {
   2022           res.w[1] = z_sign | 0x7800000000000000ull; // +/-inf
   2023           res.w[0] = 0x0000000000000000ull;
   2024           *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION);
   2025           *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
   2026           *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
   2027           *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
   2028           *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
   2029           BID_SWAP128 (res);
   2030           BID_RETURN (res)
   2031         }
   2032         if (rnd_mode != ROUNDING_TO_NEAREST) {
   2033           rounding_correction (rnd_mode,
   2034         		       is_inexact_lt_midpoint,
   2035         		       is_inexact_gt_midpoint,
   2036         		       is_midpoint_lt_even, is_midpoint_gt_even,
   2037         		       e3, &res, pfpsf);
   2038           z_exp = res.w[1] & MASK_EXP;
   2039         }
   2040       } else { // if (p_sign != z_sign)
   2041         // consider two cases, because C3 * 10^scale = 10^33 is a special case
   2042         if (res.w[1] != 0x0000314dc6448d93ull ||
   2043             res.w[0] != 0x38c15b0a00000000ull) { // C3 * 10^scale != 10^33
   2044           if (lt_half_ulp) {
   2045             res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1];
   2046             // use the following to avoid double rounding errors when operating
   2047             // on mixed formats in rounding to nearest
   2048             is_inexact_gt_midpoint = 1; // if (z_sign), as if for absolute value
   2049           } else if ((eq_half_ulp && (res.w[0] & 0x01)) || gt_half_ulp) {
   2050             // subtract 1 ulp from the significand
   2051             res.w[0]--;
   2052             if (res.w[0] == 0xffffffffffffffffull)
   2053               res.w[1]--;
   2054             res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1];
   2055             if (eq_half_ulp) {
   2056               is_midpoint_gt_even = 1; // if (z_sign), as if for absolute value
   2057             } else {
   2058               is_inexact_lt_midpoint = 1; //if(z_sign), as if for absolute value
   2059             }
   2060           } else { // if (eq_half_ulp && !(res.w[0] & 0x01))
   2061             // leave unchanged
   2062             res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1];
   2063             is_midpoint_lt_even = 1; // if (z_sign), as if for absolute value
   2064           }
   2065           // the result is always inexact, and never tiny
   2066           // check for overflow for RN
   2067           if (e3 > expmax) {
   2068             if (rnd_mode == ROUNDING_TO_NEAREST) {
   2069               res.w[1] = z_sign | 0x7800000000000000ull; // +/-inf
   2070               res.w[0] = 0x0000000000000000ull;
   2071               *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION);
   2072             } else {
   2073               rounding_correction (rnd_mode,
   2074         			   is_inexact_lt_midpoint,
   2075         			   is_inexact_gt_midpoint,
   2076         			   is_midpoint_lt_even,
   2077         			   is_midpoint_gt_even, e3, &res,
   2078         			   pfpsf);
   2079             }
   2080             *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
   2081             *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
   2082             *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
   2083             *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
   2084             BID_SWAP128 (res);
   2085             BID_RETURN (res)
   2086           }
   2087           // set the inexact flag
   2088           *pfpsf |= INEXACT_EXCEPTION;
   2089           if (rnd_mode != ROUNDING_TO_NEAREST) {
   2090             rounding_correction (rnd_mode,
   2091         			 is_inexact_lt_midpoint,
   2092         			 is_inexact_gt_midpoint,
   2093         			 is_midpoint_lt_even,
   2094         			 is_midpoint_gt_even, e3, &res, pfpsf);
   2095           }
   2096           z_exp = res.w[1] & MASK_EXP;
   2097         } else { // if C3 * 10^scale = 10^33
   2098           e3 = (z_exp >> 49) - 6176;
   2099           if (e3 > expmin) {
   2100             // the result is exact if exp > expmin and C4 = d*10^(q4-1),
   2101             // where d = 1, 2, 3, ..., 9; it could be tiny too, but exact
   2102             if (q4 == 1) {
   2103               // if q4 = 1 the result is exact
   2104               // result coefficient = 10^34 - C4
   2105               res.w[1] = 0x0001ed09bead87c0ull;
   2106               res.w[0] = 0x378d8e6400000000ull - C4.w[0];
   2107               z_exp = z_exp - EXP_P1;
   2108               e3 = e3 - 1;
   2109               res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1];
   2110             } else {
   2111               // if q4 > 1 then truncate C4 from q4 digits to 1 digit;
   2112               // x = q4-1, 1 <= x <= 67 and check if this operation is exact
   2113               if (q4 <= 18) { // 2 <= q4 <= 18
   2114         	round64_2_18 (q4, q4 - 1, C4.w[0], &R64, &incr_exp,
   2115         		      &is_midpoint_lt_even,
   2116         		      &is_midpoint_gt_even,
   2117         		      &is_inexact_lt_midpoint,
   2118         		      &is_inexact_gt_midpoint);
   2119               } else if (q4 <= 38) {
   2120         	P128.w[1] = C4.w[1];
   2121         	P128.w[0] = C4.w[0];
   2122         	round128_19_38 (q4, q4 - 1, P128, &R128, &incr_exp,
   2123         			&is_midpoint_lt_even,
   2124         			&is_midpoint_gt_even,
   2125         			&is_inexact_lt_midpoint,
   2126         			&is_inexact_gt_midpoint);
   2127         	R64 = R128.w[0]; // one decimal digit
   2128               } else if (q4 <= 57) {
   2129         	P192.w[2] = C4.w[2];
   2130         	P192.w[1] = C4.w[1];
   2131         	P192.w[0] = C4.w[0];
   2132         	round192_39_57 (q4, q4 - 1, P192, &R192, &incr_exp,
   2133         			&is_midpoint_lt_even,
   2134         			&is_midpoint_gt_even,
   2135         			&is_inexact_lt_midpoint,
   2136         			&is_inexact_gt_midpoint);
   2137         	R64 = R192.w[0]; // one decimal digit
   2138               } else { // if (q4 <= 68)
   2139         	round256_58_76 (q4, q4 - 1, C4, &R256, &incr_exp,
   2140         			&is_midpoint_lt_even,
   2141         			&is_midpoint_gt_even,
   2142         			&is_inexact_lt_midpoint,
   2143         			&is_inexact_gt_midpoint);
   2144         	R64 = R256.w[0]; // one decimal digit
   2145               }
   2146               if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
   2147         	  !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) {
   2148         	// the result is exact: 10^34 - R64
   2149         	// incr_exp = 0 with certainty
   2150         	z_exp = z_exp - EXP_P1;
   2151         	e3 = e3 - 1;
   2152         	res.w[1] =
   2153         	  z_sign | (z_exp & MASK_EXP) | 0x0001ed09bead87c0ull;
   2154         	res.w[0] = 0x378d8e6400000000ull - R64;
   2155               } else {
   2156         	// We want R64 to be the top digit of C4, but we actually
   2157         	// obtained (C4 * 10^(-q4+1))RN; a correction may be needed,
   2158         	// because the top digit is (C4 * 10^(-q4+1))RZ
   2159         	// however, if incr_exp = 1 then R64 = 10 with certainty
   2160         	if (incr_exp) {
   2161         	  R64 = 10;
   2162         	}
   2163         	// the result is inexact as C4 has more than 1 significant digit
   2164         	// and C3 * 10^scale = 10^33
   2165         	// example of case that is treated here:
   2166         	// 100...0 * 10^e3 - 0.41 * 10^e3 =
   2167         	// 0999...9.59 * 10^e3 -> rounds to 99...96*10^(e3-1)
   2168         	// note that (e3 > expmin}
   2169         	// in order to round, subtract R64 from 10^34 and then compare
   2170         	// C4 - R64 * 10^(q4-1) with 1/2 ulp
   2171         	// calculate 10^34 - R64
   2172         	res.w[1] = 0x0001ed09bead87c0ull;
   2173         	res.w[0] = 0x378d8e6400000000ull - R64;
   2174         	z_exp = z_exp - EXP_P1; // will be OR-ed with sign & significand
   2175         	// calculate C4 - R64 * 10^(q4-1); this is a rare case and
   2176         	// R64 is small, 1 <= R64 <= 9
   2177         	e3 = e3 - 1;
   2178         	if (is_inexact_lt_midpoint) {
   2179         	  is_inexact_lt_midpoint = 0;
   2180         	  is_inexact_gt_midpoint = 1;
   2181         	} else if (is_inexact_gt_midpoint) {
   2182         	  is_inexact_gt_midpoint = 0;
   2183         	  is_inexact_lt_midpoint = 1;
   2184         	} else if (is_midpoint_lt_even) {
   2185         	  is_midpoint_lt_even = 0;
   2186         	  is_midpoint_gt_even = 1;
   2187         	} else if (is_midpoint_gt_even) {
   2188         	  is_midpoint_gt_even = 0;
   2189         	  is_midpoint_lt_even = 1;
   2190         	} else {
   2191         	  ;
   2192         	}
   2193         	// the result is always inexact, and never tiny
   2194         	// check for overflow for RN
   2195         	if (e3 > expmax) {
   2196         	  if (rnd_mode == ROUNDING_TO_NEAREST) {
   2197         	    res.w[1] = z_sign | 0x7800000000000000ull; // +/-inf
   2198         	    res.w[0] = 0x0000000000000000ull;
   2199         	    *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION);
   2200         	  } else {
   2201         	    rounding_correction (rnd_mode,
   2202         				 is_inexact_lt_midpoint,
   2203         				 is_inexact_gt_midpoint,
   2204         				 is_midpoint_lt_even,
   2205         				 is_midpoint_gt_even, e3, &res,
   2206         				 pfpsf);
   2207         	  }
   2208         	  *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
   2209         	  *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
   2210         	  *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
   2211         	  *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
   2212         	  BID_SWAP128 (res);
   2213         	  BID_RETURN (res)
   2214         	}
   2215         	// set the inexact flag
   2216         	*pfpsf |= INEXACT_EXCEPTION;
   2217         	res.w[1] =
   2218         	  z_sign | ((UINT64) (e3 + 6176) << 49) | res.w[1];
   2219         	if (rnd_mode != ROUNDING_TO_NEAREST) {
   2220         	  rounding_correction (rnd_mode,
   2221         			       is_inexact_lt_midpoint,
   2222         			       is_inexact_gt_midpoint,
   2223         			       is_midpoint_lt_even,
   2224         			       is_midpoint_gt_even, e3, &res,
   2225         			       pfpsf);
   2226         	}
   2227         	z_exp = res.w[1] & MASK_EXP;
   2228               } // end result is inexact
   2229             } // end q4 > 1
   2230           } else { // if (e3 = emin)
   2231             // if e3 = expmin the result is also tiny (the condition for
   2232             // tininess is C4 > 050...0 [q4 digits] which is met because
   2233             // the msd of C4 is not zero)
   2234             // the result is tiny and inexact in all rounding modes;
   2235             // it is either 100...0 or 0999...9 (use lt_half_ulp, eq_half_ulp,
   2236             // gt_half_ulp to calculate)
   2237             // if (lt_half_ulp || eq_half_ulp) res = 10^33 stays unchanged
   2238 
   2239             // p_sign != z_sign so swap gt_half_ulp and lt_half_ulp
   2240             if (gt_half_ulp) { // res = 10^33 - 1
   2241               res.w[1] = 0x0000314dc6448d93ull;
   2242               res.w[0] = 0x38c15b09ffffffffull;
   2243             } else {
   2244               res.w[1] = 0x0000314dc6448d93ull;
   2245               res.w[0] = 0x38c15b0a00000000ull;
   2246             }
   2247             res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1];
   2248             *pfpsf |= UNDERFLOW_EXCEPTION; // inexact is set later
   2249 
   2250             if (eq_half_ulp) {
   2251               is_midpoint_lt_even = 1; // if (z_sign), as if for absolute value
   2252             } else if (lt_half_ulp) {
   2253               is_inexact_gt_midpoint = 1; //if(z_sign), as if for absolute value
   2254             } else { // if (gt_half_ulp)
   2255               is_inexact_lt_midpoint = 1; //if(z_sign), as if for absolute value
   2256             }
   2257 
   2258             if (rnd_mode != ROUNDING_TO_NEAREST) {
   2259               rounding_correction (rnd_mode,
   2260                   is_inexact_lt_midpoint,
   2261                   is_inexact_gt_midpoint,
   2262                   is_midpoint_lt_even,
   2263                   is_midpoint_gt_even, e3, &res,
   2264                   pfpsf);
   2265               z_exp = res.w[1] & MASK_EXP;
   2266             }
   2267           } // end e3 = emin
   2268           // set the inexact flag (if the result was not exact)
   2269           if (is_inexact_lt_midpoint || is_inexact_gt_midpoint ||
   2270               is_midpoint_lt_even || is_midpoint_gt_even)
   2271             *pfpsf |= INEXACT_EXCEPTION;
   2272         } // end 10^33
   2273       } // end if (p_sign != z_sign)
   2274       res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1];
   2275       *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
   2276       *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
   2277       *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
   2278       *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
   2279       BID_SWAP128 (res);
   2280       BID_RETURN (res)
   2281 
   2282     } else if (((q3 <= delta && delta < p34 && p34 < delta + q4) || // Case (2)
   2283         (q3 <= delta && delta + q4 <= p34) || // Case (3)
   2284         (delta < q3 && p34 < delta + q4) || // Case (4)
   2285         (delta < q3 && q3 <= delta + q4 && delta + q4 <= p34) || // Case (5)
   2286         (delta + q4 < q3)) && // Case (6)
   2287         !(delta <= 1 && p_sign != z_sign)) { // Case (2), (3), (4), (5) or (6)
   2288 
   2289       // the result has the sign of z
   2290 
   2291       if ((q3 <= delta && delta < p34 && p34 < delta + q4) || // Case (2)
   2292           (delta < q3 && p34 < delta + q4)) { // Case (4)
   2293         // round first the sum x * y + z with unbounded exponent
   2294         // scale C3 up by scale = p34 - q3, 1 <= scale <= p34-1,
   2295         // 1 <= scale <= 33
   2296         // calculate res = C3 * 10^scale
   2297         scale = p34 - q3;
   2298         x0 = delta + q4 - p34;
   2299       } else if (delta + q4 < q3) { // Case (6)
   2300         // make Case (6) look like Case (3) or Case (5) with scale = 0
   2301         // by scaling up C4 by 10^(q3 - delta - q4)
   2302         scale = q3 - delta - q4; // 1 <= scale <= 33
   2303         if (q4 <= 19) { // 1 <= scale <= 19; C4 fits in 64 bits
   2304           if (scale <= 19) { // 10^scale fits in 64 bits
   2305             // 64 x 64 C4.w[0] * ten2k64[scale]
   2306             __mul_64x64_to_128MACH (P128, C4.w[0], ten2k64[scale]);
   2307           } else { // 10^scale fits in 128 bits
   2308             // 64 x 128 C4.w[0] * ten2k128[scale - 20]
   2309             __mul_128x64_to_128 (P128, C4.w[0], ten2k128[scale - 20]);
   2310           }
   2311         } else { // C4 fits in 128 bits, but 10^scale must fit in 64 bits
   2312           // 64 x 128 ten2k64[scale] * C4
   2313           __mul_128x64_to_128 (P128, ten2k64[scale], C4);
   2314         }
   2315         C4.w[0] = P128.w[0];
   2316         C4.w[1] = P128.w[1];
   2317         // e4 does not need adjustment, as it is not used from this point on
   2318         scale = 0;
   2319         x0 = 0;
   2320         // now Case (6) looks like Case (3) or Case (5) with scale = 0
   2321       } else { // if Case (3) or Case (5)
   2322         // Note: Case (3) is similar to Case (2), but scale differs and the
   2323         // result is exact, unless it is tiny (so x0 = 0 when calculating the
   2324         // result with unbounded exponent)
   2325 
   2326         // calculate first the sum x * y + z with unbounded exponent (exact)
   2327         // scale C3 up by scale = delta + q4 - q3, 1 <= scale <= p34-1,
   2328         // 1 <= scale <= 33
   2329         // calculate res = C3 * 10^scale
   2330         scale = delta + q4 - q3;
   2331         x0 = 0;
   2332         // Note: the comments which follow refer [mainly] to Case (2)]
   2333       }
   2334 
   2335     case2_repeat:
   2336       if (scale == 0) { // this could happen e.g. if we return to case2_repeat
   2337         // or in Case (4)
   2338         res.w[1] = C3.w[1];
   2339         res.w[0] = C3.w[0];
   2340       } else if (q3 <= 19) { // 1 <= scale <= 19; z fits in 64 bits
   2341         if (scale <= 19) { // 10^scale fits in 64 bits
   2342           // 64 x 64 C3.w[0] * ten2k64[scale]
   2343           __mul_64x64_to_128MACH (res, C3.w[0], ten2k64[scale]);
   2344         } else { // 10^scale fits in 128 bits
   2345           // 64 x 128 C3.w[0] * ten2k128[scale - 20]
   2346           __mul_128x64_to_128 (res, C3.w[0], ten2k128[scale - 20]);
   2347         }
   2348       } else { // z fits in 128 bits, but 10^scale must fit in 64 bits
   2349         // 64 x 128 ten2k64[scale] * C3
   2350         __mul_128x64_to_128 (res, ten2k64[scale], C3);
   2351       }
   2352       // e3 is already calculated
   2353       e3 = e3 - scale;
   2354       // now res = C3 * 10^scale and e3 = e3 - scale
   2355       // Note: C3 * 10^scale could be 10^34 if we returned to case2_repeat
   2356       // because the result was too small
   2357 
   2358       // round C4 to nearest to q4 - x0 digits, where x0 = delta + q4 - p34,
   2359       // 1 <= x0 <= min (q4 - 1, 2 * p34 - 1) <=> 1 <= x0 <= min (q4 - 1, 67)
   2360       // Also: 1 <= q4 - x0 <= p34 -1 => 1 <= q4 - x0 <= 33 (so the result of
   2361       // the rounding fits in 128 bits!)
   2362       // x0 = delta + q4 - p34 (calculated before reaching case2_repeat)
   2363       // because q3 + q4 - x0 <= P => x0 >= q3 + q4 - p34
   2364       if (x0 == 0) { // this could happen only if we return to case2_repeat, or
   2365         // for Case (3) or Case (6)
   2366         R128.w[1] = C4.w[1];
   2367         R128.w[0] = C4.w[0];
   2368       } else if (q4 <= 18) {
   2369         // 2 <= q4 <= 18, max(1, q3+q4-p34) <= x0 <= q4 - 1, 1 <= x0 <= 17
   2370         round64_2_18 (q4, x0, C4.w[0], &R64, &incr_exp,
   2371             &is_midpoint_lt_even, &is_midpoint_gt_even,
   2372             &is_inexact_lt_midpoint, &is_inexact_gt_midpoint);
   2373         if (incr_exp) {
   2374           // R64 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 1, 1 <= q4 - x0 <= 17
   2375           R64 = ten2k64[q4 - x0];
   2376         }
   2377         R128.w[1] = 0;
   2378         R128.w[0] = R64;
   2379       } else if (q4 <= 38) {
   2380         // 19 <= q4 <= 38, max(1, q3+q4-p34) <= x0 <= q4 - 1, 1 <= x0 <= 37
   2381         P128.w[1] = C4.w[1];
   2382         P128.w[0] = C4.w[0];
   2383         round128_19_38 (q4, x0, P128, &R128, &incr_exp,
   2384             &is_midpoint_lt_even, &is_midpoint_gt_even,
   2385             &is_inexact_lt_midpoint,
   2386             &is_inexact_gt_midpoint);
   2387         if (incr_exp) {
   2388           // R128 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 1, 1 <= q4 - x0 <= 37
   2389           if (q4 - x0 <= 19) { // 1 <= q4 - x0 <= 19
   2390             R128.w[0] = ten2k64[q4 - x0];
   2391             // R128.w[1] stays 0
   2392           } else { // 20 <= q4 - x0 <= 37
   2393             R128.w[0] = ten2k128[q4 - x0 - 20].w[0];
   2394             R128.w[1] = ten2k128[q4 - x0 - 20].w[1];
   2395           }
   2396         }
   2397       } else if (q4 <= 57) {
   2398         // 38 <= q4 <= 57, max(1, q3+q4-p34) <= x0 <= q4 - 1, 5 <= x0 <= 56
   2399         P192.w[2] = C4.w[2];
   2400         P192.w[1] = C4.w[1];
   2401         P192.w[0] = C4.w[0];
   2402         round192_39_57 (q4, x0, P192, &R192, &incr_exp,
   2403             &is_midpoint_lt_even, &is_midpoint_gt_even,
   2404             &is_inexact_lt_midpoint,
   2405             &is_inexact_gt_midpoint);
   2406         // R192.w[2] is always 0
   2407         if (incr_exp) {
   2408           // R192 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 5, 1 <= q4 - x0 <= 52
   2409           if (q4 - x0 <= 19) { // 1 <= q4 - x0 <= 19
   2410             R192.w[0] = ten2k64[q4 - x0];
   2411             // R192.w[1] stays 0
   2412             // R192.w[2] stays 0
   2413           } else { // 20 <= q4 - x0 <= 33
   2414             R192.w[0] = ten2k128[q4 - x0 - 20].w[0];
   2415             R192.w[1] = ten2k128[q4 - x0 - 20].w[1];
   2416             // R192.w[2] stays 0
   2417           }
   2418         }
   2419         R128.w[1] = R192.w[1];
   2420         R128.w[0] = R192.w[0];
   2421       } else {
   2422         // 58 <= q4 <= 68, max(1, q3+q4-p34) <= x0 <= q4 - 1, 25 <= x0 <= 67
   2423         round256_58_76 (q4, x0, C4, &R256, &incr_exp,
   2424             &is_midpoint_lt_even, &is_midpoint_gt_even,
   2425             &is_inexact_lt_midpoint,
   2426             &is_inexact_gt_midpoint);
   2427         // R256.w[3] and R256.w[2] are always 0
   2428         if (incr_exp) {
   2429           // R256 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 25, 1 <= q4 - x0 <= 43
   2430           if (q4 - x0 <= 19) { // 1 <= q4 - x0 <= 19
   2431             R256.w[0] = ten2k64[q4 - x0];
   2432             // R256.w[1] stays 0
   2433             // R256.w[2] stays 0
   2434             // R256.w[3] stays 0
   2435           } else { // 20 <= q4 - x0 <= 33
   2436             R256.w[0] = ten2k128[q4 - x0 - 20].w[0];
   2437             R256.w[1] = ten2k128[q4 - x0 - 20].w[1];
   2438             // R256.w[2] stays 0
   2439             // R256.w[3] stays 0
   2440           }
   2441         }
   2442         R128.w[1] = R256.w[1];
   2443         R128.w[0] = R256.w[0];
   2444       }
   2445       // now add C3 * 10^scale in res and the signed top (q4-x0) digits of C4,
   2446       // rounded to nearest, which were copied into R128
   2447       if (z_sign == p_sign) {
   2448         lsb = res.w[0] & 0x01; // lsb of C3 * 10^scale
   2449         // the sum can result in [up to] p34 or p34 + 1 digits
   2450         res.w[0] = res.w[0] + R128.w[0];
   2451         res.w[1] = res.w[1] + R128.w[1];
   2452         if (res.w[0] < R128.w[0])
   2453           res.w[1]++; // carry
   2454         // if res > 10^34 - 1 need to increase x0 and decrease scale by 1
   2455         if (res.w[1] > 0x0001ed09bead87c0ull ||
   2456             (res.w[1] == 0x0001ed09bead87c0ull &&
   2457              res.w[0] > 0x378d8e63ffffffffull)) {
   2458           // avoid double rounding error
   2459           is_inexact_lt_midpoint0 = is_inexact_lt_midpoint;
   2460           is_inexact_gt_midpoint0 = is_inexact_gt_midpoint;
   2461           is_midpoint_lt_even0 = is_midpoint_lt_even;
   2462           is_midpoint_gt_even0 = is_midpoint_gt_even;
   2463           is_inexact_lt_midpoint = 0;
   2464           is_inexact_gt_midpoint = 0;
   2465           is_midpoint_lt_even = 0;
   2466           is_midpoint_gt_even = 0;
   2467           P128.w[1] = res.w[1];
   2468           P128.w[0] = res.w[0];
   2469           round128_19_38 (35, 1, P128, &res, &incr_exp,
   2470               &is_midpoint_lt_even, &is_midpoint_gt_even,
   2471               &is_inexact_lt_midpoint,
   2472               &is_inexact_gt_midpoint);
   2473           // incr_exp is 0 with certainty in this case
   2474           // avoid a double rounding error
   2475           if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) &&
   2476               is_midpoint_lt_even) { // double rounding error upward
   2477             // res = res - 1
   2478             res.w[0]--;
   2479             if (res.w[0] == 0xffffffffffffffffull)
   2480               res.w[1]--;
   2481             // Note: a double rounding error upward is not possible; for this
   2482             // the result after the first rounding would have to be 99...95
   2483             // (35 digits in all), possibly followed by a number of zeros; this
   2484             // not possible in Cases (2)-(6) or (15)-(17) which may get here
   2485             is_midpoint_lt_even = 0;
   2486             is_inexact_lt_midpoint = 1;
   2487           } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) &&
   2488               is_midpoint_gt_even) { // double rounding error downward
   2489             // res = res + 1
   2490             res.w[0]++;
   2491             if (res.w[0] == 0)
   2492               res.w[1]++;
   2493             is_midpoint_gt_even = 0;
   2494             is_inexact_gt_midpoint = 1;
   2495           } else if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
   2496         	     !is_inexact_lt_midpoint
   2497         	     && !is_inexact_gt_midpoint) {
   2498             // if this second rounding was exact the result may still be
   2499             // inexact because of the first rounding
   2500             if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) {
   2501               is_inexact_gt_midpoint = 1;
   2502             }
   2503             if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) {
   2504               is_inexact_lt_midpoint = 1;
   2505             }
   2506           } else if (is_midpoint_gt_even &&
   2507         	     (is_inexact_gt_midpoint0
   2508         	      || is_midpoint_lt_even0)) {
   2509             // pulled up to a midpoint
   2510             is_inexact_lt_midpoint = 1;
   2511             is_inexact_gt_midpoint = 0;
   2512             is_midpoint_lt_even = 0;
   2513             is_midpoint_gt_even = 0;
   2514           } else if (is_midpoint_lt_even &&
   2515         	     (is_inexact_lt_midpoint0
   2516         	      || is_midpoint_gt_even0)) {
   2517             // pulled down to a midpoint
   2518             is_inexact_lt_midpoint = 0;
   2519             is_inexact_gt_midpoint = 1;
   2520             is_midpoint_lt_even = 0;
   2521             is_midpoint_gt_even = 0;
   2522           } else {
   2523             ;
   2524           }
   2525           // adjust exponent
   2526           e3 = e3 + 1;
   2527           if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
   2528               !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) {
   2529             if (is_midpoint_lt_even0 || is_midpoint_gt_even0 ||
   2530         	is_inexact_lt_midpoint0 || is_inexact_gt_midpoint0) {
   2531               is_inexact_lt_midpoint = 1;
   2532             }
   2533           }
   2534         } else {
   2535           // this is the result rounded with unbounded exponent, unless a
   2536           // correction is needed
   2537           res.w[1] = res.w[1] & MASK_COEFF;
   2538           if (lsb == 1) {
   2539             if (is_midpoint_gt_even) {
   2540               // res = res + 1
   2541               is_midpoint_gt_even = 0;
   2542               is_midpoint_lt_even = 1;
   2543               res.w[0]++;
   2544               if (res.w[0] == 0x0)
   2545         	res.w[1]++;
   2546               // check for rounding overflow
   2547               if (res.w[1] == 0x0001ed09bead87c0ull &&
   2548         	  res.w[0] == 0x378d8e6400000000ull) {
   2549         	// res = 10^34 => rounding overflow
   2550         	res.w[1] = 0x0000314dc6448d93ull;
   2551         	res.w[0] = 0x38c15b0a00000000ull; // 10^33
   2552         	e3++;
   2553               }
   2554             } else if (is_midpoint_lt_even) {
   2555               // res = res - 1
   2556               is_midpoint_lt_even = 0;
   2557               is_midpoint_gt_even = 1;
   2558               res.w[0]--;
   2559               if (res.w[0] == 0xffffffffffffffffull)
   2560         	res.w[1]--;
   2561               // if the result is pure zero, the sign depends on the rounding
   2562               // mode (x*y and z had opposite signs)
   2563               if (res.w[1] == 0x0ull && res.w[0] == 0x0ull) {
   2564         	if (rnd_mode != ROUNDING_DOWN)
   2565         	  z_sign = 0x0000000000000000ull;
   2566         	else
   2567         	  z_sign = 0x8000000000000000ull;
   2568         	// the exponent is max (e3, expmin)
   2569         	res.w[1] = 0x0;
   2570         	res.w[0] = 0x0;
   2571         	*ptr_is_midpoint_lt_even = is_midpoint_lt_even;
   2572         	*ptr_is_midpoint_gt_even = is_midpoint_gt_even;
   2573         	*ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
   2574         	*ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
   2575         	BID_SWAP128 (res);
   2576         	BID_RETURN (res)
   2577               }
   2578             } else {
   2579               ;
   2580             }
   2581           }
   2582         }
   2583       } else { // if (z_sign != p_sign)
   2584         lsb = res.w[0] & 0x01; // lsb of C3 * 10^scale; R128 contains rounded C4
   2585         // used to swap rounding indicators if p_sign != z_sign
   2586         // the sum can result in [up to] p34 or p34 - 1 digits
   2587         tmp64 = res.w[0];
   2588         res.w[0] = res.w[0] - R128.w[0];
   2589         res.w[1] = res.w[1] - R128.w[1];
   2590         if (res.w[0] > tmp64)
   2591           res.w[1]--; // borrow
   2592         // if res < 10^33 and exp > expmin need to decrease x0 and
   2593         // increase scale by 1
   2594         if (e3 > expmin && ((res.w[1] < 0x0000314dc6448d93ull ||
   2595         		     (res.w[1] == 0x0000314dc6448d93ull &&
   2596         		      res.w[0] < 0x38c15b0a00000000ull)) ||
   2597         		    (is_inexact_lt_midpoint
   2598         		     && res.w[1] == 0x0000314dc6448d93ull
   2599         		     && res.w[0] == 0x38c15b0a00000000ull))
   2600             && x0 >= 1) {
   2601           x0 = x0 - 1;
   2602           // first restore e3, otherwise it will be too small
   2603           e3 = e3 + scale;
   2604           scale = scale + 1;
   2605           is_inexact_lt_midpoint = 0;
   2606           is_inexact_gt_midpoint = 0;
   2607           is_midpoint_lt_even = 0;
   2608           is_midpoint_gt_even = 0;
   2609           incr_exp = 0;
   2610           goto case2_repeat;
   2611         }
   2612         // else this is the result rounded with unbounded exponent;
   2613         // because the result has opposite sign to that of C4 which was
   2614         // rounded, need to change the rounding indicators
   2615         if (is_inexact_lt_midpoint) {
   2616           is_inexact_lt_midpoint = 0;
   2617           is_inexact_gt_midpoint = 1;
   2618         } else if (is_inexact_gt_midpoint) {
   2619           is_inexact_gt_midpoint = 0;
   2620           is_inexact_lt_midpoint = 1;
   2621         } else if (lsb == 0) {
   2622           if (is_midpoint_lt_even) {
   2623             is_midpoint_lt_even = 0;
   2624             is_midpoint_gt_even = 1;
   2625           } else if (is_midpoint_gt_even) {
   2626             is_midpoint_gt_even = 0;
   2627             is_midpoint_lt_even = 1;
   2628           } else {
   2629             ;
   2630           }
   2631         } else if (lsb == 1) {
   2632           if (is_midpoint_lt_even) {
   2633             // res = res + 1
   2634             res.w[0]++;
   2635             if (res.w[0] == 0x0)
   2636               res.w[1]++;
   2637             // check for rounding overflow
   2638             if (res.w[1] == 0x0001ed09bead87c0ull &&
   2639         	res.w[0] == 0x378d8e6400000000ull) {
   2640               // res = 10^34 => rounding overflow
   2641               res.w[1] = 0x0000314dc6448d93ull;
   2642               res.w[0] = 0x38c15b0a00000000ull; // 10^33
   2643               e3++;
   2644             }
   2645           } else if (is_midpoint_gt_even) {
   2646             // res = res - 1
   2647             res.w[0]--;
   2648             if (res.w[0] == 0xffffffffffffffffull)
   2649               res.w[1]--;
   2650             // if the result is pure zero, the sign depends on the rounding
   2651             // mode (x*y and z had opposite signs)
   2652             if (res.w[1] == 0x0ull && res.w[0] == 0x0ull) {
   2653               if (rnd_mode != ROUNDING_DOWN)
   2654         	z_sign = 0x0000000000000000ull;
   2655               else
   2656         	z_sign = 0x8000000000000000ull;
   2657               // the exponent is max (e3, expmin)
   2658               res.w[1] = 0x0;
   2659               res.w[0] = 0x0;
   2660               *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
   2661               *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
   2662               *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
   2663               *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
   2664               BID_SWAP128 (res);
   2665               BID_RETURN (res)
   2666             }
   2667           } else {
   2668             ;
   2669           }
   2670         } else {
   2671           ;
   2672         }
   2673       }
   2674       // check for underflow
   2675       if (e3 == expmin) { // and if significand < 10^33 => result is tiny
   2676         if ((res.w[1] & MASK_COEFF) < 0x0000314dc6448d93ull ||
   2677             ((res.w[1] & MASK_COEFF) == 0x0000314dc6448d93ull &&
   2678              res.w[0] < 0x38c15b0a00000000ull)) {
   2679           is_tiny = 1;
   2680         }
   2681       } else if (e3 < expmin) {
   2682         // the result is tiny, so we must truncate more of res
   2683         is_tiny = 1;
   2684         x0 = expmin - e3;
   2685         is_inexact_lt_midpoint0 = is_inexact_lt_midpoint;
   2686         is_inexact_gt_midpoint0 = is_inexact_gt_midpoint;
   2687         is_midpoint_lt_even0 = is_midpoint_lt_even;
   2688         is_midpoint_gt_even0 = is_midpoint_gt_even;
   2689         is_inexact_lt_midpoint = 0;
   2690         is_inexact_gt_midpoint = 0;
   2691         is_midpoint_lt_even = 0;
   2692         is_midpoint_gt_even = 0;
   2693         // determine the number of decimal digits in res
   2694         if (res.w[1] == 0x0) {
   2695           // between 1 and 19 digits
   2696           for (ind = 1; ind <= 19; ind++) {
   2697             if (res.w[0] < ten2k64[ind]) {
   2698               break;
   2699             }
   2700           }
   2701           // ind digits
   2702         } else if (res.w[1] < ten2k128[0].w[1] ||
   2703         	   (res.w[1] == ten2k128[0].w[1]
   2704         	    && res.w[0] < ten2k128[0].w[0])) {
   2705           // 20 digits
   2706           ind = 20;
   2707         } else { // between 21 and 38 digits
   2708           for (ind = 1; ind <= 18; ind++) {
   2709             if (res.w[1] < ten2k128[ind].w[1] ||
   2710         	(res.w[1] == ten2k128[ind].w[1] &&
   2711         	 res.w[0] < ten2k128[ind].w[0])) {
   2712               break;
   2713             }
   2714           }
   2715           // ind + 20 digits
   2716           ind = ind + 20;
   2717         }
   2718 
   2719         // at this point ind >= x0; because delta >= 2 on this path, the case
   2720         // ind = x0 can occur only in Case (2) or case (3), when C3 has one
   2721         // digit (q3 = 1) equal to 1 (C3 = 1), e3 is expmin (e3 = expmin),
   2722         // the signs of x * y and z are opposite, and through cancellation
   2723         // the most significant decimal digit in res has the weight
   2724         // 10^(emin-1); however, it is clear that in this case the most
   2725         // significant digit is 9, so the result before rounding is
   2726         // 0.9... * 10^emin
   2727         // Otherwise, ind > x0 because there are non-zero decimal digits in the
   2728         // result with weight of at least 10^emin, and correction for underflow
   2729         //  can be carried out using the round*_*_2_* () routines
   2730         if (x0 == ind) { // the result before rounding is 0.9... * 10^emin
   2731           res.w[1] = 0x0;
   2732           res.w[0] = 0x1;
   2733           is_inexact_gt_midpoint = 1;
   2734         } else if (ind <= 18) { // check that 2 <= ind
   2735           // 2 <= ind <= 18, 1 <= x0 <= 17
   2736           round64_2_18 (ind, x0, res.w[0], &R64, &incr_exp,
   2737         		&is_midpoint_lt_even, &is_midpoint_gt_even,
   2738         		&is_inexact_lt_midpoint,
   2739         		&is_inexact_gt_midpoint);
   2740           if (incr_exp) {
   2741             // R64 = 10^(ind-x0), 1 <= ind - x0 <= ind - 1, 1 <= ind - x0 <= 17
   2742             R64 = ten2k64[ind - x0];
   2743           }
   2744           res.w[1] = 0;
   2745           res.w[0] = R64;
   2746         } else if (ind <= 38) {
   2747           // 19 <= ind <= 38
   2748           P128.w[1] = res.w[1];
   2749           P128.w[0] = res.w[0];
   2750           round128_19_38 (ind, x0, P128, &res, &incr_exp,
   2751         		  &is_midpoint_lt_even, &is_midpoint_gt_even,
   2752         		  &is_inexact_lt_midpoint,
   2753         		  &is_inexact_gt_midpoint);
   2754           if (incr_exp) {
   2755             // R128 = 10^(ind-x0), 1 <= ind - x0 <= ind - 1, 1 <= ind - x0 <= 37
   2756             if (ind - x0 <= 19) { // 1 <= ind - x0 <= 19
   2757               res.w[0] = ten2k64[ind - x0];
   2758               // res.w[1] stays 0
   2759             } else { // 20 <= ind - x0 <= 37
   2760               res.w[0] = ten2k128[ind - x0 - 20].w[0];
   2761               res.w[1] = ten2k128[ind - x0 - 20].w[1];
   2762             }
   2763           }
   2764         }
   2765         // avoid a double rounding error
   2766         if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) &&
   2767             is_midpoint_lt_even) { // double rounding error upward
   2768           // res = res - 1
   2769           res.w[0]--;
   2770           if (res.w[0] == 0xffffffffffffffffull)
   2771             res.w[1]--;
   2772           // Note: a double rounding error upward is not possible; for this
   2773           // the result after the first rounding would have to be 99...95
   2774           // (35 digits in all), possibly followed by a number of zeros; this
   2775           // not possible in Cases (2)-(6) which may get here
   2776           is_midpoint_lt_even = 0;
   2777           is_inexact_lt_midpoint = 1;
   2778         } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) &&
   2779             is_midpoint_gt_even) { // double rounding error downward
   2780           // res = res + 1
   2781           res.w[0]++;
   2782           if (res.w[0] == 0)
   2783             res.w[1]++;
   2784           is_midpoint_gt_even = 0;
   2785           is_inexact_gt_midpoint = 1;
   2786         } else if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
   2787         	   !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) {
   2788           // if this second rounding was exact the result may still be
   2789           // inexact because of the first rounding
   2790           if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) {
   2791             is_inexact_gt_midpoint = 1;
   2792           }
   2793           if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) {
   2794             is_inexact_lt_midpoint = 1;
   2795           }
   2796         } else if (is_midpoint_gt_even &&
   2797         	   (is_inexact_gt_midpoint0 || is_midpoint_lt_even0)) {
   2798           // pulled up to a midpoint
   2799           is_inexact_lt_midpoint = 1;
   2800           is_inexact_gt_midpoint = 0;
   2801           is_midpoint_lt_even = 0;
   2802           is_midpoint_gt_even = 0;
   2803         } else if (is_midpoint_lt_even &&
   2804         	   (is_inexact_lt_midpoint0 || is_midpoint_gt_even0)) {
   2805           // pulled down to a midpoint
   2806           is_inexact_lt_midpoint = 0;
   2807           is_inexact_gt_midpoint = 1;
   2808           is_midpoint_lt_even = 0;
   2809           is_midpoint_gt_even = 0;
   2810         } else {
   2811           ;
   2812         }
   2813         // adjust exponent
   2814         e3 = e3 + x0;
   2815         if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
   2816             !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) {
   2817           if (is_midpoint_lt_even0 || is_midpoint_gt_even0 ||
   2818               is_inexact_lt_midpoint0 || is_inexact_gt_midpoint0) {
   2819             is_inexact_lt_midpoint = 1;
   2820           }
   2821         }
   2822       } else {
   2823         ; // not underflow
   2824       }
   2825       // check for inexact result
   2826       if (is_inexact_lt_midpoint || is_inexact_gt_midpoint ||
   2827           is_midpoint_lt_even || is_midpoint_gt_even) {
   2828         // set the inexact flag
   2829         *pfpsf |= INEXACT_EXCEPTION;
   2830         if (is_tiny)
   2831           *pfpsf |= UNDERFLOW_EXCEPTION;
   2832       }
   2833       // now check for significand = 10^34 (may have resulted from going
   2834       // back to case2_repeat)
   2835       if (res.w[1] == 0x0001ed09bead87c0ull &&
   2836           res.w[0] == 0x378d8e6400000000ull) { // if  res = 10^34
   2837         res.w[1] = 0x0000314dc6448d93ull; // res = 10^33
   2838         res.w[0] = 0x38c15b0a00000000ull;
   2839         e3 = e3 + 1;
   2840       }
   2841       res.w[1] = z_sign | ((UINT64) (e3 + 6176) << 49) | res.w[1];
   2842       // check for overflow
   2843       if (rnd_mode == ROUNDING_TO_NEAREST && e3 > expmax) {
   2844         res.w[1] = z_sign | 0x7800000000000000ull; // +/-inf
   2845         res.w[0] = 0x0000000000000000ull;
   2846         *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION);
   2847       }
   2848       if (rnd_mode != ROUNDING_TO_NEAREST) {
   2849         rounding_correction (rnd_mode,
   2850         		     is_inexact_lt_midpoint,
   2851         		     is_inexact_gt_midpoint,
   2852         		     is_midpoint_lt_even, is_midpoint_gt_even,
   2853         		     e3, &res, pfpsf);
   2854       }
   2855       *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
   2856       *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
   2857       *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
   2858       *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
   2859       BID_SWAP128 (res);
   2860       BID_RETURN (res)
   2861 
   2862     } else {
   2863 
   2864       // we get here only if delta <= 1 in Cases (2), (3), (4), (5), or (6) and
   2865       // the signs of x*y and z are opposite; in these cases massive
   2866       // cancellation can occur, so it is better to scale either C3 or C4 and
   2867       // to perform the subtraction before rounding; rounding is performed
   2868       // next, depending on the number of decimal digits in the result and on
   2869       // the exponent value
   2870       // Note: overlow is not possible in this case
   2871       // this is similar to Cases (15), (16), and (17)
   2872 
   2873       if (delta + q4 < q3) { // from Case (6)
   2874         // Case (6) with 0<= delta <= 1 is similar to Cases (15), (16), and
   2875         // (17) if we swap (C3, C4), (q3, q4), (e3, e4), (z_sign, p_sign)
   2876         // and call add_and_round; delta stays positive
   2877         // C4.w[3] = 0 and C4.w[2] = 0, so swap just the low part of C4 with C3
   2878         P128.w[1] = C3.w[1];
   2879         P128.w[0] = C3.w[0];
   2880         C3.w[1] = C4.w[1];
   2881         C3.w[0] = C4.w[0];
   2882         C4.w[1] = P128.w[1];
   2883         C4.w[0] = P128.w[0];
   2884         ind = q3;
   2885         q3 = q4;
   2886         q4 = ind;
   2887         ind = e3;
   2888         e3 = e4;
   2889         e4 = ind;
   2890         tmp_sign = z_sign;
   2891         z_sign = p_sign;
   2892         p_sign = tmp_sign;
   2893       } else { // from Cases (2), (3), (4), (5)
   2894         // In Cases (2), (3), (4), (5) with 0 <= delta <= 1 C3 has to be
   2895         // scaled up by q4 + delta - q3; this is the same as in Cases (15),
   2896         // (16), and (17) if we just change the sign of delta
   2897         delta = -delta;
   2898       }
   2899       add_and_round (q3, q4, e4, delta, p34, z_sign, p_sign, C3, C4,
   2900         	     rnd_mode, &is_midpoint_lt_even,
   2901         	     &is_midpoint_gt_even, &is_inexact_lt_midpoint,
   2902         	     &is_inexact_gt_midpoint, pfpsf, &res);
   2903       *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
   2904       *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
   2905       *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
   2906       *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
   2907       BID_SWAP128 (res);
   2908       BID_RETURN (res)
   2909 
   2910     }
   2911 
   2912   } else { // if delta < 0
   2913 
   2914     delta = -delta;
   2915 
   2916     if (p34 < q4 && q4 <= delta) { // Case (7)
   2917 
   2918       // truncate C4 to p34 digits into res
   2919       // x = q4-p34, 1 <= x <= 34 because 35 <= q4 <= 68
   2920       x0 = q4 - p34;
   2921       if (q4 <= 38) {
   2922         P128.w[1] = C4.w[1];
   2923         P128.w[0] = C4.w[0];
   2924         round128_19_38 (q4, x0, P128, &res, &incr_exp,
   2925         		&is_midpoint_lt_even, &is_midpoint_gt_even,
   2926         		&is_inexact_lt_midpoint,
   2927         		&is_inexact_gt_midpoint);
   2928       } else if (q4 <= 57) { // 35 <= q4 <= 57
   2929         P192.w[2] = C4.w[2];
   2930         P192.w[1] = C4.w[1];
   2931         P192.w[0] = C4.w[0];
   2932         round192_39_57 (q4, x0, P192, &R192, &incr_exp,
   2933         		&is_midpoint_lt_even, &is_midpoint_gt_even,
   2934         		&is_inexact_lt_midpoint,
   2935         		&is_inexact_gt_midpoint);
   2936         res.w[0] = R192.w[0];
   2937         res.w[1] = R192.w[1];
   2938       } else { // if (q4 <= 68)
   2939         round256_58_76 (q4, x0, C4, &R256, &incr_exp,
   2940         		&is_midpoint_lt_even, &is_midpoint_gt_even,
   2941         		&is_inexact_lt_midpoint,
   2942         		&is_inexact_gt_midpoint);
   2943         res.w[0] = R256.w[0];
   2944         res.w[1] = R256.w[1];
   2945       }
   2946       e4 = e4 + x0;
   2947       if (incr_exp) {
   2948         e4 = e4 + 1;
   2949       }
   2950       if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
   2951           !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) {
   2952         // if C4 rounded to p34 digits is exact then the result is inexact,
   2953         // in a way that depends on the signs of x * y and z
   2954         if (p_sign == z_sign) {
   2955           is_inexact_lt_midpoint = 1;
   2956         } else { // if (p_sign != z_sign)
   2957           if (res.w[1] != 0x0000314dc6448d93ull ||
   2958               res.w[0] != 0x38c15b0a00000000ull) { // res != 10^33
   2959             is_inexact_gt_midpoint = 1;
   2960           } else { // res = 10^33 and exact is a special case
   2961             // if C3 < 1/2 ulp then res = 10^33 and is_inexact_gt_midpoint = 1
   2962             // if C3 = 1/2 ulp then res = 10^33 and is_midpoint_lt_even = 1
   2963             // if C3 > 1/2 ulp then res = 10^34-1 and is_inexact_lt_midpoint = 1
   2964             // Note: ulp is really ulp/10 (after borrow which propagates to msd)
   2965             if (delta > p34 + 1) { // C3 < 1/2
   2966               // res = 10^33, unchanged
   2967               is_inexact_gt_midpoint = 1;
   2968             } else { // if (delta == p34 + 1)
   2969               if (q3 <= 19) {
   2970         	if (C3.w[0] < midpoint64[q3 - 1]) { // C3 < 1/2 ulp
   2971         	  // res = 10^33, unchanged
   2972         	  is_inexact_gt_midpoint = 1;
   2973         	} else if (C3.w[0] == midpoint64[q3 - 1]) { // C3 = 1/2 ulp
   2974         	  // res = 10^33, unchanged
   2975         	  is_midpoint_lt_even = 1;
   2976         	} else { // if (C3.w[0] > midpoint64[q3-1]), C3 > 1/2 ulp
   2977         	  res.w[1] = 0x0001ed09bead87c0ull; // 10^34 - 1
   2978         	  res.w[0] = 0x378d8e63ffffffffull;
   2979         	  e4 = e4 - 1;
   2980         	  is_inexact_lt_midpoint = 1;
   2981         	}
   2982               } else { // if (20 <= q3 <=34)
   2983         	if (C3.w[1] < midpoint128[q3 - 20].w[1] ||
   2984                     (C3.w[1] == midpoint128[q3 - 20].w[1] &&
   2985                     C3.w[0] < midpoint128[q3 - 20].w[0])) { // C3 < 1/2 ulp
   2986         	  // res = 10^33, unchanged
   2987         	  is_inexact_gt_midpoint = 1;
   2988         	} else if (C3.w[1] == midpoint128[q3 - 20].w[1] &&
   2989                     C3.w[0] == midpoint128[q3 - 20].w[0]) { // C3 = 1/2 ulp
   2990         	  // res = 10^33, unchanged
   2991         	  is_midpoint_lt_even = 1;
   2992         	} else { // if (C3 > midpoint128[q3-20]), C3 > 1/2 ulp
   2993         	  res.w[1] = 0x0001ed09bead87c0ull; // 10^34 - 1
   2994         	  res.w[0] = 0x378d8e63ffffffffull;
   2995         	  e4 = e4 - 1;
   2996         	  is_inexact_lt_midpoint = 1;
   2997         	}
   2998               }
   2999             }
   3000           }
   3001         }
   3002       } else if (is_midpoint_lt_even) {
   3003         if (z_sign != p_sign) {
   3004           // needs correction: res = res - 1
   3005           res.w[0] = res.w[0] - 1;
   3006           if (res.w[0] == 0xffffffffffffffffull)
   3007             res.w[1]--;
   3008           // if it is (10^33-1)*10^e4 then the corect result is
   3009           // (10^34-1)*10(e4-1)
   3010           if (res.w[1] == 0x0000314dc6448d93ull &&
   3011               res.w[0] == 0x38c15b09ffffffffull) {
   3012             res.w[1] = 0x0001ed09bead87c0ull; // 10^34 - 1
   3013             res.w[0] = 0x378d8e63ffffffffull;
   3014             e4 = e4 - 1;
   3015           }
   3016           is_midpoint_lt_even = 0;
   3017           is_inexact_lt_midpoint = 1;
   3018         } else { // if (z_sign == p_sign)
   3019           is_midpoint_lt_even = 0;
   3020           is_inexact_gt_midpoint = 1;
   3021         }
   3022       } else if (is_midpoint_gt_even) {
   3023         if (z_sign == p_sign) {
   3024           // needs correction: res = res + 1 (cannot cross in the next binade)
   3025           res.w[0] = res.w[0] + 1;
   3026           if (res.w[0] == 0x0000000000000000ull)
   3027             res.w[1]++;
   3028           is_midpoint_gt_even = 0;
   3029           is_inexact_gt_midpoint = 1;
   3030         } else { // if (z_sign != p_sign)
   3031           is_midpoint_gt_even = 0;
   3032           is_inexact_lt_midpoint = 1;
   3033         }
   3034       } else {
   3035         ; // the rounded result is already correct
   3036       }
   3037       // check for overflow
   3038       if (rnd_mode == ROUNDING_TO_NEAREST && e4 > expmax) {
   3039         res.w[1] = p_sign | 0x7800000000000000ull;
   3040         res.w[0] = 0x0000000000000000ull;
   3041         *pfpsf |= (OVERFLOW_EXCEPTION | INEXACT_EXCEPTION);
   3042       } else { // no overflow or not RN
   3043         p_exp = ((UINT64) (e4 + 6176) << 49);
   3044         res.w[1] = p_sign | (p_exp & MASK_EXP) | res.w[1];
   3045       }
   3046       if (rnd_mode != ROUNDING_TO_NEAREST) {
   3047         rounding_correction (rnd_mode,
   3048         		     is_inexact_lt_midpoint,
   3049         		     is_inexact_gt_midpoint,
   3050         		     is_midpoint_lt_even, is_midpoint_gt_even,
   3051         		     e4, &res, pfpsf);
   3052       }
   3053       if (is_inexact_lt_midpoint || is_inexact_gt_midpoint ||
   3054           is_midpoint_lt_even || is_midpoint_gt_even) {
   3055         // set the inexact flag
   3056         *pfpsf |= INEXACT_EXCEPTION;
   3057       }
   3058       *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
   3059       *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
   3060       *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
   3061       *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
   3062       BID_SWAP128 (res);
   3063       BID_RETURN (res)
   3064 
   3065     } else if ((q4 <= p34 && p34 <= delta) || // Case (8)
   3066         (q4 <= delta && delta < p34 && p34 < delta + q3) || // Case (9)
   3067         (q4 <= delta && delta + q3 <= p34) || // Case (10)
   3068         (delta < q4 && q4 <= p34 && p34 < delta + q3) || // Case (13)
   3069         (delta < q4 && q4 <= delta + q3 && delta + q3 <= p34) || // Case (14)
   3070         (delta + q3 < q4 && q4 <= p34)) { // Case (18)
   3071 
   3072       // Case (8) is similar to Case (1), with C3 and C4 swapped
   3073       // Case (9) is similar to Case (2), with C3 and C4 swapped
   3074       // Case (10) is similar to Case (3), with C3 and C4 swapped
   3075       // Case (13) is similar to Case (4), with C3 and C4 swapped
   3076       // Case (14) is similar to Case (5), with C3 and C4 swapped
   3077       // Case (18) is similar to Case (6), with C3 and C4 swapped
   3078 
   3079       // swap (C3, C4), (q3, q4), (e3, 34), (z_sign, p_sign), (z_exp, p_exp)
   3080       // and go back to delta_ge_zero
   3081       // C4.w[3] = 0 and C4.w[2] = 0, so swap just the low part of C4 with C3
   3082       P128.w[1] = C3.w[1];
   3083       P128.w[0] = C3.w[0];
   3084       C3.w[1] = C4.w[1];
   3085       C3.w[0] = C4.w[0];
   3086       C4.w[1] = P128.w[1];
   3087       C4.w[0] = P128.w[0];
   3088       ind = q3;
   3089       q3 = q4;
   3090       q4 = ind;
   3091       ind = e3;
   3092       e3 = e4;
   3093       e4 = ind;
   3094       tmp_sign = z_sign;
   3095       z_sign = p_sign;
   3096       p_sign = tmp_sign;
   3097       tmp.ui64 = z_exp;
   3098       z_exp = p_exp;
   3099       p_exp = tmp.ui64;
   3100       goto delta_ge_zero;
   3101 
   3102     } else if ((p34 <= delta && delta < q4 && q4 < delta + q3) || // Case (11)
   3103                (delta < p34 && p34 < q4 && q4 < delta + q3)) { // Case (12)
   3104 
   3105       // round C3 to nearest to q3 - x0 digits, where x0 = e4 - e3,
   3106       // 1 <= x0 <= q3 - 1 <= p34 - 1
   3107       x0 = e4 - e3; // or x0 = delta + q3 - q4
   3108       if (q3 <= 18) { // 2 <= q3 <= 18
   3109         round64_2_18 (q3, x0, C3.w[0], &R64, &incr_exp,
   3110         	      &is_midpoint_lt_even, &is_midpoint_gt_even,
   3111         	      &is_inexact_lt_midpoint, &is_inexact_gt_midpoint);
   3112         // C3.w[1] = 0;
   3113         C3.w[0] = R64;
   3114       } else if (q3 <= 38) {
   3115         round128_19_38 (q3, x0, C3, &R128, &incr_exp,
   3116         		&is_midpoint_lt_even, &is_midpoint_gt_even,
   3117         		&is_inexact_lt_midpoint,
   3118         		&is_inexact_gt_midpoint);
   3119         C3.w[1] = R128.w[1];
   3120         C3.w[0] = R128.w[0];
   3121       }
   3122       // the rounded result has q3 - x0 digits
   3123       // we want the exponent to be e4, so if incr_exp = 1 then
   3124       // multiply the rounded result by 10 - it will still fit in 113 bits
   3125       if (incr_exp) {
   3126         // 64 x 128 -> 128
   3127         P128.w[1] = C3.w[1];
   3128         P128.w[0] = C3.w[0];
   3129         __mul_64x128_to_128 (C3, ten2k64[1], P128);
   3130       }
   3131       e3 = e3 + x0; // this is e4
   3132       // now add/subtract the 256-bit C4 and the new (and shorter) 128-bit C3;
   3133       // the result will have the sign of x * y; the exponent is e4
   3134       R256.w[3] = 0;
   3135       R256.w[2] = 0;
   3136       R256.w[1] = C3.w[1];
   3137       R256.w[0] = C3.w[0];
   3138       if (p_sign == z_sign) { // R256 = C4 + R256
   3139         add256 (C4, R256, &R256);
   3140       } else { // if (p_sign != z_sign) { // R256 = C4 - R256
   3141         sub256 (C4, R256, &R256); // the result cannot be pure zero
   3142         // because the result has opposite sign to that of R256 which was
   3143         // rounded, need to change the rounding indicators
   3144         lsb = C4.w[0] & 0x01;
   3145         if (is_inexact_lt_midpoint) {
   3146           is_inexact_lt_midpoint = 0;
   3147           is_inexact_gt_midpoint = 1;
   3148         } else if (is_inexact_gt_midpoint) {
   3149           is_inexact_gt_midpoint = 0;
   3150           is_inexact_lt_midpoint = 1;
   3151         } else if (lsb == 0) {
   3152           if (is_midpoint_lt_even) {
   3153             is_midpoint_lt_even = 0;
   3154             is_midpoint_gt_even = 1;
   3155           } else if (is_midpoint_gt_even) {
   3156             is_midpoint_gt_even = 0;
   3157             is_midpoint_lt_even = 1;
   3158           } else {
   3159             ;
   3160           }
   3161         } else if (lsb == 1) {
   3162           if (is_midpoint_lt_even) {
   3163             // res = res + 1
   3164             R256.w[0]++;
   3165             if (R256.w[0] == 0x0) {
   3166               R256.w[1]++;
   3167               if (R256.w[1] == 0x0) {
   3168         	R256.w[2]++;
   3169         	if (R256.w[2] == 0x0) {
   3170         	  R256.w[3]++;
   3171         	}
   3172               }
   3173             }
   3174             // no check for rounding overflow - R256 was a difference
   3175           } else if (is_midpoint_gt_even) {
   3176             // res = res - 1
   3177             R256.w[0]--;
   3178             if (R256.w[0] == 0xffffffffffffffffull) {
   3179               R256.w[1]--;
   3180               if (R256.w[1] == 0xffffffffffffffffull) {
   3181         	R256.w[2]--;
   3182         	if (R256.w[2] == 0xffffffffffffffffull) {
   3183         	  R256.w[3]--;
   3184         	}
   3185               }
   3186             }
   3187           } else {
   3188             ;
   3189           }
   3190         } else {
   3191           ;
   3192         }
   3193       }
   3194       // determine the number of decimal digits in R256
   3195       ind = nr_digits256 (R256); // ind >= p34
   3196       // if R256 is sum, then ind > p34; if R256 is a difference, then
   3197       // ind >= p34; this means that we can calculate the result rounded to
   3198       // the destination precision, with unbounded exponent, starting from R256
   3199       // and using the indicators from the rounding of C3 to avoid a double
   3200       // rounding error
   3201 
   3202       if (ind < p34) {
   3203         ;
   3204       } else if (ind == p34) {
   3205         // the result rounded to the destination precision with
   3206         // unbounded exponent
   3207         // is (-1)^p_sign * R256 * 10^e4
   3208         res.w[1] = R256.w[1];
   3209         res.w[0] = R256.w[0];
   3210       } else { // if (ind > p34)
   3211         // if more than P digits, round to nearest to P digits
   3212         // round R256 to p34 digits
   3213         x0 = ind - p34; // 1 <= x0 <= 34 as 35 <= ind <= 68
   3214         // save C3 rounding indicators to help avoid double rounding error
   3215         is_inexact_lt_midpoint0 = is_inexact_lt_midpoint;
   3216         is_inexact_gt_midpoint0 = is_inexact_gt_midpoint;
   3217         is_midpoint_lt_even0 = is_midpoint_lt_even;
   3218         is_midpoint_gt_even0 = is_midpoint_gt_even;
   3219         // initialize rounding indicators
   3220         is_inexact_lt_midpoint = 0;
   3221         is_inexact_gt_midpoint = 0;
   3222         is_midpoint_lt_even = 0;
   3223         is_midpoint_gt_even = 0;
   3224         // round to p34 digits; the result fits in 113 bits
   3225         if (ind <= 38) {
   3226           P128.w[1] = R256.w[1];
   3227           P128.w[0] = R256.w[0];
   3228           round128_19_38 (ind, x0, P128, &R128, &incr_exp,
   3229         		  &is_midpoint_lt_even, &is_midpoint_gt_even,
   3230         		  &is_inexact_lt_midpoint,
   3231         		  &is_inexact_gt_midpoint);
   3232         } else if (ind <= 57) {
   3233           P192.w[2] = R256.w[2];
   3234           P192.w[1] = R256.w[1];
   3235           P192.w[0] = R256.w[0];
   3236           round192_39_57 (ind, x0, P192, &R192, &incr_exp,
   3237         		  &is_midpoint_lt_even, &is_midpoint_gt_even,
   3238         		  &is_inexact_lt_midpoint,
   3239         		  &is_inexact_gt_midpoint);
   3240           R128.w[1] = R192.w[1];
   3241           R128.w[0] = R192.w[0];
   3242         } else { // if (ind <= 68)
   3243           round256_58_76 (ind, x0, R256, &R256, &incr_exp,
   3244         		  &is_midpoint_lt_even, &is_midpoint_gt_even,
   3245         		  &is_inexact_lt_midpoint,
   3246         		  &is_inexact_gt_midpoint);
   3247           R128.w[1] = R256.w[1];
   3248           R128.w[0] = R256.w[0];
   3249         }
   3250         // the rounded result has p34 = 34 digits
   3251         e4 = e4 + x0 + incr_exp;
   3252 
   3253         res.w[1] = R128.w[1];
   3254         res.w[0] = R128.w[0];
   3255 
   3256         // avoid a double rounding error
   3257         if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) &&
   3258             is_midpoint_lt_even) { // double rounding error upward
   3259           // res = res - 1
   3260           res.w[0]--;
   3261           if (res.w[0] == 0xffffffffffffffffull)
   3262             res.w[1]--;
   3263           is_midpoint_lt_even = 0;
   3264           is_inexact_lt_midpoint = 1;
   3265           // Note: a double rounding error upward is not possible; for this
   3266           // the result after the first rounding would have to be 99...95
   3267           // (35 digits in all), possibly followed by a number of zeros; this
   3268           // not possible in Cases (2)-(6) or (15)-(17) which may get here
   3269           // if this is 10^33 - 1 make it 10^34 - 1 and decrement exponent
   3270           if (res.w[1] == 0x0000314dc6448d93ull &&
   3271             res.w[0] == 0x38c15b09ffffffffull) { // 10^33 - 1
   3272             res.w[1] = 0x0001ed09bead87c0ull; // 10^34 - 1
   3273             res.w[0] = 0x378d8e63ffffffffull;
   3274             e4--;
   3275           }
   3276         } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) &&
   3277             is_midpoint_gt_even) { // double rounding error downward
   3278           // res = res + 1
   3279           res.w[0]++;
   3280           if (res.w[0] == 0)
   3281             res.w[1]++;
   3282           is_midpoint_gt_even = 0;
   3283           is_inexact_gt_midpoint = 1;
   3284         } else if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
   3285         	   !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) {
   3286           // if this second rounding was exact the result may still be
   3287           // inexact because of the first rounding
   3288           if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) {
   3289             is_inexact_gt_midpoint = 1;
   3290           }
   3291           if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) {
   3292             is_inexact_lt_midpoint = 1;
   3293           }
   3294         } else if (is_midpoint_gt_even &&
   3295         	   (is_inexact_gt_midpoint0 || is_midpoint_lt_even0)) {
   3296           // pulled up to a midpoint
   3297           is_inexact_lt_midpoint = 1;
   3298           is_inexact_gt_midpoint = 0;
   3299           is_midpoint_lt_even = 0;
   3300           is_midpoint_gt_even = 0;
   3301         } else if (is_midpoint_lt_even &&
   3302         	   (is_inexact_lt_midpoint0 || is_midpoint_gt_even0)) {
   3303           // pulled down to a midpoint
   3304           is_inexact_lt_midpoint = 0;
   3305           is_inexact_gt_midpoint = 1;
   3306           is_midpoint_lt_even = 0;
   3307           is_midpoint_gt_even = 0;
   3308         } else {
   3309           ;
   3310         }
   3311       }
   3312 
   3313       // determine tininess
   3314       if (rnd_mode == ROUNDING_TO_NEAREST) {
   3315         if (e4 < expmin) {
   3316           is_tiny = 1; // for other rounding modes apply correction
   3317         }
   3318       } else {
   3319         // for RM, RP, RZ, RA apply correction in order to determine tininess
   3320         // but do not save the result; apply the correction to
   3321         // (-1)^p_sign * res * 10^0
   3322         P128.w[1] = p_sign | 0x3040000000000000ull | res.w[1];
   3323         P128.w[0] = res.w[0];
   3324         rounding_correction (rnd_mode,
   3325         		     is_inexact_lt_midpoint,
   3326         		     is_inexact_gt_midpoint,
   3327         		     is_midpoint_lt_even, is_midpoint_gt_even,
   3328         		     0, &P128, pfpsf);
   3329         scale = ((P128.w[1] & MASK_EXP) >> 49) - 6176; // -1, 0, or +1
   3330         // the number of digits in the significand is p34 = 34
   3331         if (e4 + scale < expmin) {
   3332           is_tiny = 1;
   3333         }
   3334       }
   3335 
   3336       // the result rounded to the destination precision with unbounded exponent
   3337       // is (-1)^p_sign * res * 10^e4
   3338       res.w[1] = p_sign | ((UINT64) (e4 + 6176) << 49) | res.w[1]; // RN
   3339       // res.w[0] unchanged;
   3340       // Note: res is correct only if expmin <= e4 <= expmax
   3341       ind = p34; // the number of decimal digits in the signifcand of res
   3342 
   3343       // at this point we have the result rounded with unbounded exponent in
   3344       // res and we know its tininess:
   3345       // res = (-1)^p_sign * significand * 10^e4,
   3346       // where q (significand) = ind = p34
   3347       // Note: res is correct only if expmin <= e4 <= expmax
   3348 
   3349       // check for overflow if RN
   3350       if (rnd_mode == ROUNDING_TO_NEAREST
   3351           && (ind + e4) > (p34 + expmax)) {
   3352         res.w[1] = p_sign | 0x7800000000000000ull;
   3353         res.w[0] = 0x0000000000000000ull;
   3354         *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION);
   3355         *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
   3356         *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
   3357         *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
   3358         *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
   3359         BID_SWAP128 (res);
   3360         BID_RETURN (res)
   3361       } // else not overflow or not RN, so continue
   3362 
   3363       // from this point on this is similar to the last part of the computation
   3364       // for Cases (15), (16), (17)
   3365 
   3366       // if (e4 >= expmin) we have the result rounded with bounded exponent
   3367       if (e4 < expmin) {
   3368         x0 = expmin - e4; // x0 >= 1; the number of digits to chop off of res
   3369         // where the result rounded [at most] once is
   3370         //   (-1)^p_sign * significand_res * 10^e4
   3371 
   3372         // avoid double rounding error
   3373         is_inexact_lt_midpoint0 = is_inexact_lt_midpoint;
   3374         is_inexact_gt_midpoint0 = is_inexact_gt_midpoint;
   3375         is_midpoint_lt_even0 = is_midpoint_lt_even;
   3376         is_midpoint_gt_even0 = is_midpoint_gt_even;
   3377         is_inexact_lt_midpoint = 0;
   3378         is_inexact_gt_midpoint = 0;
   3379         is_midpoint_lt_even = 0;
   3380         is_midpoint_gt_even = 0;
   3381 
   3382         if (x0 > ind) {
   3383           // nothing is left of res when moving the decimal point left x0 digits
   3384           is_inexact_lt_midpoint = 1;
   3385           res.w[1] = p_sign | 0x0000000000000000ull;
   3386           res.w[0] = 0x0000000000000000ull;
   3387           e4 = expmin;
   3388         } else if (x0 == ind) { // 1 <= x0 = ind <= p34 = 34
   3389           // this is <, =, or > 1/2 ulp
   3390           // compare the ind-digit value in the significand of res with
   3391           // 1/2 ulp = 5*10^(ind-1), i.e. determine whether it is
   3392           // less than, equal to, or greater than 1/2 ulp (significand of res)
   3393           R128.w[1] = res.w[1] & MASK_COEFF;
   3394           R128.w[0] = res.w[0];
   3395           if (ind <= 19) {
   3396             if (R128.w[0] < midpoint64[ind - 1]) { // < 1/2 ulp
   3397               lt_half_ulp = 1;
   3398               is_inexact_lt_midpoint = 1;
   3399             } else if (R128.w[0] == midpoint64[ind - 1]) { // = 1/2 ulp
   3400               eq_half_ulp = 1;
   3401               is_midpoint_gt_even = 1;
   3402             } else { // > 1/2 ulp
   3403               gt_half_ulp = 1;
   3404               is_inexact_gt_midpoint = 1;
   3405             }
   3406           } else { // if (ind <= 38)
   3407             if (R128.w[1] < midpoint128[ind - 20].w[1] ||
   3408                 (R128.w[1] == midpoint128[ind - 20].w[1] &&
   3409                 R128.w[0] < midpoint128[ind - 20].w[0])) { // < 1/2 ulp
   3410               lt_half_ulp = 1;
   3411               is_inexact_lt_midpoint = 1;
   3412             } else if (R128.w[1] == midpoint128[ind - 20].w[1] &&
   3413                 R128.w[0] == midpoint128[ind - 20].w[0]) { // = 1/2 ulp
   3414               eq_half_ulp = 1;
   3415               is_midpoint_gt_even = 1;
   3416             } else { // > 1/2 ulp
   3417               gt_half_ulp = 1;
   3418               is_inexact_gt_midpoint = 1;
   3419             }
   3420           }
   3421           if (lt_half_ulp || eq_half_ulp) {
   3422             // res = +0.0 * 10^expmin
   3423             res.w[1] = 0x0000000000000000ull;
   3424             res.w[0] = 0x0000000000000000ull;
   3425           } else { // if (gt_half_ulp)
   3426             // res = +1 * 10^expmin
   3427             res.w[1] = 0x0000000000000000ull;
   3428             res.w[0] = 0x0000000000000001ull;
   3429           }
   3430           res.w[1] = p_sign | res.w[1];
   3431           e4 = expmin;
   3432         } else { // if (1 <= x0 <= ind - 1 <= 33)
   3433           // round the ind-digit result to ind - x0 digits
   3434 
   3435           if (ind <= 18) { // 2 <= ind <= 18
   3436             round64_2_18 (ind, x0, res.w[0], &R64, &incr_exp,
   3437         		  &is_midpoint_lt_even, &is_midpoint_gt_even,
   3438         		  &is_inexact_lt_midpoint,
   3439         		  &is_inexact_gt_midpoint);
   3440             res.w[1] = 0x0;
   3441             res.w[0] = R64;
   3442           } else if (ind <= 38) {
   3443             P128.w[1] = res.w[1] & MASK_COEFF;
   3444             P128.w[0] = res.w[0];
   3445             round128_19_38 (ind, x0, P128, &res, &incr_exp,
   3446         		    &is_midpoint_lt_even, &is_midpoint_gt_even,
   3447         		    &is_inexact_lt_midpoint,
   3448         		    &is_inexact_gt_midpoint);
   3449           }
   3450           e4 = e4 + x0; // expmin
   3451           // we want the exponent to be expmin, so if incr_exp = 1 then
   3452           // multiply the rounded result by 10 - it will still fit in 113 bits
   3453           if (incr_exp) {
   3454             // 64 x 128 -> 128
   3455             P128.w[1] = res.w[1] & MASK_COEFF;
   3456             P128.w[0] = res.w[0];
   3457             __mul_64x128_to_128 (res, ten2k64[1], P128);
   3458           }
   3459           res.w[1] =
   3460             p_sign | ((UINT64) (e4 + 6176) << 49) | (res.
   3461         					     w[1] & MASK_COEFF);
   3462           // avoid a double rounding error
   3463           if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) &&
   3464                 is_midpoint_lt_even) { // double rounding error upward
   3465             // res = res - 1
   3466             res.w[0]--;
   3467             if (res.w[0] == 0xffffffffffffffffull)
   3468               res.w[1]--;
   3469             // Note: a double rounding error upward is not possible; for this
   3470             // the result after the first rounding would have to be 99...95
   3471             // (35 digits in all), possibly followed by a number of zeros; this
   3472             // not possible in this underflow case
   3473             is_midpoint_lt_even = 0;
   3474             is_inexact_lt_midpoint = 1;
   3475           } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) &&
   3476                 is_midpoint_gt_even) { // double rounding error downward
   3477             // res = res + 1
   3478             res.w[0]++;
   3479             if (res.w[0] == 0)
   3480               res.w[1]++;
   3481             is_midpoint_gt_even = 0;
   3482             is_inexact_gt_midpoint = 1;
   3483           } else if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
   3484         	     !is_inexact_lt_midpoint
   3485         	     && !is_inexact_gt_midpoint) {
   3486             // if this second rounding was exact the result may still be
   3487             // inexact because of the first rounding
   3488             if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) {
   3489               is_inexact_gt_midpoint = 1;
   3490             }
   3491             if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) {
   3492               is_inexact_lt_midpoint = 1;
   3493             }
   3494           } else if (is_midpoint_gt_even &&
   3495         	     (is_inexact_gt_midpoint0
   3496         	      || is_midpoint_lt_even0)) {
   3497             // pulled up to a midpoint
   3498             is_inexact_lt_midpoint = 1;
   3499             is_inexact_gt_midpoint = 0;
   3500             is_midpoint_lt_even = 0;
   3501             is_midpoint_gt_even = 0;
   3502           } else if (is_midpoint_lt_even &&
   3503         	     (is_inexact_lt_midpoint0
   3504         	      || is_midpoint_gt_even0)) {
   3505             // pulled down to a midpoint
   3506             is_inexact_lt_midpoint = 0;
   3507             is_inexact_gt_midpoint = 1;
   3508             is_midpoint_lt_even = 0;
   3509             is_midpoint_gt_even = 0;
   3510           } else {
   3511             ;
   3512           }
   3513         }
   3514       }
   3515       // res contains the correct result
   3516       // apply correction if not rounding to nearest
   3517       if (rnd_mode != ROUNDING_TO_NEAREST) {
   3518         rounding_correction (rnd_mode,
   3519         		     is_inexact_lt_midpoint,
   3520         		     is_inexact_gt_midpoint,
   3521         		     is_midpoint_lt_even, is_midpoint_gt_even,
   3522         		     e4, &res, pfpsf);
   3523       }
   3524       if (is_midpoint_lt_even || is_midpoint_gt_even ||
   3525           is_inexact_lt_midpoint || is_inexact_gt_midpoint) {
   3526         // set the inexact flag
   3527         *pfpsf |= INEXACT_EXCEPTION;
   3528         if (is_tiny)
   3529           *pfpsf |= UNDERFLOW_EXCEPTION;
   3530       }
   3531       *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
   3532       *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
   3533       *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
   3534       *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
   3535       BID_SWAP128 (res);
   3536       BID_RETURN (res)
   3537 
   3538     } else if ((p34 <= delta && delta + q3 <= q4) || // Case (15)
   3539         (delta < p34 && p34 < delta + q3 && delta + q3 <= q4) || //Case (16)
   3540         (delta + q3 <= p34 && p34 < q4)) { // Case (17)
   3541 
   3542       // calculate first the result rounded to the destination precision, with
   3543       // unbounded exponent
   3544 
   3545       add_and_round (q3, q4, e4, delta, p34, z_sign, p_sign, C3, C4,
   3546               rnd_mode, &is_midpoint_lt_even,
   3547               &is_midpoint_gt_even, &is_inexact_lt_midpoint,
   3548               &is_inexact_gt_midpoint, pfpsf, &res);
   3549       *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
   3550       *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
   3551       *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
   3552       *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
   3553       BID_SWAP128 (res);
   3554       BID_RETURN (res)
   3555 
   3556     } else {
   3557       ;
   3558     }
   3559 
   3560   } // end if delta < 0
   3561 
   3562   *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
   3563   *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
   3564   *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
   3565   *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
   3566   BID_SWAP128 (res);
   3567   BID_RETURN (res)
   3568 
   3569 }
   3570 
   3571 
   3572 #if DECIMAL_CALL_BY_REFERENCE
   3573 void
   3574 bid128_fma (UINT128 * pres, UINT128 * px, UINT128 * py, UINT128 * pz
   3575             _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
   3576             _EXC_INFO_PARAM) {
   3577   UINT128 x = *px, y = *py, z = *pz;
   3578 #if !DECIMAL_GLOBAL_ROUNDING
   3579   unsigned int rnd_mode = *prnd_mode;
   3580 #endif
   3581 #else
   3582 UINT128
   3583 bid128_fma (UINT128 x, UINT128 y, UINT128 z
   3584             _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
   3585             _EXC_INFO_PARAM) {
   3586 #endif
   3587   int is_midpoint_lt_even, is_midpoint_gt_even,
   3588     is_inexact_lt_midpoint, is_inexact_gt_midpoint;
   3589   UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
   3590 
   3591 #if DECIMAL_CALL_BY_REFERENCE
   3592   bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
   3593         	  &is_inexact_lt_midpoint, &is_inexact_gt_midpoint,
   3594         	  &res, &x, &y, &z
   3595         	  _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
   3596         	  _EXC_INFO_ARG);
   3597 #else
   3598   res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
   3599         		&is_inexact_lt_midpoint,
   3600         		&is_inexact_gt_midpoint, x, y,
   3601         		z _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
   3602         		_EXC_INFO_ARG);
   3603 #endif
   3604   BID_RETURN (res);
   3605 }
   3606 
   3607 
   3608 #if DECIMAL_CALL_BY_REFERENCE
   3609 void
   3610 bid128ddd_fma (UINT128 * pres, UINT64 * px, UINT64 * py, UINT64 * pz
   3611                _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
   3612                _EXC_INFO_PARAM) {
   3613   UINT64 x = *px, y = *py, z = *pz;
   3614 #if !DECIMAL_GLOBAL_ROUNDING
   3615   unsigned int rnd_mode = *prnd_mode;
   3616 #endif
   3617 #else
   3618 UINT128
   3619 bid128ddd_fma (UINT64 x, UINT64 y, UINT64 z
   3620                _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
   3621                _EXC_INFO_PARAM) {
   3622 #endif
   3623   int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0,
   3624     is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0;
   3625   UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
   3626   UINT128 x1, y1, z1;
   3627 
   3628 #if DECIMAL_CALL_BY_REFERENCE
   3629   bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
   3630   bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
   3631   bid64_to_bid128 (&z1, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
   3632   bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
   3633         	  &is_inexact_lt_midpoint, &is_inexact_gt_midpoint,
   3634         	  &res, &x1, &y1, &z1
   3635         	  _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
   3636         	  _EXC_INFO_ARG);
   3637 #else
   3638   x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
   3639   y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
   3640   z1 = bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
   3641   res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
   3642         		&is_inexact_lt_midpoint,
   3643         		&is_inexact_gt_midpoint, x1, y1,
   3644         		z1 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
   3645         		_EXC_INFO_ARG);
   3646 #endif
   3647   BID_RETURN (res);
   3648 }
   3649 
   3650 
   3651 #if DECIMAL_CALL_BY_REFERENCE
   3652 void
   3653 bid128ddq_fma (UINT128 * pres, UINT64 * px, UINT64 * py, UINT128 * pz
   3654                _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
   3655                _EXC_INFO_PARAM) {
   3656   UINT64 x = *px, y = *py;
   3657   UINT128 z = *pz;
   3658 #if !DECIMAL_GLOBAL_ROUNDING
   3659   unsigned int rnd_mode = *prnd_mode;
   3660 #endif
   3661 #else
   3662 UINT128
   3663 bid128ddq_fma (UINT64 x, UINT64 y, UINT128 z
   3664                _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
   3665                _EXC_INFO_PARAM) {
   3666 #endif
   3667   int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0,
   3668     is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0;
   3669   UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
   3670   UINT128 x1, y1;
   3671 
   3672 #if DECIMAL_CALL_BY_REFERENCE
   3673   bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
   3674   bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
   3675   bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
   3676         	  &is_inexact_lt_midpoint, &is_inexact_gt_midpoint,
   3677         	  &res, &x1, &y1, &z
   3678         	  _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
   3679         	  _EXC_INFO_ARG);
   3680 #else
   3681   x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
   3682   y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
   3683   res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
   3684         		&is_inexact_lt_midpoint,
   3685         		&is_inexact_gt_midpoint, x1, y1,
   3686         		z _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
   3687         		_EXC_INFO_ARG);
   3688 #endif
   3689   BID_RETURN (res);
   3690 }
   3691 
   3692 
   3693 #if DECIMAL_CALL_BY_REFERENCE
   3694 void
   3695 bid128dqd_fma (UINT128 * pres, UINT64 * px, UINT128 * py, UINT64 * pz
   3696                _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
   3697                _EXC_INFO_PARAM) {
   3698   UINT64 x = *px, z = *pz;
   3699 #if !DECIMAL_GLOBAL_ROUNDING
   3700   unsigned int rnd_mode = *prnd_mode;
   3701 #endif
   3702 #else
   3703 UINT128
   3704 bid128dqd_fma (UINT64 x, UINT128 y, UINT64 z
   3705                _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
   3706                _EXC_INFO_PARAM) {
   3707 #endif
   3708   int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0,
   3709     is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0;
   3710   UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
   3711   UINT128 x1, z1;
   3712 
   3713 #if DECIMAL_CALL_BY_REFERENCE
   3714   bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
   3715   bid64_to_bid128 (&z1, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
   3716   bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
   3717         	  &is_inexact_lt_midpoint, &is_inexact_gt_midpoint,
   3718         	  &res, &x1, py, &z1
   3719         	  _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
   3720         	  _EXC_INFO_ARG);
   3721 #else
   3722   x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
   3723   z1 = bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
   3724   res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
   3725         		&is_inexact_lt_midpoint,
   3726         		&is_inexact_gt_midpoint, x1, y,
   3727         		z1 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
   3728         		_EXC_INFO_ARG);
   3729 #endif
   3730   BID_RETURN (res);
   3731 }
   3732 
   3733 
   3734 #if DECIMAL_CALL_BY_REFERENCE
   3735 void
   3736 bid128dqq_fma (UINT128 * pres, UINT64 * px, UINT128 * py, UINT128 * pz
   3737                _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
   3738                _EXC_INFO_PARAM) {
   3739   UINT64 x = *px;
   3740 #if !DECIMAL_GLOBAL_ROUNDING
   3741   unsigned int rnd_mode = *prnd_mode;
   3742 #endif
   3743 #else
   3744 UINT128
   3745 bid128dqq_fma (UINT64 x, UINT128 y, UINT128 z
   3746                _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
   3747                _EXC_INFO_PARAM) {
   3748 #endif
   3749   int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0,
   3750     is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0;
   3751   UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
   3752   UINT128 x1;
   3753 
   3754 #if DECIMAL_CALL_BY_REFERENCE
   3755   bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
   3756   bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
   3757         	  &is_inexact_lt_midpoint, &is_inexact_gt_midpoint,
   3758         	  &res, &x1, py, pz
   3759         	  _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
   3760         	  _EXC_INFO_ARG);
   3761 #else
   3762   x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
   3763   res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
   3764         		&is_inexact_lt_midpoint,
   3765         		&is_inexact_gt_midpoint, x1, y,
   3766         		z _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
   3767         		_EXC_INFO_ARG);
   3768 #endif
   3769   BID_RETURN (res);
   3770 }
   3771 
   3772 
   3773 #if DECIMAL_CALL_BY_REFERENCE
   3774 void
   3775 bid128qdd_fma (UINT128 * pres, UINT128 * px, UINT64 * py, UINT64 * pz
   3776                _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
   3777                _EXC_INFO_PARAM) {
   3778   UINT64 y = *py, z = *pz;
   3779 #if !DECIMAL_GLOBAL_ROUNDING
   3780   unsigned int rnd_mode = *prnd_mode;
   3781 #endif
   3782 #else
   3783 UINT128
   3784 bid128qdd_fma (UINT128 x, UINT64 y, UINT64 z
   3785                _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
   3786                _EXC_INFO_PARAM) {
   3787 #endif
   3788   int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0,
   3789     is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0;
   3790   UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
   3791   UINT128 y1, z1;
   3792 
   3793 #if DECIMAL_CALL_BY_REFERENCE
   3794   bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
   3795   bid64_to_bid128 (&z1, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
   3796   bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
   3797         	  &is_inexact_lt_midpoint, &is_inexact_gt_midpoint,
   3798         	  &res, px, &y1, &z1
   3799         	  _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
   3800         	  _EXC_INFO_ARG);
   3801 #else
   3802   y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
   3803   z1 = bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
   3804   res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
   3805         		&is_inexact_lt_midpoint,
   3806         		&is_inexact_gt_midpoint, x, y1,
   3807         		z1 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
   3808         		_EXC_INFO_ARG);
   3809 #endif
   3810   BID_RETURN (res);
   3811 }
   3812 
   3813 
   3814 #if DECIMAL_CALL_BY_REFERENCE
   3815 void
   3816 bid128qdq_fma (UINT128 * pres, UINT128 * px, UINT64 * py, UINT128 * pz
   3817                _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
   3818                _EXC_INFO_PARAM) {
   3819   UINT64 y = *py;
   3820 #if !DECIMAL_GLOBAL_ROUNDING
   3821   unsigned int rnd_mode = *prnd_mode;
   3822 #endif
   3823 #else
   3824 UINT128
   3825 bid128qdq_fma (UINT128 x, UINT64 y, UINT128 z
   3826                _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
   3827                _EXC_INFO_PARAM) {
   3828 #endif
   3829   int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0,
   3830     is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0;
   3831   UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
   3832   UINT128 y1;
   3833 
   3834 #if DECIMAL_CALL_BY_REFERENCE
   3835   bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
   3836   bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
   3837         	  &is_inexact_lt_midpoint, &is_inexact_gt_midpoint,
   3838         	  &res, px, &y1, pz
   3839         	  _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
   3840         	  _EXC_INFO_ARG);
   3841 #else
   3842   y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
   3843   res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
   3844         		&is_inexact_lt_midpoint,
   3845         		&is_inexact_gt_midpoint, x, y1,
   3846         		z _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
   3847         		_EXC_INFO_ARG);
   3848 #endif
   3849   BID_RETURN (res);
   3850 }
   3851 
   3852 
   3853 #if DECIMAL_CALL_BY_REFERENCE
   3854 void
   3855 bid128qqd_fma (UINT128 * pres, UINT128 * px, UINT128 * py, UINT64 * pz
   3856                _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
   3857                _EXC_INFO_PARAM) {
   3858   UINT64 z = *pz;
   3859 #if !DECIMAL_GLOBAL_ROUNDING
   3860   unsigned int rnd_mode = *prnd_mode;
   3861 #endif
   3862 #else
   3863 UINT128
   3864 bid128qqd_fma (UINT128 x, UINT128 y, UINT64 z
   3865                _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
   3866                _EXC_INFO_PARAM) {
   3867 #endif
   3868   int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0,
   3869     is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0;
   3870   UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
   3871   UINT128 z1;
   3872 
   3873 #if DECIMAL_CALL_BY_REFERENCE
   3874   bid64_to_bid128 (&z1, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
   3875   bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
   3876         	  &is_inexact_lt_midpoint, &is_inexact_gt_midpoint,
   3877         	  &res, px, py, &z1
   3878         	  _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
   3879         	  _EXC_INFO_ARG);
   3880 #else
   3881   z1 = bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
   3882   res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
   3883         		&is_inexact_lt_midpoint,
   3884         		&is_inexact_gt_midpoint, x, y,
   3885         		z1 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
   3886         		_EXC_INFO_ARG);
   3887 #endif
   3888   BID_RETURN (res);
   3889 }
   3890 
   3891 // Note: bid128qqq_fma is represented by bid128_fma
   3892 
   3893 // Note: bid64ddd_fma is represented by bid64_fma
   3894 
   3895 #if DECIMAL_CALL_BY_REFERENCE
   3896 void
   3897 bid64ddq_fma (UINT64 * pres, UINT64 * px, UINT64 * py, UINT128 * pz
   3898               _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
   3899               _EXC_INFO_PARAM) {
   3900   UINT64 x = *px, y = *py;
   3901 #if !DECIMAL_GLOBAL_ROUNDING
   3902   unsigned int rnd_mode = *prnd_mode;
   3903 #endif
   3904 #else
   3905 UINT64
   3906 bid64ddq_fma (UINT64 x, UINT64 y, UINT128 z
   3907               _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
   3908               _EXC_INFO_PARAM) {
   3909 #endif
   3910   UINT64 res1 = 0xbaddbaddbaddbaddull;
   3911   UINT128 x1, y1;
   3912 
   3913 #if DECIMAL_CALL_BY_REFERENCE
   3914   bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
   3915   bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
   3916   bid64qqq_fma (&res1, &x1, &y1, pz
   3917         	_RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
   3918         	_EXC_INFO_ARG);
   3919 #else
   3920   x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
   3921   y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
   3922   res1 = bid64qqq_fma (x1, y1, z
   3923         	       _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
   3924         	       _EXC_INFO_ARG);
   3925 #endif
   3926   BID_RETURN (res1);
   3927 }
   3928 
   3929 
   3930 #if DECIMAL_CALL_BY_REFERENCE
   3931 void
   3932 bid64dqd_fma (UINT64 * pres, UINT64 * px, UINT128 * py, UINT64 * pz
   3933               _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
   3934               _EXC_INFO_PARAM) {
   3935   UINT64 x = *px, z = *pz;
   3936 #if !DECIMAL_GLOBAL_ROUNDING
   3937   unsigned int rnd_mode = *prnd_mode;
   3938 #endif
   3939 #else
   3940 UINT64
   3941 bid64dqd_fma (UINT64 x, UINT128 y, UINT64 z
   3942               _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
   3943               _EXC_INFO_PARAM) {
   3944 #endif
   3945   UINT64 res1 = 0xbaddbaddbaddbaddull;
   3946   UINT128 x1, z1;
   3947 
   3948 #if DECIMAL_CALL_BY_REFERENCE
   3949   bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
   3950   bid64_to_bid128 (&z1, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
   3951   bid64qqq_fma (&res1, &x1, py, &z1
   3952         	_RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
   3953         	_EXC_INFO_ARG);
   3954 #else
   3955   x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
   3956   z1 = bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
   3957   res1 = bid64qqq_fma (x1, y, z1
   3958         	       _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
   3959         	       _EXC_INFO_ARG);
   3960 #endif
   3961   BID_RETURN (res1);
   3962 }
   3963 
   3964 
   3965 #if DECIMAL_CALL_BY_REFERENCE
   3966 void
   3967 bid64dqq_fma (UINT64 * pres, UINT64 * px, UINT128 * py, UINT128 * pz
   3968               _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
   3969               _EXC_INFO_PARAM) {
   3970   UINT64 x = *px;
   3971 #if !DECIMAL_GLOBAL_ROUNDING
   3972   unsigned int rnd_mode = *prnd_mode;
   3973 #endif
   3974 #else
   3975 UINT64
   3976 bid64dqq_fma (UINT64 x, UINT128 y, UINT128 z
   3977               _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
   3978               _EXC_INFO_PARAM) {
   3979 #endif
   3980   UINT64 res1 = 0xbaddbaddbaddbaddull;
   3981   UINT128 x1;
   3982 
   3983 #if DECIMAL_CALL_BY_REFERENCE
   3984   bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
   3985   bid64qqq_fma (&res1, &x1, py, pz
   3986         	_RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
   3987         	_EXC_INFO_ARG);
   3988 #else
   3989   x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
   3990   res1 = bid64qqq_fma (x1, y, z
   3991         	       _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
   3992         	       _EXC_INFO_ARG);
   3993 #endif
   3994   BID_RETURN (res1);
   3995 }
   3996 
   3997 
   3998 #if DECIMAL_CALL_BY_REFERENCE
   3999 void
   4000 bid64qdd_fma (UINT64 * pres, UINT128 * px, UINT64 * py, UINT64 * pz
   4001               _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
   4002               _EXC_INFO_PARAM) {
   4003   UINT64 y = *py, z = *pz;
   4004 #if !DECIMAL_GLOBAL_ROUNDING
   4005   unsigned int rnd_mode = *prnd_mode;
   4006 #endif
   4007 #else
   4008 UINT64
   4009 bid64qdd_fma (UINT128 x, UINT64 y, UINT64 z
   4010               _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
   4011               _EXC_INFO_PARAM) {
   4012 #endif
   4013   UINT64 res1 = 0xbaddbaddbaddbaddull;
   4014   UINT128 y1, z1;
   4015 
   4016 #if DECIMAL_CALL_BY_REFERENCE
   4017   bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
   4018   bid64_to_bid128 (&z1, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
   4019   bid64qqq_fma (&res1, px, &y1, &z1
   4020         	_RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
   4021         	_EXC_INFO_ARG);
   4022 #else
   4023   y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
   4024   z1 = bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
   4025   res1 = bid64qqq_fma (x, y1, z1
   4026         	       _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
   4027         	       _EXC_INFO_ARG);
   4028 #endif
   4029   BID_RETURN (res1);
   4030 }
   4031 
   4032 
   4033 #if DECIMAL_CALL_BY_REFERENCE
   4034 void
   4035 bid64qdq_fma (UINT64 * pres, UINT128 * px, UINT64 * py, UINT128 * pz
   4036               _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
   4037               _EXC_INFO_PARAM) {
   4038   UINT64 y = *py;
   4039 #if !DECIMAL_GLOBAL_ROUNDING
   4040   unsigned int rnd_mode = *prnd_mode;
   4041 #endif
   4042 #else
   4043 UINT64
   4044 bid64qdq_fma (UINT128 x, UINT64 y, UINT128 z
   4045               _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
   4046               _EXC_INFO_PARAM) {
   4047 #endif
   4048   UINT64 res1 = 0xbaddbaddbaddbaddull;
   4049   UINT128 y1;
   4050 
   4051 #if DECIMAL_CALL_BY_REFERENCE
   4052   bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
   4053   bid64qqq_fma (&res1, px, &y1, pz
   4054         	_RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
   4055         	_EXC_INFO_ARG);
   4056 #else
   4057   y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
   4058   res1 = bid64qqq_fma (x, y1, z
   4059         	       _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
   4060         	       _EXC_INFO_ARG);
   4061 #endif
   4062   BID_RETURN (res1);
   4063 }
   4064 
   4065 
   4066 #if DECIMAL_CALL_BY_REFERENCE
   4067 void
   4068 bid64qqd_fma (UINT64 * pres, UINT128 * px, UINT128 * py, UINT64 * pz
   4069               _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
   4070               _EXC_INFO_PARAM) {
   4071   UINT64 z = *pz;
   4072 #if !DECIMAL_GLOBAL_ROUNDING
   4073   unsigned int rnd_mode = *prnd_mode;
   4074 #endif
   4075 #else
   4076 UINT64
   4077 bid64qqd_fma (UINT128 x, UINT128 y, UINT64 z
   4078               _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
   4079               _EXC_INFO_PARAM) {
   4080 #endif
   4081   UINT64 res1 = 0xbaddbaddbaddbaddull;
   4082   UINT128 z1;
   4083 
   4084 #if DECIMAL_CALL_BY_REFERENCE
   4085   bid64_to_bid128 (&z1, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
   4086   bid64qqq_fma (&res1, px, py, &z1
   4087         	_RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
   4088         	_EXC_INFO_ARG);
   4089 #else
   4090   z1 = bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
   4091   res1 = bid64qqq_fma (x, y, z1
   4092         	       _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
   4093         	       _EXC_INFO_ARG);
   4094 #endif
   4095   BID_RETURN (res1);
   4096 }
   4097 
   4098 
   4099 #if DECIMAL_CALL_BY_REFERENCE
   4100 void
   4101 bid64qqq_fma (UINT64 * pres, UINT128 * px, UINT128 * py, UINT128 * pz
   4102               _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
   4103               _EXC_INFO_PARAM) {
   4104   UINT128 x = *px, y = *py, z = *pz;
   4105 #if !DECIMAL_GLOBAL_ROUNDING
   4106   unsigned int rnd_mode = *prnd_mode;
   4107 #endif
   4108 #else
   4109 UINT64
   4110 bid64qqq_fma (UINT128 x, UINT128 y, UINT128 z
   4111               _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
   4112               _EXC_INFO_PARAM) {
   4113 #endif
   4114   int is_midpoint_lt_even0 = 0, is_midpoint_gt_even0 = 0,
   4115     is_inexact_lt_midpoint0 = 0, is_inexact_gt_midpoint0 = 0;
   4116   int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0,
   4117     is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0;
   4118   int incr_exp;
   4119   UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
   4120   UINT128 res128 = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
   4121   UINT64 res1 = 0xbaddbaddbaddbaddull;
   4122   unsigned int save_fpsf; // needed because of the call to bid128_ext_fma
   4123   UINT64 sign;
   4124   UINT64 exp;
   4125   int unbexp;
   4126   UINT128 C;
   4127   BID_UI64DOUBLE tmp;
   4128   int nr_bits;
   4129   int q, x0;
   4130   int scale;
   4131   int lt_half_ulp = 0, eq_half_ulp = 0;
   4132 
   4133   // Note: for rounding modes other than RN or RA, the result can be obtained
   4134   // by rounding first to BID128 and then to BID64
   4135 
   4136   save_fpsf = *pfpsf; // sticky bits - caller value must be preserved
   4137   *pfpsf = 0;
   4138 
   4139 #if DECIMAL_CALL_BY_REFERENCE
   4140   bid128_ext_fma (&is_midpoint_lt_even0, &is_midpoint_gt_even0,
   4141         	  &is_inexact_lt_midpoint0, &is_inexact_gt_midpoint0,
   4142         	  &res, &x, &y, &z
   4143         	  _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
   4144         	  _EXC_INFO_ARG);
   4145 #else
   4146   res = bid128_ext_fma (&is_midpoint_lt_even0, &is_midpoint_gt_even0,
   4147         		&is_inexact_lt_midpoint0,
   4148         		&is_inexact_gt_midpoint0, x, y,
   4149         		z _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
   4150         		_EXC_INFO_ARG);
   4151 #endif
   4152 
   4153   if ((rnd_mode == ROUNDING_DOWN) || (rnd_mode == ROUNDING_UP) ||
   4154       (rnd_mode == ROUNDING_TO_ZERO) || // no double rounding error is possible
   4155       ((res.w[HIGH_128W] & MASK_NAN) == MASK_NAN) || //res=QNaN (cannot be SNaN)
   4156       ((res.w[HIGH_128W] & MASK_ANY_INF) == MASK_INF)) { // result is infinity
   4157 #if DECIMAL_CALL_BY_REFERENCE
   4158     bid128_to_bid64 (&res1, &res _RND_MODE_ARG _EXC_FLAGS_ARG);
   4159 #else
   4160     res1 = bid128_to_bid64 (res _RND_MODE_ARG _EXC_FLAGS_ARG);
   4161 #endif
   4162     // determine the unbiased exponent of the result
   4163     unbexp = ((res1 >> 53) & 0x3ff) - 398;
   4164 
   4165     // if subnormal, res1  must have exp = -398
   4166     // if tiny and inexact set underflow and inexact status flags
   4167     if (!((res1 & MASK_NAN) == MASK_NAN) &&	// res1 not NaN
   4168         (unbexp == -398)
   4169         && ((res1 & MASK_BINARY_SIG1) < 1000000000000000ull)
   4170         && (is_inexact_lt_midpoint0 || is_inexact_gt_midpoint0
   4171             || is_midpoint_lt_even0 || is_midpoint_gt_even0)) {
   4172       // set the inexact flag and the underflow flag
   4173       *pfpsf |= (INEXACT_EXCEPTION | UNDERFLOW_EXCEPTION);
   4174     } else if (is_inexact_lt_midpoint0 || is_inexact_gt_midpoint0 ||
   4175                is_midpoint_lt_even0 || is_midpoint_gt_even0) {
   4176       // set the inexact flag and the underflow flag
   4177       *pfpsf |= INEXACT_EXCEPTION;
   4178     }
   4179 
   4180     *pfpsf |= save_fpsf;
   4181     BID_RETURN (res1);
   4182   } // else continue, and use rounding to nearest to round to 16 digits
   4183 
   4184   // at this point the result is rounded to nearest (even or away) to 34 digits
   4185   // (or less if exact), and it is zero or finite non-zero canonical [sub]normal
   4186   sign = res.w[HIGH_128W] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative
   4187   exp = res.w[HIGH_128W] & MASK_EXP; // biased and shifted left 49 bits
   4188   unbexp = (exp >> 49) - 6176;
   4189   C.w[1] = res.w[HIGH_128W] & MASK_COEFF;
   4190   C.w[0] = res.w[LOW_128W];
   4191 
   4192   if ((C.w[1] == 0x0 && C.w[0] == 0x0) ||	// result is zero
   4193       (unbexp <= (-398 - 35)) || (unbexp >= (369 + 16))) {
   4194       // clear under/overflow
   4195 #if DECIMAL_CALL_BY_REFERENCE
   4196     bid128_to_bid64 (&res1, &res _RND_MODE_ARG _EXC_FLAGS_ARG);
   4197 #else
   4198     res1 = bid128_to_bid64 (res _RND_MODE_ARG _EXC_FLAGS_ARG);
   4199 #endif
   4200     *pfpsf |= save_fpsf;
   4201     BID_RETURN (res1);
   4202   } // else continue
   4203 
   4204   // -398 - 34 <= unbexp <= 369 + 15
   4205   if (rnd_mode == ROUNDING_TIES_AWAY) {
   4206     // apply correction, if needed, to make the result rounded to nearest-even
   4207     if (is_midpoint_gt_even) {
   4208       // res = res - 1
   4209       res1--; // res1 is now even
   4210     } // else the result is already correctly rounded to nearest-even
   4211   }
   4212   // at this point the result is finite, non-zero canonical normal or subnormal,
   4213   // and in most cases overflow or underflow will not occur
   4214 
   4215   // determine the number of digits q in the result
   4216   // q = nr. of decimal digits in x
   4217   // determine first the nr. of bits in x
   4218   if (C.w[1] == 0) {
   4219     if (C.w[0] >= 0x0020000000000000ull) { // x >= 2^53
   4220       // split the 64-bit value in two 32-bit halves to avoid rounding errors
   4221       if (C.w[0] >= 0x0000000100000000ull) { // x >= 2^32
   4222         tmp.d = (double) (C.w[0] >> 32); // exact conversion
   4223         nr_bits =
   4224           33 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
   4225       } else { // x < 2^32
   4226         tmp.d = (double) (C.w[0]); // exact conversion
   4227         nr_bits =
   4228           1 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
   4229       }
   4230     } else { // if x < 2^53
   4231       tmp.d = (double) C.w[0]; // exact conversion
   4232       nr_bits =
   4233         1 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
   4234     }
   4235   } else { // C.w[1] != 0 => nr. bits = 64 + nr_bits (C.w[1])
   4236     tmp.d = (double) C.w[1]; // exact conversion
   4237     nr_bits =
   4238       65 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
   4239   }
   4240   q = nr_digits[nr_bits - 1].digits;
   4241   if (q == 0) {
   4242     q = nr_digits[nr_bits - 1].digits1;
   4243     if (C.w[1] > nr_digits[nr_bits - 1].threshold_hi ||
   4244         (C.w[1] == nr_digits[nr_bits - 1].threshold_hi &&
   4245          C.w[0] >= nr_digits[nr_bits - 1].threshold_lo))
   4246       q++;
   4247   }
   4248   // if q > 16, round to nearest even to 16 digits (but for underflow it may
   4249   // have to be truncated even more)
   4250   if (q > 16) {
   4251     x0 = q - 16;
   4252     if (q <= 18) {
   4253       round64_2_18 (q, x0, C.w[0], &res1, &incr_exp,
   4254         	    &is_midpoint_lt_even, &is_midpoint_gt_even,
   4255         	    &is_inexact_lt_midpoint, &is_inexact_gt_midpoint);
   4256     } else { // 19 <= q <= 34
   4257       round128_19_38 (q, x0, C, &res128, &incr_exp,
   4258         	      &is_midpoint_lt_even, &is_midpoint_gt_even,
   4259         	      &is_inexact_lt_midpoint, &is_inexact_gt_midpoint);
   4260       res1 = res128.w[0]; // the result fits in 64 bits
   4261     }
   4262     unbexp = unbexp + x0;
   4263     if (incr_exp)
   4264       unbexp++;
   4265     q = 16; // need to set in case denormalization is necessary
   4266   } else {
   4267     // the result does not require a second rounding (and it must have
   4268     // been exact in the first rounding, since q <= 16)
   4269     res1 = C.w[0];
   4270   }
   4271 
   4272   // avoid a double rounding error
   4273   if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) &&
   4274       is_midpoint_lt_even) { // double rounding error upward
   4275     // res = res - 1
   4276     res1--; // res1 becomes odd
   4277     is_midpoint_lt_even = 0;
   4278     is_inexact_lt_midpoint = 1;
   4279     if (res1 == 0x00038d7ea4c67fffull) { // 10^15 - 1
   4280       res1 = 0x002386f26fc0ffffull; // 10^16 - 1
   4281       unbexp--;
   4282     }
   4283   } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) &&
   4284       is_midpoint_gt_even) { // double rounding error downward
   4285     // res = res + 1
   4286     res1++; // res1 becomes odd (so it cannot be 10^16)
   4287     is_midpoint_gt_even = 0;
   4288     is_inexact_gt_midpoint = 1;
   4289   } else if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
   4290              !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) {
   4291     // if this second rounding was exact the result may still be
   4292     // inexact because of the first rounding
   4293     if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) {
   4294       is_inexact_gt_midpoint = 1;
   4295     }
   4296     if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) {
   4297       is_inexact_lt_midpoint = 1;
   4298     }
   4299   } else if (is_midpoint_gt_even &&
   4300              (is_inexact_gt_midpoint0 || is_midpoint_lt_even0)) {
   4301     // pulled up to a midpoint
   4302     is_inexact_lt_midpoint = 1;
   4303     is_inexact_gt_midpoint = 0;
   4304     is_midpoint_lt_even = 0;
   4305     is_midpoint_gt_even = 0;
   4306   } else if (is_midpoint_lt_even &&
   4307              (is_inexact_lt_midpoint0 || is_midpoint_gt_even0)) {
   4308     // pulled down to a midpoint
   4309     is_inexact_lt_midpoint = 0;
   4310     is_inexact_gt_midpoint = 1;
   4311     is_midpoint_lt_even = 0;
   4312     is_midpoint_gt_even = 0;
   4313   } else {
   4314     ;
   4315   }
   4316   // this is the result rounded correctly to nearest even, with unbounded exp.
   4317 
   4318   // check for overflow
   4319   if (q + unbexp > P16 + expmax16) {
   4320     res1 = sign | 0x7800000000000000ull;
   4321     *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION);
   4322     *pfpsf |= save_fpsf;
   4323     BID_RETURN (res1)
   4324   } else if (unbexp > expmax16) { // q + unbexp <= P16 + expmax16
   4325     // not overflow; the result must be exact, and we can multiply res1 by
   4326     // 10^(unbexp - expmax16) and the product will fit in 16 decimal digits
   4327     scale = unbexp - expmax16;
   4328     res1 = res1 * ten2k64[scale]; // res1 * 10^scale
   4329     unbexp = expmax16; // unbexp - scale
   4330   } else {
   4331     ; // continue
   4332   }
   4333 
   4334   // check for underflow
   4335   if (q + unbexp < P16 + expmin16) {
   4336     if (unbexp < expmin16) {
   4337       // we must truncate more of res
   4338       x0 = expmin16 - unbexp; // x0 >= 1
   4339       is_inexact_lt_midpoint0 = is_inexact_lt_midpoint;
   4340       is_inexact_gt_midpoint0 = is_inexact_gt_midpoint;
   4341       is_midpoint_lt_even0 = is_midpoint_lt_even;
   4342       is_midpoint_gt_even0 = is_midpoint_gt_even;
   4343       is_inexact_lt_midpoint = 0;
   4344       is_inexact_gt_midpoint = 0;
   4345       is_midpoint_lt_even = 0;
   4346       is_midpoint_gt_even = 0;
   4347       // the number of decimal digits in res1 is q
   4348       if (x0 < q) { // 1 <= x0 <= q-1 => round res to q - x0 digits
   4349         // 2 <= q <= 16, 1 <= x0 <= 15
   4350         round64_2_18 (q, x0, res1, &res1, &incr_exp,
   4351         	      &is_midpoint_lt_even, &is_midpoint_gt_even,
   4352         	      &is_inexact_lt_midpoint, &is_inexact_gt_midpoint);
   4353         if (incr_exp) {
   4354           // res1 = 10^(q-x0), 1 <= q - x0 <= q - 1, 1 <= q - x0 <= 15
   4355           res1 = ten2k64[q - x0];
   4356         }
   4357         unbexp = unbexp + x0; // expmin16
   4358       } else if (x0 == q) {
   4359         // the second rounding is for 0.d(0)d(1)...d(q-1) * 10^emin
   4360         // determine relationship with 1/2 ulp
   4361         // q <= 16
   4362         if (res1 < midpoint64[q - 1]) { // < 1/2 ulp
   4363           lt_half_ulp = 1;
   4364           is_inexact_lt_midpoint = 1;
   4365         } else if (res1 == midpoint64[q - 1]) { // = 1/2 ulp
   4366           eq_half_ulp = 1;
   4367           is_midpoint_gt_even = 1;
   4368         } else { // > 1/2 ulp
   4369           // gt_half_ulp = 1;
   4370           is_inexact_gt_midpoint = 1;
   4371         }
   4372         if (lt_half_ulp || eq_half_ulp) {
   4373           // res = +0.0 * 10^expmin16
   4374           res1 = 0x0000000000000000ull;
   4375         } else { // if (gt_half_ulp)
   4376           // res = +1 * 10^expmin16
   4377           res1 = 0x0000000000000001ull;
   4378         }
   4379         unbexp = expmin16;
   4380       } else { // if (x0 > q)
   4381         // the second rounding is for 0.0...d(0)d(1)...d(q-1) * 10^emin
   4382         res1 = 0x0000000000000000ull;
   4383         unbexp = expmin16;
   4384         is_inexact_lt_midpoint = 1;
   4385       }
   4386       // avoid a double rounding error
   4387       if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) &&
   4388           is_midpoint_lt_even) { // double rounding error upward
   4389         // res = res - 1
   4390         res1--; // res1 becomes odd
   4391         is_midpoint_lt_even = 0;
   4392         is_inexact_lt_midpoint = 1;
   4393       } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) &&
   4394           is_midpoint_gt_even) { // double rounding error downward
   4395         // res = res + 1
   4396         res1++; // res1 becomes odd
   4397         is_midpoint_gt_even = 0;
   4398         is_inexact_gt_midpoint = 1;
   4399       } else if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
   4400         	 !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) {
   4401         // if this rounding was exact the result may still be
   4402         // inexact because of the previous roundings
   4403         if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) {
   4404           is_inexact_gt_midpoint = 1;
   4405         }
   4406         if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) {
   4407           is_inexact_lt_midpoint = 1;
   4408         }
   4409       } else if (is_midpoint_gt_even &&
   4410         	 (is_inexact_gt_midpoint0 || is_midpoint_lt_even0)) {
   4411         // pulled up to a midpoint
   4412         is_inexact_lt_midpoint = 1;
   4413         is_inexact_gt_midpoint = 0;
   4414         is_midpoint_lt_even = 0;
   4415         is_midpoint_gt_even = 0;
   4416       } else if (is_midpoint_lt_even &&
   4417         	 (is_inexact_lt_midpoint0 || is_midpoint_gt_even0)) {
   4418         // pulled down to a midpoint
   4419         is_inexact_lt_midpoint = 0;
   4420         is_inexact_gt_midpoint = 1;
   4421         is_midpoint_lt_even = 0;
   4422         is_midpoint_gt_even = 0;
   4423       } else {
   4424         ;
   4425       }
   4426     }
   4427     // else if unbexp >= emin then q < P (because q + unbexp < P16 + expmin16)
   4428     // and the result is tiny and exact
   4429 
   4430     // check for inexact result
   4431     if (is_inexact_lt_midpoint || is_inexact_gt_midpoint ||
   4432         is_midpoint_lt_even || is_midpoint_gt_even ||
   4433         is_inexact_lt_midpoint0 || is_inexact_gt_midpoint0 ||
   4434         is_midpoint_lt_even0 || is_midpoint_gt_even0) {
   4435       // set the inexact flag and the underflow flag
   4436       *pfpsf |= (INEXACT_EXCEPTION | UNDERFLOW_EXCEPTION);
   4437     }
   4438   } else if (is_inexact_lt_midpoint || is_inexact_gt_midpoint ||
   4439              is_midpoint_lt_even || is_midpoint_gt_even) {
   4440     *pfpsf |= INEXACT_EXCEPTION;
   4441   }
   4442   // this is the result rounded correctly to nearest, with bounded exponent
   4443 
   4444   if (rnd_mode == ROUNDING_TIES_AWAY && is_midpoint_gt_even) { // correction
   4445     // res = res + 1
   4446     res1++; // res1 is now odd
   4447   } // else the result is already correct
   4448 
   4449   // assemble the result
   4450   if (res1 < 0x0020000000000000ull) { // res < 2^53
   4451     res1 = sign | ((UINT64) (unbexp + 398) << 53) | res1;
   4452   } else { // res1 >= 2^53
   4453     res1 = sign | MASK_STEERING_BITS |
   4454       ((UINT64) (unbexp + 398) << 51) | (res1 & MASK_BINARY_SIG2);
   4455   }
   4456   *pfpsf |= save_fpsf;
   4457   BID_RETURN (res1);
   4458 }
   4459