1 /* mpfr_sqr -- Floating-point square 2 3 Copyright 2004-2023 Free Software Foundation, Inc. 4 Contributed by the AriC and Caramba projects, INRIA. 5 6 This file is part of the GNU MPFR Library. 7 8 The GNU MPFR Library is free software; you can redistribute it and/or modify 9 it under the terms of the GNU Lesser General Public License as published by 10 the Free Software Foundation; either version 3 of the License, or (at your 11 option) any later version. 12 13 The GNU MPFR Library is distributed in the hope that it will be useful, but 14 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public 16 License for more details. 17 18 You should have received a copy of the GNU Lesser General Public License 19 along with the GNU MPFR Library; see the file COPYING.LESSER. If not, see 20 https://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc., 21 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */ 22 23 #define MPFR_NEED_LONGLONG_H 24 #include "mpfr-impl.h" 25 26 #if !defined(MPFR_GENERIC_ABI) && (GMP_NUMB_BITS == 32 || GMP_NUMB_BITS == 64) 27 28 /* Special code for prec(a) < GMP_NUMB_BITS and prec(b) <= GMP_NUMB_BITS. 29 Note: this function was copied from mpfr_mul_1 in file mul.c, thus any 30 change here should be done also in mpfr_mul_1. 31 Although this function works as soon as prec(a) < GMP_NUMB_BITS and 32 prec(b) <= GMP_NUMB_BITS, we use it for prec(a)=prec(b) < GMP_NUMB_BITS. */ 33 static int 34 mpfr_sqr_1 (mpfr_ptr a, mpfr_srcptr b, mpfr_rnd_t rnd_mode, mpfr_prec_t p) 35 { 36 mp_limb_t a0; 37 mpfr_limb_ptr ap = MPFR_MANT(a); 38 mp_limb_t b0 = MPFR_MANT(b)[0]; 39 mpfr_exp_t ax; 40 mpfr_prec_t sh = GMP_NUMB_BITS - p; 41 mp_limb_t rb, sb, mask = MPFR_LIMB_MASK(sh); 42 43 /* When prec(b) <= GMP_NUMB_BITS / 2, we could replace umul_ppmm 44 by a limb multiplication as follows, but we assume umul_ppmm is as fast 45 as a limb multiplication on modern processors: 46 a0 = (b0 >> (GMP_NUMB_BITS / 2)) * (b0 >> (GMP_NUMB_BITS / 2)); 47 sb = 0; 48 */ 49 ax = MPFR_GET_EXP(b) * 2; 50 umul_ppmm (a0, sb, b0, b0); 51 if (a0 < MPFR_LIMB_HIGHBIT) 52 { 53 ax --; 54 a0 = (a0 << 1) | (sb >> (GMP_NUMB_BITS - 1)); 55 sb <<= 1; 56 } 57 rb = a0 & (MPFR_LIMB_ONE << (sh - 1)); 58 sb |= (a0 & mask) ^ rb; 59 ap[0] = a0 & ~mask; 60 61 MPFR_SIGN(a) = MPFR_SIGN_POS; 62 63 /* rounding */ 64 if (MPFR_UNLIKELY(ax > __gmpfr_emax)) 65 return mpfr_overflow (a, rnd_mode, MPFR_SIGN_POS); 66 67 /* Warning: underflow should be checked *after* rounding, thus when rounding 68 away and when a > 0.111...111*2^(emin-1), or when rounding to nearest and 69 a >= 0.111...111[1]*2^(emin-1), there is no underflow. */ 70 if (MPFR_UNLIKELY(ax < __gmpfr_emin)) 71 { 72 /* Note: for emin=2*k+1, a >= 0.111...111*2^(emin-1) is not possible, 73 i.e., a >= (1 - 2^(-p))*2^(2k), since we need a = b^2 with EXP(b)=k, 74 and the largest such b is (1 - 2^(-p))*2^k satisfies 75 b^2 < (1 - 2^(-p))*2^(2k). 76 For emin=2*k, it is only possible for some values of p: it is not 77 possible for p=53, because the largest significand is 6369051672525772 78 but its square has only 52 leading ones. For p=24 it is possible, 79 with b = 11863283, whose square has 24 leading ones. */ 80 if (ax == __gmpfr_emin - 1 && ap[0] == ~mask && 81 ((rnd_mode == MPFR_RNDN && rb) || 82 (MPFR_IS_LIKE_RNDA (rnd_mode, 0) && (rb | sb)))) 83 goto rounding; /* no underflow */ 84 /* For RNDN, mpfr_underflow always rounds away, thus for |a| <= 2^(emin-2) 85 we have to change to RNDZ. This corresponds to: 86 (a) either ax < emin - 1 87 (b) or ax = emin - 1 and ap[0] = 1000....000 and rb = sb = 0 */ 88 if (rnd_mode == MPFR_RNDN && 89 (ax < __gmpfr_emin - 1 || 90 (ap[0] == MPFR_LIMB_HIGHBIT && (rb | sb) == 0))) 91 rnd_mode = MPFR_RNDZ; 92 return mpfr_underflow (a, rnd_mode, MPFR_SIGN_POS); 93 } 94 95 rounding: 96 MPFR_EXP (a) = ax; /* Don't use MPFR_SET_EXP since ax might be < __gmpfr_emin 97 in the cases "goto rounding" above. */ 98 if ((rb == 0 && sb == 0) || rnd_mode == MPFR_RNDF) 99 { 100 MPFR_ASSERTD(ax >= __gmpfr_emin); 101 MPFR_RET (0); 102 } 103 else if (rnd_mode == MPFR_RNDN) 104 { 105 if (rb == 0 || (sb == 0 && (ap[0] & (MPFR_LIMB_ONE << sh)) == 0)) 106 goto truncate; 107 else 108 goto add_one_ulp; 109 } 110 else if (MPFR_IS_LIKE_RNDZ (rnd_mode, 0)) 111 { 112 truncate: 113 MPFR_ASSERTD(ax >= __gmpfr_emin); 114 MPFR_RET(-MPFR_SIGN_POS); 115 } 116 else /* round away from zero */ 117 { 118 add_one_ulp: 119 ap[0] += MPFR_LIMB_ONE << sh; 120 if (ap[0] == 0) 121 { 122 ap[0] = MPFR_LIMB_HIGHBIT; 123 if (MPFR_UNLIKELY(ax + 1 > __gmpfr_emax)) 124 return mpfr_overflow (a, rnd_mode, MPFR_SIGN_POS); 125 MPFR_ASSERTD(ax + 1 <= __gmpfr_emax); 126 MPFR_ASSERTD(ax + 1 >= __gmpfr_emin); 127 MPFR_SET_EXP (a, ax + 1); 128 } 129 MPFR_RET(MPFR_SIGN_POS); 130 } 131 } 132 133 /* special code for PREC(a) = GMP_NUMB_BITS */ 134 static int 135 mpfr_sqr_1n (mpfr_ptr a, mpfr_srcptr b, mpfr_rnd_t rnd_mode) 136 { 137 mp_limb_t a0; 138 mpfr_limb_ptr ap = MPFR_MANT(a); 139 mp_limb_t b0 = MPFR_MANT(b)[0]; 140 mpfr_exp_t ax; 141 mp_limb_t rb, sb; 142 143 ax = MPFR_GET_EXP(b) * 2; 144 umul_ppmm (a0, sb, b0, b0); 145 if (a0 < MPFR_LIMB_HIGHBIT) 146 { 147 ax --; 148 a0 = (a0 << 1) | (sb >> (GMP_NUMB_BITS - 1)); 149 sb <<= 1; 150 } 151 rb = sb & MPFR_LIMB_HIGHBIT; 152 sb = sb & ~MPFR_LIMB_HIGHBIT; 153 ap[0] = a0; 154 155 MPFR_SIGN(a) = MPFR_SIGN_POS; 156 157 /* rounding */ 158 if (MPFR_UNLIKELY(ax > __gmpfr_emax)) 159 return mpfr_overflow (a, rnd_mode, MPFR_SIGN_POS); 160 161 /* Warning: underflow should be checked *after* rounding, thus when rounding 162 away and when a > 0.111...111*2^(emin-1), or when rounding to nearest and 163 a >= 0.111...111[1]*2^(emin-1), there is no underflow. */ 164 if (MPFR_UNLIKELY(ax < __gmpfr_emin)) 165 { 166 /* As seen in mpfr_mul_1, we cannot have a0 = 111...111 here if there 167 was not an exponent decrease (ax--) above. 168 In the case of an exponent decrease: 169 - For GMP_NUMB_BITS=32, a0 = 111...111 is not possible since the 170 largest b0 such that b0^2 < 2^(2*32-1) is b0=3037000499, but 171 its square has only 30 leading ones. 172 - For GMP_NUMB_BITS=64, a0 = 111...111 is possible: the largest b0 173 is 13043817825332782212, and its square has 64 leading ones; but 174 since the next bit is rb=0, for RNDN, we always have an underflow. 175 For the test below, note that a is positive. 176 */ 177 if (ax == __gmpfr_emin - 1 && ap[0] == MPFR_LIMB_MAX && 178 MPFR_IS_LIKE_RNDA (rnd_mode, 0)) 179 goto rounding; /* no underflow */ 180 /* For RNDN, mpfr_underflow always rounds away, thus for |a| <= 2^(emin-2) 181 we have to change to RNDZ. This corresponds to: 182 (a) either ax < emin - 1 183 (b) or ax = emin - 1 and ap[0] = 1000....000 and rb = sb = 0 */ 184 if (rnd_mode == MPFR_RNDN && 185 (ax < __gmpfr_emin - 1 || 186 (ap[0] == MPFR_LIMB_HIGHBIT && (rb | sb) == 0))) 187 rnd_mode = MPFR_RNDZ; 188 return mpfr_underflow (a, rnd_mode, MPFR_SIGN_POS); 189 } 190 191 rounding: 192 MPFR_EXP (a) = ax; /* Don't use MPFR_SET_EXP since ax might be < __gmpfr_emin 193 in the cases "goto rounding" above. */ 194 if ((rb == 0 && sb == 0) || rnd_mode == MPFR_RNDF) 195 { 196 MPFR_ASSERTD(ax >= __gmpfr_emin); 197 MPFR_RET (0); 198 } 199 else if (rnd_mode == MPFR_RNDN) 200 { 201 if (rb == 0 || (sb == 0 && (ap[0] & MPFR_LIMB_ONE) == 0)) 202 goto truncate; 203 else 204 goto add_one_ulp; 205 } 206 else if (MPFR_IS_LIKE_RNDZ (rnd_mode, 0)) 207 { 208 truncate: 209 MPFR_ASSERTD(ax >= __gmpfr_emin); 210 MPFR_RET(-MPFR_SIGN_POS); 211 } 212 else /* round away from zero */ 213 { 214 add_one_ulp: 215 ap[0] += MPFR_LIMB_ONE; 216 if (ap[0] == 0) 217 { 218 ap[0] = MPFR_LIMB_HIGHBIT; 219 if (MPFR_UNLIKELY(ax + 1 > __gmpfr_emax)) 220 return mpfr_overflow (a, rnd_mode, MPFR_SIGN_POS); 221 MPFR_ASSERTD(ax + 1 <= __gmpfr_emax); 222 MPFR_ASSERTD(ax + 1 >= __gmpfr_emin); 223 MPFR_SET_EXP (a, ax + 1); 224 } 225 MPFR_RET(MPFR_SIGN_POS); 226 } 227 } 228 229 /* Special code for GMP_NUMB_BITS < prec(a) < 2*GMP_NUMB_BITS and 230 GMP_NUMB_BITS < prec(b) <= 2*GMP_NUMB_BITS. 231 Note: this function was copied and optimized from mpfr_mul_2 in file mul.c, 232 thus any change here should be done also in mpfr_mul_2, if applicable. */ 233 static int 234 mpfr_sqr_2 (mpfr_ptr a, mpfr_srcptr b, mpfr_rnd_t rnd_mode, mpfr_prec_t p) 235 { 236 mp_limb_t h, l, u, v; 237 mpfr_limb_ptr ap = MPFR_MANT(a); 238 mpfr_exp_t ax = 2 * MPFR_GET_EXP(b); 239 mpfr_prec_t sh = 2 * GMP_NUMB_BITS - p; 240 mp_limb_t rb, sb, sb2, mask = MPFR_LIMB_MASK(sh); 241 mp_limb_t *bp = MPFR_MANT(b); 242 243 /* we store the 4-limb product in h=ap[1], l=ap[0], sb=ap[-1], sb2=ap[-2] */ 244 umul_ppmm (h, l, bp[1], bp[1]); 245 umul_ppmm (u, v, bp[1], bp[0]); 246 l += u << 1; 247 h += (l < (u << 1)) + (u >> (GMP_NUMB_BITS - 1)); 248 249 /* now the full square is {h, l, 2*v + high(b0*c0), low(b0*c0)}, 250 where the lower part contributes to less than 3 ulps to {h, l} */ 251 252 /* If h has its most significant bit set and the low sh-1 bits of l are not 253 000...000 nor 111...111 nor 111...110, then we can round correctly; 254 if h has zero as most significant bit, we have to shift left h and l, 255 thus if the low sh-2 bits are not 000...000 nor 111...111 nor 111...110, 256 then we can round correctly. To avoid an extra test we consider the latter 257 case (if we can round, we can also round in the former case). 258 For sh <= 3, we have mask <= 7, thus (mask>>2) <= 1, and the approximation 259 cannot be enough. */ 260 if (MPFR_LIKELY(((l + 2) & (mask >> 2)) > 2)) 261 sb = sb2 = 1; /* result cannot be exact in that case */ 262 else 263 { 264 mp_limb_t carry1, carry2; 265 266 umul_ppmm (sb, sb2, bp[0], bp[0]); 267 /* the full product is {h, l, sb + v + w, sb2} */ 268 ADD_LIMB (sb, v, carry1); 269 ADD_LIMB (l, carry1, carry2); 270 h += carry2; 271 ADD_LIMB (sb, v, carry1); 272 ADD_LIMB (l, carry1, carry2); 273 h += carry2; 274 } 275 if (h < MPFR_LIMB_HIGHBIT) 276 { 277 ax --; 278 h = (h << 1) | (l >> (GMP_NUMB_BITS - 1)); 279 l = (l << 1) | (sb >> (GMP_NUMB_BITS - 1)); 280 sb <<= 1; 281 /* no need to shift sb2 since we only want to know if it is zero or not */ 282 } 283 ap[1] = h; 284 rb = l & (MPFR_LIMB_ONE << (sh - 1)); 285 sb |= ((l & mask) ^ rb) | sb2; 286 ap[0] = l & ~mask; 287 288 MPFR_SIGN(a) = MPFR_SIGN_POS; 289 290 /* rounding */ 291 if (MPFR_UNLIKELY(ax > __gmpfr_emax)) 292 return mpfr_overflow (a, rnd_mode, MPFR_SIGN_POS); 293 294 /* Warning: underflow should be checked *after* rounding, thus when rounding 295 away and when a > 0.111...111*2^(emin-1), or when rounding to nearest and 296 a >= 0.111...111[1]*2^(emin-1), there is no underflow. */ 297 if (MPFR_UNLIKELY(ax < __gmpfr_emin)) 298 { 299 /* Note: like for mpfr_sqr_1, the case 300 0.111...111*2^(emin-1) < a < 2^(emin-1) is not possible when emin is 301 odd, since (modulo a shift) this would imply 1-2^(-p) < a = b^2 < 1, 302 and this is not possible with 1-2^(-p) <= b < 1. 303 For emin even, it is possible for some values of p, for example for 304 p=69 with b=417402170410649030795*2^k. */ 305 if (ax == __gmpfr_emin - 1 && 306 ap[1] == MPFR_LIMB_MAX && 307 ap[0] == ~mask && 308 ((rnd_mode == MPFR_RNDN && rb) || 309 (MPFR_IS_LIKE_RNDA (rnd_mode, 0) && (rb | sb)))) 310 goto rounding; /* no underflow */ 311 /* for RNDN, mpfr_underflow always rounds away, thus for 312 |a| <= 2^(emin-2) we have to change to RNDZ */ 313 if (rnd_mode == MPFR_RNDN && 314 (ax < __gmpfr_emin - 1 || 315 (ap[1] == MPFR_LIMB_HIGHBIT && ap[0] == 0 && (rb | sb) == 0))) 316 rnd_mode = MPFR_RNDZ; 317 return mpfr_underflow (a, rnd_mode, MPFR_SIGN_POS); 318 } 319 320 rounding: 321 MPFR_EXP (a) = ax; /* Don't use MPFR_SET_EXP since ax might be < __gmpfr_emin 322 in the cases "goto rounding" above. */ 323 if ((rb == 0 && sb == 0) || rnd_mode == MPFR_RNDF) 324 { 325 MPFR_ASSERTD(ax >= __gmpfr_emin); 326 MPFR_RET (0); 327 } 328 else if (rnd_mode == MPFR_RNDN) 329 { 330 if (rb == 0 || (sb == 0 && (ap[0] & (MPFR_LIMB_ONE << sh)) == 0)) 331 goto truncate; 332 else 333 goto add_one_ulp; 334 } 335 else if (MPFR_IS_LIKE_RNDZ (rnd_mode, 0)) 336 { 337 truncate: 338 MPFR_ASSERTD(ax >= __gmpfr_emin); 339 MPFR_RET(-MPFR_SIGN_POS); 340 } 341 else /* round away from zero */ 342 { 343 add_one_ulp: 344 ap[0] += MPFR_LIMB_ONE << sh; 345 ap[1] += (ap[0] == 0); 346 if (ap[1] == 0) 347 { 348 ap[1] = MPFR_LIMB_HIGHBIT; 349 if (MPFR_UNLIKELY(ax + 1 > __gmpfr_emax)) 350 return mpfr_overflow (a, rnd_mode, MPFR_SIGN_POS); 351 MPFR_ASSERTD(ax + 1 <= __gmpfr_emax); 352 MPFR_ASSERTD(ax + 1 >= __gmpfr_emin); 353 MPFR_SET_EXP (a, ax + 1); 354 } 355 MPFR_RET(MPFR_SIGN_POS); 356 } 357 } 358 359 /* Special code for 2*GMP_NUMB_BITS < prec(a) < 3*GMP_NUMB_BITS and 360 2*GMP_NUMB_BITS < prec(b) <= 3*GMP_NUMB_BITS. */ 361 static int 362 mpfr_sqr_3 (mpfr_ptr a, mpfr_srcptr b, mpfr_rnd_t rnd_mode, mpfr_prec_t p) 363 { 364 mp_limb_t a0, a1, a2, h, l; 365 mpfr_limb_ptr ap = MPFR_MANT(a); 366 mpfr_exp_t ax = 2 * MPFR_GET_EXP(b); 367 mpfr_prec_t sh = 3 * GMP_NUMB_BITS - p; 368 mp_limb_t rb, sb, sb2, mask = MPFR_LIMB_MASK(sh); 369 mp_limb_t *bp = MPFR_MANT(b); 370 371 /* we store the upper 3-limb product in a2, a1, a0: 372 b2^2, 2*b2*b1, 2*b2*b0+b1^2 */ 373 374 /* first compute b2*b1 and b2*b0, which will be shifted by 1 */ 375 umul_ppmm (a1, a0, bp[2], bp[1]); 376 umul_ppmm (h, l, bp[2], bp[0]); 377 a0 += h; 378 a1 += (a0 < h); 379 /* now a1, a0 contains b2*b1 + floor(b2*b0/B): there can be no overflow 380 since b2*b1*B + b2*b0 <= b2*(b1*B+b0) <= b2*(B^2-1) < B^3 */ 381 382 /* multiply a2, a1, a0 by 2 */ 383 a2 = a1 >> (GMP_NUMB_BITS - 1); 384 a1 = (a1 << 1) | (a0 >> (GMP_NUMB_BITS - 1)); 385 a0 = (a0 << 1); 386 387 /* add b2^2 */ 388 umul_ppmm (h, l, bp[2], bp[2]); 389 a1 += l; 390 a2 += h + (a1 < l); 391 392 /* add b1^2 */ 393 umul_ppmm (h, l, bp[1], bp[1]); 394 a0 += h; 395 a1 += (a0 < h); 396 a2 += (a1 == 0 && a0 < h); 397 398 /* Now the approximate product {a2, a1, a0} has an error of less than 399 5 ulps (3 ulps for the ignored low limbs of 2*b2*b0+b1^2, 400 plus 2 ulps for the ignored 2*b1*b0 (plus b0^2). 401 Since we might shift by 1 bit, we make sure the low sh-2 bits of a0 402 are not 0, -1, -2, -3 or -4. */ 403 404 if (MPFR_LIKELY(((a0 + 4) & (mask >> 2)) > 4)) 405 sb = sb2 = 1; /* result cannot be exact in that case */ 406 else 407 { 408 mp_limb_t t[6]; 409 mpn_sqr (t, bp, 3); 410 a2 = t[5]; 411 a1 = t[4]; 412 a0 = t[3]; 413 sb = t[2]; 414 sb2 = t[1] | t[0]; 415 } 416 if (a2 < MPFR_LIMB_HIGHBIT) 417 { 418 ax --; 419 a2 = (a2 << 1) | (a1 >> (GMP_NUMB_BITS - 1)); 420 a1 = (a1 << 1) | (a0 >> (GMP_NUMB_BITS - 1)); 421 a0 = (a0 << 1) | (sb >> (GMP_NUMB_BITS - 1)); 422 sb <<= 1; 423 /* no need to shift sb2: we only need to know if it is zero or not */ 424 } 425 ap[2] = a2; 426 ap[1] = a1; 427 rb = a0 & (MPFR_LIMB_ONE << (sh - 1)); 428 sb |= ((a0 & mask) ^ rb) | sb2; 429 ap[0] = a0 & ~mask; 430 431 MPFR_SIGN(a) = MPFR_SIGN_POS; 432 433 /* rounding */ 434 if (MPFR_UNLIKELY(ax > __gmpfr_emax)) 435 return mpfr_overflow (a, rnd_mode, MPFR_SIGN_POS); 436 437 /* Warning: underflow should be checked *after* rounding, thus when rounding 438 away and when a > 0.111...111*2^(emin-1), or when rounding to nearest and 439 a >= 0.111...111[1]*2^(emin-1), there is no underflow. */ 440 if (MPFR_UNLIKELY(ax < __gmpfr_emin)) 441 { 442 if (ax == __gmpfr_emin - 1 && 443 ap[2] == MPFR_LIMB_MAX && 444 ap[1] == MPFR_LIMB_MAX && 445 ap[0] == ~mask && 446 ((rnd_mode == MPFR_RNDN && rb) || 447 (MPFR_IS_LIKE_RNDA (rnd_mode, 0) && (rb | sb)))) 448 goto rounding; /* no underflow */ 449 /* for RNDN, mpfr_underflow always rounds away, thus for |a| <= 2^(emin-2) 450 we have to change to RNDZ */ 451 if (rnd_mode == MPFR_RNDN && 452 (ax < __gmpfr_emin - 1 || 453 (ap[2] == MPFR_LIMB_HIGHBIT && ap[1] == 0 && ap[0] == 0 454 && (rb | sb) == 0))) 455 rnd_mode = MPFR_RNDZ; 456 return mpfr_underflow (a, rnd_mode, MPFR_SIGN_POS); 457 } 458 459 rounding: 460 MPFR_EXP (a) = ax; /* Don't use MPFR_SET_EXP since ax might be < __gmpfr_emin 461 in the cases "goto rounding" above. */ 462 if ((rb == 0 && sb == 0) || rnd_mode == MPFR_RNDF) 463 { 464 MPFR_ASSERTD(ax >= __gmpfr_emin); 465 MPFR_RET (0); 466 } 467 else if (rnd_mode == MPFR_RNDN) 468 { 469 if (rb == 0 || (sb == 0 && (ap[0] & (MPFR_LIMB_ONE << sh)) == 0)) 470 goto truncate; 471 else 472 goto add_one_ulp; 473 } 474 else if (MPFR_IS_LIKE_RNDZ (rnd_mode, 0)) 475 { 476 truncate: 477 MPFR_ASSERTD(ax >= __gmpfr_emin); 478 MPFR_RET(-MPFR_SIGN_POS); 479 } 480 else /* round away from zero */ 481 { 482 add_one_ulp: 483 ap[0] += MPFR_LIMB_ONE << sh; 484 ap[1] += (ap[0] == 0); 485 ap[2] += (ap[1] == 0) && (ap[0] == 0); 486 if (ap[2] == 0) 487 { 488 ap[2] = MPFR_LIMB_HIGHBIT; 489 if (MPFR_UNLIKELY(ax + 1 > __gmpfr_emax)) 490 return mpfr_overflow (a, rnd_mode, MPFR_SIGN_POS); 491 MPFR_ASSERTD(ax + 1 <= __gmpfr_emax); 492 MPFR_ASSERTD(ax + 1 >= __gmpfr_emin); 493 MPFR_SET_EXP (a, ax + 1); 494 } 495 MPFR_RET(MPFR_SIGN_POS); 496 } 497 } 498 499 #endif /* !defined(MPFR_GENERIC_ABI) && ... */ 500 501 /* Note: mpfr_sqr will call mpfr_mul if bn > MPFR_SQR_THRESHOLD, 502 in order to use Mulders' mulhigh, which is handled only here 503 to avoid partial code duplication. There is some overhead due 504 to the additional tests, but slowdown should not be noticeable 505 as this code is not executed in very small precisions. */ 506 507 int 508 mpfr_sqr (mpfr_ptr a, mpfr_srcptr b, mpfr_rnd_t rnd_mode) 509 { 510 int cc, inexact; 511 mpfr_exp_t ax; 512 mp_limb_t *tmp; 513 mp_limb_t b1; 514 mpfr_prec_t aq, bq; 515 mp_size_t bn, tn; 516 MPFR_TMP_DECL(marker); 517 518 MPFR_LOG_FUNC 519 (("x[%Pd]=%.*Rg rnd=%d", mpfr_get_prec (b), mpfr_log_prec, b, rnd_mode), 520 ("y[%Pd]=%.*Rg inexact=%d", 521 mpfr_get_prec (a), mpfr_log_prec, a, inexact)); 522 523 /* deal with special cases */ 524 if (MPFR_UNLIKELY(MPFR_IS_SINGULAR(b))) 525 { 526 if (MPFR_IS_NAN(b)) 527 { 528 MPFR_SET_NAN(a); 529 MPFR_RET_NAN; 530 } 531 MPFR_SET_POS (a); 532 if (MPFR_IS_INF(b)) 533 MPFR_SET_INF(a); 534 else 535 ( MPFR_ASSERTD(MPFR_IS_ZERO(b)), MPFR_SET_ZERO(a) ); 536 MPFR_RET(0); 537 } 538 aq = MPFR_GET_PREC(a); 539 bq = MPFR_GET_PREC(b); 540 541 #if !defined(MPFR_GENERIC_ABI) && (GMP_NUMB_BITS == 32 || GMP_NUMB_BITS == 64) 542 if (aq == bq) 543 { 544 if (aq < GMP_NUMB_BITS) 545 return mpfr_sqr_1 (a, b, rnd_mode, aq); 546 547 if (GMP_NUMB_BITS < aq && aq < 2 * GMP_NUMB_BITS) 548 return mpfr_sqr_2 (a, b, rnd_mode, aq); 549 550 if (aq == GMP_NUMB_BITS) 551 return mpfr_sqr_1n (a, b, rnd_mode); 552 553 if (2 * GMP_NUMB_BITS < aq && aq < 3 * GMP_NUMB_BITS) 554 return mpfr_sqr_3 (a, b, rnd_mode, aq); 555 } 556 #endif 557 558 ax = 2 * MPFR_GET_EXP (b); 559 MPFR_ASSERTN (2 * (mpfr_uprec_t) bq <= MPFR_PREC_MAX); 560 561 bn = MPFR_LIMB_SIZE (b); /* number of limbs of b */ 562 tn = MPFR_PREC2LIMBS (2 * bq); /* number of limbs of square, 563 2*bn or 2*bn-1 */ 564 565 if (MPFR_UNLIKELY(bn > MPFR_SQR_THRESHOLD)) 566 /* the following line should not be replaced by mpfr_sqr, 567 otherwise we'll get an infinite loop! */ 568 return mpfr_mul (a, b, b, rnd_mode); 569 570 MPFR_TMP_MARK(marker); 571 tmp = MPFR_TMP_LIMBS_ALLOC (2 * bn); 572 573 /* Multiplies the mantissa in temporary allocated space */ 574 mpn_sqr (tmp, MPFR_MANT(b), bn); 575 b1 = tmp[2 * bn - 1]; 576 577 /* now tmp[0]..tmp[2*bn-1] contains the product of both mantissa, 578 with tmp[2*bn-1]>=2^(GMP_NUMB_BITS-2) */ 579 b1 >>= GMP_NUMB_BITS - 1; /* msb from the product */ 580 581 /* if the mantissas of b and c are uniformly distributed in ]1/2, 1], 582 then their product is in ]1/4, 1/2] with probability 2*ln(2)-1 ~ 0.386 583 and in [1/2, 1] with probability 2-2*ln(2) ~ 0.614 */ 584 tmp += 2 * bn - tn; /* +0 or +1 */ 585 if (MPFR_UNLIKELY(b1 == 0)) 586 mpn_lshift (tmp, tmp, tn, 1); /* tn <= k, so no stack corruption */ 587 588 cc = mpfr_round_raw (MPFR_MANT (a), tmp, 2 * bq, 0, aq, rnd_mode, &inexact); 589 /* cc = 1 ==> result is a power of two */ 590 if (MPFR_UNLIKELY(cc)) 591 MPFR_MANT(a)[MPFR_LIMB_SIZE(a)-1] = MPFR_LIMB_HIGHBIT; 592 593 MPFR_TMP_FREE(marker); 594 { 595 mpfr_exp_t ax2 = ax + ((int) b1 - 1 + cc); 596 if (MPFR_UNLIKELY( ax2 > __gmpfr_emax)) 597 return mpfr_overflow (a, rnd_mode, MPFR_SIGN_POS); 598 if (MPFR_UNLIKELY( ax2 < __gmpfr_emin)) 599 { 600 /* In the rounding to the nearest mode, if the exponent of the exact 601 result (i.e. before rounding, i.e. without taking cc into account) 602 is < __gmpfr_emin - 1 or the exact result is a power of 2 (i.e. if 603 both arguments are powers of 2), then round to zero. */ 604 if (rnd_mode == MPFR_RNDN && 605 (ax + (mpfr_exp_t) b1 < __gmpfr_emin || mpfr_powerof2_raw (b))) 606 rnd_mode = MPFR_RNDZ; 607 return mpfr_underflow (a, rnd_mode, MPFR_SIGN_POS); 608 } 609 MPFR_SET_EXP (a, ax2); 610 MPFR_SET_POS (a); 611 } 612 MPFR_RET (inexact); 613 } 614