1 1.12 mrg /* Copyright (C) 2007-2022 Free Software Foundation, Inc. 2 1.1 mrg 3 1.1 mrg This file is part of GCC. 4 1.1 mrg 5 1.1 mrg GCC is free software; you can redistribute it and/or modify it under 6 1.1 mrg the terms of the GNU General Public License as published by the Free 7 1.1 mrg Software Foundation; either version 3, or (at your option) any later 8 1.1 mrg version. 9 1.1 mrg 10 1.1 mrg GCC is distributed in the hope that it will be useful, but WITHOUT ANY 11 1.1 mrg WARRANTY; without even the implied warranty of MERCHANTABILITY or 12 1.1 mrg FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 13 1.1 mrg for more details. 14 1.1 mrg 15 1.1 mrg Under Section 7 of GPL version 3, you are granted additional 16 1.1 mrg permissions described in the GCC Runtime Library Exception, version 17 1.1 mrg 3.1, as published by the Free Software Foundation. 18 1.1 mrg 19 1.1 mrg You should have received a copy of the GNU General Public License and 20 1.1 mrg a copy of the GCC Runtime Library Exception along with this program; 21 1.1 mrg see the files COPYING3 and COPYING.RUNTIME respectively. If not, see 22 1.1 mrg <http://www.gnu.org/licenses/>. */ 23 1.1 mrg 24 1.1 mrg #define BID_128RES 25 1.1 mrg #include "bid_div_macros.h" 26 1.1 mrg #ifdef UNCHANGED_BINARY_STATUS_FLAGS 27 1.1 mrg #include <fenv.h> 28 1.1 mrg 29 1.1 mrg #define FE_ALL_FLAGS FE_INVALID|FE_DIVBYZERO|FE_OVERFLOW|FE_UNDERFLOW|FE_INEXACT 30 1.1 mrg #endif 31 1.1 mrg 32 1.1 mrg extern UINT32 convert_table[5][128][2]; 33 1.1 mrg extern SINT8 factors[][2]; 34 1.1 mrg extern UINT8 packed_10000_zeros[]; 35 1.1 mrg 36 1.1 mrg BID128_FUNCTION_ARG2 (bid128_div, x, y) 37 1.1 mrg 38 1.1 mrg UINT256 CA4, CA4r, P256; 39 1.5 mrg UINT128 CX, CY, T128, CQ, CR, CA, TP128, Qh, res; 40 1.1 mrg UINT64 sign_x, sign_y, T, carry64, D, Q_high, Q_low, QX, PD, 41 1.1 mrg valid_y; 42 1.1 mrg int_float fx, fy, f64; 43 1.1 mrg UINT32 QX32, tdigit[3], digit, digit_h, digit_low; 44 1.1 mrg int exponent_x, exponent_y, bin_index, bin_expon, diff_expon, ed2, 45 1.1 mrg digits_q, amount; 46 1.1 mrg int nzeros, i, j, k, d5; 47 1.1 mrg unsigned rmode; 48 1.1 mrg #ifdef UNCHANGED_BINARY_STATUS_FLAGS 49 1.1 mrg fexcept_t binaryflags = 0; 50 1.1 mrg #endif 51 1.1 mrg 52 1.1 mrg valid_y = unpack_BID128_value (&sign_y, &exponent_y, &CY, y); 53 1.1 mrg 54 1.1 mrg // unpack arguments, check for NaN or Infinity 55 1.1 mrg if (!unpack_BID128_value (&sign_x, &exponent_x, &CX, x)) { 56 1.1 mrg // test if x is NaN 57 1.1 mrg if ((x.w[1] & 0x7c00000000000000ull) == 0x7c00000000000000ull) { 58 1.1 mrg #ifdef SET_STATUS_FLAGS 59 1.1 mrg if ((x.w[1] & 0x7e00000000000000ull) == 0x7e00000000000000ull || // sNaN 60 1.1 mrg (y.w[1] & 0x7e00000000000000ull) == 0x7e00000000000000ull) 61 1.1 mrg __set_status_flags (pfpsf, INVALID_EXCEPTION); 62 1.1 mrg #endif 63 1.1 mrg res.w[1] = (CX.w[1]) & QUIET_MASK64; 64 1.1 mrg res.w[0] = CX.w[0]; 65 1.1 mrg BID_RETURN (res); 66 1.1 mrg } 67 1.1 mrg // x is Infinity? 68 1.1 mrg if ((x.w[1] & 0x7800000000000000ull) == 0x7800000000000000ull) { 69 1.1 mrg // check if y is Inf. 70 1.1 mrg if (((y.w[1] & 0x7c00000000000000ull) == 0x7800000000000000ull)) 71 1.1 mrg // return NaN 72 1.1 mrg { 73 1.1 mrg #ifdef SET_STATUS_FLAGS 74 1.1 mrg __set_status_flags (pfpsf, INVALID_EXCEPTION); 75 1.1 mrg #endif 76 1.1 mrg res.w[1] = 0x7c00000000000000ull; 77 1.1 mrg res.w[0] = 0; 78 1.1 mrg BID_RETURN (res); 79 1.1 mrg } 80 1.1 mrg // y is NaN? 81 1.1 mrg if (((y.w[1] & 0x7c00000000000000ull) != 0x7c00000000000000ull)) 82 1.1 mrg // return NaN 83 1.1 mrg { 84 1.1 mrg // return +/-Inf 85 1.1 mrg res.w[1] = ((x.w[1] ^ y.w[1]) & 0x8000000000000000ull) | 86 1.1 mrg 0x7800000000000000ull; 87 1.1 mrg res.w[0] = 0; 88 1.1 mrg BID_RETURN (res); 89 1.1 mrg } 90 1.1 mrg } 91 1.1 mrg // x is 0 92 1.1 mrg if ((y.w[1] & 0x7800000000000000ull) < 0x7800000000000000ull) { 93 1.1 mrg if ((!CY.w[0]) && !(CY.w[1] & 0x0001ffffffffffffull)) { 94 1.1 mrg #ifdef SET_STATUS_FLAGS 95 1.1 mrg __set_status_flags (pfpsf, INVALID_EXCEPTION); 96 1.1 mrg #endif 97 1.1 mrg // x=y=0, return NaN 98 1.1 mrg res.w[1] = 0x7c00000000000000ull; 99 1.1 mrg res.w[0] = 0; 100 1.1 mrg BID_RETURN (res); 101 1.1 mrg } 102 1.1 mrg // return 0 103 1.1 mrg res.w[1] = (x.w[1] ^ y.w[1]) & 0x8000000000000000ull; 104 1.1 mrg exponent_x = exponent_x - exponent_y + DECIMAL_EXPONENT_BIAS_128; 105 1.1 mrg if (exponent_x > DECIMAL_MAX_EXPON_128) 106 1.1 mrg exponent_x = DECIMAL_MAX_EXPON_128; 107 1.1 mrg else if (exponent_x < 0) 108 1.1 mrg exponent_x = 0; 109 1.1 mrg res.w[1] |= (((UINT64) exponent_x) << 49); 110 1.1 mrg res.w[0] = 0; 111 1.1 mrg BID_RETURN (res); 112 1.1 mrg } 113 1.1 mrg } 114 1.1 mrg if (!valid_y) { 115 1.1 mrg // y is Inf. or NaN 116 1.1 mrg 117 1.1 mrg // test if y is NaN 118 1.1 mrg if ((y.w[1] & 0x7c00000000000000ull) == 0x7c00000000000000ull) { 119 1.1 mrg #ifdef SET_STATUS_FLAGS 120 1.1 mrg if ((y.w[1] & 0x7e00000000000000ull) == 0x7e00000000000000ull) // sNaN 121 1.1 mrg __set_status_flags (pfpsf, INVALID_EXCEPTION); 122 1.1 mrg #endif 123 1.1 mrg res.w[1] = CY.w[1] & QUIET_MASK64; 124 1.1 mrg res.w[0] = CY.w[0]; 125 1.1 mrg BID_RETURN (res); 126 1.1 mrg } 127 1.1 mrg // y is Infinity? 128 1.1 mrg if ((y.w[1] & 0x7800000000000000ull) == 0x7800000000000000ull) { 129 1.1 mrg // return +/-0 130 1.1 mrg res.w[1] = sign_x ^ sign_y; 131 1.1 mrg res.w[0] = 0; 132 1.1 mrg BID_RETURN (res); 133 1.1 mrg } 134 1.1 mrg // y is 0, return +/-Inf 135 1.1 mrg #ifdef SET_STATUS_FLAGS 136 1.1 mrg __set_status_flags (pfpsf, ZERO_DIVIDE_EXCEPTION); 137 1.1 mrg #endif 138 1.1 mrg res.w[1] = 139 1.1 mrg ((x.w[1] ^ y.w[1]) & 0x8000000000000000ull) | 0x7800000000000000ull; 140 1.1 mrg res.w[0] = 0; 141 1.1 mrg BID_RETURN (res); 142 1.1 mrg } 143 1.1 mrg #ifdef UNCHANGED_BINARY_STATUS_FLAGS 144 1.1 mrg (void) fegetexceptflag (&binaryflags, FE_ALL_FLAGS); 145 1.1 mrg #endif 146 1.1 mrg diff_expon = exponent_x - exponent_y + DECIMAL_EXPONENT_BIAS_128; 147 1.1 mrg 148 1.1 mrg if (__unsigned_compare_gt_128 (CY, CX)) { 149 1.1 mrg // CX < CY 150 1.1 mrg 151 1.1 mrg // 2^64 152 1.1 mrg f64.i = 0x5f800000; 153 1.1 mrg 154 1.1 mrg // fx ~ CX, fy ~ CY 155 1.1 mrg fx.d = (float) CX.w[1] * f64.d + (float) CX.w[0]; 156 1.1 mrg fy.d = (float) CY.w[1] * f64.d + (float) CY.w[0]; 157 1.1 mrg // expon_cy - expon_cx 158 1.1 mrg bin_index = (fy.i - fx.i) >> 23; 159 1.1 mrg 160 1.1 mrg if (CX.w[1]) { 161 1.1 mrg T = power10_index_binexp_128[bin_index].w[0]; 162 1.1 mrg __mul_64x128_short (CA, T, CX); 163 1.1 mrg } else { 164 1.1 mrg T128 = power10_index_binexp_128[bin_index]; 165 1.1 mrg __mul_64x128_short (CA, CX.w[0], T128); 166 1.1 mrg } 167 1.1 mrg 168 1.1 mrg ed2 = 33; 169 1.1 mrg if (__unsigned_compare_gt_128 (CY, CA)) 170 1.1 mrg ed2++; 171 1.1 mrg 172 1.1 mrg T128 = power10_table_128[ed2]; 173 1.1 mrg __mul_128x128_to_256 (CA4, CA, T128); 174 1.1 mrg 175 1.1 mrg ed2 += estimate_decimal_digits[bin_index]; 176 1.1 mrg CQ.w[0] = CQ.w[1] = 0; 177 1.1 mrg diff_expon = diff_expon - ed2; 178 1.1 mrg 179 1.1 mrg } else { 180 1.1 mrg // get CQ = CX/CY 181 1.1 mrg __div_128_by_128 (&CQ, &CR, CX, CY); 182 1.1 mrg 183 1.1 mrg if (!CR.w[1] && !CR.w[0]) { 184 1.1 mrg get_BID128 (&res, sign_x ^ sign_y, diff_expon, CQ, &rnd_mode, 185 1.1 mrg pfpsf); 186 1.1 mrg #ifdef UNCHANGED_BINARY_STATUS_FLAGS 187 1.1 mrg (void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS); 188 1.1 mrg #endif 189 1.1 mrg BID_RETURN (res); 190 1.1 mrg } 191 1.1 mrg // get number of decimal digits in CQ 192 1.1 mrg // 2^64 193 1.1 mrg f64.i = 0x5f800000; 194 1.1 mrg fx.d = (float) CQ.w[1] * f64.d + (float) CQ.w[0]; 195 1.1 mrg // binary expon. of CQ 196 1.1 mrg bin_expon = (fx.i - 0x3f800000) >> 23; 197 1.1 mrg 198 1.1 mrg digits_q = estimate_decimal_digits[bin_expon]; 199 1.1 mrg TP128.w[0] = power10_index_binexp_128[bin_expon].w[0]; 200 1.1 mrg TP128.w[1] = power10_index_binexp_128[bin_expon].w[1]; 201 1.1 mrg if (__unsigned_compare_ge_128 (CQ, TP128)) 202 1.1 mrg digits_q++; 203 1.1 mrg 204 1.1 mrg ed2 = 34 - digits_q; 205 1.1 mrg T128.w[0] = power10_table_128[ed2].w[0]; 206 1.1 mrg T128.w[1] = power10_table_128[ed2].w[1]; 207 1.1 mrg __mul_128x128_to_256 (CA4, CR, T128); 208 1.1 mrg diff_expon = diff_expon - ed2; 209 1.1 mrg __mul_128x128_low (CQ, CQ, T128); 210 1.1 mrg 211 1.1 mrg } 212 1.1 mrg 213 1.1 mrg __div_256_by_128 (&CQ, &CA4, CY); 214 1.1 mrg 215 1.1 mrg #ifdef SET_STATUS_FLAGS 216 1.1 mrg if (CA4.w[0] || CA4.w[1]) { 217 1.1 mrg // set status flags 218 1.1 mrg __set_status_flags (pfpsf, INEXACT_EXCEPTION); 219 1.1 mrg } 220 1.1 mrg #ifndef LEAVE_TRAILING_ZEROS 221 1.1 mrg else 222 1.1 mrg #endif 223 1.1 mrg #else 224 1.1 mrg #ifndef LEAVE_TRAILING_ZEROS 225 1.1 mrg if (!CA4.w[0] && !CA4.w[1]) 226 1.1 mrg #endif 227 1.1 mrg #endif 228 1.1 mrg #ifndef LEAVE_TRAILING_ZEROS 229 1.1 mrg // check whether result is exact 230 1.1 mrg { 231 1.1 mrg // check whether CX, CY are short 232 1.1 mrg if (!CX.w[1] && !CY.w[1] && (CX.w[0] <= 1024) && (CY.w[0] <= 1024)) { 233 1.1 mrg i = (int) CY.w[0] - 1; 234 1.1 mrg j = (int) CX.w[0] - 1; 235 1.1 mrg // difference in powers of 2 factors for Y and X 236 1.1 mrg nzeros = ed2 - factors[i][0] + factors[j][0]; 237 1.1 mrg // difference in powers of 5 factors 238 1.1 mrg d5 = ed2 - factors[i][1] + factors[j][1]; 239 1.1 mrg if (d5 < nzeros) 240 1.1 mrg nzeros = d5; 241 1.1 mrg // get P*(2^M[extra_digits])/10^extra_digits 242 1.5 mrg __mul_128x128_high (Qh, CQ, reciprocals10_128[nzeros]); 243 1.1 mrg 244 1.1 mrg // now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128 245 1.1 mrg amount = recip_scale[nzeros]; 246 1.1 mrg __shr_128_long (CQ, Qh, amount); 247 1.1 mrg 248 1.1 mrg diff_expon += nzeros; 249 1.1 mrg } else { 250 1.1 mrg // decompose Q as Qh*10^17 + Ql 251 1.1 mrg //T128 = reciprocals10_128[17]; 252 1.1 mrg T128.w[0] = 0x44909befeb9fad49ull; 253 1.1 mrg T128.w[1] = 0x000b877aa3236a4bull; 254 1.1 mrg __mul_128x128_to_256 (P256, CQ, T128); 255 1.1 mrg //amount = recip_scale[17]; 256 1.1 mrg Q_high = (P256.w[2] >> 44) | (P256.w[3] << (64 - 44)); 257 1.1 mrg Q_low = CQ.w[0] - Q_high * 100000000000000000ull; 258 1.1 mrg 259 1.1 mrg if (!Q_low) { 260 1.1 mrg diff_expon += 17; 261 1.1 mrg 262 1.1 mrg tdigit[0] = Q_high & 0x3ffffff; 263 1.1 mrg tdigit[1] = 0; 264 1.1 mrg QX = Q_high >> 26; 265 1.1 mrg QX32 = QX; 266 1.1 mrg nzeros = 0; 267 1.1 mrg 268 1.1 mrg for (j = 0; QX32; j++, QX32 >>= 7) { 269 1.1 mrg k = (QX32 & 127); 270 1.1 mrg tdigit[0] += convert_table[j][k][0]; 271 1.1 mrg tdigit[1] += convert_table[j][k][1]; 272 1.1 mrg if (tdigit[0] >= 100000000) { 273 1.1 mrg tdigit[0] -= 100000000; 274 1.1 mrg tdigit[1]++; 275 1.1 mrg } 276 1.1 mrg } 277 1.1 mrg 278 1.1 mrg if (tdigit[1] >= 100000000) { 279 1.1 mrg tdigit[1] -= 100000000; 280 1.1 mrg if (tdigit[1] >= 100000000) 281 1.1 mrg tdigit[1] -= 100000000; 282 1.1 mrg } 283 1.1 mrg 284 1.1 mrg digit = tdigit[0]; 285 1.1 mrg if (!digit && !tdigit[1]) 286 1.1 mrg nzeros += 16; 287 1.1 mrg else { 288 1.1 mrg if (!digit) { 289 1.1 mrg nzeros += 8; 290 1.1 mrg digit = tdigit[1]; 291 1.1 mrg } 292 1.1 mrg // decompose digit 293 1.1 mrg PD = (UINT64) digit *0x068DB8BBull; 294 1.1 mrg digit_h = (UINT32) (PD >> 40); 295 1.1 mrg digit_low = digit - digit_h * 10000; 296 1.1 mrg 297 1.1 mrg if (!digit_low) 298 1.1 mrg nzeros += 4; 299 1.1 mrg else 300 1.1 mrg digit_h = digit_low; 301 1.1 mrg 302 1.1 mrg if (!(digit_h & 1)) 303 1.1 mrg nzeros += 304 1.1 mrg 3 & (UINT32) (packed_10000_zeros[digit_h >> 3] >> 305 1.1 mrg (digit_h & 7)); 306 1.1 mrg } 307 1.1 mrg 308 1.1 mrg if (nzeros) { 309 1.1 mrg __mul_64x64_to_128 (CQ, Q_high, reciprocals10_64[nzeros]); 310 1.1 mrg 311 1.1 mrg // now get P/10^extra_digits: shift C64 right by M[extra_digits]-64 312 1.1 mrg amount = short_recip_scale[nzeros]; 313 1.1 mrg CQ.w[0] = CQ.w[1] >> amount; 314 1.1 mrg } else 315 1.1 mrg CQ.w[0] = Q_high; 316 1.1 mrg CQ.w[1] = 0; 317 1.1 mrg 318 1.1 mrg diff_expon += nzeros; 319 1.1 mrg } else { 320 1.1 mrg tdigit[0] = Q_low & 0x3ffffff; 321 1.1 mrg tdigit[1] = 0; 322 1.1 mrg QX = Q_low >> 26; 323 1.1 mrg QX32 = QX; 324 1.1 mrg nzeros = 0; 325 1.1 mrg 326 1.1 mrg for (j = 0; QX32; j++, QX32 >>= 7) { 327 1.1 mrg k = (QX32 & 127); 328 1.1 mrg tdigit[0] += convert_table[j][k][0]; 329 1.1 mrg tdigit[1] += convert_table[j][k][1]; 330 1.1 mrg if (tdigit[0] >= 100000000) { 331 1.1 mrg tdigit[0] -= 100000000; 332 1.1 mrg tdigit[1]++; 333 1.1 mrg } 334 1.1 mrg } 335 1.1 mrg 336 1.1 mrg if (tdigit[1] >= 100000000) { 337 1.1 mrg tdigit[1] -= 100000000; 338 1.1 mrg if (tdigit[1] >= 100000000) 339 1.1 mrg tdigit[1] -= 100000000; 340 1.1 mrg } 341 1.1 mrg 342 1.1 mrg digit = tdigit[0]; 343 1.1 mrg if (!digit && !tdigit[1]) 344 1.1 mrg nzeros += 16; 345 1.1 mrg else { 346 1.1 mrg if (!digit) { 347 1.1 mrg nzeros += 8; 348 1.1 mrg digit = tdigit[1]; 349 1.1 mrg } 350 1.1 mrg // decompose digit 351 1.1 mrg PD = (UINT64) digit *0x068DB8BBull; 352 1.1 mrg digit_h = (UINT32) (PD >> 40); 353 1.1 mrg digit_low = digit - digit_h * 10000; 354 1.1 mrg 355 1.1 mrg if (!digit_low) 356 1.1 mrg nzeros += 4; 357 1.1 mrg else 358 1.1 mrg digit_h = digit_low; 359 1.1 mrg 360 1.1 mrg if (!(digit_h & 1)) 361 1.1 mrg nzeros += 362 1.1 mrg 3 & (UINT32) (packed_10000_zeros[digit_h >> 3] >> 363 1.1 mrg (digit_h & 7)); 364 1.1 mrg } 365 1.1 mrg 366 1.1 mrg if (nzeros) { 367 1.1 mrg // get P*(2^M[extra_digits])/10^extra_digits 368 1.5 mrg __mul_128x128_high (Qh, CQ, reciprocals10_128[nzeros]); 369 1.1 mrg 370 1.1 mrg //now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128 371 1.1 mrg amount = recip_scale[nzeros]; 372 1.1 mrg __shr_128 (CQ, Qh, amount); 373 1.1 mrg } 374 1.1 mrg diff_expon += nzeros; 375 1.1 mrg 376 1.1 mrg } 377 1.1 mrg } 378 1.1 mrg get_BID128 (&res, sign_x ^ sign_y, diff_expon, CQ, &rnd_mode, pfpsf); 379 1.1 mrg #ifdef UNCHANGED_BINARY_STATUS_FLAGS 380 1.1 mrg (void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS); 381 1.1 mrg #endif 382 1.1 mrg BID_RETURN (res); 383 1.1 mrg } 384 1.1 mrg #endif 385 1.1 mrg 386 1.1 mrg if (diff_expon >= 0) { 387 1.1 mrg #ifdef IEEE_ROUND_NEAREST 388 1.1 mrg // rounding 389 1.1 mrg // 2*CA4 - CY 390 1.1 mrg CA4r.w[1] = (CA4.w[1] + CA4.w[1]) | (CA4.w[0] >> 63); 391 1.1 mrg CA4r.w[0] = CA4.w[0] + CA4.w[0]; 392 1.1 mrg __sub_borrow_out (CA4r.w[0], carry64, CA4r.w[0], CY.w[0]); 393 1.1 mrg CA4r.w[1] = CA4r.w[1] - CY.w[1] - carry64; 394 1.1 mrg 395 1.1 mrg D = (CA4r.w[1] | CA4r.w[0]) ? 1 : 0; 396 1.1 mrg carry64 = (1 + (((SINT64) CA4r.w[1]) >> 63)) & ((CQ.w[0]) | D); 397 1.1 mrg 398 1.1 mrg CQ.w[0] += carry64; 399 1.1 mrg if (CQ.w[0] < carry64) 400 1.1 mrg CQ.w[1]++; 401 1.1 mrg #else 402 1.1 mrg #ifdef IEEE_ROUND_NEAREST_TIES_AWAY 403 1.1 mrg // rounding 404 1.1 mrg // 2*CA4 - CY 405 1.1 mrg CA4r.w[1] = (CA4.w[1] + CA4.w[1]) | (CA4.w[0] >> 63); 406 1.1 mrg CA4r.w[0] = CA4.w[0] + CA4.w[0]; 407 1.1 mrg __sub_borrow_out (CA4r.w[0], carry64, CA4r.w[0], CY.w[0]); 408 1.1 mrg CA4r.w[1] = CA4r.w[1] - CY.w[1] - carry64; 409 1.1 mrg 410 1.1 mrg D = (CA4r.w[1] | CA4r.w[0]) ? 0 : 1; 411 1.1 mrg carry64 = (1 + (((SINT64) CA4r.w[1]) >> 63)) | D; 412 1.1 mrg 413 1.1 mrg CQ.w[0] += carry64; 414 1.1 mrg if (CQ.w[0] < carry64) 415 1.1 mrg CQ.w[1]++; 416 1.1 mrg #else 417 1.1 mrg rmode = rnd_mode; 418 1.1 mrg if (sign_x ^ sign_y && (unsigned) (rmode - 1) < 2) 419 1.1 mrg rmode = 3 - rmode; 420 1.1 mrg switch (rmode) { 421 1.1 mrg case ROUNDING_TO_NEAREST: // round to nearest code 422 1.1 mrg // rounding 423 1.1 mrg // 2*CA4 - CY 424 1.1 mrg CA4r.w[1] = (CA4.w[1] + CA4.w[1]) | (CA4.w[0] >> 63); 425 1.1 mrg CA4r.w[0] = CA4.w[0] + CA4.w[0]; 426 1.1 mrg __sub_borrow_out (CA4r.w[0], carry64, CA4r.w[0], CY.w[0]); 427 1.1 mrg CA4r.w[1] = CA4r.w[1] - CY.w[1] - carry64; 428 1.1 mrg D = (CA4r.w[1] | CA4r.w[0]) ? 1 : 0; 429 1.1 mrg carry64 = (1 + (((SINT64) CA4r.w[1]) >> 63)) & ((CQ.w[0]) | D); 430 1.1 mrg CQ.w[0] += carry64; 431 1.1 mrg if (CQ.w[0] < carry64) 432 1.1 mrg CQ.w[1]++; 433 1.1 mrg break; 434 1.1 mrg case ROUNDING_TIES_AWAY: 435 1.1 mrg // rounding 436 1.1 mrg // 2*CA4 - CY 437 1.1 mrg CA4r.w[1] = (CA4.w[1] + CA4.w[1]) | (CA4.w[0] >> 63); 438 1.1 mrg CA4r.w[0] = CA4.w[0] + CA4.w[0]; 439 1.1 mrg __sub_borrow_out (CA4r.w[0], carry64, CA4r.w[0], CY.w[0]); 440 1.1 mrg CA4r.w[1] = CA4r.w[1] - CY.w[1] - carry64; 441 1.1 mrg D = (CA4r.w[1] | CA4r.w[0]) ? 0 : 1; 442 1.1 mrg carry64 = (1 + (((SINT64) CA4r.w[1]) >> 63)) | D; 443 1.1 mrg CQ.w[0] += carry64; 444 1.1 mrg if (CQ.w[0] < carry64) 445 1.1 mrg CQ.w[1]++; 446 1.1 mrg break; 447 1.1 mrg case ROUNDING_DOWN: 448 1.1 mrg case ROUNDING_TO_ZERO: 449 1.1 mrg break; 450 1.1 mrg default: // rounding up 451 1.1 mrg CQ.w[0]++; 452 1.1 mrg if (!CQ.w[0]) 453 1.1 mrg CQ.w[1]++; 454 1.1 mrg break; 455 1.1 mrg } 456 1.1 mrg #endif 457 1.1 mrg #endif 458 1.1 mrg 459 1.1 mrg } else { 460 1.1 mrg #ifdef SET_STATUS_FLAGS 461 1.1 mrg if (CA4.w[0] || CA4.w[1]) { 462 1.1 mrg // set status flags 463 1.1 mrg __set_status_flags (pfpsf, INEXACT_EXCEPTION); 464 1.1 mrg } 465 1.1 mrg #endif 466 1.1 mrg 467 1.1 mrg handle_UF_128_rem (&res, sign_x ^ sign_y, diff_expon, CQ, 468 1.1 mrg CA4.w[1] | CA4.w[0], &rnd_mode, pfpsf); 469 1.1 mrg #ifdef UNCHANGED_BINARY_STATUS_FLAGS 470 1.1 mrg (void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS); 471 1.1 mrg #endif 472 1.1 mrg BID_RETURN (res); 473 1.1 mrg 474 1.1 mrg } 475 1.1 mrg 476 1.1 mrg get_BID128 (&res, sign_x ^ sign_y, diff_expon, CQ, &rnd_mode, pfpsf); 477 1.1 mrg #ifdef UNCHANGED_BINARY_STATUS_FLAGS 478 1.1 mrg (void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS); 479 1.1 mrg #endif 480 1.1 mrg BID_RETURN (res); 481 1.1 mrg } 482 1.1 mrg 483 1.1 mrg 484 1.1 mrg //#define LEAVE_TRAILING_ZEROS 485 1.1 mrg 486 1.1 mrg TYPE0_FUNCTION_ARGTYPE1_ARGTYPE2 (UINT128, bid128dd_div, UINT64, x, 487 1.1 mrg UINT64, y) 488 1.1 mrg 489 1.1 mrg UINT256 CA4, CA4r, P256; 490 1.5 mrg UINT128 CX, CY, T128, CQ, CR, CA, TP128, Qh, res; 491 1.1 mrg UINT64 sign_x, sign_y, T, carry64, D, Q_high, Q_low, QX, PD, 492 1.1 mrg valid_y; 493 1.1 mrg int_float fx, fy, f64; 494 1.1 mrg UINT32 QX32, tdigit[3], digit, digit_h, digit_low; 495 1.1 mrg int exponent_x, exponent_y, bin_index, bin_expon, diff_expon, ed2, 496 1.1 mrg digits_q, amount; 497 1.1 mrg int nzeros, i, j, k, d5; 498 1.1 mrg unsigned rmode; 499 1.1 mrg #ifdef UNCHANGED_BINARY_STATUS_FLAGS 500 1.1 mrg fexcept_t binaryflags = 0; 501 1.1 mrg #endif 502 1.1 mrg 503 1.1 mrg valid_y = unpack_BID64 (&sign_y, &exponent_y, &CY.w[0], y); 504 1.1 mrg 505 1.1 mrg // unpack arguments, check for NaN or Infinity 506 1.1 mrg CX.w[1] = 0; 507 1.1 mrg if (!unpack_BID64 (&sign_x, &exponent_x, &CX.w[0], (x))) { 508 1.1 mrg #ifdef SET_STATUS_FLAGS 509 1.1 mrg if ((y & SNAN_MASK64) == SNAN_MASK64) // y is sNaN 510 1.1 mrg __set_status_flags (pfpsf, INVALID_EXCEPTION); 511 1.1 mrg #endif 512 1.1 mrg 513 1.1 mrg // test if x is NaN 514 1.1 mrg if ((x & NAN_MASK64) == NAN_MASK64) { 515 1.1 mrg #ifdef SET_STATUS_FLAGS 516 1.1 mrg if ((x & SNAN_MASK64) == SNAN_MASK64) // sNaN 517 1.1 mrg __set_status_flags (pfpsf, INVALID_EXCEPTION); 518 1.1 mrg #endif 519 1.1 mrg res.w[0] = (CX.w[0] & 0x0003ffffffffffffull); 520 1.1 mrg __mul_64x64_to_128 (res, res.w[0], power10_table_128[18].w[0]); 521 1.1 mrg res.w[1] |= ((CX.w[0]) & 0xfc00000000000000ull); 522 1.1 mrg BID_RETURN (res); 523 1.1 mrg } 524 1.1 mrg // x is Infinity? 525 1.1 mrg if (((x) & 0x7800000000000000ull) == 0x7800000000000000ull) { 526 1.1 mrg // check if y is Inf. 527 1.1 mrg if ((((y) & 0x7c00000000000000ull) == 0x7800000000000000ull)) 528 1.1 mrg // return NaN 529 1.1 mrg { 530 1.1 mrg #ifdef SET_STATUS_FLAGS 531 1.1 mrg __set_status_flags (pfpsf, INVALID_EXCEPTION); 532 1.1 mrg #endif 533 1.1 mrg res.w[1] = 0x7c00000000000000ull; 534 1.1 mrg res.w[0] = 0; 535 1.1 mrg BID_RETURN (res); 536 1.1 mrg } 537 1.1 mrg if ((((y) & 0x7c00000000000000ull) != 0x7c00000000000000ull)) { 538 1.1 mrg // otherwise return +/-Inf 539 1.1 mrg res.w[1] = 540 1.1 mrg (((x) ^ (y)) & 0x8000000000000000ull) | 0x7800000000000000ull; 541 1.1 mrg res.w[0] = 0; 542 1.1 mrg BID_RETURN (res); 543 1.1 mrg } 544 1.1 mrg } 545 1.1 mrg // x is 0 546 1.1 mrg if ((((y) & 0x7800000000000000ull) != 0x7800000000000000ull)) { 547 1.1 mrg if(!CY.w[0]) { 548 1.1 mrg #ifdef SET_STATUS_FLAGS 549 1.1 mrg __set_status_flags (pfpsf, INVALID_EXCEPTION); 550 1.1 mrg #endif 551 1.1 mrg // x=y=0, return NaN 552 1.1 mrg res.w[1] = 0x7c00000000000000ull; 553 1.1 mrg res.w[0] = 0; 554 1.1 mrg BID_RETURN (res); 555 1.1 mrg } 556 1.1 mrg // return 0 557 1.1 mrg res.w[1] = ((x) ^ (y)) & 0x8000000000000000ull; 558 1.1 mrg if (((y) & 0x6000000000000000ull) == 0x6000000000000000ull) 559 1.1 mrg exponent_y = ((UINT32) ((y) >> 51)) & 0x3ff; 560 1.1 mrg else 561 1.1 mrg exponent_y = ((UINT32) ((y) >> 53)) & 0x3ff; 562 1.1 mrg exponent_x = exponent_x - exponent_y + DECIMAL_EXPONENT_BIAS_128; 563 1.1 mrg if (exponent_x > DECIMAL_MAX_EXPON_128) 564 1.1 mrg exponent_x = DECIMAL_MAX_EXPON_128; 565 1.1 mrg else if (exponent_x < 0) 566 1.1 mrg exponent_x = 0; 567 1.1 mrg res.w[1] |= (((UINT64) exponent_x) << 49); 568 1.1 mrg res.w[0] = 0; 569 1.1 mrg BID_RETURN (res); 570 1.1 mrg } 571 1.1 mrg } 572 1.1 mrg 573 1.1 mrg CY.w[1] = 0; 574 1.1 mrg if (!valid_y) { 575 1.1 mrg // y is Inf. or NaN 576 1.1 mrg 577 1.1 mrg // test if y is NaN 578 1.1 mrg if ((y & NAN_MASK64) == NAN_MASK64) { 579 1.1 mrg #ifdef SET_STATUS_FLAGS 580 1.1 mrg if ((y & SNAN_MASK64) == SNAN_MASK64) // sNaN 581 1.1 mrg __set_status_flags (pfpsf, INVALID_EXCEPTION); 582 1.1 mrg #endif 583 1.1 mrg res.w[0] = (CY.w[0] & 0x0003ffffffffffffull); 584 1.1 mrg __mul_64x64_to_128 (res, res.w[0], power10_table_128[18].w[0]); 585 1.1 mrg res.w[1] |= ((CY.w[0]) & 0xfc00000000000000ull); 586 1.1 mrg BID_RETURN (res); 587 1.1 mrg } 588 1.1 mrg // y is Infinity? 589 1.1 mrg if (((y) & 0x7800000000000000ull) == 0x7800000000000000ull) { 590 1.1 mrg // return +/-0 591 1.1 mrg res.w[1] = sign_x ^ sign_y; 592 1.1 mrg res.w[0] = 0; 593 1.1 mrg BID_RETURN (res); 594 1.1 mrg } 595 1.1 mrg // y is 0, return +/-Inf 596 1.1 mrg res.w[1] = 597 1.1 mrg (((x) ^ (y)) & 0x8000000000000000ull) | 0x7800000000000000ull; 598 1.1 mrg res.w[0] = 0; 599 1.1 mrg #ifdef SET_STATUS_FLAGS 600 1.1 mrg __set_status_flags (pfpsf, ZERO_DIVIDE_EXCEPTION); 601 1.1 mrg #endif 602 1.1 mrg BID_RETURN (res); 603 1.1 mrg } 604 1.1 mrg #ifdef UNCHANGED_BINARY_STATUS_FLAGS 605 1.1 mrg (void) fegetexceptflag (&binaryflags, FE_ALL_FLAGS); 606 1.1 mrg #endif 607 1.1 mrg diff_expon = exponent_x - exponent_y + DECIMAL_EXPONENT_BIAS_128; 608 1.1 mrg 609 1.1 mrg if (__unsigned_compare_gt_128 (CY, CX)) { 610 1.1 mrg // CX < CY 611 1.1 mrg 612 1.1 mrg // 2^64 613 1.1 mrg f64.i = 0x5f800000; 614 1.1 mrg 615 1.1 mrg // fx ~ CX, fy ~ CY 616 1.1 mrg fx.d = (float) CX.w[1] * f64.d + (float) CX.w[0]; 617 1.1 mrg fy.d = (float) CY.w[1] * f64.d + (float) CY.w[0]; 618 1.1 mrg // expon_cy - expon_cx 619 1.1 mrg bin_index = (fy.i - fx.i) >> 23; 620 1.1 mrg 621 1.1 mrg if (CX.w[1]) { 622 1.1 mrg T = power10_index_binexp_128[bin_index].w[0]; 623 1.1 mrg __mul_64x128_short (CA, T, CX); 624 1.1 mrg } else { 625 1.1 mrg T128 = power10_index_binexp_128[bin_index]; 626 1.1 mrg __mul_64x128_short (CA, CX.w[0], T128); 627 1.1 mrg } 628 1.1 mrg 629 1.1 mrg ed2 = 33; 630 1.1 mrg if (__unsigned_compare_gt_128 (CY, CA)) 631 1.1 mrg ed2++; 632 1.1 mrg 633 1.1 mrg T128 = power10_table_128[ed2]; 634 1.1 mrg __mul_128x128_to_256 (CA4, CA, T128); 635 1.1 mrg 636 1.1 mrg ed2 += estimate_decimal_digits[bin_index]; 637 1.1 mrg CQ.w[0] = CQ.w[1] = 0; 638 1.1 mrg diff_expon = diff_expon - ed2; 639 1.1 mrg 640 1.1 mrg } else { 641 1.1 mrg // get CQ = CX/CY 642 1.1 mrg __div_128_by_128 (&CQ, &CR, CX, CY); 643 1.1 mrg 644 1.1 mrg if (!CR.w[1] && !CR.w[0]) { 645 1.1 mrg get_BID128 (&res, sign_x ^ sign_y, diff_expon, CQ, &rnd_mode, 646 1.1 mrg pfpsf); 647 1.1 mrg #ifdef UNCHANGED_BINARY_STATUS_FLAGS 648 1.1 mrg (void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS); 649 1.1 mrg #endif 650 1.1 mrg BID_RETURN (res); 651 1.1 mrg } 652 1.1 mrg // get number of decimal digits in CQ 653 1.1 mrg // 2^64 654 1.1 mrg f64.i = 0x5f800000; 655 1.1 mrg fx.d = (float) CQ.w[1] * f64.d + (float) CQ.w[0]; 656 1.1 mrg // binary expon. of CQ 657 1.1 mrg bin_expon = (fx.i - 0x3f800000) >> 23; 658 1.1 mrg 659 1.1 mrg digits_q = estimate_decimal_digits[bin_expon]; 660 1.1 mrg TP128.w[0] = power10_index_binexp_128[bin_expon].w[0]; 661 1.1 mrg TP128.w[1] = power10_index_binexp_128[bin_expon].w[1]; 662 1.1 mrg if (__unsigned_compare_ge_128 (CQ, TP128)) 663 1.1 mrg digits_q++; 664 1.1 mrg 665 1.1 mrg ed2 = 34 - digits_q; 666 1.1 mrg T128.w[0] = power10_table_128[ed2].w[0]; 667 1.1 mrg T128.w[1] = power10_table_128[ed2].w[1]; 668 1.1 mrg __mul_128x128_to_256 (CA4, CR, T128); 669 1.1 mrg diff_expon = diff_expon - ed2; 670 1.1 mrg __mul_128x128_low (CQ, CQ, T128); 671 1.1 mrg 672 1.1 mrg } 673 1.1 mrg 674 1.1 mrg __div_256_by_128 (&CQ, &CA4, CY); 675 1.1 mrg 676 1.1 mrg 677 1.1 mrg #ifdef SET_STATUS_FLAGS 678 1.1 mrg if (CA4.w[0] || CA4.w[1]) { 679 1.1 mrg // set status flags 680 1.1 mrg __set_status_flags (pfpsf, INEXACT_EXCEPTION); 681 1.1 mrg } 682 1.1 mrg #ifndef LEAVE_TRAILING_ZEROS 683 1.1 mrg else 684 1.1 mrg #endif 685 1.1 mrg #else 686 1.1 mrg #ifndef LEAVE_TRAILING_ZEROS 687 1.1 mrg if (!CA4.w[0] && !CA4.w[1]) 688 1.1 mrg #endif 689 1.1 mrg #endif 690 1.1 mrg #ifndef LEAVE_TRAILING_ZEROS 691 1.1 mrg // check whether result is exact 692 1.1 mrg { 693 1.1 mrg // check whether CX, CY are short 694 1.1 mrg if (!CX.w[1] && !CY.w[1] && (CX.w[0] <= 1024) && (CY.w[0] <= 1024)) { 695 1.1 mrg i = (int) CY.w[0] - 1; 696 1.1 mrg j = (int) CX.w[0] - 1; 697 1.1 mrg // difference in powers of 2 factors for Y and X 698 1.1 mrg nzeros = ed2 - factors[i][0] + factors[j][0]; 699 1.1 mrg // difference in powers of 5 factors 700 1.1 mrg d5 = ed2 - factors[i][1] + factors[j][1]; 701 1.1 mrg if (d5 < nzeros) 702 1.1 mrg nzeros = d5; 703 1.1 mrg // get P*(2^M[extra_digits])/10^extra_digits 704 1.5 mrg __mul_128x128_high (Qh, CQ, reciprocals10_128[nzeros]); 705 1.1 mrg //__mul_128x128_to_256(P256, CQ, reciprocals10_128[nzeros]);Qh.w[1]=P256.w[3];Qh.w[0]=P256.w[2]; 706 1.1 mrg 707 1.1 mrg // now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128 708 1.1 mrg amount = recip_scale[nzeros]; 709 1.1 mrg __shr_128_long (CQ, Qh, amount); 710 1.1 mrg 711 1.1 mrg diff_expon += nzeros; 712 1.1 mrg } else { 713 1.1 mrg // decompose Q as Qh*10^17 + Ql 714 1.1 mrg //T128 = reciprocals10_128[17]; 715 1.1 mrg T128.w[0] = 0x44909befeb9fad49ull; 716 1.1 mrg T128.w[1] = 0x000b877aa3236a4bull; 717 1.1 mrg __mul_128x128_to_256 (P256, CQ, T128); 718 1.1 mrg //amount = recip_scale[17]; 719 1.1 mrg Q_high = (P256.w[2] >> 44) | (P256.w[3] << (64 - 44)); 720 1.1 mrg Q_low = CQ.w[0] - Q_high * 100000000000000000ull; 721 1.1 mrg 722 1.1 mrg if (!Q_low) { 723 1.1 mrg diff_expon += 17; 724 1.1 mrg 725 1.1 mrg tdigit[0] = Q_high & 0x3ffffff; 726 1.1 mrg tdigit[1] = 0; 727 1.1 mrg QX = Q_high >> 26; 728 1.1 mrg QX32 = QX; 729 1.1 mrg nzeros = 0; 730 1.1 mrg 731 1.1 mrg for (j = 0; QX32; j++, QX32 >>= 7) { 732 1.1 mrg k = (QX32 & 127); 733 1.1 mrg tdigit[0] += convert_table[j][k][0]; 734 1.1 mrg tdigit[1] += convert_table[j][k][1]; 735 1.1 mrg if (tdigit[0] >= 100000000) { 736 1.1 mrg tdigit[0] -= 100000000; 737 1.1 mrg tdigit[1]++; 738 1.1 mrg } 739 1.1 mrg } 740 1.1 mrg 741 1.1 mrg 742 1.1 mrg if (tdigit[1] >= 100000000) { 743 1.1 mrg tdigit[1] -= 100000000; 744 1.1 mrg if (tdigit[1] >= 100000000) 745 1.1 mrg tdigit[1] -= 100000000; 746 1.1 mrg } 747 1.1 mrg 748 1.1 mrg digit = tdigit[0]; 749 1.1 mrg if (!digit && !tdigit[1]) 750 1.1 mrg nzeros += 16; 751 1.1 mrg else { 752 1.1 mrg if (!digit) { 753 1.1 mrg nzeros += 8; 754 1.1 mrg digit = tdigit[1]; 755 1.1 mrg } 756 1.1 mrg // decompose digit 757 1.1 mrg PD = (UINT64) digit *0x068DB8BBull; 758 1.1 mrg digit_h = (UINT32) (PD >> 40); 759 1.1 mrg digit_low = digit - digit_h * 10000; 760 1.1 mrg 761 1.1 mrg if (!digit_low) 762 1.1 mrg nzeros += 4; 763 1.1 mrg else 764 1.1 mrg digit_h = digit_low; 765 1.1 mrg 766 1.1 mrg if (!(digit_h & 1)) 767 1.1 mrg nzeros += 768 1.1 mrg 3 & (UINT32) (packed_10000_zeros[digit_h >> 3] >> 769 1.1 mrg (digit_h & 7)); 770 1.1 mrg } 771 1.1 mrg 772 1.1 mrg if (nzeros) { 773 1.1 mrg __mul_64x64_to_128 (CQ, Q_high, reciprocals10_64[nzeros]); 774 1.1 mrg 775 1.1 mrg // now get P/10^extra_digits: shift C64 right by M[extra_digits]-64 776 1.1 mrg amount = short_recip_scale[nzeros]; 777 1.1 mrg CQ.w[0] = CQ.w[1] >> amount; 778 1.1 mrg } else 779 1.1 mrg CQ.w[0] = Q_high; 780 1.1 mrg CQ.w[1] = 0; 781 1.1 mrg 782 1.1 mrg diff_expon += nzeros; 783 1.1 mrg } else { 784 1.1 mrg tdigit[0] = Q_low & 0x3ffffff; 785 1.1 mrg tdigit[1] = 0; 786 1.1 mrg QX = Q_low >> 26; 787 1.1 mrg QX32 = QX; 788 1.1 mrg nzeros = 0; 789 1.1 mrg 790 1.1 mrg for (j = 0; QX32; j++, QX32 >>= 7) { 791 1.1 mrg k = (QX32 & 127); 792 1.1 mrg tdigit[0] += convert_table[j][k][0]; 793 1.1 mrg tdigit[1] += convert_table[j][k][1]; 794 1.1 mrg if (tdigit[0] >= 100000000) { 795 1.1 mrg tdigit[0] -= 100000000; 796 1.1 mrg tdigit[1]++; 797 1.1 mrg } 798 1.1 mrg } 799 1.1 mrg 800 1.1 mrg if (tdigit[1] >= 100000000) { 801 1.1 mrg tdigit[1] -= 100000000; 802 1.1 mrg if (tdigit[1] >= 100000000) 803 1.1 mrg tdigit[1] -= 100000000; 804 1.1 mrg } 805 1.1 mrg 806 1.1 mrg digit = tdigit[0]; 807 1.1 mrg if (!digit && !tdigit[1]) 808 1.1 mrg nzeros += 16; 809 1.1 mrg else { 810 1.1 mrg if (!digit) { 811 1.1 mrg nzeros += 8; 812 1.1 mrg digit = tdigit[1]; 813 1.1 mrg } 814 1.1 mrg // decompose digit 815 1.1 mrg PD = (UINT64) digit *0x068DB8BBull; 816 1.1 mrg digit_h = (UINT32) (PD >> 40); 817 1.1 mrg digit_low = digit - digit_h * 10000; 818 1.1 mrg 819 1.1 mrg if (!digit_low) 820 1.1 mrg nzeros += 4; 821 1.1 mrg else 822 1.1 mrg digit_h = digit_low; 823 1.1 mrg 824 1.1 mrg if (!(digit_h & 1)) 825 1.1 mrg nzeros += 826 1.1 mrg 3 & (UINT32) (packed_10000_zeros[digit_h >> 3] >> 827 1.1 mrg (digit_h & 7)); 828 1.1 mrg } 829 1.1 mrg 830 1.1 mrg if (nzeros) { 831 1.1 mrg // get P*(2^M[extra_digits])/10^extra_digits 832 1.5 mrg __mul_128x128_high (Qh, CQ, reciprocals10_128[nzeros]); 833 1.1 mrg 834 1.1 mrg // now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128 835 1.1 mrg amount = recip_scale[nzeros]; 836 1.1 mrg __shr_128 (CQ, Qh, amount); 837 1.1 mrg } 838 1.1 mrg diff_expon += nzeros; 839 1.1 mrg 840 1.1 mrg } 841 1.1 mrg } 842 1.1 mrg get_BID128(&res, sign_x ^ sign_y, diff_expon, CQ, &rnd_mode,pfpsf); 843 1.1 mrg #ifdef UNCHANGED_BINARY_STATUS_FLAGS 844 1.1 mrg (void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS); 845 1.1 mrg #endif 846 1.1 mrg BID_RETURN (res); 847 1.1 mrg } 848 1.1 mrg #endif 849 1.1 mrg 850 1.1 mrg if (diff_expon >= 0) { 851 1.1 mrg #ifdef IEEE_ROUND_NEAREST 852 1.1 mrg // rounding 853 1.1 mrg // 2*CA4 - CY 854 1.1 mrg CA4r.w[1] = (CA4.w[1] + CA4.w[1]) | (CA4.w[0] >> 63); 855 1.1 mrg CA4r.w[0] = CA4.w[0] + CA4.w[0]; 856 1.1 mrg __sub_borrow_out (CA4r.w[0], carry64, CA4r.w[0], CY.w[0]); 857 1.1 mrg CA4r.w[1] = CA4r.w[1] - CY.w[1] - carry64; 858 1.1 mrg 859 1.1 mrg D = (CA4r.w[1] | CA4r.w[0]) ? 1 : 0; 860 1.1 mrg carry64 = (1 + (((SINT64) CA4r.w[1]) >> 63)) & ((CQ.w[0]) | D); 861 1.1 mrg 862 1.1 mrg CQ.w[0] += carry64; 863 1.1 mrg if (CQ.w[0] < carry64) 864 1.1 mrg CQ.w[1]++; 865 1.1 mrg #else 866 1.1 mrg #ifdef IEEE_ROUND_NEAREST_TIES_AWAY 867 1.1 mrg // rounding 868 1.1 mrg // 2*CA4 - CY 869 1.1 mrg CA4r.w[1] = (CA4.w[1] + CA4.w[1]) | (CA4.w[0] >> 63); 870 1.1 mrg CA4r.w[0] = CA4.w[0] + CA4.w[0]; 871 1.1 mrg __sub_borrow_out (CA4r.w[0], carry64, CA4r.w[0], CY.w[0]); 872 1.1 mrg CA4r.w[1] = CA4r.w[1] - CY.w[1] - carry64; 873 1.1 mrg 874 1.1 mrg D = (CA4r.w[1] | CA4r.w[0]) ? 0 : 1; 875 1.1 mrg carry64 = (1 + (((SINT64) CA4r.w[1]) >> 63)) | D; 876 1.1 mrg 877 1.1 mrg CQ.w[0] += carry64; 878 1.1 mrg if (CQ.w[0] < carry64) 879 1.1 mrg CQ.w[1]++; 880 1.1 mrg #else 881 1.1 mrg rmode = rnd_mode; 882 1.1 mrg if (sign_x ^ sign_y && (unsigned) (rmode - 1) < 2) 883 1.1 mrg rmode = 3 - rmode; 884 1.1 mrg switch (rmode) { 885 1.1 mrg case ROUNDING_TO_NEAREST: // round to nearest code 886 1.1 mrg // rounding 887 1.1 mrg // 2*CA4 - CY 888 1.1 mrg CA4r.w[1] = (CA4.w[1] + CA4.w[1]) | (CA4.w[0] >> 63); 889 1.1 mrg CA4r.w[0] = CA4.w[0] + CA4.w[0]; 890 1.1 mrg __sub_borrow_out (CA4r.w[0], carry64, CA4r.w[0], CY.w[0]); 891 1.1 mrg CA4r.w[1] = CA4r.w[1] - CY.w[1] - carry64; 892 1.1 mrg D = (CA4r.w[1] | CA4r.w[0]) ? 1 : 0; 893 1.1 mrg carry64 = (1 + (((SINT64) CA4r.w[1]) >> 63)) & ((CQ.w[0]) | D); 894 1.1 mrg CQ.w[0] += carry64; 895 1.1 mrg if (CQ.w[0] < carry64) 896 1.1 mrg CQ.w[1]++; 897 1.1 mrg break; 898 1.1 mrg case ROUNDING_TIES_AWAY: 899 1.1 mrg // rounding 900 1.1 mrg // 2*CA4 - CY 901 1.1 mrg CA4r.w[1] = (CA4.w[1] + CA4.w[1]) | (CA4.w[0] >> 63); 902 1.1 mrg CA4r.w[0] = CA4.w[0] + CA4.w[0]; 903 1.1 mrg __sub_borrow_out (CA4r.w[0], carry64, CA4r.w[0], CY.w[0]); 904 1.1 mrg CA4r.w[1] = CA4r.w[1] - CY.w[1] - carry64; 905 1.1 mrg D = (CA4r.w[1] | CA4r.w[0]) ? 0 : 1; 906 1.1 mrg carry64 = (1 + (((SINT64) CA4r.w[1]) >> 63)) | D; 907 1.1 mrg CQ.w[0] += carry64; 908 1.1 mrg if (CQ.w[0] < carry64) 909 1.1 mrg CQ.w[1]++; 910 1.1 mrg break; 911 1.1 mrg case ROUNDING_DOWN: 912 1.1 mrg case ROUNDING_TO_ZERO: 913 1.1 mrg break; 914 1.1 mrg default: // rounding up 915 1.1 mrg CQ.w[0]++; 916 1.1 mrg if (!CQ.w[0]) 917 1.1 mrg CQ.w[1]++; 918 1.1 mrg break; 919 1.1 mrg } 920 1.1 mrg #endif 921 1.1 mrg #endif 922 1.1 mrg 923 1.1 mrg } else { 924 1.1 mrg #ifdef SET_STATUS_FLAGS 925 1.1 mrg if (CA4.w[0] || CA4.w[1]) { 926 1.1 mrg // set status flags 927 1.1 mrg __set_status_flags (pfpsf, INEXACT_EXCEPTION); 928 1.1 mrg } 929 1.1 mrg #endif 930 1.1 mrg handle_UF_128_rem (&res, sign_x ^ sign_y, diff_expon, CQ, 931 1.1 mrg CA4.w[1] | CA4.w[0], &rnd_mode, pfpsf); 932 1.1 mrg #ifdef UNCHANGED_BINARY_STATUS_FLAGS 933 1.1 mrg (void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS); 934 1.1 mrg #endif 935 1.1 mrg BID_RETURN (res); 936 1.1 mrg 937 1.1 mrg } 938 1.1 mrg 939 1.1 mrg get_BID128 (&res, sign_x ^ sign_y, diff_expon, CQ, &rnd_mode, pfpsf); 940 1.1 mrg #ifdef UNCHANGED_BINARY_STATUS_FLAGS 941 1.1 mrg (void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS); 942 1.1 mrg #endif 943 1.1 mrg BID_RETURN (res); 944 1.1 mrg } 945 1.1 mrg 946 1.1 mrg 947 1.1 mrg BID128_FUNCTION_ARGTYPE1_ARG128 (bid128dq_div, UINT64, x, y) 948 1.1 mrg UINT256 CA4, CA4r, P256; 949 1.5 mrg UINT128 CX, CY, T128, CQ, CR, CA, TP128, Qh, res; 950 1.1 mrg UINT64 sign_x, sign_y, T, carry64, D, Q_high, Q_low, QX, valid_y, 951 1.1 mrg PD; 952 1.1 mrg int_float fx, fy, f64; 953 1.1 mrg UINT32 QX32, tdigit[3], digit, digit_h, digit_low; 954 1.1 mrg int exponent_x, exponent_y, bin_index, bin_expon, diff_expon, ed2, 955 1.1 mrg digits_q, amount; 956 1.1 mrg int nzeros, i, j, k, d5; 957 1.1 mrg unsigned rmode; 958 1.1 mrg #ifdef UNCHANGED_BINARY_STATUS_FLAGS 959 1.1 mrg fexcept_t binaryflags = 0; 960 1.1 mrg #endif 961 1.1 mrg 962 1.1 mrg valid_y = unpack_BID128_value (&sign_y, &exponent_y, &CY, y); 963 1.1 mrg 964 1.1 mrg // unpack arguments, check for NaN or Infinity 965 1.1 mrg CX.w[1] = 0; 966 1.1 mrg if (!unpack_BID64 (&sign_x, &exponent_x, &CX.w[0], x)) { 967 1.1 mrg #ifdef SET_STATUS_FLAGS 968 1.1 mrg if ((y.w[1] & SNAN_MASK64) == SNAN_MASK64) // y is sNaN 969 1.1 mrg __set_status_flags (pfpsf, INVALID_EXCEPTION); 970 1.1 mrg #endif 971 1.1 mrg 972 1.1 mrg // test if x is NaN 973 1.1 mrg if ((x & NAN_MASK64) == NAN_MASK64) { 974 1.1 mrg #ifdef SET_STATUS_FLAGS 975 1.1 mrg if ((x & SNAN_MASK64) == SNAN_MASK64) // sNaN 976 1.1 mrg __set_status_flags (pfpsf, INVALID_EXCEPTION); 977 1.1 mrg #endif 978 1.1 mrg res.w[0] = (CX.w[0] & 0x0003ffffffffffffull); 979 1.1 mrg __mul_64x64_to_128 (res, res.w[0], power10_table_128[18].w[0]); 980 1.1 mrg res.w[1] |= ((CX.w[0]) & 0xfc00000000000000ull); 981 1.1 mrg BID_RETURN (res); 982 1.1 mrg } 983 1.1 mrg // x is Infinity? 984 1.1 mrg if ((x & 0x7800000000000000ull) == 0x7800000000000000ull) { 985 1.1 mrg // check if y is Inf. 986 1.1 mrg if (((y.w[1] & 0x7c00000000000000ull) == 0x7800000000000000ull)) 987 1.1 mrg // return NaN 988 1.1 mrg { 989 1.1 mrg #ifdef SET_STATUS_FLAGS 990 1.1 mrg __set_status_flags (pfpsf, INVALID_EXCEPTION); 991 1.1 mrg #endif 992 1.1 mrg res.w[1] = 0x7c00000000000000ull; 993 1.1 mrg res.w[0] = 0; 994 1.1 mrg BID_RETURN (res); 995 1.1 mrg } 996 1.1 mrg if (((y.w[1] & 0x7c00000000000000ull) != 0x7c00000000000000ull)) { 997 1.1 mrg // otherwise return +/-Inf 998 1.1 mrg res.w[1] = 999 1.1 mrg ((x ^ y.w[1]) & 0x8000000000000000ull) | 0x7800000000000000ull; 1000 1.1 mrg res.w[0] = 0; 1001 1.1 mrg BID_RETURN (res); 1002 1.1 mrg } 1003 1.1 mrg } 1004 1.1 mrg // x is 0 1005 1.1 mrg if ((y.w[1] & INFINITY_MASK64) != INFINITY_MASK64) { 1006 1.1 mrg if ((!CY.w[0]) && !(CY.w[1] & 0x0001ffffffffffffull)) { 1007 1.1 mrg #ifdef SET_STATUS_FLAGS 1008 1.1 mrg __set_status_flags (pfpsf, INVALID_EXCEPTION); 1009 1.1 mrg #endif 1010 1.1 mrg // x=y=0, return NaN 1011 1.1 mrg res.w[1] = 0x7c00000000000000ull; 1012 1.1 mrg res.w[0] = 0; 1013 1.1 mrg BID_RETURN (res); 1014 1.1 mrg } 1015 1.1 mrg // return 0 1016 1.1 mrg res.w[1] = (x ^ y.w[1]) & 0x8000000000000000ull; 1017 1.1 mrg exponent_x = exponent_x - exponent_y + (DECIMAL_EXPONENT_BIAS_128<<1) - DECIMAL_EXPONENT_BIAS; 1018 1.1 mrg if (exponent_x > DECIMAL_MAX_EXPON_128) 1019 1.1 mrg exponent_x = DECIMAL_MAX_EXPON_128; 1020 1.1 mrg else if (exponent_x < 0) 1021 1.1 mrg exponent_x = 0; 1022 1.1 mrg res.w[1] |= (((UINT64) exponent_x) << 49); 1023 1.1 mrg res.w[0] = 0; 1024 1.1 mrg BID_RETURN (res); 1025 1.1 mrg } 1026 1.1 mrg } 1027 1.1 mrg exponent_x += (DECIMAL_EXPONENT_BIAS_128 - DECIMAL_EXPONENT_BIAS); 1028 1.1 mrg 1029 1.1 mrg if (!valid_y) { 1030 1.1 mrg // y is Inf. or NaN 1031 1.1 mrg 1032 1.1 mrg // test if y is NaN 1033 1.1 mrg if ((y.w[1] & 0x7c00000000000000ull) == 0x7c00000000000000ull) { 1034 1.1 mrg #ifdef SET_STATUS_FLAGS 1035 1.1 mrg if ((y.w[1] & 0x7e00000000000000ull) == 0x7e00000000000000ull) // sNaN 1036 1.1 mrg __set_status_flags (pfpsf, INVALID_EXCEPTION); 1037 1.1 mrg #endif 1038 1.1 mrg res.w[1] = CY.w[1] & QUIET_MASK64; 1039 1.1 mrg res.w[0] = CY.w[0]; 1040 1.1 mrg BID_RETURN (res); 1041 1.1 mrg } 1042 1.1 mrg // y is Infinity? 1043 1.1 mrg if ((y.w[1] & 0x7800000000000000ull) == 0x7800000000000000ull) { 1044 1.1 mrg // return +/-0 1045 1.1 mrg res.w[1] = sign_x ^ sign_y; 1046 1.1 mrg res.w[0] = 0; 1047 1.1 mrg BID_RETURN (res); 1048 1.1 mrg } 1049 1.1 mrg // y is 0, return +/-Inf 1050 1.1 mrg res.w[1] = 1051 1.1 mrg ((x ^ y.w[1]) & 0x8000000000000000ull) | 0x7800000000000000ull; 1052 1.1 mrg res.w[0] = 0; 1053 1.1 mrg #ifdef SET_STATUS_FLAGS 1054 1.1 mrg __set_status_flags (pfpsf, ZERO_DIVIDE_EXCEPTION); 1055 1.1 mrg #endif 1056 1.1 mrg BID_RETURN (res); 1057 1.1 mrg } 1058 1.1 mrg #ifdef UNCHANGED_BINARY_STATUS_FLAGS 1059 1.1 mrg (void) fegetexceptflag (&binaryflags, FE_ALL_FLAGS); 1060 1.1 mrg #endif 1061 1.1 mrg diff_expon = exponent_x - exponent_y + DECIMAL_EXPONENT_BIAS_128; 1062 1.1 mrg 1063 1.1 mrg if (__unsigned_compare_gt_128 (CY, CX)) { 1064 1.1 mrg // CX < CY 1065 1.1 mrg 1066 1.1 mrg // 2^64 1067 1.1 mrg f64.i = 0x5f800000; 1068 1.1 mrg 1069 1.1 mrg // fx ~ CX, fy ~ CY 1070 1.1 mrg fx.d = (float) CX.w[1] * f64.d + (float) CX.w[0]; 1071 1.1 mrg fy.d = (float) CY.w[1] * f64.d + (float) CY.w[0]; 1072 1.1 mrg // expon_cy - expon_cx 1073 1.1 mrg bin_index = (fy.i - fx.i) >> 23; 1074 1.1 mrg 1075 1.1 mrg if (CX.w[1]) { 1076 1.1 mrg T = power10_index_binexp_128[bin_index].w[0]; 1077 1.1 mrg __mul_64x128_short (CA, T, CX); 1078 1.1 mrg } else { 1079 1.1 mrg T128 = power10_index_binexp_128[bin_index]; 1080 1.1 mrg __mul_64x128_short (CA, CX.w[0], T128); 1081 1.1 mrg } 1082 1.1 mrg 1083 1.1 mrg ed2 = 33; 1084 1.1 mrg if (__unsigned_compare_gt_128 (CY, CA)) 1085 1.1 mrg ed2++; 1086 1.1 mrg 1087 1.1 mrg T128 = power10_table_128[ed2]; 1088 1.1 mrg __mul_128x128_to_256 (CA4, CA, T128); 1089 1.1 mrg 1090 1.1 mrg ed2 += estimate_decimal_digits[bin_index]; 1091 1.1 mrg CQ.w[0] = CQ.w[1] = 0; 1092 1.1 mrg diff_expon = diff_expon - ed2; 1093 1.1 mrg 1094 1.1 mrg } else { 1095 1.1 mrg // get CQ = CX/CY 1096 1.1 mrg __div_128_by_128 (&CQ, &CR, CX, CY); 1097 1.1 mrg 1098 1.1 mrg if (!CR.w[1] && !CR.w[0]) { 1099 1.1 mrg get_BID128 (&res, sign_x ^ sign_y, diff_expon, CQ, &rnd_mode, 1100 1.1 mrg pfpsf); 1101 1.1 mrg #ifdef UNCHANGED_BINARY_STATUS_FLAGS 1102 1.1 mrg (void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS); 1103 1.1 mrg #endif 1104 1.1 mrg BID_RETURN (res); 1105 1.1 mrg } 1106 1.1 mrg // get number of decimal digits in CQ 1107 1.1 mrg // 2^64 1108 1.1 mrg f64.i = 0x5f800000; 1109 1.1 mrg fx.d = (float) CQ.w[1] * f64.d + (float) CQ.w[0]; 1110 1.1 mrg // binary expon. of CQ 1111 1.1 mrg bin_expon = (fx.i - 0x3f800000) >> 23; 1112 1.1 mrg 1113 1.1 mrg digits_q = estimate_decimal_digits[bin_expon]; 1114 1.1 mrg TP128.w[0] = power10_index_binexp_128[bin_expon].w[0]; 1115 1.1 mrg TP128.w[1] = power10_index_binexp_128[bin_expon].w[1]; 1116 1.1 mrg if (__unsigned_compare_ge_128 (CQ, TP128)) 1117 1.1 mrg digits_q++; 1118 1.1 mrg 1119 1.1 mrg ed2 = 34 - digits_q; 1120 1.1 mrg T128.w[0] = power10_table_128[ed2].w[0]; 1121 1.1 mrg T128.w[1] = power10_table_128[ed2].w[1]; 1122 1.1 mrg __mul_128x128_to_256 (CA4, CR, T128); 1123 1.1 mrg diff_expon = diff_expon - ed2; 1124 1.1 mrg __mul_128x128_low (CQ, CQ, T128); 1125 1.1 mrg 1126 1.1 mrg } 1127 1.1 mrg 1128 1.1 mrg __div_256_by_128 (&CQ, &CA4, CY); 1129 1.1 mrg 1130 1.1 mrg #ifdef SET_STATUS_FLAGS 1131 1.1 mrg if (CA4.w[0] || CA4.w[1]) { 1132 1.1 mrg // set status flags 1133 1.1 mrg __set_status_flags (pfpsf, INEXACT_EXCEPTION); 1134 1.1 mrg } 1135 1.1 mrg #ifndef LEAVE_TRAILING_ZEROS 1136 1.1 mrg else 1137 1.1 mrg #endif 1138 1.1 mrg #else 1139 1.1 mrg #ifndef LEAVE_TRAILING_ZEROS 1140 1.1 mrg if (!CA4.w[0] && !CA4.w[1]) 1141 1.1 mrg #endif 1142 1.1 mrg #endif 1143 1.1 mrg #ifndef LEAVE_TRAILING_ZEROS 1144 1.1 mrg // check whether result is exact 1145 1.1 mrg { 1146 1.1 mrg //printf("ed2=%d,nz=%d,a=%d,CQ="LX16","LX16", RH="LX16", RL="LX16"\n",ed2,nzeros,amount,CQ.w[1],CQ.w[0],reciprocals10_128[nzeros].w[1],reciprocals10_128[nzeros].w[0]);fflush(stdout); 1147 1.1 mrg // check whether CX, CY are short 1148 1.1 mrg if (!CX.w[1] && !CY.w[1] && (CX.w[0] <= 1024) && (CY.w[0] <= 1024)) { 1149 1.1 mrg i = (int) CY.w[0] - 1; 1150 1.1 mrg j = (int) CX.w[0] - 1; 1151 1.1 mrg // difference in powers of 2 factors for Y and X 1152 1.1 mrg nzeros = ed2 - factors[i][0] + factors[j][0]; 1153 1.1 mrg // difference in powers of 5 factors 1154 1.1 mrg d5 = ed2 - factors[i][1] + factors[j][1]; 1155 1.1 mrg if (d5 < nzeros) 1156 1.1 mrg nzeros = d5; 1157 1.1 mrg // get P*(2^M[extra_digits])/10^extra_digits 1158 1.5 mrg __mul_128x128_high (Qh, CQ, reciprocals10_128[nzeros]); 1159 1.1 mrg //__mul_128x128_to_256(P256, CQ, reciprocals10_128[nzeros]);Qh.w[1]=P256.w[3];Qh.w[0]=P256.w[2]; 1160 1.1 mrg 1161 1.1 mrg // now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128 1162 1.1 mrg amount = recip_scale[nzeros]; 1163 1.1 mrg __shr_128_long (CQ, Qh, amount); 1164 1.1 mrg 1165 1.1 mrg diff_expon += nzeros; 1166 1.1 mrg } else { 1167 1.1 mrg // decompose Q as Qh*10^17 + Ql 1168 1.1 mrg //T128 = reciprocals10_128[17]; 1169 1.1 mrg T128.w[0] = 0x44909befeb9fad49ull; 1170 1.1 mrg T128.w[1] = 0x000b877aa3236a4bull; 1171 1.1 mrg __mul_128x128_to_256 (P256, CQ, T128); 1172 1.1 mrg //amount = recip_scale[17]; 1173 1.1 mrg Q_high = (P256.w[2] >> 44) | (P256.w[3] << (64 - 44)); 1174 1.1 mrg Q_low = CQ.w[0] - Q_high * 100000000000000000ull; 1175 1.1 mrg 1176 1.1 mrg if (!Q_low) { 1177 1.1 mrg diff_expon += 17; 1178 1.1 mrg 1179 1.1 mrg tdigit[0] = Q_high & 0x3ffffff; 1180 1.1 mrg tdigit[1] = 0; 1181 1.1 mrg QX = Q_high >> 26; 1182 1.1 mrg QX32 = QX; 1183 1.1 mrg nzeros = 0; 1184 1.1 mrg 1185 1.1 mrg for (j = 0; QX32; j++, QX32 >>= 7) { 1186 1.1 mrg k = (QX32 & 127); 1187 1.1 mrg tdigit[0] += convert_table[j][k][0]; 1188 1.1 mrg tdigit[1] += convert_table[j][k][1]; 1189 1.1 mrg if (tdigit[0] >= 100000000) { 1190 1.1 mrg tdigit[0] -= 100000000; 1191 1.1 mrg tdigit[1]++; 1192 1.1 mrg } 1193 1.1 mrg } 1194 1.1 mrg 1195 1.1 mrg 1196 1.1 mrg if (tdigit[1] >= 100000000) { 1197 1.1 mrg tdigit[1] -= 100000000; 1198 1.1 mrg if (tdigit[1] >= 100000000) 1199 1.1 mrg tdigit[1] -= 100000000; 1200 1.1 mrg } 1201 1.1 mrg 1202 1.1 mrg digit = tdigit[0]; 1203 1.1 mrg if (!digit && !tdigit[1]) 1204 1.1 mrg nzeros += 16; 1205 1.1 mrg else { 1206 1.1 mrg if (!digit) { 1207 1.1 mrg nzeros += 8; 1208 1.1 mrg digit = tdigit[1]; 1209 1.1 mrg } 1210 1.1 mrg // decompose digit 1211 1.1 mrg PD = (UINT64) digit *0x068DB8BBull; 1212 1.1 mrg digit_h = (UINT32) (PD >> 40); 1213 1.1 mrg //printf("i=%d, nz=%d, digit=%d (%d, %016I64x %016I64x)\n",i,nzeros,digit_h,digit,PD,digit_h);fflush(stdout); 1214 1.1 mrg digit_low = digit - digit_h * 10000; 1215 1.1 mrg 1216 1.1 mrg if (!digit_low) 1217 1.1 mrg nzeros += 4; 1218 1.1 mrg else 1219 1.1 mrg digit_h = digit_low; 1220 1.1 mrg 1221 1.1 mrg if (!(digit_h & 1)) 1222 1.1 mrg nzeros += 1223 1.1 mrg 3 & (UINT32) (packed_10000_zeros[digit_h >> 3] >> 1224 1.1 mrg (digit_h & 7)); 1225 1.1 mrg } 1226 1.1 mrg 1227 1.1 mrg if (nzeros) { 1228 1.1 mrg __mul_64x64_to_128 (CQ, Q_high, reciprocals10_64[nzeros]); 1229 1.1 mrg 1230 1.1 mrg // now get P/10^extra_digits: shift C64 right by M[extra_digits]-64 1231 1.1 mrg amount = short_recip_scale[nzeros]; 1232 1.1 mrg CQ.w[0] = CQ.w[1] >> amount; 1233 1.1 mrg } else 1234 1.1 mrg CQ.w[0] = Q_high; 1235 1.1 mrg CQ.w[1] = 0; 1236 1.1 mrg 1237 1.1 mrg diff_expon += nzeros; 1238 1.1 mrg } else { 1239 1.1 mrg tdigit[0] = Q_low & 0x3ffffff; 1240 1.1 mrg tdigit[1] = 0; 1241 1.1 mrg QX = Q_low >> 26; 1242 1.1 mrg QX32 = QX; 1243 1.1 mrg nzeros = 0; 1244 1.1 mrg 1245 1.1 mrg for (j = 0; QX32; j++, QX32 >>= 7) { 1246 1.1 mrg k = (QX32 & 127); 1247 1.1 mrg tdigit[0] += convert_table[j][k][0]; 1248 1.1 mrg tdigit[1] += convert_table[j][k][1]; 1249 1.1 mrg if (tdigit[0] >= 100000000) { 1250 1.1 mrg tdigit[0] -= 100000000; 1251 1.1 mrg tdigit[1]++; 1252 1.1 mrg } 1253 1.1 mrg } 1254 1.1 mrg 1255 1.1 mrg if (tdigit[1] >= 100000000) { 1256 1.1 mrg tdigit[1] -= 100000000; 1257 1.1 mrg if (tdigit[1] >= 100000000) 1258 1.1 mrg tdigit[1] -= 100000000; 1259 1.1 mrg } 1260 1.1 mrg 1261 1.1 mrg digit = tdigit[0]; 1262 1.1 mrg if (!digit && !tdigit[1]) 1263 1.1 mrg nzeros += 16; 1264 1.1 mrg else { 1265 1.1 mrg if (!digit) { 1266 1.1 mrg nzeros += 8; 1267 1.1 mrg digit = tdigit[1]; 1268 1.1 mrg } 1269 1.1 mrg // decompose digit 1270 1.1 mrg PD = (UINT64) digit *0x068DB8BBull; 1271 1.1 mrg digit_h = (UINT32) (PD >> 40); 1272 1.1 mrg //printf("i=%d, nz=%d, digit=%d (%d, %016I64x %016I64x)\n",i,nzeros,digit_h,digit,PD,digit_h);fflush(stdout); 1273 1.1 mrg digit_low = digit - digit_h * 10000; 1274 1.1 mrg 1275 1.1 mrg if (!digit_low) 1276 1.1 mrg nzeros += 4; 1277 1.1 mrg else 1278 1.1 mrg digit_h = digit_low; 1279 1.1 mrg 1280 1.1 mrg if (!(digit_h & 1)) 1281 1.1 mrg nzeros += 1282 1.1 mrg 3 & (UINT32) (packed_10000_zeros[digit_h >> 3] >> 1283 1.1 mrg (digit_h & 7)); 1284 1.1 mrg } 1285 1.1 mrg 1286 1.1 mrg if (nzeros) { 1287 1.1 mrg // get P*(2^M[extra_digits])/10^extra_digits 1288 1.5 mrg __mul_128x128_high (Qh, CQ, reciprocals10_128[nzeros]); 1289 1.1 mrg 1290 1.1 mrg // now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128 1291 1.1 mrg amount = recip_scale[nzeros]; 1292 1.1 mrg __shr_128 (CQ, Qh, amount); 1293 1.1 mrg } 1294 1.1 mrg diff_expon += nzeros; 1295 1.1 mrg 1296 1.1 mrg } 1297 1.1 mrg } 1298 1.1 mrg get_BID128 (&res, sign_x ^ sign_y, diff_expon, CQ, &rnd_mode, 1299 1.1 mrg pfpsf); 1300 1.1 mrg #ifdef UNCHANGED_BINARY_STATUS_FLAGS 1301 1.1 mrg (void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS); 1302 1.1 mrg #endif 1303 1.1 mrg BID_RETURN (res); 1304 1.1 mrg } 1305 1.1 mrg #endif 1306 1.1 mrg 1307 1.1 mrg if (diff_expon >= 0) { 1308 1.1 mrg #ifdef IEEE_ROUND_NEAREST 1309 1.1 mrg // rounding 1310 1.1 mrg // 2*CA4 - CY 1311 1.1 mrg CA4r.w[1] = (CA4.w[1] + CA4.w[1]) | (CA4.w[0] >> 63); 1312 1.1 mrg CA4r.w[0] = CA4.w[0] + CA4.w[0]; 1313 1.1 mrg __sub_borrow_out (CA4r.w[0], carry64, CA4r.w[0], CY.w[0]); 1314 1.1 mrg CA4r.w[1] = CA4r.w[1] - CY.w[1] - carry64; 1315 1.1 mrg 1316 1.1 mrg D = (CA4r.w[1] | CA4r.w[0]) ? 1 : 0; 1317 1.1 mrg carry64 = (1 + (((SINT64) CA4r.w[1]) >> 63)) & ((CQ.w[0]) | D); 1318 1.1 mrg 1319 1.1 mrg CQ.w[0] += carry64; 1320 1.1 mrg if (CQ.w[0] < carry64) 1321 1.1 mrg CQ.w[1]++; 1322 1.1 mrg #else 1323 1.1 mrg #ifdef IEEE_ROUND_NEAREST_TIES_AWAY 1324 1.1 mrg // rounding 1325 1.1 mrg // 2*CA4 - CY 1326 1.1 mrg CA4r.w[1] = (CA4.w[1] + CA4.w[1]) | (CA4.w[0] >> 63); 1327 1.1 mrg CA4r.w[0] = CA4.w[0] + CA4.w[0]; 1328 1.1 mrg __sub_borrow_out (CA4r.w[0], carry64, CA4r.w[0], CY.w[0]); 1329 1.1 mrg CA4r.w[1] = CA4r.w[1] - CY.w[1] - carry64; 1330 1.1 mrg 1331 1.1 mrg D = (CA4r.w[1] | CA4r.w[0]) ? 0 : 1; 1332 1.1 mrg carry64 = (1 + (((SINT64) CA4r.w[1]) >> 63)) | D; 1333 1.1 mrg 1334 1.1 mrg CQ.w[0] += carry64; 1335 1.1 mrg if (CQ.w[0] < carry64) 1336 1.1 mrg CQ.w[1]++; 1337 1.1 mrg #else 1338 1.1 mrg rmode = rnd_mode; 1339 1.1 mrg if (sign_x ^ sign_y && (unsigned) (rmode - 1) < 2) 1340 1.1 mrg rmode = 3 - rmode; 1341 1.1 mrg switch (rmode) { 1342 1.1 mrg case ROUNDING_TO_NEAREST: // round to nearest code 1343 1.1 mrg // rounding 1344 1.1 mrg // 2*CA4 - CY 1345 1.1 mrg CA4r.w[1] = (CA4.w[1] + CA4.w[1]) | (CA4.w[0] >> 63); 1346 1.1 mrg CA4r.w[0] = CA4.w[0] + CA4.w[0]; 1347 1.1 mrg __sub_borrow_out (CA4r.w[0], carry64, CA4r.w[0], CY.w[0]); 1348 1.1 mrg CA4r.w[1] = CA4r.w[1] - CY.w[1] - carry64; 1349 1.1 mrg D = (CA4r.w[1] | CA4r.w[0]) ? 1 : 0; 1350 1.1 mrg carry64 = (1 + (((SINT64) CA4r.w[1]) >> 63)) & ((CQ.w[0]) | D); 1351 1.1 mrg CQ.w[0] += carry64; 1352 1.1 mrg if (CQ.w[0] < carry64) 1353 1.1 mrg CQ.w[1]++; 1354 1.1 mrg break; 1355 1.1 mrg case ROUNDING_TIES_AWAY: 1356 1.1 mrg // rounding 1357 1.1 mrg // 2*CA4 - CY 1358 1.1 mrg CA4r.w[1] = (CA4.w[1] + CA4.w[1]) | (CA4.w[0] >> 63); 1359 1.1 mrg CA4r.w[0] = CA4.w[0] + CA4.w[0]; 1360 1.1 mrg __sub_borrow_out (CA4r.w[0], carry64, CA4r.w[0], CY.w[0]); 1361 1.1 mrg CA4r.w[1] = CA4r.w[1] - CY.w[1] - carry64; 1362 1.1 mrg D = (CA4r.w[1] | CA4r.w[0]) ? 0 : 1; 1363 1.1 mrg carry64 = (1 + (((SINT64) CA4r.w[1]) >> 63)) | D; 1364 1.1 mrg CQ.w[0] += carry64; 1365 1.1 mrg if (CQ.w[0] < carry64) 1366 1.1 mrg CQ.w[1]++; 1367 1.1 mrg break; 1368 1.1 mrg case ROUNDING_DOWN: 1369 1.1 mrg case ROUNDING_TO_ZERO: 1370 1.1 mrg break; 1371 1.1 mrg default: // rounding up 1372 1.1 mrg CQ.w[0]++; 1373 1.1 mrg if (!CQ.w[0]) 1374 1.1 mrg CQ.w[1]++; 1375 1.1 mrg break; 1376 1.1 mrg } 1377 1.1 mrg #endif 1378 1.1 mrg #endif 1379 1.1 mrg 1380 1.1 mrg } else { 1381 1.1 mrg #ifdef SET_STATUS_FLAGS 1382 1.1 mrg if (CA4.w[0] || CA4.w[1]) { 1383 1.1 mrg // set status flags 1384 1.1 mrg __set_status_flags (pfpsf, INEXACT_EXCEPTION); 1385 1.1 mrg } 1386 1.1 mrg #endif 1387 1.1 mrg handle_UF_128_rem (&res, sign_x ^ sign_y, diff_expon, CQ, 1388 1.1 mrg CA4.w[1] | CA4.w[0], &rnd_mode, pfpsf); 1389 1.1 mrg #ifdef UNCHANGED_BINARY_STATUS_FLAGS 1390 1.1 mrg (void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS); 1391 1.1 mrg #endif 1392 1.1 mrg BID_RETURN (res); 1393 1.1 mrg } 1394 1.1 mrg 1395 1.1 mrg get_BID128 (&res, sign_x ^ sign_y, diff_expon, CQ, &rnd_mode, pfpsf); 1396 1.1 mrg #ifdef UNCHANGED_BINARY_STATUS_FLAGS 1397 1.1 mrg (void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS); 1398 1.1 mrg #endif 1399 1.1 mrg BID_RETURN (res); 1400 1.1 mrg 1401 1.1 mrg } 1402 1.1 mrg 1403 1.1 mrg 1404 1.1 mrg BID128_FUNCTION_ARG128_ARGTYPE2 (bid128qd_div, x, UINT64, y) 1405 1.1 mrg UINT256 CA4, CA4r, P256; 1406 1.5 mrg UINT128 CX, CY, T128, CQ, CR, CA, TP128, Qh, res; 1407 1.1 mrg UINT64 sign_x, sign_y, T, carry64, D, Q_high, Q_low, QX, PD, 1408 1.1 mrg valid_y; 1409 1.1 mrg int_float fx, fy, f64; 1410 1.1 mrg UINT32 QX32, tdigit[3], digit, digit_h, digit_low; 1411 1.1 mrg int exponent_x, exponent_y, bin_index, bin_expon, diff_expon, ed2, 1412 1.1 mrg digits_q, amount; 1413 1.1 mrg int nzeros, i, j, k, d5, rmode; 1414 1.1 mrg #ifdef UNCHANGED_BINARY_STATUS_FLAGS 1415 1.1 mrg fexcept_t binaryflags = 0; 1416 1.1 mrg #endif 1417 1.1 mrg 1418 1.1 mrg 1419 1.1 mrg valid_y = unpack_BID64 (&sign_y, &exponent_y, &CY.w[0], y); 1420 1.1 mrg // unpack arguments, check for NaN or Infinity 1421 1.1 mrg if (!unpack_BID128_value (&sign_x, &exponent_x, &CX, x)) { 1422 1.1 mrg // test if x is NaN 1423 1.1 mrg if ((x.w[1] & 0x7c00000000000000ull) == 0x7c00000000000000ull) { 1424 1.1 mrg #ifdef SET_STATUS_FLAGS 1425 1.1 mrg if ((x.w[1] & 0x7e00000000000000ull) == 0x7e00000000000000ull || // sNaN 1426 1.1 mrg (y & 0x7e00000000000000ull) == 0x7e00000000000000ull) 1427 1.1 mrg __set_status_flags (pfpsf, INVALID_EXCEPTION); 1428 1.1 mrg #endif 1429 1.1 mrg res.w[1] = (CX.w[1]) & QUIET_MASK64; 1430 1.1 mrg res.w[0] = CX.w[0]; 1431 1.1 mrg BID_RETURN (res); 1432 1.1 mrg } 1433 1.1 mrg // x is Infinity? 1434 1.1 mrg if ((x.w[1] & 0x7800000000000000ull) == 0x7800000000000000ull) { 1435 1.1 mrg // check if y is Inf. 1436 1.1 mrg if (((y & 0x7c00000000000000ull) == 0x7800000000000000ull)) 1437 1.1 mrg // return NaN 1438 1.1 mrg { 1439 1.1 mrg #ifdef SET_STATUS_FLAGS 1440 1.1 mrg __set_status_flags (pfpsf, INVALID_EXCEPTION); 1441 1.1 mrg #endif 1442 1.1 mrg res.w[1] = 0x7c00000000000000ull; 1443 1.1 mrg res.w[0] = 0; 1444 1.1 mrg BID_RETURN (res); 1445 1.1 mrg } 1446 1.1 mrg // y is NaN? 1447 1.1 mrg if (((y & 0x7c00000000000000ull) != 0x7c00000000000000ull)) 1448 1.1 mrg // return NaN 1449 1.1 mrg { 1450 1.1 mrg // return +/-Inf 1451 1.1 mrg res.w[1] = ((x.w[1] ^ y) & 0x8000000000000000ull) | 1452 1.1 mrg 0x7800000000000000ull; 1453 1.1 mrg res.w[0] = 0; 1454 1.1 mrg BID_RETURN (res); 1455 1.1 mrg } 1456 1.1 mrg } 1457 1.1 mrg // x is 0 1458 1.1 mrg if ((y & 0x7800000000000000ull) < 0x7800000000000000ull) { 1459 1.1 mrg if (!CY.w[0]) { 1460 1.1 mrg #ifdef SET_STATUS_FLAGS 1461 1.1 mrg __set_status_flags (pfpsf, INVALID_EXCEPTION); 1462 1.1 mrg #endif 1463 1.1 mrg // x=y=0, return NaN 1464 1.1 mrg res.w[1] = 0x7c00000000000000ull; 1465 1.1 mrg res.w[0] = 0; 1466 1.1 mrg BID_RETURN (res); 1467 1.1 mrg } 1468 1.1 mrg // return 0 1469 1.1 mrg res.w[1] = (x.w[1] ^ y) & 0x8000000000000000ull; 1470 1.1 mrg exponent_x = exponent_x - exponent_y + DECIMAL_EXPONENT_BIAS; 1471 1.1 mrg if (exponent_x > DECIMAL_MAX_EXPON_128) 1472 1.1 mrg exponent_x = DECIMAL_MAX_EXPON_128; 1473 1.1 mrg else if (exponent_x < 0) 1474 1.1 mrg exponent_x = 0; 1475 1.1 mrg res.w[1] |= (((UINT64) exponent_x) << 49); 1476 1.1 mrg res.w[0] = 0; 1477 1.1 mrg BID_RETURN (res); 1478 1.1 mrg } 1479 1.1 mrg } 1480 1.1 mrg CY.w[1] = 0; 1481 1.1 mrg if (!valid_y) { 1482 1.1 mrg // y is Inf. or NaN 1483 1.1 mrg 1484 1.1 mrg // test if y is NaN 1485 1.1 mrg if ((y & NAN_MASK64) == NAN_MASK64) { 1486 1.1 mrg #ifdef SET_STATUS_FLAGS 1487 1.1 mrg if ((y & SNAN_MASK64) == SNAN_MASK64) // sNaN 1488 1.1 mrg __set_status_flags (pfpsf, INVALID_EXCEPTION); 1489 1.1 mrg #endif 1490 1.1 mrg res.w[0] = (CY.w[0] & 0x0003ffffffffffffull); 1491 1.1 mrg __mul_64x64_to_128 (res, res.w[0], power10_table_128[18].w[0]); 1492 1.1 mrg res.w[1] |= ((CY.w[0]) & 0xfc00000000000000ull); 1493 1.1 mrg BID_RETURN (res); 1494 1.1 mrg } 1495 1.1 mrg // y is Infinity? 1496 1.1 mrg if ((y & INFINITY_MASK64) == INFINITY_MASK64) { 1497 1.1 mrg // return +/-0 1498 1.1 mrg res.w[1] = ((x.w[1] ^ y) & 0x8000000000000000ull); 1499 1.1 mrg res.w[0] = 0; 1500 1.1 mrg BID_RETURN (res); 1501 1.1 mrg } 1502 1.1 mrg // y is 0 1503 1.1 mrg #ifdef SET_STATUS_FLAGS 1504 1.1 mrg __set_status_flags (pfpsf, ZERO_DIVIDE_EXCEPTION); 1505 1.1 mrg #endif 1506 1.1 mrg res.w[1] = (sign_x ^ sign_y) | INFINITY_MASK64; 1507 1.1 mrg res.w[0] = 0; 1508 1.1 mrg BID_RETURN (res); 1509 1.1 mrg } 1510 1.1 mrg #ifdef UNCHANGED_BINARY_STATUS_FLAGS 1511 1.1 mrg (void) fegetexceptflag (&binaryflags, FE_ALL_FLAGS); 1512 1.1 mrg #endif 1513 1.1 mrg diff_expon = exponent_x - exponent_y + DECIMAL_EXPONENT_BIAS; 1514 1.1 mrg 1515 1.1 mrg if (__unsigned_compare_gt_128 (CY, CX)) { 1516 1.1 mrg // CX < CY 1517 1.1 mrg 1518 1.1 mrg // 2^64 1519 1.1 mrg f64.i = 0x5f800000; 1520 1.1 mrg 1521 1.1 mrg // fx ~ CX, fy ~ CY 1522 1.1 mrg fx.d = (float) CX.w[1] * f64.d + (float) CX.w[0]; 1523 1.1 mrg fy.d = (float) CY.w[1] * f64.d + (float) CY.w[0]; 1524 1.1 mrg // expon_cy - expon_cx 1525 1.1 mrg bin_index = (fy.i - fx.i) >> 23; 1526 1.1 mrg 1527 1.1 mrg if (CX.w[1]) { 1528 1.1 mrg T = power10_index_binexp_128[bin_index].w[0]; 1529 1.1 mrg __mul_64x128_short (CA, T, CX); 1530 1.1 mrg } else { 1531 1.1 mrg T128 = power10_index_binexp_128[bin_index]; 1532 1.1 mrg __mul_64x128_short (CA, CX.w[0], T128); 1533 1.1 mrg } 1534 1.1 mrg 1535 1.1 mrg ed2 = 33; 1536 1.1 mrg if (__unsigned_compare_gt_128 (CY, CA)) 1537 1.1 mrg ed2++; 1538 1.1 mrg 1539 1.1 mrg T128 = power10_table_128[ed2]; 1540 1.1 mrg __mul_128x128_to_256 (CA4, CA, T128); 1541 1.1 mrg 1542 1.1 mrg ed2 += estimate_decimal_digits[bin_index]; 1543 1.1 mrg CQ.w[0] = CQ.w[1] = 0; 1544 1.1 mrg diff_expon = diff_expon - ed2; 1545 1.1 mrg 1546 1.1 mrg } else { 1547 1.1 mrg // get CQ = CX/CY 1548 1.1 mrg __div_128_by_128 (&CQ, &CR, CX, CY); 1549 1.1 mrg 1550 1.1 mrg if (!CR.w[1] && !CR.w[0]) { 1551 1.1 mrg get_BID128 (&res, sign_x ^ sign_y, diff_expon, CQ, &rnd_mode, 1552 1.1 mrg pfpsf); 1553 1.1 mrg #ifdef UNCHANGED_BINARY_STATUS_FLAGS 1554 1.1 mrg (void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS); 1555 1.1 mrg #endif 1556 1.1 mrg BID_RETURN (res); 1557 1.1 mrg } 1558 1.1 mrg // get number of decimal digits in CQ 1559 1.1 mrg // 2^64 1560 1.1 mrg f64.i = 0x5f800000; 1561 1.1 mrg fx.d = (float) CQ.w[1] * f64.d + (float) CQ.w[0]; 1562 1.1 mrg // binary expon. of CQ 1563 1.1 mrg bin_expon = (fx.i - 0x3f800000) >> 23; 1564 1.1 mrg 1565 1.1 mrg digits_q = estimate_decimal_digits[bin_expon]; 1566 1.1 mrg TP128.w[0] = power10_index_binexp_128[bin_expon].w[0]; 1567 1.1 mrg TP128.w[1] = power10_index_binexp_128[bin_expon].w[1]; 1568 1.1 mrg if (__unsigned_compare_ge_128 (CQ, TP128)) 1569 1.1 mrg digits_q++; 1570 1.1 mrg 1571 1.1 mrg ed2 = 34 - digits_q; 1572 1.1 mrg T128.w[0] = power10_table_128[ed2].w[0]; 1573 1.1 mrg T128.w[1] = power10_table_128[ed2].w[1]; 1574 1.1 mrg __mul_128x128_to_256 (CA4, CR, T128); 1575 1.1 mrg diff_expon = diff_expon - ed2; 1576 1.1 mrg __mul_128x128_low (CQ, CQ, T128); 1577 1.1 mrg 1578 1.1 mrg } 1579 1.1 mrg 1580 1.1 mrg __div_256_by_128 (&CQ, &CA4, CY); 1581 1.1 mrg 1582 1.1 mrg 1583 1.1 mrg #ifdef SET_STATUS_FLAGS 1584 1.1 mrg if (CA4.w[0] || CA4.w[1]) { 1585 1.1 mrg // set status flags 1586 1.1 mrg __set_status_flags (pfpsf, INEXACT_EXCEPTION); 1587 1.1 mrg } 1588 1.1 mrg #ifndef LEAVE_TRAILING_ZEROS 1589 1.1 mrg else 1590 1.1 mrg #endif 1591 1.1 mrg #else 1592 1.1 mrg #ifndef LEAVE_TRAILING_ZEROS 1593 1.1 mrg if (!CA4.w[0] && !CA4.w[1]) 1594 1.1 mrg #endif 1595 1.1 mrg #endif 1596 1.1 mrg #ifndef LEAVE_TRAILING_ZEROS 1597 1.1 mrg // check whether result is exact 1598 1.1 mrg { 1599 1.1 mrg // check whether CX, CY are short 1600 1.1 mrg if (!CX.w[1] && !CY.w[1] && (CX.w[0] <= 1024) && (CY.w[0] <= 1024)) { 1601 1.1 mrg i = (int) CY.w[0] - 1; 1602 1.1 mrg j = (int) CX.w[0] - 1; 1603 1.1 mrg // difference in powers of 2 factors for Y and X 1604 1.1 mrg nzeros = ed2 - factors[i][0] + factors[j][0]; 1605 1.1 mrg // difference in powers of 5 factors 1606 1.1 mrg d5 = ed2 - factors[i][1] + factors[j][1]; 1607 1.1 mrg if (d5 < nzeros) 1608 1.1 mrg nzeros = d5; 1609 1.1 mrg // get P*(2^M[extra_digits])/10^extra_digits 1610 1.5 mrg __mul_128x128_high (Qh, CQ, reciprocals10_128[nzeros]); 1611 1.1 mrg //__mul_128x128_to_256(P256, CQ, reciprocals10_128[nzeros]);Qh.w[1]=P256.w[3];Qh.w[0]=P256.w[2]; 1612 1.1 mrg 1613 1.1 mrg // now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128 1614 1.1 mrg amount = recip_scale[nzeros]; 1615 1.1 mrg __shr_128_long (CQ, Qh, amount); 1616 1.1 mrg 1617 1.1 mrg diff_expon += nzeros; 1618 1.1 mrg } else { 1619 1.1 mrg // decompose Q as Qh*10^17 + Ql 1620 1.1 mrg //T128 = reciprocals10_128[17]; 1621 1.1 mrg T128.w[0] = 0x44909befeb9fad49ull; 1622 1.1 mrg T128.w[1] = 0x000b877aa3236a4bull; 1623 1.1 mrg __mul_128x128_to_256 (P256, CQ, T128); 1624 1.1 mrg //amount = recip_scale[17]; 1625 1.1 mrg Q_high = (P256.w[2] >> 44) | (P256.w[3] << (64 - 44)); 1626 1.1 mrg Q_low = CQ.w[0] - Q_high * 100000000000000000ull; 1627 1.1 mrg 1628 1.1 mrg if (!Q_low) { 1629 1.1 mrg diff_expon += 17; 1630 1.1 mrg 1631 1.1 mrg tdigit[0] = Q_high & 0x3ffffff; 1632 1.1 mrg tdigit[1] = 0; 1633 1.1 mrg QX = Q_high >> 26; 1634 1.1 mrg QX32 = QX; 1635 1.1 mrg nzeros = 0; 1636 1.1 mrg 1637 1.1 mrg for (j = 0; QX32; j++, QX32 >>= 7) { 1638 1.1 mrg k = (QX32 & 127); 1639 1.1 mrg tdigit[0] += convert_table[j][k][0]; 1640 1.1 mrg tdigit[1] += convert_table[j][k][1]; 1641 1.1 mrg if (tdigit[0] >= 100000000) { 1642 1.1 mrg tdigit[0] -= 100000000; 1643 1.1 mrg tdigit[1]++; 1644 1.1 mrg } 1645 1.1 mrg } 1646 1.1 mrg 1647 1.1 mrg 1648 1.1 mrg if (tdigit[1] >= 100000000) { 1649 1.1 mrg tdigit[1] -= 100000000; 1650 1.1 mrg if (tdigit[1] >= 100000000) 1651 1.1 mrg tdigit[1] -= 100000000; 1652 1.1 mrg } 1653 1.1 mrg 1654 1.1 mrg digit = tdigit[0]; 1655 1.1 mrg if (!digit && !tdigit[1]) 1656 1.1 mrg nzeros += 16; 1657 1.1 mrg else { 1658 1.1 mrg if (!digit) { 1659 1.1 mrg nzeros += 8; 1660 1.1 mrg digit = tdigit[1]; 1661 1.1 mrg } 1662 1.1 mrg // decompose digit 1663 1.1 mrg PD = (UINT64) digit *0x068DB8BBull; 1664 1.1 mrg digit_h = (UINT32) (PD >> 40); 1665 1.1 mrg digit_low = digit - digit_h * 10000; 1666 1.1 mrg 1667 1.1 mrg if (!digit_low) 1668 1.1 mrg nzeros += 4; 1669 1.1 mrg else 1670 1.1 mrg digit_h = digit_low; 1671 1.1 mrg 1672 1.1 mrg if (!(digit_h & 1)) 1673 1.1 mrg nzeros += 1674 1.1 mrg 3 & (UINT32) (packed_10000_zeros[digit_h >> 3] >> 1675 1.1 mrg (digit_h & 7)); 1676 1.1 mrg } 1677 1.1 mrg 1678 1.1 mrg if (nzeros) { 1679 1.1 mrg __mul_64x64_to_128 (CQ, Q_high, reciprocals10_64[nzeros]); 1680 1.1 mrg 1681 1.1 mrg // now get P/10^extra_digits: shift C64 right by M[extra_digits]-64 1682 1.1 mrg amount = short_recip_scale[nzeros]; 1683 1.1 mrg CQ.w[0] = CQ.w[1] >> amount; 1684 1.1 mrg } else 1685 1.1 mrg CQ.w[0] = Q_high; 1686 1.1 mrg CQ.w[1] = 0; 1687 1.1 mrg 1688 1.1 mrg diff_expon += nzeros; 1689 1.1 mrg } else { 1690 1.1 mrg tdigit[0] = Q_low & 0x3ffffff; 1691 1.1 mrg tdigit[1] = 0; 1692 1.1 mrg QX = Q_low >> 26; 1693 1.1 mrg QX32 = QX; 1694 1.1 mrg nzeros = 0; 1695 1.1 mrg 1696 1.1 mrg for (j = 0; QX32; j++, QX32 >>= 7) { 1697 1.1 mrg k = (QX32 & 127); 1698 1.1 mrg tdigit[0] += convert_table[j][k][0]; 1699 1.1 mrg tdigit[1] += convert_table[j][k][1]; 1700 1.1 mrg if (tdigit[0] >= 100000000) { 1701 1.1 mrg tdigit[0] -= 100000000; 1702 1.1 mrg tdigit[1]++; 1703 1.1 mrg } 1704 1.1 mrg } 1705 1.1 mrg 1706 1.1 mrg if (tdigit[1] >= 100000000) { 1707 1.1 mrg tdigit[1] -= 100000000; 1708 1.1 mrg if (tdigit[1] >= 100000000) 1709 1.1 mrg tdigit[1] -= 100000000; 1710 1.1 mrg } 1711 1.1 mrg 1712 1.1 mrg digit = tdigit[0]; 1713 1.1 mrg if (!digit && !tdigit[1]) 1714 1.1 mrg nzeros += 16; 1715 1.1 mrg else { 1716 1.1 mrg if (!digit) { 1717 1.1 mrg nzeros += 8; 1718 1.1 mrg digit = tdigit[1]; 1719 1.1 mrg } 1720 1.1 mrg // decompose digit 1721 1.1 mrg PD = (UINT64) digit *0x068DB8BBull; 1722 1.1 mrg digit_h = (UINT32) (PD >> 40); 1723 1.1 mrg digit_low = digit - digit_h * 10000; 1724 1.1 mrg 1725 1.1 mrg if (!digit_low) 1726 1.1 mrg nzeros += 4; 1727 1.1 mrg else 1728 1.1 mrg digit_h = digit_low; 1729 1.1 mrg 1730 1.1 mrg if (!(digit_h & 1)) 1731 1.1 mrg nzeros += 1732 1.1 mrg 3 & (UINT32) (packed_10000_zeros[digit_h >> 3] >> 1733 1.1 mrg (digit_h & 7)); 1734 1.1 mrg } 1735 1.1 mrg 1736 1.1 mrg if (nzeros) { 1737 1.1 mrg // get P*(2^M[extra_digits])/10^extra_digits 1738 1.5 mrg __mul_128x128_high (Qh, CQ, reciprocals10_128[nzeros]); 1739 1.1 mrg 1740 1.1 mrg // now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128 1741 1.1 mrg amount = recip_scale[nzeros]; 1742 1.1 mrg __shr_128 (CQ, Qh, amount); 1743 1.1 mrg } 1744 1.1 mrg diff_expon += nzeros; 1745 1.1 mrg 1746 1.1 mrg } 1747 1.1 mrg } 1748 1.1 mrg get_BID128 (&res, sign_x ^ sign_y, diff_expon, CQ, &rnd_mode,pfpsf); 1749 1.1 mrg #ifdef UNCHANGED_BINARY_STATUS_FLAGS 1750 1.1 mrg (void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS); 1751 1.1 mrg #endif 1752 1.1 mrg BID_RETURN (res); 1753 1.1 mrg } 1754 1.1 mrg #endif 1755 1.1 mrg 1756 1.1 mrg if (diff_expon >= 0) { 1757 1.1 mrg #ifdef IEEE_ROUND_NEAREST 1758 1.1 mrg // rounding 1759 1.1 mrg // 2*CA4 - CY 1760 1.1 mrg CA4r.w[1] = (CA4.w[1] + CA4.w[1]) | (CA4.w[0] >> 63); 1761 1.1 mrg CA4r.w[0] = CA4.w[0] + CA4.w[0]; 1762 1.1 mrg __sub_borrow_out (CA4r.w[0], carry64, CA4r.w[0], CY.w[0]); 1763 1.1 mrg CA4r.w[1] = CA4r.w[1] - CY.w[1] - carry64; 1764 1.1 mrg 1765 1.1 mrg D = (CA4r.w[1] | CA4r.w[0]) ? 1 : 0; 1766 1.1 mrg carry64 = (1 + (((SINT64) CA4r.w[1]) >> 63)) & ((CQ.w[0]) | D); 1767 1.1 mrg 1768 1.1 mrg CQ.w[0] += carry64; 1769 1.1 mrg if (CQ.w[0] < carry64) 1770 1.1 mrg CQ.w[1]++; 1771 1.1 mrg #else 1772 1.1 mrg #ifdef IEEE_ROUND_NEAREST_TIES_AWAY 1773 1.1 mrg // rounding 1774 1.1 mrg // 2*CA4 - CY 1775 1.1 mrg CA4r.w[1] = (CA4.w[1] + CA4.w[1]) | (CA4.w[0] >> 63); 1776 1.1 mrg CA4r.w[0] = CA4.w[0] + CA4.w[0]; 1777 1.1 mrg __sub_borrow_out (CA4r.w[0], carry64, CA4r.w[0], CY.w[0]); 1778 1.1 mrg CA4r.w[1] = CA4r.w[1] - CY.w[1] - carry64; 1779 1.1 mrg 1780 1.1 mrg D = (CA4r.w[1] | CA4r.w[0]) ? 0 : 1; 1781 1.1 mrg carry64 = (1 + (((SINT64) CA4r.w[1]) >> 63)) | D; 1782 1.1 mrg 1783 1.1 mrg CQ.w[0] += carry64; 1784 1.1 mrg if (CQ.w[0] < carry64) 1785 1.1 mrg CQ.w[1]++; 1786 1.1 mrg #else 1787 1.1 mrg rmode = rnd_mode; 1788 1.1 mrg if (sign_x ^ sign_y && (unsigned) (rmode - 1) < 2) 1789 1.1 mrg rmode = 3 - rmode; 1790 1.1 mrg switch (rmode) { 1791 1.1 mrg case ROUNDING_TO_NEAREST: // round to nearest code 1792 1.1 mrg // rounding 1793 1.1 mrg // 2*CA4 - CY 1794 1.1 mrg CA4r.w[1] = (CA4.w[1] + CA4.w[1]) | (CA4.w[0] >> 63); 1795 1.1 mrg CA4r.w[0] = CA4.w[0] + CA4.w[0]; 1796 1.1 mrg __sub_borrow_out (CA4r.w[0], carry64, CA4r.w[0], CY.w[0]); 1797 1.1 mrg CA4r.w[1] = CA4r.w[1] - CY.w[1] - carry64; 1798 1.1 mrg D = (CA4r.w[1] | CA4r.w[0]) ? 1 : 0; 1799 1.1 mrg carry64 = (1 + (((SINT64) CA4r.w[1]) >> 63)) & ((CQ.w[0]) | D); 1800 1.1 mrg CQ.w[0] += carry64; 1801 1.1 mrg if (CQ.w[0] < carry64) 1802 1.1 mrg CQ.w[1]++; 1803 1.1 mrg break; 1804 1.1 mrg case ROUNDING_TIES_AWAY: 1805 1.1 mrg // rounding 1806 1.1 mrg // 2*CA4 - CY 1807 1.1 mrg CA4r.w[1] = (CA4.w[1] + CA4.w[1]) | (CA4.w[0] >> 63); 1808 1.1 mrg CA4r.w[0] = CA4.w[0] + CA4.w[0]; 1809 1.1 mrg __sub_borrow_out (CA4r.w[0], carry64, CA4r.w[0], CY.w[0]); 1810 1.1 mrg CA4r.w[1] = CA4r.w[1] - CY.w[1] - carry64; 1811 1.1 mrg D = (CA4r.w[1] | CA4r.w[0]) ? 0 : 1; 1812 1.1 mrg carry64 = (1 + (((SINT64) CA4r.w[1]) >> 63)) | D; 1813 1.1 mrg CQ.w[0] += carry64; 1814 1.1 mrg if (CQ.w[0] < carry64) 1815 1.1 mrg CQ.w[1]++; 1816 1.1 mrg break; 1817 1.1 mrg case ROUNDING_DOWN: 1818 1.1 mrg case ROUNDING_TO_ZERO: 1819 1.1 mrg break; 1820 1.1 mrg default: // rounding up 1821 1.1 mrg CQ.w[0]++; 1822 1.1 mrg if (!CQ.w[0]) 1823 1.1 mrg CQ.w[1]++; 1824 1.1 mrg break; 1825 1.1 mrg } 1826 1.1 mrg #endif 1827 1.1 mrg #endif 1828 1.1 mrg 1829 1.1 mrg } else { 1830 1.1 mrg #ifdef SET_STATUS_FLAGS 1831 1.1 mrg if (CA4.w[0] || CA4.w[1]) { 1832 1.1 mrg // set status flags 1833 1.1 mrg __set_status_flags (pfpsf, INEXACT_EXCEPTION); 1834 1.1 mrg } 1835 1.1 mrg #endif 1836 1.1 mrg handle_UF_128_rem (&res, sign_x ^ sign_y, diff_expon, CQ, 1837 1.1 mrg CA4.w[1] | CA4.w[0], &rnd_mode, pfpsf); 1838 1.1 mrg #ifdef UNCHANGED_BINARY_STATUS_FLAGS 1839 1.1 mrg (void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS); 1840 1.1 mrg #endif 1841 1.1 mrg BID_RETURN (res); 1842 1.1 mrg 1843 1.1 mrg } 1844 1.1 mrg 1845 1.1 mrg get_BID128 (&res, sign_x ^ sign_y, diff_expon, CQ, &rnd_mode, pfpsf); 1846 1.1 mrg #ifdef UNCHANGED_BINARY_STATUS_FLAGS 1847 1.1 mrg (void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS); 1848 1.1 mrg #endif 1849 1.1 mrg BID_RETURN (res); 1850 1.1 mrg 1851 1.1 mrg } 1852