17ec681f3Smrg/* 27ec681f3Smrg * License for Berkeley SoftFloat Release 3e 37ec681f3Smrg * 47ec681f3Smrg * John R. Hauser 57ec681f3Smrg * 2018 January 20 67ec681f3Smrg * 77ec681f3Smrg * The following applies to the whole of SoftFloat Release 3e as well as to 87ec681f3Smrg * each source file individually. 97ec681f3Smrg * 107ec681f3Smrg * Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018 The Regents of the 117ec681f3Smrg * University of California. All rights reserved. 127ec681f3Smrg * 137ec681f3Smrg * Redistribution and use in source and binary forms, with or without 147ec681f3Smrg * modification, are permitted provided that the following conditions are met: 157ec681f3Smrg * 167ec681f3Smrg * 1. Redistributions of source code must retain the above copyright notice, 177ec681f3Smrg * this list of conditions, and the following disclaimer. 187ec681f3Smrg * 197ec681f3Smrg * 2. Redistributions in binary form must reproduce the above copyright 207ec681f3Smrg * notice, this list of conditions, and the following disclaimer in the 217ec681f3Smrg * documentation and/or other materials provided with the distribution. 227ec681f3Smrg * 237ec681f3Smrg * 3. Neither the name of the University nor the names of its contributors 247ec681f3Smrg * may be used to endorse or promote products derived from this software 257ec681f3Smrg * without specific prior written permission. 267ec681f3Smrg * 277ec681f3Smrg * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY 287ec681f3Smrg * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 297ec681f3Smrg * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE 307ec681f3Smrg * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY 317ec681f3Smrg * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 327ec681f3Smrg * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 337ec681f3Smrg * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 347ec681f3Smrg * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 357ec681f3Smrg * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 367ec681f3Smrg * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 377ec681f3Smrg * 387ec681f3Smrg * 397ec681f3Smrg * The functions listed in this file are modified versions of the ones 407ec681f3Smrg * from the Berkeley SoftFloat 3e Library. 417ec681f3Smrg * 427ec681f3Smrg * Their implementation correctness has been checked with the Berkeley 437ec681f3Smrg * TestFloat Release 3e tool for x86_64. 447ec681f3Smrg */ 457ec681f3Smrg 467ec681f3Smrg#include "rounding.h" 477ec681f3Smrg#include "bitscan.h" 487ec681f3Smrg#include "softfloat.h" 497ec681f3Smrg 507ec681f3Smrg#if defined(BIG_ENDIAN) 517ec681f3Smrg#define word_incr -1 527ec681f3Smrg#define index_word(total, n) ((total) - 1 - (n)) 537ec681f3Smrg#define index_word_hi(total) 0 547ec681f3Smrg#define index_word_lo(total) ((total) - 1) 557ec681f3Smrg#define index_multiword_hi(total, n) 0 567ec681f3Smrg#define index_multiword_lo(total, n) ((total) - (n)) 577ec681f3Smrg#define index_multiword_hi_but(total, n) 0 587ec681f3Smrg#define index_multiword_lo_but(total, n) (n) 597ec681f3Smrg#else 607ec681f3Smrg#define word_incr 1 617ec681f3Smrg#define index_word(total, n) (n) 627ec681f3Smrg#define index_word_hi(total) ((total) - 1) 637ec681f3Smrg#define index_word_lo(total) 0 647ec681f3Smrg#define index_multiword_hi(total, n) ((total) - (n)) 657ec681f3Smrg#define index_multiword_lo(total, n) 0 667ec681f3Smrg#define index_multiword_hi_but(total, n) (n) 677ec681f3Smrg#define index_multiword_lo_but(total, n) 0 687ec681f3Smrg#endif 697ec681f3Smrg 707ec681f3Smrgtypedef union { double f; int64_t i; uint64_t u; } di_type; 717ec681f3Smrgtypedef union { float f; int32_t i; uint32_t u; } fi_type; 727ec681f3Smrg 737ec681f3Smrgconst uint8_t count_leading_zeros8[256] = { 747ec681f3Smrg 8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, 757ec681f3Smrg 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 767ec681f3Smrg 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 777ec681f3Smrg 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 787ec681f3Smrg 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 797ec681f3Smrg 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 807ec681f3Smrg 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 817ec681f3Smrg 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 827ec681f3Smrg 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 837ec681f3Smrg 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 847ec681f3Smrg 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 857ec681f3Smrg 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 867ec681f3Smrg 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 877ec681f3Smrg 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 887ec681f3Smrg 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 897ec681f3Smrg 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 907ec681f3Smrg}; 917ec681f3Smrg 927ec681f3Smrg/** 937ec681f3Smrg * \brief Shifts 'a' right by the number of bits given in 'dist', which must be in 947ec681f3Smrg * the range 1 to 63. If any nonzero bits are shifted off, they are "jammed" 957ec681f3Smrg * into the least-significant bit of the shifted value by setting the 967ec681f3Smrg * least-significant bit to 1. This shifted-and-jammed value is returned. 977ec681f3Smrg * 987ec681f3Smrg * From softfloat_shortShiftRightJam64() 997ec681f3Smrg */ 1007ec681f3Smrgstatic inline 1017ec681f3Smrguint64_t _mesa_short_shift_right_jam64(uint64_t a, uint8_t dist) 1027ec681f3Smrg{ 1037ec681f3Smrg return a >> dist | ((a & (((uint64_t) 1 << dist) - 1)) != 0); 1047ec681f3Smrg} 1057ec681f3Smrg 1067ec681f3Smrg/** 1077ec681f3Smrg * \brief Shifts 'a' right by the number of bits given in 'dist', which must not 1087ec681f3Smrg * be zero. If any nonzero bits are shifted off, they are "jammed" into the 1097ec681f3Smrg * least-significant bit of the shifted value by setting the least-significant 1107ec681f3Smrg * bit to 1. This shifted-and-jammed value is returned. 1117ec681f3Smrg * The value of 'dist' can be arbitrarily large. In particular, if 'dist' is 1127ec681f3Smrg * greater than 64, the result will be either 0 or 1, depending on whether 'a' 1137ec681f3Smrg * is zero or nonzero. 1147ec681f3Smrg * 1157ec681f3Smrg * From softfloat_shiftRightJam64() 1167ec681f3Smrg */ 1177ec681f3Smrgstatic inline 1187ec681f3Smrguint64_t _mesa_shift_right_jam64(uint64_t a, uint32_t dist) 1197ec681f3Smrg{ 1207ec681f3Smrg return 1217ec681f3Smrg (dist < 63) ? a >> dist | ((uint64_t) (a << (-dist & 63)) != 0) : (a != 0); 1227ec681f3Smrg} 1237ec681f3Smrg 1247ec681f3Smrg/** 1257ec681f3Smrg * \brief Shifts 'a' right by the number of bits given in 'dist', which must not be 1267ec681f3Smrg * zero. If any nonzero bits are shifted off, they are "jammed" into the 1277ec681f3Smrg * least-significant bit of the shifted value by setting the least-significant 1287ec681f3Smrg * bit to 1. This shifted-and-jammed value is returned. 1297ec681f3Smrg * The value of 'dist' can be arbitrarily large. In particular, if 'dist' is 1307ec681f3Smrg * greater than 32, the result will be either 0 or 1, depending on whether 'a' 1317ec681f3Smrg * is zero or nonzero. 1327ec681f3Smrg * 1337ec681f3Smrg * From softfloat_shiftRightJam32() 1347ec681f3Smrg */ 1357ec681f3Smrgstatic inline 1367ec681f3Smrguint32_t _mesa_shift_right_jam32(uint32_t a, uint16_t dist) 1377ec681f3Smrg{ 1387ec681f3Smrg return 1397ec681f3Smrg (dist < 31) ? a >> dist | ((uint32_t) (a << (-dist & 31)) != 0) : (a != 0); 1407ec681f3Smrg} 1417ec681f3Smrg 1427ec681f3Smrg/** 1437ec681f3Smrg * \brief Extracted from softfloat_roundPackToF64() 1447ec681f3Smrg */ 1457ec681f3Smrgstatic inline 1467ec681f3Smrgdouble _mesa_roundtozero_f64(int64_t s, int64_t e, int64_t m) 1477ec681f3Smrg{ 1487ec681f3Smrg di_type result; 1497ec681f3Smrg 1507ec681f3Smrg if ((uint64_t) e >= 0x7fd) { 1517ec681f3Smrg if (e < 0) { 1527ec681f3Smrg m = _mesa_shift_right_jam64(m, -e); 1537ec681f3Smrg e = 0; 1547ec681f3Smrg } else if ((e > 0x7fd) || (0x8000000000000000 <= m)) { 1557ec681f3Smrg e = 0x7ff; 1567ec681f3Smrg m = 0; 1577ec681f3Smrg result.u = (s << 63) + (e << 52) + m; 1587ec681f3Smrg result.u -= 1; 1597ec681f3Smrg return result.f; 1607ec681f3Smrg } 1617ec681f3Smrg } 1627ec681f3Smrg 1637ec681f3Smrg m >>= 10; 1647ec681f3Smrg if (m == 0) 1657ec681f3Smrg e = 0; 1667ec681f3Smrg 1677ec681f3Smrg result.u = (s << 63) + (e << 52) + m; 1687ec681f3Smrg return result.f; 1697ec681f3Smrg} 1707ec681f3Smrg 1717ec681f3Smrg/** 1727ec681f3Smrg * \brief Extracted from softfloat_roundPackToF32() 1737ec681f3Smrg */ 1747ec681f3Smrgstatic inline 1757ec681f3Smrgfloat _mesa_round_f32(int32_t s, int32_t e, int32_t m, bool rtz) 1767ec681f3Smrg{ 1777ec681f3Smrg fi_type result; 1787ec681f3Smrg uint8_t round_increment = rtz ? 0 : 0x40; 1797ec681f3Smrg 1807ec681f3Smrg if ((uint32_t) e >= 0xfd) { 1817ec681f3Smrg if (e < 0) { 1827ec681f3Smrg m = _mesa_shift_right_jam32(m, -e); 1837ec681f3Smrg e = 0; 1847ec681f3Smrg } else if ((e > 0xfd) || (0x80000000 <= m + round_increment)) { 1857ec681f3Smrg e = 0xff; 1867ec681f3Smrg m = 0; 1877ec681f3Smrg result.u = (s << 31) + (e << 23) + m; 1887ec681f3Smrg result.u -= !round_increment; 1897ec681f3Smrg return result.f; 1907ec681f3Smrg } 1917ec681f3Smrg } 1927ec681f3Smrg 1937ec681f3Smrg uint8_t round_bits; 1947ec681f3Smrg round_bits = m & 0x7f; 1957ec681f3Smrg m = ((uint32_t) m + round_increment) >> 7; 1967ec681f3Smrg m &= ~(uint32_t) (! (round_bits ^ 0x40) & !rtz); 1977ec681f3Smrg if (m == 0) 1987ec681f3Smrg e = 0; 1997ec681f3Smrg 2007ec681f3Smrg result.u = (s << 31) + (e << 23) + m; 2017ec681f3Smrg return result.f; 2027ec681f3Smrg} 2037ec681f3Smrg 2047ec681f3Smrg/** 2057ec681f3Smrg * \brief Extracted from softfloat_roundPackToF16() 2067ec681f3Smrg */ 2077ec681f3Smrgstatic inline 2087ec681f3Smrguint16_t _mesa_roundtozero_f16(int16_t s, int16_t e, int16_t m) 2097ec681f3Smrg{ 2107ec681f3Smrg if ((uint16_t) e >= 0x1d) { 2117ec681f3Smrg if (e < 0) { 2127ec681f3Smrg m = _mesa_shift_right_jam32(m, -e); 2137ec681f3Smrg e = 0; 2147ec681f3Smrg } else if (e > 0x1d) { 2157ec681f3Smrg e = 0x1f; 2167ec681f3Smrg m = 0; 2177ec681f3Smrg return (s << 15) + (e << 10) + m - 1; 2187ec681f3Smrg } 2197ec681f3Smrg } 2207ec681f3Smrg 2217ec681f3Smrg m >>= 4; 2227ec681f3Smrg if (m == 0) 2237ec681f3Smrg e = 0; 2247ec681f3Smrg 2257ec681f3Smrg return (s << 15) + (e << 10) + m; 2267ec681f3Smrg} 2277ec681f3Smrg 2287ec681f3Smrg/** 2297ec681f3Smrg * \brief Shifts the N-bit unsigned integer pointed to by 'a' left by the number of 2307ec681f3Smrg * bits given in 'dist', where N = 'size_words' * 32. The value of 'dist' 2317ec681f3Smrg * must be in the range 1 to 31. Any nonzero bits shifted off are lost. The 2327ec681f3Smrg * shifted N-bit result is stored at the location pointed to by 'm_out'. Each 2337ec681f3Smrg * of 'a' and 'm_out' points to a 'size_words'-long array of 32-bit elements 2347ec681f3Smrg * that concatenate in the platform's normal endian order to form an N-bit 2357ec681f3Smrg * integer. 2367ec681f3Smrg * 2377ec681f3Smrg * From softfloat_shortShiftLeftM() 2387ec681f3Smrg */ 2397ec681f3Smrgstatic inline void 2407ec681f3Smrg_mesa_short_shift_left_m(uint8_t size_words, const uint32_t *a, uint8_t dist, uint32_t *m_out) 2417ec681f3Smrg{ 2427ec681f3Smrg uint8_t neg_dist; 2437ec681f3Smrg unsigned index, last_index; 2447ec681f3Smrg uint32_t part_word, a_word; 2457ec681f3Smrg 2467ec681f3Smrg neg_dist = -dist; 2477ec681f3Smrg index = index_word_hi(size_words); 2487ec681f3Smrg last_index = index_word_lo(size_words); 2497ec681f3Smrg part_word = a[index] << dist; 2507ec681f3Smrg while (index != last_index) { 2517ec681f3Smrg a_word = a[index - word_incr]; 2527ec681f3Smrg m_out[index] = part_word | a_word >> (neg_dist & 31); 2537ec681f3Smrg index -= word_incr; 2547ec681f3Smrg part_word = a_word << dist; 2557ec681f3Smrg } 2567ec681f3Smrg m_out[index] = part_word; 2577ec681f3Smrg} 2587ec681f3Smrg 2597ec681f3Smrg/** 2607ec681f3Smrg * \brief Shifts the N-bit unsigned integer pointed to by 'a' left by the number of 2617ec681f3Smrg * bits given in 'dist', where N = 'size_words' * 32. The value of 'dist' 2627ec681f3Smrg * must not be zero. Any nonzero bits shifted off are lost. The shifted 2637ec681f3Smrg * N-bit result is stored at the location pointed to by 'm_out'. Each of 'a' 2647ec681f3Smrg * and 'm_out' points to a 'size_words'-long array of 32-bit elements that 2657ec681f3Smrg * concatenate in the platform's normal endian order to form an N-bit 2667ec681f3Smrg * integer. The value of 'dist' can be arbitrarily large. In particular, if 2677ec681f3Smrg * 'dist' is greater than N, the stored result will be 0. 2687ec681f3Smrg * 2697ec681f3Smrg * From softfloat_shiftLeftM() 2707ec681f3Smrg */ 2717ec681f3Smrgstatic inline void 2727ec681f3Smrg_mesa_shift_left_m(uint8_t size_words, const uint32_t *a, uint32_t dist, uint32_t *m_out) 2737ec681f3Smrg{ 2747ec681f3Smrg uint32_t word_dist; 2757ec681f3Smrg uint8_t inner_dist; 2767ec681f3Smrg uint8_t i; 2777ec681f3Smrg 2787ec681f3Smrg word_dist = dist >> 5; 2797ec681f3Smrg if (word_dist < size_words) { 2807ec681f3Smrg a += index_multiword_lo_but(size_words, word_dist); 2817ec681f3Smrg inner_dist = dist & 31; 2827ec681f3Smrg if (inner_dist) { 2837ec681f3Smrg _mesa_short_shift_left_m(size_words - word_dist, a, inner_dist, 2847ec681f3Smrg m_out + index_multiword_hi_but(size_words, word_dist)); 2857ec681f3Smrg if (!word_dist) 2867ec681f3Smrg return; 2877ec681f3Smrg } else { 2887ec681f3Smrg uint32_t *dest = m_out + index_word_hi(size_words); 2897ec681f3Smrg a += index_word_hi(size_words - word_dist); 2907ec681f3Smrg for (i = size_words - word_dist; i; --i) { 2917ec681f3Smrg *dest = *a; 2927ec681f3Smrg a -= word_incr; 2937ec681f3Smrg dest -= word_incr; 2947ec681f3Smrg } 2957ec681f3Smrg } 2967ec681f3Smrg m_out += index_multiword_lo(size_words, word_dist); 2977ec681f3Smrg } else { 2987ec681f3Smrg word_dist = size_words; 2997ec681f3Smrg } 3007ec681f3Smrg do { 3017ec681f3Smrg *m_out++ = 0; 3027ec681f3Smrg --word_dist; 3037ec681f3Smrg } while (word_dist); 3047ec681f3Smrg} 3057ec681f3Smrg 3067ec681f3Smrg/** 3077ec681f3Smrg * \brief Shifts the N-bit unsigned integer pointed to by 'a' right by the number of 3087ec681f3Smrg * bits given in 'dist', where N = 'size_words' * 32. The value of 'dist' 3097ec681f3Smrg * must be in the range 1 to 31. Any nonzero bits shifted off are lost. The 3107ec681f3Smrg * shifted N-bit result is stored at the location pointed to by 'm_out'. Each 3117ec681f3Smrg * of 'a' and 'm_out' points to a 'size_words'-long array of 32-bit elements 3127ec681f3Smrg * that concatenate in the platform's normal endian order to form an N-bit 3137ec681f3Smrg * integer. 3147ec681f3Smrg * 3157ec681f3Smrg * From softfloat_shortShiftRightM() 3167ec681f3Smrg */ 3177ec681f3Smrgstatic inline void 3187ec681f3Smrg_mesa_short_shift_right_m(uint8_t size_words, const uint32_t *a, uint8_t dist, uint32_t *m_out) 3197ec681f3Smrg{ 3207ec681f3Smrg uint8_t neg_dist; 3217ec681f3Smrg unsigned index, last_index; 3227ec681f3Smrg uint32_t part_word, a_word; 3237ec681f3Smrg 3247ec681f3Smrg neg_dist = -dist; 3257ec681f3Smrg index = index_word_lo(size_words); 3267ec681f3Smrg last_index = index_word_hi(size_words); 3277ec681f3Smrg part_word = a[index] >> dist; 3287ec681f3Smrg while (index != last_index) { 3297ec681f3Smrg a_word = a[index + word_incr]; 3307ec681f3Smrg m_out[index] = a_word << (neg_dist & 31) | part_word; 3317ec681f3Smrg index += word_incr; 3327ec681f3Smrg part_word = a_word >> dist; 3337ec681f3Smrg } 3347ec681f3Smrg m_out[index] = part_word; 3357ec681f3Smrg} 3367ec681f3Smrg 3377ec681f3Smrg/** 3387ec681f3Smrg * \brief Shifts the N-bit unsigned integer pointed to by 'a' right by the number of 3397ec681f3Smrg * bits given in 'dist', where N = 'size_words' * 32. The value of 'dist' 3407ec681f3Smrg * must be in the range 1 to 31. If any nonzero bits are shifted off, they 3417ec681f3Smrg * are "jammed" into the least-significant bit of the shifted value by setting 3427ec681f3Smrg * the least-significant bit to 1. This shifted-and-jammed N-bit result is 3437ec681f3Smrg * stored at the location pointed to by 'm_out'. Each of 'a' and 'm_out' 3447ec681f3Smrg * points to a 'size_words'-long array of 32-bit elements that concatenate in 3457ec681f3Smrg * the platform's normal endian order to form an N-bit integer. 3467ec681f3Smrg * 3477ec681f3Smrg * 3487ec681f3Smrg * From softfloat_shortShiftRightJamM() 3497ec681f3Smrg */ 3507ec681f3Smrgstatic inline void 3517ec681f3Smrg_mesa_short_shift_right_jam_m(uint8_t size_words, const uint32_t *a, uint8_t dist, uint32_t *m_out) 3527ec681f3Smrg{ 3537ec681f3Smrg uint8_t neg_dist; 3547ec681f3Smrg unsigned index, last_index; 3557ec681f3Smrg uint64_t part_word, a_word; 3567ec681f3Smrg 3577ec681f3Smrg neg_dist = -dist; 3587ec681f3Smrg index = index_word_lo(size_words); 3597ec681f3Smrg last_index = index_word_hi(size_words); 3607ec681f3Smrg a_word = a[index]; 3617ec681f3Smrg part_word = a_word >> dist; 3627ec681f3Smrg if (part_word << dist != a_word ) 3637ec681f3Smrg part_word |= 1; 3647ec681f3Smrg while (index != last_index) { 3657ec681f3Smrg a_word = a[index + word_incr]; 3667ec681f3Smrg m_out[index] = a_word << (neg_dist & 31) | part_word; 3677ec681f3Smrg index += word_incr; 3687ec681f3Smrg part_word = a_word >> dist; 3697ec681f3Smrg } 3707ec681f3Smrg m_out[index] = part_word; 3717ec681f3Smrg} 3727ec681f3Smrg 3737ec681f3Smrg/** 3747ec681f3Smrg * \brief Shifts the N-bit unsigned integer pointed to by 'a' right by the number of 3757ec681f3Smrg * bits given in 'dist', where N = 'size_words' * 32. The value of 'dist' 3767ec681f3Smrg * must not be zero. If any nonzero bits are shifted off, they are "jammed" 3777ec681f3Smrg * into the least-significant bit of the shifted value by setting the 3787ec681f3Smrg * least-significant bit to 1. This shifted-and-jammed N-bit result is stored 3797ec681f3Smrg * at the location pointed to by 'm_out'. Each of 'a' and 'm_out' points to a 3807ec681f3Smrg * 'size_words'-long array of 32-bit elements that concatenate in the 3817ec681f3Smrg * platform's normal endian order to form an N-bit integer. The value of 3827ec681f3Smrg * 'dist' can be arbitrarily large. In particular, if 'dist' is greater than 3837ec681f3Smrg * N, the stored result will be either 0 or 1, depending on whether the 3847ec681f3Smrg * original N bits are all zeros. 3857ec681f3Smrg * 3867ec681f3Smrg * From softfloat_shiftRightJamM() 3877ec681f3Smrg */ 3887ec681f3Smrgstatic inline void 3897ec681f3Smrg_mesa_shift_right_jam_m(uint8_t size_words, const uint32_t *a, uint32_t dist, uint32_t *m_out) 3907ec681f3Smrg{ 3917ec681f3Smrg uint32_t word_jam, word_dist, *tmp; 3927ec681f3Smrg uint8_t i, inner_dist; 3937ec681f3Smrg 3947ec681f3Smrg word_jam = 0; 3957ec681f3Smrg word_dist = dist >> 5; 3967ec681f3Smrg tmp = NULL; 3977ec681f3Smrg if (word_dist) { 3987ec681f3Smrg if (size_words < word_dist) 3997ec681f3Smrg word_dist = size_words; 4007ec681f3Smrg tmp = (uint32_t *) (a + index_multiword_lo(size_words, word_dist)); 4017ec681f3Smrg i = word_dist; 4027ec681f3Smrg do { 4037ec681f3Smrg word_jam = *tmp++; 4047ec681f3Smrg if (word_jam) 4057ec681f3Smrg break; 4067ec681f3Smrg --i; 4077ec681f3Smrg } while (i); 4087ec681f3Smrg tmp = m_out; 4097ec681f3Smrg } 4107ec681f3Smrg if (word_dist < size_words) { 4117ec681f3Smrg a += index_multiword_hi_but(size_words, word_dist); 4127ec681f3Smrg inner_dist = dist & 31; 4137ec681f3Smrg if (inner_dist) { 4147ec681f3Smrg _mesa_short_shift_right_jam_m(size_words - word_dist, a, inner_dist, 4157ec681f3Smrg m_out + index_multiword_lo_but(size_words, word_dist)); 4167ec681f3Smrg if (!word_dist) { 4177ec681f3Smrg if (word_jam) 4187ec681f3Smrg m_out[index_word_lo(size_words)] |= 1; 4197ec681f3Smrg return; 4207ec681f3Smrg } 4217ec681f3Smrg } else { 4227ec681f3Smrg a += index_word_lo(size_words - word_dist); 4237ec681f3Smrg tmp = m_out + index_word_lo(size_words); 4247ec681f3Smrg for (i = size_words - word_dist; i; --i) { 4257ec681f3Smrg *tmp = *a; 4267ec681f3Smrg a += word_incr; 4277ec681f3Smrg tmp += word_incr; 4287ec681f3Smrg } 4297ec681f3Smrg } 4307ec681f3Smrg tmp = m_out + index_multiword_hi(size_words, word_dist); 4317ec681f3Smrg } 4327ec681f3Smrg if (tmp) { 4337ec681f3Smrg do { 4347ec681f3Smrg *tmp++ = 0; 4357ec681f3Smrg --word_dist; 4367ec681f3Smrg } while (word_dist); 4377ec681f3Smrg } 4387ec681f3Smrg if (word_jam) 4397ec681f3Smrg m_out[index_word_lo(size_words)] |= 1; 4407ec681f3Smrg} 4417ec681f3Smrg 4427ec681f3Smrg/** 4437ec681f3Smrg * \brief Calculate a + b but rounding to zero. 4447ec681f3Smrg * 4457ec681f3Smrg * Notice that this mainly differs from the original Berkeley SoftFloat 3e 4467ec681f3Smrg * implementation in that we don't really treat NaNs, Zeroes nor the 4477ec681f3Smrg * signalling flags. Any NaN is good for us and the sign of the Zero is not 4487ec681f3Smrg * important. 4497ec681f3Smrg * 4507ec681f3Smrg * From f64_add() 4517ec681f3Smrg */ 4527ec681f3Smrgdouble 4537ec681f3Smrg_mesa_double_add_rtz(double a, double b) 4547ec681f3Smrg{ 4557ec681f3Smrg const di_type a_di = {a}; 4567ec681f3Smrg uint64_t a_flt_m = a_di.u & 0x0fffffffffffff; 4577ec681f3Smrg uint64_t a_flt_e = (a_di.u >> 52) & 0x7ff; 4587ec681f3Smrg uint64_t a_flt_s = (a_di.u >> 63) & 0x1; 4597ec681f3Smrg const di_type b_di = {b}; 4607ec681f3Smrg uint64_t b_flt_m = b_di.u & 0x0fffffffffffff; 4617ec681f3Smrg uint64_t b_flt_e = (b_di.u >> 52) & 0x7ff; 4627ec681f3Smrg uint64_t b_flt_s = (b_di.u >> 63) & 0x1; 4637ec681f3Smrg int64_t s, e, m = 0; 4647ec681f3Smrg 4657ec681f3Smrg s = a_flt_s; 4667ec681f3Smrg 4677ec681f3Smrg const int64_t exp_diff = a_flt_e - b_flt_e; 4687ec681f3Smrg 4697ec681f3Smrg /* Handle special cases */ 4707ec681f3Smrg 4717ec681f3Smrg if (a_flt_s != b_flt_s) { 4727ec681f3Smrg return _mesa_double_sub_rtz(a, -b); 4737ec681f3Smrg } else if ((a_flt_e == 0) && (a_flt_m == 0)) { 4747ec681f3Smrg /* 'a' is zero, return 'b' */ 4757ec681f3Smrg return b; 4767ec681f3Smrg } else if ((b_flt_e == 0) && (b_flt_m == 0)) { 4777ec681f3Smrg /* 'b' is zero, return 'a' */ 4787ec681f3Smrg return a; 4797ec681f3Smrg } else if (a_flt_e == 0x7ff && a_flt_m != 0) { 4807ec681f3Smrg /* 'a' is a NaN, return NaN */ 4817ec681f3Smrg return a; 4827ec681f3Smrg } else if (b_flt_e == 0x7ff && b_flt_m != 0) { 4837ec681f3Smrg /* 'b' is a NaN, return NaN */ 4847ec681f3Smrg return b; 4857ec681f3Smrg } else if (a_flt_e == 0x7ff && a_flt_m == 0) { 4867ec681f3Smrg /* Inf + x = Inf */ 4877ec681f3Smrg return a; 4887ec681f3Smrg } else if (b_flt_e == 0x7ff && b_flt_m == 0) { 4897ec681f3Smrg /* x + Inf = Inf */ 4907ec681f3Smrg return b; 4917ec681f3Smrg } else if (exp_diff == 0 && a_flt_e == 0) { 4927ec681f3Smrg di_type result_di; 4937ec681f3Smrg result_di.u = a_di.u + b_flt_m; 4947ec681f3Smrg return result_di.f; 4957ec681f3Smrg } else if (exp_diff == 0) { 4967ec681f3Smrg e = a_flt_e; 4977ec681f3Smrg m = 0x0020000000000000 + a_flt_m + b_flt_m; 4987ec681f3Smrg m <<= 9; 4997ec681f3Smrg } else if (exp_diff < 0) { 5007ec681f3Smrg a_flt_m <<= 9; 5017ec681f3Smrg b_flt_m <<= 9; 5027ec681f3Smrg e = b_flt_e; 5037ec681f3Smrg 5047ec681f3Smrg if (a_flt_e != 0) 5057ec681f3Smrg a_flt_m += 0x2000000000000000; 5067ec681f3Smrg else 5077ec681f3Smrg a_flt_m <<= 1; 5087ec681f3Smrg 5097ec681f3Smrg a_flt_m = _mesa_shift_right_jam64(a_flt_m, -exp_diff); 5107ec681f3Smrg m = 0x2000000000000000 + a_flt_m + b_flt_m; 5117ec681f3Smrg if (m < 0x4000000000000000) { 5127ec681f3Smrg --e; 5137ec681f3Smrg m <<= 1; 5147ec681f3Smrg } 5157ec681f3Smrg } else { 5167ec681f3Smrg a_flt_m <<= 9; 5177ec681f3Smrg b_flt_m <<= 9; 5187ec681f3Smrg e = a_flt_e; 5197ec681f3Smrg 5207ec681f3Smrg if (b_flt_e != 0) 5217ec681f3Smrg b_flt_m += 0x2000000000000000; 5227ec681f3Smrg else 5237ec681f3Smrg b_flt_m <<= 1; 5247ec681f3Smrg 5257ec681f3Smrg b_flt_m = _mesa_shift_right_jam64(b_flt_m, exp_diff); 5267ec681f3Smrg m = 0x2000000000000000 + a_flt_m + b_flt_m; 5277ec681f3Smrg if (m < 0x4000000000000000) { 5287ec681f3Smrg --e; 5297ec681f3Smrg m <<= 1; 5307ec681f3Smrg } 5317ec681f3Smrg } 5327ec681f3Smrg 5337ec681f3Smrg return _mesa_roundtozero_f64(s, e, m); 5347ec681f3Smrg} 5357ec681f3Smrg 5367ec681f3Smrg/** 5377ec681f3Smrg * \brief Returns the number of leading 0 bits before the most-significant 1 bit of 5387ec681f3Smrg * 'a'. If 'a' is zero, 64 is returned. 5397ec681f3Smrg */ 5407ec681f3Smrgstatic inline unsigned 5417ec681f3Smrg_mesa_count_leading_zeros64(uint64_t a) 5427ec681f3Smrg{ 5437ec681f3Smrg return 64 - util_last_bit64(a); 5447ec681f3Smrg} 5457ec681f3Smrg 5467ec681f3Smrg/** 5477ec681f3Smrg * \brief Returns the number of leading 0 bits before the most-significant 1 bit of 5487ec681f3Smrg * 'a'. If 'a' is zero, 32 is returned. 5497ec681f3Smrg */ 5507ec681f3Smrgstatic inline unsigned 5517ec681f3Smrg_mesa_count_leading_zeros32(uint32_t a) 5527ec681f3Smrg{ 5537ec681f3Smrg return 32 - util_last_bit(a); 5547ec681f3Smrg} 5557ec681f3Smrg 5567ec681f3Smrgstatic inline double 5577ec681f3Smrg_mesa_norm_round_pack_f64(int64_t s, int64_t e, int64_t m) 5587ec681f3Smrg{ 5597ec681f3Smrg int8_t shift_dist; 5607ec681f3Smrg 5617ec681f3Smrg shift_dist = _mesa_count_leading_zeros64(m) - 1; 5627ec681f3Smrg e -= shift_dist; 5637ec681f3Smrg if ((10 <= shift_dist) && ((unsigned) e < 0x7fd)) { 5647ec681f3Smrg di_type result; 5657ec681f3Smrg result.u = (s << 63) + ((m ? e : 0) << 52) + (m << (shift_dist - 10)); 5667ec681f3Smrg return result.f; 5677ec681f3Smrg } else { 5687ec681f3Smrg return _mesa_roundtozero_f64(s, e, m << shift_dist); 5697ec681f3Smrg } 5707ec681f3Smrg} 5717ec681f3Smrg 5727ec681f3Smrg/** 5737ec681f3Smrg * \brief Replaces the N-bit unsigned integer pointed to by 'm_out' by the 5747ec681f3Smrg * 2s-complement of itself, where N = 'size_words' * 32. Argument 'm_out' 5757ec681f3Smrg * points to a 'size_words'-long array of 32-bit elements that concatenate in 5767ec681f3Smrg * the platform's normal endian order to form an N-bit integer. 5777ec681f3Smrg * 5787ec681f3Smrg * From softfloat_negXM() 5797ec681f3Smrg */ 5807ec681f3Smrgstatic inline void 5817ec681f3Smrg_mesa_neg_x_m(uint8_t size_words, uint32_t *m_out) 5827ec681f3Smrg{ 5837ec681f3Smrg unsigned index, last_index; 5847ec681f3Smrg uint8_t carry; 5857ec681f3Smrg uint32_t word; 5867ec681f3Smrg 5877ec681f3Smrg index = index_word_lo(size_words); 5887ec681f3Smrg last_index = index_word_hi(size_words); 5897ec681f3Smrg carry = 1; 5907ec681f3Smrg for (;;) { 5917ec681f3Smrg word = ~m_out[index] + carry; 5927ec681f3Smrg m_out[index] = word; 5937ec681f3Smrg if (index == last_index) 5947ec681f3Smrg break; 5957ec681f3Smrg index += word_incr; 5967ec681f3Smrg if (word) 5977ec681f3Smrg carry = 0; 5987ec681f3Smrg } 5997ec681f3Smrg} 6007ec681f3Smrg 6017ec681f3Smrg/** 6027ec681f3Smrg * \brief Adds the two N-bit integers pointed to by 'a' and 'b', where N = 6037ec681f3Smrg * 'size_words' * 32. The addition is modulo 2^N, so any carry out is 6047ec681f3Smrg * lost. The N-bit sum is stored at the location pointed to by 'm_out'. Each 6057ec681f3Smrg * of 'a', 'b', and 'm_out' points to a 'size_words'-long array of 32-bit 6067ec681f3Smrg * elements that concatenate in the platform's normal endian order to form an 6077ec681f3Smrg * N-bit integer. 6087ec681f3Smrg * 6097ec681f3Smrg * From softfloat_addM() 6107ec681f3Smrg */ 6117ec681f3Smrgstatic inline void 6127ec681f3Smrg_mesa_add_m(uint8_t size_words, const uint32_t *a, const uint32_t *b, uint32_t *m_out) 6137ec681f3Smrg{ 6147ec681f3Smrg unsigned index, last_index; 6157ec681f3Smrg uint8_t carry; 6167ec681f3Smrg uint32_t a_word, word; 6177ec681f3Smrg 6187ec681f3Smrg index = index_word_lo(size_words); 6197ec681f3Smrg last_index = index_word_hi(size_words); 6207ec681f3Smrg carry = 0; 6217ec681f3Smrg for (;;) { 6227ec681f3Smrg a_word = a[index]; 6237ec681f3Smrg word = a_word + b[index] + carry; 6247ec681f3Smrg m_out[index] = word; 6257ec681f3Smrg if (index == last_index) 6267ec681f3Smrg break; 6277ec681f3Smrg if (word != a_word) 6287ec681f3Smrg carry = (word < a_word); 6297ec681f3Smrg index += word_incr; 6307ec681f3Smrg } 6317ec681f3Smrg} 6327ec681f3Smrg 6337ec681f3Smrg/** 6347ec681f3Smrg * \brief Subtracts the two N-bit integers pointed to by 'a' and 'b', where N = 6357ec681f3Smrg * 'size_words' * 32. The subtraction is modulo 2^N, so any borrow out (carry 6367ec681f3Smrg * out) is lost. The N-bit difference is stored at the location pointed to by 6377ec681f3Smrg * 'm_out'. Each of 'a', 'b', and 'm_out' points to a 'size_words'-long array 6387ec681f3Smrg * of 32-bit elements that concatenate in the platform's normal endian order 6397ec681f3Smrg * to form an N-bit integer. 6407ec681f3Smrg * 6417ec681f3Smrg * From softfloat_subM() 6427ec681f3Smrg */ 6437ec681f3Smrgstatic inline void 6447ec681f3Smrg_mesa_sub_m(uint8_t size_words, const uint32_t *a, const uint32_t *b, uint32_t *m_out) 6457ec681f3Smrg{ 6467ec681f3Smrg unsigned index, last_index; 6477ec681f3Smrg uint8_t borrow; 6487ec681f3Smrg uint32_t a_word, b_word; 6497ec681f3Smrg 6507ec681f3Smrg index = index_word_lo(size_words); 6517ec681f3Smrg last_index = index_word_hi(size_words); 6527ec681f3Smrg borrow = 0; 6537ec681f3Smrg for (;;) { 6547ec681f3Smrg a_word = a[index]; 6557ec681f3Smrg b_word = b[index]; 6567ec681f3Smrg m_out[index] = a_word - b_word - borrow; 6577ec681f3Smrg if (index == last_index) 6587ec681f3Smrg break; 6597ec681f3Smrg borrow = borrow ? (a_word <= b_word) : (a_word < b_word); 6607ec681f3Smrg index += word_incr; 6617ec681f3Smrg } 6627ec681f3Smrg} 6637ec681f3Smrg 6647ec681f3Smrg/* Calculate a - b but rounding to zero. 6657ec681f3Smrg * 6667ec681f3Smrg * Notice that this mainly differs from the original Berkeley SoftFloat 3e 6677ec681f3Smrg * implementation in that we don't really treat NaNs, Zeroes nor the 6687ec681f3Smrg * signalling flags. Any NaN is good for us and the sign of the Zero is not 6697ec681f3Smrg * important. 6707ec681f3Smrg * 6717ec681f3Smrg * From f64_sub() 6727ec681f3Smrg */ 6737ec681f3Smrgdouble 6747ec681f3Smrg_mesa_double_sub_rtz(double a, double b) 6757ec681f3Smrg{ 6767ec681f3Smrg const di_type a_di = {a}; 6777ec681f3Smrg uint64_t a_flt_m = a_di.u & 0x0fffffffffffff; 6787ec681f3Smrg uint64_t a_flt_e = (a_di.u >> 52) & 0x7ff; 6797ec681f3Smrg uint64_t a_flt_s = (a_di.u >> 63) & 0x1; 6807ec681f3Smrg const di_type b_di = {b}; 6817ec681f3Smrg uint64_t b_flt_m = b_di.u & 0x0fffffffffffff; 6827ec681f3Smrg uint64_t b_flt_e = (b_di.u >> 52) & 0x7ff; 6837ec681f3Smrg uint64_t b_flt_s = (b_di.u >> 63) & 0x1; 6847ec681f3Smrg int64_t s, e, m = 0; 6857ec681f3Smrg int64_t m_diff = 0; 6867ec681f3Smrg unsigned shift_dist = 0; 6877ec681f3Smrg 6887ec681f3Smrg s = a_flt_s; 6897ec681f3Smrg 6907ec681f3Smrg const int64_t exp_diff = a_flt_e - b_flt_e; 6917ec681f3Smrg 6927ec681f3Smrg /* Handle special cases */ 6937ec681f3Smrg 6947ec681f3Smrg if (a_flt_s != b_flt_s) { 6957ec681f3Smrg return _mesa_double_add_rtz(a, -b); 6967ec681f3Smrg } else if ((a_flt_e == 0) && (a_flt_m == 0)) { 6977ec681f3Smrg /* 'a' is zero, return '-b' */ 6987ec681f3Smrg return -b; 6997ec681f3Smrg } else if ((b_flt_e == 0) && (b_flt_m == 0)) { 7007ec681f3Smrg /* 'b' is zero, return 'a' */ 7017ec681f3Smrg return a; 7027ec681f3Smrg } else if (a_flt_e == 0x7ff && a_flt_m != 0) { 7037ec681f3Smrg /* 'a' is a NaN, return NaN */ 7047ec681f3Smrg return a; 7057ec681f3Smrg } else if (b_flt_e == 0x7ff && b_flt_m != 0) { 7067ec681f3Smrg /* 'b' is a NaN, return NaN */ 7077ec681f3Smrg return b; 7087ec681f3Smrg } else if (a_flt_e == 0x7ff && a_flt_m == 0) { 7097ec681f3Smrg if (b_flt_e == 0x7ff && b_flt_m == 0) { 7107ec681f3Smrg /* Inf - Inf = NaN */ 7117ec681f3Smrg di_type result; 7127ec681f3Smrg e = 0x7ff; 7137ec681f3Smrg result.u = (s << 63) + (e << 52) + 0x1; 7147ec681f3Smrg return result.f; 7157ec681f3Smrg } 7167ec681f3Smrg /* Inf - x = Inf */ 7177ec681f3Smrg return a; 7187ec681f3Smrg } else if (b_flt_e == 0x7ff && b_flt_m == 0) { 7197ec681f3Smrg /* x - Inf = -Inf */ 7207ec681f3Smrg return -b; 7217ec681f3Smrg } else if (exp_diff == 0) { 7227ec681f3Smrg m_diff = a_flt_m - b_flt_m; 7237ec681f3Smrg 7247ec681f3Smrg if (m_diff == 0) 7257ec681f3Smrg return 0; 7267ec681f3Smrg if (a_flt_e) 7277ec681f3Smrg --a_flt_e; 7287ec681f3Smrg if (m_diff < 0) { 7297ec681f3Smrg s = !s; 7307ec681f3Smrg m_diff = -m_diff; 7317ec681f3Smrg } 7327ec681f3Smrg 7337ec681f3Smrg shift_dist = _mesa_count_leading_zeros64(m_diff) - 11; 7347ec681f3Smrg e = a_flt_e - shift_dist; 7357ec681f3Smrg if (e < 0) { 7367ec681f3Smrg shift_dist = a_flt_e; 7377ec681f3Smrg e = 0; 7387ec681f3Smrg } 7397ec681f3Smrg 7407ec681f3Smrg di_type result; 7417ec681f3Smrg result.u = (s << 63) + (e << 52) + (m_diff << shift_dist); 7427ec681f3Smrg return result.f; 7437ec681f3Smrg } else if (exp_diff < 0) { 7447ec681f3Smrg a_flt_m <<= 10; 7457ec681f3Smrg b_flt_m <<= 10; 7467ec681f3Smrg s = !s; 7477ec681f3Smrg 7487ec681f3Smrg a_flt_m += (a_flt_e) ? 0x4000000000000000 : a_flt_m; 7497ec681f3Smrg a_flt_m = _mesa_shift_right_jam64(a_flt_m, -exp_diff); 7507ec681f3Smrg b_flt_m |= 0x4000000000000000; 7517ec681f3Smrg e = b_flt_e; 7527ec681f3Smrg m = b_flt_m - a_flt_m; 7537ec681f3Smrg } else { 7547ec681f3Smrg a_flt_m <<= 10; 7557ec681f3Smrg b_flt_m <<= 10; 7567ec681f3Smrg 7577ec681f3Smrg b_flt_m += (b_flt_e) ? 0x4000000000000000 : b_flt_m; 7587ec681f3Smrg b_flt_m = _mesa_shift_right_jam64(b_flt_m, exp_diff); 7597ec681f3Smrg a_flt_m |= 0x4000000000000000; 7607ec681f3Smrg e = a_flt_e; 7617ec681f3Smrg m = a_flt_m - b_flt_m; 7627ec681f3Smrg } 7637ec681f3Smrg 7647ec681f3Smrg return _mesa_norm_round_pack_f64(s, e - 1, m); 7657ec681f3Smrg} 7667ec681f3Smrg 7677ec681f3Smrgstatic inline void 7687ec681f3Smrg_mesa_norm_subnormal_mantissa_f64(uint64_t m, uint64_t *exp, uint64_t *m_out) 7697ec681f3Smrg{ 7707ec681f3Smrg int shift_dist; 7717ec681f3Smrg 7727ec681f3Smrg shift_dist = _mesa_count_leading_zeros64(m) - 11; 7737ec681f3Smrg *exp = 1 - shift_dist; 7747ec681f3Smrg *m_out = m << shift_dist; 7757ec681f3Smrg} 7767ec681f3Smrg 7777ec681f3Smrgstatic inline void 7787ec681f3Smrg_mesa_norm_subnormal_mantissa_f32(uint32_t m, uint32_t *exp, uint32_t *m_out) 7797ec681f3Smrg{ 7807ec681f3Smrg int shift_dist; 7817ec681f3Smrg 7827ec681f3Smrg shift_dist = _mesa_count_leading_zeros32(m) - 8; 7837ec681f3Smrg *exp = 1 - shift_dist; 7847ec681f3Smrg *m_out = m << shift_dist; 7857ec681f3Smrg} 7867ec681f3Smrg 7877ec681f3Smrg/** 7887ec681f3Smrg * \brief Multiplies 'a' and 'b' and stores the 128-bit product at the location 7897ec681f3Smrg * pointed to by 'zPtr'. Argument 'zPtr' points to an array of four 32-bit 7907ec681f3Smrg * elements that concatenate in the platform's normal endian order to form a 7917ec681f3Smrg * 128-bit integer. 7927ec681f3Smrg * 7937ec681f3Smrg * From softfloat_mul64To128M() 7947ec681f3Smrg */ 7957ec681f3Smrgstatic inline void 7967ec681f3Smrg_mesa_softfloat_mul_f64_to_f128_m(uint64_t a, uint64_t b, uint32_t *m_out) 7977ec681f3Smrg{ 7987ec681f3Smrg uint32_t a32, a0, b32, b0; 7997ec681f3Smrg uint64_t z0, mid1, z64, mid; 8007ec681f3Smrg 8017ec681f3Smrg a32 = a >> 32; 8027ec681f3Smrg a0 = a; 8037ec681f3Smrg b32 = b >> 32; 8047ec681f3Smrg b0 = b; 8057ec681f3Smrg z0 = (uint64_t) a0 * b0; 8067ec681f3Smrg mid1 = (uint64_t) a32 * b0; 8077ec681f3Smrg mid = mid1 + (uint64_t) a0 * b32; 8087ec681f3Smrg z64 = (uint64_t) a32 * b32; 8097ec681f3Smrg z64 += (uint64_t) (mid < mid1) << 32 | mid >> 32; 8107ec681f3Smrg mid <<= 32; 8117ec681f3Smrg z0 += mid; 8127ec681f3Smrg m_out[index_word(4, 1)] = z0 >> 32; 8137ec681f3Smrg m_out[index_word(4, 0)] = z0; 8147ec681f3Smrg z64 += (z0 < mid); 8157ec681f3Smrg m_out[index_word(4, 3)] = z64 >> 32; 8167ec681f3Smrg m_out[index_word(4, 2)] = z64; 8177ec681f3Smrg} 8187ec681f3Smrg 8197ec681f3Smrg/* Calculate a * b but rounding to zero. 8207ec681f3Smrg * 8217ec681f3Smrg * Notice that this mainly differs from the original Berkeley SoftFloat 3e 8227ec681f3Smrg * implementation in that we don't really treat NaNs, Zeroes nor the 8237ec681f3Smrg * signalling flags. Any NaN is good for us and the sign of the Zero is not 8247ec681f3Smrg * important. 8257ec681f3Smrg * 8267ec681f3Smrg * From f64_mul() 8277ec681f3Smrg */ 8287ec681f3Smrgdouble 8297ec681f3Smrg_mesa_double_mul_rtz(double a, double b) 8307ec681f3Smrg{ 8317ec681f3Smrg const di_type a_di = {a}; 8327ec681f3Smrg uint64_t a_flt_m = a_di.u & 0x0fffffffffffff; 8337ec681f3Smrg uint64_t a_flt_e = (a_di.u >> 52) & 0x7ff; 8347ec681f3Smrg uint64_t a_flt_s = (a_di.u >> 63) & 0x1; 8357ec681f3Smrg const di_type b_di = {b}; 8367ec681f3Smrg uint64_t b_flt_m = b_di.u & 0x0fffffffffffff; 8377ec681f3Smrg uint64_t b_flt_e = (b_di.u >> 52) & 0x7ff; 8387ec681f3Smrg uint64_t b_flt_s = (b_di.u >> 63) & 0x1; 8397ec681f3Smrg int64_t s, e, m = 0; 8407ec681f3Smrg 8417ec681f3Smrg s = a_flt_s ^ b_flt_s; 8427ec681f3Smrg 8437ec681f3Smrg if (a_flt_e == 0x7ff) { 8447ec681f3Smrg if (a_flt_m != 0) { 8457ec681f3Smrg /* 'a' is a NaN, return NaN */ 8467ec681f3Smrg return a; 8477ec681f3Smrg } else if (b_flt_e == 0x7ff && b_flt_m != 0) { 8487ec681f3Smrg /* 'b' is a NaN, return NaN */ 8497ec681f3Smrg return b; 8507ec681f3Smrg } 8517ec681f3Smrg 8527ec681f3Smrg if (!(b_flt_e | b_flt_m)) { 8537ec681f3Smrg /* Inf * 0 = NaN */ 8547ec681f3Smrg di_type result; 8557ec681f3Smrg e = 0x7ff; 8567ec681f3Smrg result.u = (s << 63) + (e << 52) + 0x1; 8577ec681f3Smrg return result.f; 8587ec681f3Smrg } 8597ec681f3Smrg /* Inf * x = Inf */ 8607ec681f3Smrg di_type result; 8617ec681f3Smrg e = 0x7ff; 8627ec681f3Smrg result.u = (s << 63) + (e << 52) + 0; 8637ec681f3Smrg return result.f; 8647ec681f3Smrg } 8657ec681f3Smrg 8667ec681f3Smrg if (b_flt_e == 0x7ff) { 8677ec681f3Smrg if (b_flt_m != 0) { 8687ec681f3Smrg /* 'b' is a NaN, return NaN */ 8697ec681f3Smrg return b; 8707ec681f3Smrg } 8717ec681f3Smrg if (!(a_flt_e | a_flt_m)) { 8727ec681f3Smrg /* 0 * Inf = NaN */ 8737ec681f3Smrg di_type result; 8747ec681f3Smrg e = 0x7ff; 8757ec681f3Smrg result.u = (s << 63) + (e << 52) + 0x1; 8767ec681f3Smrg return result.f; 8777ec681f3Smrg } 8787ec681f3Smrg /* x * Inf = Inf */ 8797ec681f3Smrg di_type result; 8807ec681f3Smrg e = 0x7ff; 8817ec681f3Smrg result.u = (s << 63) + (e << 52) + 0; 8827ec681f3Smrg return result.f; 8837ec681f3Smrg } 8847ec681f3Smrg 8857ec681f3Smrg if (a_flt_e == 0) { 8867ec681f3Smrg if (a_flt_m == 0) { 8877ec681f3Smrg /* 'a' is zero. Return zero */ 8887ec681f3Smrg di_type result; 8897ec681f3Smrg result.u = (s << 63) + 0; 8907ec681f3Smrg return result.f; 8917ec681f3Smrg } 8927ec681f3Smrg _mesa_norm_subnormal_mantissa_f64(a_flt_m , &a_flt_e, &a_flt_m); 8937ec681f3Smrg } 8947ec681f3Smrg if (b_flt_e == 0) { 8957ec681f3Smrg if (b_flt_m == 0) { 8967ec681f3Smrg /* 'b' is zero. Return zero */ 8977ec681f3Smrg di_type result; 8987ec681f3Smrg result.u = (s << 63) + 0; 8997ec681f3Smrg return result.f; 9007ec681f3Smrg } 9017ec681f3Smrg _mesa_norm_subnormal_mantissa_f64(b_flt_m , &b_flt_e, &b_flt_m); 9027ec681f3Smrg } 9037ec681f3Smrg 9047ec681f3Smrg e = a_flt_e + b_flt_e - 0x3ff; 9057ec681f3Smrg a_flt_m = (a_flt_m | 0x0010000000000000) << 10; 9067ec681f3Smrg b_flt_m = (b_flt_m | 0x0010000000000000) << 11; 9077ec681f3Smrg 9087ec681f3Smrg uint32_t m_128[4]; 9097ec681f3Smrg _mesa_softfloat_mul_f64_to_f128_m(a_flt_m, b_flt_m, m_128); 9107ec681f3Smrg 9117ec681f3Smrg m = (uint64_t) m_128[index_word(4, 3)] << 32 | m_128[index_word(4, 2)]; 9127ec681f3Smrg if (m_128[index_word(4, 1)] || m_128[index_word(4, 0)]) 9137ec681f3Smrg m |= 1; 9147ec681f3Smrg 9157ec681f3Smrg if (m < 0x4000000000000000) { 9167ec681f3Smrg --e; 9177ec681f3Smrg m <<= 1; 9187ec681f3Smrg } 9197ec681f3Smrg 9207ec681f3Smrg return _mesa_roundtozero_f64(s, e, m); 9217ec681f3Smrg} 9227ec681f3Smrg 9237ec681f3Smrg 9247ec681f3Smrg/** 9257ec681f3Smrg * \brief Calculate a * b + c but rounding to zero. 9267ec681f3Smrg * 9277ec681f3Smrg * Notice that this mainly differs from the original Berkeley SoftFloat 3e 9287ec681f3Smrg * implementation in that we don't really treat NaNs, Zeroes nor the 9297ec681f3Smrg * signalling flags. Any NaN is good for us and the sign of the Zero is not 9307ec681f3Smrg * important. 9317ec681f3Smrg * 9327ec681f3Smrg * From f64_mulAdd() 9337ec681f3Smrg */ 9347ec681f3Smrgdouble 9357ec681f3Smrg_mesa_double_fma_rtz(double a, double b, double c) 9367ec681f3Smrg{ 9377ec681f3Smrg const di_type a_di = {a}; 9387ec681f3Smrg uint64_t a_flt_m = a_di.u & 0x0fffffffffffff; 9397ec681f3Smrg uint64_t a_flt_e = (a_di.u >> 52) & 0x7ff; 9407ec681f3Smrg uint64_t a_flt_s = (a_di.u >> 63) & 0x1; 9417ec681f3Smrg const di_type b_di = {b}; 9427ec681f3Smrg uint64_t b_flt_m = b_di.u & 0x0fffffffffffff; 9437ec681f3Smrg uint64_t b_flt_e = (b_di.u >> 52) & 0x7ff; 9447ec681f3Smrg uint64_t b_flt_s = (b_di.u >> 63) & 0x1; 9457ec681f3Smrg const di_type c_di = {c}; 9467ec681f3Smrg uint64_t c_flt_m = c_di.u & 0x0fffffffffffff; 9477ec681f3Smrg uint64_t c_flt_e = (c_di.u >> 52) & 0x7ff; 9487ec681f3Smrg uint64_t c_flt_s = (c_di.u >> 63) & 0x1; 9497ec681f3Smrg int64_t s, e, m = 0; 9507ec681f3Smrg 9517ec681f3Smrg c_flt_s ^= 0; 9527ec681f3Smrg s = a_flt_s ^ b_flt_s ^ 0; 9537ec681f3Smrg 9547ec681f3Smrg if (a_flt_e == 0x7ff) { 9557ec681f3Smrg if (a_flt_m != 0) { 9567ec681f3Smrg /* 'a' is a NaN, return NaN */ 9577ec681f3Smrg return a; 9587ec681f3Smrg } else if (b_flt_e == 0x7ff && b_flt_m != 0) { 9597ec681f3Smrg /* 'b' is a NaN, return NaN */ 9607ec681f3Smrg return b; 9617ec681f3Smrg } else if (c_flt_e == 0x7ff && c_flt_m != 0) { 9627ec681f3Smrg /* 'c' is a NaN, return NaN */ 9637ec681f3Smrg return c; 9647ec681f3Smrg } 9657ec681f3Smrg 9667ec681f3Smrg if (!(b_flt_e | b_flt_m)) { 9677ec681f3Smrg /* Inf * 0 + y = NaN */ 9687ec681f3Smrg di_type result; 9697ec681f3Smrg e = 0x7ff; 9707ec681f3Smrg result.u = (s << 63) + (e << 52) + 0x1; 9717ec681f3Smrg return result.f; 9727ec681f3Smrg } 9737ec681f3Smrg 9747ec681f3Smrg if ((c_flt_e == 0x7ff && c_flt_m == 0) && (s != c_flt_s)) { 9757ec681f3Smrg /* Inf * x - Inf = NaN */ 9767ec681f3Smrg di_type result; 9777ec681f3Smrg e = 0x7ff; 9787ec681f3Smrg result.u = (s << 63) + (e << 52) + 0x1; 9797ec681f3Smrg return result.f; 9807ec681f3Smrg } 9817ec681f3Smrg 9827ec681f3Smrg /* Inf * x + y = Inf */ 9837ec681f3Smrg di_type result; 9847ec681f3Smrg e = 0x7ff; 9857ec681f3Smrg result.u = (s << 63) + (e << 52) + 0; 9867ec681f3Smrg return result.f; 9877ec681f3Smrg } 9887ec681f3Smrg 9897ec681f3Smrg if (b_flt_e == 0x7ff) { 9907ec681f3Smrg if (b_flt_m != 0) { 9917ec681f3Smrg /* 'b' is a NaN, return NaN */ 9927ec681f3Smrg return b; 9937ec681f3Smrg } else if (c_flt_e == 0x7ff && c_flt_m != 0) { 9947ec681f3Smrg /* 'c' is a NaN, return NaN */ 9957ec681f3Smrg return c; 9967ec681f3Smrg } 9977ec681f3Smrg 9987ec681f3Smrg if (!(a_flt_e | a_flt_m)) { 9997ec681f3Smrg /* 0 * Inf + y = NaN */ 10007ec681f3Smrg di_type result; 10017ec681f3Smrg e = 0x7ff; 10027ec681f3Smrg result.u = (s << 63) + (e << 52) + 0x1; 10037ec681f3Smrg return result.f; 10047ec681f3Smrg } 10057ec681f3Smrg 10067ec681f3Smrg if ((c_flt_e == 0x7ff && c_flt_m == 0) && (s != c_flt_s)) { 10077ec681f3Smrg /* x * Inf - Inf = NaN */ 10087ec681f3Smrg di_type result; 10097ec681f3Smrg e = 0x7ff; 10107ec681f3Smrg result.u = (s << 63) + (e << 52) + 0x1; 10117ec681f3Smrg return result.f; 10127ec681f3Smrg } 10137ec681f3Smrg 10147ec681f3Smrg /* x * Inf + y = Inf */ 10157ec681f3Smrg di_type result; 10167ec681f3Smrg e = 0x7ff; 10177ec681f3Smrg result.u = (s << 63) + (e << 52) + 0; 10187ec681f3Smrg return result.f; 10197ec681f3Smrg } 10207ec681f3Smrg 10217ec681f3Smrg if (c_flt_e == 0x7ff) { 10227ec681f3Smrg if (c_flt_m != 0) { 10237ec681f3Smrg /* 'c' is a NaN, return NaN */ 10247ec681f3Smrg return c; 10257ec681f3Smrg } 10267ec681f3Smrg 10277ec681f3Smrg /* x * y + Inf = Inf */ 10287ec681f3Smrg return c; 10297ec681f3Smrg } 10307ec681f3Smrg 10317ec681f3Smrg if (a_flt_e == 0) { 10327ec681f3Smrg if (a_flt_m == 0) { 10337ec681f3Smrg /* 'a' is zero, return 'c' */ 10347ec681f3Smrg return c; 10357ec681f3Smrg } 10367ec681f3Smrg _mesa_norm_subnormal_mantissa_f64(a_flt_m , &a_flt_e, &a_flt_m); 10377ec681f3Smrg } 10387ec681f3Smrg 10397ec681f3Smrg if (b_flt_e == 0) { 10407ec681f3Smrg if (b_flt_m == 0) { 10417ec681f3Smrg /* 'b' is zero, return 'c' */ 10427ec681f3Smrg return c; 10437ec681f3Smrg } 10447ec681f3Smrg _mesa_norm_subnormal_mantissa_f64(b_flt_m , &b_flt_e, &b_flt_m); 10457ec681f3Smrg } 10467ec681f3Smrg 10477ec681f3Smrg e = a_flt_e + b_flt_e - 0x3fe; 10487ec681f3Smrg a_flt_m = (a_flt_m | 0x0010000000000000) << 10; 10497ec681f3Smrg b_flt_m = (b_flt_m | 0x0010000000000000) << 11; 10507ec681f3Smrg 10517ec681f3Smrg uint32_t m_128[4]; 10527ec681f3Smrg _mesa_softfloat_mul_f64_to_f128_m(a_flt_m, b_flt_m, m_128); 10537ec681f3Smrg 10547ec681f3Smrg m = (uint64_t) m_128[index_word(4, 3)] << 32 | m_128[index_word(4, 2)]; 10557ec681f3Smrg 10567ec681f3Smrg int64_t shift_dist = 0; 10577ec681f3Smrg if (!(m & 0x4000000000000000)) { 10587ec681f3Smrg --e; 10597ec681f3Smrg shift_dist = -1; 10607ec681f3Smrg } 10617ec681f3Smrg 10627ec681f3Smrg if (c_flt_e == 0) { 10637ec681f3Smrg if (c_flt_m == 0) { 10647ec681f3Smrg /* 'c' is zero, return 'a * b' */ 10657ec681f3Smrg if (shift_dist) 10667ec681f3Smrg m <<= 1; 10677ec681f3Smrg 10687ec681f3Smrg if (m_128[index_word(4, 1)] || m_128[index_word(4, 0)]) 10697ec681f3Smrg m |= 1; 10707ec681f3Smrg return _mesa_roundtozero_f64(s, e - 1, m); 10717ec681f3Smrg } 10727ec681f3Smrg _mesa_norm_subnormal_mantissa_f64(c_flt_m , &c_flt_e, &c_flt_m); 10737ec681f3Smrg } 10747ec681f3Smrg c_flt_m = (c_flt_m | 0x0010000000000000) << 10; 10757ec681f3Smrg 10767ec681f3Smrg uint32_t c_flt_m_128[4]; 10777ec681f3Smrg int64_t exp_diff = e - c_flt_e; 10787ec681f3Smrg if (exp_diff < 0) { 10797ec681f3Smrg e = c_flt_e; 10807ec681f3Smrg if ((s == c_flt_s) || (exp_diff < -1)) { 10817ec681f3Smrg shift_dist -= exp_diff; 10827ec681f3Smrg if (shift_dist) { 10837ec681f3Smrg m = _mesa_shift_right_jam64(m, shift_dist); 10847ec681f3Smrg } 10857ec681f3Smrg } else { 10867ec681f3Smrg if (!shift_dist) { 10877ec681f3Smrg _mesa_short_shift_right_m(4, m_128, 1, m_128); 10887ec681f3Smrg } 10897ec681f3Smrg } 10907ec681f3Smrg } else { 10917ec681f3Smrg if (shift_dist) 10927ec681f3Smrg _mesa_add_m(4, m_128, m_128, m_128); 10937ec681f3Smrg if (!exp_diff) { 10947ec681f3Smrg m = (uint64_t) m_128[index_word(4, 3)] << 32 10957ec681f3Smrg | m_128[index_word(4, 2)]; 10967ec681f3Smrg } else { 10977ec681f3Smrg c_flt_m_128[index_word(4, 3)] = c_flt_m >> 32; 10987ec681f3Smrg c_flt_m_128[index_word(4, 2)] = c_flt_m; 10997ec681f3Smrg c_flt_m_128[index_word(4, 1)] = 0; 11007ec681f3Smrg c_flt_m_128[index_word(4, 0)] = 0; 11017ec681f3Smrg _mesa_shift_right_jam_m(4, c_flt_m_128, exp_diff, c_flt_m_128); 11027ec681f3Smrg } 11037ec681f3Smrg } 11047ec681f3Smrg 11057ec681f3Smrg if (s == c_flt_s) { 11067ec681f3Smrg if (exp_diff <= 0) { 11077ec681f3Smrg m += c_flt_m; 11087ec681f3Smrg } else { 11097ec681f3Smrg _mesa_add_m(4, m_128, c_flt_m_128, m_128); 11107ec681f3Smrg m = (uint64_t) m_128[index_word(4, 3)] << 32 11117ec681f3Smrg | m_128[index_word(4, 2)]; 11127ec681f3Smrg } 11137ec681f3Smrg if (m & 0x8000000000000000) { 11147ec681f3Smrg e++; 11157ec681f3Smrg m = _mesa_short_shift_right_jam64(m, 1); 11167ec681f3Smrg } 11177ec681f3Smrg } else { 11187ec681f3Smrg if (exp_diff < 0) { 11197ec681f3Smrg s = c_flt_s; 11207ec681f3Smrg if (exp_diff < -1) { 11217ec681f3Smrg m = c_flt_m - m; 11227ec681f3Smrg if (m_128[index_word(4, 1)] || m_128[index_word(4, 0)]) { 11237ec681f3Smrg m = (m - 1) | 1; 11247ec681f3Smrg } 11257ec681f3Smrg if (!(m & 0x4000000000000000)) { 11267ec681f3Smrg --e; 11277ec681f3Smrg m <<= 1; 11287ec681f3Smrg } 11297ec681f3Smrg return _mesa_roundtozero_f64(s, e - 1, m); 11307ec681f3Smrg } else { 11317ec681f3Smrg c_flt_m_128[index_word(4, 3)] = c_flt_m >> 32; 11327ec681f3Smrg c_flt_m_128[index_word(4, 2)] = c_flt_m; 11337ec681f3Smrg c_flt_m_128[index_word(4, 1)] = 0; 11347ec681f3Smrg c_flt_m_128[index_word(4, 0)] = 0; 11357ec681f3Smrg _mesa_sub_m(4, c_flt_m_128, m_128, m_128); 11367ec681f3Smrg } 11377ec681f3Smrg } else if (!exp_diff) { 11387ec681f3Smrg m -= c_flt_m; 11397ec681f3Smrg if (!m && !m_128[index_word(4, 1)] && !m_128[index_word(4, 0)]) { 11407ec681f3Smrg /* Return zero */ 11417ec681f3Smrg di_type result; 11427ec681f3Smrg result.u = (s << 63) + 0; 11437ec681f3Smrg return result.f; 11447ec681f3Smrg } 11457ec681f3Smrg m_128[index_word(4, 3)] = m >> 32; 11467ec681f3Smrg m_128[index_word(4, 2)] = m; 11477ec681f3Smrg if (m & 0x8000000000000000) { 11487ec681f3Smrg s = !s; 11497ec681f3Smrg _mesa_neg_x_m(4, m_128); 11507ec681f3Smrg } 11517ec681f3Smrg } else { 11527ec681f3Smrg _mesa_sub_m(4, m_128, c_flt_m_128, m_128); 11537ec681f3Smrg if (1 < exp_diff) { 11547ec681f3Smrg m = (uint64_t) m_128[index_word(4, 3)] << 32 11557ec681f3Smrg | m_128[index_word(4, 2)]; 11567ec681f3Smrg if (!(m & 0x4000000000000000)) { 11577ec681f3Smrg --e; 11587ec681f3Smrg m <<= 1; 11597ec681f3Smrg } 11607ec681f3Smrg if (m_128[index_word(4, 1)] || m_128[index_word(4, 0)]) 11617ec681f3Smrg m |= 1; 11627ec681f3Smrg return _mesa_roundtozero_f64(s, e - 1, m); 11637ec681f3Smrg } 11647ec681f3Smrg } 11657ec681f3Smrg 11667ec681f3Smrg shift_dist = 0; 11677ec681f3Smrg m = (uint64_t) m_128[index_word(4, 3)] << 32 11687ec681f3Smrg | m_128[index_word(4, 2)]; 11697ec681f3Smrg if (!m) { 11707ec681f3Smrg shift_dist = 64; 11717ec681f3Smrg m = (uint64_t) m_128[index_word(4, 1)] << 32 11727ec681f3Smrg | m_128[index_word(4, 0)]; 11737ec681f3Smrg } 11747ec681f3Smrg shift_dist += _mesa_count_leading_zeros64(m) - 1; 11757ec681f3Smrg if (shift_dist) { 11767ec681f3Smrg e -= shift_dist; 11777ec681f3Smrg _mesa_shift_left_m(4, m_128, shift_dist, m_128); 11787ec681f3Smrg m = (uint64_t) m_128[index_word(4, 3)] << 32 11797ec681f3Smrg | m_128[index_word(4, 2)]; 11807ec681f3Smrg } 11817ec681f3Smrg } 11827ec681f3Smrg 11837ec681f3Smrg if (m_128[index_word(4, 1)] || m_128[index_word(4, 0)]) 11847ec681f3Smrg m |= 1; 11857ec681f3Smrg return _mesa_roundtozero_f64(s, e - 1, m); 11867ec681f3Smrg} 11877ec681f3Smrg 11887ec681f3Smrg 11897ec681f3Smrg/** 11907ec681f3Smrg * \brief Calculate a * b + c but rounding to zero. 11917ec681f3Smrg * 11927ec681f3Smrg * Notice that this mainly differs from the original Berkeley SoftFloat 3e 11937ec681f3Smrg * implementation in that we don't really treat NaNs, Zeroes nor the 11947ec681f3Smrg * signalling flags. Any NaN is good for us and the sign of the Zero is not 11957ec681f3Smrg * important. 11967ec681f3Smrg * 11977ec681f3Smrg * From f32_mulAdd() 11987ec681f3Smrg */ 11997ec681f3Smrgfloat 12007ec681f3Smrg_mesa_float_fma_rtz(float a, float b, float c) 12017ec681f3Smrg{ 12027ec681f3Smrg const fi_type a_fi = {a}; 12037ec681f3Smrg uint32_t a_flt_m = a_fi.u & 0x07fffff; 12047ec681f3Smrg uint32_t a_flt_e = (a_fi.u >> 23) & 0xff; 12057ec681f3Smrg uint32_t a_flt_s = (a_fi.u >> 31) & 0x1; 12067ec681f3Smrg const fi_type b_fi = {b}; 12077ec681f3Smrg uint32_t b_flt_m = b_fi.u & 0x07fffff; 12087ec681f3Smrg uint32_t b_flt_e = (b_fi.u >> 23) & 0xff; 12097ec681f3Smrg uint32_t b_flt_s = (b_fi.u >> 31) & 0x1; 12107ec681f3Smrg const fi_type c_fi = {c}; 12117ec681f3Smrg uint32_t c_flt_m = c_fi.u & 0x07fffff; 12127ec681f3Smrg uint32_t c_flt_e = (c_fi.u >> 23) & 0xff; 12137ec681f3Smrg uint32_t c_flt_s = (c_fi.u >> 31) & 0x1; 12147ec681f3Smrg int32_t s, e, m = 0; 12157ec681f3Smrg 12167ec681f3Smrg c_flt_s ^= 0; 12177ec681f3Smrg s = a_flt_s ^ b_flt_s ^ 0; 12187ec681f3Smrg 12197ec681f3Smrg if (a_flt_e == 0xff) { 12207ec681f3Smrg if (a_flt_m != 0) { 12217ec681f3Smrg /* 'a' is a NaN, return NaN */ 12227ec681f3Smrg return a; 12237ec681f3Smrg } else if (b_flt_e == 0xff && b_flt_m != 0) { 12247ec681f3Smrg /* 'b' is a NaN, return NaN */ 12257ec681f3Smrg return b; 12267ec681f3Smrg } else if (c_flt_e == 0xff && c_flt_m != 0) { 12277ec681f3Smrg /* 'c' is a NaN, return NaN */ 12287ec681f3Smrg return c; 12297ec681f3Smrg } 12307ec681f3Smrg 12317ec681f3Smrg if (!(b_flt_e | b_flt_m)) { 12327ec681f3Smrg /* Inf * 0 + y = NaN */ 12337ec681f3Smrg fi_type result; 12347ec681f3Smrg e = 0xff; 12357ec681f3Smrg result.u = (s << 31) + (e << 23) + 0x1; 12367ec681f3Smrg return result.f; 12377ec681f3Smrg } 12387ec681f3Smrg 12397ec681f3Smrg if ((c_flt_e == 0xff && c_flt_m == 0) && (s != c_flt_s)) { 12407ec681f3Smrg /* Inf * x - Inf = NaN */ 12417ec681f3Smrg fi_type result; 12427ec681f3Smrg e = 0xff; 12437ec681f3Smrg result.u = (s << 31) + (e << 23) + 0x1; 12447ec681f3Smrg return result.f; 12457ec681f3Smrg } 12467ec681f3Smrg 12477ec681f3Smrg /* Inf * x + y = Inf */ 12487ec681f3Smrg fi_type result; 12497ec681f3Smrg e = 0xff; 12507ec681f3Smrg result.u = (s << 31) + (e << 23) + 0; 12517ec681f3Smrg return result.f; 12527ec681f3Smrg } 12537ec681f3Smrg 12547ec681f3Smrg if (b_flt_e == 0xff) { 12557ec681f3Smrg if (b_flt_m != 0) { 12567ec681f3Smrg /* 'b' is a NaN, return NaN */ 12577ec681f3Smrg return b; 12587ec681f3Smrg } else if (c_flt_e == 0xff && c_flt_m != 0) { 12597ec681f3Smrg /* 'c' is a NaN, return NaN */ 12607ec681f3Smrg return c; 12617ec681f3Smrg } 12627ec681f3Smrg 12637ec681f3Smrg if (!(a_flt_e | a_flt_m)) { 12647ec681f3Smrg /* 0 * Inf + y = NaN */ 12657ec681f3Smrg fi_type result; 12667ec681f3Smrg e = 0xff; 12677ec681f3Smrg result.u = (s << 31) + (e << 23) + 0x1; 12687ec681f3Smrg return result.f; 12697ec681f3Smrg } 12707ec681f3Smrg 12717ec681f3Smrg if ((c_flt_e == 0xff && c_flt_m == 0) && (s != c_flt_s)) { 12727ec681f3Smrg /* x * Inf - Inf = NaN */ 12737ec681f3Smrg fi_type result; 12747ec681f3Smrg e = 0xff; 12757ec681f3Smrg result.u = (s << 31) + (e << 23) + 0x1; 12767ec681f3Smrg return result.f; 12777ec681f3Smrg } 12787ec681f3Smrg 12797ec681f3Smrg /* x * Inf + y = Inf */ 12807ec681f3Smrg fi_type result; 12817ec681f3Smrg e = 0xff; 12827ec681f3Smrg result.u = (s << 31) + (e << 23) + 0; 12837ec681f3Smrg return result.f; 12847ec681f3Smrg } 12857ec681f3Smrg 12867ec681f3Smrg if (c_flt_e == 0xff) { 12877ec681f3Smrg if (c_flt_m != 0) { 12887ec681f3Smrg /* 'c' is a NaN, return NaN */ 12897ec681f3Smrg return c; 12907ec681f3Smrg } 12917ec681f3Smrg 12927ec681f3Smrg /* x * y + Inf = Inf */ 12937ec681f3Smrg return c; 12947ec681f3Smrg } 12957ec681f3Smrg 12967ec681f3Smrg if (a_flt_e == 0) { 12977ec681f3Smrg if (a_flt_m == 0) { 12987ec681f3Smrg /* 'a' is zero, return 'c' */ 12997ec681f3Smrg return c; 13007ec681f3Smrg } 13017ec681f3Smrg _mesa_norm_subnormal_mantissa_f32(a_flt_m , &a_flt_e, &a_flt_m); 13027ec681f3Smrg } 13037ec681f3Smrg 13047ec681f3Smrg if (b_flt_e == 0) { 13057ec681f3Smrg if (b_flt_m == 0) { 13067ec681f3Smrg /* 'b' is zero, return 'c' */ 13077ec681f3Smrg return c; 13087ec681f3Smrg } 13097ec681f3Smrg _mesa_norm_subnormal_mantissa_f32(b_flt_m , &b_flt_e, &b_flt_m); 13107ec681f3Smrg } 13117ec681f3Smrg 13127ec681f3Smrg e = a_flt_e + b_flt_e - 0x7e; 13137ec681f3Smrg a_flt_m = (a_flt_m | 0x00800000) << 7; 13147ec681f3Smrg b_flt_m = (b_flt_m | 0x00800000) << 7; 13157ec681f3Smrg 13167ec681f3Smrg uint64_t m_64 = (uint64_t) a_flt_m * b_flt_m; 13177ec681f3Smrg if (m_64 < 0x2000000000000000) { 13187ec681f3Smrg --e; 13197ec681f3Smrg m_64 <<= 1; 13207ec681f3Smrg } 13217ec681f3Smrg 13227ec681f3Smrg if (c_flt_e == 0) { 13237ec681f3Smrg if (c_flt_m == 0) { 13247ec681f3Smrg /* 'c' is zero, return 'a * b' */ 13257ec681f3Smrg m = _mesa_short_shift_right_jam64(m_64, 31); 13267ec681f3Smrg return _mesa_round_f32(s, e - 1, m, true); 13277ec681f3Smrg } 13287ec681f3Smrg _mesa_norm_subnormal_mantissa_f32(c_flt_m , &c_flt_e, &c_flt_m); 13297ec681f3Smrg } 13307ec681f3Smrg c_flt_m = (c_flt_m | 0x00800000) << 6; 13317ec681f3Smrg 13327ec681f3Smrg int16_t exp_diff = e - c_flt_e; 13337ec681f3Smrg if (s == c_flt_s) { 13347ec681f3Smrg if (exp_diff <= 0) { 13357ec681f3Smrg e = c_flt_e; 13367ec681f3Smrg m = c_flt_m + _mesa_shift_right_jam64(m_64, 32 - exp_diff); 13377ec681f3Smrg } else { 13387ec681f3Smrg m_64 += _mesa_shift_right_jam64((uint64_t) c_flt_m << 32, exp_diff); 13397ec681f3Smrg m = _mesa_short_shift_right_jam64(m_64, 32); 13407ec681f3Smrg } 13417ec681f3Smrg if (m < 0x40000000) { 13427ec681f3Smrg --e; 13437ec681f3Smrg m <<= 1; 13447ec681f3Smrg } 13457ec681f3Smrg } else { 13467ec681f3Smrg uint64_t c_flt_m_64 = (uint64_t) c_flt_m << 32; 13477ec681f3Smrg if (exp_diff < 0) { 13487ec681f3Smrg s = c_flt_s; 13497ec681f3Smrg e = c_flt_e; 13507ec681f3Smrg m_64 = c_flt_m_64 - _mesa_shift_right_jam64(m_64, -exp_diff); 13517ec681f3Smrg } else if (!exp_diff) { 13527ec681f3Smrg m_64 -= c_flt_m_64; 13537ec681f3Smrg if (!m_64) { 13547ec681f3Smrg /* Return zero */ 13557ec681f3Smrg fi_type result; 13567ec681f3Smrg result.u = (s << 31) + 0; 13577ec681f3Smrg return result.f; 13587ec681f3Smrg } 13597ec681f3Smrg if (m_64 & 0x8000000000000000) { 13607ec681f3Smrg s = !s; 13617ec681f3Smrg m_64 = -m_64; 13627ec681f3Smrg } 13637ec681f3Smrg } else { 13647ec681f3Smrg m_64 -= _mesa_shift_right_jam64(c_flt_m_64, exp_diff); 13657ec681f3Smrg } 13667ec681f3Smrg int8_t shift_dist = _mesa_count_leading_zeros64(m_64) - 1; 13677ec681f3Smrg e -= shift_dist; 13687ec681f3Smrg shift_dist -= 32; 13697ec681f3Smrg if (shift_dist < 0) { 13707ec681f3Smrg m = _mesa_short_shift_right_jam64(m_64, -shift_dist); 13717ec681f3Smrg } else { 13727ec681f3Smrg m = (uint32_t) m_64 << shift_dist; 13737ec681f3Smrg } 13747ec681f3Smrg } 13757ec681f3Smrg 13767ec681f3Smrg return _mesa_round_f32(s, e, m, true); 13777ec681f3Smrg} 13787ec681f3Smrg 13797ec681f3Smrg 13807ec681f3Smrg/** 13817ec681f3Smrg * \brief Converts from 64bits to 32bits float and rounds according to 13827ec681f3Smrg * instructed. 13837ec681f3Smrg * 13847ec681f3Smrg * From f64_to_f32() 13857ec681f3Smrg */ 13867ec681f3Smrgfloat 13877ec681f3Smrg_mesa_double_to_f32(double val, bool rtz) 13887ec681f3Smrg{ 13897ec681f3Smrg const di_type di = {val}; 13907ec681f3Smrg uint64_t flt_m = di.u & 0x0fffffffffffff; 13917ec681f3Smrg uint64_t flt_e = (di.u >> 52) & 0x7ff; 13927ec681f3Smrg uint64_t flt_s = (di.u >> 63) & 0x1; 13937ec681f3Smrg int32_t s, e, m = 0; 13947ec681f3Smrg 13957ec681f3Smrg s = flt_s; 13967ec681f3Smrg 13977ec681f3Smrg if (flt_e == 0x7ff) { 13987ec681f3Smrg if (flt_m != 0) { 13997ec681f3Smrg /* 'val' is a NaN, return NaN */ 14007ec681f3Smrg fi_type result; 14017ec681f3Smrg e = 0xff; 14027ec681f3Smrg m = 0x1; 14037ec681f3Smrg result.u = (s << 31) + (e << 23) + m; 14047ec681f3Smrg return result.f; 14057ec681f3Smrg } 14067ec681f3Smrg 14077ec681f3Smrg /* 'val' is Inf, return Inf */ 14087ec681f3Smrg fi_type result; 14097ec681f3Smrg e = 0xff; 14107ec681f3Smrg result.u = (s << 31) + (e << 23) + m; 14117ec681f3Smrg return result.f; 14127ec681f3Smrg } 14137ec681f3Smrg 14147ec681f3Smrg if (!(flt_e | flt_m)) { 14157ec681f3Smrg /* 'val' is zero, return zero */ 14167ec681f3Smrg fi_type result; 14177ec681f3Smrg e = 0; 14187ec681f3Smrg result.u = (s << 31) + (e << 23) + m; 14197ec681f3Smrg return result.f; 14207ec681f3Smrg } 14217ec681f3Smrg 14227ec681f3Smrg m = _mesa_short_shift_right_jam64(flt_m, 22); 14237ec681f3Smrg if ( ! (flt_e | m) ) { 14247ec681f3Smrg /* 'val' is denorm, return zero */ 14257ec681f3Smrg fi_type result; 14267ec681f3Smrg e = 0; 14277ec681f3Smrg result.u = (s << 31) + (e << 23) + m; 14287ec681f3Smrg return result.f; 14297ec681f3Smrg } 14307ec681f3Smrg 14317ec681f3Smrg return _mesa_round_f32(s, flt_e - 0x381, m | 0x40000000, rtz); 14327ec681f3Smrg} 14337ec681f3Smrg 14347ec681f3Smrg 14357ec681f3Smrg/** 14367ec681f3Smrg * \brief Converts from 32bits to 16bits float and rounds the result to zero. 14377ec681f3Smrg * 14387ec681f3Smrg * From f32_to_f16() 14397ec681f3Smrg */ 14407ec681f3Smrguint16_t 14417ec681f3Smrg_mesa_float_to_half_rtz_slow(float val) 14427ec681f3Smrg{ 14437ec681f3Smrg const fi_type fi = {val}; 14447ec681f3Smrg const uint32_t flt_m = fi.u & 0x7fffff; 14457ec681f3Smrg const uint32_t flt_e = (fi.u >> 23) & 0xff; 14467ec681f3Smrg const uint32_t flt_s = (fi.u >> 31) & 0x1; 14477ec681f3Smrg int16_t s, e, m = 0; 14487ec681f3Smrg 14497ec681f3Smrg s = flt_s; 14507ec681f3Smrg 14517ec681f3Smrg if (flt_e == 0xff) { 14527ec681f3Smrg if (flt_m != 0) { 14537ec681f3Smrg /* 'val' is a NaN, return NaN */ 14547ec681f3Smrg e = 0x1f; 14557ec681f3Smrg m = 0x1; 14567ec681f3Smrg return (s << 15) + (e << 10) + m; 14577ec681f3Smrg } 14587ec681f3Smrg 14597ec681f3Smrg /* 'val' is Inf, return Inf */ 14607ec681f3Smrg e = 0x1f; 14617ec681f3Smrg return (s << 15) + (e << 10) + m; 14627ec681f3Smrg } 14637ec681f3Smrg 14647ec681f3Smrg if (!(flt_e | flt_m)) { 14657ec681f3Smrg /* 'val' is zero, return zero */ 14667ec681f3Smrg e = 0; 14677ec681f3Smrg return (s << 15) + (e << 10) + m; 14687ec681f3Smrg } 14697ec681f3Smrg 14707ec681f3Smrg m = flt_m >> 9 | ((flt_m & 0x1ff) != 0); 14717ec681f3Smrg if ( ! (flt_e | m) ) { 14727ec681f3Smrg /* 'val' is denorm, return zero */ 14737ec681f3Smrg e = 0; 14747ec681f3Smrg return (s << 15) + (e << 10) + m; 14757ec681f3Smrg } 14767ec681f3Smrg 14777ec681f3Smrg return _mesa_roundtozero_f16(s, flt_e - 0x71, m | 0x4000); 14787ec681f3Smrg} 1479