17e102996Smaya/* 27e102996Smaya * Copyright © 2015 Intel Corporation 37e102996Smaya * Copyright © 2019 Valve Corporation 47e102996Smaya * 57e102996Smaya * Permission is hereby granted, free of charge, to any person obtaining a 67e102996Smaya * copy of this software and associated documentation files (the "Software"), 77e102996Smaya * to deal in the Software without restriction, including without limitation 87e102996Smaya * the rights to use, copy, modify, merge, publish, distribute, sublicense, 97e102996Smaya * and/or sell copies of the Software, and to permit persons to whom the 107e102996Smaya * Software is furnished to do so, subject to the following conditions: 117e102996Smaya * 127e102996Smaya * The above copyright notice and this permission notice (including the next 137e102996Smaya * paragraph) shall be included in all copies or substantial portions of the 147e102996Smaya * Software. 157e102996Smaya * 167e102996Smaya * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 177e102996Smaya * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 187e102996Smaya * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 197e102996Smaya * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 207e102996Smaya * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 217e102996Smaya * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 227e102996Smaya * IN THE SOFTWARE. 237e102996Smaya * 247e102996Smaya * Authors: 257e102996Smaya * Jason Ekstrand (jason@jlekstrand.net) 267e102996Smaya * Samuel Pitoiset (samuel.pitoiset@gmail.com> 277e102996Smaya */ 287e102996Smaya 297e102996Smaya#include "nir.h" 307e102996Smaya#include "nir_builder.h" 317e102996Smaya 327e102996Smayastatic nir_ssa_def * 337e102996Smayalower_frexp_sig(nir_builder *b, nir_ssa_def *x) 347e102996Smaya{ 357e102996Smaya nir_ssa_def *abs_x = nir_fabs(b, x); 367e102996Smaya nir_ssa_def *zero = nir_imm_floatN_t(b, 0, x->bit_size); 377e102996Smaya nir_ssa_def *sign_mantissa_mask, *exponent_value; 387e102996Smaya 397e102996Smaya switch (x->bit_size) { 407e102996Smaya case 16: 417e102996Smaya /* Half-precision floating-point values are stored as 427e102996Smaya * 1 sign bit; 437e102996Smaya * 5 exponent bits; 447e102996Smaya * 10 mantissa bits. 457e102996Smaya * 467e102996Smaya * An exponent shift of 10 will shift the mantissa out, leaving only the 477e102996Smaya * exponent and sign bit (which itself may be zero, if the absolute value 487e102996Smaya * was taken before the bitcast and shift). 497e102996Smaya */ 507e102996Smaya sign_mantissa_mask = nir_imm_intN_t(b, 0x83ffu, 16); 517e102996Smaya /* Exponent of floating-point values in the range [0.5, 1.0). */ 527e102996Smaya exponent_value = nir_imm_intN_t(b, 0x3800u, 16); 537e102996Smaya break; 547e102996Smaya case 32: 557e102996Smaya /* Single-precision floating-point values are stored as 567e102996Smaya * 1 sign bit; 577e102996Smaya * 8 exponent bits; 587e102996Smaya * 23 mantissa bits. 597e102996Smaya * 607e102996Smaya * An exponent shift of 23 will shift the mantissa out, leaving only the 617e102996Smaya * exponent and sign bit (which itself may be zero, if the absolute value 627e102996Smaya * was taken before the bitcast and shift. 637e102996Smaya */ 647e102996Smaya sign_mantissa_mask = nir_imm_int(b, 0x807fffffu); 657e102996Smaya /* Exponent of floating-point values in the range [0.5, 1.0). */ 667e102996Smaya exponent_value = nir_imm_int(b, 0x3f000000u); 677e102996Smaya break; 687e102996Smaya case 64: 697e102996Smaya /* Double-precision floating-point values are stored as 707e102996Smaya * 1 sign bit; 717e102996Smaya * 11 exponent bits; 727e102996Smaya * 52 mantissa bits. 737e102996Smaya * 747e102996Smaya * An exponent shift of 20 will shift the remaining mantissa bits out, 757e102996Smaya * leaving only the exponent and sign bit (which itself may be zero, if 767e102996Smaya * the absolute value was taken before the bitcast and shift. 777e102996Smaya */ 787e102996Smaya sign_mantissa_mask = nir_imm_int(b, 0x800fffffu); 797e102996Smaya /* Exponent of floating-point values in the range [0.5, 1.0). */ 807e102996Smaya exponent_value = nir_imm_int(b, 0x3fe00000u); 817e102996Smaya break; 827e102996Smaya default: 837e102996Smaya unreachable("Invalid bitsize"); 847e102996Smaya } 857e102996Smaya 867e102996Smaya if (x->bit_size == 64) { 877e102996Smaya /* We only need to deal with the exponent so first we extract the upper 887e102996Smaya * 32 bits using nir_unpack_64_2x32_split_y. 897e102996Smaya */ 907e102996Smaya nir_ssa_def *upper_x = nir_unpack_64_2x32_split_y(b, x); 917e102996Smaya 927ec681f3Smrg /* If x is ±0, ±Inf, or NaN, return x unmodified. */ 937e102996Smaya nir_ssa_def *new_upper = 947ec681f3Smrg nir_bcsel(b, 957ec681f3Smrg nir_iand(b, 967ec681f3Smrg nir_flt(b, zero, abs_x), 977ec681f3Smrg nir_fisfinite(b, x)), 987ec681f3Smrg nir_ior(b, 997ec681f3Smrg nir_iand(b, upper_x, sign_mantissa_mask), 1007ec681f3Smrg exponent_value), 1017ec681f3Smrg upper_x); 1027e102996Smaya 1037e102996Smaya nir_ssa_def *lower_x = nir_unpack_64_2x32_split_x(b, x); 1047e102996Smaya 1057e102996Smaya return nir_pack_64_2x32_split(b, lower_x, new_upper); 1067e102996Smaya } else { 1077ec681f3Smrg /* If x is ±0, ±Inf, or NaN, return x unmodified. */ 1087ec681f3Smrg return nir_bcsel(b, 1097ec681f3Smrg nir_iand(b, 1107ec681f3Smrg nir_flt(b, zero, abs_x), 1117ec681f3Smrg nir_fisfinite(b, x)), 1127ec681f3Smrg nir_ior(b, 1137ec681f3Smrg nir_iand(b, x, sign_mantissa_mask), 1147ec681f3Smrg exponent_value), 1157ec681f3Smrg x); 1167e102996Smaya } 1177e102996Smaya} 1187e102996Smaya 1197e102996Smayastatic nir_ssa_def * 1207e102996Smayalower_frexp_exp(nir_builder *b, nir_ssa_def *x) 1217e102996Smaya{ 1227e102996Smaya nir_ssa_def *abs_x = nir_fabs(b, x); 1237e102996Smaya nir_ssa_def *zero = nir_imm_floatN_t(b, 0, x->bit_size); 1247ec681f3Smrg nir_ssa_def *is_not_zero = nir_fneu(b, abs_x, zero); 1257e102996Smaya nir_ssa_def *exponent; 1267e102996Smaya 1277e102996Smaya switch (x->bit_size) { 1287e102996Smaya case 16: { 1297e102996Smaya nir_ssa_def *exponent_shift = nir_imm_int(b, 10); 1307e102996Smaya nir_ssa_def *exponent_bias = nir_imm_intN_t(b, -14, 16); 1317e102996Smaya 1327e102996Smaya /* Significand return must be of the same type as the input, but the 1337e102996Smaya * exponent must be a 32-bit integer. 1347e102996Smaya */ 1357e102996Smaya exponent = nir_i2i32(b, nir_iadd(b, nir_ushr(b, abs_x, exponent_shift), 1367e102996Smaya nir_bcsel(b, is_not_zero, exponent_bias, zero))); 1377e102996Smaya break; 1387e102996Smaya } 1397e102996Smaya case 32: { 1407e102996Smaya nir_ssa_def *exponent_shift = nir_imm_int(b, 23); 1417e102996Smaya nir_ssa_def *exponent_bias = nir_imm_int(b, -126); 1427e102996Smaya 1437e102996Smaya exponent = nir_iadd(b, nir_ushr(b, abs_x, exponent_shift), 1447e102996Smaya nir_bcsel(b, is_not_zero, exponent_bias, zero)); 1457e102996Smaya break; 1467e102996Smaya } 1477e102996Smaya case 64: { 1487e102996Smaya nir_ssa_def *exponent_shift = nir_imm_int(b, 20); 1497e102996Smaya nir_ssa_def *exponent_bias = nir_imm_int(b, -1022); 1507e102996Smaya 1517e102996Smaya nir_ssa_def *zero32 = nir_imm_int(b, 0); 1527e102996Smaya nir_ssa_def *abs_upper_x = nir_unpack_64_2x32_split_y(b, abs_x); 1537e102996Smaya 1547e102996Smaya exponent = nir_iadd(b, nir_ushr(b, abs_upper_x, exponent_shift), 1557e102996Smaya nir_bcsel(b, is_not_zero, exponent_bias, zero32)); 1567e102996Smaya break; 1577e102996Smaya } 1587e102996Smaya default: 1597e102996Smaya unreachable("Invalid bitsize"); 1607e102996Smaya } 1617e102996Smaya 1627e102996Smaya return exponent; 1637e102996Smaya} 1647e102996Smaya 1657e102996Smayastatic bool 1667e102996Smayalower_frexp_impl(nir_function_impl *impl) 1677e102996Smaya{ 1687e102996Smaya bool progress = false; 1697e102996Smaya 1707e102996Smaya nir_builder b; 1717e102996Smaya nir_builder_init(&b, impl); 1727e102996Smaya 1737e102996Smaya nir_foreach_block(block, impl) { 1747e102996Smaya nir_foreach_instr_safe(instr, block) { 1757e102996Smaya if (instr->type != nir_instr_type_alu) 1767e102996Smaya continue; 1777e102996Smaya 1787e102996Smaya nir_alu_instr *alu_instr = nir_instr_as_alu(instr); 1797e102996Smaya nir_ssa_def *lower; 1807e102996Smaya 1817e102996Smaya b.cursor = nir_before_instr(instr); 1827e102996Smaya 1837e102996Smaya switch (alu_instr->op) { 1847e102996Smaya case nir_op_frexp_sig: 1857e102996Smaya lower = lower_frexp_sig(&b, nir_ssa_for_alu_src(&b, alu_instr, 0)); 1867e102996Smaya break; 1877e102996Smaya case nir_op_frexp_exp: 1887e102996Smaya lower = lower_frexp_exp(&b, nir_ssa_for_alu_src(&b, alu_instr, 0)); 1897e102996Smaya break; 1907e102996Smaya default: 1917e102996Smaya continue; 1927e102996Smaya } 1937e102996Smaya 1947e102996Smaya nir_ssa_def_rewrite_uses(&alu_instr->dest.dest.ssa, 1957ec681f3Smrg lower); 1967e102996Smaya nir_instr_remove(instr); 1977e102996Smaya progress = true; 1987e102996Smaya } 1997e102996Smaya } 2007e102996Smaya 2017e102996Smaya if (progress) { 2027e102996Smaya nir_metadata_preserve(impl, nir_metadata_block_index | 2037e102996Smaya nir_metadata_dominance); 2047e102996Smaya } 2057e102996Smaya 2067e102996Smaya return progress; 2077e102996Smaya} 2087e102996Smaya 2097e102996Smayabool 2107e102996Smayanir_lower_frexp(nir_shader *shader) 2117e102996Smaya{ 2127e102996Smaya bool progress = false; 2137e102996Smaya 2147e102996Smaya nir_foreach_function(function, shader) { 2157e102996Smaya if (function->impl) 2167e102996Smaya progress |= lower_frexp_impl(function->impl); 2177e102996Smaya } 2187e102996Smaya 2197e102996Smaya return progress; 2207e102996Smaya} 221