1b8e80941Smrg/* 2b8e80941Smrg * Copyright © 2015 Intel Corporation 3b8e80941Smrg * Copyright © 2019 Valve Corporation 4b8e80941Smrg * 5b8e80941Smrg * Permission is hereby granted, free of charge, to any person obtaining a 6b8e80941Smrg * copy of this software and associated documentation files (the "Software"), 7b8e80941Smrg * to deal in the Software without restriction, including without limitation 8b8e80941Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 9b8e80941Smrg * and/or sell copies of the Software, and to permit persons to whom the 10b8e80941Smrg * Software is furnished to do so, subject to the following conditions: 11b8e80941Smrg * 12b8e80941Smrg * The above copyright notice and this permission notice (including the next 13b8e80941Smrg * paragraph) shall be included in all copies or substantial portions of the 14b8e80941Smrg * Software. 15b8e80941Smrg * 16b8e80941Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17b8e80941Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18b8e80941Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19b8e80941Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20b8e80941Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 21b8e80941Smrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 22b8e80941Smrg * IN THE SOFTWARE. 23b8e80941Smrg * 24b8e80941Smrg * Authors: 25b8e80941Smrg * Jason Ekstrand (jason@jlekstrand.net) 26b8e80941Smrg * Samuel Pitoiset (samuel.pitoiset@gmail.com> 27b8e80941Smrg */ 28b8e80941Smrg 29b8e80941Smrg#include "nir.h" 30b8e80941Smrg#include "nir_builder.h" 31b8e80941Smrg 32b8e80941Smrgstatic nir_ssa_def * 33b8e80941Smrglower_frexp_sig(nir_builder *b, nir_ssa_def *x) 34b8e80941Smrg{ 35b8e80941Smrg nir_ssa_def *abs_x = nir_fabs(b, x); 36b8e80941Smrg nir_ssa_def *zero = nir_imm_floatN_t(b, 0, x->bit_size); 37b8e80941Smrg nir_ssa_def *sign_mantissa_mask, *exponent_value; 38b8e80941Smrg nir_ssa_def *is_not_zero = nir_fne(b, abs_x, zero); 39b8e80941Smrg 40b8e80941Smrg switch (x->bit_size) { 41b8e80941Smrg case 16: 42b8e80941Smrg /* Half-precision floating-point values are stored as 43b8e80941Smrg * 1 sign bit; 44b8e80941Smrg * 5 exponent bits; 45b8e80941Smrg * 10 mantissa bits. 46b8e80941Smrg * 47b8e80941Smrg * An exponent shift of 10 will shift the mantissa out, leaving only the 48b8e80941Smrg * exponent and sign bit (which itself may be zero, if the absolute value 49b8e80941Smrg * was taken before the bitcast and shift). 50b8e80941Smrg */ 51b8e80941Smrg sign_mantissa_mask = nir_imm_intN_t(b, 0x83ffu, 16); 52b8e80941Smrg /* Exponent of floating-point values in the range [0.5, 1.0). */ 53b8e80941Smrg exponent_value = nir_imm_intN_t(b, 0x3800u, 16); 54b8e80941Smrg break; 55b8e80941Smrg case 32: 56b8e80941Smrg /* Single-precision floating-point values are stored as 57b8e80941Smrg * 1 sign bit; 58b8e80941Smrg * 8 exponent bits; 59b8e80941Smrg * 23 mantissa bits. 60b8e80941Smrg * 61b8e80941Smrg * An exponent shift of 23 will shift the mantissa out, leaving only the 62b8e80941Smrg * exponent and sign bit (which itself may be zero, if the absolute value 63b8e80941Smrg * was taken before the bitcast and shift. 64b8e80941Smrg */ 65b8e80941Smrg sign_mantissa_mask = nir_imm_int(b, 0x807fffffu); 66b8e80941Smrg /* Exponent of floating-point values in the range [0.5, 1.0). */ 67b8e80941Smrg exponent_value = nir_imm_int(b, 0x3f000000u); 68b8e80941Smrg break; 69b8e80941Smrg case 64: 70b8e80941Smrg /* Double-precision floating-point values are stored as 71b8e80941Smrg * 1 sign bit; 72b8e80941Smrg * 11 exponent bits; 73b8e80941Smrg * 52 mantissa bits. 74b8e80941Smrg * 75b8e80941Smrg * An exponent shift of 20 will shift the remaining mantissa bits out, 76b8e80941Smrg * leaving only the exponent and sign bit (which itself may be zero, if 77b8e80941Smrg * the absolute value was taken before the bitcast and shift. 78b8e80941Smrg */ 79b8e80941Smrg sign_mantissa_mask = nir_imm_int(b, 0x800fffffu); 80b8e80941Smrg /* Exponent of floating-point values in the range [0.5, 1.0). */ 81b8e80941Smrg exponent_value = nir_imm_int(b, 0x3fe00000u); 82b8e80941Smrg break; 83b8e80941Smrg default: 84b8e80941Smrg unreachable("Invalid bitsize"); 85b8e80941Smrg } 86b8e80941Smrg 87b8e80941Smrg if (x->bit_size == 64) { 88b8e80941Smrg /* We only need to deal with the exponent so first we extract the upper 89b8e80941Smrg * 32 bits using nir_unpack_64_2x32_split_y. 90b8e80941Smrg */ 91b8e80941Smrg nir_ssa_def *upper_x = nir_unpack_64_2x32_split_y(b, x); 92b8e80941Smrg nir_ssa_def *zero32 = nir_imm_int(b, 0); 93b8e80941Smrg 94b8e80941Smrg nir_ssa_def *new_upper = 95b8e80941Smrg nir_ior(b, nir_iand(b, upper_x, sign_mantissa_mask), 96b8e80941Smrg nir_bcsel(b, is_not_zero, exponent_value, zero32)); 97b8e80941Smrg 98b8e80941Smrg nir_ssa_def *lower_x = nir_unpack_64_2x32_split_x(b, x); 99b8e80941Smrg 100b8e80941Smrg return nir_pack_64_2x32_split(b, lower_x, new_upper); 101b8e80941Smrg } else { 102b8e80941Smrg return nir_ior(b, nir_iand(b, x, sign_mantissa_mask), 103b8e80941Smrg nir_bcsel(b, is_not_zero, exponent_value, zero)); 104b8e80941Smrg } 105b8e80941Smrg} 106b8e80941Smrg 107b8e80941Smrgstatic nir_ssa_def * 108b8e80941Smrglower_frexp_exp(nir_builder *b, nir_ssa_def *x) 109b8e80941Smrg{ 110b8e80941Smrg nir_ssa_def *abs_x = nir_fabs(b, x); 111b8e80941Smrg nir_ssa_def *zero = nir_imm_floatN_t(b, 0, x->bit_size); 112b8e80941Smrg nir_ssa_def *is_not_zero = nir_fne(b, abs_x, zero); 113b8e80941Smrg nir_ssa_def *exponent; 114b8e80941Smrg 115b8e80941Smrg switch (x->bit_size) { 116b8e80941Smrg case 16: { 117b8e80941Smrg nir_ssa_def *exponent_shift = nir_imm_int(b, 10); 118b8e80941Smrg nir_ssa_def *exponent_bias = nir_imm_intN_t(b, -14, 16); 119b8e80941Smrg 120b8e80941Smrg /* Significand return must be of the same type as the input, but the 121b8e80941Smrg * exponent must be a 32-bit integer. 122b8e80941Smrg */ 123b8e80941Smrg exponent = nir_i2i32(b, nir_iadd(b, nir_ushr(b, abs_x, exponent_shift), 124b8e80941Smrg nir_bcsel(b, is_not_zero, exponent_bias, zero))); 125b8e80941Smrg break; 126b8e80941Smrg } 127b8e80941Smrg case 32: { 128b8e80941Smrg nir_ssa_def *exponent_shift = nir_imm_int(b, 23); 129b8e80941Smrg nir_ssa_def *exponent_bias = nir_imm_int(b, -126); 130b8e80941Smrg 131b8e80941Smrg exponent = nir_iadd(b, nir_ushr(b, abs_x, exponent_shift), 132b8e80941Smrg nir_bcsel(b, is_not_zero, exponent_bias, zero)); 133b8e80941Smrg break; 134b8e80941Smrg } 135b8e80941Smrg case 64: { 136b8e80941Smrg nir_ssa_def *exponent_shift = nir_imm_int(b, 20); 137b8e80941Smrg nir_ssa_def *exponent_bias = nir_imm_int(b, -1022); 138b8e80941Smrg 139b8e80941Smrg nir_ssa_def *zero32 = nir_imm_int(b, 0); 140b8e80941Smrg nir_ssa_def *abs_upper_x = nir_unpack_64_2x32_split_y(b, abs_x); 141b8e80941Smrg 142b8e80941Smrg exponent = nir_iadd(b, nir_ushr(b, abs_upper_x, exponent_shift), 143b8e80941Smrg nir_bcsel(b, is_not_zero, exponent_bias, zero32)); 144b8e80941Smrg break; 145b8e80941Smrg } 146b8e80941Smrg default: 147b8e80941Smrg unreachable("Invalid bitsize"); 148b8e80941Smrg } 149b8e80941Smrg 150b8e80941Smrg return exponent; 151b8e80941Smrg} 152b8e80941Smrg 153b8e80941Smrgstatic bool 154b8e80941Smrglower_frexp_impl(nir_function_impl *impl) 155b8e80941Smrg{ 156b8e80941Smrg bool progress = false; 157b8e80941Smrg 158b8e80941Smrg nir_builder b; 159b8e80941Smrg nir_builder_init(&b, impl); 160b8e80941Smrg 161b8e80941Smrg nir_foreach_block(block, impl) { 162b8e80941Smrg nir_foreach_instr_safe(instr, block) { 163b8e80941Smrg if (instr->type != nir_instr_type_alu) 164b8e80941Smrg continue; 165b8e80941Smrg 166b8e80941Smrg nir_alu_instr *alu_instr = nir_instr_as_alu(instr); 167b8e80941Smrg nir_ssa_def *lower; 168b8e80941Smrg 169b8e80941Smrg b.cursor = nir_before_instr(instr); 170b8e80941Smrg 171b8e80941Smrg switch (alu_instr->op) { 172b8e80941Smrg case nir_op_frexp_sig: 173b8e80941Smrg lower = lower_frexp_sig(&b, nir_ssa_for_alu_src(&b, alu_instr, 0)); 174b8e80941Smrg break; 175b8e80941Smrg case nir_op_frexp_exp: 176b8e80941Smrg lower = lower_frexp_exp(&b, nir_ssa_for_alu_src(&b, alu_instr, 0)); 177b8e80941Smrg break; 178b8e80941Smrg default: 179b8e80941Smrg continue; 180b8e80941Smrg } 181b8e80941Smrg 182b8e80941Smrg nir_ssa_def_rewrite_uses(&alu_instr->dest.dest.ssa, 183b8e80941Smrg nir_src_for_ssa(lower)); 184b8e80941Smrg nir_instr_remove(instr); 185b8e80941Smrg progress = true; 186b8e80941Smrg } 187b8e80941Smrg } 188b8e80941Smrg 189b8e80941Smrg if (progress) { 190b8e80941Smrg nir_metadata_preserve(impl, nir_metadata_block_index | 191b8e80941Smrg nir_metadata_dominance); 192b8e80941Smrg } 193b8e80941Smrg 194b8e80941Smrg return progress; 195b8e80941Smrg} 196b8e80941Smrg 197b8e80941Smrgbool 198b8e80941Smrgnir_lower_frexp(nir_shader *shader) 199b8e80941Smrg{ 200b8e80941Smrg bool progress = false; 201b8e80941Smrg 202b8e80941Smrg nir_foreach_function(function, shader) { 203b8e80941Smrg if (function->impl) 204b8e80941Smrg progress |= lower_frexp_impl(function->impl); 205b8e80941Smrg } 206b8e80941Smrg 207b8e80941Smrg return progress; 208b8e80941Smrg} 209