17ec681f3Smrg/* 27ec681f3Smrg * Copyright (C) 2020 Google, Inc. 37ec681f3Smrg * Copyright (C) 2021 Advanced Micro Devices, Inc. 47ec681f3Smrg * 57ec681f3Smrg * Permission is hereby granted, free of charge, to any person obtaining a 67ec681f3Smrg * copy of this software and associated documentation files (the "Software"), 77ec681f3Smrg * to deal in the Software without restriction, including without limitation 87ec681f3Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 97ec681f3Smrg * and/or sell copies of the Software, and to permit persons to whom the 107ec681f3Smrg * Software is furnished to do so, subject to the following conditions: 117ec681f3Smrg * 127ec681f3Smrg * The above copyright notice and this permission notice (including the next 137ec681f3Smrg * paragraph) shall be included in all copies or substantial portions of the 147ec681f3Smrg * Software. 157ec681f3Smrg * 167ec681f3Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 177ec681f3Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 187ec681f3Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 197ec681f3Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 207ec681f3Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 217ec681f3Smrg * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 227ec681f3Smrg * SOFTWARE. 237ec681f3Smrg */ 247ec681f3Smrg 257ec681f3Smrg#include "nir.h" 267ec681f3Smrg#include "nir_builder.h" 277ec681f3Smrg 287ec681f3Smrg/** 297ec681f3Smrg * Return the intrinsic if it matches the mask in "modes", else return NULL. 307ec681f3Smrg */ 317ec681f3Smrgstatic nir_intrinsic_instr * 327ec681f3Smrgget_io_intrinsic(nir_instr *instr, nir_variable_mode modes, 337ec681f3Smrg nir_variable_mode *out_mode) 347ec681f3Smrg{ 357ec681f3Smrg if (instr->type != nir_instr_type_intrinsic) 367ec681f3Smrg return NULL; 377ec681f3Smrg 387ec681f3Smrg nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); 397ec681f3Smrg 407ec681f3Smrg switch (intr->intrinsic) { 417ec681f3Smrg case nir_intrinsic_load_input: 427ec681f3Smrg case nir_intrinsic_load_input_vertex: 437ec681f3Smrg case nir_intrinsic_load_interpolated_input: 447ec681f3Smrg case nir_intrinsic_load_per_vertex_input: 457ec681f3Smrg *out_mode = nir_var_shader_in; 467ec681f3Smrg return modes & nir_var_shader_in ? intr : NULL; 477ec681f3Smrg case nir_intrinsic_load_output: 487ec681f3Smrg case nir_intrinsic_load_per_vertex_output: 497ec681f3Smrg case nir_intrinsic_store_output: 507ec681f3Smrg case nir_intrinsic_store_per_vertex_output: 517ec681f3Smrg *out_mode = nir_var_shader_out; 527ec681f3Smrg return modes & nir_var_shader_out ? intr : NULL; 537ec681f3Smrg default: 547ec681f3Smrg return NULL; 557ec681f3Smrg } 567ec681f3Smrg} 577ec681f3Smrg 587ec681f3Smrg/** 597ec681f3Smrg * Recompute the IO "base" indices from scratch to remove holes or to fix 607ec681f3Smrg * incorrect base values due to changes in IO locations by using IO locations 617ec681f3Smrg * to assign new bases. The mapping from locations to bases becomes 627ec681f3Smrg * monotonically increasing. 637ec681f3Smrg */ 647ec681f3Smrgbool 657ec681f3Smrgnir_recompute_io_bases(nir_function_impl *impl, nir_variable_mode modes) 667ec681f3Smrg{ 677ec681f3Smrg BITSET_DECLARE(inputs, NUM_TOTAL_VARYING_SLOTS); 687ec681f3Smrg BITSET_DECLARE(outputs, NUM_TOTAL_VARYING_SLOTS); 697ec681f3Smrg BITSET_ZERO(inputs); 707ec681f3Smrg BITSET_ZERO(outputs); 717ec681f3Smrg 727ec681f3Smrg /* Gather the bitmasks of used locations. */ 737ec681f3Smrg nir_foreach_block_safe (block, impl) { 747ec681f3Smrg nir_foreach_instr_safe (instr, block) { 757ec681f3Smrg nir_variable_mode mode; 767ec681f3Smrg nir_intrinsic_instr *intr = get_io_intrinsic(instr, modes, &mode); 777ec681f3Smrg if (!intr) 787ec681f3Smrg continue; 797ec681f3Smrg 807ec681f3Smrg nir_io_semantics sem = nir_intrinsic_io_semantics(intr); 817ec681f3Smrg unsigned num_slots = sem.num_slots; 827ec681f3Smrg if (sem.medium_precision) 837ec681f3Smrg num_slots = (num_slots + sem.high_16bits + 1) / 2; 847ec681f3Smrg 857ec681f3Smrg if (mode == nir_var_shader_in) { 867ec681f3Smrg for (unsigned i = 0; i < num_slots; i++) 877ec681f3Smrg BITSET_SET(inputs, sem.location + i); 887ec681f3Smrg } else if (!sem.dual_source_blend_index) { 897ec681f3Smrg for (unsigned i = 0; i < num_slots; i++) 907ec681f3Smrg BITSET_SET(outputs, sem.location + i); 917ec681f3Smrg } 927ec681f3Smrg } 937ec681f3Smrg } 947ec681f3Smrg 957ec681f3Smrg /* Renumber bases. */ 967ec681f3Smrg bool changed = false; 977ec681f3Smrg 987ec681f3Smrg nir_foreach_block_safe (block, impl) { 997ec681f3Smrg nir_foreach_instr_safe (instr, block) { 1007ec681f3Smrg nir_variable_mode mode; 1017ec681f3Smrg nir_intrinsic_instr *intr = get_io_intrinsic(instr, modes, &mode); 1027ec681f3Smrg if (!intr) 1037ec681f3Smrg continue; 1047ec681f3Smrg 1057ec681f3Smrg nir_io_semantics sem = nir_intrinsic_io_semantics(intr); 1067ec681f3Smrg unsigned num_slots = sem.num_slots; 1077ec681f3Smrg if (sem.medium_precision) 1087ec681f3Smrg num_slots = (num_slots + sem.high_16bits + 1) / 2; 1097ec681f3Smrg 1107ec681f3Smrg if (mode == nir_var_shader_in) { 1117ec681f3Smrg nir_intrinsic_set_base(intr, 1127ec681f3Smrg BITSET_PREFIX_SUM(inputs, sem.location)); 1137ec681f3Smrg } else if (sem.dual_source_blend_index) { 1147ec681f3Smrg nir_intrinsic_set_base(intr, 1157ec681f3Smrg BITSET_PREFIX_SUM(outputs, NUM_TOTAL_VARYING_SLOTS)); 1167ec681f3Smrg } else { 1177ec681f3Smrg nir_intrinsic_set_base(intr, 1187ec681f3Smrg BITSET_PREFIX_SUM(outputs, sem.location)); 1197ec681f3Smrg } 1207ec681f3Smrg changed = true; 1217ec681f3Smrg } 1227ec681f3Smrg } 1237ec681f3Smrg 1247ec681f3Smrg if (changed) { 1257ec681f3Smrg nir_metadata_preserve(impl, nir_metadata_dominance | 1267ec681f3Smrg nir_metadata_block_index); 1277ec681f3Smrg } else { 1287ec681f3Smrg nir_metadata_preserve(impl, nir_metadata_all); 1297ec681f3Smrg } 1307ec681f3Smrg 1317ec681f3Smrg return changed; 1327ec681f3Smrg} 1337ec681f3Smrg 1347ec681f3Smrg/** 1357ec681f3Smrg * Lower mediump inputs and/or outputs to 16 bits. 1367ec681f3Smrg * 1377ec681f3Smrg * \param modes Whether to lower inputs, outputs, or both. 1387ec681f3Smrg * \param varying_mask Determines which varyings to skip (VS inputs, 1397ec681f3Smrg * FS outputs, and patch varyings ignore this mask). 1407ec681f3Smrg * \param use_16bit_slots Remap lowered slots to* VARYING_SLOT_VARn_16BIT. 1417ec681f3Smrg */ 1427ec681f3Smrgbool 1437ec681f3Smrgnir_lower_mediump_io(nir_shader *nir, nir_variable_mode modes, 1447ec681f3Smrg uint64_t varying_mask, bool use_16bit_slots) 1457ec681f3Smrg{ 1467ec681f3Smrg bool changed = false; 1477ec681f3Smrg nir_function_impl *impl = nir_shader_get_entrypoint(nir); 1487ec681f3Smrg assert(impl); 1497ec681f3Smrg 1507ec681f3Smrg nir_builder b; 1517ec681f3Smrg nir_builder_init(&b, impl); 1527ec681f3Smrg 1537ec681f3Smrg nir_foreach_block_safe (block, impl) { 1547ec681f3Smrg nir_foreach_instr_safe (instr, block) { 1557ec681f3Smrg nir_variable_mode mode; 1567ec681f3Smrg nir_intrinsic_instr *intr = get_io_intrinsic(instr, modes, &mode); 1577ec681f3Smrg if (!intr) 1587ec681f3Smrg continue; 1597ec681f3Smrg 1607ec681f3Smrg nir_io_semantics sem = nir_intrinsic_io_semantics(intr); 1617ec681f3Smrg nir_ssa_def *(*convert)(nir_builder *, nir_ssa_def *); 1627ec681f3Smrg bool is_varying = !(nir->info.stage == MESA_SHADER_VERTEX && 1637ec681f3Smrg mode == nir_var_shader_in) && 1647ec681f3Smrg !(nir->info.stage == MESA_SHADER_FRAGMENT && 1657ec681f3Smrg mode == nir_var_shader_out); 1667ec681f3Smrg 1677ec681f3Smrg if (!sem.medium_precision || 1687ec681f3Smrg (is_varying && sem.location <= VARYING_SLOT_VAR31 && 1697ec681f3Smrg !(varying_mask & BITFIELD64_BIT(sem.location)))) 1707ec681f3Smrg continue; /* can't lower */ 1717ec681f3Smrg 1727ec681f3Smrg if (nir_intrinsic_has_src_type(intr)) { 1737ec681f3Smrg /* Stores. */ 1747ec681f3Smrg nir_alu_type type = nir_intrinsic_src_type(intr); 1757ec681f3Smrg 1767ec681f3Smrg switch (type) { 1777ec681f3Smrg case nir_type_float32: 1787ec681f3Smrg convert = nir_f2fmp; 1797ec681f3Smrg break; 1807ec681f3Smrg case nir_type_int32: 1817ec681f3Smrg case nir_type_uint32: 1827ec681f3Smrg convert = nir_i2imp; 1837ec681f3Smrg break; 1847ec681f3Smrg default: 1857ec681f3Smrg continue; /* already lowered? */ 1867ec681f3Smrg } 1877ec681f3Smrg 1887ec681f3Smrg /* Convert the 32-bit store into a 16-bit store. */ 1897ec681f3Smrg b.cursor = nir_before_instr(&intr->instr); 1907ec681f3Smrg nir_instr_rewrite_src_ssa(&intr->instr, &intr->src[0], 1917ec681f3Smrg convert(&b, intr->src[0].ssa)); 1927ec681f3Smrg nir_intrinsic_set_src_type(intr, (type & ~32) | 16); 1937ec681f3Smrg } else { 1947ec681f3Smrg /* Loads. */ 1957ec681f3Smrg nir_alu_type type = nir_intrinsic_dest_type(intr); 1967ec681f3Smrg 1977ec681f3Smrg switch (type) { 1987ec681f3Smrg case nir_type_float32: 1997ec681f3Smrg convert = nir_f2f32; 2007ec681f3Smrg break; 2017ec681f3Smrg case nir_type_int32: 2027ec681f3Smrg convert = nir_i2i32; 2037ec681f3Smrg break; 2047ec681f3Smrg case nir_type_uint32: 2057ec681f3Smrg convert = nir_u2u32; 2067ec681f3Smrg break; 2077ec681f3Smrg default: 2087ec681f3Smrg continue; /* already lowered? */ 2097ec681f3Smrg } 2107ec681f3Smrg 2117ec681f3Smrg /* Convert the 32-bit load into a 16-bit load. */ 2127ec681f3Smrg b.cursor = nir_after_instr(&intr->instr); 2137ec681f3Smrg intr->dest.ssa.bit_size = 16; 2147ec681f3Smrg nir_intrinsic_set_dest_type(intr, (type & ~32) | 16); 2157ec681f3Smrg nir_ssa_def *dst = convert(&b, &intr->dest.ssa); 2167ec681f3Smrg nir_ssa_def_rewrite_uses_after(&intr->dest.ssa, dst, 2177ec681f3Smrg dst->parent_instr); 2187ec681f3Smrg } 2197ec681f3Smrg 2207ec681f3Smrg if (use_16bit_slots && is_varying && 2217ec681f3Smrg sem.location >= VARYING_SLOT_VAR0 && 2227ec681f3Smrg sem.location <= VARYING_SLOT_VAR31) { 2237ec681f3Smrg unsigned index = sem.location - VARYING_SLOT_VAR0; 2247ec681f3Smrg 2257ec681f3Smrg sem.location = VARYING_SLOT_VAR0_16BIT + index / 2; 2267ec681f3Smrg sem.high_16bits = index % 2; 2277ec681f3Smrg nir_intrinsic_set_io_semantics(intr, sem); 2287ec681f3Smrg } 2297ec681f3Smrg changed = true; 2307ec681f3Smrg } 2317ec681f3Smrg } 2327ec681f3Smrg 2337ec681f3Smrg if (changed && use_16bit_slots) 2347ec681f3Smrg nir_recompute_io_bases(impl, modes); 2357ec681f3Smrg 2367ec681f3Smrg if (changed) { 2377ec681f3Smrg nir_metadata_preserve(impl, nir_metadata_dominance | 2387ec681f3Smrg nir_metadata_block_index); 2397ec681f3Smrg } else { 2407ec681f3Smrg nir_metadata_preserve(impl, nir_metadata_all); 2417ec681f3Smrg } 2427ec681f3Smrg 2437ec681f3Smrg return changed; 2447ec681f3Smrg} 2457ec681f3Smrg 2467ec681f3Smrg/** 2477ec681f3Smrg * Set the mediump precision bit for those shader inputs and outputs that are 2487ec681f3Smrg * set in the "modes" mask. Non-generic varyings (that GLES3 doesn't have) 2497ec681f3Smrg * are ignored. The "types" mask can be (nir_type_float | nir_type_int), etc. 2507ec681f3Smrg */ 2517ec681f3Smrgbool 2527ec681f3Smrgnir_force_mediump_io(nir_shader *nir, nir_variable_mode modes, 2537ec681f3Smrg nir_alu_type types) 2547ec681f3Smrg{ 2557ec681f3Smrg bool changed = false; 2567ec681f3Smrg nir_function_impl *impl = nir_shader_get_entrypoint(nir); 2577ec681f3Smrg assert(impl); 2587ec681f3Smrg 2597ec681f3Smrg nir_builder b; 2607ec681f3Smrg nir_builder_init(&b, impl); 2617ec681f3Smrg 2627ec681f3Smrg nir_foreach_block_safe (block, impl) { 2637ec681f3Smrg nir_foreach_instr_safe (instr, block) { 2647ec681f3Smrg nir_variable_mode mode; 2657ec681f3Smrg nir_intrinsic_instr *intr = get_io_intrinsic(instr, modes, &mode); 2667ec681f3Smrg if (!intr) 2677ec681f3Smrg continue; 2687ec681f3Smrg 2697ec681f3Smrg nir_alu_type type; 2707ec681f3Smrg if (nir_intrinsic_has_src_type(intr)) 2717ec681f3Smrg type = nir_intrinsic_src_type(intr); 2727ec681f3Smrg else 2737ec681f3Smrg type = nir_intrinsic_dest_type(intr); 2747ec681f3Smrg if (!(type & types)) 2757ec681f3Smrg continue; 2767ec681f3Smrg 2777ec681f3Smrg nir_io_semantics sem = nir_intrinsic_io_semantics(intr); 2787ec681f3Smrg 2797ec681f3Smrg if (nir->info.stage == MESA_SHADER_FRAGMENT && 2807ec681f3Smrg mode == nir_var_shader_out) { 2817ec681f3Smrg /* Only accept FS outputs. */ 2827ec681f3Smrg if (sem.location < FRAG_RESULT_DATA0 && 2837ec681f3Smrg sem.location != FRAG_RESULT_COLOR) 2847ec681f3Smrg continue; 2857ec681f3Smrg } else if (nir->info.stage == MESA_SHADER_VERTEX && 2867ec681f3Smrg mode == nir_var_shader_in) { 2877ec681f3Smrg /* Accept all VS inputs. */ 2887ec681f3Smrg } else { 2897ec681f3Smrg /* Only accept generic varyings. */ 2907ec681f3Smrg if (sem.location < VARYING_SLOT_VAR0 || 2917ec681f3Smrg sem.location > VARYING_SLOT_VAR31) 2927ec681f3Smrg continue; 2937ec681f3Smrg } 2947ec681f3Smrg 2957ec681f3Smrg sem.medium_precision = 1; 2967ec681f3Smrg nir_intrinsic_set_io_semantics(intr, sem); 2977ec681f3Smrg changed = true; 2987ec681f3Smrg } 2997ec681f3Smrg } 3007ec681f3Smrg 3017ec681f3Smrg if (changed) { 3027ec681f3Smrg nir_metadata_preserve(impl, nir_metadata_dominance | 3037ec681f3Smrg nir_metadata_block_index); 3047ec681f3Smrg } else { 3057ec681f3Smrg nir_metadata_preserve(impl, nir_metadata_all); 3067ec681f3Smrg } 3077ec681f3Smrg 3087ec681f3Smrg return changed; 3097ec681f3Smrg} 3107ec681f3Smrg 3117ec681f3Smrg/** 3127ec681f3Smrg * Remap 16-bit varying slots to the original 32-bit varying slots. 3137ec681f3Smrg * This only changes IO semantics and bases. 3147ec681f3Smrg */ 3157ec681f3Smrgbool 3167ec681f3Smrgnir_unpack_16bit_varying_slots(nir_shader *nir, nir_variable_mode modes) 3177ec681f3Smrg{ 3187ec681f3Smrg bool changed = false; 3197ec681f3Smrg nir_function_impl *impl = nir_shader_get_entrypoint(nir); 3207ec681f3Smrg assert(impl); 3217ec681f3Smrg 3227ec681f3Smrg nir_foreach_block_safe (block, impl) { 3237ec681f3Smrg nir_foreach_instr_safe (instr, block) { 3247ec681f3Smrg nir_variable_mode mode; 3257ec681f3Smrg nir_intrinsic_instr *intr = get_io_intrinsic(instr, modes, &mode); 3267ec681f3Smrg if (!intr) 3277ec681f3Smrg continue; 3287ec681f3Smrg 3297ec681f3Smrg nir_io_semantics sem = nir_intrinsic_io_semantics(intr); 3307ec681f3Smrg 3317ec681f3Smrg if (sem.location < VARYING_SLOT_VAR0_16BIT || 3327ec681f3Smrg sem.location > VARYING_SLOT_VAR15_16BIT) 3337ec681f3Smrg continue; 3347ec681f3Smrg 3357ec681f3Smrg sem.location = VARYING_SLOT_VAR0 + 3367ec681f3Smrg (sem.location - VARYING_SLOT_VAR0_16BIT) * 2 + 3377ec681f3Smrg sem.high_16bits; 3387ec681f3Smrg sem.high_16bits = 0; 3397ec681f3Smrg nir_intrinsic_set_io_semantics(intr, sem); 3407ec681f3Smrg changed = true; 3417ec681f3Smrg } 3427ec681f3Smrg } 3437ec681f3Smrg 3447ec681f3Smrg if (changed) 3457ec681f3Smrg nir_recompute_io_bases(impl, modes); 3467ec681f3Smrg 3477ec681f3Smrg if (changed) { 3487ec681f3Smrg nir_metadata_preserve(impl, nir_metadata_dominance | 3497ec681f3Smrg nir_metadata_block_index); 3507ec681f3Smrg } else { 3517ec681f3Smrg nir_metadata_preserve(impl, nir_metadata_all); 3527ec681f3Smrg } 3537ec681f3Smrg 3547ec681f3Smrg return changed; 3557ec681f3Smrg} 3567ec681f3Smrg 3577ec681f3Smrgstatic bool 3587ec681f3Smrgis_n_to_m_conversion(nir_instr *instr, unsigned n, nir_op m) 3597ec681f3Smrg{ 3607ec681f3Smrg if (instr->type != nir_instr_type_alu) 3617ec681f3Smrg return false; 3627ec681f3Smrg 3637ec681f3Smrg nir_alu_instr *alu = nir_instr_as_alu(instr); 3647ec681f3Smrg return alu->op == m && alu->src[0].src.ssa->bit_size == n; 3657ec681f3Smrg} 3667ec681f3Smrg 3677ec681f3Smrgstatic bool 3687ec681f3Smrgis_f16_to_f32_conversion(nir_instr *instr) 3697ec681f3Smrg{ 3707ec681f3Smrg return is_n_to_m_conversion(instr, 16, nir_op_f2f32); 3717ec681f3Smrg} 3727ec681f3Smrg 3737ec681f3Smrgstatic bool 3747ec681f3Smrgis_f32_to_f16_conversion(nir_instr *instr) 3757ec681f3Smrg{ 3767ec681f3Smrg return is_n_to_m_conversion(instr, 32, nir_op_f2f16) || 3777ec681f3Smrg is_n_to_m_conversion(instr, 32, nir_op_f2f16_rtne) || 3787ec681f3Smrg is_n_to_m_conversion(instr, 32, nir_op_f2fmp); 3797ec681f3Smrg} 3807ec681f3Smrg 3817ec681f3Smrgstatic bool 3827ec681f3Smrgis_i16_to_i32_conversion(nir_instr *instr) 3837ec681f3Smrg{ 3847ec681f3Smrg return is_n_to_m_conversion(instr, 16, nir_op_i2i32); 3857ec681f3Smrg} 3867ec681f3Smrg 3877ec681f3Smrgstatic bool 3887ec681f3Smrgis_u16_to_u32_conversion(nir_instr *instr) 3897ec681f3Smrg{ 3907ec681f3Smrg return is_n_to_m_conversion(instr, 16, nir_op_u2u32); 3917ec681f3Smrg} 3927ec681f3Smrg 3937ec681f3Smrgstatic bool 3947ec681f3Smrgis_i32_to_i16_conversion(nir_instr *instr) 3957ec681f3Smrg{ 3967ec681f3Smrg return is_n_to_m_conversion(instr, 32, nir_op_i2i16); 3977ec681f3Smrg} 3987ec681f3Smrg 3997ec681f3Smrgstatic void 4007ec681f3Smrgreplace_with_mov(nir_builder *b, nir_instr *instr, nir_src *src, 4017ec681f3Smrg nir_alu_instr *alu) 4027ec681f3Smrg{ 4037ec681f3Smrg nir_ssa_def *mov = nir_mov_alu(b, alu->src[0], 4047ec681f3Smrg nir_dest_num_components(alu->dest.dest)); 4057ec681f3Smrg assert(!alu->dest.saturate); 4067ec681f3Smrg nir_instr_rewrite_src_ssa(instr, src, mov); 4077ec681f3Smrg} 4087ec681f3Smrg 4097ec681f3Smrg/** 4107ec681f3Smrg * If texture source operands use f16->f32 conversions or return values are 4117ec681f3Smrg * followed by f16->f32 or f32->f16, remove those conversions. This benefits 4127ec681f3Smrg * drivers that have texture opcodes that can accept and return 16-bit types. 4137ec681f3Smrg * 4147ec681f3Smrg * "tex_src_types" is a mask of nir_tex_src_* operands that should be handled. 4157ec681f3Smrg * It's always done for the destination. 4167ec681f3Smrg * 4177ec681f3Smrg * This should be run after late algebraic optimizations. 4187ec681f3Smrg * Copy propagation and DCE should be run after this. 4197ec681f3Smrg */ 4207ec681f3Smrgbool 4217ec681f3Smrgnir_fold_16bit_sampler_conversions(nir_shader *nir, 4227ec681f3Smrg unsigned tex_src_types) 4237ec681f3Smrg{ 4247ec681f3Smrg bool changed = false; 4257ec681f3Smrg nir_function_impl *impl = nir_shader_get_entrypoint(nir); 4267ec681f3Smrg assert(impl); 4277ec681f3Smrg 4287ec681f3Smrg nir_builder b; 4297ec681f3Smrg nir_builder_init(&b, impl); 4307ec681f3Smrg 4317ec681f3Smrg nir_foreach_block_safe (block, impl) { 4327ec681f3Smrg nir_foreach_instr_safe (instr, block) { 4337ec681f3Smrg if (instr->type != nir_instr_type_tex) 4347ec681f3Smrg continue; 4357ec681f3Smrg 4367ec681f3Smrg nir_tex_instr *tex = nir_instr_as_tex(instr); 4377ec681f3Smrg nir_instr *src; 4387ec681f3Smrg nir_alu_instr *src_alu; 4397ec681f3Smrg 4407ec681f3Smrg /* Skip because AMD doesn't support 16-bit types with these. */ 4417ec681f3Smrg if ((tex->op == nir_texop_txs || 4427ec681f3Smrg tex->op == nir_texop_query_levels) || 4437ec681f3Smrg tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE) 4447ec681f3Smrg continue; 4457ec681f3Smrg 4467ec681f3Smrg /* Optimize source operands. */ 4477ec681f3Smrg for (unsigned i = 0; i < tex->num_srcs; i++) { 4487ec681f3Smrg /* Filter out sources that should be ignored. */ 4497ec681f3Smrg if (!(BITFIELD_BIT(tex->src[i].src_type) & tex_src_types)) 4507ec681f3Smrg continue; 4517ec681f3Smrg 4527ec681f3Smrg src = tex->src[i].src.ssa->parent_instr; 4537ec681f3Smrg if (src->type != nir_instr_type_alu) 4547ec681f3Smrg continue; 4557ec681f3Smrg 4567ec681f3Smrg src_alu = nir_instr_as_alu(src); 4577ec681f3Smrg b.cursor = nir_before_instr(src); 4587ec681f3Smrg 4597ec681f3Smrg if (src_alu->op == nir_op_mov) { 4607ec681f3Smrg assert(!"The IR shouldn't contain any movs to make this pass" 4617ec681f3Smrg " effective."); 4627ec681f3Smrg continue; 4637ec681f3Smrg } 4647ec681f3Smrg 4657ec681f3Smrg /* Handle vector sources that are made of scalar instructions. */ 4667ec681f3Smrg if (nir_op_is_vec(src_alu->op)) { 4677ec681f3Smrg /* See if the vector is made of f16->f32 opcodes. */ 4687ec681f3Smrg unsigned num = nir_dest_num_components(src_alu->dest.dest); 4697ec681f3Smrg bool is_f16_to_f32 = true; 4707ec681f3Smrg bool is_u16_to_u32 = true; 4717ec681f3Smrg 4727ec681f3Smrg for (unsigned comp = 0; comp < num; comp++) { 4737ec681f3Smrg nir_instr *instr = src_alu->src[comp].src.ssa->parent_instr; 4747ec681f3Smrg is_f16_to_f32 &= is_f16_to_f32_conversion(instr); 4757ec681f3Smrg /* Zero-extension (u16) and sign-extension (i16) have 4767ec681f3Smrg * the same behavior here - txf returns 0 if bit 15 is set 4777ec681f3Smrg * because it's out of bounds and the higher bits don't 4787ec681f3Smrg * matter. 4797ec681f3Smrg */ 4807ec681f3Smrg is_u16_to_u32 &= is_u16_to_u32_conversion(instr) || 4817ec681f3Smrg is_i16_to_i32_conversion(instr); 4827ec681f3Smrg } 4837ec681f3Smrg 4847ec681f3Smrg if (!is_f16_to_f32 && !is_u16_to_u32) 4857ec681f3Smrg continue; 4867ec681f3Smrg 4877ec681f3Smrg nir_alu_instr *new_vec = nir_alu_instr_clone(nir, src_alu); 4887ec681f3Smrg nir_instr_insert_after(&src_alu->instr, &new_vec->instr); 4897ec681f3Smrg 4907ec681f3Smrg /* Replace conversions with mov. */ 4917ec681f3Smrg for (unsigned comp = 0; comp < num; comp++) { 4927ec681f3Smrg nir_instr *instr = new_vec->src[comp].src.ssa->parent_instr; 4937ec681f3Smrg replace_with_mov(&b, &new_vec->instr, 4947ec681f3Smrg &new_vec->src[comp].src, 4957ec681f3Smrg nir_instr_as_alu(instr)); 4967ec681f3Smrg } 4977ec681f3Smrg 4987ec681f3Smrg new_vec->dest.dest.ssa.bit_size = 4997ec681f3Smrg new_vec->src[0].src.ssa->bit_size; 5007ec681f3Smrg nir_instr_rewrite_src_ssa(&tex->instr, &tex->src[i].src, 5017ec681f3Smrg &new_vec->dest.dest.ssa); 5027ec681f3Smrg changed = true; 5037ec681f3Smrg } else if (is_f16_to_f32_conversion(&src_alu->instr) || 5047ec681f3Smrg is_u16_to_u32_conversion(&src_alu->instr) || 5057ec681f3Smrg is_i16_to_i32_conversion(&src_alu->instr)) { 5067ec681f3Smrg /* Handle scalar sources. */ 5077ec681f3Smrg replace_with_mov(&b, &tex->instr, &tex->src[i].src, src_alu); 5087ec681f3Smrg changed = true; 5097ec681f3Smrg } 5107ec681f3Smrg } 5117ec681f3Smrg 5127ec681f3Smrg /* Optimize the destination. */ 5137ec681f3Smrg bool is_f16_to_f32 = true; 5147ec681f3Smrg bool is_f32_to_f16 = true; 5157ec681f3Smrg bool is_i16_to_i32 = true; 5167ec681f3Smrg bool is_i32_to_i16 = true; /* same behavior for int and uint */ 5177ec681f3Smrg bool is_u16_to_u32 = true; 5187ec681f3Smrg 5197ec681f3Smrg nir_foreach_use(use, &tex->dest.ssa) { 5207ec681f3Smrg is_f16_to_f32 &= is_f16_to_f32_conversion(use->parent_instr); 5217ec681f3Smrg is_f32_to_f16 &= is_f32_to_f16_conversion(use->parent_instr); 5227ec681f3Smrg is_i16_to_i32 &= is_i16_to_i32_conversion(use->parent_instr); 5237ec681f3Smrg is_i32_to_i16 &= is_i32_to_i16_conversion(use->parent_instr); 5247ec681f3Smrg is_u16_to_u32 &= is_u16_to_u32_conversion(use->parent_instr); 5257ec681f3Smrg } 5267ec681f3Smrg 5277ec681f3Smrg if (is_f16_to_f32 || is_f32_to_f16 || is_i16_to_i32 || 5287ec681f3Smrg is_i32_to_i16 || is_u16_to_u32) { 5297ec681f3Smrg /* All uses are the same conversions. Replace them with mov. */ 5307ec681f3Smrg nir_foreach_use(use, &tex->dest.ssa) { 5317ec681f3Smrg nir_alu_instr *conv = nir_instr_as_alu(use->parent_instr); 5327ec681f3Smrg conv->op = nir_op_mov; 5337ec681f3Smrg tex->dest.ssa.bit_size = conv->dest.dest.ssa.bit_size; 5347ec681f3Smrg tex->dest_type = (tex->dest_type & (~16 & ~32 & ~64)) | 5357ec681f3Smrg conv->dest.dest.ssa.bit_size; 5367ec681f3Smrg } 5377ec681f3Smrg changed = true; 5387ec681f3Smrg } 5397ec681f3Smrg } 5407ec681f3Smrg } 5417ec681f3Smrg 5427ec681f3Smrg if (changed) { 5437ec681f3Smrg nir_metadata_preserve(impl, nir_metadata_dominance | 5447ec681f3Smrg nir_metadata_block_index); 5457ec681f3Smrg } else { 5467ec681f3Smrg nir_metadata_preserve(impl, nir_metadata_all); 5477ec681f3Smrg } 5487ec681f3Smrg 5497ec681f3Smrg return changed; 5507ec681f3Smrg} 5517ec681f3Smrg 5527ec681f3Smrg/** 5537ec681f3Smrg * Fix types of source operands of texture opcodes according to 5547ec681f3Smrg * the constraints by inserting the appropriate conversion opcodes. 5557ec681f3Smrg * 5567ec681f3Smrg * For example, if the type of derivatives must be equal to texture 5577ec681f3Smrg * coordinates and the type of the texture bias must be 32-bit, there 5587ec681f3Smrg * will be 2 constraints describing that. 5597ec681f3Smrg */ 5607ec681f3Smrgbool 5617ec681f3Smrgnir_legalize_16bit_sampler_srcs(nir_shader *nir, 5627ec681f3Smrg nir_tex_src_type_constraints constraints) 5637ec681f3Smrg{ 5647ec681f3Smrg bool changed = false; 5657ec681f3Smrg nir_function_impl *impl = nir_shader_get_entrypoint(nir); 5667ec681f3Smrg assert(impl); 5677ec681f3Smrg 5687ec681f3Smrg nir_builder b; 5697ec681f3Smrg nir_builder_init(&b, impl); 5707ec681f3Smrg 5717ec681f3Smrg nir_foreach_block_safe (block, impl) { 5727ec681f3Smrg nir_foreach_instr_safe (instr, block) { 5737ec681f3Smrg if (instr->type != nir_instr_type_tex) 5747ec681f3Smrg continue; 5757ec681f3Smrg 5767ec681f3Smrg nir_tex_instr *tex = nir_instr_as_tex(instr); 5777ec681f3Smrg int8_t map[nir_num_tex_src_types]; 5787ec681f3Smrg memset(map, -1, sizeof(map)); 5797ec681f3Smrg 5807ec681f3Smrg /* Create a mapping from src_type to src[i]. */ 5817ec681f3Smrg for (unsigned i = 0; i < tex->num_srcs; i++) 5827ec681f3Smrg map[tex->src[i].src_type] = i; 5837ec681f3Smrg 5847ec681f3Smrg /* Legalize src types. */ 5857ec681f3Smrg for (unsigned i = 0; i < tex->num_srcs; i++) { 5867ec681f3Smrg nir_tex_src_type_constraint c = constraints[tex->src[i].src_type]; 5877ec681f3Smrg 5887ec681f3Smrg if (!c.legalize_type) 5897ec681f3Smrg continue; 5907ec681f3Smrg 5917ec681f3Smrg /* Determine the required bit size for the src. */ 5927ec681f3Smrg unsigned bit_size; 5937ec681f3Smrg if (c.bit_size) { 5947ec681f3Smrg bit_size = c.bit_size; 5957ec681f3Smrg } else { 5967ec681f3Smrg if (map[c.match_src] == -1) 5977ec681f3Smrg continue; /* e.g. txs */ 5987ec681f3Smrg 5997ec681f3Smrg bit_size = tex->src[map[c.match_src]].src.ssa->bit_size; 6007ec681f3Smrg } 6017ec681f3Smrg 6027ec681f3Smrg /* Check if the type is legal. */ 6037ec681f3Smrg if (bit_size == tex->src[i].src.ssa->bit_size) 6047ec681f3Smrg continue; 6057ec681f3Smrg 6067ec681f3Smrg /* Fix the bit size. */ 6077ec681f3Smrg bool is_sint = i == nir_tex_src_offset; 6087ec681f3Smrg bool is_uint = !is_sint && 6097ec681f3Smrg (tex->op == nir_texop_txf || 6107ec681f3Smrg tex->op == nir_texop_txf_ms || 6117ec681f3Smrg tex->op == nir_texop_txs || 6127ec681f3Smrg tex->op == nir_texop_samples_identical); 6137ec681f3Smrg nir_ssa_def *(*convert)(nir_builder *, nir_ssa_def *); 6147ec681f3Smrg 6157ec681f3Smrg switch (bit_size) { 6167ec681f3Smrg case 16: 6177ec681f3Smrg convert = is_sint ? nir_i2i16 : 6187ec681f3Smrg is_uint ? nir_u2u16 : nir_f2f16; 6197ec681f3Smrg break; 6207ec681f3Smrg case 32: 6217ec681f3Smrg convert = is_sint ? nir_i2i32 : 6227ec681f3Smrg is_uint ? nir_u2u32 : nir_f2f32; 6237ec681f3Smrg break; 6247ec681f3Smrg default: 6257ec681f3Smrg assert(!"unexpected bit size"); 6267ec681f3Smrg continue; 6277ec681f3Smrg } 6287ec681f3Smrg 6297ec681f3Smrg b.cursor = nir_before_instr(&tex->instr); 6307ec681f3Smrg nir_ssa_def *conv = 6317ec681f3Smrg convert(&b, nir_ssa_for_src(&b, tex->src[i].src, 6327ec681f3Smrg tex->src[i].src.ssa->num_components)); 6337ec681f3Smrg nir_instr_rewrite_src_ssa(&tex->instr, &tex->src[i].src, conv); 6347ec681f3Smrg changed = true; 6357ec681f3Smrg } 6367ec681f3Smrg } 6377ec681f3Smrg } 6387ec681f3Smrg 6397ec681f3Smrg if (changed) { 6407ec681f3Smrg nir_metadata_preserve(impl, nir_metadata_dominance | 6417ec681f3Smrg nir_metadata_block_index); 6427ec681f3Smrg } else { 6437ec681f3Smrg nir_metadata_preserve(impl, nir_metadata_all); 6447ec681f3Smrg } 6457ec681f3Smrg 6467ec681f3Smrg return changed; 6477ec681f3Smrg} 648