/*
 * Copyright (C) 2020 Google, Inc.
 * Copyright (C) 2021 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include "nir.h"
#include "nir_builder.h"

287ec681f3Smrg/**
297ec681f3Smrg * Return the intrinsic if it matches the mask in "modes", else return NULL.
307ec681f3Smrg */
317ec681f3Smrgstatic nir_intrinsic_instr *
327ec681f3Smrgget_io_intrinsic(nir_instr *instr, nir_variable_mode modes,
337ec681f3Smrg                 nir_variable_mode *out_mode)
347ec681f3Smrg{
357ec681f3Smrg   if (instr->type != nir_instr_type_intrinsic)
367ec681f3Smrg      return NULL;
377ec681f3Smrg
387ec681f3Smrg   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
397ec681f3Smrg
407ec681f3Smrg   switch (intr->intrinsic) {
417ec681f3Smrg   case nir_intrinsic_load_input:
427ec681f3Smrg   case nir_intrinsic_load_input_vertex:
437ec681f3Smrg   case nir_intrinsic_load_interpolated_input:
447ec681f3Smrg   case nir_intrinsic_load_per_vertex_input:
457ec681f3Smrg      *out_mode = nir_var_shader_in;
467ec681f3Smrg      return modes & nir_var_shader_in ? intr : NULL;
477ec681f3Smrg   case nir_intrinsic_load_output:
487ec681f3Smrg   case nir_intrinsic_load_per_vertex_output:
497ec681f3Smrg   case nir_intrinsic_store_output:
507ec681f3Smrg   case nir_intrinsic_store_per_vertex_output:
517ec681f3Smrg      *out_mode = nir_var_shader_out;
527ec681f3Smrg      return modes & nir_var_shader_out ? intr : NULL;
537ec681f3Smrg   default:
547ec681f3Smrg      return NULL;
557ec681f3Smrg   }
567ec681f3Smrg}
577ec681f3Smrg
587ec681f3Smrg/**
597ec681f3Smrg * Recompute the IO "base" indices from scratch to remove holes or to fix
607ec681f3Smrg * incorrect base values due to changes in IO locations by using IO locations
617ec681f3Smrg * to assign new bases. The mapping from locations to bases becomes
627ec681f3Smrg * monotonically increasing.
637ec681f3Smrg */
647ec681f3Smrgbool
657ec681f3Smrgnir_recompute_io_bases(nir_function_impl *impl, nir_variable_mode modes)
667ec681f3Smrg{
677ec681f3Smrg   BITSET_DECLARE(inputs, NUM_TOTAL_VARYING_SLOTS);
687ec681f3Smrg   BITSET_DECLARE(outputs, NUM_TOTAL_VARYING_SLOTS);
697ec681f3Smrg   BITSET_ZERO(inputs);
707ec681f3Smrg   BITSET_ZERO(outputs);
717ec681f3Smrg
727ec681f3Smrg   /* Gather the bitmasks of used locations. */
737ec681f3Smrg   nir_foreach_block_safe (block, impl) {
747ec681f3Smrg      nir_foreach_instr_safe (instr, block) {
757ec681f3Smrg         nir_variable_mode mode;
767ec681f3Smrg         nir_intrinsic_instr *intr = get_io_intrinsic(instr, modes, &mode);
777ec681f3Smrg         if (!intr)
787ec681f3Smrg            continue;
797ec681f3Smrg
807ec681f3Smrg         nir_io_semantics sem = nir_intrinsic_io_semantics(intr);
817ec681f3Smrg         unsigned num_slots = sem.num_slots;
827ec681f3Smrg         if (sem.medium_precision)
837ec681f3Smrg            num_slots = (num_slots + sem.high_16bits + 1) / 2;
847ec681f3Smrg
857ec681f3Smrg         if (mode == nir_var_shader_in) {
867ec681f3Smrg            for (unsigned i = 0; i < num_slots; i++)
877ec681f3Smrg               BITSET_SET(inputs, sem.location + i);
887ec681f3Smrg         } else if (!sem.dual_source_blend_index) {
897ec681f3Smrg            for (unsigned i = 0; i < num_slots; i++)
907ec681f3Smrg               BITSET_SET(outputs, sem.location + i);
917ec681f3Smrg         }
927ec681f3Smrg      }
937ec681f3Smrg   }
947ec681f3Smrg
957ec681f3Smrg   /* Renumber bases. */
967ec681f3Smrg   bool changed = false;
977ec681f3Smrg
987ec681f3Smrg   nir_foreach_block_safe (block, impl) {
997ec681f3Smrg      nir_foreach_instr_safe (instr, block) {
1007ec681f3Smrg         nir_variable_mode mode;
1017ec681f3Smrg         nir_intrinsic_instr *intr = get_io_intrinsic(instr, modes, &mode);
1027ec681f3Smrg         if (!intr)
1037ec681f3Smrg            continue;
1047ec681f3Smrg
1057ec681f3Smrg         nir_io_semantics sem = nir_intrinsic_io_semantics(intr);
1067ec681f3Smrg         unsigned num_slots = sem.num_slots;
1077ec681f3Smrg         if (sem.medium_precision)
1087ec681f3Smrg            num_slots = (num_slots + sem.high_16bits + 1) / 2;
1097ec681f3Smrg
1107ec681f3Smrg         if (mode == nir_var_shader_in) {
1117ec681f3Smrg            nir_intrinsic_set_base(intr,
1127ec681f3Smrg                                   BITSET_PREFIX_SUM(inputs, sem.location));
1137ec681f3Smrg         } else if (sem.dual_source_blend_index) {
1147ec681f3Smrg            nir_intrinsic_set_base(intr,
1157ec681f3Smrg                                   BITSET_PREFIX_SUM(outputs, NUM_TOTAL_VARYING_SLOTS));
1167ec681f3Smrg         } else {
1177ec681f3Smrg            nir_intrinsic_set_base(intr,
1187ec681f3Smrg                                   BITSET_PREFIX_SUM(outputs, sem.location));
1197ec681f3Smrg         }
1207ec681f3Smrg         changed = true;
1217ec681f3Smrg      }
1227ec681f3Smrg   }
1237ec681f3Smrg
1247ec681f3Smrg   if (changed) {
1257ec681f3Smrg      nir_metadata_preserve(impl, nir_metadata_dominance |
1267ec681f3Smrg                                  nir_metadata_block_index);
1277ec681f3Smrg   } else {
1287ec681f3Smrg      nir_metadata_preserve(impl, nir_metadata_all);
1297ec681f3Smrg   }
1307ec681f3Smrg
1317ec681f3Smrg   return changed;
1327ec681f3Smrg}
1337ec681f3Smrg
1347ec681f3Smrg/**
1357ec681f3Smrg * Lower mediump inputs and/or outputs to 16 bits.
1367ec681f3Smrg *
1377ec681f3Smrg * \param modes            Whether to lower inputs, outputs, or both.
1387ec681f3Smrg * \param varying_mask     Determines which varyings to skip (VS inputs,
1397ec681f3Smrg *    FS outputs, and patch varyings ignore this mask).
1407ec681f3Smrg * \param use_16bit_slots  Remap lowered slots to* VARYING_SLOT_VARn_16BIT.
1417ec681f3Smrg */
1427ec681f3Smrgbool
1437ec681f3Smrgnir_lower_mediump_io(nir_shader *nir, nir_variable_mode modes,
1447ec681f3Smrg                     uint64_t varying_mask, bool use_16bit_slots)
1457ec681f3Smrg{
1467ec681f3Smrg   bool changed = false;
1477ec681f3Smrg   nir_function_impl *impl = nir_shader_get_entrypoint(nir);
1487ec681f3Smrg   assert(impl);
1497ec681f3Smrg
1507ec681f3Smrg   nir_builder b;
1517ec681f3Smrg   nir_builder_init(&b, impl);
1527ec681f3Smrg
1537ec681f3Smrg   nir_foreach_block_safe (block, impl) {
1547ec681f3Smrg      nir_foreach_instr_safe (instr, block) {
1557ec681f3Smrg         nir_variable_mode mode;
1567ec681f3Smrg         nir_intrinsic_instr *intr = get_io_intrinsic(instr, modes, &mode);
1577ec681f3Smrg         if (!intr)
1587ec681f3Smrg            continue;
1597ec681f3Smrg
1607ec681f3Smrg         nir_io_semantics sem = nir_intrinsic_io_semantics(intr);
1617ec681f3Smrg         nir_ssa_def *(*convert)(nir_builder *, nir_ssa_def *);
1627ec681f3Smrg         bool is_varying = !(nir->info.stage == MESA_SHADER_VERTEX &&
1637ec681f3Smrg                             mode == nir_var_shader_in) &&
1647ec681f3Smrg                           !(nir->info.stage == MESA_SHADER_FRAGMENT &&
1657ec681f3Smrg                             mode == nir_var_shader_out);
1667ec681f3Smrg
1677ec681f3Smrg         if (!sem.medium_precision ||
1687ec681f3Smrg             (is_varying && sem.location <= VARYING_SLOT_VAR31 &&
1697ec681f3Smrg              !(varying_mask & BITFIELD64_BIT(sem.location))))
1707ec681f3Smrg            continue; /* can't lower */
1717ec681f3Smrg
1727ec681f3Smrg         if (nir_intrinsic_has_src_type(intr)) {
1737ec681f3Smrg            /* Stores. */
1747ec681f3Smrg            nir_alu_type type = nir_intrinsic_src_type(intr);
1757ec681f3Smrg
1767ec681f3Smrg            switch (type) {
1777ec681f3Smrg            case nir_type_float32:
1787ec681f3Smrg               convert = nir_f2fmp;
1797ec681f3Smrg               break;
1807ec681f3Smrg            case nir_type_int32:
1817ec681f3Smrg            case nir_type_uint32:
1827ec681f3Smrg               convert = nir_i2imp;
1837ec681f3Smrg               break;
1847ec681f3Smrg            default:
1857ec681f3Smrg               continue; /* already lowered? */
1867ec681f3Smrg            }
1877ec681f3Smrg
1887ec681f3Smrg            /* Convert the 32-bit store into a 16-bit store. */
1897ec681f3Smrg            b.cursor = nir_before_instr(&intr->instr);
1907ec681f3Smrg            nir_instr_rewrite_src_ssa(&intr->instr, &intr->src[0],
1917ec681f3Smrg                                      convert(&b, intr->src[0].ssa));
1927ec681f3Smrg            nir_intrinsic_set_src_type(intr, (type & ~32) | 16);
1937ec681f3Smrg         } else {
1947ec681f3Smrg            /* Loads. */
1957ec681f3Smrg            nir_alu_type type = nir_intrinsic_dest_type(intr);
1967ec681f3Smrg
1977ec681f3Smrg            switch (type) {
1987ec681f3Smrg            case nir_type_float32:
1997ec681f3Smrg               convert = nir_f2f32;
2007ec681f3Smrg               break;
2017ec681f3Smrg            case nir_type_int32:
2027ec681f3Smrg               convert = nir_i2i32;
2037ec681f3Smrg               break;
2047ec681f3Smrg            case nir_type_uint32:
2057ec681f3Smrg               convert = nir_u2u32;
2067ec681f3Smrg               break;
2077ec681f3Smrg            default:
2087ec681f3Smrg               continue; /* already lowered? */
2097ec681f3Smrg            }
2107ec681f3Smrg
2117ec681f3Smrg            /* Convert the 32-bit load into a 16-bit load. */
2127ec681f3Smrg            b.cursor = nir_after_instr(&intr->instr);
2137ec681f3Smrg            intr->dest.ssa.bit_size = 16;
2147ec681f3Smrg            nir_intrinsic_set_dest_type(intr, (type & ~32) | 16);
2157ec681f3Smrg            nir_ssa_def *dst = convert(&b, &intr->dest.ssa);
2167ec681f3Smrg            nir_ssa_def_rewrite_uses_after(&intr->dest.ssa, dst,
2177ec681f3Smrg                                           dst->parent_instr);
2187ec681f3Smrg         }
2197ec681f3Smrg
2207ec681f3Smrg         if (use_16bit_slots && is_varying &&
2217ec681f3Smrg             sem.location >= VARYING_SLOT_VAR0 &&
2227ec681f3Smrg             sem.location <= VARYING_SLOT_VAR31) {
2237ec681f3Smrg            unsigned index = sem.location - VARYING_SLOT_VAR0;
2247ec681f3Smrg
2257ec681f3Smrg            sem.location = VARYING_SLOT_VAR0_16BIT + index / 2;
2267ec681f3Smrg            sem.high_16bits = index % 2;
2277ec681f3Smrg            nir_intrinsic_set_io_semantics(intr, sem);
2287ec681f3Smrg         }
2297ec681f3Smrg         changed = true;
2307ec681f3Smrg      }
2317ec681f3Smrg   }
2327ec681f3Smrg
2337ec681f3Smrg   if (changed && use_16bit_slots)
2347ec681f3Smrg      nir_recompute_io_bases(impl, modes);
2357ec681f3Smrg
2367ec681f3Smrg   if (changed) {
2377ec681f3Smrg      nir_metadata_preserve(impl, nir_metadata_dominance |
2387ec681f3Smrg                                  nir_metadata_block_index);
2397ec681f3Smrg   } else {
2407ec681f3Smrg      nir_metadata_preserve(impl, nir_metadata_all);
2417ec681f3Smrg   }
2427ec681f3Smrg
2437ec681f3Smrg   return changed;
2447ec681f3Smrg}
2457ec681f3Smrg
2467ec681f3Smrg/**
2477ec681f3Smrg * Set the mediump precision bit for those shader inputs and outputs that are
2487ec681f3Smrg * set in the "modes" mask. Non-generic varyings (that GLES3 doesn't have)
2497ec681f3Smrg * are ignored. The "types" mask can be (nir_type_float | nir_type_int), etc.
2507ec681f3Smrg */
2517ec681f3Smrgbool
2527ec681f3Smrgnir_force_mediump_io(nir_shader *nir, nir_variable_mode modes,
2537ec681f3Smrg                     nir_alu_type types)
2547ec681f3Smrg{
2557ec681f3Smrg   bool changed = false;
2567ec681f3Smrg   nir_function_impl *impl = nir_shader_get_entrypoint(nir);
2577ec681f3Smrg   assert(impl);
2587ec681f3Smrg
2597ec681f3Smrg   nir_builder b;
2607ec681f3Smrg   nir_builder_init(&b, impl);
2617ec681f3Smrg
2627ec681f3Smrg   nir_foreach_block_safe (block, impl) {
2637ec681f3Smrg      nir_foreach_instr_safe (instr, block) {
2647ec681f3Smrg         nir_variable_mode mode;
2657ec681f3Smrg         nir_intrinsic_instr *intr = get_io_intrinsic(instr, modes, &mode);
2667ec681f3Smrg         if (!intr)
2677ec681f3Smrg            continue;
2687ec681f3Smrg
2697ec681f3Smrg         nir_alu_type type;
2707ec681f3Smrg         if (nir_intrinsic_has_src_type(intr))
2717ec681f3Smrg            type = nir_intrinsic_src_type(intr);
2727ec681f3Smrg         else
2737ec681f3Smrg            type = nir_intrinsic_dest_type(intr);
2747ec681f3Smrg         if (!(type & types))
2757ec681f3Smrg            continue;
2767ec681f3Smrg
2777ec681f3Smrg         nir_io_semantics sem = nir_intrinsic_io_semantics(intr);
2787ec681f3Smrg
2797ec681f3Smrg         if (nir->info.stage == MESA_SHADER_FRAGMENT &&
2807ec681f3Smrg             mode == nir_var_shader_out) {
2817ec681f3Smrg            /* Only accept FS outputs. */
2827ec681f3Smrg            if (sem.location < FRAG_RESULT_DATA0 &&
2837ec681f3Smrg                sem.location != FRAG_RESULT_COLOR)
2847ec681f3Smrg               continue;
2857ec681f3Smrg         } else if (nir->info.stage == MESA_SHADER_VERTEX &&
2867ec681f3Smrg                    mode == nir_var_shader_in) {
2877ec681f3Smrg            /* Accept all VS inputs. */
2887ec681f3Smrg         } else {
2897ec681f3Smrg            /* Only accept generic varyings. */
2907ec681f3Smrg            if (sem.location < VARYING_SLOT_VAR0 ||
2917ec681f3Smrg                sem.location > VARYING_SLOT_VAR31)
2927ec681f3Smrg            continue;
2937ec681f3Smrg         }
2947ec681f3Smrg
2957ec681f3Smrg         sem.medium_precision = 1;
2967ec681f3Smrg         nir_intrinsic_set_io_semantics(intr, sem);
2977ec681f3Smrg         changed = true;
2987ec681f3Smrg      }
2997ec681f3Smrg   }
3007ec681f3Smrg
3017ec681f3Smrg   if (changed) {
3027ec681f3Smrg      nir_metadata_preserve(impl, nir_metadata_dominance |
3037ec681f3Smrg                                  nir_metadata_block_index);
3047ec681f3Smrg   } else {
3057ec681f3Smrg      nir_metadata_preserve(impl, nir_metadata_all);
3067ec681f3Smrg   }
3077ec681f3Smrg
3087ec681f3Smrg   return changed;
3097ec681f3Smrg}
3107ec681f3Smrg
3117ec681f3Smrg/**
3127ec681f3Smrg * Remap 16-bit varying slots to the original 32-bit varying slots.
3137ec681f3Smrg * This only changes IO semantics and bases.
3147ec681f3Smrg */
3157ec681f3Smrgbool
3167ec681f3Smrgnir_unpack_16bit_varying_slots(nir_shader *nir, nir_variable_mode modes)
3177ec681f3Smrg{
3187ec681f3Smrg   bool changed = false;
3197ec681f3Smrg   nir_function_impl *impl = nir_shader_get_entrypoint(nir);
3207ec681f3Smrg   assert(impl);
3217ec681f3Smrg
3227ec681f3Smrg   nir_foreach_block_safe (block, impl) {
3237ec681f3Smrg      nir_foreach_instr_safe (instr, block) {
3247ec681f3Smrg         nir_variable_mode mode;
3257ec681f3Smrg         nir_intrinsic_instr *intr = get_io_intrinsic(instr, modes, &mode);
3267ec681f3Smrg         if (!intr)
3277ec681f3Smrg            continue;
3287ec681f3Smrg
3297ec681f3Smrg         nir_io_semantics sem = nir_intrinsic_io_semantics(intr);
3307ec681f3Smrg
3317ec681f3Smrg         if (sem.location < VARYING_SLOT_VAR0_16BIT ||
3327ec681f3Smrg             sem.location > VARYING_SLOT_VAR15_16BIT)
3337ec681f3Smrg            continue;
3347ec681f3Smrg
3357ec681f3Smrg         sem.location = VARYING_SLOT_VAR0 +
3367ec681f3Smrg                        (sem.location - VARYING_SLOT_VAR0_16BIT) * 2 +
3377ec681f3Smrg                        sem.high_16bits;
3387ec681f3Smrg         sem.high_16bits = 0;
3397ec681f3Smrg         nir_intrinsic_set_io_semantics(intr, sem);
3407ec681f3Smrg         changed = true;
3417ec681f3Smrg      }
3427ec681f3Smrg   }
3437ec681f3Smrg
3447ec681f3Smrg   if (changed)
3457ec681f3Smrg      nir_recompute_io_bases(impl, modes);
3467ec681f3Smrg
3477ec681f3Smrg   if (changed) {
3487ec681f3Smrg      nir_metadata_preserve(impl, nir_metadata_dominance |
3497ec681f3Smrg                                  nir_metadata_block_index);
3507ec681f3Smrg   } else {
3517ec681f3Smrg      nir_metadata_preserve(impl, nir_metadata_all);
3527ec681f3Smrg   }
3537ec681f3Smrg
3547ec681f3Smrg   return changed;
3557ec681f3Smrg}
3567ec681f3Smrg
3577ec681f3Smrgstatic bool
3587ec681f3Smrgis_n_to_m_conversion(nir_instr *instr, unsigned n, nir_op m)
3597ec681f3Smrg{
3607ec681f3Smrg   if (instr->type != nir_instr_type_alu)
3617ec681f3Smrg      return false;
3627ec681f3Smrg
3637ec681f3Smrg   nir_alu_instr *alu = nir_instr_as_alu(instr);
3647ec681f3Smrg   return alu->op == m && alu->src[0].src.ssa->bit_size == n;
3657ec681f3Smrg}
3667ec681f3Smrg
3677ec681f3Smrgstatic bool
3687ec681f3Smrgis_f16_to_f32_conversion(nir_instr *instr)
3697ec681f3Smrg{
3707ec681f3Smrg   return is_n_to_m_conversion(instr, 16, nir_op_f2f32);
3717ec681f3Smrg}
3727ec681f3Smrg
3737ec681f3Smrgstatic bool
3747ec681f3Smrgis_f32_to_f16_conversion(nir_instr *instr)
3757ec681f3Smrg{
3767ec681f3Smrg   return is_n_to_m_conversion(instr, 32, nir_op_f2f16) ||
3777ec681f3Smrg          is_n_to_m_conversion(instr, 32, nir_op_f2f16_rtne) ||
3787ec681f3Smrg          is_n_to_m_conversion(instr, 32, nir_op_f2fmp);
3797ec681f3Smrg}
3807ec681f3Smrg
3817ec681f3Smrgstatic bool
3827ec681f3Smrgis_i16_to_i32_conversion(nir_instr *instr)
3837ec681f3Smrg{
3847ec681f3Smrg   return is_n_to_m_conversion(instr, 16, nir_op_i2i32);
3857ec681f3Smrg}
3867ec681f3Smrg
3877ec681f3Smrgstatic bool
3887ec681f3Smrgis_u16_to_u32_conversion(nir_instr *instr)
3897ec681f3Smrg{
3907ec681f3Smrg   return is_n_to_m_conversion(instr, 16, nir_op_u2u32);
3917ec681f3Smrg}
3927ec681f3Smrg
3937ec681f3Smrgstatic bool
3947ec681f3Smrgis_i32_to_i16_conversion(nir_instr *instr)
3957ec681f3Smrg{
3967ec681f3Smrg   return is_n_to_m_conversion(instr, 32, nir_op_i2i16);
3977ec681f3Smrg}
3987ec681f3Smrg
3997ec681f3Smrgstatic void
4007ec681f3Smrgreplace_with_mov(nir_builder *b, nir_instr *instr, nir_src *src,
4017ec681f3Smrg                 nir_alu_instr *alu)
4027ec681f3Smrg{
4037ec681f3Smrg   nir_ssa_def *mov = nir_mov_alu(b, alu->src[0],
4047ec681f3Smrg                                  nir_dest_num_components(alu->dest.dest));
4057ec681f3Smrg   assert(!alu->dest.saturate);
4067ec681f3Smrg   nir_instr_rewrite_src_ssa(instr, src, mov);
4077ec681f3Smrg}
4087ec681f3Smrg
4097ec681f3Smrg/**
4107ec681f3Smrg * If texture source operands use f16->f32 conversions or return values are
4117ec681f3Smrg * followed by f16->f32 or f32->f16, remove those conversions. This benefits
4127ec681f3Smrg * drivers that have texture opcodes that can accept and return 16-bit types.
4137ec681f3Smrg *
4147ec681f3Smrg * "tex_src_types" is a mask of nir_tex_src_* operands that should be handled.
4157ec681f3Smrg * It's always done for the destination.
4167ec681f3Smrg *
4177ec681f3Smrg * This should be run after late algebraic optimizations.
4187ec681f3Smrg * Copy propagation and DCE should be run after this.
4197ec681f3Smrg */
4207ec681f3Smrgbool
4217ec681f3Smrgnir_fold_16bit_sampler_conversions(nir_shader *nir,
4227ec681f3Smrg                                   unsigned tex_src_types)
4237ec681f3Smrg{
4247ec681f3Smrg   bool changed = false;
4257ec681f3Smrg   nir_function_impl *impl = nir_shader_get_entrypoint(nir);
4267ec681f3Smrg   assert(impl);
4277ec681f3Smrg
4287ec681f3Smrg   nir_builder b;
4297ec681f3Smrg   nir_builder_init(&b, impl);
4307ec681f3Smrg
4317ec681f3Smrg   nir_foreach_block_safe (block, impl) {
4327ec681f3Smrg      nir_foreach_instr_safe (instr, block) {
4337ec681f3Smrg         if (instr->type != nir_instr_type_tex)
4347ec681f3Smrg            continue;
4357ec681f3Smrg
4367ec681f3Smrg         nir_tex_instr *tex = nir_instr_as_tex(instr);
4377ec681f3Smrg         nir_instr *src;
4387ec681f3Smrg         nir_alu_instr *src_alu;
4397ec681f3Smrg
4407ec681f3Smrg         /* Skip because AMD doesn't support 16-bit types with these. */
4417ec681f3Smrg         if ((tex->op == nir_texop_txs ||
4427ec681f3Smrg              tex->op == nir_texop_query_levels) ||
4437ec681f3Smrg             tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE)
4447ec681f3Smrg            continue;
4457ec681f3Smrg
4467ec681f3Smrg         /* Optimize source operands. */
4477ec681f3Smrg         for (unsigned i = 0; i < tex->num_srcs; i++) {
4487ec681f3Smrg            /* Filter out sources that should be ignored. */
4497ec681f3Smrg            if (!(BITFIELD_BIT(tex->src[i].src_type) & tex_src_types))
4507ec681f3Smrg               continue;
4517ec681f3Smrg
4527ec681f3Smrg            src = tex->src[i].src.ssa->parent_instr;
4537ec681f3Smrg            if (src->type != nir_instr_type_alu)
4547ec681f3Smrg               continue;
4557ec681f3Smrg
4567ec681f3Smrg            src_alu = nir_instr_as_alu(src);
4577ec681f3Smrg            b.cursor = nir_before_instr(src);
4587ec681f3Smrg
4597ec681f3Smrg            if (src_alu->op == nir_op_mov) {
4607ec681f3Smrg               assert(!"The IR shouldn't contain any movs to make this pass"
4617ec681f3Smrg                       " effective.");
4627ec681f3Smrg               continue;
4637ec681f3Smrg            }
4647ec681f3Smrg
4657ec681f3Smrg            /* Handle vector sources that are made of scalar instructions. */
4667ec681f3Smrg            if (nir_op_is_vec(src_alu->op)) {
4677ec681f3Smrg               /* See if the vector is made of f16->f32 opcodes. */
4687ec681f3Smrg               unsigned num = nir_dest_num_components(src_alu->dest.dest);
4697ec681f3Smrg               bool is_f16_to_f32 = true;
4707ec681f3Smrg               bool is_u16_to_u32 = true;
4717ec681f3Smrg
4727ec681f3Smrg               for (unsigned comp = 0; comp < num; comp++) {
4737ec681f3Smrg                  nir_instr *instr = src_alu->src[comp].src.ssa->parent_instr;
4747ec681f3Smrg                  is_f16_to_f32 &= is_f16_to_f32_conversion(instr);
4757ec681f3Smrg                  /* Zero-extension (u16) and sign-extension (i16) have
4767ec681f3Smrg                   * the same behavior here - txf returns 0 if bit 15 is set
4777ec681f3Smrg                   * because it's out of bounds and the higher bits don't
4787ec681f3Smrg                   * matter.
4797ec681f3Smrg                   */
4807ec681f3Smrg                  is_u16_to_u32 &= is_u16_to_u32_conversion(instr) ||
4817ec681f3Smrg                                   is_i16_to_i32_conversion(instr);
4827ec681f3Smrg               }
4837ec681f3Smrg
4847ec681f3Smrg               if (!is_f16_to_f32 && !is_u16_to_u32)
4857ec681f3Smrg                  continue;
4867ec681f3Smrg
4877ec681f3Smrg               nir_alu_instr *new_vec = nir_alu_instr_clone(nir, src_alu);
4887ec681f3Smrg               nir_instr_insert_after(&src_alu->instr, &new_vec->instr);
4897ec681f3Smrg
4907ec681f3Smrg               /* Replace conversions with mov. */
4917ec681f3Smrg               for (unsigned comp = 0; comp < num; comp++) {
4927ec681f3Smrg                  nir_instr *instr = new_vec->src[comp].src.ssa->parent_instr;
4937ec681f3Smrg                  replace_with_mov(&b, &new_vec->instr,
4947ec681f3Smrg                                   &new_vec->src[comp].src,
4957ec681f3Smrg                                   nir_instr_as_alu(instr));
4967ec681f3Smrg               }
4977ec681f3Smrg
4987ec681f3Smrg               new_vec->dest.dest.ssa.bit_size =
4997ec681f3Smrg                  new_vec->src[0].src.ssa->bit_size;
5007ec681f3Smrg               nir_instr_rewrite_src_ssa(&tex->instr, &tex->src[i].src,
5017ec681f3Smrg                                         &new_vec->dest.dest.ssa);
5027ec681f3Smrg               changed = true;
5037ec681f3Smrg            } else if (is_f16_to_f32_conversion(&src_alu->instr) ||
5047ec681f3Smrg                       is_u16_to_u32_conversion(&src_alu->instr) ||
5057ec681f3Smrg                       is_i16_to_i32_conversion(&src_alu->instr)) {
5067ec681f3Smrg               /* Handle scalar sources. */
5077ec681f3Smrg               replace_with_mov(&b, &tex->instr, &tex->src[i].src, src_alu);
5087ec681f3Smrg               changed = true;
5097ec681f3Smrg            }
5107ec681f3Smrg         }
5117ec681f3Smrg
5127ec681f3Smrg         /* Optimize the destination. */
5137ec681f3Smrg         bool is_f16_to_f32 = true;
5147ec681f3Smrg         bool is_f32_to_f16 = true;
5157ec681f3Smrg         bool is_i16_to_i32 = true;
5167ec681f3Smrg         bool is_i32_to_i16 = true; /* same behavior for int and uint */
5177ec681f3Smrg         bool is_u16_to_u32 = true;
5187ec681f3Smrg
5197ec681f3Smrg         nir_foreach_use(use, &tex->dest.ssa) {
5207ec681f3Smrg            is_f16_to_f32 &= is_f16_to_f32_conversion(use->parent_instr);
5217ec681f3Smrg            is_f32_to_f16 &= is_f32_to_f16_conversion(use->parent_instr);
5227ec681f3Smrg            is_i16_to_i32 &= is_i16_to_i32_conversion(use->parent_instr);
5237ec681f3Smrg            is_i32_to_i16 &= is_i32_to_i16_conversion(use->parent_instr);
5247ec681f3Smrg            is_u16_to_u32 &= is_u16_to_u32_conversion(use->parent_instr);
5257ec681f3Smrg         }
5267ec681f3Smrg
5277ec681f3Smrg         if (is_f16_to_f32 || is_f32_to_f16 || is_i16_to_i32 ||
5287ec681f3Smrg             is_i32_to_i16 || is_u16_to_u32) {
5297ec681f3Smrg            /* All uses are the same conversions. Replace them with mov. */
5307ec681f3Smrg            nir_foreach_use(use, &tex->dest.ssa) {
5317ec681f3Smrg               nir_alu_instr *conv = nir_instr_as_alu(use->parent_instr);
5327ec681f3Smrg               conv->op = nir_op_mov;
5337ec681f3Smrg               tex->dest.ssa.bit_size = conv->dest.dest.ssa.bit_size;
5347ec681f3Smrg               tex->dest_type = (tex->dest_type & (~16 & ~32 & ~64)) |
5357ec681f3Smrg                                conv->dest.dest.ssa.bit_size;
5367ec681f3Smrg            }
5377ec681f3Smrg            changed = true;
5387ec681f3Smrg         }
5397ec681f3Smrg      }
5407ec681f3Smrg   }
5417ec681f3Smrg
5427ec681f3Smrg   if (changed) {
5437ec681f3Smrg      nir_metadata_preserve(impl, nir_metadata_dominance |
5447ec681f3Smrg                                  nir_metadata_block_index);
5457ec681f3Smrg   } else {
5467ec681f3Smrg      nir_metadata_preserve(impl, nir_metadata_all);
5477ec681f3Smrg   }
5487ec681f3Smrg
5497ec681f3Smrg   return changed;
5507ec681f3Smrg}
5517ec681f3Smrg
5527ec681f3Smrg/**
5537ec681f3Smrg * Fix types of source operands of texture opcodes according to
5547ec681f3Smrg * the constraints by inserting the appropriate conversion opcodes.
5557ec681f3Smrg *
5567ec681f3Smrg * For example, if the type of derivatives must be equal to texture
5577ec681f3Smrg * coordinates and the type of the texture bias must be 32-bit, there
5587ec681f3Smrg * will be 2 constraints describing that.
5597ec681f3Smrg */
5607ec681f3Smrgbool
5617ec681f3Smrgnir_legalize_16bit_sampler_srcs(nir_shader *nir,
5627ec681f3Smrg                                nir_tex_src_type_constraints constraints)
5637ec681f3Smrg{
5647ec681f3Smrg   bool changed = false;
5657ec681f3Smrg   nir_function_impl *impl = nir_shader_get_entrypoint(nir);
5667ec681f3Smrg   assert(impl);
5677ec681f3Smrg
5687ec681f3Smrg   nir_builder b;
5697ec681f3Smrg   nir_builder_init(&b, impl);
5707ec681f3Smrg
5717ec681f3Smrg   nir_foreach_block_safe (block, impl) {
5727ec681f3Smrg      nir_foreach_instr_safe (instr, block) {
5737ec681f3Smrg         if (instr->type != nir_instr_type_tex)
5747ec681f3Smrg            continue;
5757ec681f3Smrg
5767ec681f3Smrg         nir_tex_instr *tex = nir_instr_as_tex(instr);
5777ec681f3Smrg         int8_t map[nir_num_tex_src_types];
5787ec681f3Smrg         memset(map, -1, sizeof(map));
5797ec681f3Smrg
5807ec681f3Smrg         /* Create a mapping from src_type to src[i]. */
5817ec681f3Smrg         for (unsigned i = 0; i < tex->num_srcs; i++)
5827ec681f3Smrg            map[tex->src[i].src_type] = i;
5837ec681f3Smrg
5847ec681f3Smrg         /* Legalize src types. */
5857ec681f3Smrg         for (unsigned i = 0; i < tex->num_srcs; i++) {
5867ec681f3Smrg            nir_tex_src_type_constraint c = constraints[tex->src[i].src_type];
5877ec681f3Smrg
5887ec681f3Smrg            if (!c.legalize_type)
5897ec681f3Smrg               continue;
5907ec681f3Smrg
5917ec681f3Smrg            /* Determine the required bit size for the src. */
5927ec681f3Smrg            unsigned bit_size;
5937ec681f3Smrg            if (c.bit_size) {
5947ec681f3Smrg               bit_size = c.bit_size;
5957ec681f3Smrg            } else {
5967ec681f3Smrg               if (map[c.match_src] == -1)
5977ec681f3Smrg                  continue; /* e.g. txs */
5987ec681f3Smrg
5997ec681f3Smrg               bit_size = tex->src[map[c.match_src]].src.ssa->bit_size;
6007ec681f3Smrg            }
6017ec681f3Smrg
6027ec681f3Smrg            /* Check if the type is legal. */
6037ec681f3Smrg            if (bit_size == tex->src[i].src.ssa->bit_size)
6047ec681f3Smrg               continue;
6057ec681f3Smrg
6067ec681f3Smrg            /* Fix the bit size. */
6077ec681f3Smrg            bool is_sint = i == nir_tex_src_offset;
6087ec681f3Smrg            bool is_uint = !is_sint &&
6097ec681f3Smrg                           (tex->op == nir_texop_txf ||
6107ec681f3Smrg                            tex->op == nir_texop_txf_ms ||
6117ec681f3Smrg                            tex->op == nir_texop_txs ||
6127ec681f3Smrg                            tex->op == nir_texop_samples_identical);
6137ec681f3Smrg            nir_ssa_def *(*convert)(nir_builder *, nir_ssa_def *);
6147ec681f3Smrg
6157ec681f3Smrg            switch (bit_size) {
6167ec681f3Smrg            case 16:
6177ec681f3Smrg               convert = is_sint ? nir_i2i16 :
6187ec681f3Smrg                         is_uint ? nir_u2u16 : nir_f2f16;
6197ec681f3Smrg               break;
6207ec681f3Smrg            case 32:
6217ec681f3Smrg               convert = is_sint ? nir_i2i32 :
6227ec681f3Smrg                         is_uint ? nir_u2u32 : nir_f2f32;
6237ec681f3Smrg               break;
6247ec681f3Smrg            default:
6257ec681f3Smrg               assert(!"unexpected bit size");
6267ec681f3Smrg               continue;
6277ec681f3Smrg            }
6287ec681f3Smrg
6297ec681f3Smrg            b.cursor = nir_before_instr(&tex->instr);
6307ec681f3Smrg            nir_ssa_def *conv =
6317ec681f3Smrg               convert(&b, nir_ssa_for_src(&b, tex->src[i].src,
6327ec681f3Smrg                                           tex->src[i].src.ssa->num_components));
6337ec681f3Smrg            nir_instr_rewrite_src_ssa(&tex->instr, &tex->src[i].src, conv);
6347ec681f3Smrg            changed = true;
6357ec681f3Smrg         }
6367ec681f3Smrg      }
6377ec681f3Smrg   }
6387ec681f3Smrg
6397ec681f3Smrg   if (changed) {
6407ec681f3Smrg      nir_metadata_preserve(impl, nir_metadata_dominance |
6417ec681f3Smrg                                  nir_metadata_block_index);
6427ec681f3Smrg   } else {
6437ec681f3Smrg      nir_metadata_preserve(impl, nir_metadata_all);
6447ec681f3Smrg   }
6457ec681f3Smrg
6467ec681f3Smrg   return changed;
6477ec681f3Smrg}