/*
 * Copyright © Microsoft Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
237ec681f3Smrg
247ec681f3Smrg#include "dxil_nir.h"
257ec681f3Smrg
267ec681f3Smrg#include "nir_builder.h"
277ec681f3Smrg#include "nir_deref.h"
287ec681f3Smrg#include "nir_to_dxil.h"
297ec681f3Smrg#include "util/u_math.h"
307ec681f3Smrg
317ec681f3Smrgstatic void
327ec681f3Smrgcl_type_size_align(const struct glsl_type *type, unsigned *size,
337ec681f3Smrg                   unsigned *align)
347ec681f3Smrg{
357ec681f3Smrg   *size = glsl_get_cl_size(type);
367ec681f3Smrg   *align = glsl_get_cl_alignment(type);
377ec681f3Smrg}
387ec681f3Smrg
397ec681f3Smrgstatic void
407ec681f3Smrgextract_comps_from_vec32(nir_builder *b, nir_ssa_def *vec32,
417ec681f3Smrg                         unsigned dst_bit_size,
427ec681f3Smrg                         nir_ssa_def **dst_comps,
437ec681f3Smrg                         unsigned num_dst_comps)
447ec681f3Smrg{
457ec681f3Smrg   unsigned step = DIV_ROUND_UP(dst_bit_size, 32);
467ec681f3Smrg   unsigned comps_per32b = 32 / dst_bit_size;
477ec681f3Smrg   nir_ssa_def *tmp;
487ec681f3Smrg
497ec681f3Smrg   for (unsigned i = 0; i < vec32->num_components; i += step) {
507ec681f3Smrg      switch (dst_bit_size) {
517ec681f3Smrg      case 64:
527ec681f3Smrg         tmp = nir_pack_64_2x32_split(b, nir_channel(b, vec32, i),
537ec681f3Smrg                                         nir_channel(b, vec32, i + 1));
547ec681f3Smrg         dst_comps[i / 2] = tmp;
557ec681f3Smrg         break;
567ec681f3Smrg      case 32:
577ec681f3Smrg         dst_comps[i] = nir_channel(b, vec32, i);
587ec681f3Smrg         break;
597ec681f3Smrg      case 16:
607ec681f3Smrg      case 8: {
617ec681f3Smrg         unsigned dst_offs = i * comps_per32b;
627ec681f3Smrg
637ec681f3Smrg         tmp = nir_unpack_bits(b, nir_channel(b, vec32, i), dst_bit_size);
647ec681f3Smrg         for (unsigned j = 0; j < comps_per32b && dst_offs + j < num_dst_comps; j++)
657ec681f3Smrg            dst_comps[dst_offs + j] = nir_channel(b, tmp, j);
667ec681f3Smrg         }
677ec681f3Smrg
687ec681f3Smrg         break;
697ec681f3Smrg      }
707ec681f3Smrg   }
717ec681f3Smrg}
727ec681f3Smrg
737ec681f3Smrgstatic nir_ssa_def *
747ec681f3Smrgload_comps_to_vec32(nir_builder *b, unsigned src_bit_size,
757ec681f3Smrg                    nir_ssa_def **src_comps, unsigned num_src_comps)
767ec681f3Smrg{
777ec681f3Smrg   unsigned num_vec32comps = DIV_ROUND_UP(num_src_comps * src_bit_size, 32);
787ec681f3Smrg   unsigned step = DIV_ROUND_UP(src_bit_size, 32);
797ec681f3Smrg   unsigned comps_per32b = 32 / src_bit_size;
807ec681f3Smrg   nir_ssa_def *vec32comps[4];
817ec681f3Smrg
827ec681f3Smrg   for (unsigned i = 0; i < num_vec32comps; i += step) {
837ec681f3Smrg      switch (src_bit_size) {
847ec681f3Smrg      case 64:
857ec681f3Smrg         vec32comps[i] = nir_unpack_64_2x32_split_x(b, src_comps[i / 2]);
867ec681f3Smrg         vec32comps[i + 1] = nir_unpack_64_2x32_split_y(b, src_comps[i / 2]);
877ec681f3Smrg         break;
887ec681f3Smrg      case 32:
897ec681f3Smrg         vec32comps[i] = src_comps[i];
907ec681f3Smrg         break;
917ec681f3Smrg      case 16:
927ec681f3Smrg      case 8: {
937ec681f3Smrg         unsigned src_offs = i * comps_per32b;
947ec681f3Smrg
957ec681f3Smrg         vec32comps[i] = nir_u2u32(b, src_comps[src_offs]);
967ec681f3Smrg         for (unsigned j = 1; j < comps_per32b && src_offs + j < num_src_comps; j++) {
977ec681f3Smrg            nir_ssa_def *tmp = nir_ishl(b, nir_u2u32(b, src_comps[src_offs + j]),
987ec681f3Smrg                                           nir_imm_int(b, j * src_bit_size));
997ec681f3Smrg            vec32comps[i] = nir_ior(b, vec32comps[i], tmp);
1007ec681f3Smrg         }
1017ec681f3Smrg         break;
1027ec681f3Smrg      }
1037ec681f3Smrg      }
1047ec681f3Smrg   }
1057ec681f3Smrg
1067ec681f3Smrg   return nir_vec(b, vec32comps, num_vec32comps);
1077ec681f3Smrg}
1087ec681f3Smrg
1097ec681f3Smrgstatic nir_ssa_def *
1107ec681f3Smrgbuild_load_ptr_dxil(nir_builder *b, nir_deref_instr *deref, nir_ssa_def *idx)
1117ec681f3Smrg{
1127ec681f3Smrg   return nir_load_ptr_dxil(b, 1, 32, &deref->dest.ssa, idx);
1137ec681f3Smrg}
1147ec681f3Smrg
1157ec681f3Smrgstatic bool
1167ec681f3Smrglower_load_deref(nir_builder *b, nir_intrinsic_instr *intr)
1177ec681f3Smrg{
1187ec681f3Smrg   assert(intr->dest.is_ssa);
1197ec681f3Smrg
1207ec681f3Smrg   b->cursor = nir_before_instr(&intr->instr);
1217ec681f3Smrg
1227ec681f3Smrg   nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
1237ec681f3Smrg   if (!nir_deref_mode_is(deref, nir_var_shader_temp))
1247ec681f3Smrg      return false;
1257ec681f3Smrg   nir_ssa_def *ptr = nir_u2u32(b, nir_build_deref_offset(b, deref, cl_type_size_align));
1267ec681f3Smrg   nir_ssa_def *offset = nir_iand(b, ptr, nir_inot(b, nir_imm_int(b, 3)));
1277ec681f3Smrg
1287ec681f3Smrg   assert(intr->dest.is_ssa);
1297ec681f3Smrg   unsigned num_components = nir_dest_num_components(intr->dest);
1307ec681f3Smrg   unsigned bit_size = nir_dest_bit_size(intr->dest);
1317ec681f3Smrg   unsigned load_size = MAX2(32, bit_size);
1327ec681f3Smrg   unsigned num_bits = num_components * bit_size;
1337ec681f3Smrg   nir_ssa_def *comps[NIR_MAX_VEC_COMPONENTS];
1347ec681f3Smrg   unsigned comp_idx = 0;
1357ec681f3Smrg
1367ec681f3Smrg   nir_deref_path path;
1377ec681f3Smrg   nir_deref_path_init(&path, deref, NULL);
1387ec681f3Smrg   nir_ssa_def *base_idx = nir_ishr(b, offset, nir_imm_int(b, 2 /* log2(32 / 8) */));
1397ec681f3Smrg
1407ec681f3Smrg   /* Split loads into 32-bit chunks */
1417ec681f3Smrg   for (unsigned i = 0; i < num_bits; i += load_size) {
1427ec681f3Smrg      unsigned subload_num_bits = MIN2(num_bits - i, load_size);
1437ec681f3Smrg      nir_ssa_def *idx = nir_iadd(b, base_idx, nir_imm_int(b, i / 32));
1447ec681f3Smrg      nir_ssa_def *vec32 = build_load_ptr_dxil(b, path.path[0], idx);
1457ec681f3Smrg
1467ec681f3Smrg      if (load_size == 64) {
1477ec681f3Smrg         idx = nir_iadd(b, idx, nir_imm_int(b, 1));
1487ec681f3Smrg         vec32 = nir_vec2(b, vec32,
1497ec681f3Smrg                             build_load_ptr_dxil(b, path.path[0], idx));
1507ec681f3Smrg      }
1517ec681f3Smrg
1527ec681f3Smrg      /* If we have 2 bytes or less to load we need to adjust the u32 value so
1537ec681f3Smrg       * we can always extract the LSB.
1547ec681f3Smrg       */
1557ec681f3Smrg      if (subload_num_bits <= 16) {
1567ec681f3Smrg         nir_ssa_def *shift = nir_imul(b, nir_iand(b, ptr, nir_imm_int(b, 3)),
1577ec681f3Smrg                                          nir_imm_int(b, 8));
1587ec681f3Smrg         vec32 = nir_ushr(b, vec32, shift);
1597ec681f3Smrg      }
1607ec681f3Smrg
1617ec681f3Smrg      /* And now comes the pack/unpack step to match the original type. */
1627ec681f3Smrg      extract_comps_from_vec32(b, vec32, bit_size, &comps[comp_idx],
1637ec681f3Smrg                               subload_num_bits / bit_size);
1647ec681f3Smrg      comp_idx += subload_num_bits / bit_size;
1657ec681f3Smrg   }
1667ec681f3Smrg
1677ec681f3Smrg   nir_deref_path_finish(&path);
1687ec681f3Smrg   assert(comp_idx == num_components);
1697ec681f3Smrg   nir_ssa_def *result = nir_vec(b, comps, num_components);
1707ec681f3Smrg   nir_ssa_def_rewrite_uses(&intr->dest.ssa, result);
1717ec681f3Smrg   nir_instr_remove(&intr->instr);
1727ec681f3Smrg   return true;
1737ec681f3Smrg}
1747ec681f3Smrg
1757ec681f3Smrgstatic nir_ssa_def *
1767ec681f3Smrgubo_load_select_32b_comps(nir_builder *b, nir_ssa_def *vec32,
1777ec681f3Smrg                          nir_ssa_def *offset, unsigned num_bytes)
1787ec681f3Smrg{
1797ec681f3Smrg   assert(num_bytes == 16 || num_bytes == 12 || num_bytes == 8 ||
1807ec681f3Smrg          num_bytes == 4 || num_bytes == 3 || num_bytes == 2 ||
1817ec681f3Smrg          num_bytes == 1);
1827ec681f3Smrg   assert(vec32->num_components == 4);
1837ec681f3Smrg
1847ec681f3Smrg   /* 16 and 12 byte types are always aligned on 16 bytes. */
1857ec681f3Smrg   if (num_bytes > 8)
1867ec681f3Smrg      return vec32;
1877ec681f3Smrg
1887ec681f3Smrg   nir_ssa_def *comps[4];
1897ec681f3Smrg   nir_ssa_def *cond;
1907ec681f3Smrg
1917ec681f3Smrg   for (unsigned i = 0; i < 4; i++)
1927ec681f3Smrg      comps[i] = nir_channel(b, vec32, i);
1937ec681f3Smrg
1947ec681f3Smrg   /* If we have 8bytes or less to load, select which half the vec4 should
1957ec681f3Smrg    * be used.
1967ec681f3Smrg    */
1977ec681f3Smrg   cond = nir_ine(b, nir_iand(b, offset, nir_imm_int(b, 0x8)),
1987ec681f3Smrg                                 nir_imm_int(b, 0));
1997ec681f3Smrg
2007ec681f3Smrg   comps[0] = nir_bcsel(b, cond, comps[2], comps[0]);
2017ec681f3Smrg   comps[1] = nir_bcsel(b, cond, comps[3], comps[1]);
2027ec681f3Smrg
2037ec681f3Smrg   /* Thanks to the CL alignment constraints, if we want 8 bytes we're done. */
2047ec681f3Smrg   if (num_bytes == 8)
2057ec681f3Smrg      return nir_vec(b, comps, 2);
2067ec681f3Smrg
2077ec681f3Smrg   /* 4 bytes or less needed, select which of the 32bit component should be
2087ec681f3Smrg    * used and return it. The sub-32bit split is handled in
2097ec681f3Smrg    * extract_comps_from_vec32().
2107ec681f3Smrg    */
2117ec681f3Smrg   cond = nir_ine(b, nir_iand(b, offset, nir_imm_int(b, 0x4)),
2127ec681f3Smrg                                 nir_imm_int(b, 0));
2137ec681f3Smrg   return nir_bcsel(b, cond, comps[1], comps[0]);
2147ec681f3Smrg}
2157ec681f3Smrg
2167ec681f3Smrgnir_ssa_def *
2177ec681f3Smrgbuild_load_ubo_dxil(nir_builder *b, nir_ssa_def *buffer,
2187ec681f3Smrg                    nir_ssa_def *offset, unsigned num_components,
2197ec681f3Smrg                    unsigned bit_size)
2207ec681f3Smrg{
2217ec681f3Smrg   nir_ssa_def *idx = nir_ushr(b, offset, nir_imm_int(b, 4));
2227ec681f3Smrg   nir_ssa_def *comps[NIR_MAX_VEC_COMPONENTS];
2237ec681f3Smrg   unsigned num_bits = num_components * bit_size;
2247ec681f3Smrg   unsigned comp_idx = 0;
2257ec681f3Smrg
2267ec681f3Smrg   /* We need to split loads in 16byte chunks because that's the
2277ec681f3Smrg    * granularity of cBufferLoadLegacy().
2287ec681f3Smrg    */
2297ec681f3Smrg   for (unsigned i = 0; i < num_bits; i += (16 * 8)) {
2307ec681f3Smrg      /* For each 16byte chunk (or smaller) we generate a 32bit ubo vec
2317ec681f3Smrg       * load.
2327ec681f3Smrg       */
2337ec681f3Smrg      unsigned subload_num_bits = MIN2(num_bits - i, 16 * 8);
2347ec681f3Smrg      nir_ssa_def *vec32 =
2357ec681f3Smrg         nir_load_ubo_dxil(b, 4, 32, buffer, nir_iadd(b, idx, nir_imm_int(b, i / (16 * 8))));
2367ec681f3Smrg
2377ec681f3Smrg      /* First re-arrange the vec32 to account for intra 16-byte offset. */
2387ec681f3Smrg      vec32 = ubo_load_select_32b_comps(b, vec32, offset, subload_num_bits / 8);
2397ec681f3Smrg
2407ec681f3Smrg      /* If we have 2 bytes or less to load we need to adjust the u32 value so
2417ec681f3Smrg       * we can always extract the LSB.
2427ec681f3Smrg       */
2437ec681f3Smrg      if (subload_num_bits <= 16) {
2447ec681f3Smrg         nir_ssa_def *shift = nir_imul(b, nir_iand(b, offset,
2457ec681f3Smrg                                                      nir_imm_int(b, 3)),
2467ec681f3Smrg                                          nir_imm_int(b, 8));
2477ec681f3Smrg         vec32 = nir_ushr(b, vec32, shift);
2487ec681f3Smrg      }
2497ec681f3Smrg
2507ec681f3Smrg      /* And now comes the pack/unpack step to match the original type. */
2517ec681f3Smrg      extract_comps_from_vec32(b, vec32, bit_size, &comps[comp_idx],
2527ec681f3Smrg                               subload_num_bits / bit_size);
2537ec681f3Smrg      comp_idx += subload_num_bits / bit_size;
2547ec681f3Smrg   }
2557ec681f3Smrg
2567ec681f3Smrg   assert(comp_idx == num_components);
2577ec681f3Smrg   return nir_vec(b, comps, num_components);
2587ec681f3Smrg}
2597ec681f3Smrg
2607ec681f3Smrgstatic bool
2617ec681f3Smrglower_load_ssbo(nir_builder *b, nir_intrinsic_instr *intr)
2627ec681f3Smrg{
2637ec681f3Smrg   assert(intr->dest.is_ssa);
2647ec681f3Smrg   assert(intr->src[0].is_ssa);
2657ec681f3Smrg   assert(intr->src[1].is_ssa);
2667ec681f3Smrg
2677ec681f3Smrg   b->cursor = nir_before_instr(&intr->instr);
2687ec681f3Smrg
2697ec681f3Smrg   nir_ssa_def *buffer = intr->src[0].ssa;
2707ec681f3Smrg   nir_ssa_def *offset = nir_iand(b, intr->src[1].ssa, nir_imm_int(b, ~3));
2717ec681f3Smrg   enum gl_access_qualifier access = nir_intrinsic_access(intr);
2727ec681f3Smrg   unsigned bit_size = nir_dest_bit_size(intr->dest);
2737ec681f3Smrg   unsigned num_components = nir_dest_num_components(intr->dest);
2747ec681f3Smrg   unsigned num_bits = num_components * bit_size;
2757ec681f3Smrg
2767ec681f3Smrg   nir_ssa_def *comps[NIR_MAX_VEC_COMPONENTS];
2777ec681f3Smrg   unsigned comp_idx = 0;
2787ec681f3Smrg
2797ec681f3Smrg   /* We need to split loads in 16byte chunks because that's the optimal
2807ec681f3Smrg    * granularity of bufferLoad(). Minimum alignment is 4byte, which saves
2817ec681f3Smrg    * from us from extra complexity to extract >= 32 bit components.
2827ec681f3Smrg    */
2837ec681f3Smrg   for (unsigned i = 0; i < num_bits; i += 4 * 32) {
2847ec681f3Smrg      /* For each 16byte chunk (or smaller) we generate a 32bit ssbo vec
2857ec681f3Smrg       * load.
2867ec681f3Smrg       */
2877ec681f3Smrg      unsigned subload_num_bits = MIN2(num_bits - i, 4 * 32);
2887ec681f3Smrg
2897ec681f3Smrg      /* The number of components to store depends on the number of bytes. */
2907ec681f3Smrg      nir_ssa_def *vec32 =
2917ec681f3Smrg         nir_load_ssbo(b, DIV_ROUND_UP(subload_num_bits, 32), 32,
2927ec681f3Smrg                       buffer, nir_iadd(b, offset, nir_imm_int(b, i / 8)),
2937ec681f3Smrg                       .align_mul = 4,
2947ec681f3Smrg                       .align_offset = 0,
2957ec681f3Smrg                       .access = access);
2967ec681f3Smrg
2977ec681f3Smrg      /* If we have 2 bytes or less to load we need to adjust the u32 value so
2987ec681f3Smrg       * we can always extract the LSB.
2997ec681f3Smrg       */
3007ec681f3Smrg      if (subload_num_bits <= 16) {
3017ec681f3Smrg         nir_ssa_def *shift = nir_imul(b, nir_iand(b, intr->src[1].ssa, nir_imm_int(b, 3)),
3027ec681f3Smrg                                          nir_imm_int(b, 8));
3037ec681f3Smrg         vec32 = nir_ushr(b, vec32, shift);
3047ec681f3Smrg      }
3057ec681f3Smrg
3067ec681f3Smrg      /* And now comes the pack/unpack step to match the original type. */
3077ec681f3Smrg      extract_comps_from_vec32(b, vec32, bit_size, &comps[comp_idx],
3087ec681f3Smrg                               subload_num_bits / bit_size);
3097ec681f3Smrg      comp_idx += subload_num_bits / bit_size;
3107ec681f3Smrg   }
3117ec681f3Smrg
3127ec681f3Smrg   assert(comp_idx == num_components);
3137ec681f3Smrg   nir_ssa_def *result = nir_vec(b, comps, num_components);
3147ec681f3Smrg   nir_ssa_def_rewrite_uses(&intr->dest.ssa, result);
3157ec681f3Smrg   nir_instr_remove(&intr->instr);
3167ec681f3Smrg   return true;
3177ec681f3Smrg}
3187ec681f3Smrg
3197ec681f3Smrgstatic bool
3207ec681f3Smrglower_store_ssbo(nir_builder *b, nir_intrinsic_instr *intr)
3217ec681f3Smrg{
3227ec681f3Smrg   b->cursor = nir_before_instr(&intr->instr);
3237ec681f3Smrg
3247ec681f3Smrg   assert(intr->src[0].is_ssa);
3257ec681f3Smrg   assert(intr->src[1].is_ssa);
3267ec681f3Smrg   assert(intr->src[2].is_ssa);
3277ec681f3Smrg
3287ec681f3Smrg   nir_ssa_def *val = intr->src[0].ssa;
3297ec681f3Smrg   nir_ssa_def *buffer = intr->src[1].ssa;
3307ec681f3Smrg   nir_ssa_def *offset = nir_iand(b, intr->src[2].ssa, nir_imm_int(b, ~3));
3317ec681f3Smrg
3327ec681f3Smrg   unsigned bit_size = val->bit_size;
3337ec681f3Smrg   unsigned num_components = val->num_components;
3347ec681f3Smrg   unsigned num_bits = num_components * bit_size;
3357ec681f3Smrg
3367ec681f3Smrg   nir_ssa_def *comps[NIR_MAX_VEC_COMPONENTS];
3377ec681f3Smrg   unsigned comp_idx = 0;
3387ec681f3Smrg
3397ec681f3Smrg   for (unsigned i = 0; i < num_components; i++)
3407ec681f3Smrg      comps[i] = nir_channel(b, val, i);
3417ec681f3Smrg
3427ec681f3Smrg   /* We split stores in 16byte chunks because that's the optimal granularity
3437ec681f3Smrg    * of bufferStore(). Minimum alignment is 4byte, which saves from us from
3447ec681f3Smrg    * extra complexity to store >= 32 bit components.
3457ec681f3Smrg    */
3467ec681f3Smrg   for (unsigned i = 0; i < num_bits; i += 4 * 32) {
3477ec681f3Smrg      /* For each 16byte chunk (or smaller) we generate a 32bit ssbo vec
3487ec681f3Smrg       * store.
3497ec681f3Smrg       */
3507ec681f3Smrg      unsigned substore_num_bits = MIN2(num_bits - i, 4 * 32);
3517ec681f3Smrg      nir_ssa_def *local_offset = nir_iadd(b, offset, nir_imm_int(b, i / 8));
3527ec681f3Smrg      nir_ssa_def *vec32 = load_comps_to_vec32(b, bit_size, &comps[comp_idx],
3537ec681f3Smrg                                               substore_num_bits / bit_size);
3547ec681f3Smrg      nir_intrinsic_instr *store;
3557ec681f3Smrg
3567ec681f3Smrg      if (substore_num_bits < 32) {
3577ec681f3Smrg         nir_ssa_def *mask = nir_imm_int(b, (1 << substore_num_bits) - 1);
3587ec681f3Smrg
3597ec681f3Smrg        /* If we have 16 bits or less to store we need to place them
3607ec681f3Smrg         * correctly in the u32 component. Anything greater than 16 bits
3617ec681f3Smrg         * (including uchar3) is naturally aligned on 32bits.
3627ec681f3Smrg         */
3637ec681f3Smrg         if (substore_num_bits <= 16) {
3647ec681f3Smrg            nir_ssa_def *pos = nir_iand(b, intr->src[2].ssa, nir_imm_int(b, 3));
3657ec681f3Smrg            nir_ssa_def *shift = nir_imul_imm(b, pos, 8);
3667ec681f3Smrg
3677ec681f3Smrg            vec32 = nir_ishl(b, vec32, shift);
3687ec681f3Smrg            mask = nir_ishl(b, mask, shift);
3697ec681f3Smrg         }
3707ec681f3Smrg
3717ec681f3Smrg         store = nir_intrinsic_instr_create(b->shader,
3727ec681f3Smrg                                            nir_intrinsic_store_ssbo_masked_dxil);
3737ec681f3Smrg         store->src[0] = nir_src_for_ssa(vec32);
3747ec681f3Smrg         store->src[1] = nir_src_for_ssa(nir_inot(b, mask));
3757ec681f3Smrg         store->src[2] = nir_src_for_ssa(buffer);
3767ec681f3Smrg         store->src[3] = nir_src_for_ssa(local_offset);
3777ec681f3Smrg      } else {
3787ec681f3Smrg         store = nir_intrinsic_instr_create(b->shader,
3797ec681f3Smrg                                            nir_intrinsic_store_ssbo);
3807ec681f3Smrg         store->src[0] = nir_src_for_ssa(vec32);
3817ec681f3Smrg         store->src[1] = nir_src_for_ssa(buffer);
3827ec681f3Smrg         store->src[2] = nir_src_for_ssa(local_offset);
3837ec681f3Smrg
3847ec681f3Smrg         nir_intrinsic_set_align(store, 4, 0);
3857ec681f3Smrg      }
3867ec681f3Smrg
3877ec681f3Smrg      /* The number of components to store depends on the number of bits. */
3887ec681f3Smrg      store->num_components = DIV_ROUND_UP(substore_num_bits, 32);
3897ec681f3Smrg      nir_builder_instr_insert(b, &store->instr);
3907ec681f3Smrg      comp_idx += substore_num_bits / bit_size;
3917ec681f3Smrg   }
3927ec681f3Smrg
3937ec681f3Smrg   nir_instr_remove(&intr->instr);
3947ec681f3Smrg   return true;
3957ec681f3Smrg}
3967ec681f3Smrg
3977ec681f3Smrgstatic void
3987ec681f3Smrglower_load_vec32(nir_builder *b, nir_ssa_def *index, unsigned num_comps, nir_ssa_def **comps, nir_intrinsic_op op)
3997ec681f3Smrg{
4007ec681f3Smrg   for (unsigned i = 0; i < num_comps; i++) {
4017ec681f3Smrg      nir_intrinsic_instr *load =
4027ec681f3Smrg         nir_intrinsic_instr_create(b->shader, op);
4037ec681f3Smrg
4047ec681f3Smrg      load->num_components = 1;
4057ec681f3Smrg      load->src[0] = nir_src_for_ssa(nir_iadd(b, index, nir_imm_int(b, i)));
4067ec681f3Smrg      nir_ssa_dest_init(&load->instr, &load->dest, 1, 32, NULL);
4077ec681f3Smrg      nir_builder_instr_insert(b, &load->instr);
4087ec681f3Smrg      comps[i] = &load->dest.ssa;
4097ec681f3Smrg   }
4107ec681f3Smrg}
4117ec681f3Smrg
4127ec681f3Smrgstatic bool
4137ec681f3Smrglower_32b_offset_load(nir_builder *b, nir_intrinsic_instr *intr)
4147ec681f3Smrg{
4157ec681f3Smrg   assert(intr->dest.is_ssa);
4167ec681f3Smrg   unsigned bit_size = nir_dest_bit_size(intr->dest);
4177ec681f3Smrg   unsigned num_components = nir_dest_num_components(intr->dest);
4187ec681f3Smrg   unsigned num_bits = num_components * bit_size;
4197ec681f3Smrg
4207ec681f3Smrg   b->cursor = nir_before_instr(&intr->instr);
4217ec681f3Smrg   nir_intrinsic_op op = intr->intrinsic;
4227ec681f3Smrg
4237ec681f3Smrg   assert(intr->src[0].is_ssa);
4247ec681f3Smrg   nir_ssa_def *offset = intr->src[0].ssa;
4257ec681f3Smrg   if (op == nir_intrinsic_load_shared) {
4267ec681f3Smrg      offset = nir_iadd(b, offset, nir_imm_int(b, nir_intrinsic_base(intr)));
4277ec681f3Smrg      op = nir_intrinsic_load_shared_dxil;
4287ec681f3Smrg   } else {
4297ec681f3Smrg      offset = nir_u2u32(b, offset);
4307ec681f3Smrg      op = nir_intrinsic_load_scratch_dxil;
4317ec681f3Smrg   }
4327ec681f3Smrg   nir_ssa_def *index = nir_ushr(b, offset, nir_imm_int(b, 2));
4337ec681f3Smrg   nir_ssa_def *comps[NIR_MAX_VEC_COMPONENTS];
4347ec681f3Smrg   nir_ssa_def *comps_32bit[NIR_MAX_VEC_COMPONENTS * 2];
4357ec681f3Smrg
4367ec681f3Smrg   /* We need to split loads in 32-bit accesses because the buffer
4377ec681f3Smrg    * is an i32 array and DXIL does not support type casts.
4387ec681f3Smrg    */
4397ec681f3Smrg   unsigned num_32bit_comps = DIV_ROUND_UP(num_bits, 32);
4407ec681f3Smrg   lower_load_vec32(b, index, num_32bit_comps, comps_32bit, op);
4417ec681f3Smrg   unsigned num_comps_per_pass = MIN2(num_32bit_comps, 4);
4427ec681f3Smrg
4437ec681f3Smrg   for (unsigned i = 0; i < num_32bit_comps; i += num_comps_per_pass) {
4447ec681f3Smrg      unsigned num_vec32_comps = MIN2(num_32bit_comps - i, 4);
4457ec681f3Smrg      unsigned num_dest_comps = num_vec32_comps * 32 / bit_size;
4467ec681f3Smrg      nir_ssa_def *vec32 = nir_vec(b, &comps_32bit[i], num_vec32_comps);
4477ec681f3Smrg
4487ec681f3Smrg      /* If we have 16 bits or less to load we need to adjust the u32 value so
4497ec681f3Smrg       * we can always extract the LSB.
4507ec681f3Smrg       */
4517ec681f3Smrg      if (num_bits <= 16) {
4527ec681f3Smrg         nir_ssa_def *shift =
4537ec681f3Smrg            nir_imul(b, nir_iand(b, offset, nir_imm_int(b, 3)),
4547ec681f3Smrg                        nir_imm_int(b, 8));
4557ec681f3Smrg         vec32 = nir_ushr(b, vec32, shift);
4567ec681f3Smrg      }
4577ec681f3Smrg
4587ec681f3Smrg      /* And now comes the pack/unpack step to match the original type. */
4597ec681f3Smrg      unsigned dest_index = i * 32 / bit_size;
4607ec681f3Smrg      extract_comps_from_vec32(b, vec32, bit_size, &comps[dest_index], num_dest_comps);
4617ec681f3Smrg   }
4627ec681f3Smrg
4637ec681f3Smrg   nir_ssa_def *result = nir_vec(b, comps, num_components);
4647ec681f3Smrg   nir_ssa_def_rewrite_uses(&intr->dest.ssa, result);
4657ec681f3Smrg   nir_instr_remove(&intr->instr);
4667ec681f3Smrg
4677ec681f3Smrg   return true;
4687ec681f3Smrg}
4697ec681f3Smrg
4707ec681f3Smrgstatic void
4717ec681f3Smrglower_store_vec32(nir_builder *b, nir_ssa_def *index, nir_ssa_def *vec32, nir_intrinsic_op op)
4727ec681f3Smrg{
4737ec681f3Smrg
4747ec681f3Smrg   for (unsigned i = 0; i < vec32->num_components; i++) {
4757ec681f3Smrg      nir_intrinsic_instr *store =
4767ec681f3Smrg         nir_intrinsic_instr_create(b->shader, op);
4777ec681f3Smrg
4787ec681f3Smrg      store->src[0] = nir_src_for_ssa(nir_channel(b, vec32, i));
4797ec681f3Smrg      store->src[1] = nir_src_for_ssa(nir_iadd(b, index, nir_imm_int(b, i)));
4807ec681f3Smrg      store->num_components = 1;
4817ec681f3Smrg      nir_builder_instr_insert(b, &store->instr);
4827ec681f3Smrg   }
4837ec681f3Smrg}
4847ec681f3Smrg
4857ec681f3Smrgstatic void
4867ec681f3Smrglower_masked_store_vec32(nir_builder *b, nir_ssa_def *offset, nir_ssa_def *index,
4877ec681f3Smrg                         nir_ssa_def *vec32, unsigned num_bits, nir_intrinsic_op op)
4887ec681f3Smrg{
4897ec681f3Smrg   nir_ssa_def *mask = nir_imm_int(b, (1 << num_bits) - 1);
4907ec681f3Smrg
4917ec681f3Smrg   /* If we have 16 bits or less to store we need to place them correctly in
4927ec681f3Smrg    * the u32 component. Anything greater than 16 bits (including uchar3) is
4937ec681f3Smrg    * naturally aligned on 32bits.
4947ec681f3Smrg    */
4957ec681f3Smrg   if (num_bits <= 16) {
4967ec681f3Smrg      nir_ssa_def *shift =
4977ec681f3Smrg         nir_imul_imm(b, nir_iand(b, offset, nir_imm_int(b, 3)), 8);
4987ec681f3Smrg
4997ec681f3Smrg      vec32 = nir_ishl(b, vec32, shift);
5007ec681f3Smrg      mask = nir_ishl(b, mask, shift);
5017ec681f3Smrg   }
5027ec681f3Smrg
5037ec681f3Smrg   if (op == nir_intrinsic_store_shared_dxil) {
5047ec681f3Smrg      /* Use the dedicated masked intrinsic */
5057ec681f3Smrg      nir_store_shared_masked_dxil(b, vec32, nir_inot(b, mask), index);
5067ec681f3Smrg   } else {
5077ec681f3Smrg      /* For scratch, since we don't need atomics, just generate the read-modify-write in NIR */
5087ec681f3Smrg      nir_ssa_def *load = nir_load_scratch_dxil(b, 1, 32, index);
5097ec681f3Smrg
5107ec681f3Smrg      nir_ssa_def *new_val = nir_ior(b, vec32,
5117ec681f3Smrg                                     nir_iand(b,
5127ec681f3Smrg                                              nir_inot(b, mask),
5137ec681f3Smrg                                              load));
5147ec681f3Smrg
5157ec681f3Smrg      lower_store_vec32(b, index, new_val, op);
5167ec681f3Smrg   }
5177ec681f3Smrg}
5187ec681f3Smrg
5197ec681f3Smrgstatic bool
5207ec681f3Smrglower_32b_offset_store(nir_builder *b, nir_intrinsic_instr *intr)
5217ec681f3Smrg{
5227ec681f3Smrg   assert(intr->src[0].is_ssa);
5237ec681f3Smrg   unsigned num_components = nir_src_num_components(intr->src[0]);
5247ec681f3Smrg   unsigned bit_size = nir_src_bit_size(intr->src[0]);
5257ec681f3Smrg   unsigned num_bits = num_components * bit_size;
5267ec681f3Smrg
5277ec681f3Smrg   b->cursor = nir_before_instr(&intr->instr);
5287ec681f3Smrg   nir_intrinsic_op op = intr->intrinsic;
5297ec681f3Smrg
5307ec681f3Smrg   nir_ssa_def *offset = intr->src[1].ssa;
5317ec681f3Smrg   if (op == nir_intrinsic_store_shared) {
5327ec681f3Smrg      offset = nir_iadd(b, offset, nir_imm_int(b, nir_intrinsic_base(intr)));
5337ec681f3Smrg      op = nir_intrinsic_store_shared_dxil;
5347ec681f3Smrg   } else {
5357ec681f3Smrg      offset = nir_u2u32(b, offset);
5367ec681f3Smrg      op = nir_intrinsic_store_scratch_dxil;
5377ec681f3Smrg   }
5387ec681f3Smrg   nir_ssa_def *comps[NIR_MAX_VEC_COMPONENTS];
5397ec681f3Smrg
5407ec681f3Smrg   unsigned comp_idx = 0;
5417ec681f3Smrg   for (unsigned i = 0; i < num_components; i++)
5427ec681f3Smrg      comps[i] = nir_channel(b, intr->src[0].ssa, i);
5437ec681f3Smrg
5447ec681f3Smrg   for (unsigned i = 0; i < num_bits; i += 4 * 32) {
5457ec681f3Smrg      /* For each 4byte chunk (or smaller) we generate a 32bit scalar store.
5467ec681f3Smrg       */
5477ec681f3Smrg      unsigned substore_num_bits = MIN2(num_bits - i, 4 * 32);
5487ec681f3Smrg      nir_ssa_def *local_offset = nir_iadd(b, offset, nir_imm_int(b, i / 8));
5497ec681f3Smrg      nir_ssa_def *vec32 = load_comps_to_vec32(b, bit_size, &comps[comp_idx],
5507ec681f3Smrg                                               substore_num_bits / bit_size);
5517ec681f3Smrg      nir_ssa_def *index = nir_ushr(b, local_offset, nir_imm_int(b, 2));
5527ec681f3Smrg
5537ec681f3Smrg      /* For anything less than 32bits we need to use the masked version of the
5547ec681f3Smrg       * intrinsic to preserve data living in the same 32bit slot.
5557ec681f3Smrg       */
5567ec681f3Smrg      if (num_bits < 32) {
5577ec681f3Smrg         lower_masked_store_vec32(b, local_offset, index, vec32, num_bits, op);
5587ec681f3Smrg      } else {
5597ec681f3Smrg         lower_store_vec32(b, index, vec32, op);
5607ec681f3Smrg      }
5617ec681f3Smrg
5627ec681f3Smrg      comp_idx += substore_num_bits / bit_size;
5637ec681f3Smrg   }
5647ec681f3Smrg
5657ec681f3Smrg   nir_instr_remove(&intr->instr);
5667ec681f3Smrg
5677ec681f3Smrg   return true;
5687ec681f3Smrg}
5697ec681f3Smrg
5707ec681f3Smrgstatic void
5717ec681f3Smrgubo_to_temp_patch_deref_mode(nir_deref_instr *deref)
5727ec681f3Smrg{
5737ec681f3Smrg   deref->modes = nir_var_shader_temp;
5747ec681f3Smrg   nir_foreach_use(use_src, &deref->dest.ssa) {
5757ec681f3Smrg      if (use_src->parent_instr->type != nir_instr_type_deref)
5767ec681f3Smrg	 continue;
5777ec681f3Smrg
5787ec681f3Smrg      nir_deref_instr *parent = nir_instr_as_deref(use_src->parent_instr);
5797ec681f3Smrg      ubo_to_temp_patch_deref_mode(parent);
5807ec681f3Smrg   }
5817ec681f3Smrg}
5827ec681f3Smrg
5837ec681f3Smrgstatic void
5847ec681f3Smrgubo_to_temp_update_entry(nir_deref_instr *deref, struct hash_entry *he)
5857ec681f3Smrg{
5867ec681f3Smrg   assert(nir_deref_mode_is(deref, nir_var_mem_constant));
5877ec681f3Smrg   assert(deref->dest.is_ssa);
5887ec681f3Smrg   assert(he->data);
5897ec681f3Smrg
5907ec681f3Smrg   nir_foreach_use(use_src, &deref->dest.ssa) {
5917ec681f3Smrg      if (use_src->parent_instr->type == nir_instr_type_deref) {
5927ec681f3Smrg         ubo_to_temp_update_entry(nir_instr_as_deref(use_src->parent_instr), he);
5937ec681f3Smrg      } else if (use_src->parent_instr->type == nir_instr_type_intrinsic) {
5947ec681f3Smrg         nir_intrinsic_instr *intr = nir_instr_as_intrinsic(use_src->parent_instr);
5957ec681f3Smrg         if (intr->intrinsic != nir_intrinsic_load_deref)
5967ec681f3Smrg            he->data = NULL;
5977ec681f3Smrg      } else {
5987ec681f3Smrg         he->data = NULL;
5997ec681f3Smrg      }
6007ec681f3Smrg
6017ec681f3Smrg      if (!he->data)
6027ec681f3Smrg         break;
6037ec681f3Smrg   }
6047ec681f3Smrg}
6057ec681f3Smrg
6067ec681f3Smrgbool
6077ec681f3Smrgdxil_nir_lower_ubo_to_temp(nir_shader *nir)
6087ec681f3Smrg{
6097ec681f3Smrg   struct hash_table *ubo_to_temp = _mesa_pointer_hash_table_create(NULL);
6107ec681f3Smrg   bool progress = false;
6117ec681f3Smrg
6127ec681f3Smrg   /* First pass: collect all UBO accesses that could be turned into
6137ec681f3Smrg    * shader temp accesses.
6147ec681f3Smrg    */
6157ec681f3Smrg   foreach_list_typed(nir_function, func, node, &nir->functions) {
6167ec681f3Smrg      if (!func->is_entrypoint)
6177ec681f3Smrg         continue;
6187ec681f3Smrg      assert(func->impl);
6197ec681f3Smrg
6207ec681f3Smrg      nir_foreach_block(block, func->impl) {
6217ec681f3Smrg         nir_foreach_instr_safe(instr, block) {
6227ec681f3Smrg            if (instr->type != nir_instr_type_deref)
6237ec681f3Smrg               continue;
6247ec681f3Smrg
6257ec681f3Smrg            nir_deref_instr *deref = nir_instr_as_deref(instr);
6267ec681f3Smrg            if (!nir_deref_mode_is(deref, nir_var_mem_constant) ||
6277ec681f3Smrg                deref->deref_type != nir_deref_type_var)
6287ec681f3Smrg                  continue;
6297ec681f3Smrg
6307ec681f3Smrg            struct hash_entry *he =
6317ec681f3Smrg               _mesa_hash_table_search(ubo_to_temp, deref->var);
6327ec681f3Smrg
6337ec681f3Smrg            if (!he)
6347ec681f3Smrg               he = _mesa_hash_table_insert(ubo_to_temp, deref->var, deref->var);
6357ec681f3Smrg
6367ec681f3Smrg            if (!he->data)
6377ec681f3Smrg               continue;
6387ec681f3Smrg
6397ec681f3Smrg            ubo_to_temp_update_entry(deref, he);
6407ec681f3Smrg         }
6417ec681f3Smrg      }
6427ec681f3Smrg   }
6437ec681f3Smrg
6447ec681f3Smrg   hash_table_foreach(ubo_to_temp, he) {
6457ec681f3Smrg      nir_variable *var = he->data;
6467ec681f3Smrg
6477ec681f3Smrg      if (!var)
6487ec681f3Smrg         continue;
6497ec681f3Smrg
6507ec681f3Smrg      /* Change the variable mode. */
6517ec681f3Smrg      var->data.mode = nir_var_shader_temp;
6527ec681f3Smrg
6537ec681f3Smrg      /* Make sure the variable has a name.
6547ec681f3Smrg       * DXIL variables must have names.
6557ec681f3Smrg       */
6567ec681f3Smrg      if (!var->name)
6577ec681f3Smrg         var->name = ralloc_asprintf(nir, "global_%d", exec_list_length(&nir->variables));
6587ec681f3Smrg
6597ec681f3Smrg      progress = true;
6607ec681f3Smrg   }
6617ec681f3Smrg   _mesa_hash_table_destroy(ubo_to_temp, NULL);
6627ec681f3Smrg
6637ec681f3Smrg   /* Second pass: patch all derefs that were accessing the converted UBOs
6647ec681f3Smrg    * variables.
6657ec681f3Smrg    */
6667ec681f3Smrg   foreach_list_typed(nir_function, func, node, &nir->functions) {
6677ec681f3Smrg      if (!func->is_entrypoint)
6687ec681f3Smrg         continue;
6697ec681f3Smrg      assert(func->impl);
6707ec681f3Smrg
6717ec681f3Smrg      nir_foreach_block(block, func->impl) {
6727ec681f3Smrg         nir_foreach_instr_safe(instr, block) {
6737ec681f3Smrg            if (instr->type != nir_instr_type_deref)
6747ec681f3Smrg               continue;
6757ec681f3Smrg
6767ec681f3Smrg            nir_deref_instr *deref = nir_instr_as_deref(instr);
6777ec681f3Smrg            if (nir_deref_mode_is(deref, nir_var_mem_constant) &&
6787ec681f3Smrg                deref->deref_type == nir_deref_type_var &&
6797ec681f3Smrg                deref->var->data.mode == nir_var_shader_temp)
6807ec681f3Smrg               ubo_to_temp_patch_deref_mode(deref);
6817ec681f3Smrg         }
6827ec681f3Smrg      }
6837ec681f3Smrg   }
6847ec681f3Smrg
6857ec681f3Smrg   return progress;
6867ec681f3Smrg}
6877ec681f3Smrg
6887ec681f3Smrgstatic bool
6897ec681f3Smrglower_load_ubo(nir_builder *b, nir_intrinsic_instr *intr)
6907ec681f3Smrg{
6917ec681f3Smrg   assert(intr->dest.is_ssa);
6927ec681f3Smrg   assert(intr->src[0].is_ssa);
6937ec681f3Smrg   assert(intr->src[1].is_ssa);
6947ec681f3Smrg
6957ec681f3Smrg   b->cursor = nir_before_instr(&intr->instr);
6967ec681f3Smrg
6977ec681f3Smrg   nir_ssa_def *result =
6987ec681f3Smrg      build_load_ubo_dxil(b, intr->src[0].ssa, intr->src[1].ssa,
6997ec681f3Smrg                             nir_dest_num_components(intr->dest),
7007ec681f3Smrg                             nir_dest_bit_size(intr->dest));
7017ec681f3Smrg
7027ec681f3Smrg   nir_ssa_def_rewrite_uses(&intr->dest.ssa, result);
7037ec681f3Smrg   nir_instr_remove(&intr->instr);
7047ec681f3Smrg   return true;
7057ec681f3Smrg}
7067ec681f3Smrg
7077ec681f3Smrgbool
7087ec681f3Smrgdxil_nir_lower_loads_stores_to_dxil(nir_shader *nir)
7097ec681f3Smrg{
7107ec681f3Smrg   bool progress = false;
7117ec681f3Smrg
7127ec681f3Smrg   foreach_list_typed(nir_function, func, node, &nir->functions) {
7137ec681f3Smrg      if (!func->is_entrypoint)
7147ec681f3Smrg         continue;
7157ec681f3Smrg      assert(func->impl);
7167ec681f3Smrg
7177ec681f3Smrg      nir_builder b;
7187ec681f3Smrg      nir_builder_init(&b, func->impl);
7197ec681f3Smrg
7207ec681f3Smrg      nir_foreach_block(block, func->impl) {
7217ec681f3Smrg         nir_foreach_instr_safe(instr, block) {
7227ec681f3Smrg            if (instr->type != nir_instr_type_intrinsic)
7237ec681f3Smrg               continue;
7247ec681f3Smrg            nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
7257ec681f3Smrg
7267ec681f3Smrg            switch (intr->intrinsic) {
7277ec681f3Smrg            case nir_intrinsic_load_deref:
7287ec681f3Smrg               progress |= lower_load_deref(&b, intr);
7297ec681f3Smrg               break;
7307ec681f3Smrg            case nir_intrinsic_load_shared:
7317ec681f3Smrg            case nir_intrinsic_load_scratch:
7327ec681f3Smrg               progress |= lower_32b_offset_load(&b, intr);
7337ec681f3Smrg               break;
7347ec681f3Smrg            case nir_intrinsic_load_ssbo:
7357ec681f3Smrg               progress |= lower_load_ssbo(&b, intr);
7367ec681f3Smrg               break;
7377ec681f3Smrg            case nir_intrinsic_load_ubo:
7387ec681f3Smrg               progress |= lower_load_ubo(&b, intr);
7397ec681f3Smrg               break;
7407ec681f3Smrg            case nir_intrinsic_store_shared:
7417ec681f3Smrg            case nir_intrinsic_store_scratch:
7427ec681f3Smrg               progress |= lower_32b_offset_store(&b, intr);
7437ec681f3Smrg               break;
7447ec681f3Smrg            case nir_intrinsic_store_ssbo:
7457ec681f3Smrg               progress |= lower_store_ssbo(&b, intr);
7467ec681f3Smrg               break;
7477ec681f3Smrg            default:
7487ec681f3Smrg               break;
7497ec681f3Smrg            }
7507ec681f3Smrg         }
7517ec681f3Smrg      }
7527ec681f3Smrg   }
7537ec681f3Smrg
7547ec681f3Smrg   return progress;
7557ec681f3Smrg}
7567ec681f3Smrg
7577ec681f3Smrgstatic bool
7587ec681f3Smrglower_shared_atomic(nir_builder *b, nir_intrinsic_instr *intr,
7597ec681f3Smrg                    nir_intrinsic_op dxil_op)
7607ec681f3Smrg{
7617ec681f3Smrg   b->cursor = nir_before_instr(&intr->instr);
7627ec681f3Smrg
7637ec681f3Smrg   assert(intr->src[0].is_ssa);
7647ec681f3Smrg   nir_ssa_def *offset =
7657ec681f3Smrg      nir_iadd(b, intr->src[0].ssa, nir_imm_int(b, nir_intrinsic_base(intr)));
7667ec681f3Smrg   nir_ssa_def *index = nir_ushr(b, offset, nir_imm_int(b, 2));
7677ec681f3Smrg
7687ec681f3Smrg   nir_intrinsic_instr *atomic = nir_intrinsic_instr_create(b->shader, dxil_op);
7697ec681f3Smrg   atomic->src[0] = nir_src_for_ssa(index);
7707ec681f3Smrg   assert(intr->src[1].is_ssa);
7717ec681f3Smrg   atomic->src[1] = nir_src_for_ssa(intr->src[1].ssa);
7727ec681f3Smrg   if (dxil_op == nir_intrinsic_shared_atomic_comp_swap_dxil) {
7737ec681f3Smrg      assert(intr->src[2].is_ssa);
7747ec681f3Smrg      atomic->src[2] = nir_src_for_ssa(intr->src[2].ssa);
7757ec681f3Smrg   }
7767ec681f3Smrg   atomic->num_components = 0;
7777ec681f3Smrg   nir_ssa_dest_init(&atomic->instr, &atomic->dest, 1, 32, NULL);
7787ec681f3Smrg
7797ec681f3Smrg   nir_builder_instr_insert(b, &atomic->instr);
7807ec681f3Smrg   nir_ssa_def_rewrite_uses(&intr->dest.ssa, &atomic->dest.ssa);
7817ec681f3Smrg   nir_instr_remove(&intr->instr);
7827ec681f3Smrg   return true;
7837ec681f3Smrg}
7847ec681f3Smrg
7857ec681f3Smrgbool
7867ec681f3Smrgdxil_nir_lower_atomics_to_dxil(nir_shader *nir)
7877ec681f3Smrg{
7887ec681f3Smrg   bool progress = false;
7897ec681f3Smrg
7907ec681f3Smrg   foreach_list_typed(nir_function, func, node, &nir->functions) {
7917ec681f3Smrg      if (!func->is_entrypoint)
7927ec681f3Smrg         continue;
7937ec681f3Smrg      assert(func->impl);
7947ec681f3Smrg
7957ec681f3Smrg      nir_builder b;
7967ec681f3Smrg      nir_builder_init(&b, func->impl);
7977ec681f3Smrg
7987ec681f3Smrg      nir_foreach_block(block, func->impl) {
7997ec681f3Smrg         nir_foreach_instr_safe(instr, block) {
8007ec681f3Smrg            if (instr->type != nir_instr_type_intrinsic)
8017ec681f3Smrg               continue;
8027ec681f3Smrg            nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
8037ec681f3Smrg
8047ec681f3Smrg            switch (intr->intrinsic) {
8057ec681f3Smrg
8067ec681f3Smrg#define ATOMIC(op)                                                            \
8077ec681f3Smrg  case nir_intrinsic_shared_atomic_##op:                                     \
8087ec681f3Smrg     progress |= lower_shared_atomic(&b, intr,                                \
8097ec681f3Smrg                                     nir_intrinsic_shared_atomic_##op##_dxil); \
8107ec681f3Smrg     break
8117ec681f3Smrg
8127ec681f3Smrg            ATOMIC(add);
8137ec681f3Smrg            ATOMIC(imin);
8147ec681f3Smrg            ATOMIC(umin);
8157ec681f3Smrg            ATOMIC(imax);
8167ec681f3Smrg            ATOMIC(umax);
8177ec681f3Smrg            ATOMIC(and);
8187ec681f3Smrg            ATOMIC(or);
8197ec681f3Smrg            ATOMIC(xor);
8207ec681f3Smrg            ATOMIC(exchange);
8217ec681f3Smrg            ATOMIC(comp_swap);
8227ec681f3Smrg
8237ec681f3Smrg#undef ATOMIC
8247ec681f3Smrg            default:
8257ec681f3Smrg               break;
8267ec681f3Smrg            }
8277ec681f3Smrg         }
8287ec681f3Smrg      }
8297ec681f3Smrg   }
8307ec681f3Smrg
8317ec681f3Smrg   return progress;
8327ec681f3Smrg}
8337ec681f3Smrg
8347ec681f3Smrgstatic bool
8357ec681f3Smrglower_deref_ssbo(nir_builder *b, nir_deref_instr *deref)
8367ec681f3Smrg{
8377ec681f3Smrg   assert(nir_deref_mode_is(deref, nir_var_mem_ssbo));
8387ec681f3Smrg   assert(deref->deref_type == nir_deref_type_var ||
8397ec681f3Smrg          deref->deref_type == nir_deref_type_cast);
8407ec681f3Smrg   nir_variable *var = deref->var;
8417ec681f3Smrg
8427ec681f3Smrg   b->cursor = nir_before_instr(&deref->instr);
8437ec681f3Smrg
8447ec681f3Smrg   if (deref->deref_type == nir_deref_type_var) {
8457ec681f3Smrg      /* We turn all deref_var into deref_cast and build a pointer value based on
8467ec681f3Smrg       * the var binding which encodes the UAV id.
8477ec681f3Smrg       */
8487ec681f3Smrg      nir_ssa_def *ptr = nir_imm_int64(b, (uint64_t)var->data.binding << 32);
8497ec681f3Smrg      nir_deref_instr *deref_cast =
8507ec681f3Smrg         nir_build_deref_cast(b, ptr, nir_var_mem_ssbo, deref->type,
8517ec681f3Smrg                              glsl_get_explicit_stride(var->type));
8527ec681f3Smrg      nir_ssa_def_rewrite_uses(&deref->dest.ssa,
8537ec681f3Smrg                               &deref_cast->dest.ssa);
8547ec681f3Smrg      nir_instr_remove(&deref->instr);
8557ec681f3Smrg
8567ec681f3Smrg      deref = deref_cast;
8577ec681f3Smrg      return true;
8587ec681f3Smrg   }
8597ec681f3Smrg   return false;
8607ec681f3Smrg}
8617ec681f3Smrg
8627ec681f3Smrgbool
8637ec681f3Smrgdxil_nir_lower_deref_ssbo(nir_shader *nir)
8647ec681f3Smrg{
8657ec681f3Smrg   bool progress = false;
8667ec681f3Smrg
8677ec681f3Smrg   foreach_list_typed(nir_function, func, node, &nir->functions) {
8687ec681f3Smrg      if (!func->is_entrypoint)
8697ec681f3Smrg         continue;
8707ec681f3Smrg      assert(func->impl);
8717ec681f3Smrg
8727ec681f3Smrg      nir_builder b;
8737ec681f3Smrg      nir_builder_init(&b, func->impl);
8747ec681f3Smrg
8757ec681f3Smrg      nir_foreach_block(block, func->impl) {
8767ec681f3Smrg         nir_foreach_instr_safe(instr, block) {
8777ec681f3Smrg            if (instr->type != nir_instr_type_deref)
8787ec681f3Smrg               continue;
8797ec681f3Smrg
8807ec681f3Smrg            nir_deref_instr *deref = nir_instr_as_deref(instr);
8817ec681f3Smrg
8827ec681f3Smrg            if (!nir_deref_mode_is(deref, nir_var_mem_ssbo) ||
8837ec681f3Smrg                (deref->deref_type != nir_deref_type_var &&
8847ec681f3Smrg                 deref->deref_type != nir_deref_type_cast))
8857ec681f3Smrg               continue;
8867ec681f3Smrg
8877ec681f3Smrg            progress |= lower_deref_ssbo(&b, deref);
8887ec681f3Smrg         }
8897ec681f3Smrg      }
8907ec681f3Smrg   }
8917ec681f3Smrg
8927ec681f3Smrg   return progress;
8937ec681f3Smrg}
8947ec681f3Smrg
8957ec681f3Smrgstatic bool
8967ec681f3Smrglower_alu_deref_srcs(nir_builder *b, nir_alu_instr *alu)
8977ec681f3Smrg{
8987ec681f3Smrg   const nir_op_info *info = &nir_op_infos[alu->op];
8997ec681f3Smrg   bool progress = false;
9007ec681f3Smrg
9017ec681f3Smrg   b->cursor = nir_before_instr(&alu->instr);
9027ec681f3Smrg
9037ec681f3Smrg   for (unsigned i = 0; i < info->num_inputs; i++) {
9047ec681f3Smrg      nir_deref_instr *deref = nir_src_as_deref(alu->src[i].src);
9057ec681f3Smrg
9067ec681f3Smrg      if (!deref)
9077ec681f3Smrg         continue;
9087ec681f3Smrg
9097ec681f3Smrg      nir_deref_path path;
9107ec681f3Smrg      nir_deref_path_init(&path, deref, NULL);
9117ec681f3Smrg      nir_deref_instr *root_deref = path.path[0];
9127ec681f3Smrg      nir_deref_path_finish(&path);
9137ec681f3Smrg
9147ec681f3Smrg      if (root_deref->deref_type != nir_deref_type_cast)
9157ec681f3Smrg         continue;
9167ec681f3Smrg
9177ec681f3Smrg      nir_ssa_def *ptr =
9187ec681f3Smrg         nir_iadd(b, root_deref->parent.ssa,
9197ec681f3Smrg                     nir_build_deref_offset(b, deref, cl_type_size_align));
9207ec681f3Smrg      nir_instr_rewrite_src(&alu->instr, &alu->src[i].src, nir_src_for_ssa(ptr));
9217ec681f3Smrg      progress = true;
9227ec681f3Smrg   }
9237ec681f3Smrg
9247ec681f3Smrg   return progress;
9257ec681f3Smrg}
9267ec681f3Smrg
9277ec681f3Smrgbool
9287ec681f3Smrgdxil_nir_opt_alu_deref_srcs(nir_shader *nir)
9297ec681f3Smrg{
9307ec681f3Smrg   bool progress = false;
9317ec681f3Smrg
9327ec681f3Smrg   foreach_list_typed(nir_function, func, node, &nir->functions) {
9337ec681f3Smrg      if (!func->is_entrypoint)
9347ec681f3Smrg         continue;
9357ec681f3Smrg      assert(func->impl);
9367ec681f3Smrg
9377ec681f3Smrg      bool progress = false;
9387ec681f3Smrg      nir_builder b;
9397ec681f3Smrg      nir_builder_init(&b, func->impl);
9407ec681f3Smrg
9417ec681f3Smrg      nir_foreach_block(block, func->impl) {
9427ec681f3Smrg         nir_foreach_instr_safe(instr, block) {
9437ec681f3Smrg            if (instr->type != nir_instr_type_alu)
9447ec681f3Smrg               continue;
9457ec681f3Smrg
9467ec681f3Smrg            nir_alu_instr *alu = nir_instr_as_alu(instr);
9477ec681f3Smrg            progress |= lower_alu_deref_srcs(&b, alu);
9487ec681f3Smrg         }
9497ec681f3Smrg      }
9507ec681f3Smrg   }
9517ec681f3Smrg
9527ec681f3Smrg   return progress;
9537ec681f3Smrg}
9547ec681f3Smrg
9557ec681f3Smrgstatic nir_ssa_def *
9567ec681f3Smrgmemcpy_load_deref_elem(nir_builder *b, nir_deref_instr *parent,
9577ec681f3Smrg                       nir_ssa_def *index)
9587ec681f3Smrg{
9597ec681f3Smrg   nir_deref_instr *deref;
9607ec681f3Smrg
9617ec681f3Smrg   index = nir_i2i(b, index, nir_dest_bit_size(parent->dest));
9627ec681f3Smrg   assert(parent->deref_type == nir_deref_type_cast);
9637ec681f3Smrg   deref = nir_build_deref_ptr_as_array(b, parent, index);
9647ec681f3Smrg
9657ec681f3Smrg   return nir_load_deref(b, deref);
9667ec681f3Smrg}
9677ec681f3Smrg
9687ec681f3Smrgstatic void
9697ec681f3Smrgmemcpy_store_deref_elem(nir_builder *b, nir_deref_instr *parent,
9707ec681f3Smrg                        nir_ssa_def *index, nir_ssa_def *value)
9717ec681f3Smrg{
9727ec681f3Smrg   nir_deref_instr *deref;
9737ec681f3Smrg
9747ec681f3Smrg   index = nir_i2i(b, index, nir_dest_bit_size(parent->dest));
9757ec681f3Smrg   assert(parent->deref_type == nir_deref_type_cast);
9767ec681f3Smrg   deref = nir_build_deref_ptr_as_array(b, parent, index);
9777ec681f3Smrg   nir_store_deref(b, deref, value, 1);
9787ec681f3Smrg}
9797ec681f3Smrg
9807ec681f3Smrgstatic bool
9817ec681f3Smrglower_memcpy_deref(nir_builder *b, nir_intrinsic_instr *intr)
9827ec681f3Smrg{
9837ec681f3Smrg   nir_deref_instr *dst_deref = nir_src_as_deref(intr->src[0]);
9847ec681f3Smrg   nir_deref_instr *src_deref = nir_src_as_deref(intr->src[1]);
9857ec681f3Smrg   assert(intr->src[2].is_ssa);
9867ec681f3Smrg   nir_ssa_def *num_bytes = intr->src[2].ssa;
9877ec681f3Smrg
9887ec681f3Smrg   assert(dst_deref && src_deref);
9897ec681f3Smrg
9907ec681f3Smrg   b->cursor = nir_after_instr(&intr->instr);
9917ec681f3Smrg
9927ec681f3Smrg   dst_deref = nir_build_deref_cast(b, &dst_deref->dest.ssa, dst_deref->modes,
9937ec681f3Smrg                                       glsl_uint8_t_type(), 1);
9947ec681f3Smrg   src_deref = nir_build_deref_cast(b, &src_deref->dest.ssa, src_deref->modes,
9957ec681f3Smrg                                       glsl_uint8_t_type(), 1);
9967ec681f3Smrg
9977ec681f3Smrg   /*
9987ec681f3Smrg    * We want to avoid 64b instructions, so let's assume we'll always be
9997ec681f3Smrg    * passed a value that fits in a 32b type and truncate the 64b value.
10007ec681f3Smrg    */
10017ec681f3Smrg   num_bytes = nir_u2u32(b, num_bytes);
10027ec681f3Smrg
10037ec681f3Smrg   nir_variable *loop_index_var =
10047ec681f3Smrg     nir_local_variable_create(b->impl, glsl_uint_type(), "loop_index");
10057ec681f3Smrg   nir_deref_instr *loop_index_deref = nir_build_deref_var(b, loop_index_var);
10067ec681f3Smrg   nir_store_deref(b, loop_index_deref, nir_imm_int(b, 0), 1);
10077ec681f3Smrg
10087ec681f3Smrg   nir_loop *loop = nir_push_loop(b);
10097ec681f3Smrg   nir_ssa_def *loop_index = nir_load_deref(b, loop_index_deref);
10107ec681f3Smrg   nir_ssa_def *cmp = nir_ige(b, loop_index, num_bytes);
10117ec681f3Smrg   nir_if *loop_check = nir_push_if(b, cmp);
10127ec681f3Smrg   nir_jump(b, nir_jump_break);
10137ec681f3Smrg   nir_pop_if(b, loop_check);
10147ec681f3Smrg   nir_ssa_def *val = memcpy_load_deref_elem(b, src_deref, loop_index);
10157ec681f3Smrg   memcpy_store_deref_elem(b, dst_deref, loop_index, val);
10167ec681f3Smrg   nir_store_deref(b, loop_index_deref, nir_iadd_imm(b, loop_index, 1), 1);
10177ec681f3Smrg   nir_pop_loop(b, loop);
10187ec681f3Smrg   nir_instr_remove(&intr->instr);
10197ec681f3Smrg   return true;
10207ec681f3Smrg}
10217ec681f3Smrg
10227ec681f3Smrgbool
10237ec681f3Smrgdxil_nir_lower_memcpy_deref(nir_shader *nir)
10247ec681f3Smrg{
10257ec681f3Smrg   bool progress = false;
10267ec681f3Smrg
10277ec681f3Smrg   foreach_list_typed(nir_function, func, node, &nir->functions) {
10287ec681f3Smrg      if (!func->is_entrypoint)
10297ec681f3Smrg         continue;
10307ec681f3Smrg      assert(func->impl);
10317ec681f3Smrg
10327ec681f3Smrg      nir_builder b;
10337ec681f3Smrg      nir_builder_init(&b, func->impl);
10347ec681f3Smrg
10357ec681f3Smrg      nir_foreach_block(block, func->impl) {
10367ec681f3Smrg         nir_foreach_instr_safe(instr, block) {
10377ec681f3Smrg            if (instr->type != nir_instr_type_intrinsic)
10387ec681f3Smrg               continue;
10397ec681f3Smrg
10407ec681f3Smrg            nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
10417ec681f3Smrg
10427ec681f3Smrg            if (intr->intrinsic == nir_intrinsic_memcpy_deref)
10437ec681f3Smrg               progress |= lower_memcpy_deref(&b, intr);
10447ec681f3Smrg         }
10457ec681f3Smrg      }
10467ec681f3Smrg   }
10477ec681f3Smrg
10487ec681f3Smrg   return progress;
10497ec681f3Smrg}
10507ec681f3Smrg
static void
cast_phi(nir_builder *b, nir_phi_instr *phi, unsigned new_bit_size)
{
   /* Replace 'phi' with an equivalent phi of new_bit_size: each source is
    * upcast (uint conversion) right after its producing instruction, and
    * the new phi's result is downcast back to the original bit size for
    * the existing users.
    */
   nir_phi_instr *lowered = nir_phi_instr_create(b->shader);
   int num_components = 0;
   int old_bit_size = phi->dest.ssa.bit_size;

   nir_op upcast_op = nir_type_conversion_op(nir_type_uint | old_bit_size,
                                             nir_type_uint | new_bit_size,
                                             nir_rounding_mode_undef);
   nir_op downcast_op = nir_type_conversion_op(nir_type_uint | new_bit_size,
                                               nir_type_uint | old_bit_size,
                                               nir_rounding_mode_undef);

   nir_foreach_phi_src(src, phi) {
      /* All sources must agree on the component count. */
      assert(num_components == 0 || num_components == src->src.ssa->num_components);
      num_components = src->src.ssa->num_components;

      /* Insert the upcast right after the source's producer, skipping past
       * any phis (new instructions may not land between phis).
       */
      b->cursor = nir_after_instr_and_phis(src->src.ssa->parent_instr);

      nir_ssa_def *cast = nir_build_alu(b, upcast_op, src->src.ssa, NULL, NULL, NULL);
      nir_phi_instr_add_src(lowered, src->pred, nir_src_for_ssa(cast));
   }

   nir_ssa_dest_init(&lowered->instr, &lowered->dest,
                     num_components, new_bit_size, NULL);

   /* The widened phi takes the original phi's place. */
   b->cursor = nir_before_instr(&phi->instr);
   nir_builder_instr_insert(b, &lowered->instr);

   /* The downcast must come after all phis of the block. */
   b->cursor = nir_after_phis(nir_cursor_current_block(b->cursor));
   nir_ssa_def *result = nir_build_alu(b, downcast_op, &lowered->dest.ssa, NULL, NULL, NULL);

   nir_ssa_def_rewrite_uses(&phi->dest.ssa, result);
   nir_instr_remove(&phi->instr);
}
10877ec681f3Smrg
10887ec681f3Smrgstatic bool
10897ec681f3Smrgupcast_phi_impl(nir_function_impl *impl, unsigned min_bit_size)
10907ec681f3Smrg{
10917ec681f3Smrg   nir_builder b;
10927ec681f3Smrg   nir_builder_init(&b, impl);
10937ec681f3Smrg   bool progress = false;
10947ec681f3Smrg
10957ec681f3Smrg   nir_foreach_block_reverse(block, impl) {
10967ec681f3Smrg      nir_foreach_instr_safe(instr, block) {
10977ec681f3Smrg         if (instr->type != nir_instr_type_phi)
10987ec681f3Smrg            continue;
10997ec681f3Smrg
11007ec681f3Smrg         nir_phi_instr *phi = nir_instr_as_phi(instr);
11017ec681f3Smrg         assert(phi->dest.is_ssa);
11027ec681f3Smrg
11037ec681f3Smrg         if (phi->dest.ssa.bit_size == 1 ||
11047ec681f3Smrg             phi->dest.ssa.bit_size >= min_bit_size)
11057ec681f3Smrg            continue;
11067ec681f3Smrg
11077ec681f3Smrg         cast_phi(&b, phi, min_bit_size);
11087ec681f3Smrg         progress = true;
11097ec681f3Smrg      }
11107ec681f3Smrg   }
11117ec681f3Smrg
11127ec681f3Smrg   if (progress) {
11137ec681f3Smrg      nir_metadata_preserve(impl, nir_metadata_block_index |
11147ec681f3Smrg                                  nir_metadata_dominance);
11157ec681f3Smrg   } else {
11167ec681f3Smrg      nir_metadata_preserve(impl, nir_metadata_all);
11177ec681f3Smrg   }
11187ec681f3Smrg
11197ec681f3Smrg   return progress;
11207ec681f3Smrg}
11217ec681f3Smrg
11227ec681f3Smrgbool
11237ec681f3Smrgdxil_nir_lower_upcast_phis(nir_shader *shader, unsigned min_bit_size)
11247ec681f3Smrg{
11257ec681f3Smrg   bool progress = false;
11267ec681f3Smrg
11277ec681f3Smrg   nir_foreach_function(function, shader) {
11287ec681f3Smrg      if (function->impl)
11297ec681f3Smrg         progress |= upcast_phi_impl(function->impl, min_bit_size);
11307ec681f3Smrg   }
11317ec681f3Smrg
11327ec681f3Smrg   return progress;
11337ec681f3Smrg}
11347ec681f3Smrg
struct dxil_nir_split_clip_cull_distance_params {
   nir_variable *new_var; /* Variable created by the split, or NULL if none yet. */
   nir_shader *shader;    /* Shader the new variable is added to. */
};
11397ec681f3Smrg
11407ec681f3Smrg/* In GLSL and SPIR-V, clip and cull distance are arrays of floats (with a limit of 8).
11417ec681f3Smrg * In DXIL, clip and cull distances are up to 2 float4s combined.
11427ec681f3Smrg * Coming from GLSL, we can request this 2 float4 format, but coming from SPIR-V,
11437ec681f3Smrg * we can't, and have to accept a "compact" array of scalar floats.
11447ec681f3Smrg *
11457ec681f3Smrg * To help emitting a valid input signature for this case, split the variables so that they
11467ec681f3Smrg * match what we need to put in the signature (e.g. { float clip[4]; float clip1; float cull[3]; })
11477ec681f3Smrg */
static bool
dxil_nir_split_clip_cull_distance_instr(nir_builder *b,
                                        nir_instr *instr,
                                        void *cb_data)
{
   /* Per-instruction callback: splits derefs of compact clip/cull distance
    * arrays that straddle the float4 boundary (see the comment above this
    * function). Returns true only when a deref is rewritten to the new var.
    */
   struct dxil_nir_split_clip_cull_distance_params *params = cb_data;
   nir_variable *new_var = params->new_var;

   if (instr->type != nir_instr_type_deref)
      return false;

   nir_deref_instr *deref = nir_instr_as_deref(instr);
   nir_variable *var = nir_deref_instr_get_variable(deref);
   if (!var ||
       var->data.location < VARYING_SLOT_CLIP_DIST0 ||
       var->data.location > VARYING_SLOT_CULL_DIST1 ||
       !var->data.compact)
      return false;

   /* The location should only be inside clip distance, because clip
    * and cull should've been merged by nir_lower_clip_cull_distance_arrays()
    */
   assert(var->data.location == VARYING_SLOT_CLIP_DIST0 ||
          var->data.location == VARYING_SLOT_CLIP_DIST1);

   /* The deref chain to the clip/cull variables should be simple, just the
    * var and an array with a constant index, otherwise more lowering/optimization
    * might be needed before this pass, e.g. copy prop, lower_io_to_temporaries,
    * split_var_copies, and/or lower_var_copies
    */
   assert(deref->deref_type == nir_deref_type_var ||
          deref->deref_type == nir_deref_type_array);

   b->cursor = nir_before_instr(instr);
   if (!new_var) {
      /* Update lengths for new and old vars */
      int old_length = glsl_array_size(var->type);
      int new_length = (old_length + var->data.location_frac) - 4;
      old_length -= new_length;

      /* The existing variable fits in the float4 */
      if (new_length <= 0)
         return false;

      /* First deref that needs splitting: create the overflow variable at
       * the next location and shrink the original to what fits in a float4.
       */
      new_var = nir_variable_clone(var, params->shader);
      nir_shader_add_variable(params->shader, new_var);
      assert(glsl_get_base_type(glsl_get_array_element(var->type)) == GLSL_TYPE_FLOAT);
      var->type = glsl_array_type(glsl_float_type(), old_length, 0);
      new_var->type = glsl_array_type(glsl_float_type(), new_length, 0);
      new_var->data.location++;
      new_var->data.location_frac = 0;
      params->new_var = new_var;
   }

   /* Update the type for derefs of the old var */
   if (deref->deref_type == nir_deref_type_var) {
      deref->type = var->type;
      return false;
   }

   /* Array derefs must be constant-indexed (see the assert rationale above). */
   nir_const_value *index = nir_src_as_const_value(deref->arr.index);
   assert(index);

   /* Treat this array as a vector starting at the component index in location_frac,
    * so if location_frac is 1 and index is 0, then it's accessing the 'y' component
    * of the vector. If index + location_frac is >= 4, there's no component there,
    * so we need to add a new variable and adjust the index.
    */
   unsigned total_index = index->u32 + var->data.location_frac;
   if (total_index < 4)
      return false;

   /* Redirect the access into the overflow variable, rebased modulo 4. */
   nir_deref_instr *new_var_deref = nir_build_deref_var(b, new_var);
   nir_deref_instr *new_array_deref = nir_build_deref_array(b, new_var_deref, nir_imm_int(b, total_index % 4));
   nir_ssa_def_rewrite_uses(&deref->dest.ssa, &new_array_deref->dest.ssa);
   return true;
}
12257ec681f3Smrg
12267ec681f3Smrgbool
12277ec681f3Smrgdxil_nir_split_clip_cull_distance(nir_shader *shader)
12287ec681f3Smrg{
12297ec681f3Smrg   struct dxil_nir_split_clip_cull_distance_params params = {
12307ec681f3Smrg      .new_var = NULL,
12317ec681f3Smrg      .shader = shader,
12327ec681f3Smrg   };
12337ec681f3Smrg   nir_shader_instructions_pass(shader,
12347ec681f3Smrg                                dxil_nir_split_clip_cull_distance_instr,
12357ec681f3Smrg                                nir_metadata_block_index |
12367ec681f3Smrg                                nir_metadata_dominance |
12377ec681f3Smrg                                nir_metadata_loop_analysis,
12387ec681f3Smrg                                &params);
12397ec681f3Smrg   return params.new_var != NULL;
12407ec681f3Smrg}
12417ec681f3Smrg
static bool
dxil_nir_lower_double_math_instr(nir_builder *b,
                                 nir_instr *instr,
                                 UNUSED void *cb_data)
{
   /* For ALU instructions with 64-bit float operands or results, insert
    * explicit unpack/pack sequences (through the DXIL-specific double
    * pack/unpack ops) around the 64-bit values.
    */
   if (instr->type != nir_instr_type_alu)
      return false;

   nir_alu_instr *alu = nir_instr_as_alu(instr);

   /* TODO: See if we can apply this explicitly to packs/unpacks that are then
    * used as a double. As-is, if we had an app explicitly do a 64bit integer op,
    * then try to bitcast to double (not expressible in HLSL, but it is in other
    * source languages), this would unpack the integer and repack as a double, when
    * we probably want to just send the bitcast through to the backend.
    */

   b->cursor = nir_before_instr(&alu->instr);

   bool progress = false;
   /* Reroute every 64-bit float source through
    * unpack_64_2x32 + pack_double_2x32_dxil so the backend sees an explicit
    * double repack.
    */
   for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; ++i) {
      if (nir_alu_type_get_base_type(nir_op_infos[alu->op].input_types[i]) == nir_type_float &&
          alu->src[i].src.ssa->bit_size == 64) {
         nir_ssa_def *packed_double = nir_channel(b, alu->src[i].src.ssa, alu->src[i].swizzle[0]);
         nir_ssa_def *unpacked_double = nir_unpack_64_2x32(b, packed_double);
         nir_ssa_def *repacked_double = nir_pack_double_2x32_dxil(b, unpacked_double);
         nir_instr_rewrite_src_ssa(instr, &alu->src[i].src, repacked_double);
         /* The selected channel is now the scalar source; reset the swizzle. */
         memset(alu->src[i].swizzle, 0, ARRAY_SIZE(alu->src[i].swizzle));
         progress = true;
      }
   }

   /* Similarly wrap a 64-bit float result:
    * unpack_double_2x32_dxil + pack_64_2x32, rewriting all users past the
    * unpack so the chain itself isn't rewritten.
    */
   if (nir_alu_type_get_base_type(nir_op_infos[alu->op].output_type) == nir_type_float &&
       alu->dest.dest.ssa.bit_size == 64) {
      b->cursor = nir_after_instr(&alu->instr);
      nir_ssa_def *packed_double = &alu->dest.dest.ssa;
      nir_ssa_def *unpacked_double = nir_unpack_double_2x32_dxil(b, packed_double);
      nir_ssa_def *repacked_double = nir_pack_64_2x32(b, unpacked_double);
      nir_ssa_def_rewrite_uses_after(packed_double, repacked_double, unpacked_double->parent_instr);
      progress = true;
   }

   return progress;
}
12867ec681f3Smrg
12877ec681f3Smrgbool
12887ec681f3Smrgdxil_nir_lower_double_math(nir_shader *shader)
12897ec681f3Smrg{
12907ec681f3Smrg   return nir_shader_instructions_pass(shader,
12917ec681f3Smrg                                       dxil_nir_lower_double_math_instr,
12927ec681f3Smrg                                       nir_metadata_block_index |
12937ec681f3Smrg                                       nir_metadata_dominance |
12947ec681f3Smrg                                       nir_metadata_loop_analysis,
12957ec681f3Smrg                                       NULL);
12967ec681f3Smrg}
12977ec681f3Smrg
/* Callback state for dxil_nir_lower_system_values_to_zero: the list of
 * system values whose loads should be replaced with a constant zero. */
typedef struct {
   gl_system_value *values; /* caller-owned array of system values to zero */
   uint32_t count;          /* number of entries in 'values' */
} zero_system_values_state;
13027ec681f3Smrg
13037ec681f3Smrgstatic bool
13047ec681f3Smrglower_system_value_to_zero_filter(const nir_instr* instr, const void* cb_state)
13057ec681f3Smrg{
13067ec681f3Smrg   if (instr->type != nir_instr_type_intrinsic) {
13077ec681f3Smrg      return false;
13087ec681f3Smrg   }
13097ec681f3Smrg
13107ec681f3Smrg   nir_intrinsic_instr* intrin = nir_instr_as_intrinsic(instr);
13117ec681f3Smrg
13127ec681f3Smrg   /* All the intrinsics we care about are loads */
13137ec681f3Smrg   if (!nir_intrinsic_infos[intrin->intrinsic].has_dest)
13147ec681f3Smrg      return false;
13157ec681f3Smrg
13167ec681f3Smrg   assert(intrin->dest.is_ssa);
13177ec681f3Smrg
13187ec681f3Smrg   zero_system_values_state* state = (zero_system_values_state*)cb_state;
13197ec681f3Smrg   for (uint32_t i = 0; i < state->count; ++i) {
13207ec681f3Smrg      gl_system_value value = state->values[i];
13217ec681f3Smrg      nir_intrinsic_op value_op = nir_intrinsic_from_system_value(value);
13227ec681f3Smrg
13237ec681f3Smrg      if (intrin->intrinsic == value_op) {
13247ec681f3Smrg         return true;
13257ec681f3Smrg      } else if (intrin->intrinsic == nir_intrinsic_load_deref) {
13267ec681f3Smrg         nir_deref_instr* deref = nir_src_as_deref(intrin->src[0]);
13277ec681f3Smrg         if (!nir_deref_mode_is(deref, nir_var_system_value))
13287ec681f3Smrg            return false;
13297ec681f3Smrg
13307ec681f3Smrg         nir_variable* var = deref->var;
13317ec681f3Smrg         if (var->data.location == value) {
13327ec681f3Smrg            return true;
13337ec681f3Smrg         }
13347ec681f3Smrg      }
13357ec681f3Smrg   }
13367ec681f3Smrg
13377ec681f3Smrg   return false;
13387ec681f3Smrg}
13397ec681f3Smrg
13407ec681f3Smrgstatic nir_ssa_def*
13417ec681f3Smrglower_system_value_to_zero_instr(nir_builder* b, nir_instr* instr, void* _state)
13427ec681f3Smrg{
13437ec681f3Smrg   return nir_imm_int(b, 0);
13447ec681f3Smrg}
13457ec681f3Smrg
13467ec681f3Smrgbool
13477ec681f3Smrgdxil_nir_lower_system_values_to_zero(nir_shader* shader,
13487ec681f3Smrg                                     gl_system_value* system_values,
13497ec681f3Smrg                                     uint32_t count)
13507ec681f3Smrg{
13517ec681f3Smrg   zero_system_values_state state = { system_values, count };
13527ec681f3Smrg   return nir_shader_lower_instructions(shader,
13537ec681f3Smrg      lower_system_value_to_zero_filter,
13547ec681f3Smrg      lower_system_value_to_zero_instr,
13557ec681f3Smrg      &state);
13567ec681f3Smrg}
13577ec681f3Smrg
13587ec681f3Smrgstatic const struct glsl_type *
13597ec681f3Smrgget_bare_samplers_for_type(const struct glsl_type *type)
13607ec681f3Smrg{
13617ec681f3Smrg   if (glsl_type_is_sampler(type)) {
13627ec681f3Smrg      if (glsl_sampler_type_is_shadow(type))
13637ec681f3Smrg         return glsl_bare_shadow_sampler_type();
13647ec681f3Smrg      else
13657ec681f3Smrg         return glsl_bare_sampler_type();
13667ec681f3Smrg   } else if (glsl_type_is_array(type)) {
13677ec681f3Smrg      return glsl_array_type(
13687ec681f3Smrg         get_bare_samplers_for_type(glsl_get_array_element(type)),
13697ec681f3Smrg         glsl_get_length(type),
13707ec681f3Smrg         0 /*explicit size*/);
13717ec681f3Smrg   }
13727ec681f3Smrg   assert(!"Unexpected type");
13737ec681f3Smrg   return NULL;
13747ec681f3Smrg}
13757ec681f3Smrg
/* Redirect a tex instruction's sampler reference to a bare-sampler variable,
 * creating (and caching in 'data', a hash_table_u64 keyed by binding/index)
 * the bare clone of the typed sampler variable if it does not exist yet.
 * Returns true if the instruction or variable list was changed. */
static bool
redirect_sampler_derefs(struct nir_builder *b, nir_instr *instr, void *data)
{
   if (instr->type != nir_instr_type_tex)
      return false;

   nir_tex_instr *tex = nir_instr_as_tex(instr);
   /* Ops that don't sample (e.g. size queries) don't need a sampler. */
   if (!nir_tex_instr_need_sampler(tex))
      return false;

   int sampler_idx = nir_tex_instr_src_index(tex, nir_tex_src_sampler_deref);
   if (sampler_idx == -1) {
      /* No derefs, must be using indices */
      nir_variable *bare_sampler = _mesa_hash_table_u64_search(data, tex->sampler_index);

      /* Already have a bare sampler here */
      if (bare_sampler)
         return false;

      /* Scan uniforms for the variable covering this sampler index; a
       * variable may cover a range of bindings if it is an array. */
      nir_variable *typed_sampler = NULL;
      nir_foreach_variable_with_modes(var, b->shader, nir_var_uniform) {
         if (var->data.binding <= tex->sampler_index &&
             var->data.binding + glsl_type_get_sampler_count(var->type) > tex->sampler_index) {
            /* Already have a bare sampler for this binding, add it to the table */
            if (glsl_get_sampler_result_type(glsl_without_array(var->type)) == GLSL_TYPE_VOID) {
               _mesa_hash_table_u64_insert(data, tex->sampler_index, var);
               return false;
            }

            typed_sampler = var;
         }
      }

      /* Clone the typed sampler to a bare sampler and we're done */
      assert(typed_sampler);
      bare_sampler = nir_variable_clone(typed_sampler, b->shader);
      bare_sampler->type = get_bare_samplers_for_type(typed_sampler->type);
      nir_shader_add_variable(b->shader, bare_sampler);
      _mesa_hash_table_u64_insert(data, tex->sampler_index, bare_sampler);
      return true;
   }

   /* Using derefs, means we have to rewrite the deref chain in addition to cloning */
   nir_deref_instr *final_deref = nir_src_as_deref(tex->src[sampler_idx].src);
   nir_deref_path path;
   nir_deref_path_init(&path, final_deref, NULL);

   /* path.path[0] is the root of the chain: the variable deref. */
   nir_deref_instr *old_tail = path.path[0];
   assert(old_tail->deref_type == nir_deref_type_var);
   nir_variable *old_var = old_tail->var;
   /* A VOID result type means the variable is already a bare sampler. */
   if (glsl_get_sampler_result_type(glsl_without_array(old_var->type)) == GLSL_TYPE_VOID) {
      nir_deref_path_finish(&path);
      return false;
   }

   /* Look up (or create and cache) the bare clone for this binding. */
   nir_variable *new_var = _mesa_hash_table_u64_search(data, old_var->data.binding);
   if (!new_var) {
      new_var = nir_variable_clone(old_var, b->shader);
      new_var->type = get_bare_samplers_for_type(old_var->type);
      nir_shader_add_variable(b->shader, new_var);
      _mesa_hash_table_u64_insert(data, old_var->data.binding, new_var);
   }

   /* Rebuild the deref chain against the bare variable, placing each new
    * deref right after its corresponding old one so array indices (which
    * the follower derefs reuse) are already defined. */
   b->cursor = nir_after_instr(&old_tail->instr);
   nir_deref_instr *new_tail = nir_build_deref_var(b, new_var);

   for (unsigned i = 1; path.path[i]; ++i) {
      b->cursor = nir_after_instr(&path.path[i]->instr);
      new_tail = nir_build_deref_follower(b, new_tail, path.path[i]);
   }

   nir_deref_path_finish(&path);
   /* Point the tex instruction's sampler source at the new chain. */
   nir_instr_rewrite_src_ssa(&tex->instr, &tex->src[sampler_idx].src, &new_tail->dest.ssa);

   return true;
}
14527ec681f3Smrg
14537ec681f3Smrgbool
14547ec681f3Smrgdxil_nir_create_bare_samplers(nir_shader *nir)
14557ec681f3Smrg{
14567ec681f3Smrg   struct hash_table_u64 *sampler_to_bare = _mesa_hash_table_u64_create(NULL);
14577ec681f3Smrg
14587ec681f3Smrg   bool progress = nir_shader_instructions_pass(nir, redirect_sampler_derefs,
14597ec681f3Smrg      nir_metadata_block_index | nir_metadata_dominance | nir_metadata_loop_analysis, sampler_to_bare);
14607ec681f3Smrg
14617ec681f3Smrg   _mesa_hash_table_u64_destroy(sampler_to_bare);
14627ec681f3Smrg   return progress;
14637ec681f3Smrg}
14647ec681f3Smrg
14657ec681f3Smrg
14667ec681f3Smrgstatic bool
14677ec681f3Smrglower_bool_input_filter(const nir_instr *instr,
14687ec681f3Smrg                        UNUSED const void *_options)
14697ec681f3Smrg{
14707ec681f3Smrg   if (instr->type != nir_instr_type_intrinsic)
14717ec681f3Smrg      return false;
14727ec681f3Smrg
14737ec681f3Smrg   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
14747ec681f3Smrg   if (intr->intrinsic == nir_intrinsic_load_front_face)
14757ec681f3Smrg      return true;
14767ec681f3Smrg
14777ec681f3Smrg   if (intr->intrinsic == nir_intrinsic_load_deref) {
14787ec681f3Smrg      nir_deref_instr *deref = nir_instr_as_deref(intr->src[0].ssa->parent_instr);
14797ec681f3Smrg      nir_variable *var = nir_deref_instr_get_variable(deref);
14807ec681f3Smrg      return var->data.mode == nir_var_shader_in &&
14817ec681f3Smrg             glsl_get_base_type(var->type) == GLSL_TYPE_BOOL;
14827ec681f3Smrg   }
14837ec681f3Smrg
14847ec681f3Smrg   return false;
14857ec681f3Smrg}
14867ec681f3Smrg
14877ec681f3Smrgstatic nir_ssa_def *
14887ec681f3Smrglower_bool_input_impl(nir_builder *b, nir_instr *instr,
14897ec681f3Smrg                      UNUSED void *_options)
14907ec681f3Smrg{
14917ec681f3Smrg   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
14927ec681f3Smrg
14937ec681f3Smrg   if (intr->intrinsic == nir_intrinsic_load_deref) {
14947ec681f3Smrg      nir_deref_instr *deref = nir_instr_as_deref(intr->src[0].ssa->parent_instr);
14957ec681f3Smrg      nir_variable *var = nir_deref_instr_get_variable(deref);
14967ec681f3Smrg
14977ec681f3Smrg      /* rewrite var->type */
14987ec681f3Smrg      var->type = glsl_vector_type(GLSL_TYPE_UINT,
14997ec681f3Smrg                                   glsl_get_vector_elements(var->type));
15007ec681f3Smrg      deref->type = var->type;
15017ec681f3Smrg   }
15027ec681f3Smrg
15037ec681f3Smrg   intr->dest.ssa.bit_size = 32;
15047ec681f3Smrg   return nir_i2b1(b, &intr->dest.ssa);
15057ec681f3Smrg}
15067ec681f3Smrg
15077ec681f3Smrgbool
15087ec681f3Smrgdxil_nir_lower_bool_input(struct nir_shader *s)
15097ec681f3Smrg{
15107ec681f3Smrg   return nir_shader_lower_instructions(s, lower_bool_input_filter,
15117ec681f3Smrg                                        lower_bool_input_impl, NULL);
15127ec681f3Smrg}
15137ec681f3Smrg
15147ec681f3Smrg/* Comparison function to sort io values so that first come normal varyings,
15157ec681f3Smrg * then system values, and then system generated values.
15167ec681f3Smrg */
15177ec681f3Smrgstatic int
15187ec681f3Smrgvariable_location_cmp(const nir_variable* a, const nir_variable* b)
15197ec681f3Smrg{
15207ec681f3Smrg   // Sort by driver_location, location, then index
15217ec681f3Smrg   return a->data.driver_location != b->data.driver_location ?
15227ec681f3Smrg            a->data.driver_location - b->data.driver_location :
15237ec681f3Smrg            a->data.location !=  b->data.location ?
15247ec681f3Smrg               a->data.location - b->data.location :
15257ec681f3Smrg               a->data.index - b->data.index;
15267ec681f3Smrg}
15277ec681f3Smrg
15287ec681f3Smrg/* Order varyings according to driver location */
15297ec681f3Smrguint64_t
15307ec681f3Smrgdxil_sort_by_driver_location(nir_shader* s, nir_variable_mode modes)
15317ec681f3Smrg{
15327ec681f3Smrg   nir_sort_variables_with_modes(s, variable_location_cmp, modes);
15337ec681f3Smrg
15347ec681f3Smrg   uint64_t result = 0;
15357ec681f3Smrg   nir_foreach_variable_with_modes(var, s, modes) {
15367ec681f3Smrg      result |= 1ull << var->data.location;
15377ec681f3Smrg   }
15387ec681f3Smrg   return result;
15397ec681f3Smrg}
15407ec681f3Smrg
15417ec681f3Smrg/* Sort PS outputs so that color outputs come first */
15427ec681f3Smrgvoid
15437ec681f3Smrgdxil_sort_ps_outputs(nir_shader* s)
15447ec681f3Smrg{
15457ec681f3Smrg   nir_foreach_variable_with_modes_safe(var, s, nir_var_shader_out) {
15467ec681f3Smrg      /* We use the driver_location here to avoid introducing a new
15477ec681f3Smrg       * struct or member variable here. The true, updated driver location
15487ec681f3Smrg       * will be written below, after sorting */
15497ec681f3Smrg      switch (var->data.location) {
15507ec681f3Smrg      case FRAG_RESULT_DEPTH:
15517ec681f3Smrg         var->data.driver_location = 1;
15527ec681f3Smrg         break;
15537ec681f3Smrg      case FRAG_RESULT_STENCIL:
15547ec681f3Smrg         var->data.driver_location = 2;
15557ec681f3Smrg         break;
15567ec681f3Smrg      case FRAG_RESULT_SAMPLE_MASK:
15577ec681f3Smrg         var->data.driver_location = 3;
15587ec681f3Smrg         break;
15597ec681f3Smrg      default:
15607ec681f3Smrg         var->data.driver_location = 0;
15617ec681f3Smrg      }
15627ec681f3Smrg   }
15637ec681f3Smrg
15647ec681f3Smrg   nir_sort_variables_with_modes(s, variable_location_cmp,
15657ec681f3Smrg                                 nir_var_shader_out);
15667ec681f3Smrg
15677ec681f3Smrg   unsigned driver_loc = 0;
15687ec681f3Smrg   nir_foreach_variable_with_modes(var, s, nir_var_shader_out) {
15697ec681f3Smrg      var->data.driver_location = driver_loc++;
15707ec681f3Smrg   }
15717ec681f3Smrg}
15727ec681f3Smrg
15737ec681f3Smrg/* Order between stage values so that normal varyings come first,
15747ec681f3Smrg * then sysvalues and then system generated values.
15757ec681f3Smrg */
15767ec681f3Smrguint64_t
15777ec681f3Smrgdxil_reassign_driver_locations(nir_shader* s, nir_variable_mode modes,
15787ec681f3Smrg   uint64_t other_stage_mask)
15797ec681f3Smrg{
15807ec681f3Smrg   nir_foreach_variable_with_modes_safe(var, s, modes) {
15817ec681f3Smrg      /* We use the driver_location here to avoid introducing a new
15827ec681f3Smrg       * struct or member variable here. The true, updated driver location
15837ec681f3Smrg       * will be written below, after sorting */
15847ec681f3Smrg      var->data.driver_location = nir_var_to_dxil_sysvalue_type(var, other_stage_mask);
15857ec681f3Smrg   }
15867ec681f3Smrg
15877ec681f3Smrg   nir_sort_variables_with_modes(s, variable_location_cmp, modes);
15887ec681f3Smrg
15897ec681f3Smrg   uint64_t result = 0;
15907ec681f3Smrg   unsigned driver_loc = 0;
15917ec681f3Smrg   nir_foreach_variable_with_modes(var, s, modes) {
15927ec681f3Smrg      result |= 1ull << var->data.location;
15937ec681f3Smrg      var->data.driver_location = driver_loc++;
15947ec681f3Smrg   }
15957ec681f3Smrg   return result;
15967ec681f3Smrg}
1597