/*
 * Copyright © Microsoft Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "nir.h"
#include "nir_serialize.h"
#include "glsl_types.h"
#include "nir_types.h"
#include "clc_compiler.h"
#include "clc_helpers.h"
#include "clc_nir.h"
#include "../compiler/dxil_nir.h"
#include "../compiler/dxil_nir_lower_int_samplers.h"
#include "../compiler/nir_to_dxil.h"

#include "util/u_debug.h"
#include <util/u_math.h>
#include "spirv/nir_spirv.h"
#include "nir_builder.h"
#include "nir_builtin_builder.h"

#include "git_sha1.h"

struct clc_image_lower_context
{
   struct clc_dxil_metadata *metadata;
   unsigned *num_srvs;
   unsigned *num_uavs;
   nir_deref_instr *deref;
   unsigned num_buf_ids;
   int metadata_index;
};

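/* Creates a new typed uniform (sampler or image) standing in for the image
 * argument currently being lowered. The first typed use keeps the binding
 * pre-assigned to the kernel argument; each additional typed use gets a
 * fresh binding from *num_bindings, and the extra bindings are recorded in
 * the argument's image.buf_ids[] metadata so the runtime can bind the same
 * resource at every slot generated for it.
 */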
static int
lower_image_deref_impl(nir_builder *b, struct clc_image_lower_context *context,
                       const struct glsl_type *new_var_type,
                       unsigned *num_bindings)
{
   nir_variable *in_var = nir_deref_instr_get_variable(context->deref);
   nir_variable *uniform = nir_variable_create(b->shader, nir_var_uniform, new_var_type, NULL);
   uniform->data.access = in_var->data.access;
   uniform->data.binding = in_var->data.binding;
   if (context->num_buf_ids > 0) {
      // Need to assign a new binding
      context->metadata->args[context->metadata_index].
         image.buf_ids[context->num_buf_ids] = uniform->data.binding = (*num_bindings)++;
   }
   context->num_buf_ids++;
   return uniform->data.binding;
}

static int
lower_read_only_image_deref(nir_builder *b, struct clc_image_lower_context *context,
                            nir_alu_type image_type)
{
   nir_variable *in_var = nir_deref_instr_get_variable(context->deref);

   // Non-writeable images should be converted to samplers,
   // since they may have texture operations done on them
   const struct glsl_type *new_var_type =
      glsl_sampler_type(glsl_get_sampler_dim(in_var->type),
            false, glsl_sampler_type_is_array(in_var->type),
            nir_get_glsl_base_type_for_nir_type(image_type | 32));
   return lower_image_deref_impl(b, context, new_var_type, context->num_srvs);
}

static int
lower_read_write_image_deref(nir_builder *b, struct clc_image_lower_context *context,
                             nir_alu_type image_type)
{
   nir_variable *in_var = nir_deref_instr_get_variable(context->deref);
   const struct glsl_type *new_var_type =
      glsl_image_type(glsl_get_sampler_dim(in_var->type),
         glsl_sampler_type_is_array(in_var->type),
         nir_get_glsl_base_type_for_nir_type(image_type | 32));
   return lower_image_deref_impl(b, context, new_var_type, context->num_uavs);
}

static void
clc_lower_input_image_deref(nir_builder *b, struct clc_image_lower_context *context)
{
   // The input variable here isn't actually an image, it's just the
   // image format data.
   //
   // For every use of an image in a different way, we'll add an
   // appropriate uniform to match it. That can result in up to
   // 3 uniforms (float4, int4, uint4) for each image. Only one of these
   // formats will actually produce correct data, but a single kernel
   // could use runtime conditionals to potentially access any of them.
   //
   // If the image is used in a query that doesn't have a corresponding
   // DXIL intrinsic (CL image channel order or channel format), then
   // we'll add a kernel input for that data that'll be lowered by the
   // explicit IO pass later on.
   //
   // After all that, we can remove the image input variable and deref.
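   //
   // For example, a kernel that calls both read_imagef() and read_imageui()
   // on the same image2d_t argument ends up with two typed uniforms here
   // (FLOAT4 and UINT4), each with its own entry in image.buf_ids[].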

   enum image_uniform_type {
      FLOAT4,
      INT4,
      UINT4,
      IMAGE_UNIFORM_TYPE_COUNT
   };

   int image_bindings[IMAGE_UNIFORM_TYPE_COUNT] = {-1, -1, -1};
   nir_ssa_def *format_deref_dest = NULL, *order_deref_dest = NULL;

   nir_variable *in_var = nir_deref_instr_get_variable(context->deref);
   enum gl_access_qualifier access = in_var->data.access;

   context->metadata_index = 0;
   while (context->metadata->args[context->metadata_index].image.buf_ids[0] != in_var->data.binding)
      context->metadata_index++;

   context->num_buf_ids = 0;

   /* Do this in 2 passes:
    * 1. When encountering a strongly-typed access (load/store), replace the deref
    *    with one that references an appropriately typed variable. When encountering
    *    an untyped access (size query), if we have a strongly-typed variable already,
    *    replace the deref to point to it.
    * 2. If there are any references left, they should all be untyped. If we found
    *    a strongly-typed access later in the 1st pass, then just replace the reference.
    *    If we didn't, e.g. the resource is only used for a size query, then pick an
    *    arbitrary type for it.
    */
   for (int pass = 0; pass < 2; ++pass) {
      nir_foreach_use_safe(src, &context->deref->dest.ssa) {
         enum image_uniform_type type;

         if (src->parent_instr->type == nir_instr_type_intrinsic) {
            nir_intrinsic_instr *intrinsic = nir_instr_as_intrinsic(src->parent_instr);
            enum nir_alu_type dest_type;

            b->cursor = nir_before_instr(&intrinsic->instr);

            switch (intrinsic->intrinsic) {
            case nir_intrinsic_image_deref_load:
            case nir_intrinsic_image_deref_store: {
               dest_type = intrinsic->intrinsic == nir_intrinsic_image_deref_load ?
                  nir_intrinsic_dest_type(intrinsic) : nir_intrinsic_src_type(intrinsic);

               switch (nir_alu_type_get_base_type(dest_type)) {
               case nir_type_float: type = FLOAT4; break;
               case nir_type_int: type = INT4; break;
               case nir_type_uint: type = UINT4; break;
               default: unreachable("Unsupported image type for load.");
               }

               int image_binding = image_bindings[type];
               if (image_binding < 0) {
                  image_binding = image_bindings[type] =
                     lower_read_write_image_deref(b, context, dest_type);
               }

               assert((in_var->data.access & ACCESS_NON_WRITEABLE) == 0);
               nir_rewrite_image_intrinsic(intrinsic, nir_imm_int(b, image_binding), false);
               break;
            }

            case nir_intrinsic_image_deref_size: {
               int image_binding = -1;
               for (unsigned i = 0; i < IMAGE_UNIFORM_TYPE_COUNT; ++i) {
                  if (image_bindings[i] >= 0) {
                     image_binding = image_bindings[i];
                     break;
                  }
               }
               if (image_binding < 0) {
                  // Skip for now and come back to it
                  if (pass == 0)
                     break;

                  type = FLOAT4;
                  image_binding = image_bindings[type] =
                     lower_read_write_image_deref(b, context, nir_type_float32);
               }

               assert((in_var->data.access & ACCESS_NON_WRITEABLE) == 0);
               nir_rewrite_image_intrinsic(intrinsic, nir_imm_int(b, image_binding), false);
               break;
            }

            case nir_intrinsic_image_deref_format:
            case nir_intrinsic_image_deref_order: {
               nir_ssa_def **cached_deref = intrinsic->intrinsic == nir_intrinsic_image_deref_format ?
                  &format_deref_dest : &order_deref_dest;
               if (!*cached_deref) {
                  nir_variable *new_input = nir_variable_create(b->shader, nir_var_uniform, glsl_uint_type(), NULL);
                  new_input->data.driver_location = in_var->data.driver_location;
                  if (intrinsic->intrinsic == nir_intrinsic_image_deref_format) {
                     /* Match cl_image_format { image_channel_order, image_channel_data_type }; */
                     new_input->data.driver_location += glsl_get_cl_size(new_input->type);
                  }

                  b->cursor = nir_after_instr(&context->deref->instr);
                  *cached_deref = nir_load_var(b, new_input);
               }

               /* No actual intrinsic needed here, just reference the loaded variable */
               nir_ssa_def_rewrite_uses(&intrinsic->dest.ssa, *cached_deref);
               nir_instr_remove(&intrinsic->instr);
               break;
            }

            default:
               unreachable("Unsupported image intrinsic");
            }
         } else if (src->parent_instr->type == nir_instr_type_tex) {
            assert(in_var->data.access & ACCESS_NON_WRITEABLE);
            nir_tex_instr *tex = nir_instr_as_tex(src->parent_instr);

            switch (nir_alu_type_get_base_type(tex->dest_type)) {
            case nir_type_float: type = FLOAT4; break;
            case nir_type_int: type = INT4; break;
            case nir_type_uint: type = UINT4; break;
            default: unreachable("Unsupported image format for sample.");
            }

            int image_binding = image_bindings[type];
            if (image_binding < 0) {
               image_binding = image_bindings[type] =
                  lower_read_only_image_deref(b, context, tex->dest_type);
            }

            nir_tex_instr_remove_src(tex, nir_tex_instr_src_index(tex, nir_tex_src_texture_deref));
            tex->texture_index = image_binding;
         }
      }
   }

   context->metadata->args[context->metadata_index].image.num_buf_ids = context->num_buf_ids;

   nir_instr_remove(&context->deref->instr);
   exec_node_remove(&in_var->node);
}

static void
clc_lower_images(nir_shader *nir, struct clc_image_lower_context *context)
{
   nir_foreach_function(func, nir) {
      if (!func->is_entrypoint)
         continue;
      assert(func->impl);

      nir_builder b;
      nir_builder_init(&b, func->impl);

      nir_foreach_block(block, func->impl) {
         nir_foreach_instr_safe(instr, block) {
            if (instr->type == nir_instr_type_deref) {
               context->deref = nir_instr_as_deref(instr);

               if (glsl_type_is_image(context->deref->type)) {
                  assert(context->deref->deref_type == nir_deref_type_var);
                  clc_lower_input_image_deref(&b, context);
               }
            }
         }
      }
   }
}

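/* OpenCL kernels consume get_global_id() and friends as 64-bit values, but
 * the corresponding DXIL system values are only 32 bits wide. This pass
 * shrinks the listed compute-ID intrinsics to a 32-bit destination and
 * immediately re-widens the result with u2u64, rewriting the former users
 * to consume the widened value; conceptually:
 *
 *    id32 = load_global_invocation_id   (now 32-bit)
 *    id64 = u2u64 id32                  (replaces the old 64-bit uses)
 */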
static void
clc_lower_64bit_semantics(nir_shader *nir)
{
   nir_foreach_function(func, nir) {
      nir_builder b;
      nir_builder_init(&b, func->impl);

      nir_foreach_block(block, func->impl) {
         nir_foreach_instr_safe(instr, block) {
            if (instr->type == nir_instr_type_intrinsic) {
               nir_intrinsic_instr *intrinsic = nir_instr_as_intrinsic(instr);
               switch (intrinsic->intrinsic) {
               case nir_intrinsic_load_global_invocation_id:
               case nir_intrinsic_load_global_invocation_id_zero_base:
               case nir_intrinsic_load_base_global_invocation_id:
               case nir_intrinsic_load_local_invocation_id:
               case nir_intrinsic_load_workgroup_id:
               case nir_intrinsic_load_workgroup_id_zero_base:
               case nir_intrinsic_load_base_workgroup_id:
               case nir_intrinsic_load_num_workgroups:
                  break;
               default:
                  continue;
               }

               if (nir_instr_ssa_def(instr)->bit_size != 64)
                  continue;

               intrinsic->dest.ssa.bit_size = 32;
               b.cursor = nir_after_instr(instr);

               nir_ssa_def *i64 = nir_u2u64(&b, &intrinsic->dest.ssa);
               nir_ssa_def_rewrite_uses_after(
                  &intrinsic->dest.ssa,
                  i64,
                  i64->parent_instr);
            }
         }
      }
   }
}

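/* CL samplers created with CLK_NORMALIZED_COORDS_FALSE address the image in
 * texel units, while the float tex ops we emit expect [0, 1) normalized
 * coordinates. For float-returning texture ops this pass rescales each
 * coordinate by the reciprocal of the texture size; roughly, per component:
 *
 *    nearest filtering:  u' = (floor(u) + 0.5) / width
 *    linear filtering:   u' = u / width
 *
 * The array slice (last component) is never scaled, only rounded and
 * clamped to [0, layers - 1]. Integer-returning ops are left to the integer
 * sampler lowering pass.
 */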
static void
clc_lower_nonnormalized_samplers(nir_shader *nir,
                                 const dxil_wrap_sampler_state *states)
{
   nir_foreach_function(func, nir) {
      if (!func->is_entrypoint)
         continue;
      assert(func->impl);

      nir_builder b;
      nir_builder_init(&b, func->impl);

      nir_foreach_block(block, func->impl) {
         nir_foreach_instr_safe(instr, block) {
            if (instr->type != nir_instr_type_tex)
               continue;
            nir_tex_instr *tex = nir_instr_as_tex(instr);

            int sampler_src_idx = nir_tex_instr_src_index(tex, nir_tex_src_sampler_deref);
            if (sampler_src_idx == -1)
               continue;

            nir_src *sampler_src = &tex->src[sampler_src_idx].src;
            assert(sampler_src->is_ssa && sampler_src->ssa->parent_instr->type == nir_instr_type_deref);
            nir_variable *sampler = nir_deref_instr_get_variable(
               nir_instr_as_deref(sampler_src->ssa->parent_instr));

            // If the sampler returns ints, we'll handle this in the int lowering pass
            if (nir_alu_type_get_base_type(tex->dest_type) != nir_type_float)
               continue;

            // If sampler uses normalized coords, nothing to do
            if (!states[sampler->data.binding].is_nonnormalized_coords)
               continue;

            b.cursor = nir_before_instr(&tex->instr);

            int coords_idx = nir_tex_instr_src_index(tex, nir_tex_src_coord);
            assert(coords_idx != -1);
            nir_ssa_def *coords =
               nir_ssa_for_src(&b, tex->src[coords_idx].src, tex->coord_components);

            nir_ssa_def *txs = nir_i2f32(&b, nir_get_texture_size(&b, tex));

            // Normalize coords for tex
            nir_ssa_def *scale = nir_frcp(&b, txs);
            nir_ssa_def *comps[4];
            for (unsigned i = 0; i < coords->num_components; ++i) {
               comps[i] = nir_channel(&b, coords, i);
               if (tex->is_array && i == coords->num_components - 1) {
                  // Don't scale the array index, but do clamp it
                  comps[i] = nir_fround_even(&b, comps[i]);
                  comps[i] = nir_fmax(&b, comps[i], nir_imm_float(&b, 0.0f));
                  comps[i] = nir_fmin(&b, comps[i], nir_fsub(&b, nir_channel(&b, txs, i), nir_imm_float(&b, 1.0f)));
                  break;
               }

               // The CTS is pretty clear that this value has to be floored for nearest sampling
               // but must not be for linear sampling.
               if (!states[sampler->data.binding].is_linear_filtering)
                  comps[i] = nir_fadd_imm(&b, nir_ffloor(&b, comps[i]), 0.5f);
               comps[i] = nir_fmul(&b, comps[i], nir_channel(&b, scale, i));
            }
            nir_ssa_def *normalized_coords = nir_vec(&b, comps, coords->num_components);
            nir_instr_rewrite_src(&tex->instr,
                                  &tex->src[coords_idx].src,
                                  nir_src_for_ssa(normalized_coords));
         }
      }
   }
}

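/* Kernel arguments are passed through a single constant buffer. This helper
 * wraps them in a struct { uint arr[N]; } UBO variable whose size (in 4-byte
 * units) covers the CL-packed layout of every uniform, and gives it the next
 * CBV binding. A sketch of the variable it creates, assuming 20 bytes of
 * packed arguments:
 *
 *    struct kernel_inputs { uint arr[5]; };   // data.binding = *cbv_id
 */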
static nir_variable *
add_kernel_inputs_var(struct clc_dxil_object *dxil, nir_shader *nir,
                      unsigned *cbv_id)
{
   if (!dxil->kernel->num_args)
      return NULL;

   struct clc_dxil_metadata *metadata = &dxil->metadata;
   unsigned size = 0;

   nir_foreach_variable_with_modes(var, nir, nir_var_uniform)
      size = MAX2(size,
                  var->data.driver_location +
                  glsl_get_cl_size(var->type));

   size = align(size, 4);

   const struct glsl_type *array_type = glsl_array_type(glsl_uint_type(), size / 4, 4);
   const struct glsl_struct_field field = { array_type, "arr" };
   nir_variable *var =
      nir_variable_create(nir, nir_var_mem_ubo,
         glsl_struct_type(&field, 1, "kernel_inputs", false),
         "kernel_inputs");
   var->data.binding = (*cbv_id)++;
   var->data.how_declared = nir_var_hidden;
   return var;
}

static nir_variable *
add_work_properties_var(struct clc_dxil_object *dxil,
                           struct nir_shader *nir, unsigned *cbv_id)
{
   struct clc_dxil_metadata *metadata = &dxil->metadata;
   const struct glsl_type *array_type =
      glsl_array_type(glsl_uint_type(),
         sizeof(struct clc_work_properties_data) / sizeof(unsigned),
         sizeof(unsigned));
   const struct glsl_struct_field field = { array_type, "arr" };
   nir_variable *var =
      nir_variable_create(nir, nir_var_mem_ubo,
         glsl_struct_type(&field, 1, "kernel_work_properties", false),
         "kernel_work_properties");
   var->data.binding = (*cbv_id)++;
   var->data.how_declared = nir_var_hidden;
   return var;
}

static void
clc_lower_constant_to_ssbo(nir_shader *nir,
                      const struct clc_kernel_info *kerninfo, unsigned *uav_id)
{
   /* Update constant vars and assign them an SSBO binding. */
   nir_foreach_variable_with_modes(var, nir, nir_var_mem_constant) {
      var->data.mode = nir_var_mem_ssbo;
      var->data.binding = (*uav_id)++;
   }

   /* And finally patch all the derefs referencing the constant
    * variables/pointers.
    */
   nir_foreach_function(func, nir) {
      if (!func->is_entrypoint)
         continue;

      assert(func->impl);

      nir_builder b;
      nir_builder_init(&b, func->impl);

      nir_foreach_block(block, func->impl) {
         nir_foreach_instr(instr, block) {
            if (instr->type != nir_instr_type_deref)
               continue;

            nir_deref_instr *deref = nir_instr_as_deref(instr);

            if (deref->modes != nir_var_mem_constant)
               continue;

            deref->modes = nir_var_mem_ssbo;
         }
      }
   }
}

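/* Same idea as clc_lower_constant_to_ssbo(), but for __global pointers: any
 * deref rooted in nir_var_mem_global is retagged as an SSBO access. The UAV
 * bindings for global kernel arguments are assigned separately in
 * clc_spirv_to_dxil() (globconstptr.buf_id), so only the deref modes need to
 * change here.
 */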
static void
clc_lower_global_to_ssbo(nir_shader *nir)
{
   nir_foreach_function(func, nir) {
      if (!func->is_entrypoint)
         continue;

      assert(func->impl);

      nir_foreach_block(block, func->impl) {
         nir_foreach_instr(instr, block) {
            if (instr->type != nir_instr_type_deref)
               continue;

            nir_deref_instr *deref = nir_instr_as_deref(instr);

            if (deref->modes != nir_var_mem_global)
               continue;

            deref->modes = nir_var_mem_ssbo;
         }
      }
   }
}

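/* Recursively serializes a nir_constant into the CL memory layout of the
 * given type: array elements advance by the explicit stride, struct members
 * land at their field offsets, and vector/scalar components are written one
 * by one at their natural byte size.
 */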
static void
copy_const_initializer(const nir_constant *constant, const struct glsl_type *type,
                       uint8_t *data)
{
   unsigned size = glsl_get_cl_size(type);

   if (glsl_type_is_array(type)) {
      const struct glsl_type *elm_type = glsl_get_array_element(type);
      unsigned step_size = glsl_get_explicit_stride(type);

      for (unsigned i = 0; i < constant->num_elements; i++) {
         copy_const_initializer(constant->elements[i], elm_type,
                                data + (i * step_size));
      }
   } else if (glsl_type_is_struct(type)) {
      for (unsigned i = 0; i < constant->num_elements; i++) {
         const struct glsl_type *elm_type = glsl_get_struct_field(type, i);
         int offset = glsl_get_struct_field_offset(type, i);
         copy_const_initializer(constant->elements[i], elm_type, data + offset);
      }
   } else {
      assert(glsl_type_is_vector_or_scalar(type));

      for (unsigned i = 0; i < glsl_get_components(type); i++) {
         switch (glsl_get_bit_size(type)) {
         case 64:
            *((uint64_t *)data) = constant->values[i].u64;
            break;
         case 32:
            *((uint32_t *)data) = constant->values[i].u32;
            break;
         case 16:
            *((uint16_t *)data) = constant->values[i].u16;
            break;
         case 8:
            *((uint8_t *)data) = constant->values[i].u8;
            break;
         default:
            unreachable("Invalid base type");
         }

         data += glsl_get_bit_size(type) / 8;
      }
   }
}

static const struct glsl_type *
get_cast_type(unsigned bit_size)
{
   switch (bit_size) {
   case 64:
      return glsl_int64_t_type();
   case 32:
      return glsl_int_type();
   case 16:
      return glsl_int16_t_type();
   case 8:
      return glsl_int8_t_type();
   }
   unreachable("Invalid bit_size");
}

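/* Helpers for split_unaligned_loads_stores() below. An under-aligned
 * load_deref is rewritten as several smaller loads through a deref cast to
 * an integer type whose size matches the known alignment, then stitched
 * back together with nir_extract_bits; split_unaligned_store() does the
 * mirror-image split for stores. For example, a 32-bit vec4 load that is
 * only known to be 4-byte aligned becomes four scalar 32-bit loads plus a
 * re-pack of the result vector.
 */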
static void
split_unaligned_load(nir_builder *b, nir_intrinsic_instr *intrin, unsigned alignment)
{
   enum gl_access_qualifier access = nir_intrinsic_access(intrin);
   nir_ssa_def *srcs[NIR_MAX_VEC_COMPONENTS * NIR_MAX_VEC_COMPONENTS * sizeof(int64_t) / 8];
   unsigned comp_size = intrin->dest.ssa.bit_size / 8;
   unsigned num_comps = intrin->dest.ssa.num_components;

   b->cursor = nir_before_instr(&intrin->instr);

   nir_deref_instr *ptr = nir_src_as_deref(intrin->src[0]);

   const struct glsl_type *cast_type = get_cast_type(alignment * 8);
   nir_deref_instr *cast = nir_build_deref_cast(b, &ptr->dest.ssa, ptr->modes, cast_type, alignment);

   unsigned num_loads = DIV_ROUND_UP(comp_size * num_comps, alignment);
   for (unsigned i = 0; i < num_loads; ++i) {
      nir_deref_instr *elem = nir_build_deref_ptr_as_array(b, cast, nir_imm_intN_t(b, i, cast->dest.ssa.bit_size));
      srcs[i] = nir_load_deref_with_access(b, elem, access);
   }

   nir_ssa_def *new_dest = nir_extract_bits(b, srcs, num_loads, 0, num_comps, intrin->dest.ssa.bit_size);
   nir_ssa_def_rewrite_uses(&intrin->dest.ssa, new_dest);
   nir_instr_remove(&intrin->instr);
}

static void
split_unaligned_store(nir_builder *b, nir_intrinsic_instr *intrin, unsigned alignment)
{
   enum gl_access_qualifier access = nir_intrinsic_access(intrin);

   assert(intrin->src[1].is_ssa);
   nir_ssa_def *value = intrin->src[1].ssa;
   unsigned comp_size = value->bit_size / 8;
   unsigned num_comps = value->num_components;

   b->cursor = nir_before_instr(&intrin->instr);

   nir_deref_instr *ptr = nir_src_as_deref(intrin->src[0]);

   const struct glsl_type *cast_type = get_cast_type(alignment * 8);
   nir_deref_instr *cast = nir_build_deref_cast(b, &ptr->dest.ssa, ptr->modes, cast_type, alignment);

   unsigned num_stores = DIV_ROUND_UP(comp_size * num_comps, alignment);
   for (unsigned i = 0; i < num_stores; ++i) {
      nir_ssa_def *substore_val = nir_extract_bits(b, &value, 1, i * alignment * 8, 1, alignment * 8);
      nir_deref_instr *elem = nir_build_deref_ptr_as_array(b, cast, nir_imm_intN_t(b, i, cast->dest.ssa.bit_size));
      nir_store_deref_with_access(b, elem, substore_val, ~0, access);
   }

   nir_instr_remove(&intrin->instr);
}

static bool
split_unaligned_loads_stores(nir_shader *shader)
{
   bool progress = false;

   nir_foreach_function(function, shader) {
      if (!function->impl)
         continue;

      nir_builder b;
      nir_builder_init(&b, function->impl);

      nir_foreach_block(block, function->impl) {
         nir_foreach_instr_safe(instr, block) {
            if (instr->type != nir_instr_type_intrinsic)
               continue;
            nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
            if (intrin->intrinsic != nir_intrinsic_load_deref &&
                intrin->intrinsic != nir_intrinsic_store_deref)
               continue;
            nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);

            unsigned align_mul = 0, align_offset = 0;
            nir_get_explicit_deref_align(deref, true, &align_mul, &align_offset);

            unsigned alignment = align_offset ? 1 << (ffs(align_offset) - 1) : align_mul;

            /* We can load anything at 4-byte alignment, except for
             * UBOs (AKA CBs where the granularity is 16 bytes).
             */
            if (alignment >= (deref->modes == nir_var_mem_ubo ? 16 : 4))
               continue;

            nir_ssa_def *val;
            if (intrin->intrinsic == nir_intrinsic_load_deref) {
               assert(intrin->dest.is_ssa);
               val = &intrin->dest.ssa;
            } else {
               assert(intrin->src[1].is_ssa);
               val = intrin->src[1].ssa;
            }

            unsigned natural_alignment =
               val->bit_size / 8 *
               (val->num_components == 3 ? 4 : val->num_components);

            if (alignment >= natural_alignment)
               continue;

            if (intrin->intrinsic == nir_intrinsic_load_deref)
               split_unaligned_load(&b, intrin, alignment);
            else
               split_unaligned_store(&b, intrin, alignment);
            progress = true;
         }
      }
   }

   return progress;
}

static enum pipe_tex_wrap
wrap_from_cl_addressing(unsigned addressing_mode)
{
   switch (addressing_mode)
   {
   default:
   case SAMPLER_ADDRESSING_MODE_NONE:
   case SAMPLER_ADDRESSING_MODE_CLAMP:
      // Since OpenCL's only border color is 0's and D3D specs out-of-bounds loads to return 0, don't apply any wrap mode
      return (enum pipe_tex_wrap)-1;
   case SAMPLER_ADDRESSING_MODE_CLAMP_TO_EDGE: return PIPE_TEX_WRAP_CLAMP_TO_EDGE;
   case SAMPLER_ADDRESSING_MODE_REPEAT: return PIPE_TEX_WRAP_REPEAT;
   case SAMPLER_ADDRESSING_MODE_REPEAT_MIRRORED: return PIPE_TEX_WRAP_MIRROR_REPEAT;
   }
}

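/* Returns true if any ALU instruction in an entrypoint produces a 64-bit
 * float result, i.e. the kernel actually makes use of doubles.
 */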
static bool shader_has_double(nir_shader *nir)
{
   bool progress = false;

   foreach_list_typed(nir_function, func, node, &nir->functions) {
      if (!func->is_entrypoint)
         continue;

      assert(func->impl);

      nir_foreach_block(block, func->impl) {
         nir_foreach_instr_safe(instr, block) {
            if (instr->type != nir_instr_type_alu)
               continue;

            nir_alu_instr *alu = nir_instr_as_alu(instr);
            const nir_op_info *info = &nir_op_infos[alu->op];

            if (info->output_type & nir_type_float &&
                nir_dest_bit_size(alu->dest.dest) == 64)
               return true;
         }
      }
   }

   return false;
}

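/* fdiv typically ends up lowered to a multiply by a reciprocal, which can
 * overflow or flush to zero when the divisor's magnitude is near the float
 * range limits. The two thresholds below are raw 32-bit float patterns:
 * 0x7e800000 is 2^126 and 0x00800000 is FLT_MIN (2^-126). When |b| falls
 * outside that range, both operands are pre-scaled by the same factor (1/4
 * for huge divisors, 2^24 for tiny ones), which leaves the quotient a/b
 * unchanged while keeping the intermediate reciprocal representable.
 */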
static bool
scale_fdiv(nir_shader *nir)
{
   bool progress = false;
   nir_foreach_function(func, nir) {
      if (!func->impl)
         continue;
      nir_builder b;
      nir_builder_init(&b, func->impl);
      nir_foreach_block(block, func->impl) {
         nir_foreach_instr(instr, block) {
            if (instr->type != nir_instr_type_alu)
               continue;
            nir_alu_instr *alu = nir_instr_as_alu(instr);
            if (alu->op != nir_op_fdiv || alu->src[0].src.ssa->bit_size != 32)
               continue;

            b.cursor = nir_before_instr(instr);
            nir_ssa_def *fabs = nir_fabs(&b, alu->src[1].src.ssa);
            nir_ssa_def *big = nir_flt(&b, nir_imm_int(&b, 0x7e800000), fabs);
            nir_ssa_def *small = nir_flt(&b, fabs, nir_imm_int(&b, 0x00800000));

            nir_ssa_def *scaled_down_a = nir_fmul_imm(&b, alu->src[0].src.ssa, 0.25);
            nir_ssa_def *scaled_down_b = nir_fmul_imm(&b, alu->src[1].src.ssa, 0.25);
            nir_ssa_def *scaled_up_a = nir_fmul_imm(&b, alu->src[0].src.ssa, 16777216.0);
            nir_ssa_def *scaled_up_b = nir_fmul_imm(&b, alu->src[1].src.ssa, 16777216.0);

            nir_ssa_def *final_a =
               nir_bcsel(&b, big, scaled_down_a,
              (nir_bcsel(&b, small, scaled_up_a, alu->src[0].src.ssa)));
            nir_ssa_def *final_b =
               nir_bcsel(&b, big, scaled_down_b,
              (nir_bcsel(&b, small, scaled_up_b, alu->src[1].src.ssa)));

            nir_instr_rewrite_src(instr, &alu->src[0].src, nir_src_for_ssa(final_a));
            nir_instr_rewrite_src(instr, &alu->src[1].src, nir_src_for_ssa(final_b));
            progress = true;
         }
      }
   }
   return progress;
}

struct clc_libclc *
clc_libclc_new_dxil(const struct clc_logger *logger,
                    const struct clc_libclc_dxil_options *options)
{
   struct clc_libclc_options clc_options = {
      .optimize = options->optimize,
      .nir_options = dxil_get_nir_compiler_options(),
   };

   return clc_libclc_new(logger, &clc_options);
}

bool
clc_spirv_to_dxil(struct clc_libclc *lib,
                  const struct clc_binary *linked_spirv,
                  const struct clc_parsed_spirv *parsed_data,
                  const char *entrypoint,
                  const struct clc_runtime_kernel_conf *conf,
                  const struct clc_spirv_specialization_consts *consts,
                  const struct clc_logger *logger,
                  struct clc_dxil_object *out_dxil)
{
   struct nir_shader *nir;

   for (unsigned i = 0; i < parsed_data->num_kernels; i++) {
      if (!strcmp(parsed_data->kernels[i].name, entrypoint)) {
         out_dxil->kernel = &parsed_data->kernels[i];
         break;
      }
   }

   if (!out_dxil->kernel) {
      clc_error(logger, "no '%s' kernel found", entrypoint);
      return false;
   }

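   /* Pointer representation (a rough summary of these NIR address formats):
    * constant and global pointers become a 32-bit buffer index plus a 32-bit
    * offset packed into a single 64-bit value, while shared and function-temp
    * pointers are 32-bit offsets carried in 64-bit values, so kernels still
    * observe the 64-bit pointer size OpenCL expects.
    */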
   const struct spirv_to_nir_options spirv_options = {
      .environment = NIR_SPIRV_OPENCL,
      .clc_shader = clc_libclc_get_clc_shader(lib),
      .constant_addr_format = nir_address_format_32bit_index_offset_pack64,
      .global_addr_format = nir_address_format_32bit_index_offset_pack64,
      .shared_addr_format = nir_address_format_32bit_offset_as_64bit,
      .temp_addr_format = nir_address_format_32bit_offset_as_64bit,
      .float_controls_execution_mode = FLOAT_CONTROLS_DENORM_FLUSH_TO_ZERO_FP32,
      .caps = {
         .address = true,
         .float64 = true,
         .int8 = true,
         .int16 = true,
         .int64 = true,
         .kernel = true,
         .kernel_image = true,
         .kernel_image_read_write = true,
         .literal_sampler = true,
         .printf = true,
      },
   };
   nir_shader_compiler_options nir_options =
      *dxil_get_nir_compiler_options();

   if (conf && conf->lower_bit_size & 64) {
      nir_options.lower_pack_64_2x32_split = false;
      nir_options.lower_unpack_64_2x32_split = false;
      nir_options.lower_int64_options = ~0;
   }

   if (conf && conf->lower_bit_size & 16)
      nir_options.support_16bit_alu = true;

   glsl_type_singleton_init_or_ref();

   nir = spirv_to_nir(linked_spirv->data, linked_spirv->size / 4,
                      consts ? (struct nir_spirv_specialization *)consts->specializations : NULL,
                      consts ? consts->num_specializations : 0,
                      MESA_SHADER_KERNEL, entrypoint,
                      &spirv_options,
                      &nir_options);
   if (!nir) {
      clc_error(logger, "spirv_to_nir() failed");
      goto err_free_dxil;
   }
   nir->info.workgroup_size_variable = true;

   NIR_PASS_V(nir, nir_lower_goto_ifs);
   NIR_PASS_V(nir, nir_opt_dead_cf);

   struct clc_dxil_metadata *metadata = &out_dxil->metadata;

   metadata->args = calloc(out_dxil->kernel->num_args,
                           sizeof(*metadata->args));
   if (!metadata->args) {
      clc_error(logger, "failed to allocate arg positions");
      goto err_free_dxil;
   }

   {
      bool progress;
      do
      {
         progress = false;
         NIR_PASS(progress, nir, nir_copy_prop);
         NIR_PASS(progress, nir, nir_opt_copy_prop_vars);
         NIR_PASS(progress, nir, nir_opt_deref);
         NIR_PASS(progress, nir, nir_opt_dce);
         NIR_PASS(progress, nir, nir_opt_undef);
         NIR_PASS(progress, nir, nir_opt_constant_folding);
         NIR_PASS(progress, nir, nir_opt_cse);
         NIR_PASS(progress, nir, nir_lower_vars_to_ssa);
         NIR_PASS(progress, nir, nir_opt_algebraic);
      } while (progress);
   }

   // Inline all functions first, following the recipe described in the
   // comment on nir_inline_functions.
8847ec681f3Smrg   NIR_PASS_V(nir, nir_lower_variable_initializers, nir_var_function_temp);
8857ec681f3Smrg   NIR_PASS_V(nir, nir_lower_returns);
8867ec681f3Smrg   NIR_PASS_V(nir, nir_lower_libclc, clc_libclc_get_clc_shader(lib));
8877ec681f3Smrg   NIR_PASS_V(nir, nir_inline_functions);
8887ec681f3Smrg
8897ec681f3Smrg   // Pick off the single entrypoint that we want.
8907ec681f3Smrg   foreach_list_typed_safe(nir_function, func, node, &nir->functions) {
8917ec681f3Smrg      if (!func->is_entrypoint)
8927ec681f3Smrg         exec_node_remove(&func->node);
8937ec681f3Smrg   }
8947ec681f3Smrg   assert(exec_list_length(&nir->functions) == 1);
8957ec681f3Smrg
8967ec681f3Smrg   {
8977ec681f3Smrg      bool progress;
8987ec681f3Smrg      do
8997ec681f3Smrg      {
9007ec681f3Smrg         progress = false;
9017ec681f3Smrg         NIR_PASS(progress, nir, nir_copy_prop);
9027ec681f3Smrg         NIR_PASS(progress, nir, nir_opt_copy_prop_vars);
9037ec681f3Smrg         NIR_PASS(progress, nir, nir_opt_deref);
9047ec681f3Smrg         NIR_PASS(progress, nir, nir_opt_dce);
9057ec681f3Smrg         NIR_PASS(progress, nir, nir_opt_undef);
9067ec681f3Smrg         NIR_PASS(progress, nir, nir_opt_constant_folding);
9077ec681f3Smrg         NIR_PASS(progress, nir, nir_opt_cse);
9087ec681f3Smrg         NIR_PASS(progress, nir, nir_split_var_copies);
9097ec681f3Smrg         NIR_PASS(progress, nir, nir_lower_var_copies);
9107ec681f3Smrg         NIR_PASS(progress, nir, nir_lower_vars_to_ssa);
9117ec681f3Smrg         NIR_PASS(progress, nir, nir_opt_algebraic);
9127ec681f3Smrg         NIR_PASS(progress, nir, nir_opt_if, true);
9137ec681f3Smrg         NIR_PASS(progress, nir, nir_opt_dead_cf);
9147ec681f3Smrg         NIR_PASS(progress, nir, nir_opt_remove_phis);
9157ec681f3Smrg         NIR_PASS(progress, nir, nir_opt_peephole_select, 8, true, true);
9167ec681f3Smrg         NIR_PASS(progress, nir, nir_lower_vec3_to_vec4, nir_var_mem_generic | nir_var_uniform);
9177ec681f3Smrg      } while (progress);
9187ec681f3Smrg   }
9197ec681f3Smrg
9207ec681f3Smrg   NIR_PASS_V(nir, scale_fdiv);
9217ec681f3Smrg
9227ec681f3Smrg   dxil_wrap_sampler_state int_sampler_states[PIPE_MAX_SHADER_SAMPLER_VIEWS] = { {{0}} };
9237ec681f3Smrg   unsigned sampler_id = 0;
9247ec681f3Smrg
9257ec681f3Smrg   struct exec_list inline_samplers_list;
9267ec681f3Smrg   exec_list_make_empty(&inline_samplers_list);
9277ec681f3Smrg
9287ec681f3Smrg   // Move inline samplers to the end of the uniforms list
9297ec681f3Smrg   nir_foreach_variable_with_modes_safe(var, nir, nir_var_uniform) {
9307ec681f3Smrg      if (glsl_type_is_sampler(var->type) && var->data.sampler.is_inline_sampler) {
9317ec681f3Smrg         exec_node_remove(&var->node);
9327ec681f3Smrg         exec_list_push_tail(&inline_samplers_list, &var->node);
9337ec681f3Smrg      }
9347ec681f3Smrg   }
9357ec681f3Smrg   exec_node_insert_list_after(exec_list_get_tail(&nir->variables), &inline_samplers_list);
9367ec681f3Smrg
9377ec681f3Smrg   NIR_PASS_V(nir, nir_lower_variable_initializers, ~(nir_var_function_temp | nir_var_shader_temp));
9387ec681f3Smrg
9397ec681f3Smrg   // Lower memcpy
9407ec681f3Smrg   NIR_PASS_V(nir, dxil_nir_lower_memcpy_deref);
9417ec681f3Smrg
9427ec681f3Smrg   // Ensure the printf struct has explicit types, but we'll throw away the scratch size, because we haven't
9437ec681f3Smrg   // necessarily removed all temp variables (e.g. the printf struct itself) at this point, so we'll rerun this later
9447ec681f3Smrg   assert(nir->scratch_size == 0);
9457ec681f3Smrg   NIR_PASS_V(nir, nir_lower_vars_to_explicit_types, nir_var_function_temp, glsl_get_cl_type_size_align);
9467ec681f3Smrg
9477ec681f3Smrg   nir_lower_printf_options printf_options = {
9487ec681f3Smrg      .treat_doubles_as_floats = true,
9497ec681f3Smrg      .max_buffer_size = 1024 * 1024
9507ec681f3Smrg   };
9517ec681f3Smrg   NIR_PASS_V(nir, nir_lower_printf, &printf_options);
9527ec681f3Smrg
9537ec681f3Smrg   metadata->printf.info_count = nir->printf_info_count;
9547ec681f3Smrg   metadata->printf.infos = calloc(nir->printf_info_count, sizeof(struct clc_printf_info));
9557ec681f3Smrg   for (unsigned i = 0; i < nir->printf_info_count; i++) {
9567ec681f3Smrg      metadata->printf.infos[i].str = malloc(nir->printf_info[i].string_size);
9577ec681f3Smrg      memcpy(metadata->printf.infos[i].str, nir->printf_info[i].strings, nir->printf_info[i].string_size);
9587ec681f3Smrg      metadata->printf.infos[i].num_args = nir->printf_info[i].num_args;
9597ec681f3Smrg      metadata->printf.infos[i].arg_sizes = malloc(nir->printf_info[i].num_args * sizeof(unsigned));
9607ec681f3Smrg      memcpy(metadata->printf.infos[i].arg_sizes, nir->printf_info[i].arg_sizes, nir->printf_info[i].num_args * sizeof(unsigned));
9617ec681f3Smrg   }
9627ec681f3Smrg
9637ec681f3Smrg   // copy propagate to prepare for lower_explicit_io
9647ec681f3Smrg   NIR_PASS_V(nir, nir_split_var_copies);
9657ec681f3Smrg   NIR_PASS_V(nir, nir_opt_copy_prop_vars);
9667ec681f3Smrg   NIR_PASS_V(nir, nir_lower_var_copies);
9677ec681f3Smrg   NIR_PASS_V(nir, nir_lower_vars_to_ssa);
9687ec681f3Smrg   NIR_PASS_V(nir, nir_lower_alu);
9697ec681f3Smrg   NIR_PASS_V(nir, nir_opt_dce);
9707ec681f3Smrg   NIR_PASS_V(nir, nir_opt_deref);
9717ec681f3Smrg
9727ec681f3Smrg   // For uniforms (kernel inputs), run this before adjusting variable list via image/sampler lowering
9737ec681f3Smrg   NIR_PASS_V(nir, nir_lower_vars_to_explicit_types, nir_var_uniform, glsl_get_cl_type_size_align);
9747ec681f3Smrg
9757ec681f3Smrg   // Calculate input offsets/metadata.
9767ec681f3Smrg   unsigned uav_id = 0;
9777ec681f3Smrg   nir_foreach_variable_with_modes(var, nir, nir_var_uniform) {
9787ec681f3Smrg      int i = var->data.location;
9797ec681f3Smrg      if (i < 0)
9807ec681f3Smrg         continue;
9817ec681f3Smrg
9827ec681f3Smrg      unsigned size = glsl_get_cl_size(var->type);
9837ec681f3Smrg
9847ec681f3Smrg      metadata->args[i].offset = var->data.driver_location;
9857ec681f3Smrg      metadata->args[i].size = size;
9867ec681f3Smrg      metadata->kernel_inputs_buf_size = MAX2(metadata->kernel_inputs_buf_size,
9877ec681f3Smrg         var->data.driver_location + size);
9887ec681f3Smrg      if ((out_dxil->kernel->args[i].address_qualifier == CLC_KERNEL_ARG_ADDRESS_GLOBAL ||
9897ec681f3Smrg         out_dxil->kernel->args[i].address_qualifier == CLC_KERNEL_ARG_ADDRESS_CONSTANT) &&
9907ec681f3Smrg         // Ignore images during this pass - global memory buffers need to have contiguous bindings
9917ec681f3Smrg         !glsl_type_is_image(var->type)) {
9927ec681f3Smrg         metadata->args[i].globconstptr.buf_id = uav_id++;
9937ec681f3Smrg      } else if (glsl_type_is_sampler(var->type)) {
9947ec681f3Smrg         unsigned address_mode = conf ? conf->args[i].sampler.addressing_mode : 0u;
9957ec681f3Smrg         int_sampler_states[sampler_id].wrap[0] =
9967ec681f3Smrg            int_sampler_states[sampler_id].wrap[1] =
9977ec681f3Smrg            int_sampler_states[sampler_id].wrap[2] = wrap_from_cl_addressing(address_mode);
9987ec681f3Smrg         int_sampler_states[sampler_id].is_nonnormalized_coords =
9997ec681f3Smrg            conf ? !conf->args[i].sampler.normalized_coords : 0;
10007ec681f3Smrg         int_sampler_states[sampler_id].is_linear_filtering =
10017ec681f3Smrg            conf ? conf->args[i].sampler.linear_filtering : 0;
10027ec681f3Smrg         metadata->args[i].sampler.sampler_id = var->data.binding = sampler_id++;
10037ec681f3Smrg      }
10047ec681f3Smrg   }
10057ec681f3Smrg
10067ec681f3Smrg   unsigned num_global_inputs = uav_id;
10077ec681f3Smrg
10087ec681f3Smrg   // Second pass over inputs to calculate image bindings
10097ec681f3Smrg   unsigned srv_id = 0;
10107ec681f3Smrg   nir_foreach_variable_with_modes(var, nir, nir_var_uniform) {
10117ec681f3Smrg      int i = var->data.location;
10127ec681f3Smrg      if (i < 0)
10137ec681f3Smrg         continue;
10147ec681f3Smrg
10157ec681f3Smrg      if (glsl_type_is_image(var->type)) {
10167ec681f3Smrg         if (var->data.access == ACCESS_NON_WRITEABLE) {
10177ec681f3Smrg            metadata->args[i].image.buf_ids[0] = srv_id++;
10187ec681f3Smrg         } else {
10197ec681f3Smrg            // Write or read-write are UAVs
10207ec681f3Smrg            metadata->args[i].image.buf_ids[0] = uav_id++;
10217ec681f3Smrg         }
10227ec681f3Smrg
10237ec681f3Smrg         metadata->args[i].image.num_buf_ids = 1;
10247ec681f3Smrg         var->data.binding = metadata->args[i].image.buf_ids[0];
10257ec681f3Smrg      }
10267ec681f3Smrg   }
10277ec681f3Smrg
10287ec681f3Smrg   // Before removing dead uniforms, dedupe constant samplers to make more dead uniforms
10297ec681f3Smrg   NIR_PASS_V(nir, clc_nir_dedupe_const_samplers);
10307ec681f3Smrg   NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_uniform | nir_var_mem_ubo | nir_var_mem_constant | nir_var_function_temp, NULL);
10317ec681f3Smrg
10327ec681f3Smrg   // Fill out inline sampler metadata, now that they've been deduped and dead ones removed
10337ec681f3Smrg   nir_foreach_variable_with_modes(var, nir, nir_var_uniform) {
10347ec681f3Smrg      if (glsl_type_is_sampler(var->type) && var->data.sampler.is_inline_sampler) {
10357ec681f3Smrg         int_sampler_states[sampler_id].wrap[0] =
10367ec681f3Smrg            int_sampler_states[sampler_id].wrap[1] =
10377ec681f3Smrg            int_sampler_states[sampler_id].wrap[2] =
10387ec681f3Smrg            wrap_from_cl_addressing(var->data.sampler.addressing_mode);
10397ec681f3Smrg         int_sampler_states[sampler_id].is_nonnormalized_coords =
10407ec681f3Smrg            !var->data.sampler.normalized_coordinates;
10417ec681f3Smrg         int_sampler_states[sampler_id].is_linear_filtering =
10427ec681f3Smrg            var->data.sampler.filter_mode == SAMPLER_FILTER_MODE_LINEAR;
10437ec681f3Smrg         var->data.binding = sampler_id++;
10447ec681f3Smrg
10457ec681f3Smrg         assert(metadata->num_const_samplers < CLC_MAX_SAMPLERS);
10467ec681f3Smrg         metadata->const_samplers[metadata->num_const_samplers].sampler_id = var->data.binding;
10477ec681f3Smrg         metadata->const_samplers[metadata->num_const_samplers].addressing_mode = var->data.sampler.addressing_mode;
10487ec681f3Smrg         metadata->const_samplers[metadata->num_const_samplers].normalized_coords = var->data.sampler.normalized_coordinates;
10497ec681f3Smrg         metadata->const_samplers[metadata->num_const_samplers].filter_mode = var->data.sampler.filter_mode;
10507ec681f3Smrg         metadata->num_const_samplers++;
10517ec681f3Smrg      }
10527ec681f3Smrg   }
10537ec681f3Smrg
10547ec681f3Smrg   // Needs to come before lower_explicit_io
10557ec681f3Smrg   NIR_PASS_V(nir, nir_lower_readonly_images_to_tex, false);
   struct clc_image_lower_context image_lower_context = { metadata, &srv_id, &uav_id };
   NIR_PASS_V(nir, clc_lower_images, &image_lower_context);
   NIR_PASS_V(nir, clc_lower_nonnormalized_samplers, int_sampler_states);
   NIR_PASS_V(nir, nir_lower_samplers);
   NIR_PASS_V(nir, dxil_lower_sample_to_txf_for_integer_tex,
              int_sampler_states, NULL, 14.0f);

   NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_mem_shared | nir_var_function_temp, NULL);

   nir->scratch_size = 0;
   NIR_PASS_V(nir, nir_lower_vars_to_explicit_types,
              nir_var_mem_shared | nir_var_function_temp | nir_var_mem_global | nir_var_mem_constant,
              glsl_get_cl_type_size_align);

   NIR_PASS_V(nir, dxil_nir_lower_ubo_to_temp);
   NIR_PASS_V(nir, clc_lower_constant_to_ssbo, out_dxil->kernel, &uav_id);
   NIR_PASS_V(nir, clc_lower_global_to_ssbo);

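   /* printf() output, if any, goes to its own UAV; a uav_id of -1 in the
    * metadata signals that the kernel has no printf calls. */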
   bool has_printf = false;
   NIR_PASS(has_printf, nir, clc_lower_printf_base, uav_id);
   metadata->printf.uav_id = has_printf ? uav_id++ : -1;

   NIR_PASS_V(nir, dxil_nir_lower_deref_ssbo);

   NIR_PASS_V(nir, split_unaligned_loads_stores);

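   /* Pointers are 64-bit in CL. SSBO (global/constant) accesses are lowered to
    * a 32-bit buffer index + offset packed into those 64 bits, while shared,
    * function-temp and uniform accesses only need a 32-bit offset carried in a
    * 64-bit value. */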
   assert(nir->info.cs.ptr_size == 64);
   NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_ssbo,
              nir_address_format_32bit_index_offset_pack64);
   NIR_PASS_V(nir, nir_lower_explicit_io,
              nir_var_mem_shared | nir_var_function_temp | nir_var_uniform,
              nir_address_format_32bit_offset_as_64bit);

   NIR_PASS_V(nir, nir_lower_system_values);

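   /* When the runtime splits a dispatch into chunks, it can provide base
    * offsets for the global invocation / workgroup IDs; only expose those
    * system values if the configuration says the runtime supports them. */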
   nir_lower_compute_system_values_options compute_options = {
      .has_base_global_invocation_id = (conf && conf->support_global_work_id_offsets),
      .has_base_workgroup_id = (conf && conf->support_workgroup_id_offsets),
   };
   NIR_PASS_V(nir, nir_lower_compute_system_values, &compute_options);

   NIR_PASS_V(nir, clc_lower_64bit_semantics);

   NIR_PASS_V(nir, nir_opt_deref);
   NIR_PASS_V(nir, nir_lower_vars_to_ssa);

   unsigned cbv_id = 0;

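   /* Kernel arguments and work properties each live in their own constant
    * buffer; cbv_id hands out the CBV bindings, which are recorded in the
    * metadata further down. */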
   nir_variable *inputs_var =
      add_kernel_inputs_var(out_dxil, nir, &cbv_id);
   nir_variable *work_properties_var =
      add_work_properties_var(out_dxil, nir, &cbv_id);

   memcpy(metadata->local_size, nir->info.workgroup_size,
          sizeof(metadata->local_size));
   memcpy(metadata->local_size_hint, nir->info.cs.workgroup_size_hint,
          sizeof(metadata->local_size_hint));

   // Patch the local size before calling clc_nir_lower_system_values().
   if (conf) {
      for (unsigned i = 0; i < ARRAY_SIZE(nir->info.workgroup_size); i++) {
         if (!conf->local_size[i] ||
             conf->local_size[i] == nir->info.workgroup_size[i])
            continue;

         if (nir->info.workgroup_size[i] &&
             nir->info.workgroup_size[i] != conf->local_size[i]) {
            debug_printf("D3D12: runtime local size does not match reqd_work_group_size() values\n");
            goto err_free_dxil;
         }

         nir->info.workgroup_size[i] = conf->local_size[i];
      }
      memcpy(metadata->local_size, nir->info.workgroup_size,
             sizeof(metadata->local_size));
   } else {
      /* Make sure there's at least one thread that's set to run */
      for (unsigned i = 0; i < ARRAY_SIZE(nir->info.workgroup_size); i++) {
         if (nir->info.workgroup_size[i] == 0)
            nir->info.workgroup_size[i] = 1;
      }
   }

   NIR_PASS_V(nir, clc_nir_lower_kernel_input_loads, inputs_var);
   NIR_PASS_V(nir, split_unaligned_loads_stores);
   NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_ubo,
              nir_address_format_32bit_index_offset);
   NIR_PASS_V(nir, clc_nir_lower_system_values, work_properties_var);
   NIR_PASS_V(nir, dxil_nir_lower_loads_stores_to_dxil);
   NIR_PASS_V(nir, dxil_nir_opt_alu_deref_srcs);
   NIR_PASS_V(nir, dxil_nir_lower_atomics_to_dxil);
   NIR_PASS_V(nir, nir_lower_fp16_casts);
   NIR_PASS_V(nir, nir_lower_convert_alu_types, NULL);

   // Convert pack to pack_split
   NIR_PASS_V(nir, nir_lower_pack);
   // Lower pack_split to bit math
   NIR_PASS_V(nir, nir_opt_algebraic);

   NIR_PASS_V(nir, nir_opt_dce);

   nir_validate_shader(nir, "Validate before feeding NIR to the DXIL compiler");
   struct nir_to_dxil_options opts = {
      .interpolate_at_vertex = false,
      .lower_int16 = (conf && (conf->lower_bit_size & 16) != 0),
      .ubo_binding_offset = 0,
      .disable_math_refactoring = true,
      .num_kernel_globals = num_global_inputs,
   };

   for (unsigned i = 0; i < out_dxil->kernel->num_args; i++) {
      if (out_dxil->kernel->args[i].address_qualifier != CLC_KERNEL_ARG_ADDRESS_LOCAL)
         continue;

      /* If we don't have the runtime conf yet, we just pick a placeholder size.
       * This will be adjusted when clc_spirv_to_dxil() is called with a conf
       * argument.
       */
      unsigned size = 4;
      if (conf && conf->args)
         size = conf->args[i].localptr.size;

      /* The alignment required for the pointee type is not easy to get from
       * here, so let's base our logic on the size itself. Anything bigger than
       * the maximum alignment constraint (which is 128 bytes, since ulong16 and
       * double16 are the biggest base types) should be aligned on this
       * maximum alignment constraint. For smaller types, we use the size
       * itself to calculate the alignment.
       */
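      /* For example: size == 12 gives 1 << (ffs(12) - 1) == 4, while any
       * size >= 128 is clamped to the 128-byte maximum. */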
      unsigned alignment = size < 128 ? (1 << (ffs(size) - 1)) : 128;

      nir->info.shared_size = align(nir->info.shared_size, alignment);
      metadata->args[i].localptr.sharedmem_offset = nir->info.shared_size;
      nir->info.shared_size += size;
   }

   metadata->local_mem_size = nir->info.shared_size;
   metadata->priv_mem_size = nir->scratch_size;

   /* DXIL double math is too limited compared to what NIR expects. Let's refuse
    * to compile a shader when it contains double operations until we have
    * double lowering hooked up.
    */
   if (shader_has_double(nir)) {
      clc_error(logger, "NIR shader contains doubles, which we don't support yet");
      goto err_free_dxil;
   }

   struct blob tmp;
   if (!nir_to_dxil(nir, &opts, &tmp)) {
      debug_printf("D3D12: nir_to_dxil failed\n");
      goto err_free_dxil;
   }

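   /* Constant-address-space data was turned into SSBOs earlier
    * (clc_lower_constant_to_ssbo); serialize its initializer contents into the
    * metadata so the runtime can upload them to the matching UAV binding. */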
   nir_foreach_variable_with_modes(var, nir, nir_var_mem_ssbo) {
      if (var->constant_initializer) {
         if (glsl_type_is_array(var->type)) {
            int size = align(glsl_get_cl_size(var->type), 4);
            uint8_t *data = malloc(size);
            if (!data)
               goto err_free_dxil;

            copy_const_initializer(var->constant_initializer, var->type, data);
            metadata->consts[metadata->num_consts].data = data;
            metadata->consts[metadata->num_consts].size = size;
            metadata->consts[metadata->num_consts].uav_id = var->data.binding;
            metadata->num_consts++;
         } else
            unreachable("unexpected constant initializer");
      }
   }

   metadata->kernel_inputs_cbv_id = inputs_var ? inputs_var->data.binding : 0;
   metadata->work_properties_cbv_id = work_properties_var->data.binding;
   metadata->num_uavs = uav_id;
   metadata->num_srvs = srv_id;
   metadata->num_samplers = sampler_id;

   ralloc_free(nir);
   glsl_type_singleton_decref();

   blob_finish_get_buffer(&tmp, &out_dxil->binary.data,
                          &out_dxil->binary.size);
   return true;

err_free_dxil:
   clc_free_dxil_object(out_dxil);
   return false;
}

void clc_free_dxil_object(struct clc_dxil_object *dxil)
{
   for (unsigned i = 0; i < dxil->metadata.num_consts; i++)
      free(dxil->metadata.consts[i].data);

   for (unsigned i = 0; i < dxil->metadata.printf.info_count; i++) {
      free(dxil->metadata.printf.infos[i].arg_sizes);
      free(dxil->metadata.printf.infos[i].str);
   }
   free(dxil->metadata.printf.infos);

   free(dxil->binary.data);
}

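/* MESA_GIT_SHA1 (from git_sha1.h) normally embeds the commit hash after a
 * "git-" prefix; treat its leading hex digits as a 64-bit version number, and
 * return 0 for builds without git information. The exact string layout is a
 * build-time detail, so this is best-effort. */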
uint64_t clc_compiler_get_version(void)
{
   const char sha1[] = MESA_GIT_SHA1;
   const char *dash = strchr(sha1, '-');
   if (dash) {
      return strtoull(dash + 1, NULL, 16);
   }
   return 0;
}